Viewing File: /usr/lib/python3.6/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-36.pyc

3

�Pf�)@sddlmZmZmZddlmZmZddlmZm	Z	ddl
Z
ddlZddlm
Z
ddlmZmZmZmZddlmZdd	lmZdd
lmZyddlmZWnek
r�eZYnXedd
�eD��Zedd
�eD��Zedd
�eD��Zeeddg�BZdZej �rJedFdk�r&ej!d�dk�s*t"�ej#eddG�e$d�d�Z%n
ej#e�Z%e&dddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4g �Z'ej#d5�Z(iZ)Gd6d7�d7e*�Z+d8d9�Z,Gd:d;�d;e*�Z-Gd<d=�d=e-�Z.Gd>d?�d?e/�Z0Gd@dA�dAe*�Z1GdBdC�dCe*�Z2dDdE�Z3dS)H�)�absolute_import�division�unicode_literals)�	text_type�binary_type)�http_client�urllibN)�webencodings�)�EOF�spaceCharacters�asciiLetters�asciiUppercase)�ReparseException)�_utils)�StringIO)�BytesIOcCsg|]}|jd��qS)�ascii)�encode)�.0�item�r�"/usr/lib/python3.6/_inputstream.py�
<listcomp>srcCsg|]}|jd��qS)r)r)rrrrrrscCsg|]}|jd��qS)r)r)rrrrrrs�>�<u�[---Ÿ﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]�]z"\uD800-\uDFFF"i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��	i��	i��
i��
i��i��i��i��i��
i��
i��i��i��i��i��i��z[	-
 -/:-@[-`{-~]c@sHeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dS)�BufferedStreamz�Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    cCs||_g|_ddg|_dS)Nr
r���)�stream�buffer�position)�selfrrrr�__init__@szBufferedStream.__init__cCs@d}x(|jd|jd�D]}|t|�7}qW||jd7}|S)Nrr
)r r!�len)r"�pos�chunkrrr�tellEs
zBufferedStream.tellcCsX||j�kst�|}d}x0t|j|�|krH|t|j|�8}|d7}qW||g|_dS)Nrr
)�_bufferedBytes�AssertionErrorr$r r!)r"r%�offset�irrr�seekLszBufferedStream.seekcCsT|js|j|�S|jdt|j�krF|jdt|jd�krF|j|�S|j|�SdS)Nrr
r)r �_readStreamr!r$�_readFromBuffer)r"�bytesrrr�readUs

zBufferedStream.readcCstdd�|jD��S)NcSsg|]}t|��qSr)r$)rrrrrr_sz1BufferedStream._bufferedBytes.<locals>.<listcomp>)�sumr )r"rrrr(^szBufferedStream._bufferedBytescCs<|jj|�}|jj|�|jdd7<t|�|jd<|S)Nrr
)rr0r �appendr!r$)r"r/�datarrrr-as
zBufferedStream._readStreamcCs�|}g}|jd}|jd}x�|t|j�kr�|dkr�|dks@t�|j|}|t|�|krn|}|||g|_n"t|�|}|t|�g|_|d7}|j||||��||8}d}qW|r�|j|j|��dj|�S)Nrr
�)r!r$r r)r2r-�join)r"r/ZremainingBytes�rvZbufferIndexZbufferOffsetZbufferedDataZbytesToReadrrrr.hs&


zBufferedStream._readFromBufferN)�__name__�
__module__�__qualname__�__doc__r#r'r,r0r(r-r.rrrrr9s		rcKs�t|tj�s(t|tjj�r.t|jtj�r.d}n&t|d�rJt|jd�t	�}n
t|t	�}|r�dd�|D�}|rvt
d|��t|f|�St|f|�SdS)NFr0rcSsg|]}|jd�r|�qS)Z	_encoding)�endswith)r�xrrrr�sz#HTMLInputStream.<locals>.<listcomp>z3Cannot set an encoding with a unicode input, set %r)
�
isinstancerZHTTPResponserZresponseZaddbase�fp�hasattrr0r�	TypeError�HTMLUnicodeInputStream�HTMLBinaryInputStream)�source�kwargsZ	isUnicodeZ	encodingsrrr�HTMLInputStream�s

rEc@speZdZdZdZdd�Zdd�Zdd�Zd	d
�Zdd�Z	d
d�Z
ddd�Zdd�Zdd�Z
ddd�Zdd�ZdS)rAz�Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i(cCsZtjsd|_ntd�dkr$|j|_n|j|_dg|_td�df|_|j	|�|_
|j�dS)a�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        Nu􏿿r
rzutf-8�certain)r�supports_lone_surrogates�reportCharacterErrorsr$�characterErrorsUCS4�characterErrorsUCS2ZnewLines�lookupEncoding�charEncoding�
openStream�
dataStream�reset)r"rCrrrr#�s
zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)N�r)r&�	chunkSize�chunkOffset�errors�prevNumLines�prevNumCols�_bufferedCharacter)r"rrrrO�szHTMLUnicodeInputStream.resetcCst|d�r|}nt|�}|S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r0)r?r)r"rCrrrrrM�s
z!HTMLUnicodeInputStream.openStreamcCsT|j}|jdd|�}|j|}|jdd|�}|dkr@|j|}n||d}||fS)N�
rr
r)r&�countrT�rfindrU)r"r*r&ZnLinesZpositionLineZlastLinePosZpositionColumnrrr�	_position�s
z HTMLUnicodeInputStream._positioncCs|j|j�\}}|d|fS)z:Returns (line, col) of the current position in the stream.r
)rZrR)r"�line�colrrrr!�szHTMLUnicodeInputStream.positioncCs6|j|jkr|j�stS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r
)rRrQ�	readChunkrr&)r"rR�charrrrr^�s

zHTMLUnicodeInputStream.charNcCs�|dkr|j}|j|j�\|_|_d|_d|_d|_|jj|�}|j	rX|j	|}d|_	n|s`dSt
|�dkr�t|d�}|dks�d|ko�dknr�|d
|_	|dd�}|jr�|j|�|j
dd	�}|j
d
d	�}||_t
|�|_dS)NrPrFr
�
i�i��z
rW�
Trrr)�_defaultChunkSizerZrQrTrUr&rRrNr0rVr$�ordrH�replace)r"rQr3Zlastvrrrr]�s0
 


z HTMLUnicodeInputStream.readChunkcCs,x&tttj|���D]}|jjd�qWdS)Nzinvalid-codepoint)�ranger$�invalid_unicode_re�findallrSr2)r"r3�_rrrrI%sz*HTMLUnicodeInputStream.characterErrorsUCS4cCs�d}x�tj|�D]�}|rqt|j��}|j�}tj|||d��rttj|||d��}|tkrn|j	j
d�d}q|dkr�|dkr�|t|�dkr�|j	j
d�qd}|j	j
d�qWdS)NF�zinvalid-codepointTi�i��r
)re�finditerrb�group�startrZisSurrogatePairZsurrogatePairToCodepoint�non_bmp_invalid_codepointsrSr2r$)r"r3�skip�matchZ	codepointr%Zchar_valrrrrJ)s z*HTMLUnicodeInputStream.characterErrorsUCS2Fc
Csyt||f}Wnltk
r|x|D]}t|�dks&t�q&Wdjdd�|D��}|s^d|}tjd|�}t||f<YnXg}x||j|j|j	�}|dkr�|j	|j
kr�Pn0|j�}||j
kr�|j|j|j	|��||_	P|j|j|j	d��|j
�s�Pq�Wdj|�}	|	S)z� Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        �rPcSsg|]}dt|��qS)z\x%02x)rb)r�crrrrNsz5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)�charsUntilRegEx�KeyErrorrbr)r5�re�compilernr&rRrQ�endr2r])
r"Z
charactersZopposite�charsrpZregexr6�mru�rrrr�
charsUntil@s2
 

z!HTMLUnicodeInputStream.charsUntilcCsT|dk	rP|jdkr.||j|_|jd7_n"|jd8_|j|j|ksPt�dS)Nrr
)rRr&rQr))r"r^rrr�ungetos
zHTMLUnicodeInputStream.unget)N)F)r7r8r9r:rar#rOrMrZr!r^r]rIrJryrzrrrrrA�s 
&
/rAc@sLeZdZdZddd�Zdd�Zd	d
�Zddd�Zd
d�Zdd�Z	dd�Z
dS)rBz�Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    N�windows-1252TcCsn|j|�|_tj||j�d|_d|_||_||_||_||_	||_
|j|�|_|jddk	sbt
�|j�dS)a�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        i�drN)rM�	rawStreamrAr#�numBytesMeta�numBytesChardet�override_encoding�transport_encoding�same_origin_parent_encoding�likely_encoding�default_encoding�determineEncodingrLr)rO)r"rCr�r�r�r�r�Z
useChardetrrrr#�szHTMLBinaryInputStream.__init__cCs&|jdjj|jd�|_tj|�dS)Nrrc)rLZ
codec_info�streamreaderr}rNrArO)r"rrrrO�szHTMLBinaryInputStream.resetc	CsDt|d�r|}nt|�}y|j|j��Wnt|�}YnX|S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r0)r?rr,r'r)r"rCrrrrrM�s
z HTMLBinaryInputStream.openStreamcCs�|j�df}|ddk	r|St|j�df}|ddk	r:|St|j�df}|ddk	rX|S|j�df}|ddk	rt|St|j�df}|ddk	r�|djjd�r�|St|j�df}|ddk	r�|S|�rtyddl	m
}Wntk
r�Yn�Xg}|�}xF|j�s>|j
j|j�}t|t��s t�|�s(P|j|�|j|�q�W|j�t|jd�}|j
jd�|dk	�rt|dfSt|j�df}|ddk	�r�|Std�dfS)NrFrZ	tentativezutf-16)�UniversalDetector�encodingzwindows-1252)�	detectBOMrKr�r��detectEncodingMetar��name�
startswithr�Zchardet.universaldetectorr��ImportError�doner}r0rr=r/r)r2Zfeed�close�resultr,r�)r"ZchardetrLr�ZbuffersZdetectorr r�rrrr��sR


z'HTMLBinaryInputStream.determineEncodingcCs�|jddkst�t|�}|dkr&dS|jdkrFtd�}|dk	s�t�nT||jdkrf|jddf|_n4|jjd�|df|_|j�td|jd|f��dS)	Nr
rF�utf-16be�utf-16lezutf-8rzEncoding changed from %s to %s)r�r�)rLr)rKr�r}r,rOr)r"ZnewEncodingrrr�changeEncodings

z$HTMLBinaryInputStream.changeEncodingc
Cs�tjdtjdtjdtjdtjdi}|jjd�}t|t	�s<t
�|j|dd��}d}|s~|j|�}d}|s~|j|dd	��}d	}|r�|jj|�t
|�S|jjd
�dSdS)z�Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Nonezutf-8zutf-16lezutf-16bezutf-32lezutf-32be�N�rhr)�codecs�BOM_UTF8�BOM_UTF16_LE�BOM_UTF16_BE�BOM_UTF32_LE�BOM_UTF32_BEr}r0r=r/r)�getr,rK)r"ZbomDict�stringr�r,rrrr�s$
zHTMLBinaryInputStream.detectBOMcCsV|jj|j�}t|t�st�t|�}|jjd�|j�}|dk	rR|j	dkrRt
d�}|S)z9Report the encoding declared by the meta element
        rN�utf-16be�utf-16lezutf-8)r�r�)r}r0r~r=r/r)�EncodingParserr,�getEncodingr�rK)r"r �parserr�rrrr�9sz(HTMLBinaryInputStream.detectEncodingMeta)NNNNr{T)T)r7r8r9r:r#rOrMr�r�r�r�rrrrrB�s
(
>"rBc@s�eZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zeee
�Z
dd�Zee�Zefdd�Zdd�Zdd�Zdd�ZdS)�
EncodingBytesz�String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedcCst|t�st�tj||j��S)N)r=r/r)�__new__�lower)r"�valuerrrr�LszEncodingBytes.__new__cCs
d|_dS)Nr
r)rZ)r"r�rrrr#PszEncodingBytes.__init__cCs|S)Nr)r"rrr�__iter__TszEncodingBytes.__iter__cCs>|jd}|_|t|�kr"t�n|dkr.t�|||d�S)Nr
r)rZr$�
StopIterationr@)r"�prrr�__next__WszEncodingBytes.__next__cCs|j�S)N)r�)r"rrr�next_szEncodingBytes.nextcCsB|j}|t|�krt�n|dkr$t�|d|_}|||d�S)Nrr
)rZr$r�r@)r"r�rrr�previouscszEncodingBytes.previouscCs|jt|�krt�||_dS)N)rZr$r�)r"r!rrr�setPositionlszEncodingBytes.setPositioncCs*|jt|�krt�|jdkr"|jSdSdS)Nr)rZr$r�)r"rrr�getPositionqs

zEncodingBytes.getPositioncCs||j|jd�S)Nr
)r!)r"rrr�getCurrentByte{szEncodingBytes.getCurrentBytecCsL|j}x:|t|�kr@|||d�}||kr6||_|S|d7}qW||_dS)zSkip past a list of charactersr
N)r!r$rZ)r"rvr�rprrrrm�szEncodingBytes.skipcCsL|j}x:|t|�kr@|||d�}||kr6||_|S|d7}qW||_dS)Nr
)r!r$rZ)r"rvr�rprrr�	skipUntil�szEncodingBytes.skipUntilcCs>|j}|||t|��}|j|�}|r:|jt|�7_|S)z�Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r!r$r�)r"r/r�r3r6rrr�
matchBytes�s
zEncodingBytes.matchBytescCsR||jd�j|�}|dkrJ|jdkr,d|_|j|t|�d7_dSt�dS)z�Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchNr
rTrr)r!�findrZr$r�)r"r/ZnewPositionrrr�jumpTo�s
zEncodingBytes.jumpToN)r7r8r9r:r�r#r�r�r�r�r�r��propertyr!r��currentByte�spaceCharactersBytesrmr�r�r�rrrrr�Hs 	
r�c@sXeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zdd�ZdS)r�z?Mini parser for detecting character encoding from meta elementscCst|�|_d|_dS)z3string - the data to work on for encoding detectionN)r�r3r�)r"r3rrrr#�s
zEncodingParser.__init__c
Cs�d|jfd|jfd|jfd|jfd|jfd|jff}x^|jD]T}d}xD|D]<\}}|jj|�rJy|�}PWqJtk
r�d}PYqJXqJW|s<Pq<W|jS)	Ns<!--s<metas</s<!s<?rTF)	�
handleComment�
handleMeta�handlePossibleEndTag�handleOther�handlePossibleStartTagr3r�r�r�)r"ZmethodDispatchrgZkeepParsing�key�methodrrrr��s&zEncodingParser.getEncodingcCs|jjd�S)zSkip over commentss-->)r3r�)r"rrrr��szEncodingParser.handleCommentcCs�|jjtkrdSd}d}x�|j�}|dkr.dS|ddkr^|ddk}|r�|dk	r�||_dSq|ddkr�|d}t|�}|dk	r�||_dSq|ddkrtt|d��}|j�}|dk	rt|�}|dk	r|r�||_dS|}qWdS)	NTFrs
http-equivr
scontent-typescharsetscontent)	r3r�r��getAttributer�rK�ContentAttrParserr��parse)r"Z	hasPragmaZpendingEncoding�attrZtentativeEncoding�codecZ
contentParserrrrr��s:zEncodingParser.handleMetacCs
|jd�S)NF)�handlePossibleTag)r"rrrr��sz%EncodingParser.handlePossibleStartTagcCst|j�|jd�S)NT)r�r3r�)r"rrrr��s
z#EncodingParser.handlePossibleEndTagcCsf|j}|jtkr(|r$|j�|j�dS|jt�}|dkrD|j�n|j�}x|dk	r`|j�}qNWdS)NTr)r3r��asciiLettersBytesr�r�r��spacesAngleBracketsr�)r"ZendTagr3rpr�rrrr��s



z EncodingParser.handlePossibleTagcCs|jjd�S)Nr)r3r�)r"rrrr�szEncodingParser.handleOthercCs|j}|jttdg�B�}|dks2t|�dks2t�|d	kr>dSg}g}xt|dkrX|rXPnX|tkrl|j�}PnD|d
kr�dj|�dfS|tkr�|j|j	��n|dkr�dS|j|�t
|�}qHW|dkr�|j�dj|�dfSt
|�|j�}|dk�rT|}x�t
|�}||k�r(t
|�dj|�dj|�fS|tk�rB|j|j	��n
|j|��q�WnJ|dk�rldj|�dfS|tk�r�|j|j	��n|dk�r�dS|j|�x^t
|�}|tk�r�dj|�dj|�fS|tk�r�|j|j	��n|dk�r�dS|j|��q�WdS)z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None�/Nr
r�=r4�'�")rN)r�r)r�r�)
r3rmr��	frozensetr$r)r5�asciiUppercaseBytesr2r�r�r�r�)r"r3rpZattrNameZ	attrValueZ	quoteCharrrrr�sh










zEncodingParser.getAttributeN)
r7r8r9r:r#r�r�r�r�r�r�r�r�rrrrr��s$r�c@seZdZdd�Zdd�ZdS)r�cCst|t�st�||_dS)N)r=r/r)r3)r"r3rrrr#fszContentAttrParser.__init__cCsy�|jjd�|jjd7_|jj�|jjdks8dS|jjd7_|jj�|jjdkr�|jj}|jjd7_|jj}|jj|�r�|j||jj�SdSnF|jj}y|jjt�|j||jj�Stk
r�|j|d�SXWntk
�rdSXdS)Nscharsetr
r�r�r�)r�r�)r3r�r!rmr�r�r�r�)r"Z	quoteMarkZoldPositionrrrr�js.

zContentAttrParser.parseN)r7r8r9r#r�rrrrr�esr�cCs`t|t�r.y|jd�}Wntk
r,dSX|dk	rXy
tj|�Stk
rTdSXndSdS)z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.rN)r=r�decode�UnicodeDecodeErrorr	�lookup�AttributeError)r�rrrrK�s

rKrr)4Z
__future__rrrZpip._vendor.sixrrZpip._vendor.six.movesrrr�rsZpip._vendorr	Z	constantsrrr
rrrPr�iorrr�r�r�r�r�r�Zinvalid_unicode_no_surrogaterGrXr)rt�evalre�setrlZascii_punctuation_rerq�objectrrErArBr/r�r�r�rKrrrr�<module>sX
"








JgIh6'
Back to Directory File Manager
<