Viewing File: /lib/python3.6/site-packages/pip/_vendor/html5lib/__pycache__/_inputstream.cpython-36.opt-1.pyc

3

�Pf�)@s�ddlmZmZmZddlmZmZddlmZm	Z	ddl
Z
ddlZddlm
Z
ddlmZmZmZmZddlmZdd	lmZdd
lmZyddlmZWnek
r�eZYnXedd
�eD��Zedd
�eD��Zedd
�eD��Zeeddg�BZdZej �r(ej!eddF�e"d�d�Z#n
ej!e�Z#e$dddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4g �Z%ej!d5�Z&iZ'Gd6d7�d7e(�Z)d8d9�Z*Gd:d;�d;e(�Z+Gd<d=�d=e+�Z,Gd>d?�d?e-�Z.Gd@dA�dAe(�Z/GdBdC�dCe(�Z0dDdE�Z1dS)G�)�absolute_import�division�unicode_literals)�	text_type�binary_type)�http_client�urllibN)�webencodings�)�EOF�spaceCharacters�asciiLetters�asciiUppercase)�ReparseException)�_utils)�StringIO)�BytesIOcCsg|]}|jd��qS)�ascii)�encode)�.0�item�r�"/usr/lib/python3.6/_inputstream.py�
<listcomp>srcCsg|]}|jd��qS)r)r)rrrrrrscCsg|]}|jd��qS)r)r)rrrrrrs�>�<u�[---Ÿ﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿]z"\uD800-\uDFFF"�]i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��i��	i��	i��
i��
i��i��i��i��i��
i��
i��i��i��i��i��i��z[	-
 -/:-@[-`{-~]c@sHeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dS)�BufferedStreamz�Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    cCs||_g|_ddg|_dS)Nr
r���)�stream�buffer�position)�selfrrrr�__init__@szBufferedStream.__init__cCs@d}x(|jd|jd�D]}|t|�7}qW||jd7}|S)Nrr
)r r!�len)r"�pos�chunkrrr�tellEs
zBufferedStream.tellcCsH|}d}x0t|j|�|kr8|t|j|�8}|d7}q
W||g|_dS)Nrr
)r$r r!)r"r%�offset�irrr�seekLszBufferedStream.seekcCsT|js|j|�S|jdt|j�krF|jdt|jd�krF|j|�S|j|�SdS)Nrr
r)r �_readStreamr!r$�_readFromBuffer)r"�bytesrrr�readUs

zBufferedStream.readcCstdd�|jD��S)NcSsg|]}t|��qSr)r$)rrrrrr_sz1BufferedStream._bufferedBytes.<locals>.<listcomp>)�sumr )r"rrr�_bufferedBytes^szBufferedStream._bufferedBytescCs<|jj|�}|jj|�|jdd7<t|�|jd<|S)Nrr
)rr.r �appendr!r$)r"r-�datarrrr+as
zBufferedStream._readStreamcCs�|}g}|jd}|jd}x�|t|j�kr�|dkr�|j|}|t|�|krb|}|||g|_n"t|�|}|t|�g|_|d7}|j||||��||8}d}qW|r�|j|j|��dj|�S)Nrr
�)r!r$r r1r+�join)r"r-ZremainingBytes�rvZbufferIndexZbufferOffsetZbufferedDataZbytesToReadrrrr,hs$


zBufferedStream._readFromBufferN)�__name__�
__module__�__qualname__�__doc__r#r'r*r.r0r+r,rrrrr9s		rcKs�t|tj�s(t|tjj�r.t|jtj�r.d}n&t|d�rJt|jd�t	�}n
t|t	�}|r�dd�|D�}|rvt
d|��t|f|�St|f|�SdS)NFr.rcSsg|]}|jd�r|�qS)Z	_encoding)�endswith)r�xrrrr�sz#HTMLInputStream.<locals>.<listcomp>z3Cannot set an encoding with a unicode input, set %r)
�
isinstancerZHTTPResponserZresponseZaddbase�fp�hasattrr.r�	TypeError�HTMLUnicodeInputStream�HTMLBinaryInputStream)�source�kwargsZ	isUnicodeZ	encodingsrrr�HTMLInputStream�s

rDc@speZdZdZdZdd�Zdd�Zdd�Zd	d
�Zdd�Z	d
d�Z
ddd�Zdd�Zdd�Z
ddd�Zdd�ZdS)r@z�Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    i(cCsZtjsd|_ntd�dkr$|j|_n|j|_dg|_td�df|_|j	|�|_
|j�dS)a�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        Nu􏿿r
rzutf-8�certain)r�supports_lone_surrogates�reportCharacterErrorsr$�characterErrorsUCS4�characterErrorsUCS2ZnewLines�lookupEncoding�charEncoding�
openStream�
dataStream�reset)r"rBrrrr#�s
zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)N�r)r&�	chunkSize�chunkOffset�errors�prevNumLines�prevNumCols�_bufferedCharacter)r"rrrrN�szHTMLUnicodeInputStream.resetcCst|d�r|}nt|�}|S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r.)r>r)r"rBrrrrrL�s
z!HTMLUnicodeInputStream.openStreamcCsT|j}|jdd|�}|j|}|jdd|�}|dkr@|j|}n||d}||fS)N�
rr
r)r&�countrS�rfindrT)r"r(r&ZnLinesZpositionLineZlastLinePosZpositionColumnrrr�	_position�s
z HTMLUnicodeInputStream._positioncCs|j|j�\}}|d|fS)z:Returns (line, col) of the current position in the stream.r
)rYrQ)r"�line�colrrrr!�szHTMLUnicodeInputStream.positioncCs6|j|jkr|j�stS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        r
)rQrP�	readChunkrr&)r"rQ�charrrrr]�s

zHTMLUnicodeInputStream.charNcCs�|dkr|j}|j|j�\|_|_d|_d|_d|_|jj|�}|j	rX|j	|}d|_	n|s`dSt
|�dkr�t|d�}|dks�d|ko�dknr�|d
|_	|dd�}|jr�|j|�|j
dd	�}|j
d
d	�}||_t
|�|_dS)NrOrFr
�
i�i��z
rV�
Trrr)�_defaultChunkSizerYrPrSrTr&rQrMr.rUr$�ordrG�replace)r"rPr2Zlastvrrrr\�s0
 


z HTMLUnicodeInputStream.readChunkcCs,x&tttj|���D]}|jjd�qWdS)Nzinvalid-codepoint)�ranger$�invalid_unicode_re�findallrRr1)r"r2�_rrrrH%sz*HTMLUnicodeInputStream.characterErrorsUCS4cCs�d}x�tj|�D]�}|rqt|j��}|j�}tj|||d��rttj|||d��}|tkrn|j	j
d�d}q|dkr�|dkr�|t|�dkr�|j	j
d�qd}|j	j
d�qWdS)NF�zinvalid-codepointTi�i��r
)rd�finditerra�group�startrZisSurrogatePairZsurrogatePairToCodepoint�non_bmp_invalid_codepointsrRr1r$)r"r2�skip�matchZ	codepointr%Zchar_valrrrrI)s z*HTMLUnicodeInputStream.characterErrorsUCS2Fc	Cs�yt||f}WnNtk
r^djdd�|D��}|s@d|}tjd|�}t||f<YnXg}x||j|j|j�}|dkr�|j|jkr�Pn0|j	�}||jkr�|j
|j|j|��||_P|j
|j|jd��|j�sfPqfWdj|�}|S)z� Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        rOcSsg|]}dt|��qS)z\x%02x)ra)r�crrrrNsz5HTMLUnicodeInputStream.charsUntil.<locals>.<listcomp>z^%sz[%s]+N)�charsUntilRegEx�KeyErrorr4�re�compilermr&rQrP�endr1r\)	r"Z
charactersZopposite�charsZregexr5�mrs�rrrr�
charsUntil@s. 

z!HTMLUnicodeInputStream.charsUntilcCs@|dk	r<|jdkr.||j|_|jd7_n|jd8_dS)Nrr
)rQr&rP)r"r]rrr�ungetos
zHTMLUnicodeInputStream.unget)N)F)r6r7r8r9r`r#rNrLrYr!r]r\rHrIrwrxrrrrr@�s 
&
/r@c@sLeZdZdZddd�Zdd�Zd	d
�Zddd�Zd
d�Zdd�Z	dd�Z
dS)rAz�Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    N�windows-1252TcCs\|j|�|_tj||j�d|_d|_||_||_||_||_	||_
|j|�|_|j
�dS)a�Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        i�dN)rL�	rawStreamr@r#�numBytesMeta�numBytesChardet�override_encoding�transport_encoding�same_origin_parent_encoding�likely_encoding�default_encoding�determineEncodingrKrN)r"rBr~rr�r�r�Z
useChardetrrrr#�szHTMLBinaryInputStream.__init__cCs&|jdjj|jd�|_tj|�dS)Nrrb)rKZ
codec_info�streamreaderr{rMr@rN)r"rrrrN�szHTMLBinaryInputStream.resetc	CsDt|d�r|}nt|�}y|j|j��Wnt|�}YnX|S)zvProduces a file object from source.

        source can be either a file object, local filename or a string.

        r.)r>rr*r'r)r"rBrrrrrL�s
z HTMLBinaryInputStream.openStreamcCs�|j�df}|ddk	r|St|j�df}|ddk	r:|St|j�df}|ddk	rX|S|j�df}|ddk	rt|St|j�df}|ddk	r�|djjd�r�|St|j�df}|ddk	r�|S|�rdyddl	m
}Wntk
r�YnxXg}|�}x6|j�s.|j
j|j�}|�sP|j|�|j|�q�W|j�t|jd�}|j
jd�|dk	�rd|dfSt|j�df}|ddk	�r�|Std�dfS)NrErZ	tentativezutf-16)�UniversalDetector�encodingzwindows-1252)�	detectBOMrJr~r�detectEncodingMetar��name�
startswithr�Zchardet.universaldetectorr��ImportError�doner{r.r}r1Zfeed�close�resultr*r�)r"ZchardetrKr�ZbuffersZdetectorr r�rrrr��sP


z'HTMLBinaryInputStream.determineEncodingcCs�t|�}|dkrdS|jdkr(td�}nT||jdkrH|jddf|_n4|jjd�|df|_|j�td|jd|f��dS)N�utf-16be�utf-16lezutf-8rrEzEncoding changed from %s to %s)r�r�)rJr�rKr{r*rNr)r"ZnewEncodingrrr�changeEncodings

z$HTMLBinaryInputStream.changeEncodingc
Cs�tjdtjdtjdtjdtjdi}|jjd�}|j|dd��}d}|sp|j|�}d}|sp|j|dd	��}d	}|r�|jj	|�t
|�S|jj	d
�dSdS)z�Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return Nonezutf-8zutf-16lezutf-16bezutf-32lezutf-32be�N�rgr)�codecs�BOM_UTF8�BOM_UTF16_LE�BOM_UTF16_BE�BOM_UTF32_LE�BOM_UTF32_BEr{r.�getr*rJ)r"ZbomDict�stringr�r*rrrr�s"
zHTMLBinaryInputStream.detectBOMcCsH|jj|j�}t|�}|jjd�|j�}|dk	rD|jdkrDtd�}|S)z9Report the encoding declared by the meta element
        rN�utf-16be�utf-16lezutf-8)r�r�)r{r.r|�EncodingParserr*�getEncodingr�rJ)r"r �parserr�rrrr�9sz(HTMLBinaryInputStream.detectEncodingMeta)NNNNryT)T)r6r7r8r9r#rNrLr�r�r�r�rrrrrA�s
(
>"rAc@s�eZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zeee
�Z
dd�Zee�Zefdd�Zdd�Zdd�Zdd�ZdS)�
EncodingBytesz�String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raisedcCstj||j��S)N)r-�__new__�lower)r"�valuerrrr�LszEncodingBytes.__new__cCs
d|_dS)Nr
r)rY)r"r�rrrr#PszEncodingBytes.__init__cCs|S)Nr)r"rrr�__iter__TszEncodingBytes.__iter__cCs>|jd}|_|t|�kr"t�n|dkr.t�|||d�S)Nr
r)rYr$�
StopIterationr?)r"�prrr�__next__WszEncodingBytes.__next__cCs|j�S)N)r�)r"rrr�next_szEncodingBytes.nextcCsB|j}|t|�krt�n|dkr$t�|d|_}|||d�S)Nrr
)rYr$r�r?)r"r�rrr�previouscszEncodingBytes.previouscCs|jt|�krt�||_dS)N)rYr$r�)r"r!rrr�setPositionlszEncodingBytes.setPositioncCs*|jt|�krt�|jdkr"|jSdSdS)Nr)rYr$r�)r"rrr�getPositionqs

zEncodingBytes.getPositioncCs||j|jd�S)Nr
)r!)r"rrr�getCurrentByte{szEncodingBytes.getCurrentBytecCsL|j}x:|t|�kr@|||d�}||kr6||_|S|d7}qW||_dS)zSkip past a list of charactersr
N)r!r$rY)r"rtr�rnrrrrl�szEncodingBytes.skipcCsL|j}x:|t|�kr@|||d�}||kr6||_|S|d7}qW||_dS)Nr
)r!r$rY)r"rtr�rnrrr�	skipUntil�szEncodingBytes.skipUntilcCs>|j}|||t|��}|j|�}|r:|jt|�7_|S)z�Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone)r!r$r�)r"r-r�r2r5rrr�
matchBytes�s
zEncodingBytes.matchBytescCsR||jd�j|�}|dkrJ|jdkr,d|_|j|t|�d7_dSt�dS)z�Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the matchNr
rTrr)r!�findrYr$r�)r"r-ZnewPositionrrr�jumpTo�s
zEncodingBytes.jumpToN)r6r7r8r9r�r#r�r�r�r�r�r��propertyr!r��currentByte�spaceCharactersBytesrlr�r�r�rrrrr�Hs 	
r�c@sXeZdZdZdd�Zdd�Zdd�Zdd	�Zd
d�Zdd
�Z	dd�Z
dd�Zdd�ZdS)r�z?Mini parser for detecting character encoding from meta elementscCst|�|_d|_dS)z3string - the data to work on for encoding detectionN)r�r2r�)r"r2rrrr#�s
zEncodingParser.__init__c
Cs�d|jfd|jfd|jfd|jfd|jfd|jff}x^|jD]T}d}xD|D]<\}}|jj|�rJy|�}PWqJtk
r�d}PYqJXqJW|s<Pq<W|jS)	Ns<!--s<metas</s<!s<?rTF)	�
handleComment�
handleMeta�handlePossibleEndTag�handleOther�handlePossibleStartTagr2r�r�r�)r"ZmethodDispatchrfZkeepParsing�key�methodrrrr��s&zEncodingParser.getEncodingcCs|jjd�S)zSkip over commentss-->)r2r�)r"rrrr��szEncodingParser.handleCommentcCs�|jjtkrdSd}d}x�|j�}|dkr.dS|ddkr^|ddk}|r�|dk	r�||_dSq|ddkr�|d}t|�}|dk	r�||_dSq|ddkrtt|d��}|j�}|dk	rt|�}|dk	r|r�||_dS|}qWdS)	NTFrs
http-equivr
scontent-typescharsetscontent)	r2r�r��getAttributer�rJ�ContentAttrParserr��parse)r"Z	hasPragmaZpendingEncoding�attrZtentativeEncoding�codecZ
contentParserrrrr��s:zEncodingParser.handleMetacCs
|jd�S)NF)�handlePossibleTag)r"rrrr��sz%EncodingParser.handlePossibleStartTagcCst|j�|jd�S)NT)r�r2r�)r"rrrr��s
z#EncodingParser.handlePossibleEndTagcCsf|j}|jtkr(|r$|j�|j�dS|jt�}|dkrD|j�n|j�}x|dk	r`|j�}qNWdS)NTr)r2r��asciiLettersBytesr�r�r��spacesAngleBracketsr�)r"ZendTagr2rnr�rrrr��s



z EncodingParser.handlePossibleTagcCs|jjd�S)Nr)r2r�)r"rrrr�szEncodingParser.handleOthercCs�|j}|jttdg�B�}|dkr&dSg}g}xt|dkr@|r@PnX|tkrT|j�}PnD|d	krjdj|�dfS|tkr�|j|j��n|dkr�dS|j|�t|�}q0W|dkr�|j	�dj|�dfSt|�|j�}|d
k�r:|}x�t|�}||k�rt|�dj|�dj|�fS|tk�r*|j|j��q�|j|�q�WnJ|dk�rRdj|�dfS|tk�rl|j|j��n|dk�rzdS|j|�x^t|�}|t
k�r�dj|�dj|�fS|tk�r�|j|j��n|dk�r�dS|j|��q�WdS)z_Return a name,value pair for the next attribute in the stream,
        if one is found, or None�/rN�=r3�'�")rN)r�r)r�r�)r2rlr��	frozensetr4�asciiUppercaseBytesr1r�r�r�r�)r"r2rnZattrNameZ	attrValueZ	quoteCharrrrr�sf










zEncodingParser.getAttributeN)
r6r7r8r9r#r�r�r�r�r�r�r�r�rrrrr��s$r�c@seZdZdd�Zdd�ZdS)r�cCs
||_dS)N)r2)r"r2rrrr#fszContentAttrParser.__init__cCsy�|jjd�|jjd7_|jj�|jjdks8dS|jjd7_|jj�|jjdkr�|jj}|jjd7_|jj}|jj|�r�|j||jj�SdSnF|jj}y|jjt�|j||jj�Stk
r�|j|d�SXWntk
�rdSXdS)Nscharsetr
r�r�r�)r�r�)r2r�r!rlr�r�r�r�)r"Z	quoteMarkZoldPositionrrrr�js.

zContentAttrParser.parseN)r6r7r8r#r�rrrrr�esr�cCs`t|t�r.y|jd�}Wntk
r,dSX|dk	rXy
tj|�Stk
rTdSXndSdS)z{Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.rN)r<r�decode�UnicodeDecodeErrorr	�lookup�AttributeError)r�rrrrJ�s

rJr)2Z
__future__rrrZpip._vendor.sixrrZpip._vendor.six.movesrrr�rqZpip._vendorr	Z	constantsrrr
rrrOr�iorrr�r�r�r�r�r�Zinvalid_unicode_no_surrogaterFrr�evalrd�setrkZascii_punctuation_rero�objectrrDr@rAr-r�r�r�rJrrrr�<module>sV









JgIh6'
Back to Directory File Manager
<