Current File : //lib64/python3.8/html/__pycache__/parser.cpython-38.opt-2.pyc
U
e5d9E�@s�ddlZddlZddlZddlmZdgZe�d�Ze�d�Ze�d�Z e�d�Z
e�d�Ze�d �Ze�d
�Z
e�d�Ze�d�Ze�d
ej�Ze�d �Ze�d�ZGdd�dej�ZdS)�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
\s* # possibly followed by a space
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZdd�dd�Zdd�Zdd �Zd
d�ZdZd
d�Z dd�Z
dd�Zdd�Zdd�Z
d8dd�Zdd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�ZdS)9r)ZscriptZstyleT)�convert_charrefscCs||_|��dS�N)r�reset)�selfr�r �#/usr/lib64/python3.8/html/parser.py�__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tj�|�dS)N�z???)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser�rr r r
r`s
zHTMLParser.resetcCs|j||_|�d�dS)Nr)r
�goahead�r�datar r r
�feedhszHTMLParser.feedcCs|�d�dS)N�)rrr r r
�closeqszHTMLParser.closeNcCs|jSr)�_HTMLParser__starttag_textrr r r
�get_starttag_textwszHTMLParser.get_starttag_textcCs$|��|_t�d|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r�elemr r r
�set_cdata_mode{s
zHTMLParser.set_cdata_modecCst|_d|_dSr)rrrrr r r
�clear_cdata_modeszHTMLParser.clear_cdata_modecCsJ|j}d}t|�}||k�r�|jrv|jsv|�d|�}|dkr�|�dt||d��}|dkrpt�d�� ||�sp�q�|}n*|j
� ||�}|r�|��}n|jr��q�|}||kr�|jr�|js�|�t
|||���n|�|||��|�||�}||kr��q�|j}|d|��rJt�||��r"|�|�} n�|d|��r:|�|�} nn|d|��rR|�|�} nV|d|��rj|�|�} n>|d |��r�|�|�} n&|d
|k�r�|�d�|d
} n�q�| dk�r<|�s��q�|�d|d
�} | dk�r�|�d|d
�} | dk�r|d
} n| d
7} |j�r*|j�s*|�t
||| ���n|�||| ��|�|| �}q|d|��r�t�||�}|�r�|��d
d�}
|�|
�|��} |d| d
��s�| d
} |�|| �}qn<d||d�k�r�|�|||d
��|�||d
�}�q�q|d|�rt�||�}|�rN|�d
�}
|�|
�|��} |d| d
��s@| d
} |�|| �}qt�||�}|�r�|�r�|��||d�k�r�|��} | |k�r�|} |�||d
�}�q�n.|d
|k�r�|�d�|�||d
�}n�q�qq|�r8||k�r8|j�s8|j�r|j�s|�t
|||���n|�|||��|�||�}||d�|_dS)Nr�<�&�"z[\s;]z</�<!--z<?z<!rrz&#�����;)r
�lenrr�find�rfind�maxrr�searchr�start�handle_datarZ updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�charref�group�handle_charref�end� entityref�handle_entityref�
incomplete)rr=r
�i�n�jZampposr4r2�k�namer r r
r�s�
�
zHTMLParser.goaheadcCs�|j}|||d�dkr$|�|�S|||d�dkrB|�|�S|||d���dkr�|�d|d�}|dkrvdS|�||d |��|d
S|�|�SdS)N�r'�z<![� z <!doctyperr)r(r)r
r7Zparse_marked_sectionrr,�handle_decl�parse_bogus_comment)rrAr
�gtposr r r
r9s
z!HTMLParser.parse_html_declarationrcCsD|j}|�d|d�}|dkr"dS|r<|�||d|��|dS)Nrr(r)r)r
r,�handle_comment)rrAZreportr
�posr r r
rJszHTMLParser.parse_bogus_commentcCsH|j}t�||d�}|sdS|��}|�||d|��|��}|S)Nr(r))r
�picloser/r0� handle_pir=)rrAr
r4rCr r r
r8!szHTMLParser.parse_picCs�d|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|��}|�d���|_}||k�r t �||�}|s~�q |�ddd�\} }
}|
s�d}nZ|dd�dkr�|dd�ks�n|dd�dkr�|dd�kr�nn|dd�}|�rt
|�}|�| ��|f�|��}q`|||���}|dk�r�|�
�\}
}d |jk�rz|
|j�d �}
t|j�|j�d �}n|t|j�}|�|||��|S|�d
��r�|�||�n"|�||�||jk�r�|�|�|S)Nrrr(rG�'r)�")r�/>�
rR)r�check_for_whole_start_tagr
�tagfind_tolerantr4r=r;rr�attrfind_tolerantr�append�stripZgetpos�countr+r-r1�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr")rrA�endposr
�attrsr4rD�tag�m�attrname�restZ attrvaluer=�lineno�offsetr r r
r5-sZ
&
�
�
�
zHTMLParser.parse_starttagcCs�|j}t�||�}|r�|��}|||d�}|dkr>|dS|dkr~|�d|�rZ|dS|�d|�rjdS||krv|S|dS|dkr�dS|dkr�dS||kr�|S|dStd ��dS)
Nrr�/rRr(r)rz6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r
�locatestarttagend_tolerantr4r=r2�AssertionError)rrAr
rarC�nextr r r
rT`s.z$HTMLParser.check_for_whole_start_tagcCs|j}t�||d�}|sdS|��}t�||�}|s�|jdk rV|�|||��|St�||d�}|s�|||d�dkr�|dS|� |�S|�
d���}|�d|���}|�
|�|dS|�
d���}|jdk r�||jkr�|�|||��|S|�
|�|��|S)Nrr)r(rGz</>r)r
� endendtagr/r=�
endtagfindr4rr1rUrJr;rr,�
handle_endtagr#)rrAr
r4rKZ namematchZtagnamer!r r r
r6�s6
zHTMLParser.parse_endtagcCs|�||�|�|�dSr)r\rl�rr`r_r r r
r[�szHTMLParser.handle_startendtagcCsdSrr rmr r r
r\�szHTMLParser.handle_starttagcCsdSrr )rr`r r r
rl�szHTMLParser.handle_endtagcCsdSrr �rrEr r r
r<�szHTMLParser.handle_charrefcCsdSrr rnr r r
r?�szHTMLParser.handle_entityrefcCsdSrr rr r r
r1�szHTMLParser.handle_datacCsdSrr rr r r
rL�szHTMLParser.handle_commentcCsdSrr )rZdeclr r r
rI�szHTMLParser.handle_declcCsdSrr rr r r
rO�szHTMLParser.handle_picCsdSrr rr r r
�unknown_decl�szHTMLParser.unknown_declcCstjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r()�
stacklevel)�warnings�warn�DeprecationWarningr)r�sr r r
r�s
�zHTMLParser.unescape)r)�__name__�
__module__�__qualname__r]rrrrrrr"r#rr9rJr8r5rTr6r[r\rlr<r?r1rLrIrOrorr r r r
r?s6 z
3"()rrqrZhtmlr�__all__rrr@r>r:r3rNZcommentcloserUrV�VERBOSErgrjrkrrr r r r
�<module>s*
��