# encoding: utf-8 """Helper classes for tests.""" # Use of this source code is governed by the MIT license. __license__ = "MIT" import pickleDcopy functools@unitd from " iITestCase4bs4BeautifulSoup.element&( CharsetMetaAttributeValue, om4 Onten,cDoctyp9{Strainer>fTag ) tbuilderPHTMLPbrTreeB default_-/= (@ BAD_DOCUMENT = """A bare string /|PUBLIC /div> tagaQA Within a , markup like if (i < 2) { alert("Mtw,sl >"); } UwO psubsequ!8a!! @docu  starts n a bogus declara>a Ajains T~)Yf$! MThtml -//W3C//DTD X 1.0 Transial//EN">The d-R`invali@caus"extra whitespacetable>Paragraph block display bs, but onHT
,?

4">Multiple values for the same attribute.
<blockquote>

SpUis cut off by<aend ofG< d ^bare markup7\n doctypeH Mixed case_ s 4@foldGpo lower)A>Uour\u2603>Tag n;Unicode charactersABVaBhtml PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1=/ '-t:.dtd"> """ class SoupTest(unittest.Case): 7@property def default_builder(self):1mreturn&)OAAsoup6,, **kwargsH@"""Bb a Beautiful object fromA.}2 = Tc.pop('0', .)s"(#, E=2_fo T%1an Afrag5`into aE.WThe details depn&5er. ._{D_to_`assert cEqualsto_parse, compared_to=NonE3/ =Vobj =r(fE.if 4 = an0p(obj.de%$()C W)>ConnectednesA0ele Ensure that next_& and previous_element are properly set for all descendants of the given element.65""" earlier = NoneU6e i=^:&3if D self.assertEqual(e,-c.next_)R:, e.previous>Q = e Wdef linkage_validator( , el, _recursive_call=False)"""EnsureM throughoutmAdocuh_7 b3# D1 should have no  &orvsibling# It alsoG2n'tJ a k 1l.parent isa= els0!,\c-E"Bad\nNODE: {}\nPREV EXPECTED".format(QY, 2 O 0=W_; ,@NEXTv/\nSuidx = 0\child _last_Dlen(el.contents) - 1WA& Ae2# P 5<0 toir first r C3Tha 91if := 0 1not*."is61 42-Xf)el, / x/el T5 h%ild.previous_sibling is None,\ "Bad previous7\nNODE: {}\nPREV EXPECTED".format(Wmqchild, .d,@)) # If not the firstW, index should link as rto this% ZP elementDQmatchqBlast;6 orbubbled up descendant]Yelse:Sasser el.contents[idx - 1]? {} )BT.next1INEXT/,  '  0if P_)B0.-'isM#F.>2 ?CONTENTS?h ,u.parent .UIP]CBG5G#W & if isinstance( Tag) andM5J = self."age_validatorT>ruenO# A .have no L wsa0 0/,  AMarkReithe5@R0cur 9 8'if#1 last_child = descendant Yelse:last_chi: $P# If % , there are non next siblings9if idx ==>N_idx}bassertZ.K_K is None,\TA"Bad{0\nNODE: {}\nNEXT EXPECTED {}".format(OS ,8)F+= 1@M 7 if1nota  V"if( A^&elr0if Q_recursive_call andJ!Ntarget = elwhile Tru<3if 7 ?telement/, =5/W=,_8 Ybreak7%el!c62 s RA{ PX# =uv.parentP2# Wd>0 sohing to return 1# R;4OEthe i i%erEr5l class HTMLTreeBuilderSmokeTest(object): """A basic test of a treeb8's competence.41Any_(`, presm or future, should be able2pasQthesess. With invalid markup 's room for interpretation,rPdiffe parsers can handle it ly. But wo1theBo0 inu~AOmuch.E2"""1defs_empty__tags(self)"""Verify that @4F !5 E E (aka void ) X,d correctlyO@name[A'area', 'baser', 'col', 'embepd', 'hr1img@nput `keygen @linkmenuitem taApara`sourceAtrac0{wbr', a'spaceseframe'"%]: 4soup = self. I("")!pnew_tag$Coup.Z(name)CassertEqual(True, 1.is_empty_element=kdef test_pickle_and_un identity(`)0# P.ing a tree, then 9it, yields# M1calx@# to9 original.Ufoo(dumped = . s(@2, 2lHload' s(-&ed&.H' ed.__class__, BeautifulSoup:decode(),(.!) ~oDoctypeHandledq2, dQ_fragA"""A that a given2A str2s hR correctly."""}bPstr, uP_docutP_with(0K# Make sure a  object was createdMscontents[0] 4k5Qi@str(e)[:lenVstr)]I5thep associ aq# parse2B andLprest of.X-W 47.p.`w, 'foo'O L Generatea  = '' %Hy%Amark? + '\nvdL;0Uretur<`normal%s6""",, everyday HTML_s are uW"html*('7 PUBLIC "-//W3C//DTD X1.0 Transitional//EN"Co  e" %"".7p()dpublic_with_url(seclf): Hdoctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xL1= '-t:e.dtd"'self.assertDHandled(!) ,def test_system_(@O'foo SYSTEMexample.com/"'nnamespacedy # We can h6 a ; with a A ID.xsl:stylesheetRmlentOopublic0o# Test A? id!R`real_x#\umentP"""A (1doc() should come out more or less the same as it went in."""y0markup = b""" < xmlns="CB1999:",ead>Hello.</�0</h� <body>Goodbye.</�0 </f�>�!so�l� �(�) �eEqual(�Aoup.#e( ).replace(b"\n",F)),6�d�(����JWhen0XML^ is parsed#it!�be treat%�weird tagA�-ns1:foo>content</�< �`/><ns2$�/,p2, len(.find_all("g�6"))�-processing_instruc Z�] both Unicode and by�pring toify thath��## e�_correctly sets� �g_classE�`even w�K�is already�there is noB�@need�� anythingV�!= PITarget PIC?,`�!,  deE#()�+ �??�Lf8")) d ef test_deepcopy(self): �"""Make sure you can )� the tree builder. 5�PThis �mportant because5�0��!�part of a;�BeautifulSoup object, and we wK�to be able to�5at.C�5""" ���.�.default_�)�d  p_tag_is_never_empty_element�A <p> tag�3� designated as an D�-D��,�+qEven if�markup shows it>�E, it�shouldn'cp preseny�that ways\ = . �("<p/>")V�elf.assertFalse(&�Z.p.is2�Equal(str6�), "<p></pc�d�nunclosedubs_get_�j�f��r's not *�1 by=0endJ�@docu�J0uld/,�xpappliesH0alls exceptsK*��sP)>"�OtZ 0�b0�Ub></b'�5�1�r2�r W br[alway�i]5<brSome parsers0at L� </Q�`as one�/^�8, s/�!as���1twoP, but ��J�<����  brZr[�.���Z3Tru/br"!brbnested_formattingOs� @em><�/�"double_headU�html = '''<!DOCTYPE �4> <� ead> <title>Ordinary HEAD Rest</"�0 </h� > <script type="text/java�"> alert("Help!"); </� > <body> Hello, world! </�"</�5''' �#�~ ��yup.find('�@')['�,']N0com@ J2# C�s are rep$as�/=foo<!--foobar-->baz�QD�� "�������o���="foobar")  self.assertEqual(comment.__class__, C�0) 2���c# The +� is properly integrated �o the tree.<� foo = soup.find(text="foo")$��, foo.next_ele�4�?bazX��?bazX�baz.previous\��def test_preserved_whitespace_in�P_and_�Parea(�!):=��.E"""W1� must be N�� in <pre> and <F�u> tags,F� even if that would mean notL�ttifyingrfmarkupt5"""H�Bpre_ �B = "y�@ </�&��+��> woo\nwoo8��=���UASoupY&s(y�*�S�}�y�7��� �U� {�(*����(0(),4`� �����e�/� j��o�"$</8�> �*� w�L� nested_inlines�I"� yPs can@� indefinitely.pQb_tagkb>Inside a BO</b>5 >�n�_�U�rp>A <i>�p <b>tagY�</i></pa�9�Q�h�xdouble_�o�!a>�yy�a}�% block_level B'� �0 B!'<c��quote><p><b>FooCGp></�'��M���`$$�.p.b.string, 'Foo\� [7� 5�m�5correctly_tableAOne �Cgo inother one832('<8�id="1">'����O'<tr�"<td>Here's aj�N�:  G��� j�2j��U�f_td></{�'</table></td>')  �self.assertSoupEquals( ��ymarkup,�'<table id="1"><tr><td>Here\'s another %�/:';�2;�foo</td></L�/:�>:�!�%) ]��"�u><thead�F|��"Y��u"<tbody3�9Bar�� 3�Gfoot3�z3���"� def test_multivalued_attribute_with_whitespace(�%):�5# W� separating the L�qs of a ]�-�%d ^�G�should be ignored.h = '<div class=" foo bar "></apsoup = �. �(A�)o��(['foo', 'bar'], 6�a.div['m�#']-�# If you search by�literal name��=� it's like(�L�# wasn't �5re.� ���find('div',3_="&")�P�ddeeply_nested�html5lib can se���s�tag many times}�7as it rearranges� tree. This has caused problems C�G Fs2*�1>�!P="css/bdiv></�*��f?W�8<�i0on_f|uses a different API to9<\�_> tagm c@�m�^�b0a bb��+"a", "b"X�R� Uangle_brackets_in[�s_are_escapedfj'<a b="<a>� a>!�<a>��strings_resembling_character_entity_re<ces�# "&T" and "&p" lookAincomplete P� P�qies, buWy are6not#�.��"<p>• AT&T is ins&p 500</p>"�4�`\u20224�amp;T is in the s&a�mp;p 500</p>" �!) �def test_entities_in_foreign_documen�coding(self)::��H�# “ and �8; are invalid numeric ]� referencingE�Windows-1252 characters. -5�Fes a�w commonG�9to J���Unicode,�J9731Q�@�z�� only found in D�.+� � All of these�should be converted toJ�?�t�s_�markup = "<p>aPHello �"8;!&#� !so7��. �(J�)p��assertEqual("“\�” -☃", :�.p.string<�E�/attributes_�R_to_u| 5,expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata">�'��~�ASoup�&s(V�fñ9�", {��B�/xfB�-XB�-ontilde� Otext>�%pi�3�"<�~<�"x<�!r >� p0quoJtityk��`ation_ �u��I said &@�;good day!�����C'<p>E�"@�";�'out_of_rangey�.� \N{REPLACEMENT CHARACTER}P�H �;?&#x<�(;�s�H9multipart_s p"Mostly�prevent a recurePa bug.the html5lib treebu'r.,  "<2�><h2>\nfoo</h2><p!</�"4("p"h2g.next_element.name) � self.assertEqual("p", soup.p+�Connectedness(.�!) S� def test_empty_element_tags(k�!):'��}�"""Verify consistent handling of C�-C� C�,=�no matter how theF�me in through� markup.7�5""" ����ASoup�s('<br/>�F', "� �"> D� E��G� �;@head2_between_�and_bodyG@Prev;recurrence=Ra buge html5lib treebuilder. 1con}`= """<-� ><t�1></� <link></�����a>foo</ �1 </8� baoup = i�$(i�@#NoCNone�S�.`�2�multiple_copies_of_aZDN!DOCTYPEz"> T0 <�><article id="a" �div><a href="1"></�0�afooter'�% $�2$�a�&</#�#</_���6p.g� `basic_i_spaceParsers don't need to *understand* @�, but at��? very leas�y should not choke onC� or lose?�Pdata.�-Q = b' xmlns="http://www.w3.org/1999/x"%�:mathml,�a8/Math�$ML2�?svg/�2000/svg":<g�:msqrt>4</�<b svg:fill="red#b>'�� �j(!�encode()0���Q��;� �j� c'R 'a��C�!['&']f�Mg��Z� `��g�*_�0svg\��valued_attribute_�_becomes_lista class="foo bar">f.soup(markuCp) �#self.assertEqual(['foo', 'bar'], soup.a['class']) ;�#�% Generally speaking, tests below this point are more �#ofC�Beautiful Soup than#� the tree builders. But parsersT�F�Pweird�p we run:�sl�separately for everyW�E� to detect any difference�AtweeV�"m..� 1def�Q_can_�_unicode_document(d!):6��v # A seemingly innocuous 0�... but it's in UP�W! AndE�it contains charact that can't be representedK�7theB�encoding foun �` decl/ion! The horror!:� markup = '<html><head><meta T�="euc-jp"></�%body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</4�'y��0 = P. �(�'q�2xe9R�'"0up.X�s.string�X�strainerC"""Pz`shouldcable to work withSC�Vs."""��) =%�G("b"����"A <b>bold</b> i/> <i>statede</i>",h��g �Tonly=�q� �0.deW(), "~�"0�e_quote_attribute_values_becom 3ubl%�sV�s("<foo Q� ='o></foo� �'8�@"bar�8�'��Z� ��r_nested�s_are_left_alone�text = ""� "brawls" happen'>a� t��S��!doR �2get�d�; �o" "Pfoo['T ']B at "Bob\'s Bar" 6foo � ~"w�&;|��" ompersand_in_gets_escapec!'<ais="re)messed up & stuff�%�/>'mM� Lamp;Q�� self.assertSoupEquals( �"'<a href="http://example.org?a=1&b=2;3">foo</a>',>�Kamp;B�!) C�4def test_escaped_ampersand_in_attribute_value_is_left_alone(self): ���.assertS���� �pntities�strings_converted_du�_parsing�# Both XML and HTML eP�U are I� to Unicode characters#��"# i� i�.��text = "<p><� sacré bleu!>�U</p>"?�expectedC��\N{LATIN SMALL LETTER E WITH ACUTE} Z���$, v� lsmart_quoteeon_the_way_indMicrosoft @� @�`V#`M�@ = bb\x91Foo\x92Gqsoup = �� �(7�)Y�� ~�2�2.p.^ [" EFT SINGLE QUOTATION MARK}�oN{RIGH"�"dnon_breaking_spack�� �"<a> ��A"��A�$.a� �NO-BREAK SPACE}" * 2�3x<out�f�f0.enx("utf-8" 8��A pO�Qreal_iso_latin_documen0q# Smoke of interrelafunctionality, u5 an# easy-to-understj�.Here it is in. Note that�`claims be&�PISO-L�&-1u*_html = '< �><head><metaY`tent="Q/�;XGset=S� " -equiv="Con5�-type"/></Q�body><p>Sacr� "</;��(>'  That's because we're going�/�Winto �,I7use;!to$html = unicode_html.encode("iso-8859-1") �# Parse the ISO-Latin-1 HTML.&�soup = self. �`(iso_l+�d�))�1# Eo� it to UTF-8G�`resultI�5oup�Zutf-8�What do we expect�?�to look like? Well,o�Wwould�$�@ uni�p, excepQ�a�e META tag A�G sayE�� instead of$��Ied =l�.replace("ZO", "��@And,`�fcourse�b be inIr, not U�~��\ eTa-da!�assertEqual(f�(�y#ed-def test_real_shift_jis_document(I�%):X�q# Smoke4�� make surkpu�r can handle a J�8 inF��j�Q-JIS ing, without choking ��E = (J��0b'<�><head></�body><pre>'.�\x82\xb1�"ea�4cdS��5\x83R\x81[ �fH�� 3B�@\x93�O9�b �42\xZ�"bd!�fa\x96{\x8cK�-� ccJ�t�@�C�B\x8b\�c���7�BH� </�"</�/'3����)�0.deP!("oV-jis":���2�+K�L�># M tree is correctly edCuvarious#�Gings�y��� ,Z$e(�M� euc_jp")N��� Aohebrew> A io-worldE�!we=qconvertG9 (a!Hg�()G1 �; = 'ctitle>J�Q(ISO j� 8)Visual Diionality</4� b/h1E�h1>\xed\xe5\xec\xf9�� v��l �q, from_@ing=�b# Somebuilders calll2iso1�[, oth�f9b# That'sa difference�ly care about8� �.original��}%('�', 'iso-8859-8')  �self.assertEqual( ��soup.encode('utf-8'),"�hebrew_document.de-�"iso8859-8")A�"A�A")) B�def test_meta_tag_reflects_current_z�@ing(�!):7���# Here's the <@�> tag saying that a �7 is:�T�ed in Shift-JIS. ��L���Q = ('Z� content="text/html; charset=x-sjis" 'A����'http-equiv="CG�-type"/>'�,���ncorporat������s�P_jis_��� � '<�><head>\n%s\n���language"� ja� b�/]�e<body>P markup goes here.') % ��]C��_0 = . �(� #PrQ, and� is seemingly unaffected1pE�dr�x��`find('�V', {' H': '5'})�7 = R�#['�%'])��_'F$',k �2ButPvalue�actually a �MetaAttributeV(�fobject��True(isinstance(�,N�And it will take on a �{S its !# ?ing�&Futf8$U*8"�1Forqrest of �`story, TestSubstitutions in�tree.py.�Pq5_styleE�Did="j#" "$=" qq q�q �:harset) ) # But that value is actually a CharsetMetaAttributeV(�object. E��� self.assertTrue(isinstance(cE�,N�5)) I�# And it will take on a��reflects its current@�encoding�Equal('utf8', �.3�0e("�"�def test_tag_with_no_as_can_have�`added( %):�pdata = �.soup("<a>text</a>")(�.a['foo'] = 'bar'�?�b�<a foo="bar"P�0', p�.a.decode(�worst_case�`"""Tes!e &� &�# (d ly) for linking issues."""�� �BAD_DOCUMENT���H�age_validator(>�) class XMLTreeBuilderSmoke�(|"):v�pickle_and_un �identity�0# P.���a tree, then 9���it, yields #� M�5cal@# to9� originalt�U� �l<b>fooumped = �p.dumps(@�6, 23Hload'��� s(-�&ed&�Y&�'�Qed.__T__, BeautifulSoup:�G,(Vdocstring_generat� Xroot/ �lf4oup ), b'<?xml version="1.0" @ing=-8"?>\n<i�'i4�xml_declaration'markup = b"""l�k�Pfoo/>���� �W�{!� , :����-8"pprocessy�r?ruc�6?PITarget PIContent?�Wreal_xht|ocumentP"""A (� XHTML d(� should come out *exactly*`same a? went in���}a <!DOCTYPE � PUBLIC "-//W3C//DTD�1.0 Transial//EN"> <7�xmlns="http://www.w3.org/1999/ ,�ead><title>Hello.</�0</h� <body>Goodbye.</� </html>"""  soup = self. �(markup) ����assertEqual(��oup.encode("utf-8"), D�+� def test_nested_namespaces(\�!):&��M�doc = b"""<?xml version="1.0" f�Cing=h�H?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/ �.dtd"> <parent xmlns=>�@ns1/�]child�22/"�G:ns34�34�Rgrand9��ns3:attr="value6�2�4/"/> <//�� ��E>"""5� ?docAdoc,)-�T� formatter_processes_script_tag_for_xml_document�<9� type="text/java�r"> </ �"> 2��BeautifulSoup(�"lxml-xml"� # �( would have stripped this while parsing, but we can add��v�# it later.��e.�.T�ng = 'console.log("< < hey > > ");'<� ed-��.aTrue(b"< ��Q� &g�rgt;" inAed) `�@can_�Pe_uni e��e .euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</4��?��e'q�2xe9R�'�X� vpopping_d�3rss dc="foo"><dc:creator>b</��� date>2012-07-02T20:33:42Z*���'�rights>c� �<image>d</ �o</rss>!)���@str(dO.rss2doc_includes_correct_<ing.�T� "'/>u  latin1"),�/b' 6�C?>\n~�'Ylarge �Q"""A '�T XML 1 sh-come out the same asw8in.A(� ����+ b'0' * (2**12����#�S � �!��nX�2�.encode("utf-88"), markup) def test_tags_are_empty_element_if_and_only_if_they_&�q(self):I��M���.assertSoupEquals("<p>", "<p/>")-�`foo</p,�-��namespace�preserved�� = '<root xmlns:a="http://example.com/"�b�net/"><a:foo>This tag is in the a �"</&�/<b-�b-�&� </�!>'�>aoup = ERsoup(�2�0��!�@oup. �:�3elfxt(� , 5�#['&']Z�t�A�  A�bA��closing_dWp~%dcpurl.org/dc/Ps/1.1dc:date>20010504</��0J �@str(t(.p"���attributes�3foo�5xsi�awww.w3���/XMLSchema-instance"><bar xsi:s�LocationD�P"/></2?fooF_xml:lang="fr">bar�Tfind_by_prefixed� �doc = """<?xml versw1.0" encodi�utf-8"?> <Docu&Q/ns0"%v?ns1'�1'�2'�2'�<ns1:tag>, �i��^ �26� key="valueQz</ns�D> </�B> ""m����|�j� �R(doc)C��# There are three <��3!s.��&� wb3, lenzall('tag'))^�But two of|mh�� and one�3is �z�2z�S�~����� �1;�B ;��D� ,' Q� �A$['��, 'ns2:tag'])))# � �def test_copy_tag_preserves_namespace(self):1��=�xml = """<? �Tversion="1.0" encoding="UTF-8" standalone="yes"?> <w:document xmlns:w="http://example.com/ns0"/>"""{����psoup = �. �U(xml)�0tag�Doup.e��duplicate = �.�e(tag) $�# The two tags have the same  prefix.7��assertEqualV�.%�%, x��o�~0wor-aslq"""Test�&� case (currently) for linking issues."""��#�2elf/BAD_DOCUMENT8��H�age_validator(>�) class HTML5TreeBuilderSmoke�(��81""",�n�a tree bI� that supporth��)real_xhtml_ # Since X�r is not�,� parsers are��ed to handle#E�k�s in anyB�ticular wayBpassy���J_JUAmarkU"<a>"�� (� x"Iwww.w3.org/1999/60", R�5.a. 0svg s�'<svg><circle/></�'���T�@2000S���B��8svg��3��8� �;� omathml�5�><msqrt>5</ �!</�'$''1998/Math�'MLZ�/ ����,��0�:�xeclaration_becomes_com O = 'W utf-8"?><Z1></�� X�1True(isin��5contents[0], C�)?�P 5�' �'S�"�[�.next_ele@y0) pskipIf(condil, reason}def nothing(test, *args, **kw �B): �return None �def decorator(test_item1�if conditionF��L�J�pnothingL���Oelse'�Z�v�<�y� """Diagnostic funcp�s, mainly for use when doing tech support.""" # Use of this source code is governed by the MIT license. __license__ = "MIT" import cProfile from io i�StringIO�html.parser!�QHTMLP�K�2bs4.�4bs4&� BeautifulSoup, __version__+�f.buildY��_registry `�$os �Upstat�drandom�Atemp�i�Etime�raceback�%syF�  5ce(data/ Qsuite`isolat�common problems�7hprint(@runn2�5on # $h %s" %))}C�sPython F-�Csys.�)r basic_gs = ["1", �@5lib �alxml"]f��zname inD��#�&�s2��5if X�2�.feature)��-�Qbreak��� t �.remove(_�+�5���3(I�����"I noticed that %s is� installed. I ���it may help." %P�z�y@if 'P' .append("6�-�6try6���f�)�_Yetree��q"Found +�,".".join(map(str,C��.LXML_VERSION))�except ILError as  t� i�� \� or couldn't be�bed.") �E��I <�"� I�. 5\� l �if hasattr, 'read'�"��0 = �.�(�Belif�startswith("http:")w�1� �7s:"[�0('"looks like a URL.an HTTP client.' %����U�p"You neosome other library to get the docume nt behind the URL, and fee�at document to Beautiful Soup.") �areturn�Qelse: ���6try ���if os.path.exists(data)%��)�print('"%s" looks like a filename. Reading B�Q from�� �A.' %���T���with openy�n as fp��+�X�= fp.read(L�except ValueError@�# This can happen on some platforms when�'���E' is8��# too long. Assume it's�8not =�6pasN�L!) "�for parser in basic_�s�2�@"Trya"to2� your markup I�%�r �success = Falsew��� fsoup =Y�X, features=�]��I�a�6Tru`�E0ion�:��%s could Db��>." ��=�traceback.G�G_exc3if �H�6�,�p("Here'at %s didC�yx�:x�B��-.prettify()b�-" * 80) def lxml_�RPhtml=0, **kwargsx@"""PoA out�<� events that occur dur�0Bing.P@lets see how~�6s aFnK� code is runn^���1""""��import etre��Q, ele"in �P.iter�@e(St�!IO 1), �� �w�q("%s, %4�s(h�E.tagu�`.text)class AnnouncingPmR(HTML �v%�6es �6~,$� do{Panyth ��g.�_p(self, �s,�'��handle_starttag4��h, attr>��P�0._p` START�+�P�:endN����G�?ENDE��q�� �C�@DATAD�#��scharrefG���pCHARREFJ�&� J�oentityL�oENTITYN� 0com���QOMMENk&� �OdeclG�@DECL� D��unknown_decl(self, data): � self._p("%s UNKNOWN-DECL" %+� +�def handle_pi(5�J� 'PI@�<�tmlparser_trace(�"""Print out the HTMLP-� events that occur during M�Bing.�This lets you see howH��/�es a document when no� Beautiful Soup code is runnd��,1""",�J�r = Announcing�'() �R.feed� _vowels = "aeiou" _consona� = "bcdfghjklmnpqrstvwxyz" :rword(length=5"Generate a random '�-like st".��v�!''�for i in2�#geS�Q���if i % 2 == 0"��Gt = �O���Oelse*���&�@s +=u�dom.choice(t-return s�tsentenc�#=4���+���U�" ".join(B'.�int(4,9)) list(ran)���doc(num_eleAds=1000�1""REly gn invalid{.Ktag_name['p', 'div�@span�i�b�`script �qtable']^�B = [������� 60,3�4if (� # New tag.��T����Z�B�(�s_��0��.append("<%s>;0�/�)el�1w I�r �5domG1,4) V�2V�`# Close�@/�<html>" + "\n�)�"</"�;benchmark_sVery basic head-to-� performance S�p("Comparative&er,�K on N %s__version__��' W�k�d a largeR (%d bytes).m�2len|M���["lxml", �"OA"], �S5lib" �.7�&"]`succesQFalsea��6try%���a = time.�"����sl=/�|�,W��+���bK� �6Tru�except Exception as e: �print("%s could not parse the markup." %�)r)=�traceback.G�V_exc("�if successs�bBS4+%sX�dn� in %.2fsw�(�r, b-a)) �from lxml import etree�a = time.���.HTML(data�b)���R("Raw^�{����html5lib�?�Dr = ��P]r��7�.��d�P� def profile(num_elements=10�,H�0r="h3"):zilehandle?0emp@�.NamedTe@aryFS�e��0nam-�� �:�.�O��y = rdoc�>�vars = dict(bs4=bs4, 3�=��)"cP� .runctx('bs4.BeautifulSoupR<�P)' , g������i@stat�p � .S�($��G"# �strip_dirs�Aort_ �("cumulative"��; �4('_|bs4', 50) if __�__ == '__main__'Xdiagnose(sys.stdin.read()) # -*- coding: utf-8�E """  bonus library: Unicode, Dammit Thi� converts a bytestream to7� through any means ne  ary. It is heavily based on�!e Mark Pilgrim'sU�versal Feed AG�works bestA�XML and , but it doesarewrit *�!or)���reflect a new en0; that's�a� builder's job.E # Use of t bsource�is governed byE��MIT license. __ �� "MIT" G�{1cs �.entities�!�`point27�$re �tlogging��png # I����pautodetcharacter0s. �pdet_typNone try}# First tr�`fast C�Qaation.# PyPI package: cZ������!_d2(s)x��|ereturnG�.�T(s)[''] except�RErrorB���K�p# Fall ��pure Python��6�v Debian�p1�-�*� �ehardet!�� � ��:� ing']  #import chardet.constants �#�._debug = 1&�except IJ�aError:��A�D# No]� available. �4def�_dammit(s)?��C�return None # AE� from http://cjkpython.i18n.org/. tryK��iconv_codec �pass xml_encoding_re = re.compile(�'^<\\?.*&�=[\'"](.*?) �.*\\?>'. �@e(),B�I) html_metaW�P<\\s* �P[^>]+|0set�=�p["\']?(��i� /;\'">]g� class EntitySubstitution(object): �5"""��e XML or HTML e;�ies for the corresponacters."""M��_populate_�P_vari's(lookup = {}�overse_��m�B_for:%[];����point, name in list(�2�.items()��|�`�f = chr5�)d��'�5if O�b != 34_���# There's no ����1ingHPquota mark intof��J�0# &'�&;, unless it happens within an attribute value, whichM��is handled elsew��q���sf.r�%d(� �4�["�1] = �# But we do want to �� �(re [�m�z�r�re_definid= "[%s]" % "".join���l��t��� f�A�(CHARACTER_TO_E_ENTITY,R ���)�,����9�_RE) = Ty @�3XMLk��aa� "'": "apos"v� '"': "k�!"&*�(mp�<�(lt�>�g�}� BARE_AMPERSAND_OR_BRACKETH"([<>]|"��Q� "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)O�2)")��%&]2�@amethod�sl!e_G�y(cls, matchobj~��w9ls.T.get(=�.group(0)u�0P"&%s;U�� �def _substitute_xml_entity(cls, matchobj): �$"""Used with a regular expression to substitute the<�appropriate XML k�q for an�special character."""@�0�= cls.CHARACTER_TO_XML_ENTITY[�.group(0)]@��return "&%s;" %� �@classmethod�def quoted_attrib!value(self, � bMake a�s into aD��H�, possibly escaping it.�iMost strings will beK� us0�Sdoubl�s;�� Bob's Bar -> "�"_�CIf a_� contains R�O, ity�����pWelcome "my bar"�'��'� Sboth k�<and�` ��c@ed, ?�4the� W�` �^��"�&=; �2bar�e��)�!e_QU= '"'%� if�2 inR�oif "'"��!�6# T�Os��9���#mPs. T4�7 7��cies. W� rather thanC���l�s becausM name is?��"kS" wheg�is is HTML orv. If w�����d� �, we'd hav_decid@�between 't &s�-;.�ereplac� (�k" =.6�c('"', �?)9�Jelse3�There aredbut nok��.es�#@can  !�6�<C�%�� =� �+�" +5�1x�<�1ke_Z�j=FalseS1ies��}b:param�4: Aj to�!d.Pless-H sig DQ<,Svgreater2�(�Egt;,K yany ampersandH/�amp;. If you want ampersands �that appear to be part of an entity definition#�GleftD�alone, use substitute_xml_containing_L��ies() instead. D�:param make_quoted_attribute: If True, then�� string will beG� <�, as befits�J�u value./�5""" �# Escape angle bracke@�d9<�K� = cls.AMPERSAND_OR_BRACKET.sub(^��o�*�_ "y,�)/if  R���6_�(�)6�breturno�n�@classmethod'�?def,���,�=False):2�E"""S'B XMLh�ies for special�characters-�4: A ad. The less-than sign3� become <_ greater2�(�g(�wand anyG�[ Qore notLJ�B��Namp;�!, S�%n'��#A_BARE_Fɺhtml(cls, s Replace cer) Unicode  with named HT-& This differs from data.enL�(�ing, 'xmlZ�Brefry�'i[!at`pgoal is�{�result more readable (to thos�ASCII displays) rather$Cqrecover�%aerrors�re's absolutelyKh!roth a UTF-8E�qg a LATIN SMALL LETTER E WITH ACUTE, but �\��G�x~"é"@, i fto som _peoplor�CHARACTER_TO__ENTITY_REE�Y[y\1  EingDetector: : """Suggests a number of possible encodings for a bytestring. Ord3�recedence: �E1. E>�you specifically tell �Detector to try firstE�(the override_�argument2�he construE�)�U2. An� declared within Y�� itself, either in an�3XML<�ation (if<�s� be interpreted as an XMLF�1doc�B), o^� <meta> tagT� >�X� HT�U�".)r3��Ved through textual analysis by chardet,h�c��� a similar external librarya4. UTF-8�5. Windows-1252.R�1"""�def __init__(jmarkup,�=None, is_html=False���Xexclu8�!):q��&��h�.=�=t����%[];� \�> = �7or 4�o�%�= set([x.lower()Jxb ,�%])�M�0 =  %�'$ =1 � E�'�0# Fp$ obusiness: p- ��c����xsniffedo��Ptrip_r_I�_6�(�#up%cusable$w, tried&ifais not��?����(� M�!n �  [��7�qreturn P����o��C��T.add(��c���g�ETruef��{��@property��KR�sM<"""Yield.� that might workRthis .��� ��,�J "if(�_e�k���y�e��@# Di$e bq origineQstart'C��`indica5its\?'� �% ��1��ALook��/h��OL��#�d_encoding is None: � self.declared_encoding = �]find_�(B��M�-�rmarkup,:�is_html)+�"if�_usable(3� [�, tried)�Ryield?� 2� b�*# Use third-party character set detection to guess at theB�.���L�6det#iO +�=��>�bdammit�0" L�! 1� As a last-ditch effortS� y utf-8 and windows-1252for e in ('+�H', '*�'8eg��>e�@classmethoddef strip_byte_order_<(cls, data{�"""If a ,�-,� ,� is present,N�1 it�`return ��implies."""��� �=I�if isinstance(�d���>�# Unicode� cannot have�w�:��Y��~�A(lenx�q) >= 4)��[:2] == b'\xfe\xff') \<��^�' /�2:4] != '\x00� �1 = Y16be'P���! =�E[2:]�_elif �f���,l���5[:3@ef\xbb\xbf' 83e�4e�v��i� /32} 4l� fg���� /32= l�e��TO`=Falsecarch_entire_document�w"""Given a $�s�x�T its T.�eAn XML��H/�>p beginn6!ofHl� G�/HTH�i� <meta> tag, hopefully nearT�>��m�����if search_ent�ire_document: ��xml_endpos = ht�len(markup)3�OelseA�E1024,��f� R�max(2048, int(`� * 0.05)) >� declared_encoding = None!�_match = �6�_re.search�", �=&����}if not |�[�pand is_� 1�SOmeta��O� �U�!is�� � / =�.groups()[0].decode(Z���'ascii', 'replace'� �nreturn�v.lower(K�)��� class Uni�SDammiR"""A �for detectEthe $of a *ML A and�aconver2�tit to af� string. If M�WsourcT�!isA�windows-1252, can  MS smart quotes withJ�ir HTML or XMLE��equivalents."""# This dictionary maps commonly seen values �"charset" inb�[� # gP tags���correspon@pPython Yc names. It c�Rcover�#k�that aren'ti�?�'s aliasesZ�Q't beRrminen@# by heuristicsD�Qfind_�.�CHARSET_ALIASES = {"macintosh": �-roman",1�� �p"x-sjis.�`shift- �}Z ENCODINGS_WITH_SMART_QUOTl�[S�"k�"iso-8859-1�2�]s�def __init__(self, E , dErideOs=[]��M_M@_to=,y=False, excluL�)�z�.I�" =[�/�7trip�*[]"�contains_�P_Pacter2�� 5�2$ =� � lo@logggetLogger(__&__)�cor = ED *orvLF�# Short-circuit iadata i�oQbegin�if isinstance@str)�f == ''�8���>4!�u$_)�3stre�3 sel f.original_encoding = None �ureturn �e# The 2�detector may have stripped a byte-orde�7rk.E�gUse th.� markup from this point on6�Sself.(�!= ����u�6for�+in?��%s:��m� �"���p��_convert_�(X�)W�if u is notq u��S�Vbreak�!if,�u)�#=�1 ofJk�s worked. As an absolute last resort,�P# tryD�m again with character replacement.���U"�9�&if!= "ascii"��(���[ 3, "�"f�<�j�&�log.warning(�"��V"Someqs couldbe decoded, and were "N� �dvREPLACEMENT CHARACTER.C�����contains_^��_zs = Tru�<��� {0# I}"neh#at_C, we�(atforce it to�# ASCII, bu,�E��4 destroy so much data that IS�'nkB�0giv up:kbetter0uni|_u?�A+=�def _sub_ms@(4�, match) """Changes a MS smart quot*0 toXML or HTMLc��upentity,�"an-b6.""��" =�.group(1 "if�_�s_to == 'd'��x�Csub K0MS_qQS_TO_P.get(o�!).\&e(h�OelseJ�A���t�if type(sub)�KtuplM�'��l.�;xml��2����U'&#x'� + sub[1]�&';!� ��F������`�=�^�0^�Z�V�5sub���� � d�&, proposyerrors="7ct"proposed = se lf.find_codec(proposed) �tif not �E or &�, errors) in self.tried_encodings:G��N�return None�6�.append(h��markup =w��U�"# Convert smart quotes to HTML if coming from an �B�that might have them. �Aif (�a�_a�a_to is/ ���6andD. ENCODINGS_WITH_SMART_QUOTES); g�re = b"([\x80-\x9f])"��n� /�compiled = re.�(#�&reZ�@�P�^�@�;�Rd.sub�_sub_ms_char, 6�%) �:try�#print "TryCto cdocumentB%s (Y=%s)" % (S���#� #�u��p_to_uni(�,;�+�I = uW��original% =S�.�except E�ion as e �jdidn't work!43��K)�"Correct@: %sE2�E���Hdef 8�!�u, data,U�<="strict"'''Given a �0ng 7its>� desW%�binto Uj%�[Q�recognized byW�s.aliases'''�?tr(�@ Aerty:��declared_htm�!�%1.is,� g �5�detector.p���4� , r7set�@valu�.�-�.CHARSET_ALIASES.get(@�I� ! &�K�W��.replace("-", ""))F�0_G��olower(w�#��3��1if 5�.`�E���(�!PU�!����P��R����:c =+ N&�s.lookup�I� a� except (LookupError, Value �G): �Epass�return codec �0# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric�.N�MS_CHARS = {b'\x80': ('euro', '20AC'),+�� +�1': ' '�2I�CsbquJ�/1AJ�3,�@fnofu�?192*�4*�&bdV�E,�5,�`hellipX�O2026-�6-�cdagger-�0-�7-�D-�1-�8-�AcircX�C�9*�apermil,�/03�A-�`Scaron-�/16,�B,�5lsa7/39�C-�QOEligX�5D?EI�Q#x17DI�/7Dt�FI� 9'g�9E('ls�/18h�9r,�9lk 9r,��93bul2`9PndashBO2013�m,�4,�QtildeX�D�3tra+�1�as!9b,�r !9c-�%oedJ eI��E�/17>!9f+�1YumI'),}0ochASCII. Contain@# hos like stridiacritic0rksá into a, but also3# cZ�Y non-X��9��2“=�"A_TO_��N��0' : 'EUR�1��2�,.�3�f�4�,/�5�<...1�6�+�7� �8�^/�9�%�a�S�b�<�c�,OE�d�e' : 'Z',  ��b'\x8f' : '?', �����/90�1�;"'".�2�3�,'"\�4�5�*.�6�-�7� �8�~/�9�L(TM)�a�s�b�>�c�,oe�d� Ge�z.�uY�au �u,'!�u,'c�u<GBP�w#$', #This approximation is especially parochial--t,�eis the#�'� #generic currency symbol.1��Ta<YEN�|�S�,..� �<(tha,<<2� o -(R_� bou�b+/�2/�3�( 'acute')bu9� 3b b,E�1�b>eb<1/4K�-1/32� cA`�c � � �'�z �z,AE�{C�{ .�{ �{ �x �wIs�u �s �q �dqD\�dq dqO.�q �q �q �q �p dp .�pU�p �p �p �p dpbs�pB�epa�'\xe1' : 'a'3, �b'\xe2' : 'a'�3�4�5�6�e]�7�c�8� .�9�a�b�c�is�d�e�f�!f0�o\�!f1�n�q .�q �q �q �q �p/�p .�pu.�p �p �p �py\�pb�p.�!} )# A map used when removing rogue Windows-1252/ISO-8859-1=�characters in otherwise UTF-8 documents./�� Note that \x81,�d�f�90, and \x9d are undefined inB��\�WINDOWS_�_TO_UTF8 = {1��r0x80 : \x82\xac', # €&�2&�R0\x9a&�(&�3&�c6\x92',c�X # ƒ%�4%�Pe2\x8K�eK�&�5&�#a6&�&�6&�0&�&�7&�1&�&�8&�Pcb\x8n���H# ˆ%�9%��bq�&�a&�Pc5\xa"�K�Ŗ�b%�K�9�K�c&��K�SSe%�%bdx'ŽJ�#91%�p�#98p�&� �&�L�5(&� ��X# ”&� �9�&�&� #93�&� #94&�&�9�˽� C4\xa�� �WK�Z9=� ��K� � �9%�J�f�4b%�%�ac¤a4%����a2 : b'\xc2\xa20', �P# ¢ ���0xa3 : b'\xc2\xa3%�%�4%�4%�%�5%�5%�%�6%�6%�%�7%�7%�%�8%�8%�%�9%�9%�%�a%�a%�%�b%�b%�%�c%�c%�%�d%�d%�%�e%�e%�%�f%�f%�%�'b0%�&b0%�%�1%�1%�%�2%�2%�%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�cPF3\x8P(À%�P�%�P(Á%�P�%�P(Â%�P�%�P(Ã%�P�%�P(Ä%�P�%�P(Å%�P�%�P(Æ%�P�%�P(Ç%�P�%�P(È%�P�%�P(É%�P�%�P(Ê%�P�%�P(Ë%�P�%�P(Ì%�P�%�P(Í%�P�%�P(Î%�P�%�P'Ï%�dP9P%�P9P%�P9P%�P9P%�P9P%�P9P%�P9P%�P9P%�P9P%�P�99', # Ù  �0xda : b'\xc3\x9a',�Q# Ú �x 0xdb%�b%�%�c%�c%�%�d%�d%�%�e%�e%�%�f%�f%�%�'e0%�&a0%�%�1%�#a1!���H# á%�2%��(#a2)�H# â%�3%�3%�%�4%�4%�%�5%�5%�%�6%�6%�%�7%�7%�%�8%�8%�%�9%�9%�%�PaP%�PaP%�PaP%�PaP%�PaP%�PaP%�fPbP%�P�+bTH# ñ%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�PbP%�!} �MULTIBYTE_MARKERS_AND_SIZES = [$��m`(0xc2,jf, 2), # 2-byte characters start with a �XC2-DFE� e0E�Qef, 3E�3E� IE0-E>�f>�Qf4, 4>�4>� UF0-F4|�]�lFIRST_�< = � a[0][0]J�/LA>�S-1][1�@classmethodQ�def detwingle(cls, in_< s, main_encoding="utf8",7���embedded,��windows-1252"):5�h"""Fixfrom one i� O� in some other �.J��Currently the only situation supported is W� (or its�subset ISO-8859-1), embedded in UTF-8. �,The input must be a bytestring. If you've already convertedD� the document to Unicode,:�re too latew�0outx�)iss���twhich `�_encoding`s�characters have been���their `main@��equivalents.U�5""" �=if �.replace('_', '-').lower() not�(@��_'windows-12522��_�)):-�raise NotImpleKzedErrorT��X�'"WX� and ISO-8859-1 aqhe only currently suppo ��L���"� iFs.")$9if G �P'utf8�outf-8'�(@ is T���� ��_chunks = []��_start = 0| po*��`while �< len(in_U�s���4 = �U[pos]T��U"ifsinstance(3�[, intK��.�# Python 2.xG����9�a = ordD�)!�1if � >= cls.FIRST_MULTIBYTE_MARKER3��T��6X�<7�/LA6���DThisOBof aYa multiX�m. Skip�#F end����Afor ]�, �P, siz� �S_AND_SIZESS�I���!if�"T�3� <=_�6��:��!+=~� �� ���Ybreak�'elr�F0x80q�!in�qWINDOWS _TO_UTF8�# We found a ]!y���0# S�@2 upkis point ary�Au.append[3�C:#�z���# Now trans�� o6���Radd i�nother, one-� ���o[H�]L��1�s�� =! "�OelseRGo onue next � |�"if�w�:= 0e� #  is unchanged.  return in_bytes �Uelse:��� # Store the final chunk.%��E�_�s.append([�[�_start:])7��b''.join(C�T) """Beautiful Soup Elixir and Tonic "The Screen-Scraper's Friend" http://www.crummy.com/software/]��\�5/ �m� uses a pluggable XML or HTML parser to �, a (possibly invalid) document into a tree representation. v� provides methods�@Pyth� idioms that make it easy�navigate, search,;�amodify��.�works with m�A 2.7A�rup. It !�$better if lxml and/or html5lib is installed. For m8an you ever wanted�know aboutA, seo D.:bs4/doc/K __author__ = "Leonard Richardson (l� r@segfault.org)" __version;�P4.7.1�copyright�C�� (c) 2004-2019 k� " # Use of this source codeEgoverned b�MIT license. __ �u�AMIT"�!ll�%['R�'] import os �$re �%sy�traceback�warnings from .buildO��_registry, PRejectedMarkup<�sdammit �0Uni�D�"�2ele,^�(%bCData, � om � �DEFAULT_OUTPUT_ENCODING�Qeclar1�boctype �N'bleString�RPageE��rocessingInstruc�ResultSet��eStrainer�#TaV�P) # very firstng we doPive al�Aerro^`someon ` # run�;�s# un3�Dconverting it. 'You a ry� toK� ;�#2 �7 d�0. Trwill noA.'!=d�"ne�b��, either��ait (`p setup.p58 `)c by��q2to3 (`�-w bs4`).' class6�R(Tag)1"""5�+�`define<pe basic@erfax���3s. I�5eseIbe c8�`r��^Dset(afeed(m)R� j�A mayQ��?l�|Aits C�)Cn�bhandlebtag(name, attrs) # See note about return value �handle_endtag(name)�Pdata(� ) # Appends to the current �D nodU�@endD7�containerClass=NavigableStringQ�EN�K�� No matter how complicated�underlying parser is, you should b��t����build a tree us8� 'start tag' events, 'end��'�%�and "done with"B�!. 1�!If�`encoun�an empty-elem8tag (aka a self-clo�Atag,C�like HTML's <br>�), call ��I���Athen9��.�1"""�ROOT_TAG_NAME = '[docu�"]'�A# If`Pend-uW gives no indication whichKX er4�yd�# want, look for (these features�DEFAULT_BUILDER_FEATURES = ['html', 'fast']�ASCII_SPAC%�'\x20\x0a\x09\x0c\x0d�NO_PARSER_SPECIFIED_WARNING = "No:`was ex_itly specified, so I'm0�best availY�%(markup_type)sQ���this system (\"%()s\"). T�usually isn't a problem, but iCPrun t-��code on another[��, or in a diffe�virtual environ, it may use,�� AfbehaveH�ly.\n\nT�|�that causpis warnH is�pline %(�_number)s oM0fil���gs. To get rid%�P�`, passQP addiqaal arg$ 'A=U'�BeautifulSoup constructor.\n"Edef __init__(", E="",P=NoneRilder�>�BR_only"�` from_Cding�`excludl co�s�N�**kwargs):�G"""C���~�c:param�Q: A s\� a l-Dobject representing\���:"beda�qq: Desir�<�rI��#i�Ay be��7�aicz�r ("lxml", �- �B�".n!r"�`5lib")�f���f��F� to���! (@��n��Z�5 ��r�). It's recommend atG�� �0' D�"th�_same results across platforms�sc TreeBuilder to use instead of looking one �)up based on `features`. You shouldn't need to use this. A�:param parse_only: A SoupStrainer. Only!� ts�the documentF�Pmatch���>� will be considered. This is�5fulE�@whenh�sI�@parto�am�p that w� otherwise[�5tooD�Plarge��fit into memory�from_encoding�0stry�pindicat ���"���&thy�l�!be���APassc in if BeautifulT5 is�Agues�wrongly about t�\�4's ��uexclude�s�1lis6�&s �x�R�ss known��,. �`you doC�-�D��P but 3��-� �1's �u� �kwargs: For bac� ds compatibility purposes,1��etructor accepts certain keyword argEd in>� s3. None'se:�qdo anytA�P4 and�ure's noaactualU$ss����l�.(�6""";if 'convertEntities'�=5gs:5��7warnings. �(���"BS4 doe�t respec a��D��"L����$. � are alway�?ed L�� to Unicode characters.")markupMassageBdel *['2�]��#," U�*,The tree builder<��S?ble,fny necessary �" m 7smartQuotesTo7 2�74 U�7,S�! q�e<pselfClo?Tagkg 4�2  �&"BS4 does not respect the selfClosingTags argument to � " @�� "BeautifulSoup constructor. The tree builder isn�onsible N��for understanding�"-c� tags.") �if 'isHTML' in kwargs:���3del�[+�]!�warnings. �(��@�.y�%,Suggest you us�eatures='lxml' %��i and f�-!�]�9XML=�def deprecated_(old_name, new �)S4if $�v�:�PY�#�'$M"%s"FX- 'J�has been re�d"%s."' % �)H�tvalue =/� /�m�T%�qreturn U� k��FNoneparse_only = �/or Q�"0�OnlyThese", �O�/from_enco[*= �k��3�EB��i���S�l�+if_��@isince(markup, str_ 8"You provided Unicode >� but also!�aq% �k. Your��will be ignored�� A�: = rif len(uZ) > 0'arg = list$�.keys()).pop(�raise TypeError�__init__() got an unexed keywordFp'%s'" %� /�� �original_V% =a r7if .��K �D = [ � U�-�&��1 or_1�L) ==b ]�� .DEFAULT_BUILDER_FEATURES�_class =_registry.lookup(*~� ?�%if2�B�!is N��F�NotFound�+�"Couldn't find a<Awith�2J� requested: %s. Do you need to install a parser library?" ��% ",".join(features))*�builder = �z_class(&�if not (original_N�% =9�.NAME or_��9�$inr�.ALTERNATE_C�=S):C�%if-�nis_xml#��j�markup_type = "XMLOelse>� HT?� �2# This code adapted from warnings.py so that we get the same lineR��#ofP�Vs ourJ��S�@() cgets, even ifR��answer is wrongX��(as it may be in a multithreading situation).@��w�/No��?tryG7��sys._getframe(1R�except ValueErrorQ�Mpass�!ife4�globals =&�3.f_� J����Q_numb��0.f_�-no2�Cd�1 = �}_dict__A� s�0= 1� � 7�PfilenG#= i�.get('__�>__'c0if ��6�]Ufnl =$�k.lower��L�nl.endswith((".pyc", �?o")�;��k�][:-1]��d���1# I re%no� at all,uis most likely]REPL,v���Q# and=��v�!isnecessary"�8�v�i�([��'�= ���+��v= �1� =-�$= ��P��  s. �(self.NO_PARSER_SPECIFIED_WARNING % #, stacklevel=2) r�B�� ��.� %�Pknown���%�_ Uspace��Or_only =g ��.initialize_soup�if hasattr(n0, '}0'):L��# It's a-object. markup = �.read() �elif len(�) <= 256 and (&�-� (isinstance1�, bytes)1�not b'<' ino�i�C�/orF�6strD�C�)):�$# Print out warnings for a couple beginner problems@�involving pass�Bnon-  to Beautiful Soup.>� � will st�parse the input as,C�@just-!ca0�at's what<�user really wants�/ifI f��y��os.path.supports_unicode_filenamesd�=�vpossibl%�M en>�z("utf8"JelseL� �!is� = False�>tryX�1��}exists(� �qxcept E�0ion�� # This is almost certainly a] N ���# characters Pvalidxz on thi<�system. J-let it go �*�� {�3if 1��� �,��]�G�/�. �H�#�'"%s" looks like a ,%s�. You should'��E���'ably opeGK��O�]�Phandl/toL� C.' % 9self._check_�s_is_url%) ��(0�)�!, �original_K3ing�declared_htm�O�>�0cons_replacement_f)] xv��builder.prepare� *��V�Gfrom�uexclude�-s=�)Dx�Wresetx�c�.�H_fee�.���Vbreak A e PrrRejectedM� q# Clear@��5Rremov1's circular�# reference.�0 ob��Z�V= Non�_�1.so!�a�def __copy__>V��r = type� self.encode('utf-8'), builder=self. �, from_encoding='.�# �%) �# Although we 2�ed the tree to UTF-8, that may not haveC�Abeen4�@�aing of�original markup. Set�gcopy'sC�.+���|�breflec3�!� object>� .\��k�^��!= 2V�w�8�preturn ?�/def __getstate__(A�%):1�# Frequently a@ can't be pickled�d = dictN�0.__ �5__)V�Cif 'F�' in d and��.X�Fable��%d[=�v] = Non�d�@�icmethod~��bcheck__is_url(��A""" 3��s�C2�3 if looks like it's actual#1url�raise a warn>L�if so. M>�E�C unicode or str (py2) / bytes�&3)^8"""M�isinstance�,7����space = b' '?���cant_start_with = (b"http:", �6s:"_elif u��?strs�r�q� � p�s��cif any�.�s�(prefix) for ��n � �"if/#in u�� R��.��� dedy# =[�.�(, 'replace't�?���,el ����E� ]� �(� s. �(��#�X'"%s" a URL. Beautiful Soup is)*anO�B���' HTTP client. You should probably u!n(�SP�0stsEgtpdocumen2hinAURL,OfeedM�>�+to�Z.' % T ,rS�k�# Converl�U creset(^��K �j�.�# Close out unfinishedTAingsc%�1all op;ags�endData(RwhileacurrentTag.name !yROOT_TAG_NAME3�dpopTag��9reset(self):  �Tag.__init__(self, �.builder�.ROOT_TAG_NAME) B�0�.hidden = 1�B�{.reset(5�current_data = []<��Tag = None�tagStack:��po�rve_whitespace_tag_s0�aushTag!) g�pdef new7��b, name�Q�=�, nsprefix�attrs={}, **kw�!):Q��/"""Create a]� tag associated with this soup.""":�L�.update(Y��preturn ��� ��#, X� �Rstrin , s, subclass=NavigableS"�� *���p�'(s�insert_beforesuccessor�0raise NotImplementedError("BeautifulSoup objects don't support g�>)."�_after�Nf� �0pop���G=G.popo"if� 1and=M�.� F[-1]���.4� �#print "Pop",z�.��n�� #�-���J�WFelf.3�p��]�?ush� X�q is not� .contents.append(tag#�"�x�4� %ifa2 ins��+�Z �}�FsendData0��oainerCQ �% �� = ''.joinh�6�S�V# If �1d,  �s��C���#�hing but ASCII s, replace ita single!� G� or Yline.� ifb�� ��� ppc = Tru9 for i in current_data: ��if i not.�self.ASCII_SPACES3��F�strippable = False^��+�]break�6if D�h�pif '\n'���j�&�! ==� x��(�Oelses��� F�9 ' F�# Reset the X collector.(�N M�,[]L�Should we add thisQng toc�tree at all?`��B�parse_only and len({�tagStack) <= 1�\A���@ (2�J�.text or 2�0 2�xsearch(�.))fjreturno = containerClassG� �x�object_was_�Bd(o)T�Ndef ��5�p, o, pa�=None, most_rec"qelement��"""Add anU�v� E."""�#ifb�@ is e� ~�0 = �.+8Tag@����M��Q��revious_�/ =�_�pA��_��a�6Gnexts�E�sibling = "������ssinstance(o, Tagsa��!o.P��I�*�i�*�� *���,o.� 2�� ��J��7 = _��CSfix =- �/�o.setup(B,, n�,rg� �  ��,����w�.ents.appenu�I�# Check ifre insert@intoDpalready�sed node4jif fix1�linkage_fixer��_ ��4�G, elS"""Make sure .�2 ofo@frag�yQsound�first = el�5[0]�[child�&-1 �descendanD�-�1if ��z�g��2el.o��1# P" s>be linked to first child �el.next_element =$��# We are no longer linked to whatever this G�)isf�tprev_el^�.�Diousw� -�0if ��0��is not None and�9el::���;� ��C� ,�c# Firs should be�the parent,}�!no��� siblings.T�_ �_ = el(� M�! =� +�x1havy%� as we've been appended as �Flast��?�d�Tindex is a tag, dig deeper for a "Z� descendant"�if isinstance(s�a, Tag)5�.contentsN�_g�_�x(False)A�final step, :��M. It�#�10's & (if found), else walk updWchainJ��cfind a/t with aN��n�� '� ���Ptarge~while True3if /�!is ��O�Ubreak���%el7���H� �3=� K� p��t�� $�qw =��O� |�odef _popToTag(self, name, nsprefix=V�, inclusivePop=\)�b"""Poptag stacJo <�Ading[most recent�[1 of$�PgivenI. If|��f07, pt�J�|�but *not*B�� z�q5 ofE��5�&"" p#print �p�qo %s" %5@�!if�@ == S.ROOT_TAG_NAME8�# The BeautifulSoup object it=�a can n~ be�6pedN�C�breturn���a_a ly=pedx(_size = lenP.tagS)��i in range(7�- 1, 0, -1 �����N�5[i]P��!�@if (e� !t. ��`r�~ [�>�Q�:  �most_recently_popped = self.popTag() +��Ybreak�G�2�return 5� %�def handle_starttag(�b, name�@spac �sprefix, attrs):A���"""Push a D� tag on to the�2ck.o��.��If this method �qs None,1��?��was rejected by�o�SoupStrainer. You should proceed as ie�e�had not occurredH� inW� document. For instan�C�0is �a-closingW�,G�don't callJuendtag."�6"""�# print "Sp�K� %s: %s" % (va@��.endData()K�1if .parse_only and len�tagStack) <= 1P���(�%�=�Y.text&��*�1 or@�-�psearch_2�)�?��� = �V�,^.builderu�{|�a���H�.SntTagS�_@_elef�Bag i}�R��5tag"if�U��E��e� I��3�T.next� = k�5�(�@pushK6tag��V�f3, n=�#0End�Q: " +T��(������� To�!f��d�4� , ����_�.append( �I�cdecode�qpretty_�m=False3eventual_encoding=DEFAULT_OUTPUT_ENCODING:�formatter="minimal"�A"""Rusr/`or Uni� representation o&isXdTo get8�R, pasT for �.[jis_xml1# Pw�XML declar�t��Z�_part = ''�=if :1 != q�M��="%s"' %N� o�r�� <?xml version="1.0"%s?>\nE��A�Jelse�O��!if3 5�indent_level �c�.�_level = 0  �$return prefix + super(BeautifulSoup, self).decode( 9��Rindent_level, eventual_encoding, formatter) # Alias to make it easier to type import: 'from bs4 � _soup' _s = � � �g class+�@tone� �!):�"""Deprecated interface�an XML parser.""" 2�def __init__(, *args, **kw �[��'�['features'] = 'xml'L��#�warnings. � cO'The�� �Eis d�. Instead of using c��g�`'it, p�`="xml"�9o ti�oup constructor.')� %]* # pStopPar�(Exception3��(�F3NotFound(ValueError-� #By default, act as an HTretty-pr0. iname__ == '__main__' r1sys��y _(sys.stdin[�(*� .pm�ify()) # Usethis source D� is governed byy�MIT license. __ ��� "MIT" try�f`collec7cs.abc �Callable # Python 3.6 ec0 Im_?�!eO� K� [�$re ��������_sieve}� '�q = NonetU�  1�ppackageMnot installed. CSSePs can �be used.1) Gbs4.dammit�EntitySubstitua DEFAULT_OUTPUT_ENCODING = "utf-8" PY3K = +version_info[0] > 2) nonwhitespace_re = re.compile(r"\S+"%pNOTE: T4Qisn't��of 4.7.0. I'm leav ite a little bit on #Voff chance someond�9�their ownh�/. � s�aD(att2"""�f���`ibute jJQother�0bac d �atibility^�@property �m�l��h�'�0getm�� �)@6�0.seK�sK��j�P�2NamQdAttr�!(s l�__new__(cls,c,� =�!if"��������obj = str.[���'�Celifp� H� # Not really namespaced. �obj = str.__new__(cls,/�))�Uelse:��>�7�prefix + ":" +F�Cobj.!�"= �T�����! =5� ��� �"�-return obj class AttributeValueWithCharsetSubstitution(str)�"""A stand-iI�#ect for a character encoding specified in HTML."""{�i�JMeta�(���vgeneric���Pthe v� of a meta tag's '�dset' a�!. %When Beautiful Soup p s O�markup '<M�F�="utf8">',$�G�w��5�n�� will be one�PtheseBectst)�>Idef ooriginal_f��*�h 0��/�.�: = �,�9�<���ue(self,�4�'�Eing oontentb{�$ http-equiv="cW�s-type" �="text/html;Bset=*lT)H��)CHARSET_RE = re.compile(r"((^|;)\s*)([^;]*)",)�"M)��jmatch = cls.w�.search(D2if 9�kis Non&g# No sv necessary�x��XKstr,��1�|�grewrite(4����.group(1) +c � 3ub(`�!, � �AwareEntity�"):&2"""� @rule%at are aX��Asomes quirksiScp87s5�<script> and <style>s should notundergo e� �i�PIncomNavigableStr�objects are checked to see if they're�# direct children of a <script> or <style> tag.2�A""" �cdata_containing_tags = set(["C�A", "A�2"])6�preformatted2�8pre&�serve_whitespace-�'pre', 'textarea'_�@classmethod�def _substitute_if_appropriate(cls, ns, f):0���if (isinstance(�NavigableString),��0� and ns.parent is not None&�.name in cls.J��d�@# DoX�3hin����return nsn�5# S�(�Cf(ns-,Dhtml"�q���Z��|��CeEntity�Kion.l��x�Waxml) T# Fr(objectB"""CBs inion about how~� a parse tree.I�)# By default, represMvoid elements as <tag/> rather than�>F��.�_.�p_close_pix = '/'�yeies(self, *args, **kw ����"""Trans�@ cer character�0to $d T���@�raise NotImp�edError(cFHTMLg ����$he& -� ��Lrk� ���M�_Aware(#!pMinimalK��( ��2A m*��b�> ����5�(���n��pt omits]` slash a @��7�X Lae onlyx� es8~ial XML# R��?� _�RusingZ_rules%return HTMLA wareEntitySubstitution.s �e_html(*args, **kw �) class PageElement(object): �"""Contains the navigaa�al inform� for some part of.�ApageG��(either a tag o �Qpiece)�rext)"""�# There are five possible valuesi���"|�ter" argu� passed inv�# to methods like encode() and prettify(�#5�"0" - All Uni3� characters with corresponding HTML e3iesG� �converted�@thos�tities on output.3�z�5{�The same as �", but empty voidJsc�represene�a~�<tag> rayPthan �$/>l�pminimaln�Bnampersandsangle bracketi��F�7 X�: & < >&�`None -P nullmatter.`��Snever>�( y�g . This is not recommended+6it' qfaster "�wpA callae@func - it will be� ed��Py strthat needsrundergo�%y ��1A F1��pinstanc�(d�)�^��mRIn an1docE,udefault}15",Bz�6�ll leaveF�contents<script>B�q<style>E�`alone."���W�1all+�Qgivenc��&B�Ptreat.���_FORMATTERS = {*���1" :�:(),"�5#�5$�KB : MP�u�,� : �d�}�?XML�?XML[� :$��x� �3def_v(self, s,�Ster='�'.E"""l�B�2 us""e �.�=���Tif is ($�,bh��+��0 = �%._�� �V_nameB�)ci�us�V����W�5�Jelse%��C; ��Y�P# Baco ds compatibility -- you u!toT in ap�<�N��output = formattwer(s) �Yelse:���output = formatter.substituteC�rreturn 0� H�@property�def _is_xml(self)m�"""Is this element part of an XML tree or�#HT�?f���T;�is used when mapping a� name ("minimal") to an��E� ap� riate function (one that pers entity-�'�oG�the contents� <script> and <style> tags,��not). It can be�"inefficient, but it should be called very rarely.:�5""" �0if k`.knownz is not NonQ# Moss�� time we will have determinedf&th��Ca# docu is6sed��"�' � # Otherwise, it's likely "was created bc�a��# direct invoca0of �constructor from within�euser's9# Python code�P0par/isI�t�top-level objectX5set����pion. If, take a guess--BSfusuall F�1on :fmarkup��#�qgetattr2, '', False-.� .�+/P_for_zW�� �yLook up~ babon its�EXNree.�[��XML_FORMATTERS.get(�,fF&()���SP��� Q��Ssetup@=, previous_�Jnext�K����8�xsibling8��w`""Setsy�Ninitial rel2s betwee5and{�o(s.vi� =2ent��� ��(� �j�-if�^�.C! = � "���_��� +�.�1 = B�����t_sibling.pr evious_sibling = self �kif (pr%�uis None%��,�1and;�.parent is not(� �.contents):C� h��R�6�F[-1]���.:�� �j�<if �� �^�_.next2��SJ_alias("�/�") # BS3��4��8�9�def replace_with(�,�],if� �raise ValueError(h���@"Canx� one elem�� another when the"C�0�Tto be�dpart of a tree.")A�i��1��� �_eturn4� O ;� � �aa Tag �Bits �Bold_�Dh��my_index�.����.extract(�c�.insert(T� t�r�dWith =A'��^bunwrap�,/my� jC*ani�Dn/ato`I'for child in reversed?':]�WOL�_,�bren = C�!�  , �w_inside�$meX�{�(-�v����.append(me�&��w�~� """Destructively rips this!ou<3the>"�1del-���P#FindD two���at would�  to each -%if7#�(62anyVren) hadn't beensed. ConnectE�e�.�Blast#_last_descendant() � next_element = last_child.� 0�if (self.previous=��is not None and2��i�2� i�/):8� �8 = @�>�)if� ��� �l�/= C���! =4%�'�(�M�"ar�w�xsibling.��;and� 2�n��� $� � E�����.��}��=�1�� I��g�6�[��g��areturnnWQdef _adescen�_�, is_initialized=True, accept_"���"Finds the beneath this object to be parsed."�+ifv��� Q #�Ci�y�OelseJ� ,��Hwhile isinstance(y�a, Tag)��.contentse��K�%� 2�E[-1]|��Z(ot i��� h� d� !�k�# BS3: NotAt ofAAPI! �2RecursiveC- =Hbainsert�, position, newF������� �raise ValueError("Canf� into a tag."q ]�*]� S�^� it?� _�(�V, str����C/�NavigableString) �=�*�� +�(�)�0�from bs4 import BeautifulSoup�&�}H�,/� �# We don't wan end up with a situation where one BeautifulSoup �# object contains another. Insert the childrenM�at a time.J�qfor sub+� in list(new_�.`�ients):6���aself.in�(position,L�)0�$�Y += 1�ereturn�)�W= min\�Alen(v��f�if hasattr�, 'parent') and �� is not None�# We're '�ing' an elemen]at's already_��# of this%'s }/if��� ��V�0cur�_index = &�. ���2���:if 9�$< `c��1��bmoving��0fureQ downb�. ��B��� That means t �6whe�>��# we extracu$is, our target _ will�Bjump�+onj�, -5�5() h��R �previousN! =k�%ifZ == 0\�F�@sibl H���.�� �;els �[e[5 1]�u�� V�8�f.next_E��@� � x�._last_descendant(False) jO�"�.p�4�� �& =7�s�-��� >>>= O c����/s_C�� ��bwhile H� .�!is!��O�$� E�=\�l/�7���;� -�� '��I' e��\����+a found `comes (�V�@docu> �H���break  �if parents_next_sibling is not None: )�� new_childs_last_element.B� �/ =Y� L�?els^��`# The Y� L�of this ta�t �)inX��� # &�Adocu� ���<���  = self.contO[position]?���w�-o&= J� 0�?if 3� g�.previous�x� w�t� �?��>� �|��#� v�� .insert(,g!) 3�def append(:�,}) A"""A�siPgiven#�1 to� A."""\�� �U��1lene�>)o��9ext�s�s�1for�i�s�� 7tagDq_before�k, *arg�HMake&1(s)*immediate predecessor7Bone.���?�s will haveG�CsamesQ, and�2�|:�5be {�2ly �hu��p���,�V�L�>�9 Hraise ValueError(9��f���""EI0 ha�Bso '�'�meaning.")F�if any(x~��] [x in �k��� �r"Can't 5 an}�0 itX�t��c�m� �# Extract first s"!atindex wox��be screwed up i%ey���c# are 5)s.�if isinstance(�c, Page[7�Q���+�".e�(V�"��.� '�&�,:QafterJ�9+?suc7nts will have the same parent, and�given elements �will be immediately after this one.,�5""" �# Do all error checking before modify���7treG�� = self.�X�#if� is None:���raise ValueEy�(��"�""E�s has noN�a, so '�'�meaning.")E�if any(x}���� for x in args)� �"Can't insert anQ itS�o�x�offset = 0��z�successor� ��# Extract first�Athatindex wo�be screwed up if!�yi��5# are siblings��if isinstance(�c, Pagem �7�)�".e�(g� ��=.�(! '�.T(&�2+1+0%, e� 5��a += 1 �def find_nextf�p, name=b, attrs={}, text�K**kw"""ReturnsCvQitem xbmatche�ocriteria���sappearsg0Tag�,@docu.nrv�1_�1one�.�1all����%, ���0�0Nexm� �7�{ # BS3,U�h�Z�0Climi<i�����P�K�J�>1at KeI Ae�H@all(  4�,q�4_9,w�� ��'��a8AlloUsk�wr�W�RPclose6 to- ���w:�sc ��� ��'��n�kS�=_Tvsw!}� ����0s o0is g�U� appear after this Tag in the document.""" � return self._find_all(name, attrs, text, limit,8�?� �G� next_siblings, **kwargs)<��c�SNextS �0 = �_5� # BS32�Oetch3��2�!2 3�2defV�previous(� , �D=Non�2={}���:G���A"""R#sDfirst item that matche�given criteria andF�appears before t#Eone(S�����.?Dall_�� �A��4�P?�|)�Ja�N2ZL��� ��r1alll�`mk ci�j+ -/@_eleVs ���<Alld�1f-� /���Kclosest 6 to ���1 �s��� �'��~ 4a3&�}� ��`e? ofWw�_,W x�� ��ssg ;� o_arentf �[pL�r8ria."""  # NOTE: We can't use _find_one beca���Parents takes a diffe� I���# set of argum-�.�r = None�l = self.\�"_p]� (name, attrs, 1, **kwargs)8�Uif l:��w��[�El[0]�return r��_��0 = �l���ABS3 %�8def� s(� , �=��={}, limit���A"""R�cs the ��this Tag that match#�Qgiven���criteria."""������.:all.��,Z|�,\��J� ��'��"/s0s.�2�B�_fetch(�R# BS2�@property]��gAnext_)4�� �@_ele@D�rpreviouH�$� L�#These methods doreal heavy lifting.{���o1one�,<�g, text�� B�.h��&�5�,generator�0"It�es over a /� looking for thing.!if!1 is and 'string' in 5�b�# =�[.��$�>del!���if isinstancemSoupStrainer9�/�s�! =G�Jelse�*�P�c���QP�h� 2notX��F��!!if��UuTrue or��  ��y�# Optimization to all tags�-���result = ( �&in -�7� � =�,)3� Ry�DSet(k", � 3�*el^��L, st�_�� P with�Ir.count(':') == 1jp�Ti+��a�prefix. If4sap��space-aware docA# we needHQlocalK�@ aga�.}�o�not,  �)# we need to match the fully-qualified name against tag.�$. F��prefix, local_(�! =@�.split(':', 1)<�]else:��Z�R� = None"� l� &�result = (element for �in generator;����Pif isDanceI�^, Tag�2���_and (V��$��W" =03�O) orT�%Z����u!isv�p���2== �� ��return RSet(strainer, 3��?s =.��&�while TruP>tryaiEext(I�� except StopIt">ionF�YbreakW�,(�found = �.search(iu��y�1if .�E��� .append(*�D��*�if limit Clen(;�B) >=�b��<� �@� �f#These>s can be usT navigate starting from bothA�N!�bleStrings�BTags_@property.�def next_vs(self)����.&�;���i is not;��!�uyield i1����S�iP� ��@sibl�� &�I��e��-P�Q��previous8 *�Q��, T�U���R�*�Q��5T�U���_arent� �=��-J�K�# Old non- versions of!s,backwardsA� compatibility with BS3. def nextGenerator(self):��!�preturn �.)�_elements #�@�SiblingG�s1�G�previousD�-�����4�_��Oaren p+�s class NavigableString(str, PageED"):EPREFIX = ''U5SUF��# We can't tell just by looking at a se� whether it's containedG�in an XML docu0 or�'HT�.�known_xml = None��__new__(cls, value2�"""Create a new]��\When unpick=, a,�, this method is called��F�4the�in DEFAULT_OUTPUT_ENCODING. That encod2needs to beI�passed in�V�Auper3's ��0 �0 wo�.K�0howE� handle non-ASCII characters��5"""8�if isinstance(Z,�_�3�0u =�.�K�Felse6� ,7)O�u.setup(�ulcopy__DP"""A �> ofQ has S1ame�_Q and |�'�original, but it0not:�`nectedz�P�parse treem�Btype���getnewargs�?��9�+,);�Cattr8� , �"""text.gives you � Tis for backwards compatibility$�k*l�>�`CData*(1letc�G�2gety:Rithou�6� wrapper.��R == '�'g�2j�raise AttributeError(0����"'%s' object(!noI4� �N" % 8��<���1.__D __,Same__output_ready,Smatter="minimal"1��elf.format_  @� /�#+ J�+`E@property�u3nampl@&��3ter:�f name(self, name): �*raise AttributeError("A NavigableString cannot be given aI�.") class Preformatted4�(D�v�/""_���\�subject to the nN�#l U���qrules. �RThe s�@will�passed inA�:�@er (�rigger side effects),L�1butr�return value V�ignored.*�2"""�def output_ready(self,�er="minimal"��o"""CData�os are �r��8�B�4is ����(�����.�_  �)��,��3�PREFIX + ��t.SUFFIX�("):C�= '<![CDATA['l�G� = ']]>'O�ProcessingInstruction_���7eSGML p9�& i:�.o��?��?XML�����?n X� ��� ?�Comment� �<!--� --L�DeclaraG �M�ooctype�@amethod��@for_b_and_ids(cls,+, pub_id, system_idU�0t=*�R or '���#if:�@ is (FNone�#�>�+= ' PUBLIC "%s"' %:���)���&if�S��*�W�P��S�/elQ�M�eSYSTEM�T�U�D<�)fDOCTYPE -gT>\n' hTag(PageElel"""Represents a found HTML tag with i�s and cont2�o�__init__lpparser=u, build���� �Rspace���{�bprefix�c attrs �dparent �creviou�F�ais_xml-�q`"Basic�|3or.�{��t��!�� p�$.�_0 = &�}�;els # We don't actually storezL�! o: that lets extracteb����# chunksgarbage-colle*�8�+�self.parser_class = parser.__class__ �if name is None:�� �raise ValueError("No v� provided for new tag'sL�5.")F�Qself.a�=��bspace � � #�`prefix� ��if builder�?not��preserve_white]�@_tag<�.#�h�;elsif is_xml,�0N�U = []S��*�W���@� HTMLAwareEntitySubstitution�]���if attrsl'�E= {}1�$el3� &D and.cdata_list_Y�jibutesF��,��_replace_@�_%s(��B���,��"�~�Qdict( �6�2�!�8 = .��$# If possible, determine ahead of time whether thism�'anF�0XML�.�l >�Pknown% =z@�� :� 2� ���M��pcontent�setup(parent, previousU%��hidden = False ]�# Set up any ss, such as the cht in a META1�]T9yset_up_m�(���>��.can_be_empty_elementw�(K�:����*C = _alias("�") # BS3[def __copy__�IP"""A ��Pa Tag4a Tag, unconnected tok�V treeEIts Uare aV��0old\�'%�;�1"""���@clonFBtype�(!, ��[ �,J��N� �D�8�,= �_ ��(��n/('B', '&')Z�}�0set;�(�3, g��o��)n�Pchildo�self.contents:' �clone.append(child.__copy__())+�qreturn .� �@property� def is_empty_element(self): ��r�"""Is this tag an /�-/���? (aka a ;�-closing�)u��G�A� that has contents is neverY�.@�&noC�pmay or �mnot beL��tag. It de:s on the builder used to create�2�f �F�/���a designated list of" s,A�Rn onlz�awhose name shows up i�"atP�i.sidered an� /I����/no�a�then any�Hith w-ish�)�.?�5""" �1len`.�) == 0 and=z.can_beE�sisSelfC\=r # BS31astring|� Convenience �0get 2le @�Q withq!:Rl!: z�aC�#, Qvalue���#o�.M���Aren, oraZG�d���[�N5 T�Aone 1��,���>��)'�' attribute��B�cursively?if �F!= 1�����q�=7O5[0]!�iinstance%, NavigableS h�[�H�}��.�f �q.setter0��Q�k,8g��#v.clear(�?��rlass__(�)L�b�R_all_�sh�p=False, types=(�, CData)�"""Yield all�s`certai9�asses, possibly%�!pp7hem�By default, yT��],lyq� objects. So<�� om�u�Proces ructions, etcP0forpcendantGZ3des�sXEif (|�����(?��p��W is�LP�Q�L���o� ��c��Z�g��y�(]�) not in types[)): �continue�if strip/�descendant = �./�)()F��j�vif len(*�n) == 0Y��-��Wyieldk� d��p�ngs = property(_all_�)&�@�4�#de�4ped)�W(self1for�`ng in �.X�W(True/����y�w��i�get_texta��, separator="",b�p=False,��H��types=(NavigableString, CData5""">�Get all childa�ngs, concatenated using the give��.G�S�Xretur&�join([s % I��! �2p, ��")]+`getTexL0A��9��`decomposea"""Recursively9Qtroys��aents of this tree.�wextract1i = ���while i is not Nonen� i. �_element?��C�i.__dict__.clearo���J= [];�0 = X�oB���%, b H�E�5vren. If?����R� instead1i=� �28�:[:]-��@f ism�Sance(;�W, Tag��)��#�.� O�(�;els����>�4��]��Qindex#, '��AFind3��aby identity, value. Avoids issues withw���Dtag..Q�cd) gett'��l�Tequal�s�#i,�in enumeratp����$if2�s`�i�praise V�Error("Tag�:G� win tag"9$ge0key`fault=�F�9S� �=�'key' attribute��utag, or���9�o�!1'dep� ' if it doesn't have tha#`�  �."� s.�� �_attribute_list(self, key, default=None): �$"""The same as get(), but always returns a list."""<�value = f�.:�i�)'�if not isinstance(9�,Q�����A = [ �]D�� ���def has_attr�S�7�bkey in��,�%s ?�__hash__?�:�sY�%).)�)=�sgetitem@�kE�"""tag[key]H2the�@ of �`'key' �ibute for�Atag,��and throws an exception if it'saYthere��a�s��Citer��Iterating over a tag ,�Bates�its contents.s��,�S.$� 7?lenv���`lengthu�s ��x���#�\��?len��8�Cains�7, x��x2�.���8Cbool>�;�""A)� n-jC eve�? no5HTruee�s],<t�Q""Setl2set70�*id9tag> 7" =|��?del JDele�d�{es all s�Q.pop(d�[ Q4cal, *args, **kw � -@Call_like a funcP8�W��-�5its'find_all() method. Eg.U('a')1�}���I�ound within thisD� w� �0get,Q��#print "G&� %s.%s" % +__class__8��0if Otag) > 3 pag.ends�g('Tag'h��# BS3: soup.aTag -> "���F("a"!�0�Ptag_n=�F[:-3� �warnings. �(�����@'.%(?� )sl�is deprecated, use .w�&�0")  ead. If you really were look�)� 4ed @�?TagX�Tag")' % dict�����=� �  � (F�'�# We special case contents to avoid recursion. �elif not tag.startswith("__") and�T == "S�%":A��H�return self.find(tag)"�raise AttributeError(��@�"'%s' object has no a0� �Q" % (`�__class__,�1)) J�def __eq__&�, other)�A"""R��s true iff thisE�s��the same name, ��!s,j����!�# (_vely) asD�Pgivene�E."""A�!if-2 is���_�OHTrue2�(0has�(�0, '�U') or*��B�*��7�s+��.��t�2 !=�.� �'�q�(�� )�1len)(�� ��)��FFals for i, my_child in enumerateT�.� N�&if5��,�:[i]��}/ne#it identical�d�As define� _����G�,Erepr� encoding="unicode-escape";B@nder9as a string�zif PY3KJ# "The r value must beF�", i.e. U�_���@���0.de�(lOelsev�.ca byte� |�@# ByXv"on'��o\ should also b��# an ASCII? h�S��!e(�9ingA�%c�$st5�J�����o/��#e(� ^�!__�S = __2� ��I��!, ��DEFAULT_OUTPUT_ENCODING(��P inT_level=None, formatter="minimal"7�es="xmlcharrefreplac0# T�1dat ucture into(�����##�1u =��� return u.encode(�0ing, errors) def _should_pretty_print(self, indent_level):2��6�A"""S4� this tag be @�-@�bed?"""d��2�return (C���^� is not None%�@and �Q.name#�"in� preserve_whitespace_tags?��!de�I� �=q�,3��� eventual_7 =DEFAULT_OUTPUT_ENCODING:�formatter="minimal"9R ps a Uni�1 re�ntation ofV��its contents.Rn:param�Q: Theis destined to bX�red intop��`ing. T`methodX_not_"responsible for per�ing that J� in��F�qis pass� s�at it canNsubstitut!�i �document0ainSa<META>��� �ions the 1�'"�� ��� # First off, Pa str��2terE4a Fr objectJX�# will stop�lookup from happenV�Pover  �Ugain.C�"ifsinstance(�,�)@��Ni)�Callable�K�" =a_�� �_p�)�attrs = []�"ifB��:�f� key, val Qortedp./�.items()��<�"if6�s' X�� ���0d =f� z���Oelse8�7if .val, list) or�=tup>�^����= ' '.join(val.$���.elz�?str���3���_�?strZ� n�:�� � AttributeValueWithCharsetS?ion��S�� 3�;��a�5val |�i��Dtext_�,�'&�bkey) + '='L�+ Entity,.quoted_Y _vZ(�)��'�!s.#d(�&�close = '��2Tag��prefix = ''  if self.prefix: ��( ="�u + ":" (�@�is_empty_elementJ�close = ''9��h� if isinstance(formatter, F �)H��5�L�/�T.voidx�_�_�1or �v�JelseS� �0Tag�</%s%s>' % (B�,�Vname)�pretty_printp_shouldz��(indent_level)�Kspac)��#f �F� is not Non�&�C�y(' ' * �V - 1)�8if � H�W����contents =�5+ 1/�v=���1�U�tdecode_�()���, eventual_encoding, -jhidden�`# Thismthe 'docu root' object.������'�%[]!����attribute_strin (�%�s�<� ' + ' '.join(9��5��w��S�-�`# Even(tparticular tag;�G-9ed,�E�2we !&p up to ^start of �:tagX�?�s.append �#� '<�� ��Q�  , (,:/�9�3 �!��O"\n"�' � [�T and *� �[-1] != b��b��?TagI�n > � �u� �next_siblin��6're now done witht,�]add a;# newlineappropriate� �'s�return sf#de@ify("$, =*="minimal"-%if0���3� return self.decode(True, formatter= �3) �Uelse:���breturnH�!enH��_ing, R�E�Bdef }�_contents(�, indent_level=None,1��r� �eventual_w� =DEFAULT_OUTPUT_ENCODINGB��="minimal")��"""Renders the � of this tag as a Uni� string. �i:param�: Each lineI�1e re�ing will be@�# <�"eds�many spacesg� Q: The�is destinI�oc�?red intog���� T��method is _not_�responsible@@ perU��[that J� in�XationF�qis pass� s�at it can be substitut!�i %document0aina<META>��� �2ion1�('s�� �2terS`output�1 converting`�tities to4characters.+�5""" �# First off, qah�4a F object.K�`Astoplookup from happenover and �Fgain�if not isinstance(,�)@�)�Callablel�K�" =V_�� �G_namp�)pretty_print = (C����Jes = []rfor c i��0texT��?�/���(ifc, Navig�S ��3�K�"c.?f_readyA��-�-elb�?TagV��!s.4d(c�-, �w��y)��{�� g��. == 'pre'O�`���.3/p(e�9�9if ���u��y�+[" " *(;- 1� 3��� ����76\n"_�r[''.join(s�5e5�>=DEFAULT_OUTPUT_ENCODING, �formatter="minimal"):�&"""Renders the contents of this tag as a bytestring. >�:param indent_level: Each lineE�1e ra�ing will be@�# <�"edo�many spacesg�eventual_encoding: The�`�2 inO�)�L�D�uoutput � responsible��xverting�entities to Unicode characters.+�6"""5d= self.de8�_�(N$, �,�)X�`return�.-�!e(�Bing)u�# Old methodF� BS3 compatibilityQ�3defC(��=DEFAULT_OUTPUls�prettyPrint=False,L*'=0wif not 2��O�8�f = NonE��L��@���D�= �Q�� :CSoup:sM�&CfindPname=�, attrs={}, recursive=True, text%� � **kwargsAonlyfirst childRis Tag matchV�Ugiven^�criteria.y����r9#l ��P_all(����F, 1,�k�Jif l�h�El[0]#�r��l�C� = � V�� , �5Z ��Climiw��`"""Ext�@list^�Y`object"atfc You can specif���P���T�band ani���-�ibutes you wante��,�wto have�AvaluSba key-�Bpair0e 'T�as' map�Abe as�p�D, a ��s�regular expression G, orE�Pcallathat take;I� �s whether>��&th.�!essome custom definit�3f ''� '.�@ame r�e��u*�generato�W.descendants%�|$�: C�rn�_��L�!, %, R� P�<� Al� ��F���@ #���?Aren Bd_all # BS2 #Generator methods�@property�def children(self):�A� return iter() to make the purpose of�a�e clear@�>��[� .contents) # XXX This seems]�be untested.��descendants�if not l�n������stopNode = �f_last_g��().next_element8�tcurrent7��5[0]#�dwhile )�!is�p� �Tyield+�A���?�=� �,�P# CSS� ecAcode��select_one0,'� , namespaces=None, **kwargs"""Perform af�@ion F!at �n�+A."""���Evalud�����7, 1�?�1if B� 6�y���H �u�f _limitC��Auses9SoupSieve library�*�e:param: A string s0ain �.or>��@�0dicary mappA�!� prefixe,��bused i g&toB URIs. By default,Beautiful � will usFm� it encoAred :B�@pars��x@docu�: After find3��wPumberresults,  lookingK��: Any extra argw�s you'd liao passo�Psoups.%).��@ is  sg_�U� �1if M��8= 00��4�raise NotImp�edError(E��#��@"Canwexecute,ors becat�ppackage|��;�installed.8�[�)m� �O�pw!orf]aN�R# Old for bacds compatibili1^S3�Delf.[�recursiveCvE�sH�qhas_key}�, key): 2"""This was kind of misleading because has_key() (attributes) ��>�different from __in__ (contents).E�is gone inC�Python 3, anyway."""�warnings. �#('� is deprecated. U���("%s") instead.' % (O��v key)�return self.b�G��#� # Next, a couple classes to r�s� queries and their results. 0� SoupStrainer(object):�"""EncapsulatC�p number@ways�Pmatcha markup elemy�q(tag orJ�Qtext)=�1defPit__(�, name=None, �`s={}, 7��**kwargs��-�9�.8�" =  _normalize_search_value($�Rif not isAance=Us, di��_���# Treat a non-!� S�B for�b as a o��Qthe 'Z'��G���#0�.��[4�1] =)�s$�0 = ! �#ifj�b_' in I��r�����_="foo"�!, overri?any� ��5_']���3del�!�"�#if� �"if1 ��O�6�f.copy(�!����rupdate(� %�Oelse\�2��!d_Y�U = {}��6Ckey,Erin list.items()�� Q�@[key$))-�� R��&��sP �$�^�R� w�P���# Leave it aloPf it'FUnicode string`allabl�# regular expression,�boolean, or(Gif (,f�),��Cv��0has:(6�''#(orO*���B���� �#�5# IDbytepconvert<3to /0, t��Tys UTF-8��V��0.des("utf8"����glike, �ainto a��s� Q__iter__'):  ��new_value = [] ��for v in �:��$�if (hasattr(v, '__iter__') and not isinstance"�mbytes)H��L�1�^str))y��1�"# This is almost certainly the user's mistake. In�{�"# interests of avoiding infinite loops, we'll letF�t through as-is rather than doS�a recursive call.K�.append(v4Oelse >�self._normalize_search:(v)\�ureturn E� �0# O�wise, convert it=o a Unicode string��2e u�(str()) thPis so �s will d �`e same�ng on Python 2��,�3i���b���"))�def __str__(T0if �Z.textQQ��?���|+�"%s|%s" % f�b.name,_��%s�1tag+�, markup_3�U=None�:�7={}�found = (��A� �(if$�e�W, TagT��T�( =�`�!�_�'�"�� _function_with_tag_data = ((��J���, Callable� M��[[�@f ((0�Y� O�!or|� �Bor (k�Q�_matches��� K�=���A� �\�F� /if� ���" =x�R��i�� O�ITrue��L_map~�#����J�$��ptch_aga�\Alist.!�s.items(!ifW��w��u���4if 4�s, 'get'i�1��2��f�< ;�G���Y�/{}O��1��@"k,=�s��6���.���[k] = v  �attr_value = markup_�map.get( �*) 5��if not self._matches/�P�!, �_against):E��K�(� = False&�Ybreak�"ifc�Z������q�Ufound� T���Oelse9�F_nam�2if X�2andPtext �'*�.string,.� %&�f = Nonh�breturnl��searchTag = �Q_tag �3def�(��h�# print 'looking for %s in %s' % 9�V��%�# If given a list of items, scan it`�!a element thatY�#5es.�`if has(, '__iter__')c isiEance'�0(Ta`7tr)��$r �#in7if V�1�, NavigableS5) \��]����z.(C� �1�U&�U� rqit's a �make sure its @ or <bibutes^\# Don't bother withI�s if we're@�>�*el";Tag�.�D���� ���s$�J��c�� 2thetm]� /or�?str�����LP��b�� V��6,�1raise Exception(+��F���A"I d know how to-  a %s" %..__class__ c���������4tch , already_tried=�0u"Mj�%%s�� Y��Msulti�)�Wtuple��?# This should only happen whenEa multi-ud.�J���# like 'a'p����T�1ies(item, match_against): ��return True �$# We didn't match any particular value of the multi� E�attribute, buJ�cybe weS��;�"�R�KwhenE�considered as a string.&�if self._�es(' '.join(markup),t� �FFals$}�*ifY�1 isN o#�*�!esLnon-None� �s���R�@not -� x� isQance(��, CallableW�t Z�# Custom cF�as take1tagn argument5all�# other ways!�2ing�G�AnameL� original_�"= �b�9if ��[, Tag�7� @�.s��PEnsur�Bat `-�`n!ei�a Unicode�A, or5�# =}normalize_search_c5���n�� .��0!ess�!, , an empty��@list �vd so on���H,pif (has O'__iter__')�U�@and R� \,�)�Z 'rbked to� ���l���=Zitems�0# Tst must bQat least X�;�0 inq �$# a�. We'll try each7� n  t� B� To avoid infinite recursion"ne�keep track ofC���s we've already see!ifb!�j_tried-@��*t(0for�m M��^tem.__hash__"��s�Qkey =S� ���Oelse5� d(� ��9�0if W�$in�L�kcontin�F�� F.add(key��E� P ,�,� �H��rh�.�_ �# Beyond this pointumight need to run th e test twice: once against �# the tag's name and*� its prefixed#�.=�match = False� �rif not &��X� is|�Qance(�_�u, str):9���p# Exact�2ingE� !�B� = markup = �4tchT� -� �|hasattr�'search'�oRegexp��breturn�x�.F�(�)�(��� original_C�m, Tag)1� &�. �1Trywhole thLvQ with�:� ed ���aself._u9es(��&�� + ':' + ��k,9 S�8�z� class ResultSet(list�F"""A� is just a $� that keeps track of�SoupStrainer�)�created it."""�def __init__(, source, r�3=()��esuper(� , 3�%).B�4���.Q�# =Z�>y�0get|�7keyn�raise AttributeError"�� object has no V�5� '%s'. You're probably t 8Iof items like a single�. Did you call find_all() when�meant to"�()?" % keyg�<) """Testsbuilder registry import unit�warnings from bs4 *�Beautiful�.^�&�9�_s�5 as�,��HTMLParserTreeB���R�, ) try� |��T�5>�i� 5LIB_PRESENT = True except I]�E� 0����GLXML�bForXML�����>�� ,���W4tInF�(.�CCase 6�Et-indefault #�4derBeredX���v�U_combinationl��$�8if �}����assertEqual(I.lookup('fast', 'html')B�>� ��i�" �NT: , self.assertEqual(registry.lookup('permissive', 'xml'), ;��LXMLTreeBuilderForXML)4�s�`stricto�/htp� HTMLParsers�m�0if "�5LIB_PRESENT:������O5lib��T�r� �s�def test_[_by_markup_type(�)�0if I�)1'),D� �����s�  F� Felse��^�Z�FNone�;��%�`����@�����^�� named_library<l3�q�`��:���~  ��g�!.pb �i beautifulsoup_constructor_does#�with warnings.catch_�(record=True) as w# This will create aL� about not explicitlyv���# specifying a , but we'll ignore it. 9�You can pass in a 9ng.(�BSoup("", features="c"# Or a list ofR�sS�[T�, "fast"])�P# You�get an exception if BS�p't find�appropriate�2# b�_Raises(ValueError, �U no-such-)`") cleRTest(unitn.�ACase\ """Test th2e TreeBuilderRegistry class in general.""" def setUp(self): �����$.rA�=W�&()D�br�_for_featuresS�3, *�f_list)b�cls = type('�_' + '_'.join(8�,����(object,), {'/�Ss' : �s�}��8� ��uer(cls)`�return cls�Rtest_er_with_no�� 1 = p�� �# Since the ,� advertises no�s, you can't find it��# by looking up4�."��assertEqual0ry.<��up('foo'), None�4But�}��s� doq�a{�0up  �%if�# this happens to b�Tonly `$ed�$),:����s_makes_�_succeed5v, 'bar'Jq?�t���fails_whenk�_implements�^;bazn�qgets_mo4ceneration�_specifi1 (24� 2� Stree_�s�~�qx��_supporting_all31has_one5 f5�3�the_other9� n�9�both_early:� �s��zH�?lat�@�r', 'quuxH�Olack�����1�# There are two builders featuring 'foo' and 'bar', but �# the one that also;�es 'quux' was registered later.B�self.assertEqual(�0�ry.lookup(�,�%),=���has_both_g�%) *��# There is only�b���g�,�/z'�)N�/z'�Searly�def test__fails_when_cannot_reconcile_requested_!esG%):��A1 = a�F_for4� f ;�2;�5z');�k�X0'ba( None) "Test harness for doc4s." # pylint: disable-msg=E0611,W0142 __metaclass__ = type __all_�[�'additional_]�']] import atexit���os #from pkg_resources ;�0( #�_filename, �gexists�listdir, cleanupV�)u�@unit� DOCTEST_FLAGS = (��.ELLIPSIS |�NORMALIZE_WHITESPACE#�REPORT_NDIFF) n� +2():�Q "Run 1docs (README.txtH�docs/*, if any �#)"@��!Ss = [��>os.path.abs�(;N('bs4', 'r�C'))]C�,ifm0���'��l��f�6 in6�7��;�0if :�.endswith('� _��(��� �s.append,�� � �� $� ���a/%s' %�3)))G�Pkwargrdict(moduMlative=False, op!iflags=rF�#.?er(�  return .hSuite(�.DocFile!�*�$B, **���"The beautifulsoupQ." ""Hs to ensur�lxml tree � generates good�Us.""" $rewarnings tryg�W� .eX�wLXML_PRESENT = True�VERSION = 6�.2�� except IError as er�*�\�]�(0,) if 1�C�from bs4.builder import LXMLTreeBuilder,�ForXML from bs48�( BeautifulSoup,�Ftone�)C�.elementK�0Com�, Doctype, 6�Strainer7�ttesting7�jskipIf�s��,�_htmlparsE��)HTSmokeTest� " ���+��b, ) @ �a�"noj_PRESENTY�"lxml seems "�to be present,��ts tree b.") class�(� �!):"""See ``(�q``.""" -�@property�def default_�S(selfZ��#lreturn�"()O��A�out_of_range_entityJ��Y�p.assert�bEquals`�i���"<p>foo �;bar</p>", "��)��;�^�x_�K^�� �@3ies_in_foreign_docuZ_encodingR# We can'this case correctly because by the time we��<!# hear about markup like "“", it's been (inY�) converted intoR�Pa strI�uu'\x93' �Epass � �Q# In Q< 2.3.5, an empty dY �s a segfp. Skip �E��0 if<�old version of\�is installed. ���W1 or_VERSION < (2,3,5,0) �6�"�p���Ofor �to avoid�"����T qsoup = � �("<!DOCTYPE>I���� = %�.contents[0]/�#�C�D("",q.<p()b7s$�[�_is_xml_ �# Make sure thatpdepreca;BBSS @uses�Y�#Jone ����with warnings.catch_�(record=True) as w;�8�����' =���h("<b /S00<b/@str(H�2.b)Y�3elf�|�&("rc��6is @" in@ (w[0].message)) @skipIf(  not LXML_PRESENT, �"lxml seems"�to be present,�esting its XML tree builder.") class[�XMLTreeB�SmokeTest(Soup �,@�"�"):�""See ``HTJ�q``.""" -�@property�def default_�S(selfZ���areturn��ForXML()U��G�test_namespace_indexingO�# We should'rack un-prefixed D� s as we can only hold one���# and it willrecognized<�4the�V� by soupsieveK�which mayI�aconfus n some situations. When noU�is provided�for a selector, �!(ipined) i�bsumed.\���� =I� f. �(d���0'<?version="1.1"?>\n'&�\root>�tag xmlns="http://un-.com">content</tagI�5�:P�:�W��/edU�/�)��.assertEqual�%.s�1{'xml': '�www.w3.org/XML/1998/�B', '�2�� �%'}�P) """s to ensure thathtml.parser generates good 4s from pdb import set_trace �qpickle (�@bs4.0�M&;� k��1�P� .� ._���Beautiful~�G� �Y���)�#�"):$%�X� �d_system_doctype�can't handle*#d =��s, so skip this.?Bpass�[d_public�bB_is_qd"""Unlike most?s, r��be restorQfter n�6ing$�����) !�"<a><b>foo</a>"8dumped =Q� e.� s(@�, 2) loade d = pickle.loads(dumped) � self.assertTrue(isinstance(1�ed.builder, type(tree�A))) I�def test_redundant_empty_element_closing_tags(s�!):9������SoupEquals('<br></��� �', "<br/>�"� P�C���D�"� � �0# This verifies that any buffered data present when the parser ���# finishes working is handled. "foo &# bar", �Bamp;�) class TestHTMLP|�1Sub�(��~\error�@"""V�y�Four S�# sT�B imp"s G�) in a way�+doesn't cause a crash�5"""0�3 = Beautiful�~�(+�.{�1"dob��Z�`") """�s to ensure��html5lib  � generates good� s.� import warnings tryfrom bs4��+���b5TreeBA��LIB_PRESENT = w except If�ER as eb�5��eFalse {�h{��.Strainer%��oe�& (��PSmoke6,��J��skipIf, ) @ �E�1not5�@�"h`seems *�Tto be(,�1est�!tsu3.")#ibM�,��"""See ``L�)�"``�@propert�+default_- 0mreturn�(eSsoups D��d%e 4 �@-1sup�sI\�8 = "�7("b%markup = "<p>A <b>bold</b> statV.</p>Ewith�t.catch_(record=)w�I����1 = � �(�,d_only=�/�m�P����H�.decode(),\�Adocuq3ford�3� ����"�{�Yzu�I" inF�str(w[0].message��A�_correctly_nested_table&""; ins <tbody> F�`re otherF1rs .""" markup = ('<table id="1">' �O'<tr�"<td>Here's another N�/:"G�j�2j��U�foo</td></{�1'</j��"�Ud>') q�self.assertSoupEquals(����,� p<tbody>�����\� -B�;2">B� �#</_�� I�� )��"u><thead�F^"</� "�6Bar� 3�Efootf�9Baz3���"�def test_xml_declaration_followed_by_doctype(%):0 = '''<?xml version="1.0" encoding="utf-8"?> <!DOCTYPE html> <h�1 <!U </ ��n�1<p>!p>!��!</C�&''!soT��. �(�)D��x$ # Verify that we can reach the <p> tag; this means�tree is connected.R�l�6(b"�0", �1.p. <e()vreparented_�dd<p><em \n<p>bar<a></�gem></p'�z�1 = �� � ��!"<Zi�V��y�@\n</��{��!</?��� �a.decod�" �v�p2, len(�.find_all('p')-h ._ends_with_whitespac�/p>C/\nRE=�f�:>G:�}Fcontaining_identicalQl_nodes;"""MBkeep06wo  ?�1 inW�ocument distinct whe1ingL�Padjac'��s&""B�w 4��0ims` �_11, g2]6oup0str6' 'rh1, tbody2 = soup.find_all('tbody') �assert space1.next_element is .�1-��2-�D�!2 .�&def test_reparented_markup_containing_children(self)::���*� = '<div><a>aftermath<p><noscript>target</�'�/a></p></A�'Z�psoup = p�. �(k�K��%�Doup.�B�Belf.6rEqual("�4", ,� [�(�Y�find(string='�"')=�u# The '�"' ,� was duplicated; we want the second one.�efinal_H��W�=j�\)[-1]�uA tag�moved beneath a copy of�0<a>(�,�B# bu���Pithinnstill connected toL�C�(�)�� ", n ?�.�+, G�.previous0�pprocessinstructioE"""P)� )�s become comQs."""b��' = b"""<?PIT PIContent?>/����� � 8�( �).startswith("<!--c�C-->"�@clonultivalue_node�a class="my_ � ">//a�P1, a2���h''a a5�  a1\not arfoster_?ing� table><td></@/>A � "<4>A<Z��4<tre�j�ro�#</)�/)�0", . �@.dec)_extra0Tes6at 0�` does 8pdestroyf tree.Zhttps://bugs.launchpad.net/beautiful�/+bug/1782928w��? ���<html><hea� <style> </ �<C$</ �� �pRhello<�/O� Z��s�����aH3[s. () for s inr"('q�5')]�.� ���-�.� lB "p")), 1) def test_empty_comment(self): �5""" �Test that 3� 3� does not break structure. ;�&https://bugs.launchpad.net/beautifulsoup/+bug/1806598>��� �markup = �#<html> <body> <form> <!----><input type="text"> </#�/2�/A��L�� �0 = . �(r�)�X�es = []�@for [�1 inA�.find_all('�'S�h�C�.extend("�1��&')q��assertEqual(len(&� s), 1) # -*- coding: utf-8�@ """ds of Bh Soup as a whole.� from pdb import set_trace �tlogging�@unit?�4sys �tempfile U�4bs4U�(0���,�Btone�)?�@.ele G� CharsetMetaAttributeValueS�oConten��d�Strainer1�NamespacedO��)�bs4.dammit�� �EntitySubstitutionI�UnicodeDC��"EnDetector, ��`:ing]����@�`skipIf6�warnings try��tbuilderP�LXMLTreeB�+, �aForXML�� _PRESENT = True except I0PErrorUet� 1��False PYTHON_3K�_3_2 = (sys.version_info[0] == 3 and � < (3,2)) class 2Cont4or("):}#dershort_u_= @dataX<h1>éé</h1>�  �0�"D�"",h1.string)�embedded_null� foo\0bar�-H��xclude_es�Putf8_�5�Räksmörgås".5�1e(""&�|]E�<, eq�#=[@�]A�Helf. "windows-1252�original�() 2W%/P_no_p[r_specified�, s, is_there=v.startswith(.NO_PARSER_SPECIFIED_WARNING[:80��c�,(v2?_if���� 3��s.catch_warnings(record=True) as w: �soup = self. �("<a><b></�Ea>")/�0msg*��tr(w[0].message �D�_assert_no_parser_specified(msg) O�def test_warning_if/�_too_vague(Z�)�Swith :�ts.catchI��/, "html�L$no���explicit�a.�.Equal([], w7�OnlyThese_renamed_t __only�\ �=SoupStrainer("b")�J*("[�Q" in [41�-� a#b"0", .encode()uPfromE��Es��_5�?ingv9utf8 = b"\xc3\xa9"� O�0� , �� ="�_|N�M z�/� 0� }�yoriginalA�  unbgnized_keyword_argumentzyRaises(>TypeError, , ", no_such\� class TestWM s(��+):odisk_file~ ���handle = temp� .NTemporaryFile(;�"��0 = �>�.��6try�+)�@����z��s���'�Z �$�/looks like a��X�7fl���@clos'Q# TheQ� no longer existsl Beautiful W will)�issue the.Y� Mtch_warnings(record=True) as w: �soup = self. �(filename)'��assertEqual(0, len(w)) %�def test_url_warning_with_bytes�(E�){��� +�ts.catch:�s�S�FlistC����� �b"http://www.crummy�f.com/"�# Be aware this isn't the only� that can be raised during���# execution..���.3�y(any("looks like a URL" in str(w.message) B��\�Pfor w%�)_uunicodewH# note -<url must differ fromKz one otherwise�# python'sqs system swallowsH�dsecond$� C� � j_and_spacer is great'%_Falsec(^E�SD(a class TestSelectiveParsing(Soup�+):Rparse�'strainermAmark"No<b>Yes</b><a>�2 <c� c>�"R�0 = �Sa�("bC ap�", ��o=I�yt��0.eng(), b"�� �7) 6EntitySubstitP(unit.=CCase"""Standal�"�As ofF� 2.""� \setUpe�nsub = E� simple_html_s��E���2# U characters corresponding to named HTML entites �# are substituted, and no others.*�s = "foo\u2200\N{SNOWMAN}\u00f5bar",�elf.assertEqual(s�Fsub.j�_html(s),6����p�∀r�õt�!) @�def test_smart_quote_m�1ion�!):-��b�Q# MS -� -�s a common source of frustration, so weD�give them a special�7]�= b"\x91\x92Evx93\x943dammit = UnicodeD�(?�)s�IZQ�.markupf ‘&r�bfoo&ld��"dxml_convers_includes_nozs_if_make�d_attribute_is_false�_'Welcome to "my '�xT0, Fc�O), s���^ing_normally_uses_doubl�s�� ��"�", True '-�'j�Bob's Barl�CBob\0�' Esing_when_value Otain9 ? = 7S"l�\N?\"'escapes_�Dboth'�?and> �� L�si[�U&;t��"�+�es_arent_di@t_beA-�d,��#�T�"),d�#���Phandl�bracketm�k<bar>"��0<O>o �mpersand�"AT&T"), ��amp;T") de f test_xml_quoting_includ �$ampersands_when_they_are_part_of_an_entity(self): ���.assertEqual(��!��sub.substitute�("ÁT&T"),4�e"& �� �AT") $�/de�Ognor�`_contain{�?ies� � �es_not_html_6dx2"""There's no need to do this except inside attribute values.""" ��text = 'Bob\'s "bar"'m��� ��(H�1), � class TestEncoe�Conversion(Soup�H#(� Beautiful �'s ability�0eco�@nd eP�e from various�#�Vings..\setUp=`super(�� , �").9�)[� 0uni�P_data5<�><head><meta charset="utf-8"/></�*body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</3�"</?�/p� Dutf8���.�./(��# Just so you know what it looks like.� �� /b'&'xc3\xa9  ascii_in_<out# ASCII input is ctDU�. The original_G5ing�#W`is set'Q', a e��"ofp�M�% dewbs4.dammit.�_�h�logging.disable(�vWARNING6try��-noop(strj���return Nonem���2# Dk� det, which will realize t,2the��ET��K��0 = � x�Q = b"&a�Q�Psoup__.���(��/�� = )�@�!.dW(6�S�True(isinstance(Q� , Z=� 3�elf.document_for��)P� � .lower(), "utf-8") finally: ��logging.disable(�yNOTSET),�bs4.dammit.chardet_�3 = � 1�def test_unicode_in �out(self)�2# U(� input is left alone. The orig�_encoding attributer���#9�not set.�soup_from�0 = �.��.�Fdata�%�assertEqual(1� J�0.de�2(),R�H�& foo.string, 'Sacr\xe9 bleu!'� '|, None)?tf8FTF-8converted to|#�?tf8���?tf8/�E� ����internal � structures can be ped as UlSv�('utf-8'����+@skipIf(��]PYTHON_3_PRE_3_2,� "Bad HTMLParser detected; D�p�  of non-ASCII/acters in&bname."g_�_containing_~O�\markup = '<div><a \N{SNOWMAN}="snowman"></a></#�'�2elf8W�c).div.R"utf8"), � �) class TestDLP(unit".�CCase�"""StandPsE =�L."""L_L"I'm already!" z�(��" .8�^0�h�smart_quotes_to� b"<foo>\x91\x92\x93\x94</��/�����`u2018\�9�c�d�� xml_entities�<eDammit(mark up, smart_quotes_to="xml") �self.assertEqual(��!�dammit.unicode_markT�"<foo>‘�9�C�@D;</&�1") S�def test_��_html_entities(�!):2���r�B = bt�\x91\x92\x93\x94d�0��B = U�D�(G� ��}�4lsquo;&r�"ld���  _ascii]u�."C''""�"detect_utf8���Sacr\xc3\xa9 bleu! \xeo8\x83" �I�.originawcoding.lower(), 'utf-8'D� '�e�N{SNOWMAN}') convert_hebrew�\xed\xe5\xec\xf9<�, ["iso-8859-8"]� J�!a\u05dd�5�c� e9�dont_see"qwhere_t�are_none1Rutf_80343\202\261\ �S3\274 �V2\277 �44 WatchVa�E+X#0.enz!("?�C"), � ;ignore_inappropriate_9�cR�"_data = "Räksmörgås"m�d�F VK�o;� �_valid�8for bad_jing in ['.�p', '...�utF---16.!']_��>�� exclude_encodings(sself): �# This is UTF-8.�utf8_data = "Räksmörgås".encode("utf-8") 5�# But if we excludeX� from consideration, the gues�C�Windows-1252�dammit = Uni~�D�(�,q�_�cings=[�%])_�self.assertEqual(X�.original>�.lower(), 'w�'�;And�Athat� reUno valid�oat all����8, "�"�!c, None�def test _detector_replaces_junk_in#�name_with$��ment_character(}|R�`ed = ERCingDe�(b'<?xml version="1.0"  ="\xdb" ?>'!�s = list(q��#-� '\N{REPLACEMENT CHARACTER}' inx�s*^�_html5_style_meta_tag��Afor ]=in ��B� ><<� <set="euc-jp" /></ �%',M��&1b"<� 7�'7�'7�"7�6�5�"�"��h �A, is7V=True�7��3  ��"�", c� last_ditch_entity�[���!a@docu �tains by0rV��# completely in �patible &P�B(ie.U"ed�some otherJ�)?#�PSinceSynRstent^�@ing x�w�#Y", [ will eventuallyE�c� ?�# a8�8and+� �� s:�d@�n�!If0det`installed, it��c�`can beverted into ISO-8859-1out errors.$whappensH� toG��e�Rwrong�`ing, b�ag�*si@, soL��a we'rex�here won't run Soltemporarily disab�l&�f it's present.# �@doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?> <html><b>\330\250\�!2\�61</b> <i>\310\322\321\220\31 �355\344</i></hN�5"""�pchardet�s4.dammit.�_�,�logging.disable(�WARNING))�Etry: ��def noop(str)���return None7�b�! =O� -�� = UnicodeD�F(doc��m�self.assertEqual(True, ?�.contains_replacement_oactersK��E�("\ufffd" inP�u�_markup) �soup = BeautifulSoup�0, ".parser"u� �E��kfinall jNOTSET� �(2det���test_byte_order_removed( # A docur writtenI� 16LE will have its V� V� \er stripped.d@data�'\xff\xfe<\x00a�>�C\xe1�9��/ �$�'N��a6ata#�� ^"<a>áé</a>"g ���?�utf-16le<�@orig_/VqdetwinglecHere's a UTF8o2 utf8 = ("\N{SNOWMAN}" * 3).n�0e("#�"s���%# U�Windows-1252]��w�_�5= (Z�G��r�LEFT DOUBLE QUOTATION MARK}Hi, I like f�!@�ORIGHA�"�� �)Through some unholy alchemy, they've been stuck togetherA�$8 + i�0 + ���U# The& can't be ped into&-80tRaises(e`Error,S�!.d�/, �3�", !p thinks�W whol�$isl,b# and d�Bs it�x"☃�?“�8”%� �"But if we runa�tfix_embedded__, it's fixed:K�� .! _��C"☃��☃☃☃", fBixed.decode("utf8")) def test_detwingle_ignores_multibyte_characters(self): ;���# Each of these *� has a UTF-8 representation ending�E�in \x93.� i9�smart quote if interpreted as;�Windows-1252. But our � knows to skip over �B���@, so�y'll survive� process unscathed.F�for tricky_uni~�_Q�U in (%���"\N{LATIN SMALL LIGATURE OE}", # 2-sI` '\xc5'D�UBSCRIPTN�rETTER XK�3K�oe2\x82O�dxf0\x9�3�BThis1CJK�P, notPe whi&ne��)��input =0.en* �self.assertTrue(G�.endswith(b'�)))5�2outm�UuDammit.F� 4�i�bEqual(E�!, r�) class TestNamedspacedAttribute(Soup�+):Qname_may_be_none;@a = R� Q�"xmlns", None� �3a, +� a�_is_equivalent_to_colon_separated_string�a", "b"�"a:b", a�Xs_are�if_prefix_and_6identical�G, "c�bc� /�cb��# The actual ��%�considerc�c���{�R and 8 are importantd}� z-�2Not Ga, d�eR�M�� R�8e) VK ValueWithCharsetSubstitu3P(unit.lMCasep@contVmeta_vV� H�3 = j�JMeta�("euc-jpM�#�!, O� M�*�.original_9�7� /�F� tribute_value(self): �value = ContentMetaAttributeV�("text/html; charset=euc-jp")G��V�.assertEqual6� !, u�=�*.original_L�!Sutf8"�.encode("�)) # -*- coding: utf-8�+ """Tests for Beautiful Soup's tree traversal methods. Th�� are the main advantage of usinge� e� over just�0a pr. Different�s will build d� ��ps given�@ sam�rkup, but all��4�Pcan b��qed withD�� tested here.; from pdb import set_trace �Dcopy �Upickl�r �warningsN�4bs4N���+�.$er&�(�_registry,�QHTMLPQPTreeB;Ver, )M�@elemd�M�BPY3KA�SCData � om*� �Declaration�boctype ��NavigableString���`Strain��9Tag��`�$ ��7��E�`skipIf6� XML_BUILDER_PRESENT = ( .lookup("xml") is not None) LD�<�l=��p class Y��(�1): |Bdef >qSelects, tags, should_match"""Make suH atrags havacorrect .r�T0his�qused ins that define a bunch�Aeach��@�`contai0 a Ble s@ and�1n s�@ cer,� �Es byG� sowechanism.�5""" � `R[tag.m�1tag�Kags]2� c/IDfD ID�!��Teo['id']destFind("""Basice of f*�).�Qcalls&�b_all()h�limit=1, so it'� ed5thouroughly here. """ � def test_find_tag(self):��!�psoup = �. �("<a>1</a><b>2</b><a>3�4�&")=�elf.assertEqual(B�.k�("b").string, "2")�unicode_tex�� '<h1>Räksmörgås</h1>'��)='@�L'), ��attributeI�Y id="p�">here it i�!tr>�">�6", k-id�.W+) ��_everythin """Test an optimization that <�s all tags.a�?���� C3foo5?bar7 a2, len�>|_all())4���_with_name� E� a given M�� `<a>baz� �1'a'�`class FindAll(Tree�"""Basics of the.q method� 2alll_nodes1�You can search]�tree for N 9�_�����html>Foo<b>a\xbb</�# Exact match.�R�Q�"bar"), [ �]3��R��=4�=�0# M�@ any^a number �x�s���;oupv�["Foo", ~�]� �@�� regular expression���re.compile('.*')),e��� � �0, ''Y�n;�OTrue~�!�_limit �items returned by: aa0a>4@<a>5�L{SelectsQ'a', �!=3 12�'3"vI�t=1), ["1"])# �self.assertSelects(��#�soup.find_all('a', limit=10), ["1", "2�3�4�U5"]) F�2# A3� of 0 means no�.'��&� def test_calling_a_tag_is�������&):�`oup = �.  ("<a>1</a><b>2<a id='foo'>3�V/b>")>�]E�GF]6�.b(id="foo"4�3f����`_with_�_referential_data_structure_does_nouse_infinite_recursion(B�  ���# Create a5-�V listel = [],l.append(l<4Without special code in _normalize_search_value, this would cause i�j�%# ����Equal([], #l)�presultset3t"""All  s return a R8�eSet"""��v� t gq��&� '"a�True(hasattr(<�, "source"��� X��@���Y�)Ctext�6_� class TestFindAllBasicNamespaces(Tree�"):d Dby_n,�d �  '<mathml:msqrt>4</�<a svg:fill="red">':f"1�c�g(("K�").string@� 'a"(ds= { "�A" : �@ }).�?) 5�y2,� 0""":P ways�2ing tags by� S�.�]]setUpFEuper`�,").+� �qtree = #�t"vPFirstt�. o����<b>Second5�/b>5�c>Third <a>Neste>�#a>G�_c>""" !byZ�  #  all thea��R�s��-��� 'G', '�'def test_find_all_by_name_and_text(self): ����.assertSelects(��#� �tree.find_all('a', L�='First tag.'), [�5]) I�E�e�%LTrue]�, 'Nested|�l�6re.compile("tag")),�� ���-�def test_2_on_non_root_elemen�# You can call ;� on any node, not jur0he N�.q���c��cM?���4ing�_invokes�E}��/r�,:������tag_strainerx��Q���TSoupSO�?'a'�W���js�0[cO'b'] SSeconU=dic1p{'a' : D, 'b �}�H/red0'^[ab]$ n"�with_tags_matchWlmethod�define an oracle 1� that determines whether0# a i�es the search(�"id�!esG(tag�`returnM�.1 == �get('id'�`1 = 9soup("""<a id="a">M�u 1.</a>��a� �3�q1">Does�:�bm�bm�23�7"""����# �"�3", �42."w�Tmulti_valued_attribut9oup= $ "<div class='a b'>1</div>�Fc'>2� class='a d'>3</div>" �) �r1 = soup.find('div', 'a d');&�2&�re.compile(r2�3�X3, r47�D_alla�T['a bi�]7�self.assertEqual('3', r1.string�)�2)�1R�3)�{�4)� class TestFindAllByAttribute(Tree�1): 2�def test_b�T_by_a/�`_name(�!):/��# You can pi� in keyword arguments to Q�� �search byE�e�.�@tree�g�6(""�{� �<a id="first">Matching a.</a>T� 3�� �7�second")�d Non-mW�/<bn�vb.</b>a��c� ���?"""Selects(��Q(id='�?'),�G ["�6", �_b."])Gutf8Mvalu@peac"םולש".encode("?��data = '<a title=3�>�'9�� �J�)o[1�F.a],1v��)?�+8.de�N�'[�, "sometelse"]) __dicty a (�ionary as the 'N�s'. ThisH# lets you @for +�!s like '' (a fixeG�q) and 'y5�reserved in Python.�  �� ="�11" j�!="�1">Name �B�H�2H�A2">C:I�3I�53">:� ��1>A tag called{R1'.</�s V� �V�# doesn't do whatFwant��x�=1��}������ want. % self.assertSelects(tree.find_all(attrs={'name' : �1'}), �["Name match."]) -�q�Qclassr� �2s�C2� t�def test_�A_by_R�(�%):���" =soup("""� �1<a F�R="1">z�o1.</a>3�23�23� bf�bf� c3�43 4h�3 and 4.</c;�7"""# Passing in theW�_ keyword argument will search against#:�'+ ' Meibute.!�� C'a',{�='1'), ['p?'])I�cI�3I�O�'4O�� NPa strZ@to '"s'FDalsoK�s2CSS�1**�m/, y('r4'k _when_multiplees_present B<gar ='foo bar'>Found it</gar>:@f = NFU"gar"�re.compile("o") Tf, ["k� Jf�af�&A# Iffails toh�individualqs "foo"t "b�i1be tried3K�combinedI�H��n"��?o b�"d5 with_non_dictionary_for_+\sY�M Y='JaH��L�WHa", Nb2def big_attribute_value(�G): �return len�U > 3 #�#self.assertSelects(soup.find_all("a", big_attributm�f), [])I�def small)�w�� /<=� �����}���"Found it"���Ttest_�_with_string_for�s�s_multiple_cl) s(4f��0 = �. �Q('<a ,��="foo bar"></a>��%')�Pa, a2H� #�c���rtEqual([5�0], x� �|�/")=� 9���)# If you specify the�b as a 8 that contains a�# space, onl<�#atI�1ic  will be f.9����m_n�"=� �"bar fooo�Jdef a!byTA�strainerN@tree��(�A("""T�� �<a id="first">Match.k2�second">Non-m7�7"""� = SoupS�� s={'id' : '}�/'}jT��Y��'�' @miss�|�E 8# You can pass in None�heof an C�D to vb# ThisR tagsdo not have�L�6setD��� 1">ID present <a>No 2�g�is emptyh�B\@'a',I��F�"�L vdefinedb�2any~Selects(  �tree.find_all(id=True), ["ID present.", �is empty."]) B�def test_B�_with_numeric_attribute(self):4��8�(# If you search for a number, it's treated as a string.@���0 = V�.soup("""<a id=1>Unquo:�~�U.</a><��� �<�_"1">Q<�3"""��9�Pexpeca�O= ["n���. G�%"]��assertSelects(� D1), h�):�9"1"<�=�Flist~_valuesYou can pas�6�E of  9� instead�just one,��6P# and 'll get tags that match any:�3theP��1O2">2,�O3">3,�a>No IDa%����0["1+3�F4"]),�B� �-�{ regular_exLsionC��0resC�n,9youDwhos67theD�|�a">One aO� C�aa">Two as3�cb">MixOnd b<�b�bm� S�Tre.compile("^a+$")W 8�by_name_and_containing_ T� <b>foo</b><b>bar �a�6a>"a8�Roup.ac��Equal([a], e�5"a", text="foo")R�>�=� ?bar=�.d? S_whenK_is_buriedUA <aX"<cQ!c>_/Yself.assertEq ual(soup.find_all("a"), �, text="foo")) def test_=�by_attribute_and_containing_string(self):<��@��e�0 = �. �('<b id="1">foo</b><a�2�Ea>')?�a<���a�S�elf.assertEq�<[a]�Kid=2��;���� ?�>�1>�@bar"  class TestIndex(Tree��0"""�` Tag.i!�1"""�dD� "@tree���""""<div><�����<a>Identical</a-� vb>Not i1�b1� ^�� Fc><d� with child</d></cm� b>Also n�u�,/c6"""l`div = E.div�for i, element in enumerate(div.Cents g0i, /�N�)~�0�Raises(ValueError,�;�I, 1)ParentOp�Oionsnavigation and searching through an�A's pV� s. �\setUp`super( �,").7� ��h a'''<ul empty"></ul; m6�Otop"/� 0�omiddle3�4�obottom4� <b>Start hereI��� (�''',Bsq�q�d2.b ;� �O�.2�['id'], ' z�<� C�|C�! J�Ltop'��_of_top_tag_is_\|_object�&�75[0]�9��.?� �}�_has_no_� � c�GNoneY�k� `�R�s(self): self.assertSelectsIDs( �#�start.find_parents('ul'), ['bottom', 'middle �etop'])H�D�g�'b, id="^�"t� ��c�d�def test_�(u�!): ������QEqual �.��['id'], ��G��' )Q�� ��S_of_text_element����1 = �Atree(�0="So here"��,�.m�P.name�+')��%��P �(��0S*X ��_generator' �Rs = [ �Q�R for �2 inF�s�)�%if3��s not None and �H�=�u.attrs]C�%'�s/ class ProximityTest(Tree �&):\setUp�Usuper/�,�").+� �@Jsoupa'<htmlu#"><head></�qbody><b!�1">One</b�g2">Two�r3">Thre$�/<�/a�>|���`NextOpYions(�:estG� ��&.bdQ=nex �L�.0�>_, "On 9� F�\, "2" m�of_last_item_is_none�!� ��v;6 �=� �,�Oroot�# The docu +� is outside the �/previous chain.XA�8�y� ���P_all_+� "I� "� ;�!'b"Two", "A  t�Kid=3s�:�6, [m�Bh�� self.start.fin d_next('b')['id'], '2') �self.assertEqual(�start.fin?��text="Three"), � G�def test_0�P_for_4�_elementV�!):/��y���1 = l�Atreek�(�o="One"��%�/��"b").string�/wo<�qSelectsc�>�6allB�, ["Two"C��+])��F_generator�0��Y��uccessors = [node for �1in J�.n�<%s]7# There are two sL� : the final <b> tag and its 2 contents.M�Etag,���M�#�5tagF3F o�Q) class TestPreviousOpOions(Proximity �&):~]setUppPuper(+�K�,W").9�y�0end�� � xz�p� ��C�,Dend.2�1/"3w?� P��S}w�of_first_item_is_none�"� _'html 8���LNone3�Oroot�P docu+ +� is outside�/P�f chain# XXX This7�qbroken!�k# R�", ��8���Kpass�<<� Z��aaining�9s�Rprede�1 of� 1�t�p, which�4why shows upD��,'�<�,��� ����>#!, ]bjb�U�Pid=1)V�� p�dV�i� > 7���' R��x��;�  q�*�������� ) self.assertSelects( �text.find_all_previous("b"), ["Three", "Two�qOne"]) C�def test6�_generator(self):'��m�start = �Q.trees�(}� ="T�)+� predecessors = [node for �1in L�.�_elements]��n�# There ar:�8ur T� : the <b> tag containing ��#'�1ody*�,�Chead�3and�1tml�.>�b, body, 0�, html =�+�Equal(b['id'], '1''�ody.name, "l�",���,�� �,� 5tml,�tml") class SiblingTest(Tree �"):x� ]setUpWuper(;�,")..��markup = '''<2>$��<span id="1""� #�`.1"></3�,�� �7�j�2j�2j�33j�3j�34 5'''# All that whitespace looks good but makes�gs moreB� difficult. Get rid of it;re.compile(r"\n\s*").sub("", &�=�YeTsoup(&��CNext B����� Dbnext_sa_of_root_is_noneS3elf3o�b�G�o, Nonei� Y��.Z�2 �=�J�#3'��a# NoteF�1�erence between � �I�Y��2�� 1.q��_[�may_not_exist<�J � ��qnested_q &1/.1 @�i�ibling, NoneC) �last_span = self.tree.find(id="4")+��assertEqual(<�.next_sibling, Nonec�def test_R�_'�(O�/):^�� �a.start� =�'�')['id'], '2'q�l�sm�SelectsID%�r�0s("s�5"),�B �['2', '3�34']��%��@u�id='3'), ['3M�U�`_for_t}element�4oupsoup("Foo<b>bar</�z�x�.�1oup([�=3�&��1��.name, 'b'I7���RD'bazK(D� g s({��@0barB�?���� A��baz"), "�E�nonesuchJ�% class TestPreviousSH(���"):�]setUpPuper(+� F�,").6� 0end���q�p�_of_root_is_� ��U� ��f� K�1 &�a�Lend.:�%3 ��?�P�u�P# Note the difference between �F and�.s�M�X �4��$.1"may_not_exis!�d�Mhtml ��onestedN?1.1> @�m�?fir j� =�i� J ?� �w� r�6 T�x�us_siblings("span"), � ['3', '2�U1']) 0�self.assertSelectsIDs(�end.find_previous_siblings(id='1'), ['O�def test0�_for_text_element`�&):�aoup = v�soup("Foo<b>bar</�6z")-�@tart.�1oup�([�I="ba&��pEqual(s7�.�.name, 'b'a�;� L�, 'Foo'] �W���T0bar�E� pFoo"), H;�I� nonesuchN�None) class TestTagCreation(Soup�0"""� the ability to c/�e new tags."""=new_tag#�4��("foo", bar, attrs={"a": "a �}True(isinstance(W�o, Tag)2�w�/�b.�Wdict(���=� ),*�.�H��.�fparent/� dtag_inherits_^ _closing_rules_from_builderif XML_BUILDER_PRESENT�@xml_ = Beautiful("", "lxml-xml5�Tbr = >�.BO("br,���2+�'p"�W�a# Both<br> and <p>~ are empty-, just because5�D���#C�y have no contents.%��LN b"<br/>",�br.encode(@�]�8�p7�p6�o�/hte ��h.parse<��_e ��f ��c � )� d# The HTML 2a users�1's R about whichzs{�#!�,�=+@ ref�"seX�~Bz�{5�Pp></p7� { 0str5s_navigable� :�E��� �'("s���@�I�ertEqual("foo", s) �!self.assertTrue(isinstance(s, NavigableString)) 9�def test_new_s�_can_create_n6��subclass(m�!):C���psoup = �. �7(""���Foup.d�@("fo�wComment,��TEqual)��R�1) � TestTreeModification(Soup�+):attribute_m0���� '<a id="1"></a>'���a['id'] = 23 ��8.decode(),L@docu!T_for(e�2e�f�Adel(@�j����Q'�g� �F��2�_'foo'�.!2=n�Q� 2tag���builder = � _registry.lookup('html')(���|� ("<body></�$",L�=T�;�qa = Tagg'�G, 'a/ol%�'ol&�ra['hrefI�http://foo.com/U�s��0.inV(0, a���.�W1, ol ���]���>�#enq �#b' a �+="�a<ol></����F� append_to_contents_moves_tI7elfdoc = """<pKDon't leave me <b>here</b>.</p>l����:�t2">Don\;�!,�5"""/��� 8doc9cond_par�,�.find(id='2@boldF�F��# Move th�q tag to�aend of � sed� d�igraph.� p�.Dw#.bi�Td�is now a chilh� ��p.parent|��r�� K���*X .��2�'1�&\n�0���4� �'replace_with_returns_thing_that_was_$�d@text!<a<b><c></�/b>X�8�YOa ���#a.{�� Equal(a, new0_a) def test_unwrap_returns_thing_that_was_replaced(self):;��?�text = "<a><b></b><c></�5a>"'�psoup = =�. �(8�)�a�Uoup.a�@new_�"a.�(.�F��assertEqual(a, /� ��_with_and�give_useful_exception_when_tag_has_no_parent�����2elf��1Foo� /a0Bar"����<��a.extract� None, a.�P�Raises(ValueError0�60�,!, �cP�%T it� #� � @d></��3@e></�,c~�,c,�.�/(c�00�decode(),document_for�%!_rM/��f.��b�  in6Rinto_�p"a.�?, 0� beautiful�_object_=�s_childrenA"""I�ing one BF�SF�  another actually \�E allrtof its o� -- you'll never combig�%s.G�/""# "<p>And now, a word:</p>�we're back.�k_�@p>p2%�P<p>p3 �w���W�:�A�oE(1, 3�"�for i in:descendants:��'� not isinstance(i,z�=�p1, p2, p3, p4 = list(.6�/"H", p1.string8� p2*�2*�3*�3*����7�47�� �_maintains_next_elethroughou soup = self.soup('<p><a>one</a><b>three</b></p>') �@a = 2�%.a�b = a.contents[0]� # Make it so the <a> tag has two text children.8�a.insert(1, "two") � # Now replace each one withc�empty stringR�left, right�W��!�.V�gWith('�8� �<�0# T�b�is still connected t�Ftree�pself.as�qEqual("t"",`"b.��def test_�_final_node(J�%):����`�� �("<b>Argh!"��`.find(!="$�$")�_T("Hooray!8�@new_2��e�8oupC�-��i�b����Q�.previous_element, b�7�/ar-� d�V.nextq��x� :�LNoneconsecutive3s# A builder should never create+J�7�%s,U# but if you ?��� another, Beautiful Soup willE�Qhandlcorrectly�1� = p7!"<-9�]P<c></�'a>0'{�U���zC�0dec"),�@docu%Y_for(.��2��o� ��/� �>��V�=�L�sibling, �!=�� �$, �?��_;�$�q��6� co� _' S�3���4� �(/l|0, "bar � fo����� were adda � ["M�t�]GP# Andyd�cconverWNavigableSs��P�.a�ents[0].next!_element, "bar") def test_insert_tag(self):��#�builder = �.default_�'�Dsoup$�� �(��E�"<a><b>Find</b><c>lady!</c><d></�Sa>", W�=�)F�Pmagic�@ = T�5oup/�1, ' �Otag'3��.�(0, "the"#���$.a �11, G��g�!��� as!ZEqual�C�decode(),%0ocumJ_for.��P� t�tag>the</�%)�+# Make sure all the relationships are hooked up correctly.Yb��Uoup.b� �(�.next_sibling.���� 8���d�.previous@�T��qfind = �. �p(text="8Helf.�+��w� 7�$��@��S��{c&S� *@�� ��bu��=)  �qthe.par���0���2� ��8�3theNsappend_child_thats_already_at_the_endsdata = CLE/a>"����� �(1���"a.{�(!�'.b��6�X�=�!, '� �?ext�e><f><g></�Sf></e/c>�Ql = [�g�f�e�d�c�)b]&3.a.�li��g�@/f><�e��c�o�1@move_to_beginning_of_s��4��Ad� �f�d���w� h��_works_on_empty� � # This is a little strang ince most HTML parsers don't%ow# markup like this to come through. But in general, we don't �# know what the parser would o �n't have allowed, soB�I'm letting this succeed for now.,�soup = self. �("<br/>")"�.br.insert(1, "Contents&��A� as#�Equal(str(Q�`.br), V�>7�@</brb�c�def test_`�_before(�!):"���J� �a>foo</a><b>bar</bT��-����1�#.b�U�H"BAZ�\oup.a$�HQUUX%� ��g�decode(),Gdocument_for("O�<�8BAZ�%) N�� ��b�1/ �'")�@# Cacp an ele� 0 itGb�Ob��Raises(ValueError, ?, b�t�f�`has no7ent�.extract(6�l"nope"� identical�Nanew_tag("a�[��Y�d"multiple�+Cq, " ", � �%�2� i �����D ;H Z"S� ��xb, "FOO��5�9FOO[��d_after<T� #��E 8��Y��B�a$�0���� es(ValueError, b.insert_afte�C) �# Can't "� "� if an element has no parent.:�b.extract()�pself.asm�Raises(ValueErrox� �l"nope"��n identical�i�aoup = p�soup("<a>"�Toup.ap�pbefore()�.new_tag("a")0�Y�def test_ _multiple8(m�!):*��=�N� �foo</a><b>bar</b� ("BAZ", " �OQUUX�!("$�1��=����}eEqual(��.decode(),:@docuA_forB��e�F BAZ�@BAZ �"4�.��a, "FOO ��"�|� �3<a>��k���d?!_r_exception_if�_has_no_meaning�0tag�r]�zItrin#�s�'("E�:�1��_, tag@�bNotImp#edG�?tag�� qtrFnoti�e�|%�}�~/H� >� replace_with;���"<p>There's <b>no business like�Ishow�8</p@no, $�find_all('b'Ihow.�'no�4 �L  ����� ��a���.!o, None�%no*�p,� Snext__, "no5�no.next_sibling, " $business") def test_replace_first_child(self):(�,�ata = "<a><b></b><c></�5a>"'�psoup = =�. �i(data)�4.b.p�Pwith('�'.c$�elf.assertEqual(t��i���Qa>", 3�decode()�/la�Pc��b� @/�nested_tag4 � vq� ""BWe<b>reserve<c>thed>right</d /a><e>to<f>refuse</f><g>8�ice</g></e>""E�b�2# RB the entire <b> tag and its contents ("��.��!# �0") ���2 <fB��1���5")./�`remove��Uoup.b� �f�6� 2�L��E���r�,document_for(.��2� Wm`�;�F# ThPis now an orphan ��parent, None2�find(text=".next_ele�K�previous<�!t�siblingt�" <�VdVconnected toae<.c�. 2� O"We":� � E� gap where -"us!be has been mended,c�word "to" ege0to_z'to g�gw� O� )>�m 6�'6�  �.N�p� :�r�:��lunwrap@tree7�tsoup("""  �<p>Unneeded <em>formatting</em> is u �P</p> 4��E""")��tree.em.unwrap(�self.assertEqual(*�o, None(� p.text, "���1") �def test_��|�!):���psoup = �. �("I wish I was bold.�Pvalue0�oup.string.W���new_tag("b")�E��.decode(), "<b>v�G</b>z��4��R�-�document_for(d� (_extracts�_from_elsewhere(_�D 0<b>����2b.next_siblKb@�LPputs_contents_at_the_end�I like beingB Hp2, len(4.b.� ? .��kJ ����A�dWH6��o���'<html><body>Some �. <div id="nav">Nav crap</div> Mor+�!</?�!</L�'�_�� Q6odyT6, 3�$edTfind(�#). �(h� ���� a f�'ed*'A %e# The C�@ tagmnow an orphan.�S2S 5�n�.parent 1�revious_ele;�7� �D��Agap - t"used to be hOeen mended,)_1J�*="5�25�� 5� acontent_1.next_element, content_2) �self.assertEqual($�1.next_sibling<�2.previous_e|�1|�@� �11) �*def test_extract_distinguishes_between_identical_strings(�!):D��psoup = �. �("<a>foo</a><b>bar</b>"�Pfoo_12�boup.a.W�O�6bar�b��<�2<�3new�W("fooc��E�'�7bar'���.a.append(\���b�R�:��# Now there are two + �@s in#� <a> tag, and*��#"�b"�. Let's remove<�afirst �?�he secondF�e"bar".���#1.(�����A�12, G� /�X�/�, fmultiples_of_same_tagT "" <html> <head> <scriptj �3 </�tbody> %�%���2 </'�&�4z</&�!</o�'""0["�G` for i�find_all(")�5")] :!"<�p\n\n<a>�#\n� ",G(T��%�?)) works_when._is_surrounded_by 2 'r0\n' ���3>hi��%'�� ('1�&')(CNone?;� �\clear,r"""Tag.�F)"""h�ap><a>S <em>Italicized</emf0ano8</p!# f�~ using `aa�$.p�� 62len#p.AWs), 0Y2�True(hasattr(a, "2�"$��� �decomposeem = a.em�a�.�=l��00, �&em�  O_set!�B = ' �' ( <b><c></� `2" =<� /�D1, [m]) soup.b.string = "bar" �self.assertEqual(so/�contents, [1�1]) 4�def test_P�_set_does_not_affect_original$�(d�!):?��v��b�0 = �. � ("<a><b>foo</b><c>bar</c>")4���8�#.c�&��a.encode(), bl�b�l�0</ap�K��e�preserves_class_of�^�����cdata = CData("foo�a�1 = -���;�6�True(isinstance9�,]�A)) � TestElementObjects(Soup�0"""� various features of e:�" o;�K."""<len�Q�he lengthF�%anI�is its number�children[������ +top>1<b>2,03</�"�3��# The Beautiful���q�@elf ains one�U: thee0# <W�U tag.�z*��.�Q��,G), 1 0�'�X��q��Ethre�s�� text node "1",� b>�P, and�.�/3"�top), 3�W.top.�3< me_access_invokes_find'�A&��a Python @�c .foo >� >�`'foo')��� &'<b><i></�?b>'��<�A.b, �`�?b')1�,.i3�.=�i=�|a, None1deprecated_�< /�� �with warnings.catch_�(record=Ff) as w�U ta-�m�U.bTag #�,z7@��\�'S�F is �b, use FP"b")  ead. If you really were look.Pfor az� called � J�yTag")',�str(w[0].message&��has_attr|5"""�) checks �Athe #0nce0an @�bibute.*�a�Please not�': O��ifferent from�x__in__.-�|��x�Vtag'so�s__in__  checks the tag's chidlren. �5""" �soup = self. �("<foo attr='bar'>")-��&�assertTrue(1�.foo.has_3� ('�/')3�OFals4�25� j�def testR�ibutes_come_out_in_alphabetical_order(�!):>���Amark�'<b a="1" z="5" m="3" f="2" y="4"></b>'9��H�.�SoupEquals(O�&, N�B�N�H� `�N�)�lstring�0# A that contains only a text node makea��# available as .e�� b>foo</b�bO�|, 'foo'�empty_tag_2no_}��pwith noldren has�.stirng� �LNone�����_multiple_chi��.�� aub<����d�?bar2'�`�# Even if allG-R are s, due to trickery,pit won'�rk--but this would be a good optimizatio� 7oup.a.in(1, "baraWq)recursive(� a singlea whicha inherit5�M <?</a�O"foo�#, -��slack_of C"""O$agEing ��. b>f<i>e</i>\�S�M� @all_��@Tag.�P and �4get,�p=u"") ->]Y,2catenated aa>a<b> <r> t </r></aS>") �self.assertEqual(soup.a.text, "ar t 0�@get_4�(strip=True)@�t=�1","6�_,r, ,y�[",", ~�0,r,��def test_�_ignores_comments( !):.��� 0 = �. �("foo<!--IGNORE-->bar�o�(�2foo4�e� ��.B� types=(NavigableString, C�)b��h��E����g�1Non8fooQ�8�L@all_t?ngsOO['foo', 'bar'], list�)) class TestCDAtaListAttributes(Soup�1): �0"""�ing cdata-T�% a1�q like 'U�$'.3��single_value_becomes_H�P1<a [�=� [}0"],3.a[�"])��dmultip�s�) g�0, "l?], �separated_by_weird_whitespace=\tbar\nbaz'��zU_joined_into_on_output�!� %b'"��t"></a>'m.encode(� �5�id='abc def� ['� t�V'id')o :accept_charse?r'<form 6�-6�="ISO-8859-1 UTF-8">'q� &['0��L3��N�a�*['b�'e+_� applying_only_to_one_tag�b0 =  c��0��� �L��# We saw in another that accept-charset is a cdata-list �# attribute for the <form> tag. But it's notB�any otherA�i�0self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset']) H�def test_string_has_immutable_name_property(p�!):7���2�0 = �.h�b("s").�'� �bNone, &�.b�),��(d�h�*� = 'foo'1�{�Raises(A|Error, t) class TestPersistence(Soup�o�" �ing features like pickle and deepcopy." ���]setUpPuper(P�p�,(").2���page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD�)4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/t3�.dtd"> <���ead> <meta G�-equiv="Content-Type" c�p="text/;�#; U`=utf-8R�title>Beautiful k!: We called him Tortoise because he taught us.</D� <link rev="made" href �ilto:leonardr@segfault�����]="Descrip'� �"anXp r optimizedscreen-scraping.b�generator`�Markov Approxima� 1.4 (module: �)N�IauthK�L�0 Rii@dson2�"/hbody> <afoo">foo</a�<b>bar</b>�/;�A>"""s�0tre �� <$s_and_un �nidenti60# PR�i a c�,!#n 9���it, yielda�#� M�5cal�@# to9� original.�dumped =.�s.M�6, 2}Hload,��� s(2�&ed&��fi�'�Qed.__ZE__, O�:�decode()�.� ^w_S0Mak9aB of _JnFx1Acopio�2�;����e�_preserves_encodingo�Zoup =X(b'<p> </p>', '.'�V���&o�.��&� __�6__(I� l("<p> </p>�", str(copy)) � self.assertEqual(encoding, *�.original_�!) <� def test_unicode_pickle(P�!):#��b�# A tree containing U2� characters can be D�%d.?� html = "<b>\N{SNOWMAN}</b>"$�psoup = y�. �(5��dumped =]�.� s()�,�HIGHEST_PROTOCOL=�Hload=��� s(C�&ed&�{�D�'�Ped.de@(), o�� B�h_navigablestring_is_not_attached_to_H `��!Foo<a></�b><b>Bar* s1Goup.find(�g="Foo"�22 =��7(s1� Kos1, s2!�@None#�.parent*�next_elem0�#No]�!1.3�siblingc�0�previous�"�) subclass_has_same_typ?<!--Foo-->$��s3True(isinstance(s2, Compentire_0div </!�?end�n�_ ?oup�5�;��0tag�@ies_Oents�W4div5div0div�y�3div�� # The two tags look the , and evaluate to e �W�`�a�e Udiv, )�R�0But�qy're no �� object�`False(/isV�1AndV� don't have�Z�relation��Qparse.'X#1 is�associated with a 8� at all�f�0� s�>�� al(None, div_copy.find(string='Bar').next_element) �self.assertNotEqual(None, divG� class TestSubstitutions(Soup�1): l�def test_default_formatter_is_minimal(�!):1���markup = "<b><�"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>�U</b>"X�!soV��n�. �(i�decoded = �.�(�#="�"3�# The < is converted back into � but the e-with-acute1�left alone.����)d,��@docu�a(��8�I3+))O_htm /r>}��Ex'\"<br/�!&e;�Q 5 R5SAR���\ u?nulu z�^7qNeitherangle brackets no� 06areb0 # This i#s not valid HTML, but it's what the user wanted. � self.assertEqual(decoded,"�)��4�<document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) [�qdef tesO�matter_custom(n�%):�markup = t�<foo></b><b>bar �Er/>"5�!so3��K�. �(F�)!��0 = �.�B(for� = lambda x: x.upper()?�# Instead of n6�l entity conversion Y,�~�# callable is �ed on every stringH�I�g� �1FOOc<b>BAR8 is_run_on_attribute_values'<a href="http://a.com?a=b&c=é">e</a>'����� � a�Roup.a;��expect_minimal = v�Iamp;z�{��t�  W�4, a\5�O#="�"�?htm�éA��Y����� � �0>�GNone3#  HTTP://A.COM?A=B&C=É">E�P��)dUskips_script_tag_�_ 0doco"O<:� type="text/java� ">Jconsole.log("< < hey > > "); </-�1> "R��A en = BeautifulSoup(doc, '�.parser').,�(~hTrue(bw�D" in_�Otyle"�9�?css�;� qprettify_leaves_pre# d_l_alone���� �("<div> foo <pre> \tbar\n �"</�fbaz "0# Ething outside.E�0tag�d, but everysthing �# inside is left alone. �self.assertEqual(��A�'<div>\n foo\n <pre> \tbar\n � </�\n baz\n</1�)',F�soup.div.prettify()) "�def test_� _accepts_formatter_function(�!):8����V� = BeautifulSoup("<html><body>foo</ �!</�@", ' �.parser')M��@y = V��� = lambda x: x.upper�������GTrue("FOO" in Y�� outputs_unicode_by_default�i���("<a></a>"��str, type(3��)acan_en�Odata�2_bytes�~"utf-8"��_entity_substituO_off&0markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"�/��� �(Y�k#d$��"b.�(�J��C�)�2d, S�9�' '/�9ing "# Here's the <meta> tag saying that a document is�!# a� edShift-JIS �L�_K�A= ('Z� content="textY@; chRt=x-sjis" 'a���'http-equiv="CG�-type"/>�q�Y�:� �(�G�O�0# Pr���Q, and�� apprears unchanged�-��k�#['�_'], '�'�E4~�� into somee6ing�w� !toP�����Putf_8#��J�ez #b"=kI��Peuc_j@�x"�i [�+��N �]�s_L_jis��-9a c�.��f���U�i���L16_uh���p16").de3�� v� 3%164�^�q�i_doesnt_happen_ifM_is_strained o_head>usjis" '  �'http-equiv="Content-type"/></head><pre>foo</ �D') D� # Beautiful Soup used to try� rewrite the meta tag even if�I��got filtered out by+� strainer. This test makesE�0surm�at doesn't happen.$�F�! =�SU�v('pre')'���= self.soup(markup, parse_only=K�6��/�assertEqual(;�".cUs[0].name, m� class TestEncoding(F��!):�0"""�aabilitXe/�e objects into ings."""1def&_unicode_string_can_be_D� d(�n�� html = "<b>\N{SNOWMAN}</b>"��$��� �5� "b.�.y�("utf-8"),W��[� �"�"9��@tag_l0ain��n ��3,.b� , : � �1��substitutes_unrecognized_characters_by_defaultX�e("ascii"), b@q☃<�#mac�LyRaises(U!eError, �#, �0, e�0s="<ct"�!dei�OentsQ #�,b.�)��0�U s ��}.� B��*�@ing=M-8"�deprecated_renderR �� � �OreprT!soup = self.soup(html) �if PY3K:���self.assertEqual4�p, repr(D�)@�Oelse=� b'<b>\\u2603</b>'J� class TestNavigableStringSub�pes(Soup"�1): ��def test_cdata(�)�.# None of the current builders turn CDATA sections into CData`��# objects, but you can createZ�m manually.5��0 = �. �6(""-�" =h�W("foo��.�0.inuA(1, ,�;�Q�#sta, "<![�x[foo]]>U� f�find(text=�", ��7�contents[0]2�3� _is_never_formatted"""Text inside apP is p�d�J�"r.��0But1 rePvalue<�vignored6"""6�Icount = 0��M����increment(*argsQ�"�9�E+= 1:�����"BITTER FAILUREv�?oup <>�?oup �b�V� <> !, uencode(\=�� "1,D)Y�>doctype_ends_in_newline��# Unlike other % s, a DOCTYPE always [��s# in a g��B = D�nM� �O0), u�}foo>\n")`eclara} d�� "<?foo?>", d.output_ready()) ^�DSelector(TreeQpHTML = +$<!��PUBLIC "-//W3C//DTD �4.01//EN" "http://www.w3.org/TR/i4/strict.dtd"> <���ead> <title>The �!</�> <link rel="stylesheet" href="blah.css" j ="/�`id="l1g�"/ha� body> <custom-dashed-tag X"="� " :�� �1">HelloX]re.</:�a> <div1�Tmain"J�Pfancy{��Pinner�!h1.��� er1">An H1</h1> <p>Some �0</p�P�Bonep�2p1"&�8mor+�'h2X�2X�X2</h2G�class1 class2 Bclass3" id="pmulti">Another</p> <a href="http://bob.example.org/" rel="friend metK�bob">Bob</a> <h2`�uheader3a�q H2</h2e���:me"m�simonwillison.neto�bme">me]�Qspan �Q="s1"N��#�`s1a1">'�21a14��"�2"�02 <+��@s1a2S�`test</�>@�� �t�Opan2w�2w�2w�C� a��cpan3">_� <custom-dashed-tagh���� �2"/> <div data*� ="�tedvalue)�1ta1)�|�/div> <x�0xid�z �Azida+��b�c�a/x> <y4�yA�1��/y> <p lang="en���-en">English_&�9-gb)���,�= UK/�*us/�%us[�: US/�'fr,�fr">Frenc�K�`footer� """ def setUp(self):�����.soup = BeautifulSou)�.HTML, 'html.parser')R�assertSelectsZ�1, s� or, expected_ids, **kwargs|� el� = [el['id'] for el inH�f�.R��_�0ectY�K�]�K�v.sort()�� ���jEqual(�Q�,K��,"�Vor %s� [%s], got �Y" % (7��;����', '.joinq�7), ���A����8 = �sM#0pleJ@f, *�|��.�8��C��Z�?����F�_one_tag��2�^els =v'title'{�len(els), 1&�els[0].name, U�/�contents, ['The �]�Omany�?div�4���<oin els4div�l�/elj�'*���� l'main',  E�K�_returns_nif_no_match ���>�xistenttag') ' self.assertEqual(None, match) def test_tag_in�@one(<�!):#��'�Rels =Q�soup.select('div div')*��9�u�S%�s&�, ['inner', 'data1'])�Lmany�0for��ector in ('html�", �Dbody� ������� or��p, 'main��afooter��@\limit�g����4�i�`�1], >�/=1G ���?�� H�2H� C��,��6���01no_l1len$�.\}el')), \�pinvalid7�qRaises(SyntaxErr�e�, 'tag%t'� a_dashei�O_idsm�rcustom-9�P-tag'��1H� �2>�L= o�\by_idm��:�/ed t�`[id=\"2�O2\"]z�$�ed[0].name, 'L�>�a['id']=sh2Nf��O_tex& � !('/> �����U��, 'Hello there.� ces_find_alln�;�:(*,fZ�)� o`header!eMultiple(0('ha"['K��Y A('h2!�� �,�F3']),� �oclass_�P.onepp ���S&or�-��3gels), 5*�0els$p/�!['��!['� !�B_misL& G��7#�� ?one"�for selector in ('div#inner', ' �Fdiv �G): �self.assertSelects(s�bor, ['O�1]) 5��def test_bad_id*�fP�aels = R�Rsoup.I�('#doesnotexist')K���*�|�Equal(len(els), 0r�items_inw��/ pu�3�for el in els2�el.name, 'h� �els[1]['class']^fonep']v�PFalse [0].has_attr(=�)a_bunch_of_empty�� or�-main del0�.�siv.oops�;��'�u�)amulti__supportT�.0�1�p � �2�52',k��'#�3#� �Shtml K�@��  2�2�5p���6��e?ion��� �� �����1+��3�4dchild_ � %'.s1 > a'1s1a@s1a2,�8�T span=�02s17� c �-�^�#�c��pibute_e k b�MAple(��A('p[ ="!"]�p� �)�oid="p1$� L�[K�link[rel="stylesheet{�l�0�type="text/css/�href="blah./�:no-2�,])+�[�[�+��p+�*� []),  ) def test_attribute_tilde(self):$��(���.assertSelectMultiple(#��'�('p[class~="�1"]', ['pm0�O']),0� 20�30� � /�� /��a[rel~="friend�?bob�+�?met(� R�*�Q�startswith&�~�^="style�/l1��Olink,�7not/�7not^�jV�'� R�+�href^="bla� #a[(�shttp://,��_, 'me �2� cid^="p\�sO, 'pi[-�m-�0me'�?ain��;div.�(�a&���L�data-tag^="dashe ����#?end,�s$=".css,�Tid$="S�[#��P, 'l1p�aheader �2s1a�2�A1a2s��<�/ S�cc���&�noending&�contains$l# FromyfT�*<,�<?*=" �'� ?relR�+��X*< ?*=";=id* , 'p1']),  � ('div[id*="m"]', ['main']), %�k ('a&�e$�# From test_attribute_endswithO�[href*=".cssy�-l1S�o('link,��1S� �#�data1', 'l�p�aheader �"s1�@s1a2'�&s2�s"�Odash�Z k�[�noending� &�# New for thiseRA�@'bob� me�\aY 3� � 0�V��n&�Q, 'inner�1�2�*� �-tag*="edvale����!) �1def-xact_or_hypen(self):-����.assertSelectMultiple(#��'�('p[lang|="e���1-enpang-en-g�?-us��G�4�#frv�fr,�"gb,�9 Oists1&B[relck�]0�#�aP�J� _w� � �'�8RclassN�* pmAblah*��/p[����@�'� quoted_space_in_s|or_name!html = """<div style="display: wrong">nope</div>/�right">yes.�5""":�soup = BeautifulSoup(�0, '�.parser')2�[chosen] = =�.� ��]>�Equal("yes", U�|.stringunsupporJapseudo ?\�jRaisesNotImplementedError, 4��, "a:no-such-l�"��4�h�oSyntax`� th-of-type(a)") def t est_nth_of_type(self): �# Try to select first paragraph(�Pels =%�rf.soup./�('div#inner p:nth-of-g�E1)')=��t� .assertEqual(len(els), 1&� els[0].string, 'Some text') \� �_third�03�@Another� (non-existent!) fourth�04�0\Zero will� no tags. 0�[def t_direct_descendant�>�R�id_child_B/or 9S?s('�2)', ['p1']s�j�_on_eleme40# On s operate on the tree; thi�s�3an N� v# withi7�&#Afind("div", id="main" ected = <�q2�'�# The <divE�+� ">C was:ed.'�footer">�1tag2�6not�0IDsq��oq�', 'data{�overspecifie idC�h�(".fancy "l�]�7�gnormal8� adjacent_sibling�'#p1 + h2mpheader2rO�rf4�A + p8�opmulti7�#e� + .class1C�e2[],� �<p')&general&~2, '�3�$~ ��:�[�y�A + a8�me']) sel#f.assertSelects('#p1 ~ h2 + [rel="me"]', ['me']) �4sel<�Equal([], �asoup.sP�('#innerR�A')) ?�def test_dangling_combinator(C�!):(��j����Raises(SyntaxErrorq�, 'h1 >'j�;sibi�b_wont_��_same_tag_twice�'p[lang] ~ p��-en-gb', '�%us� fr>�# Test the1ector grouping oper  ( �Rcommau6multiple��Ax, y�0xid�5yid�X�_with_no_spa/x,e�$Omoreg��k�duplicated3x3]P _`Y�y7A2=fr]9+�and_direct_descendant�5> z�, 'zidb o�_�Otagsx�`div > ,}���a��P_zidac�-in��_x,y, �2invalid_#�,9' �Mx,,y �__attre=en], �2-gb en ��I/id|��  > z[id=&], �b ��^ X�?nes-vbody > ��^~_element# When markup contains 9� 9�D, a ]s�0# will find all of thSem. � markup = '<div class="c1"/>�2��"�'G�!soE�BeautifulSoup(\�, 'html.parser')4�elected = ?�!.s�(".c1, .c2"+�f.assertEqual(3, len(3�Ued)) W�# Verify that find_all �s the same elements, though becauseG�of an imp+�ation detail iZ�Q�m in a differentC�Frderm4fory��4���({_=['c1', 'c2']):L�� B�3ed # Use of thisOrce code is governed be MIT license. __ �__ = "MIT" from col@ionsort defaultdict imp�itertools�2sys@�Cbs4.g@�(� Ch tMetaAttributeValue,�_onten�HTMLAwareEntitySubstitu@��nonwhitespace_r@) _�[h�'J�TreeBuilder'?�O'SAX��� RegistryD� ] # Some useful features 'a M� to have. FASTfast' PERMISSIVE�permissive' STRIC*�strict' XML%�Pxml' �� ��t�$_5�Q5' h��(object):Ldef __init__ifbself.b7cs_for_�( =$[(list6�V = []j�rA!erj�c, tree*�_�}�2"""�Her a*� based on its adverti��Qs."""�N��� r�.�s�[0�@].in/(0�� H�3� 4��clookup#*p�s$ifj) == 0��# There are no js at all�(return None��i��d�y didn't ask%ny `. Givem�F mos=�f�# recently �x� V#[0aL�qGo downk���/��o, and eliminate anyL�\ do�match ever��`� = s�(�� .rC�6se(�candidates =�}����L_set�Qwhile\��) > 0: feature = �s.pop() ��we_have_the_1�self.builders_for�S.get((�J, []M�if len(T��i) > 0:z���if candidates is None'��+�(�/= b�� \� 5�_set = set(I� ��0�?els��# Eliminate any�that don't KT thiss.����.intersection(@������5)) 3�# The only valid�Qare t��yn��C�Go through0�riginal list oand pick)�0fir!�'neL��*O's io���areturn,]�6for� b�sPc� (��p� +��gl�q���" jBeautifulSoup class will take9s from developers <!us�m # to look up ��[registry. �_� = TreeBR(�B() ��(object)�@"""TSa document into a � �6� tree."""lNAME = "[Unknown��!]"3ALTERNATE_.�aS = []�Ms��is_xml = Fals@�W@labl�reserve_whitespace_tagL��empty_ele��� # A tagbe considered an 8�-8����#H�1hensw�i@s no]�RtentsP���`value 0the�ag/attribut� mbt1ion!a �C- orB��comma-separated^� CDATA, rather than a single ��`cdata__v��?2{} def __init__(2��� .s=��2��0�pialize_ �9� , �?��u&hebahas been J��d1nowi���being associ�2wit��/���������T� ����^t��Fpassqcan_be_,d2�,W_name<�"""Might a� �0is (� bZ w�'�?n����f~markup may or�not actually present this tag as �self-closing. �For instance: an HTMLBuilder does not consi�Qa <p>[�Uto beG�an empty-elem�rag (it'@�%in*�`�.3�_3�_tags). T�Pmeans�#�u�E�will be p�ed as "<p></p>",�w"<p />"��The default imp�ation ha�P opin�about which�Fs ar���ps, so a*� �a� =� if and only �$it�contents.�"<foo></�"k�Qcome ���,M��H>bar,�G�be left alone^�%""#�0if 1� is None:,��areturn Tru(�tag_name inT�K� �def feed(�, markup)p�raise NotIedError()�A�prepare_>� K� , user_specified_encoding=�,��� �@docu|_declar8�� {�,.�VFalse�test_fragY�4to_e���]�p"""Wrap�K #� to make it look like a R�`Differparsers do4d�Hly. e, lxmlintroduceAheadhtml5lib5��Nn't. Abstractingw�away lets us write s !"ru�s through the �0com���results agaa otherP�,thod should] beEPd out2 of�N ��set_up_substitus7tagB� ?_replace_cdata_list_attribute_valueO� , !�s[�B"""RG�s class="foo barc#th�[�@", "�]��"0ModFs its input� !ifB��K����s@�universal =j4�.get('*', [])g����cC� ;�?��.lower() c�1for�$�(�w.keys()�P���� tt �For (�ic and attr in tag_specific): �� # We have a "class"-type attribute whose stringD�value is a whitespace-separated list of>� s. Split �Qnto a6�.4�p�=� s[�](�if isinstance(6�,� � %�vs = non�_re.findallG�)p�Oelseh�[�# html5lib sometimes calls setAqs twicea�G�for the same when rearranging�OparsF� tree. On-�asecond���T�2herdlready If this happens,D� le��n� alone rathe�aan try�oF�s@_again:�Ad�� �Q���Y1 = (�s)�breturn}! 5 SAXTreeBuilder( �l"""A Beautiful Soup b2�Q that@ens �SAX events.""" ~�def feed(self, markupa��� raise NotImplementedError()A�QcloseB�:�Fpass#�bstartEB�*�r, name,�7�( = dict((key[1],0)�3key�1 in�(4�.items())I#print "S}� %s, %r" % ({����.soup.handle_�:tag/�0��7>end��~�`End %sx��w�?�o�4endm�\�h�$NS;sTuple, nodeNH# Throw away (ns,�G6now��&j�(Y����a��&r.&�5NS(�.�)���RPrefixMappingVp�9�V �aIgnore)� G ?endf�[�'# � W��# �characters�wcontent�Rdata((�t) def startDocument(self): �Qpass �def end)�  class HTMLTreeBuilder( �\�"""This � knows facts aboutH�.�Such as which tags are empty-ele��.�2"""8�preserve_whitespace_>�=^� AwareEntitySubstitution.7�]�u�_u�N�Qset([��;q# These�Afromm�5�� �'area', 'base�r�0col�Pembed �h�0img �Pinput �`keygen �@link�menuitem �tZ�Apara�Qsource�Atrac0�Uwbr',� � �earlier versions of�A and�removed in����1fon�abgsoun�Rcomma �Afram�Bimag �`sindex�Qnexti2�!r'�"])�s� standard defines tzs block-level s. BeautifulQ�# Soup does not treatC�4� differently UotherT�J# but it may do so eventually,2tpinformag is available if�you need to use it9�xAs = t"address", "article �1sid �6�Aquot�Qcanva,� dd'�1div�l�t�sfieldse �@gcap��1gurG�afooter�0orm�!h1�2�3�4�5�6�2ead6�h<� li�@main�!na�qnoscrip�o�Qoutpu�p(�p�4sec�t@��t�� �u9�video"])X�8ttributesocontaining aA�0-se`ted listqvalues,Fa single �. That is)&�="foo bar" meanz! ' �4' a� has twoO�G�Q'foo'Y0'ba*)he}� &��g�'. When we�encounter one�:w�s, we will pars�S intoF�a if possible. Upon ,W-�T�b�`converLAback\� a string`cdata_j_�{��Qs"*" : [;, 'accesskeyMdropzone']2�a2�!ret9rev��[�!�? ['"� td �1["h(s"\�/th�8��:���Ppt-chzt]�cobject'�`rchive � }�5 specific,�are *.accesskey and *.dropzone above. �"area" : ["rel"],�Bicon�Zsizes�Sframe�iandbox �boutput �8for�!} c�def set_up_substitutions(self, tag):)���# We are only interested in <meta>1�s0� if�.name != '#�'O��S�return False���http_equiv =C�Pget('�-�%')d�xcontent(��%�Zharse%��&')v��� that say w �encoding the�p# docum�was original1 . This means HTML 5-style?E�Btagso�pprovidee�2 "c�" attribute. It alsoW�C�a�4a��MW�F">�a�fhave ""6S to "nf-type"#K� In both cases we will replac�value of�appropriatV�with a standin object,pcan takG any���_�f = Nonp�3if kq is not� #  �E�="utf8">�� �q� $�Stag['�b'] = CTMetaAV>(&�celif (�e� ~� )�.lower() == 'X�'&��4 M=E Z�="text/html;IH�$$ �$�$(0) %register_treebuilders_from(module"""Copy TreeB'� '�bgiven 1��A1o tB�2.""# I'm fairly sur$$isthe best waydE��O�_`�b= sys. �ss['bs4.�!']Afor i�.__all__obj = get�,6�Eif issubclass(obj,����sL��Q�,�,�$��.append(� -�3# Rl� while we're at it*�_���c�3�_2ry. � er�1) � PM0rRewedMarkup(Excep;��pass # Builder8s are registered in reverse order of priority, so that custom # builderB�,rations will take precedence. In general, we want lxml # to/� over html5lib, because it's fa�P. AndJ�bonly #Q�0to '�HTMLParser as a last result. from . import _e�p,� b_treeb� s_0�('�) try: � K����M� �)�) except I�RErrorZ�# They don't have  installed.h�Opass�a��� �%���0��# encoding: utf-8 """Use the plibraryz�b filesa0are��too bad.""" # J� of this source code is g0aned bym��MIT license. __ �__ = "MIT" __all�[G'�RTreeB!',�"] 6�,.�r@7� -� ��e[�.���RremovPython 3.5. Sinc` can n1 be�# thrown13.5z�%�just define our #�Pclass�a placeholdere� �!(EBion)��h \4sys �warningsStarting��!2,c�r constructor�'strict' # argument, which we'd likeW0set�False. Unfortunately, # http://bugs.pI.org/issue13273 makes n�=True a better��fefore �.3. # # At�0end=�{monkeypatchX) #q�works well oni�2. major, min�release = sys.ion_info[:3] CONSTRUCTOR_TAKES_STRICT = H� == 3 andP���2�X�I>= 3F�@�_IS_DEPRECATEDN� =��CONVERT_CHARREFS?�S>= 4 pbs4.ele(RCData0Com � �RDecla�boctype �ProcessingIj2ion�)�q�ddammitp�EntitySubstituP, UniD)��B;���d�� �k�Z �@) %�PARSER = 'S2' BeautifulSoup/�( �1): &def __init__(self, *args, **kw �C): � HTMLParser.__init__(self,4� 4�*# Keep a list of empty-element tags that were encounteredB� without an explicit closingA�g. If w=�( a�I�of this type, we'll associate it j�@ one�@thosU�ftries.F� � TM�isn't a stack because� do�pcare ab�7the=�order. It's�s�ve already handled anll ignore,�!umhey ever show up��.T�_o�1ed__B= []def error7msg"""In Python 3,  subclasses must impO�P), al!ghbrequir2doe\ppear to be docu;2ed.���2�w�s�) as rai<PzceptionM�!an>1nt,� method is cal only on very strange markup our best�Etegy��o pretendt0did(ahappen=�k2Vgoing5"""C�warnings. �(� ��-P_stari�3tag0nam)Gttrs# ��uwhen th�looks likg<tag/>@E# isy�() tells�ptag notd�1tag��0# j6M@its �p matche0^knownM8. W��&�_tD uPwe wao�K0end�xourselv��!= X,�w�t��0, an, mu=False)o�F�r�(D�!�*� �k��:�,'q� ITrue1XXX7�Fspac=��_dict = {}v��for key, value in�%�%�A# ChNL�J�Ribute4�s�U�!&in?��f�consistency�4�other tree builder�<�"iff���~� ��"��E= ''������r[key] =G� #�>�&""@�#print "START":�_soup.#��", �� ifktag.is?and_ �@# Un Gp�s, html.parser doesn't send separate �Wtag �"# events for empty-element tags. (It's handled in>��*_startendtag, but only if the original markup looked likeN�<tag/>.)� � So we need to call�_�() ourselves. Since wj�Aknow�� is identical\����5#,E� doeMwant���ross off any previous �_��t o7is name.,�Sself.s��%�, check_already_closed=False) B�P# But@ might encounter an explicit @�0ing@this � J� la9�on. If so! to ignore it�� �_v_v.append(� }�;defk�,+aTrue):>��#print "END"8��/ifHA anda�@ in o�.� j�n� T<is a redund-�T1tag\�x w 4# We've v�+ed)M�it, so just��!1 it�;lis## ALREADY CLOSE&�P�{.removeJelse I�Osoup�Pdata(3� , ���.H�� �(�F�scharrefI�i�# XXX workaroundWPa bugHTMLParser. R� 6onca0# i31fix1 supported versions# http://bugs.python.org/issue13633!if.swith('x' �Preal_�a = intq.lstrip.�F, 16\/elU�XU�<X')U� ����f = NonI6if 6�V< 256�4� #  numeric ties are qs#`referefUnicodb�F���r pointssometimes"&y @�*� �6�a otherWdB(ahem, Windows-1252). E.g. “�pinsteadaÉ LEFT DOUBLE QUOTATION MARK.`E� # co$de tries to detect this situation and compensate. �for encoding in (self.soup.original_ �, 'windows-1252'):K��V�eif notR�!��%�continue>�?try2�data = bytearray([real_name]).decode(�)X�except Uni)�D/�Error as es�EpassI���� .���'�u = chr(� ��a(Value�, Overflow�)� �h��or "\N{REPLACEMENT CHARACTER}"�phandle_8�(�!) �Cdef �entityref , icharacter = E+�Substitup.HTML_ENTITY_TO_�Q.get(K�p�� 6if M�!is=;Non��Qw�?�;els+�B# If�were XML, it would be ambiguous whether "&foo"��# was an� reference with a missing�?� semicolon or the literal strit�@. SiK�inD��F, we have a0letL�st of all�+s,��j�0one� n't found, so assume it's� S�q"&%s" %$ �hscommentf�Q�fD�endData(E��<��?��$�@�Co�Odecl�0if w�p.starts("DOCTYPE "����)�! =0�I[len)�]7#elT�S == 'L�'s# i.e. "<! �>j�%''V� -wDoctype-unknown_.�.upper()|'CDATA[5pcls = C�0?�. N�aDeclarT� M-7clsZ/piG�E�� ta(ProcessingInstruction) class HTMLParserTreeBuilder(��): is_xml = False��picklable = Tru�bNAME =\�QARSER+�features = ["�,!�, STRICT]c�def __init__(self, *args, **kw �!):N��f CONSTRUCTOR_TAKES_O� and not!��_IS_DEPRECATEDN��R�c�['strict']��!�is�CONVERT_CHARREFST�convert_charrefs^����!.p_�@ = (�", �)%prepare_markup+ �, user_specified_encoding=None,?����document_declar8� excludeO�sP�5"""]�:return: A 4-tuple (�original �, �@�� within <�whether any J�acters had to b��replacedA� REPLACEMENT uACTER).o��if isinstance�7str��b�Vyield�] �!)^��.���6tryJ�/, ]b�dammit = UnicodeD�� m�,Phtml=���� ���.�� |�)�_�.�contains_5Ifeed+h< = j = BeautifulSoup2H( :�T.soupa�� � �:try:k�.�Ki� �fclose(�aexcept^�Error as e_�warnings. �(RuntimeW�(��i���"Python's built-inb�Pr cann� the given. This isa bug inC D�%�e best solu�2�1to ll an exter<k�@r (l0or +a5lib),;use_� Rthat <�. See http://www.crummy.com/software//bs4/doc/#�bing-a-O� f�felp.")�;fraise 0�.already_d_empty_ele� ] # Patch 3.2 versions ofzearlier than(� .3 ��some # 3.2.3 c>ode. This ensures they don't treat markup like <p></p> as a # string. # # XXXI�"code can be removed once most Python 3 users are � .2.3. if major == 3 and min�2�not CONSTRUCTOR_TAKES_STRICT: import re�attrfind_tolerant = re.compile($��6� r'\s*((?<=[\'"\s])[^\s/>]��=>]*)(\s*=+\s*'6�(\'[^\']*\'|"[^"]*"|(?!E��C�>\s]*))?')4�HTMLParserTreeBuilder.� � A�locatestarttagend�r""" <[a-zA-Z][-. �p0-9:_]*��# tag nam'b(?:\s+��# whitespace before �TibuteH�S (?:|�_}�:~�}�#6�8��^'\s��# value indicator �!�(?:'[^']*'� ��# LITA-enclosedE�;� |\"[^\"]*\":� ;�:�P[^'\" 70# b�l��R )?��_)* ��# trailingP """,qVERBOSER� BeautifulSoupp.: �efrom html.pS0tagK,� �/�Adef )�_(self, i)����%._"�@_tex0Non�"� endpos = -��check_for_whole;�b_tag(i��5�$if6�6< 0n���creturn�t rawdatae���z��'�2[i:�]��g�R# Now��: ^�between i+1j into au��;sv��U = []�pmatch =1��j.�(�,Y�Passer tch, 'unexpected call to�B�A)Ak = \�V.end(T� 0lasg��j=[i+1:k].lower4�while k <u "ifjstrict��I5m =Y �k��+�OelseA� J� 0if RmN�Ubreak�v���1�t"st�C.group(1, 2, 3�e�?res���V�F� �s�Aelif"�@[:1]@'\''�;�[-1:] or \��B�>��"' == attrvalue[-1:]: �attrvalue = �i[1:-1],�&if�F� self.unescape(T�)O���s.append(%�name.lower(),l�)8�`k = m.5�%) Q�end = rawdata[k:endpos].strip(A�0if +�not in (">", "/>")�lineno, offset = �ggetposN��Ipif "\n"O�%��__starttag_textX��1�\�3 = �+]� 8�p.count([���?����Olen(v�5) \�1� �-q�_rfindq��Nelse�� �/+ � �"if~��c��error("junk charactersc�\ tag: %r"Z� �] % (k[:20],Pr�phandle_>7�iF `returnG5pos�Y�swith('/>'U# XHTML-style empty�Q<span!="E" />Z����_endtag(tag)s� >�;��k� if�l�CDATA_CONTENT_ELEMENTSd�Pset_cDP_mode�� 7� de  3��G�g, elem=��.X���! =!�*K���'�interesting e.compile(r'</\s*%s\s*>' %V�c, re.ISBeautifulSoupParser.p�: = ��<� T�lCONSTRUCTOR_TAKES_STRICT = True # Use of this source code is governed by the MIT license. __license__ = "MIT" __all__ = [ C 'LXMLTreeBuilderForXML',� ��b] try+from collections.abc import Callable # Python 3.6 except I$�EC as q O� K�! "�$io�|BytesIO�dString�Dlxml1�Retree/�@bs4._4ent�(�rCommentrDoctype �NamespacedAttribut�pProcessu�Qnstru4�?XML�)�b �BFASTD�� ��PERMISSIVE, ParserRejectedMarkup, TreeBuilder�>XML) from bs4.dammit import EncodingDetector LXML = 'lxml' def _invert(d): b�!"I�� a dictionary."}�areturn�((v,k) for k, v in list(d.items())) class t��wForXML(�)y�DEFAULT_PARSER_CLASS = etree.XMLParser �is_xml = True�processing_instru�_� = @�"�I!�O�NAME = "B-xml�ALTERNATE_ ��� ["�]5� # Well, it's permissive byn�! p� standards.�features = [V�,,2�, FAST, PERMISSIVEi�CHUNK_SIZ�4512�This namespace mapping is specified0the_�$ N*��#��~PNSMAP��(xml='http://www.w3.org/XML/1998/~�"')� G�_INVERTED = h "�7��initialize_soup(self, �!`""Let �BeautifulSoup ob- know abou$�~'��J�/���"""� �osuper( , �-).�Eoup)^��$�._register_(s�.3�, #� encountered�Qwhile�B�^document.�pmight be useful later on when creatC�0CSS(w�C�d<ey, valueBH�=���if key and�0not9�n�*.j 9��=�/# <Va new5%�G���@# If�re are multiple3�s defined with+�B sam�A��# prefix,#�first on` takes,�~cedence�r[key] =d�ldefault_,�/b!# can either a-�xQor a mu, which[`# will2�dantiat�A argm* if@3f._$����JNone� 5��$� S( ��ptarget=H�, strip_cdata=False, recover=z4=V=e]O_fora 1Use� parser.  �parser = self.default_�(encoding) ,���if isinstance((�, Callable):)��-�1# I*�tiate the 1�c with m� arguments<�*�=3�(target=�, strip_cdata=False, �= �)O�creturnI��def __init__(T�q�@=NonN�mpty_ele�Q_tags�� # TODO: Issue a warning ifi� is present but not a�3# c4`, sincat means*re's no way to creC7newB��s for differl��%s.+���#._p Z&�Mif e������ U�1���K.t(�:�psoup = X���nsmaps = [� DEFAULT_NSMAPS_INVERTED]5� �getNsTag9tagPSplit\! namespace URL out of a fully-qualified lxml tagb��:�P. Cop �Afrom%�p's src/0�v/sax.py| ifC�[0] == '{'E�tuple(tag[1:].s�('}', 1)7Oelse>�(E,g��prepare_markup �, user_spec�_3,����uexclude.�/�1doc_declared9�g�5"""t�:yield: A series!4- $ (��$, g� �has undergone character replac<��SEach v��#�0s ayQategyt����� 0ead� us5�UnicodeDammo conver@byteUng to## 5� C��# EDetectorE�@iterMAoverDs, and tell�y!tr��e��P as e2�[0in mis_html =eis_xmlN�S%� ,�`proces2_vruction_clas$P��j�Z�2T�?XMLW�X����7str�H�# We were givenr. Maybed#ca�5 oni�E�this system?�, �w, False  if isinstance(markup, str): �'# No, apparently not. Convert the Unicode to UTF-8 andC�tell lxml%�parse it as1�..�cyield �0.en\�("utf8"), �,2��document_declared_=�ing, False) 7�5try�s = [user_specifi:�V�]N�detector = E�CingD�(%���g �, is_html, exclude�)?�Dfor �D in t�"JingsG'�{�D�/, � 0�def feed(self, I�&JWbytes&��@ = B�#IO�/elM�� = StringL�2b# Call�) at least once, even�L�is empty]A# orr won't be initializedcdata = B.rea2.CHUNK_SIZE�:try�\.g�'= �A_forz.soup.original#T��b9�.�� #�while len(�Z != 0��I��7w cA!on  reDf ��A�, chunk by ��G����&��0���/if��'��#�fclose(except ( DelError, Lookup �petree.Pr�)e7braise %�RejectedMXs(str(e)�P���#��a.nsmap� DEFAULT_NSMAPS_INVERTED]�K�Qstart"�, name, attrs, J�7={}b�# Make sure&� is a mutable dict--may send an im!�Vproxym�! =<�( �`�nsprefix = NoneI?Aeach�space mapXit comes inh��`) == 0h&)�js) > 1�(P# The�re no newv� s [this tag, but��<���# non-default;� ar  play, so we need aC��separateg�` stackknow whe^Gy enR�{����.append(Pi8�� # A new namespace mapping has come into play. �%# First, Let the BeautifulSoup object know about it.A�self._register_namespaces(nsmap)o� Then, add it to our runn�list of inverted N� s�#�s��v�s.append(_K�� �Also trea�o�a� as a se�attributes on, �tag, so we can recY�e�Zlater��N�As = �y.copy()Q�for prefix,@ in &�.items())::���W��D = N$dA�(1��5�"xmlns",x� "http://www.w3.org/2000/(�*/"��B��w� s[�}�6] =� #�@s ar Find anyC�s that ca�# from lxml withg�s<�Pached_Rtheir�W, andA�1turn&�g@new_��%{}M����a, valu���sm,�Br = KgetNsTag(=� b"if��` is None�����?els:�sq��7_fo(� ��W���qn\��F�� �f��(�Z�a��U�!�r��(��soup.handle_starttag}�$, d��s_def _n��[�A�1""" �currently active � �Vgiven.E."""G!re�`6��Z�N1ed_f�reversed(�.�s��i8�Qs not���R� d�� '�[q]�)����t���b���.wendDatacompleted_tag,,�tagStack[-1y��e�1�_��4�� 3 ted_nsmap in reversed(self.nsmaps): �if inverted_%� is not None and namespace in,�O��^�nsprefix =/�[K�]��9�Ubreak��soup.handle_endtag(E�$, e�)0�iif len�J > 1��# This tag, or � of its parents, introduced a�g���# mapping, so pop it off the stack.0��^s.pop() �adef pi�, target, data;�endData(��"�<�(I� + ' ' +P�o���3�O�� processing_instruction_class�`��1�, content��(��sdoctype�pubid, systemT���H�B = DR�P.for_P�_and_ids+ ]� �object_was_pa4\��scomment��]!"H (�bs as C4� `�?s."|����`.H���'�C�o��test_frag�_to_docu ���r"""See `TreeBuilder`.""�return '<?xml dion="1.0" encoding="utf-8"?>\n%s' % j�1 UW LXMLd�9(HT�,!�ForXML):HaNAME =$�TALTERNATE_�S = ["lxml-html"]5�features = 0�0+ [9� , �, FAST, PERMISSIVE0is_�q= Falsex�8 E = PWIV��rdefaulturDs`etree.�PrI�#fe, markup=�P�6 = 3original_���W6try��5�rC��T_for(@� 4�.��f� %�Wcloseexcept (UnicodeDe�Error, Lookup ��)ye�braise %� Re1edM((str(e)) L4J�><body>%s</ �!</� &p# Use oQis source �governed byqMIT license. __license__ = "MIT" __all� [ 'HTML5TreeBuilder',�] import warnings�re from bs4.b4� +�(;�PERMISSIVEK��b� �&_5 �w��)[�zelement[�NamespacedAttribute9�rnonwhit�d_re, )�html5lib[��.constantsb�nb�sY�sprefixe�� 0Com�;�cDoctyp��NavigableString"�#Ta �q) try:?# Pre-0.9��r�Ctree{�_base as ��@�Dnew_?� = False except IPError@�e��q and upJ���True class ((�#)�u"""Use �1to ! a�Q.""" �@NAME*��features = [#�', �1_5,�]H� def prepare_markup(self, �, user_specified_encod�e �@docu'w_declar3�=None, excludeJ�s���M�# Store the~�-~� ~�@ for� later on.�>���.� =5�� �@�/# �+ �p aren't[�d�# ATM becaus� S does<�;�UnicodeDammit�.if=7����. �("You provided a value; �Q, butq��j|%er�1sup7 @�5.")�tyield (@�)l�# These methodsE defined by Beautiful Soup�Ifeed91if s2�parse_only is not �/#L�):�.�u entirec will be (�dG�5r = .2P� r(=�pcreate_�)#er`�extra_kwargVdict(!if1isi-%ce7strW�J��,�f G�!�k�['overriB'] =?M�#ls���c�rgs['encoding'] = self.user_specified_encoding �doc = parser.� (markup, **extra_kwargs) 4�# Set the character Y� detected by#�tokenizer.@�if isinstanceo�Ustr):$���# We need to �al-case this because html5lib setsA���E��F�`UTF-8 �t gets Unicode input���doc.original9u = Noneg�Jelse�3�Y&r. xstream.�5[0]V���hif not* Z�., 5�;�# In 0.9�P and �Uis an= {�D�8pobject.@wantH�xa~�ing for compatibilityM�� with other tree builderss�y� �= = �F.nam�7� ;�def create_��(4 , ]�spaceHTMLElements�&�.underlying_>� = TreeB�CForH(^�,Usoup).�breturn�o��test_frag�_to_docu ����"""See `�\`."""|� '<_><head></�body>%s</ �!</#�D' % i� classsg_base.�&):�__init__�:@oup=�0if L ���5oup� from bs4 import BeautifulS8��yU� &�`("", "Y&r"gsuper(�. %).���Cs+ �v.reset(�^8�.� �WinsertDoctype3�!, r���" =� ["�"publicId!��%�hsystem%��""]��yd�3 = �P.for_p�_and_ids( �$, b�$, G�Q_was_#d(a�je i,�20tagh��W.new_tag���2� 9tag2�3��0com�Class(self, datSa): �return TextNode(Comment(data), self.soup) 3�def frag$�`Class("�P�from bs4 import BeautifulSoupC��y��5�W�* ="�("", "html.parser")5�.name = "[docu�_�%]"/��0Ele"��s��g, None�appendChild1�g, node�!# XXX This code is not covered by the BS4 tests.��.d�(Y�".e��CgetD��|��J�CgetF5�treebuilder_base.TreeB�.?��b���Serializere�$, �Nurv = []doctype_rre.compile(r'^(.*?)(?: PUBLIC "�"� �)?| SYSTEM�C)?$'R�l�s��, indent=0h�5�if isinstance(.�  3��7�Epass���H�D�B��Fm = d.match� �[���Fif m~���#m.group(1<��*��@�P.last_x > 1N��,�publicId = V�v2) or "t�(��isystem4�34�B�4�B�$rv1"""|%s<!DOCTYPE %s "%s"�i>""" %z� �c(' ' * , &$, �$, �)/Oelse!� �_>" % {��g��c�_�]�� X�/el� m� --p/--m�� 4u��NavigableString}�P\"%s\ B �$if�=Ospac��\0%s ^% (prefixes[q�B�/],s�}:� '�����?� r� /%sname))  �if element.attrs: � ���ibutes = []$� for name, value in list(_�.items())h��s�if isinstance(R�NamespacedAttr�B��F��8� = "%s %s" % (prefixes[�.�R�!],��)��O� ��,�� *��� ".join(�h�i.append(8�/) �{�vsorted(P��$rvm�'|%s%s="%s"'4' ' * (indent + 2).)�+�== 2���Pchild�?renserializeE(/�", g�� (�_�7, 04return "\nrv) class YLcobject/def __init__(self,/!����.q�$ =!�����=AdictI�1�w�n�3tern�e��SR�PB.__i8�K�0setqN� q# If this �^ �q multi-{d��/��,�!# a5itsAto a*.&���_k� = HTML5TreeBuilder.cdata_#��s;�1if �B$�J['*':or e� 4�R����1and  &�[j�#]# A node that7being cloned may have already undergone�#procedure,��Pif no\�� = nonwhit:_re.findall�2] =���FsS=�?key<�+�W�x�S__lenz��!en�Dg�@���Dw�contains|�;B�X�.�^Rtreeb_base.NodeW�y�lb, soup�2��R�� ent.name)  ��self.element = � �1 �psoup = � �namespace = � $�def appendChild(I�, node):!��b�string_c"�4 = �ENone$�if isinstance(A�W, strF��J� # Some other piece of code decided to pass in aB�5inge�C��f�1ead:��reating a TextE!`objectK�contain th�F��.\�� ���(�/el�?Tag�,/Ta�/n �:Tag����b .__class__ == NavigableS�I3N�;�0�g�2par�o�Jelse]��N�O�hif notO�B and.�!is/�� ��q.extract()?��Iif (J�g����.Eents��C���&�O[-1] ucWe are� onto an|� # TODO This has O(n^2) perform�, for input likSf"a</a>�I..."�Cold_J��� 4�<new4��.��(W�+4)K�(�.replace_with(L�� 2�o�_most_recent�:�k�2� 0# C`e a br<newp from t �G�c���.��0�# Tell Beautiful SL�0`ct as �`t pars�z�/�F�immediately aftery�8� ent's last descendant. (OrB�,��"noJren."if�Z ��m=�_�_�j(Falseq� 6�6nex%� thfurther ahead in the parse tree is �# being inserted into this earlier element. T�D�very annoyL�because it means an expensive search��@for �Dlastu�1 in���.4�most_recent_� = self.soup._K�_descendant()C�Yelse:��U�d� 4�o�object_was_u)d()��]�`child,Aent=;�U�,,���=��)�def getAttributes(^�)�if isinstance�.{�@ Com.���return {}���e�IListF��s�5, a�"):��c�'if� is not None and len(8�G > 0<��@�Aconv_(�U = []��&��Zname, valueUl�4�.items()�=� 8G�_tuple,���0�@new_&� = Namespaced2 (*� �6���7del[)� ��%����[n�2] =�\�*�� builder._replace_cdata_<_ sH��� e7`���W# The% may contain variables that need substitutionA# Call set_up_(�s manually2� ��0Tag��structor calledAmethod whe�,�was created# but we just�a/changL��A, sof��_Kagai�J� �+U = property(S,�s?2Tex , ],VpBefore=4ptext = 4�3Nod��pstring(G�)w�9�^�����(n� �K�@�pappendC?� �[��-�, node, ref��Tindex.�(-��@if (P��.__class__ =V0vigS$*��.�t.contents  �4and self.element.contents[index-1].__class__ == NavigableString): M� # (See comL�s in appendChild),�old_node =� 6�tnew_str5�Ssoup.�Eing(U� + \��z�.replace_with(F�+�Yelse:}� )qinsert(',n��cparent� U�def remove(]�H�U=�extract()A�Z�C�4renF�2ew_�L� """Move all of this tag's cren into another�A."""���# print "MOVE",.�KFROM.�%�PTO", ���}�B �����;�_*�� 0�# Determine whatEnextF�will be onc:5the0�3are%d.�ifinal_P�#= �.�vsibling���s_last_descenda>��.&�(False, �aif lencA�Z) > 0P# The 4� already 4�6ain:S. We ,��#8ingdr0to R'en7�8��2� =� �I[-1]}� E�M +�`� .&%no3� ENonek�*��"l�X�2to_ ���%�9,�' SeBre first�'s previous� m����D# to! s !in 3new��4�p�_�*[0'if7P@ is not t � c�.�K  X� �/]�C T���W �st_child  �if new_parents_last_descendant is not None: 4��9� .next_element = first_childG�?elsY� C�P��v��4�u�{sibling� �# Find the very F � be>�moved. ISw-� D�"'s?�6� ha �*anNS# its$��is whatevert� previouf� �t�Y had.���&= to_append[-1].G(False, True) W���= _o `�� 8�# # TODO: This code test coverage and I'm6Isure��9# how to get html5lib�o through td�path, but it'sH��Ajustother side of�:ine�� .H�! = �0X�^Afor 6 inX ~ �.1 � ���P.cont.(Q�0# N hab%is/�ren, changexus�U = []��k9nal! # print "DONE WITH MOVE"T�!�FROM", self.i�%�GTO",\#p�def cloneNode(I�) ta��`.soup.h1tag'�p�S.name�� �espace)�nI2= E)C(tag,��Q� �7���$key,value(�attributes�Fode.�[key] = =��preturn ��BhasC*��/�x�[@�getNameTupl>"if�? ==i�i�&�0s["6'"]�O?�self.namespace, �self.name � Tuple = property(getName� ) class TextNode(Element):A�def __init__(]�2, e!�c, soup'��l�treebuilder_base.O�.=�QNone)Z��3��Q�.P�$ =Z� ��f�! =m����Pclone`���raise NotImp�edError COPYING.txt LICENSE MANIFEST.in NEWS�README.md TODO�convert-py3k setup.cfg �py test-all-versions beautiful�4.egg-info/PKG-INFO!�bSOURCE�E�dependency_links�-�require%� top_levelK�4s4/.py bs4/dammit�tiagnose���4ing� T��html5lib1���mparser�Dlxml��p�s_����_j�_registry9�#�Odocs������,� s/�_� �������_f���<��doc/Makefile doc.zh�source/conf3��p6.1.jpg9�/�&��index.rst scripts/demonstrate_�_difference*�ion_markup3Pbs4 �sieve>=1.2 [!�<$] �0 [ ] � Metadata-V7`: 2.1 *:  "�D4.7.1 Summary: Screen-scraping library Home-page: http://www.crummy.com/software/BePSoup/ Author: Leonard Richardson�-email: l!�r@segfault.org License: MIT Download-URL� d?�B/ DeUion: � �T is a� that makes it easy to " e urmationgfrom web 0s. It sits atop an HTML or XML ,M0vidrPythonicO� idioms for iterating, search �and modify@�2theV� G� �# Quick start� �5``` �a >>> �bs4 import-�, ,��* =#�("<p>Some<b>bad<i>"g�Qprint.prettify($�<>p�i <body�: <p� x�4�K <b#�H bad#� <i>  �UHTML � �J</i>�:</b�9</p�h</body�h</html� >>> soup.find(text="bad")l�eu'bad'� �% >�i�0<i>� ��4� = BeautifulSoup("<tag1>Some �`2/>bad �3>XML", "xml��K�Rprint�prettify(�<?xml version="1.0" encoding="utf-8"�q�1� �� "�92 /$�<bad#�3!�* XY</tag#��z�5```S� �TTo go beyond the basics, [comprehensive documentation is available](http://www.crummy.com/software//bs4/doc/).� �u# Links� �* [Homepago�!H* [D�]�$�`Discus e groupT��s.google�$/b�'/�`evelopiG�s://code.launchpad.netC� Bug trackerC�ObugsC�Complete changelogJ�[azaarL�~leonardr��view/head:/CHANGELOG�I`# BuilD o%� �TI directory contains fulln SphinxE�format. Run `make ` in thatX�to create A� �qRunning�unit test�#  supports 4�� discovery fromQ�project roo�:� �z $ nose�!�6�python -m��@�� -s bs4 # P%�2.7 and upN��If you checked out�source tree,!�should see a scripteK�GhomeacalledoS-all-ts. ThisD�pwill rue�O����7der�,�n a temporary$�3l�6 ofO� heR���s��s again unq�3S�P Plat: UNKNOWN Classifier: Development Sta+tus :: 5 - Production/Stable Classifier: Intended Audience7�Developers,�aLicens"�OSI Approved2�CMIT � 3�Programming Languag@�oPython+��e�?2.72�30�PTopicI�@Text@cess�:: Markup�OHTML7� X6�"/SG7�Software @ment�LibrarieG Modules Descrip0-Cot-Type: text/markdown Provides-Extra: html5lib�Extra: lxml ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������

Here's a tp