iFirst HTML parsing with missing entities. - zs - Zeitungsschau rss to email converter Err gopher.r-36.net 70 i Err gopher.r-36.net 70 1Log /scm/zs//log.gph gopher.r-36.net 70 1Files /scm/zs//files.gph gopher.r-36.net 70 1Refs /scm/zs//refs.gph gopher.r-36.net 70 1LICENSE /scm/zs//file/LICENSE.gph gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1commit 45fa8f0de67209e4cceeb1df6e3439efe55e7617 /scm/zs//commit/45fa8f0de67209e4cceeb1df6e3439efe55e7617.gph gopher.r-36.net 70 1parent f9891c3921193415865fa7d3aae02407dea12ab0 /scm/zs//commit/f9891c3921193415865fa7d3aae02407dea12ab0.gph gopher.r-36.net 70 hAuthor: Christoph Lohmann <20h@r-36.net> URL:mailto:20h@r-36.net gopher.r-36.net 70 iDate: Wed, 11 Nov 2015 18:02:11 +0100 Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iFirst HTML parsing with missing entities. Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iDiffstat: Err gopher.r-36.net 70 i zeitungsschau/feed.py | 10 ++++++++-- Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i1 file changed, 8 insertions(+), 2 deletions(-) Err gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py /scm/zs//file/zeitungsschau/feed.py.gph gopher.r-36.net 70 i@@ -33,10 +33,12 @@ def parsexml(astr): Err gopher.r-36.net 70 i except etree.XMLSyntaxError: Err gopher.r-36.net 70 i try: Err gopher.r-36.net 70 i parser = etree.HTMLParser() Err gopher.r-36.net 70 i- xml = objectify.fromstring(astr) Err gopher.r-36.net 70 i+ xml = objectify.fromstring(astr, parser) Err gopher.r-36.net 70 i removenamespaces(xml) Err gopher.r-36.net 70 i except etree.XMLSyntaxError: Err gopher.r-36.net 70 i- return None Err gopher.r-36.net 70 i+ parser = etree.XMLParser(resolve_entities=False) Err gopher.r-36.net 70 i+ xml = objectify.fromstring(astr, parser) Err gopher.r-36.net 70 i+ removenamespaces(xml) Err gopher.r-36.net 70 i return xml Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def parse(astr): Err gopher.r-36.net 70 i@@ -50,6 +52,10 @@ def parse(astr): Err gopher.r-36.net 70 i isrdf = False Err gopher.r-36.net 70 i now = datetime.now(pytz.utc) Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i+ feede = xml.xpath(".//feed") Err gopher.r-36.net 70 i+ if len(feede) > 0: Err gopher.r-36.net 70 i+ xml = feede[0] Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i if hasattr(xml, "channel"): Err gopher.r-36.net 70 i if hasattr(xml, "item"): Err gopher.r-36.net 70 i isrdf = True Err gopher.r-36.net 70 .