iFirst HTML parsing with missing entities. - zs - Zeitungsschau rss to email converter Err gopher.r-36.net 70
i Err gopher.r-36.net 70
1Log /scm/zs//log.gph gopher.r-36.net 70
1Files /scm/zs//files.gph gopher.r-36.net 70
1Refs /scm/zs//refs.gph gopher.r-36.net 70
1LICENSE /scm/zs//file/LICENSE.gph gopher.r-36.net 70
i--- Err gopher.r-36.net 70
1commit 45fa8f0de67209e4cceeb1df6e3439efe55e7617 /scm/zs//commit/45fa8f0de67209e4cceeb1df6e3439efe55e7617.gph gopher.r-36.net 70
1parent f9891c3921193415865fa7d3aae02407dea12ab0 /scm/zs//commit/f9891c3921193415865fa7d3aae02407dea12ab0.gph gopher.r-36.net 70
hAuthor: Christoph Lohmann <20h@r-36.net> URL:mailto:20h@r-36.net gopher.r-36.net 70
iDate: Wed, 11 Nov 2015 18:02:11 +0100 Err gopher.r-36.net 70
i Err gopher.r-36.net 70
iFirst HTML parsing with missing entities. Err gopher.r-36.net 70
i Err gopher.r-36.net 70
iDiffstat: Err gopher.r-36.net 70
i zeitungsschau/feed.py | 10 ++++++++-- Err gopher.r-36.net 70
i Err gopher.r-36.net 70
i1 file changed, 8 insertions(+), 2 deletions(-) Err gopher.r-36.net 70
i--- Err gopher.r-36.net 70
1diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py /scm/zs//file/zeitungsschau/feed.py.gph gopher.r-36.net 70
i@@ -33,10 +33,12 @@ def parsexml(astr): Err gopher.r-36.net 70
i except etree.XMLSyntaxError: Err gopher.r-36.net 70
i try: Err gopher.r-36.net 70
i parser = etree.HTMLParser() Err gopher.r-36.net 70
i- xml = objectify.fromstring(astr) Err gopher.r-36.net 70
i+ xml = objectify.fromstring(astr, parser) Err gopher.r-36.net 70
i removenamespaces(xml) Err gopher.r-36.net 70
i except etree.XMLSyntaxError: Err gopher.r-36.net 70
i- return None Err gopher.r-36.net 70
i+ parser = etree.XMLParser(resolve_entities=False) Err gopher.r-36.net 70
i+ xml = objectify.fromstring(astr, parser) Err gopher.r-36.net 70
i+ removenamespaces(xml) Err gopher.r-36.net 70
i return xml Err gopher.r-36.net 70
i Err gopher.r-36.net 70
i def parse(astr): Err gopher.r-36.net 70
i@@ -50,6 +52,10 @@ def parse(astr): Err gopher.r-36.net 70
i isrdf = False Err gopher.r-36.net 70
i now = datetime.now(pytz.utc) Err gopher.r-36.net 70
i Err gopher.r-36.net 70
i+ feede = xml.xpath(".//feed") Err gopher.r-36.net 70
i+ if len(feede) > 0: Err gopher.r-36.net 70
i+ xml = feede[0] Err gopher.r-36.net 70
i+ Err gopher.r-36.net 70
i if hasattr(xml, "channel"): Err gopher.r-36.net 70
i if hasattr(xml, "item"): Err gopher.r-36.net 70
i isrdf = True Err gopher.r-36.net 70
.