iAdd handling for xml syntax errors. - zs - Zeitungsschau rss to email converter Err gopher.r-36.net 70 i Err gopher.r-36.net 70 1Log /scm/zs//log.gph gopher.r-36.net 70 1Files /scm/zs//files.gph gopher.r-36.net 70 1Refs /scm/zs//refs.gph gopher.r-36.net 70 1LICENSE /scm/zs//file/LICENSE.gph gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1commit 18454a1fe6f19aa9cdd780128a713066ee23ba9d /scm/zs//commit/18454a1fe6f19aa9cdd780128a713066ee23ba9d.gph gopher.r-36.net 70 1parent 0626d417d52845d03223244faa8238210ff87229 /scm/zs//commit/0626d417d52845d03223244faa8238210ff87229.gph gopher.r-36.net 70 hAuthor: Christoph Lohmann <20h@r-36.net> URL:mailto:20h@r-36.net gopher.r-36.net 70 iDate: Wed, 19 Mar 2014 18:14:03 +0100 Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iAdd handling for xml syntax errors. Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iIf XML fails, try HTML. If that fails, bail. Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iDiffstat: Err gopher.r-36.net 70 i feed.py | 15 +++++++++++++-- Err gopher.r-36.net 70 i zs.py | 3 +++ Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i2 files changed, 16 insertions(+), 2 deletions(-) Err gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1diff --git a/feed.py b/feed.py /scm/zs//file/feed.py.gph gopher.r-36.net 70 i@@ -6,6 +6,7 @@ Err gopher.r-36.net 70 i # Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i from lxml import objectify Err gopher.r-36.net 70 i+from lxml import etree Err gopher.r-36.net 70 i from datetime import datetime Err gopher.r-36.net 70 i import dateutil.parser Err gopher.r-36.net 70 i import urllib.request, urllib.parse, urllib.error Err gopher.r-36.net 70 i@@ -23,12 +24,22 @@ def removenamespaces(xml): Err gopher.r-36.net 70 i elem.tag = elem.tag[nsl:] Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def parsexml(astr): Err gopher.r-36.net 70 i- xml = objectify.fromstring(astr) Err gopher.r-36.net 70 i- removenamespaces(xml) Err gopher.r-36.net 70 i+ try: Err gopher.r-36.net 70 i+ xml = objectify.fromstring(astr) Err gopher.r-36.net 70 i+ removenamespaces(xml) Err gopher.r-36.net 70 i+ except etree.XMLSyntaxError: Err gopher.r-36.net 70 i+ try: Err gopher.r-36.net 70 i+ parser = etree.HTMLParser() Err gopher.r-36.net 70 i+ xml = objectify.fromstring(astr) Err gopher.r-36.net 70 i+ removenamespaces(xml) Err gopher.r-36.net 70 i+ except etree.XMLSyntaxError: Err gopher.r-36.net 70 i+ return None Err gopher.r-36.net 70 i return xml Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def parse(astr): Err gopher.r-36.net 70 i xml = parsexml(astr) Err gopher.r-36.net 70 i+ if xml == None: Err gopher.r-36.net 70 i+ return None Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i feed = {} Err gopher.r-36.net 70 i articles = [] Err gopher.r-36.net 70 1diff --git a/zs.py b/zs.py /scm/zs//file/zs.py.gph gopher.r-36.net 70 i@@ -46,6 +46,9 @@ def run(db, selfeed=None, dryrun=False): Err gopher.r-36.net 70 i estr = "incompleteread" Err gopher.r-36.net 70 i continue Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i+ if curfeed == None: Err gopher.r-36.net 70 i+ continue Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i # retry handling Err gopher.r-36.net 70 i if estr != None: Err gopher.r-36.net 70 i if retries > 2: Err gopher.r-36.net 70 .