Add handling for xml syntax errors. - zs - Zeitungsschau rss to email converter
       
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 18454a1fe6f19aa9cdd780128a713066ee23ba9d
 (DIR) parent 0626d417d52845d03223244faa8238210ff87229
 (HTM) Author: Christoph Lohmann <20h@r-36.net>
       Date:   Wed, 19 Mar 2014 18:14:03 +0100
       
       Add handling for xml syntax errors.
       
       If XML fails, try HTML. If that fails, bail.
       
       Diffstat:
         feed.py                             |      15 +++++++++++++--
         zs.py                               |       3 +++
       
       2 files changed, 16 insertions(+), 2 deletions(-)
       ---
 (DIR) diff --git a/feed.py b/feed.py
       @@ -6,6 +6,7 @@
        #
        
        from lxml import objectify 
       +from lxml import etree
        from datetime import datetime
        import dateutil.parser
        import urllib.request, urllib.parse, urllib.error
       @@ -23,12 +24,22 @@ def removenamespaces(xml):
                                        elem.tag = elem.tag[nsl:]
        
        def parsexml(astr):
       -        xml = objectify.fromstring(astr)
       -        removenamespaces(xml)
       +        try:
       +                xml = objectify.fromstring(astr)
       +                removenamespaces(xml)
       +        except etree.XMLSyntaxError:
       +                try:
       +                        parser = etree.HTMLParser()
       +                        xml = objectify.fromstring(astr)
       +                        removenamespaces(xml)
       +                except etree.XMLSyntaxError:
       +                        return None
                return xml
        
        def parse(astr):
                xml = parsexml(astr)
       +        if xml == None:
       +                return None
        
                feed = {}
                articles = []
 (DIR) diff --git a/zs.py b/zs.py
       @@ -46,6 +46,9 @@ def run(db, selfeed=None, dryrun=False):
                                estr = "incompleteread"
                                continue
        
       +                if curfeed == None:
       +                        continue
       +
                        # retry handling
                        if estr != None:
                                if retries > 2: