Merge branch 'master' of ssh://r-36.net:443/srv/git/zs - zs - Zeitungsschau rss to email converter
 (HTM) git clone git://r-36.net/zs
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 622855cba32e158a1b0cacea44e21361685d2577
 (DIR) parent 10c3bbd37c63294007b0f3c28d47665fc625785b
 (HTM) Author: Christoph Lohmann <20h@r-36.net>
       Date:   Thu, 10 Sep 2020 14:11:51 +0200
       
       Merge branch 'master' of ssh://r-36.net:443/srv/git/zs
       
       Diffstat:
         zeitungsschau/feed.py               |      12 +++++++-----
         zeitungsschau/feedemail.py          |      10 +++++++---
         zs                                  |       5 +++++
       
       3 files changed, 19 insertions(+), 8 deletions(-)
       ---
 (DIR) diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py
       @@ -5,8 +5,9 @@
        # by 20h
        #
        
       -from lxml import objectify 
       -from lxml import etree
       +import lxml
       +import lxml.objectify
       +import html
        from datetime import datetime
        import dateutil.parser
        from dateutil.tz import gettz
       @@ -14,7 +15,6 @@ import requests
        import hashlib
        import pytz
        import codecs
       -import html
        import urllib.parse
        import socket
        import json
       @@ -44,9 +44,10 @@ def removenamespaces(xml):
                                        elem.tag = elem.tag[nsl:]
        
        def parsexml(astr):
       -        xml = objectify.fromstring(astr)
       +        xml = lxml.objectify.fromstring(html.unescape(astr.decode("utf-8")).encode("utf-8"))
                removenamespaces(xml)
                # Throw XML parsing errors so we can blame the feed authors.
       +        #print(lxml.objectify.dump(xml))
                return xml
        
        def parsetwtxtfeed(astr, uri):
       @@ -278,6 +279,8 @@ def parseatomfeed(astr):
                                        if hasattr(entry, "updated"):
                                                article["updated"] = parseiso(entry.updated,\
                                                                now)
       +                                elif hasattr(entry, "temporary"):
       +                                        article["updated"] = now
                                        elif hasattr(entry, "pubDate"):
                                                article["updated"] = parseiso(entry.pubDate,\
                                                                now)
       @@ -397,7 +400,6 @@ def fetch(uri):
                        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                        s.connect((host, port))
                        s.send(("%s\r\n" % (selector)).encode("utf-8"))
       -                s.shutdown(1)
                        fd = s.makefile("r")
                        fval = fd.read().encode("utf-8")
                        s.close()
 (DIR) diff --git a/zeitungsschau/feedemail.py b/zeitungsschau/feedemail.py
       @@ -13,6 +13,7 @@ from email.header import Header
        import time
        import subprocess
        import lxml.html
       +import lxml.etree
        import urllib.parse
        
        import html2text
       @@ -20,10 +21,13 @@ import html2text
        def normalizeheader(hstr):
                if len(hstr) == 0:
                        return ""
       +        try:
       +                return lxml.html.fromstring(hstr).text_content().\
       +                                replace(u"\xa0", "").\
       +                                replace("\n", " ").strip()
       +        except lxml.etree.ParserError:
       +                return ""
        
       -        return lxml.html.fromstring(hstr).text_content().\
       -                        replace(u"\xa0", "").\
       -                        replace("\n", " ").strip()
        
        class LocalSendmail(object):
                cmd="/usr/sbin/sendmail -f \"%s\" \"%s\""
 (DIR) diff --git a/zs b/zs
       @@ -52,6 +52,11 @@ def run(db, selfeed=None, dryrun=False, onlychanges=False):
                                print("fetch %s" % (feeduri))
                        curfeed = None
                        rcode = 0
       +
       +                """
       +                # All errors.
       +                (rcode, curfeed) = feed.fetch(feeduri)
       +                """
                        try:
                                (rcode, curfeed) = feed.fetch(feeduri)
                        except socket.gaierror: