itMerge branch 'master' of ssh://r-36.net:443/srv/git/zs - zs - Zeitungsschau rss to email converter Err gopher.r-36.net 70 hgit clone git://r-36.net/zs URL:git://r-36.net/zs gopher.r-36.net 70 1Log /scm/zs/log.gph gopher.r-36.net 70 1Files /scm/zs/files.gph gopher.r-36.net 70 1Refs /scm/zs/refs.gph gopher.r-36.net 70 1LICENSE /scm/zs/file/LICENSE.gph gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1commit 622855cba32e158a1b0cacea44e21361685d2577 /scm/zs/commit/622855cba32e158a1b0cacea44e21361685d2577.gph gopher.r-36.net 70 1parent 10c3bbd37c63294007b0f3c28d47665fc625785b /scm/zs/commit/10c3bbd37c63294007b0f3c28d47665fc625785b.gph gopher.r-36.net 70 hAuthor: Christoph Lohmann <20h@r-36.net> URL:mailto:20h@r-36.net gopher.r-36.net 70 iDate: Thu, 10 Sep 2020 14:11:51 +0200 Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iMerge branch 'master' of ssh://r-36.net:443/srv/git/zs Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iDiffstat: Err gopher.r-36.net 70 i zeitungsschau/feed.py | 12 +++++++----- Err gopher.r-36.net 70 i zeitungsschau/feedemail.py | 10 +++++++--- Err gopher.r-36.net 70 i zs | 5 +++++ Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i3 files changed, 19 insertions(+), 8 deletions(-) Err gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py /scm/zs/file/zeitungsschau/feed.py.gph gopher.r-36.net 70 it@@ -5,8 +5,9 @@ Err gopher.r-36.net 70 i # by 20h Err gopher.r-36.net 70 i # Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i-from lxml import objectify Err gopher.r-36.net 70 i-from lxml import etree Err gopher.r-36.net 70 i+import lxml Err gopher.r-36.net 70 i+import lxml.objectify Err gopher.r-36.net 70 i+import html Err gopher.r-36.net 70 i from datetime import datetime Err gopher.r-36.net 70 i import dateutil.parser Err gopher.r-36.net 70 i from dateutil.tz import gettz Err gopher.r-36.net 70 it@@ -14,7 +15,6 @@ import requests Err gopher.r-36.net 70 i import hashlib Err gopher.r-36.net 70 i import pytz Err gopher.r-36.net 70 i import codecs Err gopher.r-36.net 70 i-import html Err gopher.r-36.net 70 i import urllib.parse Err gopher.r-36.net 70 i import socket Err gopher.r-36.net 70 i import json Err gopher.r-36.net 70 it@@ -44,9 +44,10 @@ def removenamespaces(xml): Err gopher.r-36.net 70 i elem.tag = elem.tag[nsl:] Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def parsexml(astr): Err gopher.r-36.net 70 i- xml = objectify.fromstring(astr) Err gopher.r-36.net 70 i+ xml = lxml.objectify.fromstring(html.unescape(astr.decode("utf-8")).encode("utf-8")) Err gopher.r-36.net 70 i removenamespaces(xml) Err gopher.r-36.net 70 i # Throw XML parsing errors so we can blame the feed authors. Err gopher.r-36.net 70 i+ #print(lxml.objectify.dump(xml)) Err gopher.r-36.net 70 i return xml Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def parsetwtxtfeed(astr, uri): Err gopher.r-36.net 70 it@@ -278,6 +279,8 @@ def parseatomfeed(astr): Err gopher.r-36.net 70 i if hasattr(entry, "updated"): Err gopher.r-36.net 70 i article["updated"] = parseiso(entry.updated,\ Err gopher.r-36.net 70 i now) Err gopher.r-36.net 70 i+ elif hasattr(entry, "temporary"): Err gopher.r-36.net 70 i+ article["updated"] = now Err gopher.r-36.net 70 i elif hasattr(entry, "pubDate"): Err gopher.r-36.net 70 i article["updated"] = parseiso(entry.pubDate,\ Err gopher.r-36.net 70 i now) Err gopher.r-36.net 70 it@@ -397,7 +400,6 @@ def fetch(uri): Err gopher.r-36.net 70 i s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) Err gopher.r-36.net 70 i s.connect((host, port)) Err gopher.r-36.net 70 i s.send(("%s\r\n" % (selector)).encode("utf-8")) Err gopher.r-36.net 70 i- s.shutdown(1) Err gopher.r-36.net 70 i fd = s.makefile("r") Err gopher.r-36.net 70 i fval = fd.read().encode("utf-8") Err gopher.r-36.net 70 i s.close() Err gopher.r-36.net 70 1diff --git a/zeitungsschau/feedemail.py b/zeitungsschau/feedemail.py /scm/zs/file/zeitungsschau/feedemail.py.gph gopher.r-36.net 70 it@@ -13,6 +13,7 @@ from email.header import Header Err gopher.r-36.net 70 i import time Err gopher.r-36.net 70 i import subprocess Err gopher.r-36.net 70 i import lxml.html Err gopher.r-36.net 70 i+import lxml.etree Err gopher.r-36.net 70 i import urllib.parse Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i import html2text Err gopher.r-36.net 70 it@@ -20,10 +21,13 @@ import html2text Err gopher.r-36.net 70 i def normalizeheader(hstr): Err gopher.r-36.net 70 i if len(hstr) == 0: Err gopher.r-36.net 70 i return "" Err gopher.r-36.net 70 i+ try: Err gopher.r-36.net 70 i+ return lxml.html.fromstring(hstr).text_content().\ Err gopher.r-36.net 70 i+ replace(u"\xa0", "").\ Err gopher.r-36.net 70 i+ replace("\n", " ").strip() Err gopher.r-36.net 70 i+ except lxml.etree.ParserError: Err gopher.r-36.net 70 i+ return "" Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i- return lxml.html.fromstring(hstr).text_content().\ Err gopher.r-36.net 70 i- replace(u"\xa0", "").\ Err gopher.r-36.net 70 i- replace("\n", " ").strip() Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i class LocalSendmail(object): Err gopher.r-36.net 70 i cmd="/usr/sbin/sendmail -f \"%s\" \"%s\"" Err gopher.r-36.net 70 1diff --git a/zs b/zs /scm/zs/file/zs.gph gopher.r-36.net 70 it@@ -52,6 +52,11 @@ def run(db, selfeed=None, dryrun=False, onlychanges=False): Err gopher.r-36.net 70 i print("fetch %s" % (feeduri)) Err gopher.r-36.net 70 i curfeed = None Err gopher.r-36.net 70 i rcode = 0 Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ """ Err gopher.r-36.net 70 i+ # All errors. Err gopher.r-36.net 70 i+ (rcode, curfeed) = feed.fetch(feeduri) Err gopher.r-36.net 70 i+ """ Err gopher.r-36.net 70 i try: Err gopher.r-36.net 70 i (rcode, curfeed) = feed.fetch(feeduri) Err gopher.r-36.net 70 i except socket.gaierror: Err gopher.r-36.net 70 .