itParse subjects for HTML content and clean it. - zs - Zeitungsschau rss to email converter Err gopher.r-36.net 70 hgit clone git://r-36.net/zs URL:git://r-36.net/zs gopher.r-36.net 70 1Log /scm/zs//log.gph gopher.r-36.net 70 1Files /scm/zs//files.gph gopher.r-36.net 70 1Refs /scm/zs//refs.gph gopher.r-36.net 70 1LICENSE /scm/zs//file/LICENSE.gph gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1commit cc1e0defe58a83c1d59a31d72b7e8e7dec726883 /scm/zs//commit/cc1e0defe58a83c1d59a31d72b7e8e7dec726883.gph gopher.r-36.net 70 1parent ee341c7915c2d85c90eb6deef11c964fb88986fa /scm/zs//commit/ee341c7915c2d85c90eb6deef11c964fb88986fa.gph gopher.r-36.net 70 hAuthor: Christoph Lohmann <20h@r-36.net> URL:mailto:20h@r-36.net gopher.r-36.net 70 iDate: Wed, 24 Jan 2018 14:07:16 +0100 Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iParse subjects for HTML content and clean it. Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iSome RSS feeds have escaped HTML in escaped HTML in subjects. We need to Err gopher.r-36.net 70 iclean this mess up manually on our side. Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iDiffstat: Err gopher.r-36.net 70 i zeitungsschau/feedemail.py | 7 +++++-- Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i1 file changed, 5 insertions(+), 2 deletions(-) Err gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1diff --git a/zeitungsschau/feedemail.py b/zeitungsschau/feedemail.py /scm/zs//file/zeitungsschau/feedemail.py.gph gopher.r-36.net 70 it@@ -12,11 +12,14 @@ from email.utils import formataddr, formatdate, parseaddr Err gopher.r-36.net 70 i from email.header import Header Err gopher.r-36.net 70 i import time Err gopher.r-36.net 70 i import subprocess Err gopher.r-36.net 70 i+import lxml.html Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i import html2text Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def normalizeheader(hstr): Err gopher.r-36.net 70 i- return hstr.replace("\n", " ").strip() Err gopher.r-36.net 70 i+ return lxml.html.fromstring(hstr).text_content().\ Err gopher.r-36.net 70 i+ replace(u"\xa0", "").\ Err gopher.r-36.net 70 i+ replace("\n", " ").strip() Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i class LocalSendmail(object): Err gopher.r-36.net 70 i cmd="/usr/sbin/sendmail -f \"%s\" \"%s\"" Err gopher.r-36.net 70 it@@ -58,7 +61,7 @@ def send(feed, to, smtphost="localhost", smtpport=None, ssl="False",\ Err gopher.r-36.net 70 i normalizeheader(article["title"]),\ Err gopher.r-36.net 70 i "utf-8") Err gopher.r-36.net 70 i else: Err gopher.r-36.net 70 i- subject = Header(normalizeheader(text[:70]),\ Err gopher.r-36.net 70 i+ subject = Header(normalizeheader(text[:20]),\ Err gopher.r-36.net 70 i "utf-8") Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i # Append metadata. Err gopher.r-36.net 70 .