iAdd JSON Feed support. - zs - Zeitungsschau rss to email converter Err gopher.r-36.net 70 i Err gopher.r-36.net 70 1Log /scm/zs//log.gph gopher.r-36.net 70 1Files /scm/zs//files.gph gopher.r-36.net 70 1Refs /scm/zs//refs.gph gopher.r-36.net 70 1LICENSE /scm/zs//file/LICENSE.gph gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1commit ac54587c59ad0bdd7c84681f295d924f27019644 /scm/zs//commit/ac54587c59ad0bdd7c84681f295d924f27019644.gph gopher.r-36.net 70 1parent 63fe7a682af0938334c13a7132b52b933dafec13 /scm/zs//commit/63fe7a682af0938334c13a7132b52b933dafec13.gph gopher.r-36.net 70 hAuthor: Christoph Lohmann <20h@r-36.net> URL:mailto:20h@r-36.net gopher.r-36.net 70 iDate: Mon, 22 May 2017 19:29:14 +0200 Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iAdd JSON Feed support. Err gopher.r-36.net 70 i Err gopher.r-36.net 70 iDiffstat: Err gopher.r-36.net 70 i zeitungsschau/feed.py | 96 ++++++++++++++++++++++++++++++- Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i1 file changed, 94 insertions(+), 2 deletions(-) Err gopher.r-36.net 70 i--- Err gopher.r-36.net 70 1diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py /scm/zs//file/zeitungsschau/feed.py.gph gopher.r-36.net 70 i@@ -16,6 +16,7 @@ import codecs Err gopher.r-36.net 70 i import html Err gopher.r-36.net 70 i import urllib.parse Err gopher.r-36.net 70 i import socket Err gopher.r-36.net 70 i+import json Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def parseiso(dstr, now): Err gopher.r-36.net 70 i try: Err gopher.r-36.net 70 i@@ -39,7 +40,84 @@ def parsexml(astr): Err gopher.r-36.net 70 i # Throw XML parsing errors so we can blame the feed authors. Err gopher.r-36.net 70 i return xml Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i-def parse(astr): Err gopher.r-36.net 70 i+def parsejson(astr): Err gopher.r-36.net 70 i+ js = json.loads(astr) Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ feed = {} Err gopher.r-36.net 70 i+ articles = [] Err gopher.r-36.net 70 i+ now = datetime.now(pytz.utc) Err gopher.r-36.net 70 i+ now = now.replace(hour=20, minute=20, second=20, microsecond=20) Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ if "title" in js: Err gopher.r-36.net 70 i+ feed["title"] = js["title"] Err gopher.r-36.net 70 i+ if "description" in js: Err gopher.r-36.net 70 i+ feed["description"] = js["description"] Err gopher.r-36.net 70 i+ if "home_page_url" in js: Err gopher.r-36.net 70 i+ feed["link"] = js["home_page_url"] Err gopher.r-36.net 70 i+ if "feed_url" in js: Err gopher.r-36.net 70 i+ feed["link"] = js["feed_url"] Err gopher.r-36.net 70 i+ if "author" in js: Err gopher.r-36.net 70 i+ if "name" in js["author"]: Err gopher.r-36.net 70 i+ feed["author"] = js["author"]["name"] Err gopher.r-36.net 70 i+ feed["updated"] = now Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ for item in js["items"]: Err gopher.r-36.net 70 i+ article = {} Err gopher.r-36.net 70 i+ if "url" in item: Err gopher.r-36.net 70 i+ article["file"] = item["url"] Err gopher.r-36.net 70 i+ if "title" in item: Err gopher.r-36.net 70 i+ article["title"] = item["title"] Err gopher.r-36.net 70 i+ if "id" in item: Err gopher.r-36.net 70 i+ article["id"] = item["id"] Err gopher.r-36.net 70 i+ else: Err gopher.r-36.net 70 i+ if "link" in article: Err gopher.r-36.net 70 i+ article["id"] = article["link"] Err gopher.r-36.net 70 i+ elif "file" in article: Err gopher.r-36.net 70 i+ article["id"] = article["file"] Err gopher.r-36.net 70 i+ else: Err gopher.r-36.net 70 i+ article["id"] = article["text"][:30] Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ if "summary" in item: Err gopher.r-36.net 70 i+ article["text"] = html.unescape(item["summary"]) Err gopher.r-36.net 70 i+ if "content_html" in item: Err gopher.r-36.net 70 i+ article["text"] = html.unescape(item["content_html"]) Err gopher.r-36.net 70 i+ if "content_text" in item: Err gopher.r-36.net 70 i+ article["text"] = html.unescape(item["content_text"]) Err gopher.r-36.net 70 i+ if "date_published" in item: Err gopher.r-36.net 70 i+ article["updated"] = \ Err gopher.r-36.net 70 i+ dateutil.parser.parse(item["date_published"]) Err gopher.r-36.net 70 i+ else: Err gopher.r-36.net 70 i+ article["updated"] = now Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ if article["updated"] == now: Err gopher.r-36.net 70 i+ article["uuid"] = "" Err gopher.r-36.net 70 i+ else: Err gopher.r-36.net 70 i+ article["uuid"] = "%s" % (article["updated"]) Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ for e in ("id", "title", "file"): Err gopher.r-36.net 70 i+ if e in article: Err gopher.r-36.net 70 i+ article["uuid"] = "%s-%s" % \ Err gopher.r-36.net 70 i+ (article["uuid"],\ Err gopher.r-36.net 70 i+ article[e]) Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ def mkuuid(s): Err gopher.r-36.net 70 i+ return hashlib.sha256(str(s).\ Err gopher.r-36.net 70 i+ encode("utf8")).hexdigest() Err gopher.r-36.net 70 i+ if len(article["uuid"]) == 0: Err gopher.r-36.net 70 i+ article["uuid"] = mkuuid(now) Err gopher.r-36.net 70 i+ else: Err gopher.r-36.net 70 i+ article["uuid"] = mkuuid(article["uuid"]) Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ # sanity checks Err gopher.r-36.net 70 i+ if "title" not in article and "text" not in article \ Err gopher.r-36.net 70 i+ and "file" not in article: Err gopher.r-36.net 70 i+ continue Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ articles.append(article) Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ return feed Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+def parseatom(astr): Err gopher.r-36.net 70 i xml = parsexml(astr) Err gopher.r-36.net 70 i if xml == None: Err gopher.r-36.net 70 i return None Err gopher.r-36.net 70 i@@ -246,6 +324,7 @@ def parse(astr): Err gopher.r-36.net 70 i return feed Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i def fetch(uri): Err gopher.r-36.net 70 i+ ftype = "xml" Err gopher.r-36.net 70 i if "file://" in uri: Err gopher.r-36.net 70 i fd = codecs.open(uri[7:], "r", "utf-8") Err gopher.r-36.net 70 i fval = fd.read().encode("utf-8") Err gopher.r-36.net 70 i@@ -280,5 +359,18 @@ def fetch(uri): Err gopher.r-36.net 70 i fval = fd.content Err gopher.r-36.net 70 i rcode = fd.status_code Err gopher.r-36.net 70 i Err gopher.r-36.net 70 i- return (rcode, parse(fval)) Err gopher.r-36.net 70 i+ if "Content-Type" in fd.headers: Err gopher.r-36.net 70 i+ if "application/json" in fd.headers["Content-Type"]: Err gopher.r-36.net 70 i+ ftype = "json" Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ if ftype == "xml": Err gopher.r-36.net 70 i+ suri = uri.lower().rsplit(".", 1) Err gopher.r-36.net 70 i+ if len(suri) > 1: Err gopher.r-36.net 70 i+ if suri[-1] == "json": Err gopher.r-36.net 70 i+ ftype = "json" Err gopher.r-36.net 70 i+ Err gopher.r-36.net 70 i+ if ftype == "xml": Err gopher.r-36.net 70 i+ return (rcode, parsexml(fval)) Err gopher.r-36.net 70 i+ else: Err gopher.r-36.net 70 i+ return (rcode, parsejson(fval)) Err gopher.r-36.net 70 i Err gopher.r-36.net 70 .