gopher.r-36.net

       Add JSON Feed support. - zs - Zeitungsschau rss to email converter
       
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit ac54587c59ad0bdd7c84681f295d924f27019644
 (DIR) parent 63fe7a682af0938334c13a7132b52b933dafec13
 (HTM) Author: Christoph Lohmann <20h@r-36.net>
       Date:   Mon, 22 May 2017 19:29:14 +0200
       
       Add JSON Feed support.
       
       Diffstat:
         zeitungsschau/feed.py               |      96 ++++++++++++++++++++++++++++++-
       
       1 file changed, 94 insertions(+), 2 deletions(-)
       ---
 (DIR) diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py
       @@ -16,6 +16,7 @@ import codecs
        import html
        import urllib.parse
        import socket
       +import json
        
        def parseiso(dstr, now):
                try:
       @@ -39,7 +40,84 @@ def parsexml(astr):
                # Throw XML parsing errors so we can blame the feed authors.
                return xml
        
       -def parse(astr):
       +def parsejson(astr):
       +        js = json.loads(astr)
       +
       +        feed = {}
       +        articles = []
       +        now = datetime.now(pytz.utc)
       +        now = now.replace(hour=20, minute=20, second=20, microsecond=20)
       +
       +        if "title" in js:
       +                feed["title"] = js["title"]
       +        if "description" in js:
       +                feed["description"] = js["description"]
       +        if "home_page_url" in js:
       +                feed["link"] = js["home_page_url"]
       +        if "feed_url" in js:
       +                feed["link"] = js["feed_url"]
       +        if "author" in js:
       +                if "name" in js["author"]:
       +                        feed["author"] = js["author"]["name"]
       +        feed["updated"] = now
       +
       +        for item in js["items"]:
       +                article = {}
       +                if "url" in item:
       +                        article["file"] = item["url"]
       +                if "title" in item:
       +                        article["title"] = item["title"]
       +                if "id" in item:
       +                        article["id"] = item["id"]
       +                else:
       +                        if "link" in article:
       +                                article["id"] = article["link"]
       +                        elif "file" in article:
       +                                article["id"] = article["file"]
       +                        else:
       +                                article["id"] = article["text"][:30]
       +
       +                if "summary" in item:
       +                        article["text"] = html.unescape(item["summary"])
       +                if "content_html" in item:
       +                        article["text"] = html.unescape(item["content_html"])
       +                if "content_text" in item:
       +                        article["text"] = html.unescape(item["content_text"])
       +                if "date_published" in item:
       +                        article["updated"] = \
       +                                dateutil.parser.parse(item["date_published"])
       +                else:
       +                        article["updated"] = now
       +
       +                if article["updated"] == now:
       +                        article["uuid"] = ""
       +                else:
       +                        article["uuid"] = "%s" % (article["updated"])
       +
       +                for e in ("id", "title", "file"):
       +                        if e in article:
       +                                article["uuid"] = "%s-%s" % \
       +                                        (article["uuid"],\
       +                                         article[e])
       +
       +                def mkuuid(s):
       +                        return hashlib.sha256(str(s).\
       +                                encode("utf8")).hexdigest()
       +                if len(article["uuid"]) == 0:
       +                        article["uuid"] = mkuuid(now)
       +                else:
       +                        article["uuid"] = mkuuid(article["uuid"])
       +
       +                # sanity checks
       +                if "title" not in article and "text" not in article \
       +                                and "file" not in article:
       +                        continue
       +
       +                articles.append(article)
       +
       +        return feed
       +
       +def parseatom(astr):
                xml = parsexml(astr)
                if xml == None:
                        return None
       @@ -246,6 +324,7 @@ def parse(astr):
                return feed
        
        def fetch(uri):
       +        ftype = "xml"
                if "file://" in uri:
                        fd = codecs.open(uri[7:], "r", "utf-8")
                        fval = fd.read().encode("utf-8")
       @@ -280,5 +359,18 @@ def fetch(uri):
                        fval = fd.content
                        rcode = fd.status_code
        
       -        return (rcode, parse(fval))
       +                if "Content-Type" in fd.headers:
       +                        if "application/json" in fd.headers["Content-Type"]:
       +                                ftype = "json"
       +
       +        if ftype == "xml":
       +                suri = uri.lower().rsplit(".", 1)
       +                if len(suri) > 1:
       +                        if suri[-1] == "json":
       +                                ftype = "json"
       +
       +        if ftype == "xml":
       +                return (rcode, parsexml(fval))
       +        else:
       +                return (rcode, parsejson(fval))