Initial commit of Zeitungsschau. - zs - Zeitungsschau rss to email converter
       
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 5a5d10ddc8ffc58403a4469fa04edf781148e9d7
 (HTM) Author: Christoph Lohmann <20h@r-36.net>
       Date:   Sun,  9 Mar 2014 18:26:25 +0100
       
       Initial commit of Zeitungsschau.
       
       Diffstat:
         feed.py                             |     170 +++++++++++++++++++++++++++++++
         feeddb.py                           |     180 +++++++++++++++++++++++++++++++
         feedemail.py                        |      97 ++++++++++++++++++++++++++++++
         opml.py                             |      51 +++++++++++++++++++++++++++++++
         zs.py                               |     122 +++++++++++++++++++++++++++++++
       
       5 files changed, 620 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/feed.py b/feed.py
       @@ -0,0 +1,170 @@
       +#
       +# Copy me if you can.
       +# by 20h
       +#
       +
       +from lxml import objectify 
       +from datetime import datetime
       +import dateutil.parser
       +import urllib.request, urllib.parse, urllib.error
       +
       +def parseiso(dstr):
       +        """Parse a date value with dateutil and return the result.
       +
       +        The argument is converted with str() first, so non-string
       +        objects can be passed in directly.
       +        """
       +        text = str(dstr)
       +        return dateutil.parser.parse(text)
       +
       +def removenamespaces(xml):
       +        """Strip the namespaces declared on the root element from all tags.
       +
       +        Every element yielded by xml.iter() whose tag starts with
       +        "{uri}", for a uri listed in xml.nsmap, is renamed in place to
       +        the remainder of the tag. Nothing is returned.
       +        """
       +        prefixes = [u'{%s}' % (uri) for uri in xml.nsmap.values()]
       +
       +        # iter() replaces the deprecated getiterator().
       +        for elem in xml.iter():
       +                # Comments and processing instructions do not have a
       +                # string tag; leave them alone instead of crashing.
       +                if not isinstance(elem.tag, str):
       +                        continue
       +                for nsstr in prefixes:
       +                        if elem.tag.startswith(nsstr):
       +                                elem.tag = elem.tag[len(nsstr):]
       +                                break
       +
       +def parsexml(astr):
       +        """Parse astr with lxml.objectify and drop the root's namespaces."""
       +        root = objectify.fromstring(astr)
       +        removenamespaces(root)
       +        return root
       +
       +def parse(astr):
       +        """Parse a feed document into a dictionary.
       +
       +        The result always has "title" ("" when none is found), "updated"
       +        and "articles" (a list with one dict per entry/item). The keys
       +        "description", "link", "email" and "author" are only set when
       +        the document provides them. Each article may carry "title",
       +        "link", "id", "file", "author", "tags" and "text", and always
       +        carries "updated".
       +        """
       +        xml = parsexml(astr)
       +
       +        feed = {}
       +        articles = []
       +        isrss = False
       +        isrdf = False
       +
       +        # With a <channel> the metadata is read from it. If <item>
       +        # elements sit next to <channel> rather than inside it, the
       +        # original root is kept for the article loop below.
       +        if hasattr(xml, "channel"):
       +                if hasattr(xml, "item"):
       +                        isrdf = True
       +                        oxml = xml
       +                xml = xml.channel
       +                isrss = True
       +
       +        feed["title"] = ""
       +        for e in ("title", "description"):
       +                if hasattr(xml, e):
       +                        feed[e] = str(xml[e])
       +
       +        # "title" is always present at this point, so the fallback has
       +        # to test for an empty title, not for a missing key.
       +        if hasattr(xml, "image") and hasattr(xml.image, "title"):
       +                if not feed["title"]:
       +                        feed["title"] = str(xml.image.title)
       +
       +        # First matching date element wins; fall back to "now".
       +        for e in ("updated", "pubDate", "lastBuildDate"):
       +                if hasattr(xml, e):
       +                        feed["updated"] = parseiso(xml[e])
       +                        break
       +        else:
       +                feed["updated"] = datetime.now()
       +
       +        # Prefer the href attribute over the element text.
       +        if hasattr(xml, "link"):
       +                if "href" in xml.link.attrib:
       +                        feed["link"] = str(xml.link.attrib["href"])
       +                else:
       +                        feed["link"] = str(xml.link)
       +
       +        if hasattr(xml, "webmaster"):
       +                feed["email"] = str(xml.webmaster)
       +        elif hasattr(xml, "owner") and hasattr(xml.owner, "email"):
       +                feed["email"] = str(xml.owner.email)
       +        elif hasattr(xml, "author") and hasattr(xml.author, "email"):
       +                feed["email"] = str(xml.author.email)
       +        elif hasattr(xml, "webMaster"):
       +                feed["email"] = str(xml.webMaster)
       +        elif hasattr(xml, "managingeditor"):
       +                feed["email"] = str(xml.managingeditor)
       +        elif hasattr(xml, "managingEditor"):
       +                feed["email"] = str(xml.managingEditor)
       +
       +        if hasattr(xml, "author"):
       +                if hasattr(xml.author, "name"):
       +                        feed["author"] = str(xml.author.name)
       +                else:
       +                        feed["author"] = str(xml.author)
       +        elif hasattr(xml, "creator"):
       +                feed["author"] = str(xml.creator)
       +
       +        entryname = "entry"
       +        if isrss or isrdf:
       +                entryname = "item"
       +        if isrdf:
       +                xml = oxml
       +        if hasattr(xml, entryname):
       +                for entry in xml[entryname][:]:
       +                        article = {}
       +                        # title
       +                        if hasattr(entry, "title"):
       +                                article["title"] = str(entry["title"])
       +
       +                        # link
       +                        if hasattr(entry, "link"):
       +                                if "href" in entry.link.attrib:
       +                                        article["link"] = \
       +                                                str(entry.link.attrib["href"])
       +                                else:
       +                                        article["link"] = str(entry.link)
       +                        elif hasattr(entry, "source"):
       +                                article["link"] = str(entry.source)
       +
       +                        # id
       +                        if hasattr(entry, "id"):
       +                                article["id"] = str(entry["id"])
       +
       +                        # enclosure
       +                        if hasattr(entry, "enclosure"):
       +                                if "href" in entry.enclosure.attrib:
       +                                        article["file"] = \
       +                                                str(entry.enclosure.attrib["href"])
       +                                elif "url" in entry.enclosure.attrib:
       +                                        article["file"] = \
       +                                                str(entry.enclosure.attrib["url"])
       +                                else:
       +                                        article["file"] = str(entry.enclosure)
       +
       +                        # updated
       +                        for e in ("updated", "pubDate", "date"):
       +                                if hasattr(entry, e):
       +                                        article["updated"] = parseiso(entry[e])
       +                                        break
       +                        else:
       +                                article["updated"] = datetime.now()
       +
       +                        # author
       +                        if hasattr(entry, "author"):
       +                                if hasattr(entry.author, "name"):
       +                                        article["author"] = str(entry.author.name)
       +                                else:
       +                                        article["author"] = str(entry.author)
       +                        elif hasattr(entry, "creator"):
       +                                article["author"] = str(entry.creator)
       +
       +                        # tags
       +                        if hasattr(entry, "category"):
       +                                article["tags"] = [str(cat) for cat in \
       +                                                entry["category"][:]]
       +
       +                        # text: first available body element wins
       +                        for e in ("encoded", "content", "summary", \
       +                                        "description"):
       +                                if hasattr(entry, e):
       +                                        article["text"] = str(entry[e])
       +                                        break
       +
       +                        articles.append(article)
       +        feed["articles"] = articles
       +
       +        return feed
       +
       +# Opener subclass whose "version" names this program.
       +# NOTE(review): FancyURLopener is a legacy, deprecated class; confirm it
       +# still exists in every Python version this has to run on.
       +class feedopener(urllib.request.FancyURLopener):
       +        version = "Zeitungsschau/1.0"
       +# NOTE(review): this stores the class itself (not an instance) in a
       +# private module attribute; verify that urllib.request.urlopen() consults
       +# it at all.
       +urllib.request._urlopener = feedopener
       +
       +def fetch(uri):
       +        """Download uri and return the feed dictionary built by parse().
       +
       +        The User-Agent header is set explicitly from feedopener.version
       +        because urllib.request.urlopen() works with its own opener. The
       +        response is closed as soon as the body has been read.
       +        """
       +        req = urllib.request.Request(uri, \
       +                        headers={"User-Agent": feedopener.version})
       +        with urllib.request.urlopen(req) as resp:
       +                data = resp.read()
       +        return parse(data)
       +
 (DIR) diff --git a/feeddb.py b/feeddb.py
       @@ -0,0 +1,180 @@
       +#!/usr/bin/env python
       +# coding=utf-8
       +#
       +# Copy me if you can.
       +# by 20h
       +#
       +
       +import shelve
       +import os
       +import os.path
       +import fcntl
       +from subprocess import Popen
       +
       +class feeddb(object):
       +        """Persistent store for the subscribed feeds and the configuration.
       +
       +        State lives in a shelve database. An exclusive lock on
       +        "<dbpath>.lck" is taken in __init__ and held until close(). The
       +        "feeds" and "cfg" entries are loaded into self.feeds and
       +        self.cfg and are only written back by close().
       +        """
       +        db = None
       +        lockf = None
       +
       +        def __init__(self, path="~/.zs/feed.db", email=None):
       +                """Lock and open the database at path.
       +
       +                The directory of path is created when it is missing.
       +                email is accepted for interface compatibility but is
       +                not used here.
       +                """
       +                # Per-instance dicts: class-level ones would be shared
       +                # (and mutated) across all feeddb instances.
       +                self.feeds = {}
       +                self.cfg = {}
       +
       +                dbpath = os.path.expanduser(path)
       +                path = os.path.abspath(os.path.dirname(dbpath))
       +                if not os.path.exists(path):
       +                        os.makedirs(path, 0o750)
       +                lockpath = "%s.lck" % (dbpath)
       +                self.lockf = open(lockpath, "w")
       +                fcntl.lockf(self.lockf.fileno(), fcntl.LOCK_EX)
       +                self.db = shelve.open(dbpath)
       +                if "feeds" in self.db:
       +                        self.feeds = self.db["feeds"]
       +                if "cfg" in self.db:
       +                        self.cfg = self.db["cfg"]
       +
       +                if not "email" in self.cfg:
       +                        print("You need to specify the default email. Please "\
       +                                        "run 'zs cfg email me@me.com' to "\
       +                                        "set it.")
       +
       +                # Fill in the SMTP defaults without touching stored values.
       +                for key, default in (("smtphost", "localhost"), \
       +                                ("smtpport", None), ("smtpssl", False), \
       +                                ("smtpuser", None), ("smtppassword", None)):
       +                        self.cfg.setdefault(key, default)
       +
       +        def close(self):
       +                """Write feeds and cfg back, close the shelf, drop the lock.
       +
       +                Safe to call more than once.
       +                """
       +                if self.db != None:
       +                        self.db["feeds"] = self.feeds
       +                        self.db["cfg"] = self.cfg
       +                        self.db.close()
       +                        self.db = None
       +                if self.lockf != None:
       +                        # Release with lockf(), the call the lock was
       +                        # taken with; flock() is a separate interface.
       +                        fcntl.lockf(self.lockf.fileno(), fcntl.LOCK_UN)
       +                        self.lockf.close()
       +                        self.lockf = None
       +
       +        def __del__(self):
       +                self.close()
       +
       +        def readfeed(self, uri):
       +                """Return the stored feed dict for uri, or None if unknown."""
       +                if not uri in self.feeds:
       +                        return None
       +                return self.feeds[uri]
       +
       +        def writefeed(self, uri, feed):
       +                """Store feed under uri, replacing any previous entry."""
       +                self.feeds[uri] = feed
       +
       +        def sethook(self, uri, hookfile):
       +                """Set the hook command of a known feed; unknown uris are ignored."""
       +                self.setfeedval(uri, "hook", hookfile)
       +
       +        def runhook(self, uri):
       +                """Run the feed's hook command in a forked child process.
       +
       +                Does nothing when the feed is unknown, has no hook, or
       +                the hook path does not exist. The parent returns without
       +                waiting for the hook to finish.
       +                """
       +                feed = self.readfeed(uri)
       +                if feed == None:
       +                        return
       +                if not "hook" in feed:
       +                        return
       +
       +                cmd = os.path.expanduser(feed["hook"])
       +                if not os.path.exists(cmd):
       +                        return
       +
       +                if os.fork() == 0:
       +                        # Child: it must never return into the caller's
       +                        # code, and os._exit() also keeps it from running
       +                        # __del__ on its copy of the database.
       +                        status = 1
       +                        try:
       +                                with open(os.devnull, "w") as fd:
       +                                        p = Popen(cmd, shell=True, \
       +                                                        stdout=fd, stderr=fd)
       +                                        status = p.wait()
       +                        finally:
       +                                os._exit(status)
       +
       +        def setfeedval(self, uri, key, value):
       +                """Set feed[key] = value for a known feed; no-op otherwise."""
       +                feed = self.readfeed(uri)
       +                if feed == None:
       +                        return
       +                feed[key] = value
       +                self.writefeed(uri, feed)
       +
       +        def pause(self, uri):
       +                """Mark the feed as paused."""
       +                self.setfeedval(uri, "pause", True)
       +
       +        def unpause(self, uri):
       +                """Mark the feed as not paused."""
       +                self.setfeedval(uri, "pause", False)
       +
       +        def addfeed(self, uri, email=None):
       +                """Add an empty, unpaused feed for uri unless it already exists.
       +
       +                Mail goes to email, or to cfg["email"] when email is
       +                None; a KeyError is raised if neither is available.
       +                """
       +                if not uri in self.feeds:
       +                        feed = {}
       +                        if email == None:
       +                                feed["toemail"] = self.cfg["email"]
       +                        else:
       +                                feed["toemail"] = email
       +                        feed["uri"] = uri
       +                        feed["pause"] = False
       +                        feed["articles"] = []
       +                        self.writefeed(uri, feed)
       +
       +        def delfeed(self, uri):
       +                """Remove the feed stored under uri, if any."""
       +                if uri in self.feeds:
       +                        del self.feeds[uri]
       +
       +        def listfeeds(self):
       +                """Return the uris of all stored feeds."""
       +                return list(self.feeds.keys())
       +
       +        def listactivefeeds(self):
       +                """Return the uris of all feeds that are not paused."""
       +                return [uri for uri, feed in self.feeds.items() \
       +                                if not feed.get("pause", False)]
       +
       +        @staticmethod
       +        def _content(article):
       +                """Return a copy of article without the "unread" flag."""
       +                return dict((k, v) for k, v in article.items() \
       +                                if k != "unread")
       +
       +        def mergefeed(self, uri, curfeed):
       +                """Add the articles of curfeed that are not stored yet.
       +
       +                New articles are flagged unread and appended to the
       +                stored history, and the feed metadata is refreshed from
       +                curfeed. Returns curfeed with "articles" reduced to the
       +                new ones; for an unknown uri curfeed is returned as is.
       +                """
       +                rarticles = []
       +                feed = self.readfeed(uri)
       +                if feed == None:
       +                        return curfeed
       +
       +                history = feed["articles"]
       +                # Stored articles carry an "unread" key that freshly
       +                # parsed ones lack, so compare without it; otherwise
       +                # every article looks new on every run.
       +                known = [self._content(a) for a in history]
       +                for article in curfeed["articles"]:
       +                        content = self._content(article)
       +                        if content in known:
       +                                continue
       +                        article["unread"] = True
       +                        history.append(article)
       +                        known.append(content)
       +                        rarticles.append(article)
       +                feed["articles"] = history
       +
       +                for metakey in ("link", "title", "updated", "author", \
       +                                "email"):
       +                        if metakey in curfeed:
       +                                feed[metakey] = curfeed[metakey]
       +
       +                self.writefeed(uri, feed)
       +                curfeed["articles"] = rarticles
       +
       +                return curfeed
       +
       +        def unreadarticles(self, uri):
       +                """Return feed metadata plus the stored unread articles.
       +
       +                The result always has an "articles" list; it is empty
       +                for an unknown uri.
       +                """
       +                rfeed = {}
       +                rfeed["articles"] = []
       +                feed = self.readfeed(uri)
       +                if feed == None:
       +                        return rfeed
       +
       +                for metakey in ("link", "title", "updated", "author", \
       +                                "email", "toemail"):
       +                        if metakey in feed:
       +                                rfeed[metakey] = feed[metakey]
       +
       +                for article in feed["articles"]:
       +                        if article.get("unread", False) == True:
       +                                rfeed["articles"].append(article)
       +
       +                return rfeed
       +
       +        def setreadarticles(self, uri, curfeed=None):
       +                """Mark the stored articles that appear in curfeed as read.
       +
       +                Does nothing for an unknown uri or when curfeed is None.
       +                """
       +                feed = self.readfeed(uri)
       +                if feed == None or curfeed == None:
       +                        return
       +
       +                done = [self._content(a) for a in curfeed["articles"]]
       +                for article in feed["articles"]:
       +                        if self._content(article) in done:
       +                                article["unread"] = False
       +
 (DIR) diff --git a/feedemail.py b/feedemail.py
       @@ -0,0 +1,97 @@
       +#!/usr/bin/env python
       +# coding=utf-8
       +#
       +# Copy me if you can.
       +# by 20h
       +#
       +
       +import smtplib
       +from email.mime.text import MIMEText
       +from email.mime.multipart import MIMEMultipart
       +from email.utils import formataddr, formatdate, parseaddr
       +from email.header import Header
       +
       +# Module-wide html2text settings; html2text() is what send() uses to turn
       +# article bodies into plain text.
       +# NOTE(review): meanings assumed from the option names (unicode output,
       +# link list not repeated per paragraph, no line wrapping, no inline
       +# links) -- confirm against the installed html2text version.
       +import html2text as h2t
       +h2t.UNICODE_SNOB = 1 
       +h2t.LINKS_EACH_PARAGRAPH = 0
       +h2t.BODY_WIDTH = 0 
       +h2t.INLINE_LINKS = 0
       +html2text = h2t.html2text
       +
       +def normalizeheader(hstr):
       +        """Return hstr on a single line with surrounding whitespace removed.
       +
       +        Each newline is replaced by one space before stripping.
       +        """
       +        oneline = " ".join(hstr.split("\n"))
       +        return oneline.strip()
       +
       +def send(feed, to, smtphost="localhost", smtpport=None, ssl=False, \
       +                user=None, password=None):
       +        """Mail every article in feed["articles"] to the address to.
       +
       +        One text/plain message is built per article (body converted with
       +        html2text, link and enclosure appended) and delivered over its
       +        own SMTP session to smtphost. With user and password set the
       +        session logs in, using STARTTLS first unless ssl is True.
       +        Exceptions raised by smtplib are passed on to the caller.
       +        """
       +        articles = feed["articles"]
       +
       +        for article in articles:
       +                if "text" in article:
       +                        text = html2text(article["text"])
       +                else:
       +                        text = ""
       +
       +                # Without a title the start of the text is the subject.
       +                if "title" in article:
       +                        subject = Header( \
       +                                        normalizeheader(article["title"]),\
       +                                        "utf-8")
       +                else:
       +                        subject = Header(normalizeheader(text[:70]),\
       +                                        "utf-8")
       +
       +                # Append metadata.
       +                if "link" in article:
       +                        text = "%sLink: %s\n" % (text, article["link"])
       +                if "file" in article:
       +                        text = "%sEnclosure: %s\n" % (text, article["file"])
       +
       +                msg = MIMEText(text, "plain", "utf-8")
       +
       +                if "email" in feed:
       +                        faddr = feed["email"]
       +                else:
       +                        faddr = "none@none.no"
       +
       +                # Sender display name. It must be defined even when the
       +                # feed has no title, and must not contain newlines.
       +                if "title" in feed:
       +                        if "author" in article:
       +                                fname = "%s: %s" % (feed["title"], \
       +                                                article["author"])
       +                        else:
       +                                fname = feed["title"]
       +                elif "author" in article:
       +                        fname = article["author"]
       +                else:
       +                        fname = ""
       +                fname = normalizeheader(fname)
       +
       +                msg["From"] = formataddr((fname, faddr))
       +                msg["To"] = formataddr(parseaddr(to))
       +                msg["Date"] = formatdate()
       +                msg["Subject"] = subject
       +
       +                if "link" in article:
       +                        msg["X-RSS-URL"] = article["link"]
       +                if "link" in feed:
       +                        msg["X-RSS-Feed"] = feed["link"]
       +                if "id" in article:
       +                        msg["X-RSS-ID"] = article["id"]
       +                if "tags" in article:
       +                        msg["X-RSS-TAGS"] = Header(",".join(article["tags"]),\
       +                                        "utf-8")
       +                msg["User-Agent"] = "Zeitungsschau"
       +
       +                print(msg.as_string())
       +
       +                if ssl == True:
       +                        s = smtplib.SMTP_SSL()
       +                else:
       +                        s = smtplib.SMTP()
       +                try:
       +                        if smtpport != None:
       +                                s.connect(smtphost, smtpport)
       +                        else:
       +                                s.connect(smtphost)
       +
       +                        if user != None and password != None:
       +                                s.ehlo()
       +                                if ssl == False:
       +                                        s.starttls()
       +                                s.ehlo()
       +                                s.login(user, password)
       +
       +                        s.sendmail(faddr, to, msg.as_string())
       +                        s.quit()
       +                finally:
       +                        # Nothing left to do after a successful quit();
       +                        # after an error this releases the socket.
       +                        s.close()
       +
 (DIR) diff --git a/opml.py b/opml.py
       @@ -0,0 +1,51 @@
       +#
       +# Copy me if you can.
       +# by 20h
       +#
       +
       +from lxml import etree
       +from datetime import datetime
       +
       +def read(ostr):
       +        """Return the feed addresses listed in the OPML document ostr.
       +
       +        For each <outline> element the "xmlUrl" attribute is used, or
       +        "text" when there is no "xmlUrl"; outlines with neither are
       +        skipped. The parser runs with recover=True.
       +        """
       +        xml = etree.fromstring(ostr, \
       +                        etree.XMLParser(recover=True, encoding='utf-8'))
       +
       +        rssfeeds = []
       +        for outline in xml.xpath("//outline"):
       +                attrs = outline.attrib
       +                for name in ("xmlUrl", "text"):
       +                        if name in attrs:
       +                                rssfeeds.append(attrs[name])
       +                                break
       +
       +        return rssfeeds
       +
       +def write(rssfeeds):
       +        """Return an OPML document listing rssfeeds, as a string.
       +
       +        The head holds an empty title, the current time as creation and
       +        modification date, the owner name "Me" and a docs link. Every
       +        entry of rssfeeds becomes one <outline type="rss"> whose "text"
       +        and "xmlUrl" are both the entry itself.
       +        """
       +        root = etree.Element("opml")
       +
       +        head = etree.SubElement(root, "head")
       +        etree.SubElement(head, "title")
       +
       +        stamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S%Z")
       +        for tag, value in (("dateCreated", stamp), \
       +                        ("dateModified", stamp), \
       +                        ("ownerName", "Me"), \
       +                        ("docs", "http://dev.opml.org/spec2.html")):
       +                etree.SubElement(head, tag).text = value
       +
       +        body = etree.SubElement(root, "body")
       +        for rss in rssfeeds:
       +                outline = etree.SubElement(body, "outline")
       +                outline.set("type", "rss")
       +                outline.set("text", rss)
       +                outline.set("xmlUrl", rss)
       +
       +        raw = etree.tostring(root, encoding="utf-8", \
       +                        pretty_print=True, xml_declaration=True)
       +        return raw.decode("utf-8")
       +
 (DIR) diff --git a/zs.py b/zs.py
       @@ -0,0 +1,122 @@
       +#!/usr/bin/env python
       +# coding=utf-8
       +#
       +# Copy me if you can.
       +# by 20h
       +#
       +
       +import sys
       +import os
       +import feed
       +import feeddb
       +import opml
       +import feedemail
       +
       +def run(db, selfeed=None):
       +        """Fetch feeds, store their new articles and mail what is unread.
       +
       +        When selfeed is a feed known to db only that feed is processed.
       +        Otherwise all feeds that are not paused are processed.
       +        """
       +        if selfeed != None and selfeed in db.listfeeds():
       +                feeduris = [selfeed]
       +        else:
       +                # listfeeds() would include paused feeds, which would
       +                # make pausing a feed have no effect.
       +                feeduris = db.listactivefeeds()
       +        print("feeduris: %s" % (feeduris))
       +
       +        for feeduri in feeduris:
       +                curfeed = feed.fetch(feeduri)
       +                print("curfeed: %s" % (curfeed))
       +                db.mergefeed(feeduri, curfeed)
       +                ufeed = db.unreadarticles(feeduri)
       +                print("unread: %s" % (ufeed))
       +
       +                # A per-feed recipient overrides the configured default.
       +                if "toemail" in ufeed:
       +                        toemail = ufeed["toemail"]
       +                else:
       +                        toemail = db.cfg["email"]
       +                feedemail.send(ufeed, toemail, db.cfg["smtphost"], \
       +                                db.cfg["smtpport"], db.cfg["smtpssl"], \
       +                                db.cfg["smtpuser"], db.cfg["smtppassword"])
       +                db.setreadarticles(feeduri, ufeed)
       +
       +def usage(app):
       +        """Write the usage line for app to stderr and exit with status 1.
       +
       +        Only the base name of app is shown.
       +        """
       +        sys.stderr.write("usage: %s [-h] cmd\n" % (os.path.basename(app)))
       +        raise SystemExit(1)
       +
       +def main(args):
       +        """Execute the command given in args and return the exit status.
       +
       +        args[0] is the program name and args[1] the command: run, cfg,
       +        add, list, delete, pause, unpause, opmlexport or opmlimport.
       +        Missing arguments end the program through usage(). The status
       +        is 1 when "cfg" is asked for an unknown key and 0 otherwise.
       +        """
       +        retval = 0
       +
       +        if len(args) < 2:
       +                usage(args[0])
       +
       +        db = feeddb.feeddb()
       +
       +        if args[1] == "run":
       +                if len(args) > 2:
       +                        run(db, args[2])
       +                else:
       +                        run(db)
       +
       +        elif args[1] == "cfg":
       +                # No key: dump all; key only: show it; key and value: set.
       +                if len(args) < 3:
       +                        for k in db.cfg:
       +                                print("%s = '%s'" % (k, db.cfg[k]))
       +                elif len(args) < 4:
       +                        if args[2] in db.cfg:
       +                                print("%s = '%s'" % (args[2], \
       +                                        db.cfg[args[2]]))
       +                        else:
       +                                retval = 1
       +                else:
       +                        db.cfg[args[2]] = args[3]
       +                        print("%s = '%s'" % (args[2], db.cfg[args[2]]))
       +
       +        elif args[1] == "add":
       +                if len(args) < 3:
       +                        usage(args[0])
       +                email = None
       +                if len(args) > 3:
       +                        email = args[3]
       +                db.addfeed(args[2], email)
       +
       +        elif args[1] == "list":
       +                for f in db.listfeeds():
       +                        print(f)
       +
       +        elif args[1] == "delete":
       +                if len(args) < 3:
       +                        usage(args[0])
       +                # The feed is args[2]; args[1] is the command word.
       +                db.delfeed(args[2])
       +
       +        elif args[1] == "pause":
       +                if len(args) < 3:
       +                        usage(args[0])
       +                db.pause(args[2])
       +
       +        elif args[1] == "unpause":
       +                if len(args) < 3:
       +                        usage(args[0])
       +                db.unpause(args[2])
       +
       +        elif args[1] == "opmlexport":
       +                data = opml.write(db.listfeeds())
       +                if len(args) > 2:
       +                        with open(args[2], "w") as filen:
       +                                filen.write(data)
       +                else:
       +                        sys.stdout.write(data)
       +
       +        elif args[1] == "opmlimport":
       +                if len(args) > 2:
       +                        with open(args[2], "r") as filen:
       +                                data = filen.read()
       +                else:
       +                        data = sys.stdin.read()
       +                feedlist = db.listfeeds()
       +                nfeedlist = opml.read(data.encode("utf-8"))
       +                for f in nfeedlist:
       +                        if not f in feedlist:
       +                                print("import feed: %s" % (f))
       +                                db.addfeed(f)
       +
       +        del db
       +        return retval
       +
       +# Script entry point: the return value of main() becomes the exit status.
       +if __name__ == "__main__":
       +        sys.exit(main(sys.argv))
       +