feedemail.py - zs - Zeitungsschau rss to email converter
 (HTM) git clone git://r-36.net/zs
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       feedemail.py (3777B)
       ---
            1 #
            2 # See LICENSE for licensing details.
            3 #
            4 # Copy me if you can.
            5 # by 20h
            6 #
            7 
            8 import smtplib
            9 from email.mime.text import MIMEText
           10 from email.mime.multipart import MIMEMultipart
           11 from email.utils import formataddr, formatdate, parseaddr
           12 from email.header import Header
           13 import time
           14 import subprocess
           15 import lxml.html
           16 import lxml.etree
           17 import urllib.parse
           18 
           19 import html2text
           20 
           21 def normalizeheader(hstr):
           22         if len(hstr) == 0:
           23                 return ""
           24         try:
           25                 return lxml.html.fromstring(hstr).text_content().\
           26                                 replace(u"\xa0", "").\
           27                                 replace("\n", " ").strip()
           28         except lxml.etree.ParserError:
           29                 return ""
           30 
           31 
           32 class LocalSendmail(object):
           33         cmd="/usr/sbin/sendmail -f \"%s\" \"%s\""
           34 
           35         def __init__(self, cmd=None):
           36                 if cmd != None:
           37                         self.cmd = cmd
           38 
           39         def sendmail(self, faddr, taddr, msg):
           40                 cmd = self.cmd % (faddr, taddr)
           41                 p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE)
           42                 p.communicate(input=msg.encode("utf8"))
           43 
           44 def send(feed, to, smtphost="localhost", smtpport=None, ssl="False",\
           45                 starttls="True", user=None, password=None, smtpcmd=None,\
           46                 smtpuselocal=False):
           47         articles = feed["articles"]
           48 
           49         for article in articles:
           50                 if "text" in article:
           51                         h2t = html2text.HTML2Text()
           52                         h2t.body_width = 0
           53                         h2t.unicode_snob = 1
           54                         h2t.inline_links = 0
           55                         h2t.links_each_paragraph = 0
           56 
           57                         try:
           58                                 text = "%s\n" % (h2t.handle(article["text"]))
           59                         except:
           60                                 text = article["text"]
           61 
           62                         del h2t
           63                 else:
           64                         text = ""
           65 
           66                 # Larger than 10 MiB, something is wrong.
           67                 if len(text) > 10 * 1024 * 1024:
           68                         continue
           69 
           70                 if "title" in article:
           71                         subject = Header( \
           72                                         normalizeheader(article["title"]),\
           73                                         "utf-8")
           74                 else:
           75                         subject = Header(normalizeheader(text[:20]),\
           76                                         "utf-8")
           77 
           78                 # Append metadata.
           79                 if "link" in article:
           80                         text = "%sURL: %s\n" % (text, article["link"])
           81                 if "file" in article:
           82                         text = "%sEnclosure: %s\n" % (text, article["file"])
           83 
           84                 msg = MIMEText(text, "plain", "utf-8")
           85 
           86                 if "email" in feed:
           87                         faddr = feed["email"]
           88                 else:
           89                         faddr = "none@none.no"
           90                 if "title" in feed:
           91                         if "author" in article:
           92                                 fname = "%s: %s" % (feed["title"], \
           93                                                 article["author"])
           94                         else:
           95                                 fname = feed["title"]
           96 
           97                 msg["From"] = formataddr((fname, faddr))
           98                 msg["To"] = formataddr(parseaddr(to))
           99                 if "updated" in article:
          100                         msg["Date"] = formatdate(time.mktime(\
          101                                         article["updated"].timetuple()))
          102                 else:
          103                         msg["Date"] = formatdate()
          104                 msg["Subject"] = subject
          105 
          106                 if "link" in article:
          107                         if "://" not in article["link"]:
          108                                 aurl = urllib.parse.urljoin(feed["feeduri"],\
          109                                         article["link"])
          110                                 if "gopher://" in aurl:
          111                                         urls = urllib.parse.urlparse(aurl, \
          112                                                         allow_fragments=False)
          113                                         if urls.path.startswith("/0"):
          114                                                 aurl = "%s://%s%s" % \
          115                                                 (urls.scheme, urls.netloc, \
          116                                                         urls.path.replace(\
          117                                                         "/0", "/1", 1))
          118                                                 if len(urls.query) > 0:
          119                                                         aurl = "%s?%s" % \
          120                                                         (aurl, urls.query)
          121                         else:
          122                                 aurl = article["link"]
          123                         msg["X-RSS-URL"] = aurl
          124                 if "link" in feed:
          125                         msg["X-RSS-Feed"] = feed["link"]
          126                 else:
          127                         msg["X-RSS-Feed"] = feed["feeduri"]
          128                 if "id" in article:
          129                         msg["X-RSS-ID"] = article["id"]
          130                 if "uuid" in article:
          131                         msg["X-RSS-UUID"] = article["uuid"]
          132                 if "tags" in article:
          133                         msg["X-RSS-TAGS"] = Header(",".join(article["tags"]),\
          134                                         "utf-8")
          135                 msg["User-Agent"] = "Zeitungsschau"
          136 
          137                 if smtpuselocal == "True":
          138                         s = LocalSendmail(smtpcmd)
          139                         s.sendmail(faddr, to, msg.as_string())
          140                 else:
          141                         if ssl == "True":
          142                                 s = smtplib.SMTP_SSL(smtphost)
          143                         else:
          144                                 s = smtplib.SMTP(smtphost)
          145                         if smtpport != None:
          146                                 s.connect(smtphost, smtpport)
          147                         else:
          148                                 s.connect(smtphost)
          149 
          150                         s.ehlo()
          151                         if ssl == "False" and starttls == "True":
          152                                 s.starttls()
          153                                 s.ehlo()
          154 
          155                         if user != None and password != None:
          156                                 s.login(user, password)
          157 
          158                         s.sendmail(faddr, to, msg.as_string())
          159                         s.quit()
          160