bitreich.org

       kvssachsen2atom - brcon2023-hackathons - Bitreichcon 2023 Hackathon Repository
 (HTM) git clone git://bitreich.org/brcon2023-hackathons git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/brcon2023-hackathons
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
       ---
       kvssachsen2atom (3188B)
       ---
            1 #!/usr/bin/env python
            2 # coding=utf-8
            3 #
            4 # Copy me if you can.
            5 # by 20h
            6 #
            7 
            8 import os
            9 import sys
           10 import getopt
           11 
           12 from selenium import webdriver
           13 from selenium.webdriver.chrome.options import Options as chromeoptions
           14 from selenium.webdriver.support.ui import WebDriverWait
           15 from selenium.webdriver.support import expected_conditions as EC
           16 from selenium.webdriver.common.by import By
           17 
           18 from datetime import datetime
           19 import pytz
           20 
           def usage(app):
                   """Write the one-line usage message to stderr and exit with status 1.

                   app is the path the program was invoked as; only its final path
                   component is shown in the message. This function never returns.
                   """
                   progname = os.path.basename(app)
                   message = "usage: %s [-h] URI\n" % (progname)
                   sys.stderr.write(message)
                   raise SystemExit(1)
           25 
           def _xmlescape(value):
                   """Escape value for XML character data or a double-quoted attribute."""
                   # Imported locally so this helper does not rely on the file's
                   # top-level import list.
                   from xml.sax.saxutils import escape
                   return escape(value, {"\"": "&quot;"})

           def _cdata(value):
                   """Wrap value in a CDATA section.

                   A literal "]]>" inside value would close the section early, so it
                   is split across two adjacent CDATA sections.
                   """
                   return "<![CDATA[%s]]>" % (value.replace("]]>", "]]]]><![CDATA[>"))

           def main(args):
                   """Scrape the news list at a URI with headless Chrome and print
                   it as an Atom feed on stdout.

                   args is the full argument vector, program name first. The only
                   option is -h; the first positional argument is the URI to load
                   and any further positionals are ignored.

                   Articles are emitted in reverse document order. Each article's
                   date is parsed as DD.MM.YYYY and set to 12:00 UTC; a year later
                   than the current one is clamped to the current year. Articles
                   without a link, a <time> element or a parseable date are skipped.

                   Returns 0 once the feed has been written. usage() exits the
                   process with status 1 on -h, on a bad option or when the URI is
                   missing. Errors raised by selenium (for example the 60 second
                   wait timing out) propagate after the browser has been shut down.
                   """
                   # Imported locally so this block does not rely on the file's
                   # top-level import list.
                   from urllib.parse import urljoin

                   try:
                           opts, largs = getopt.getopt(args[1:], "h")
                   except getopt.GetoptError as err:
                           # stdout carries the feed, so diagnostics go to stderr.
                           sys.stderr.write("%s\n" % (err))
                           usage(args[0])

                   for o, _ in opts:
                           if o == "-h":
                                   usage(args[0])
                           else:
                                   # Raised explicitly: an assert statement would be
                                   # stripped when running under python -O.
                                   raise AssertionError("unhandled option")

                   if not largs:
                           usage(args[0])

                   link = largs[0]

                   options = chromeoptions()
                   chromearguments = [
                           "headless",
                           "no-sandbox",
                           "disable-extensions",
                           "disable-dev-shm-usage",
                           "start-maximized",
                           "window-size=1900,1080",
                           "disable-gpu",
                   ]
                   for carg in chromearguments:
                           options.add_argument(carg)

                   driver = webdriver.Chrome(options=options)
                   try:
                           driver.get(link)

                           # Block (up to 60 s) until an element carrying a
                           # data-last-letter attribute exists; presumably this
                           # signals that the page has filled in the news list.
                           WebDriverWait(driver=driver, timeout=60).until(
                                   EC.presence_of_element_located(
                                           (By.XPATH, "//div[@data-last-letter]")
                                   )
                           )
                           newslist = driver.find_elements(
                                   By.XPATH, "//div[@data-filter-target=\"list\"]")[0]

                           # get_attribute() returns None for a missing attribute;
                           # fall back to an empty title instead of printing "None".
                           title = driver.find_elements(
                                   By.XPATH, "//meta[@property=\"og:title\"]"
                           )[0].get_attribute("content") or ""

                           print("""<?xml version="1.0" encoding="utf-8"?>""")
                           print("""<feed xmlns="http://www.w3.org/2005/Atom">""")
                           print("\t<title>%s</title>" % (_cdata(title)))
                           # The subtitle reuses the page title.
                           print("\t<subtitle>%s</subtitle>" % (_cdata(title)))
                           print("\t<id>%s</id>" % (_xmlescape(link)))
                           print("\t<link href=\"%s\" rel=\"self\" />" % (_xmlescape(link)))
                           print("\t<link href=\"%s\" />" % (_xmlescape(link)))

                           utcnow = datetime.now(pytz.utc)
                           print("\t<updated>%s</updated>" % (utcnow.isoformat()))

                           pauthor = "sachsen@kvsachsen.de"
                           articles = newslist.find_elements(By.XPATH, "./div")
                           for article in reversed(articles):
                                   anchors = article.find_elements(By.XPATH, "./a")
                                   times = article.find_elements(By.XPATH, ".//time")
                                   if not anchors or not times:
                                           # Not a news entry; skip it rather than
                                           # abort the whole feed.
                                           continue

                                   anchor = anchors[0]
                                   href = anchor.get_attribute("href")
                                   if not href:
                                           continue
                                   plink = href
                                   if not plink.startswith(("http://", "https://")):
                                           # Resolve relative hrefs against the page URI.
                                           plink = urljoin(link, plink)
                                   ptitle = anchor.get_attribute("data-title") or ""
                                   pcontent = article.text

                                   # Normalize datetime.
                                   try:
                                           dtupdated = datetime.strptime(
                                                   times[0].text, "%d.%m.%Y")
                                   except ValueError:
                                           continue

                                   # The page gives only a date; pin it to noon UTC.
                                   dtupdated = dtupdated.replace(hour=12, minute=0,
                                                   second=0, tzinfo=pytz.utc)
                                   if dtupdated.year > utcnow.year:
                                           dtupdated = dtupdated.replace(year=utcnow.year)

                                   print("\t<entry>")
                                   print("\t\t<id>%s</id>" % (_xmlescape(plink)))
                                   print("\t\t<title>%s</title>" % (_cdata(ptitle)))
                                   print("\t\t<link href=\"%s\" />" % (_xmlescape(plink)))
                                   print("\t\t<author><name>%s</name></author>" % (pauthor))
                                   print("\t\t<updated>%s</updated>" % (dtupdated.isoformat()))
                                   print("\t\t<content>%s</content>" % (_cdata(pcontent)))
                                   print("\t</entry>")

                           print("</feed>")
                   finally:
                           # Always shut the browser down, including when the wait
                           # above times out; otherwise the Chrome process lingers.
                           driver.quit()

                   return 0
          118 
          # Script entry point: run main() on the process arguments and use its
          # return value as the exit status.
          if __name__ == "__main__":
                  status = main(sys.argv)
                  raise SystemExit(status)
          121