global - randomcrap - random crap programs of varying quality
git clone git://git.codemadness.org/randomcrap
---
global (3424B)
---
#!/bin/sh
# maximum number of parallel jobs allowed (used by sfeed_update(1)).
# NOTE: decreased because server load was high and processing feeds
# (per feed) was slower.
maxjobs=8

# start time with subsecond precision, used to measure durations
# (ts(1) is from moreutils).
starttime=$(echo | ts -m '%.s')

# custom connectors.
#. ~/.sfeed/connectors/vimeo
. ~/.sfeed/connectors/github
. ~/.sfeed/connectors/youtube-videos-with-duration
. ~/.sfeed/connectors/repology_history_atom.sh
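# (note: presumably each sourced connector defines the matching filter
# function used in parse() below: github_json_to_atom,
# youtube_with_duration and repology_history; the commented-out vimeo
# connector would provide vimeo2atom.)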

# log(name, s)
log() {
	# NOTE: uses "ts" and bc for timings.
	endtime=$(echo | ts -m '%.s')
	duration=$(echo "$endtime - $starttime" | bc)

	printf '[%s] %-50.50s %s (%s)\n' "$(date +'%H:%M:%S')" "$1" "$2" "$duration"
}

# log_error(name, s)
log_error() {
	# NOTE: uses "ts" and bc for timings.
	endtime=$(echo | ts -m '%.s')
	duration=$(echo "$endtime - $starttime" | bc)

	printf '[%s] %-50.50s %s (%s)\n' "$(date +'%H:%M:%S')" "$1" "$2" "$duration" >&2
	# set the error exit status indicator for the parallel jobs
	# (sfeed_update(1) checks for this "ok" file at the end).
	rm -f "${sfeedtmpdir}/ok"
}
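# example output line of log "somefeed" "OK" (name column padded to 50
# characters by the printf format above):
# [12:34:56] somefeed                                           OK (0.123456)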

# parse(name, feedurl, basesiteurl)
parse() {
	case "$2" in
	*://vimeo.com/api/v2/user/*/videos.json)
		# create a feed from the public Vimeo API (v2).
		vimeo2atom | sfeed "$3";;
	*://api.github.com/**)
		github_json_to_atom | sfeed "$3";;
	*://www.youtube.com/**)
		youtube_with_duration "$2";;
	*://repology.org/**)
		repology_history "$2" | sfeed "$3";;
	*)
		sfeed "$3";;
	esac
}
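# (note: sfeed_update(1) runs parse with the fetched data on stdin and
# expects sfeed(5) TSV lines on stdout; sfeed(1) converts RSS/Atom XML
# to that format, so the JSON connectors first convert to Atom, while
# youtube_with_duration presumably emits the TSV format directly.)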

# merge raw files: unique sort by id, title, link.
# merge(name, oldfile, newfile)
merge() {
	LC_ALL=C sort -t '	' -u -k6,6 -k2,2 -k3,3 "$3" "$2" 2>/dev/null
}
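# (the -t argument to sort above is a literal TAB: sfeed(5) lines are
# TAB-separated with the fields 1=timestamp, 2=title, 3=link, 4=content,
# 5=content-type, 6=id, 7=author, 8=enclosure, 9=category, so items are
# deduplicated by id first, then by title and link.)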

# order by timestamp (descending).
# order(name)
order() {
	LC_ALL=C sort -t '	' -k1rn,1 2>/dev/null
}
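# (field 1 is the UNIX timestamp, so -k1rn sorts newest first.)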

# fetch a feed via HTTP/HTTPS etc.
# fetch(name, url, feedfile)
fetch() {
	useragent=""

	case "$2" in
	*://api.github.com/**)
		useragent="github";;
#	*://tilde.news/*)
#		# requires User-Agent
#		useragent="bla";;
	*://www.anandtech.com/*)
		# requires User-Agent
		useragent="bla";;
#	*://lobste.rs/*)
#		# requires User-Agent
#		useragent="bla";;
	*://old.reddit.com/*)
		# requires User-Agent
		useragent="Firefox";;
	*)
		useragent="";;
	esac

	# old fetch command, without the cache optimizations:
	# curl -L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	"$2" 2>/dev/null;

	# t="$(mktemp)"

	# ETag cache and If-Modified-Since optimizations.
	basename="$(basename "$3")"
	etag="$HOME/.sfeed/etags/${basename}"
	lastmod="$HOME/.sfeed/lastmod/${basename}"
	output="${sfeedtmpdir}/feeds/${basename}.xml"

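	# (curl flag notes: --etag-compare sends If-None-Match from the
	# saved ETag file and --etag-save stores the new ETag; -z sends
	# If-Modified-Since based on the mtime of the lastmod file; -R sets
	# the mtime of the output file from the server Last-Modified header.
	# assumption: the ~/.sfeed/etags and ~/.sfeed/lastmod directories
	# are expected to exist, e.g. created once with mkdir -p.)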
	curl \
		-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
		--compressed \
		--etag-save "${etag}" --etag-compare "${etag}" \
		-R -o "${output}" \
		-z "${lastmod}" \
		"$2" 2>/dev/null || return 1

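	# (a non-zero status from fetch makes sfeed_update(1) treat the
	# feed as failed to fetch.)
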
	# # DEBUG
	# curl -v \
	#	-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	--compressed \
	#	--etag-save "${etag}" --etag-compare "${etag}" \
	#	-R -o "${output}" \
	#	-z "${lastmod}" \
	#	"$2" || return 1

	# successful, but no file was written: assume it is OK and the
	# feed was Not Modified (HTTP 304).
	[ -e "${output}" ] || return 0

	# use the server timestamp from curl -R to set the Last-Modified
	# cache file.
	touch -r "${output}" "${lastmod}" 2>/dev/null
	cat "${output}" 2>/dev/null
	# use the exit status of writing the output; other errors are
	# ignored here.
	fetchstatus="$?"
	rm -f "${output}" 2>/dev/null
	return "${fetchstatus}"

	# DEBUG (unreachable: kept as reference after the return above):
	# fetch without the ETag or If-Modified-Since optimizations and
	# log the bandwidth used.
	#curl \
	#	-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	"$2" \
	#	2>/dev/null > "$t"

	#count="$(wc -c < "$t")"
	#count=$((count+0))

	#echo "$(date) $name $count" >> ~/.sfeed/bandwidth_no_etag

	#cat "$t"

	#rm -f "$t"
}
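
# usage sketch (assumption: this file is an sfeedrc(5)-style config for
# sfeed_update(1), which sources it and calls the fetch/parse/merge/order
# hooks above); a feeds() function listing the feeds would normally
# complete it, e.g.:
#
# feeds() {
#	feed "codemadness" "https://www.codemadness.org/atom.xml"
# }
#
# and it would then be run as: sfeed_update "$HOME/.sfeed/global"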