global - randomcrap - random crap programs of varying quality
git clone git://git.codemadness.org/randomcrap
---
global (3424B)
---
#!/bin/sh
# maximum number of parallel jobs allowed (used by sfeed_update(1)).
# NOTE: decreased because server load was high and processing feeds
# (per feed) was slower.
maxjobs=8

# start time with subsecond precision, used to measure durations
# (ts(1) is from moreutils).
starttime=$(echo | ts -m '%.s')

# custom connectors.
#. ~/.sfeed/connectors/vimeo
. ~/.sfeed/connectors/github
. ~/.sfeed/connectors/youtube-videos-with-duration
. ~/.sfeed/connectors/repology_history_atom.sh
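# (note: presumably each sourced connector defines the matching filter
# function used in parse() below: github_json_to_atom,
# youtube_with_duration and repology_history; the commented-out vimeo
# connector would provide vimeo2atom.)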

# log(name, s)
log() {
	# NOTE: uses "ts" and bc for timings.
	endtime=$(echo | ts -m '%.s')
	duration=$(echo "$endtime - $starttime" | bc)

	printf '[%s] %-50.50s %s (%s)\n' "$(date +'%H:%M:%S')" "$1" "$2" "$duration"
}

# log_error(name, s)
log_error() {
	# NOTE: uses "ts" and bc for timings.
	endtime=$(echo | ts -m '%.s')
	duration=$(echo "$endtime - $starttime" | bc)

	printf '[%s] %-50.50s %s (%s)\n' "$(date +'%H:%M:%S')" "$1" "$2" "$duration" >&2
	# set the error exit status indicator for the parallel jobs
	# (sfeed_update(1) checks for this "ok" file at the end).
	rm -f "${sfeedtmpdir}/ok"
}
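# example output line of log "somefeed" "OK" (name column padded to 50
# characters by the printf format above):
# [12:34:56] somefeed                                           OK (0.123456)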

# parse(name, feedurl, basesiteurl)
parse() {
	case "$2" in
	*://vimeo.com/api/v2/user/*/videos.json)
		# create a feed from the public Vimeo API (v2).
		vimeo2atom | sfeed "$3";;
	*://api.github.com/**)
		github_json_to_atom | sfeed "$3";;
	*://www.youtube.com/**)
		youtube_with_duration "$2";;
	*://repology.org/**)
		repology_history "$2" | sfeed "$3";;
	*)
		sfeed "$3";;
	esac
}
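# (note: sfeed_update(1) runs parse with the fetched data on stdin and
# expects sfeed(5) TSV lines on stdout; sfeed(1) converts RSS/Atom XML
# to that format, so the JSON connectors first convert to Atom, while
# youtube_with_duration presumably emits the TSV format directly.)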

# merge raw files: unique sort by id, title, link.
# merge(name, oldfile, newfile)
merge() {
	LC_ALL=C sort -t '	' -u -k6,6 -k2,2 -k3,3 "$3" "$2" 2>/dev/null
}
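# (the -t argument to sort above is a literal TAB: sfeed(5) lines are
# TAB-separated with the fields 1=timestamp, 2=title, 3=link, 4=content,
# 5=content-type, 6=id, 7=author, 8=enclosure, 9=category, so items are
# deduplicated by id first, then by title and link.)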

# order by timestamp (descending).
# order(name)
order() {
	LC_ALL=C sort -t '	' -k1rn,1 2>/dev/null
}
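# (field 1 is the UNIX timestamp, so -k1rn sorts newest first.)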

# fetch a feed via HTTP/HTTPS etc.
# fetch(name, url, feedfile)
fetch() {
	useragent=""

	case "$2" in
	*://api.github.com/**)
		useragent="github";;
#	*://tilde.news/*)
#		# requires User-Agent
#		useragent="bla";;
	*://www.anandtech.com/*)
		# requires User-Agent
		useragent="bla";;
#	*://lobste.rs/*)
#		# requires User-Agent
#		useragent="bla";;
	*://old.reddit.com/*)
		# requires User-Agent
		useragent="Firefox";;
	*)
		useragent="";;
	esac

	# old fetch command, without the cache optimizations:
	# curl -L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	"$2" 2>/dev/null;

	# t="$(mktemp)"

	# ETag cache and If-Modified-Since optimizations.
	basename="$(basename "$3")"
	etag="$HOME/.sfeed/etags/${basename}"
	lastmod="$HOME/.sfeed/lastmod/${basename}"
	output="${sfeedtmpdir}/feeds/${basename}.xml"

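	# (curl flag notes: --etag-compare sends If-None-Match from the
	# saved ETag file and --etag-save stores the new ETag; -z sends
	# If-Modified-Since based on the mtime of the lastmod file; -R sets
	# the mtime of the output file from the server Last-Modified header.
	# assumption: the ~/.sfeed/etags and ~/.sfeed/lastmod directories
	# are expected to exist, e.g. created once with mkdir -p.)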
	curl \
		-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
		--compressed \
		--etag-save "${etag}" --etag-compare "${etag}" \
		-R -o "${output}" \
		-z "${lastmod}" \
		"$2" 2>/dev/null || return 1

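	# (a non-zero status from fetch makes sfeed_update(1) treat the
	# feed as failed to fetch.)
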
	# # DEBUG
	# curl -v \
	#	-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	--compressed \
	#	--etag-save "${etag}" --etag-compare "${etag}" \
	#	-R -o "${output}" \
	#	-z "${lastmod}" \
	#	"$2" || return 1

	# successful, but no file was written: assume it is OK and the
	# feed was Not Modified (HTTP 304).
	[ -e "${output}" ] || return 0

	# use the server timestamp from curl -R to set the Last-Modified
	# cache file.
	touch -r "${output}" "${lastmod}" 2>/dev/null
	cat "${output}" 2>/dev/null
	# use the exit status of writing the output; other errors are
	# ignored here.
	fetchstatus="$?"
	rm -f "${output}" 2>/dev/null
	return "${fetchstatus}"

	# DEBUG (unreachable: kept as reference after the return above):
	# fetch without the ETag or If-Modified-Since optimizations and
	# log the bandwidth used.
	#curl \
	#	-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	"$2" \
	#	2>/dev/null > "$t"

	#count="$(wc -c < "$t")"
	#count=$((count+0))

	#echo "$(date) $name $count" >> ~/.sfeed/bandwidth_no_etag

	#cat "$t"

	#rm -f "$t"
}
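
# usage sketch (assumption: this file is an sfeedrc(5)-style config for
# sfeed_update(1), which sources it and calls the fetch/parse/merge/order
# hooks above); a feeds() function listing the feeds would normally
# complete it, e.g.:
#
# feeds() {
#	feed "codemadness" "https://www.codemadness.org/atom.xml"
# }
#
# and it would then be run as: sfeed_update "$HOME/.sfeed/global"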