Add a patch to sync tscrape_update with sfeed_update - pkgsrc-localpatches - leot's pkgsrc LOCALPATCHES
(HTM) hg clone https://bitbucket.org/iamleot/pkgsrc-localpatches
(DIR) Log
(DIR) Files
(DIR) Refs
---
(DIR) changeset 07743a30c060a43e3fa9a3959ed163df5d2f8237
(DIR) parent 024501ed3cc79721a8f4d48050a5f3a8f56443ef
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sat, 15 Dec 2018 23:34:47
Add a patch to sync tscrape_update with sfeed_update
Diffstat:
wip/tscrape-git/patch-tscrape_update.patch | 172 +++++++++++++++++++++++++++++
1 files changed, 172 insertions(+), 0 deletions(-)
---
diff -r 024501ed3cc7 -r 07743a30c060 wip/tscrape-git/patch-tscrape_update.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/wip/tscrape-git/patch-tscrape_update.patch Sat Dec 15 23:34:47 2018 +0100
@@ -0,0 +1,172 @@
+tscrape_update: Sync with sfeed_update
+
+- Handle signals consistently in different shells
+- Improve SIGINT handling
+- Add a variable for max amount of feeds to update concurrently
+- Add filter(), order() support per feed
+- Don't always exit 1, exit 130 on SIGINT, exit 0 otherwise
+- Fail on feed HTTP redirect
+
+--- tscrape_update.orig
++++ tscrape_update
+@@ -5,11 +5,15 @@
+ # defaults
+ tscrapepath="$HOME/.tscrape/feeds"
+
++# used for processing feeds concurrently: wait until ${maxjobs} amount of
++# feeds are finished at a time.
++maxjobs=8
++
+ # load config (evaluate shellscript).
+ # loadconfig(configfile)
+ loadconfig() {
+ # allow to specify config via argv[1].
+- if [ ! x"$1" = x"" ]; then
++ if [ "$1" != "" ]; then
+ # get absolute path of config file.
+ config=$(readlink -f "$1")
+ else
+@@ -17,8 +21,7 @@ loadconfig() {
+ config="$HOME/.tscrape/tscraperc"
+ fi
+
+- # load config: config is loaded here to be able to override $tscrapepath
+- # or functions.
++ # config is loaded here to be able to override $tscrapepath or functions.
+ if [ -r "${config}" ]; then
+ . "${config}"
+ else
+@@ -28,46 +31,69 @@ loadconfig() {
+ fi
+ }
+
+-# merge raw files.
++# merge raw files: unique sort by id, retweetid.
+ # merge(oldfile, newfile)
+ merge() {
+- # unique sort by id, retweetid.
+- # order by timestamp (desc).
+- (sort -t ' ' -u -k5,5 -k8,8 "$1" "$2" 2>/dev/null) |
++ sort -t ' ' -u -k5,5 -k8,8 "$1" "$2" 2>/dev/null
++}
++
++# filter fields.
++# filter(name)
++filter() {
++ cat
++}
++
++# order by timestamp (descending).
++# order(name)
++order() {
+ sort -t ' ' -k1rn,1
+ }
+
+ # fetch a feed via HTTP/HTTPS etc.
+-# fetchfeed(url, name, feedfile)
++# fetchfeed(name, url, feedfile)
+ fetchfeed() {
+- if curl --http1.0 -H 'User-Agent:' -f -s -S --max-time 15 -z "$3" "$1"; then
+- printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
++ if curl -L --max-redirs 0 -H "User-Agent:" -f -s -m 15 \
++ -z "$3" "$2" 2>/dev/null; then
++ printf " OK %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
+ else
+- printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$2" >&2
++ printf "FAIL %s %s\n" "$(date +'%H:%M:%S')" "$1" >&2
+ fi
+ }
+
+ # fetch and parse feed.
+ # feed(name, feedurl)
+ feed() {
++ # wait until ${maxjobs} are finished: throughput using this logic is
++ # non-optimal, but it is simple and portable.
++ [ ${signo} -ne 0 ] && return
++ [ $((curjobs % maxjobs)) -eq 0 ] && wait
++ [ ${signo} -ne 0 ] && return
++ curjobs=$((curjobs + 1))
++
+ (name="$1"
+- tmpfeedfile="${tscrapetmpdir}/${name}"
++ filename="$(printf '%s' "$1" | tr '/' '_')"
++ feedurl="$2"
++ tmpfeedfile="${tscrapetmpdir}/${filename}"
+ tmpencfile=""
+- tscrapefile="${tscrapepath}/$1"
++ tscrapefile="${tscrapepath}/${filename}"
+
+- fetchfeed "$2" "$1" "${tscrapefile}" | tscrape > "${tmpfeedfile}"
++ fetchfeed "${name}" "${feedurl}" "${tscrapefile}" | \
++ tscrape | filter "${name}" > "${tmpfeedfile}"
+
+ # get new data and merge with old.
+- tscrapefilenew="${tscrapepath}/${name}.new"
++ tscrapefilenew="${tscrapepath}/${filename}.new"
+ # new feed data is non-empty.
+ if [ -s "${tmpfeedfile}" ]; then
+ # if file exists, merge
+ if [ -e "${tscrapefile}" ]; then
+- merge "${tscrapefile}" "${tmpfeedfile}" > "${tscrapefilenew}"
++ merge "${name}" "${tscrapefile}" "${tmpfeedfile}" | \
++ order "${name}" > "${tscrapefilenew}"
++
+ # overwrite old file with updated file
+ mv "${tscrapefilenew}" "${tscrapefile}"
+ else
+- merge "/dev/null" "${tmpfeedfile}" > "${tscrapefile}"
++ merge "${name}" "/dev/null" "${tmpfeedfile}" | \
++ order "${name}" > "${tscrapefile}"
+ fi
+ fi) &
+ }
+@@ -81,28 +107,39 @@ cleanup() {
+ rm -rf "${tscrapetmpdir}"
+ }
+
++sighandler() {
++ signo="$1"
++ # ignore TERM signal for myself.
++ trap -- "" TERM
++ # kill all running childs >:D
++ kill -TERM -$$
++}
++
+ feeds() {
+ echo "Configuration file \"${config}\" is invalid or does not contain a \"feeds\" function." >&2
+ echo "See tscraperc.example for an example." >&2
+ }
+
++# job counter.
++curjobs=0
++# signal number received for parent.
++signo=0
++# SIGINT: signal to interrupt parent.
++trap -- "sighandler 2" "INT"
++# SIGTERM: signal to terminate parent.
++trap -- "sighandler 15" "TERM"
+ # load config file.
+ loadconfig "$1"
+-# fetch feeds and store in temporary file.
++# fetch feeds and store in temporary directory.
+ tscrapetmpdir="$(mktemp -d '/tmp/tscrape_XXXXXX')"
+-# kill whole current process group on ^C.
+-isrunning="1"
+-# SIGTERM: signal to terminate parent.
+-trap -- "terminated" "15"
+-# SIGINT: kill all running childs >:D
+-trap -- "kill -TERM -$$" "2"
+ # make sure path exists.
+ mkdir -p "${tscrapepath}"
+ # fetch feeds specified in config file.
+ feeds
+ # wait till all feeds are fetched (concurrently).
+-wait
++[ ${signo} -eq 0 ] && wait
+ # cleanup temporary files etc.
+ cleanup
+-# if terminated.
+-[ "${isrunning}" = "0" ] && exit 1
++# on signal SIGINT and SIGTERM exit with signal number + 128.
++[ ${signo} -ne 0 ] && exit $((signo+128))
++exit 0