sync sfeed config - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit e14b3d0828974aeefde224dac8f92b3749d39e8c
 (DIR) parent 372c03151bff2e14c68ff0eaa7d25cabc79001da
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat,  2 Aug 2025 13:09:23 +0200
       
       sync sfeed config
       
       Some fixes for e-tag and last modified handling.
       Some comments, useful for debugging sometimes.
       
       Diffstat:
         M config/sfeed/global                 |      68 ++++++++++++++++++++++++++-----
       
       1 file changed, 58 insertions(+), 10 deletions(-)
       ---
 (DIR) diff --git a/config/sfeed/global b/config/sfeed/global
       @@ -1,6 +1,8 @@
        #!/bin/sh
        # increase max parallel jobs allowed.
       -#maxjobs=16
       +# NOTE: decreased jobs because server load was high and
       +#       processing feeds (per feed) was slower.
       +maxjobs=8
        
        starttime=$(echo | ts -m '%.s')
        
       @@ -67,15 +69,18 @@ fetch() {
                case "$2" in
                *://api.github.com/**)
                        useragent="github";;
       -        *://tilde.news/*)
       -                # requires User-Agent
       -                useragent="bla";;
       -        *://lobste.rs/*)
       +#        *://tilde.news/*)
       +#                # requires User-Agent
       +#                useragent="bla";;
       +        *://www.anandtech.com/*)
                        # requires User-Agent
                        useragent="bla";;
       +#        *://lobste.rs/*)
       +#                # requires User-Agent
       +#                useragent="bla";;
                *://old.reddit.com/*)
                        # requires User-Agent
       -                useragent="007";;
       +                useragent="Firefox";;
                *)
                        useragent="";;
                esac
       @@ -83,13 +88,56 @@ fetch() {
        #        curl -L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
        #                "$2" 2>/dev/null;
        
       +#        t="$(mktemp)"
       +
                # e-tag cache and optimizations (If-Modified-Since).
       -        etag="$HOME/.sfeed/etags/$(basename "$3")"
       +        basename="$(basename "$3")"
       +        etag="$HOME/.sfeed/etags/${basename}"
       +        lastmod="$HOME/.sfeed/lastmod/${basename}"
       +        output="${sfeedtmpdir}/feeds/${filename}.xml"
       +
                curl \
                        -L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
                        --compressed \
                        --etag-save "${etag}" --etag-compare "${etag}" \
       -                -z "${etag}" \
       -                "$2" \
       -                2>/dev/null
       +                -R -o "${output}" \
       +                -z "${lastmod}" \
       +                "$2" 2>/dev/null || return 1
       +
       +#        # DEBUG
       +#        curl -v \
       +#                -L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
       +#                --compressed \
       +#                --etag-save "${etag}" --etag-compare "${etag}" \
       +#                -R -o "${output}" \
       +#                -z "${lastmod}" \
       +#                "$2" || return 1
       +
       +        # successful, but no file written: assume it is OK and Not Modified.
       +        [ -e "${output}" ] || return 0
       +
       +        # use server timestamp from curl -R to set Last-Modified.
       +        touch -r "${output}" "${lastmod}" 2>/dev/null
       +        cat "${output}" 2>/dev/null
       +        # use write output status, other errors are ignored here.
       +        fetchstatus="$?"
       +        rm -f "${output}" 2>/dev/null
       +        return "${fetchstatus}"
       +
       +        # DEBUG:
       +
       +        # no e-tag or optimizations
       +        #curl \
       +        #        -L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
       +        #        "$2" \
       +        #        2>/dev/null > "$t"
       +
       +        #count="$(wc -c < "$t")"
       +        #count=$((count+0))
       +
       +        #echo "$(date)        $name        $count" >> ~/.sfeed/bandwidth_no_etag
       +
       +        #cat "$t"
       +
       +        #rm -f "$t"
        }