#!/bin/sh
#
# Given the URL of https://mupdf.com/release_history.html, print one
# TAB-separated feed entry per news item (every <h3> heading on the page).
# The field order appears to follow the sfeed(5) format, as the name
# "sfeedify" suggests:
#   timestamp, title, link, content, content-type, id, author, enclosure
#
# Usage: script URL...
# Requires: curl, xmllint, awk, and a date(1) that understands -d (GNU).
#

# Download the document at URL ($1) and write it to stdout.
# curl flags: -g disables URL globbing, -L follows at most 2 redirects,
# -f fails on HTTP errors, -s -S stays quiet but still reports errors,
# -m 15 caps the whole transfer at 15 seconds.
fetch() {
	curl -g -L --max-redirs 2 -f -s -S -m 15 "$1"
}

# Filter stdin -> stdout: remove <h3 ...> / </h3> tags, trim leading and
# trailing spaces, and drop lines that end up empty. Trimming happens before
# the empty-line check so whitespace-only lines are dropped as well.
strip_h3_tags() {
	sed -E \
		-e 's;</?h3[^>]*>;;g' \
		-e 's/^ +//' \
		-e 's/ +$//' \
		-e '/^$/d'
}

# Read an HTML document on stdin and print the text of each <h3> heading,
# one per line. xmllint diagnostics are discarded (presumably to hide HTML
# parser warnings on real-world markup).
extract_news_entry() {
	xmllint --format --html --xpath '//h3' - 2>/dev/null |
		strip_h3_tags
}

# Read heading lines of the form "Title words (DATE)" on stdin and print one
# TAB-separated entry per line. The last whitespace-separated field, minus
# its first and last character, is taken as the date; everything before it
# is the title. The content, content_type and enclosure columns are
# intentionally left empty (unset awk variables).
sfeedify() {
	awk '
	BEGIN {
		author = "Artifex"
		base_link = "https://mupdf.com/release_history.html"
	}

	# Nothing to convert on a blank record.
	NF == 0 { next }

	{
		# Last field looks like "(DATE)": drop the surrounding characters.
		date = substr($NF, 2, length($NF) - 2)
		title = substr($0, 1, length($0) - length($NF) - 1)

		# Build the anchor id from the whole heading.
		id = tolower($0)
		gsub(/ /, "-", id)
		gsub(/\./, "_", id)
		gsub(/[()]/, "", id)
		link = base_link "#" id

		# The date is interpolated into a shell command below, so keep
		# only characters that cannot break out of the quoting.
		gsub(/[^A-Za-z0-9:+ ]/, "", date)

		# Reset so a failed conversion does not reuse the timestamp of
		# the previous record.
		timestamp = ""
		# XXX: this is not portable (NetBSD date(1))
		cmd = sprintf("date -ud \"%s\" +%%s", date)
		cmd | getline timestamp
		close(cmd)

		print timestamp "\t" title "\t" link "\t" content "\t" content_type "\t" id "\t" author "\t" enclosure
	}
	'
}

for u in "$@"; do
	fetch "${u}" | extract_news_entry | sfeedify
done