#!/bin/sh

#
# Convert a feed sfeed to mbox and keep a cache of feeds seen
#

# Path of the feed file to convert (first and only argument).
feed=$1

if [ -z "${feed}" ]; then
	# Diagnostics go to stderr: stdout is reserved for the generated mbox.
	echo "usage: sfeed_mbox_cache file" >&2
	exit 1
fi

# The cache is a hidden sibling of the feed file: <dir>/.<name>.cache
# "--" keeps a feed path that starts with "-" from being read as an option.
feed_cache=$(dirname -- "${feed}")/.$(basename -- "${feed}").cache

# Make sure the cache exists before it is read; stop early when it cannot be
# created instead of failing later in the middle of the conversion.
touch -- "${feed_cache}" || exit 1

# Convert the feed to mbox on stdout, skipping every item whose link is
# already listed in the cache, and append newly emitted links to the cache.
#
# TZ=UTC0 makes strftime() format in UTC, so the Date header really matches
# the "+0000 (UTC)" it advertises (it used to be local time with a UTC
# label). The From_ separator line is then in UTC as well.
#
# NOTE: the awk program is wrapped in single quotes, so comments inside it
# must not contain an apostrophe.
TZ=UTC0 awk '
BEGIN {
	FS = "\t"

	# Take the file names from ARGV instead of -v assignments: -v values
	# undergo escape-sequence processing, so a backslash in a path would
	# make the FILENAME comparisons below silently never match.
	FEED_CACHE = ARGV[1]
	FEED = ARGV[2]

	# Feed name = basename of the feed file; identical for every item.
	feed = FEED
	sub(/.*\//, "", feed)

	# Timestamp used on every mbox "From " separator line.
	mtime = strftime("%a %b %d %H:%M:%S %Y")
}

# Decode the backslash escapes of a content field in one left-to-right pass:
#   \n -> newline, \t -> three spaces, \\ -> backslash.
# Any other backslash sequence is kept unchanged. A single pass is required:
# running one gsub per escape misreads an escaped backslash followed by "n"
# or "t" (for example a Windows path) as a newline or tab escape.
function unescape(s,    out, i, ch)
{
	out = ""
	while ((i = index(s, "\\")) > 0) {
		out = out substr(s, 1, i - 1)
		ch = substr(s, i + 1, 1)
		if (ch == "n") {
			out = out "\n"
		} else if (ch == "t") {
			out = out "   "
		} else if (ch == "\\") {
			out = out "\\"
		} else {
			# Unknown escape or trailing backslash: keep as is.
			out = out "\\" ch
		}
		s = substr(s, i + 2)
	}
	return out s
}

# Write the body of one item to stdout.
#   c      - raw (still escaped) content field
#   c_type - content type field; "html" is rendered through webdump,
#            anything else is passed through unchanged
# cmd is a local variable (extra awk parameter), not an argument.
function print_content(c, c_type,    cmd)
{
	c = unescape(c)

	if (c_type == "html") {
		cmd = "webdump -lrw 72"
		c = "<span>" c "</span>"
	} else {
		cmd = "cat"
	}

	# Quote body lines that look like an mbox separator. Only "From "
	# (with the trailing space) is a separator; already quoted lines get
	# one more ">" so the quoting stays reversible (mboxrd style).
	cmd = cmd " | sed \"s/^>*From />&/\""

	# The child writes straight to the shared stdout, so push out the
	# headers buffered so far to keep them ahead of the body.
	fflush()
	print c | cmd
	close(cmd)
}

# First input file: every line of the cache is the link of an item that was
# already emitted by an earlier run.
FILENAME == FEED_CACHE {
	seen[$0]++
}

# Second input file: one feed item per line, TAB separated.
FILENAME == FEED {
	timestamp = $1
	title = $2
	link = $3
	content = $4
	content_type = $5
	id = $6
	author = $7

	# Use current timestamp if not present
	if (!timestamp) {
		timestamp = systime()
	}

	# Workaround for feeds that have no id
	if (!id) {
		id = "empty"
	}

	# Workaround for feeds that have empty link fields
	if (!link) {
		link = id
	}

	# Fall back to the feed name when the item has no author
	if (!author) {
		author = feed
	}

	# Skip items already emitted (by a previous run or earlier in this one)
	if (link in seen) {
		next
	}

	print "From MAILER-DAEMON " mtime
	print "From: \"" author "\" <" feed "@sfeed.invalid>"
	print "Date: " strftime("%a, %d %b %Y %H:%M +0000 (UTC)", timestamp)
	print "Subject: " title
	print "X-RSS-Feed: " feed
	print "X-RSS-Id: " id
	print "X-RSS-URL: " link
	print "X-RSS-Content-Type: " content_type
	print "Content-Type: text/plain; charset=\"utf-8\""
	print "Content-Transfer-Encoding: binary"
	print ""
	print_content(content, content_type)
	print ""
	print ""

	# Remember the item, both on disk and for the rest of this run
	print link >> FEED_CACHE
	seen[link]++
}
' "${feed_cache}" "${feed}"