sfeed_mbox: add mail headers and other improvements - sfeed - RSS and Atom parser
 (HTM) git clone git://git.codemadness.org/sfeed
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 03b52640efbd9f8fdd2297740b1c79fa0a1a7652
 (DIR) parent 839a4563e809916ccdd8ebeac1461d8c7ffc62e7
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Tue, 11 Nov 2025 20:50:06 +0100
       
       sfeed_mbox: add mail headers and other improvements
       
       Note: these changes may cause duplicate messages for existing users in some
       use-cases because the Message-ID, From and To header will be different.
       
       Changes:
       
       Add mail headers. Each header is now only set when its value is non-empty.
       
       These headers are used by rss2email also:
       
       - X-RSS-ID: the item ID.
       - X-RSS-TAGS: the category tags, '|' in sfeed(5) is replaced by ','.
       - X-RSS-URL: the item link / URL.
       
       Also added:
       
       - X-RSS-Author: the item author.
       - X-RSS-Enclosure: the first enclosure.
       
       This allows more convenient filtering in many mail clients.
       
       One can further manipulate the headers using mail tools, awk or a sed
       expression, for example to change X-Feedname to X-Label.
       
       - Change the default From and To header. It does not use the system hostname or
         $USER anymore.
       
         Add environment variables with defaults:
       
               $SFEED_MBOX_FROM: <anonymous@localhost>
               $SFEED_MBOX_TO:   <anonymous@localhost>
       
       - Fixes: The To header could contain characters that are invalid for the To
         header as well.
       
       - Fixes: The Message-ID field could contain spaces or invalid characters. Its
         domain part is now always "newsfeed.local" instead of the feed name.
       
       Diffstat:
         M sfeed_mbox.1                        |      18 +++++++++++++++++-
         M sfeed_mbox.c                        |      73 +++++++++++++++++++++----------
       
       2 files changed, 68 insertions(+), 23 deletions(-)
       ---
 (DIR) diff --git a/sfeed_mbox.1 b/sfeed_mbox.1
       @@ -1,4 +1,4 @@
       -.Dd October 27, 2024
       +.Dd October 10, 2025
        .Dt SFEED_MBOX 1
        .Os
        .Sh NAME
       @@ -35,6 +35,16 @@ To make further filtering simpler some custom headers are set:
        .It X-Feedname
        The feed name, this is the basename of the feed
        .Ar file .
       +.It X-RSS-Author
       +Item author, if it is set.
       +.It X-RSS-Enclosure
       +Item, first enclosure, if it is set.
       +.It X-RSS-ID
       +RSS item GUID or Atom id, if it is set.
       +.It X-RSS-TAGS
       +Category field, the '|' separator is replaced by a comma (',').
       +.It X-RSS-URL
       +Item link, if it is set.
        .El
        .Sh ENVIRONMENT VARIABLES
        .Bl -tag -width Ds
       @@ -43,6 +53,12 @@ Include the content.
        This can be insecure for some of the mail clients that interpret HTML code in
        an unsafe way.
        By default this is set to "0".
       +.It Ev SFEED_MBOX_FROM
       +Override the host for the "From" header.
       +The default is "anonymous@localhost".
       +.It Ev SFEED_MBOX_TO
       +Override the user for the "To" header.
       +The default is "anonymous@localhost".
        .El
        .Sh EXIT STATUS
        .Ex -std
 (DIR) diff --git a/sfeed_mbox.c b/sfeed_mbox.c
       @@ -2,14 +2,17 @@
        #include <stdlib.h>
        #include <string.h>
        #include <time.h>
       -#include <unistd.h>
        
        #include "util.h"
        
        static char *line;
        static size_t linesize;
       -static char host[256], *user, dtimebuf[32], mtimebuf[32];
       -static int usecontent = 0; /* env variable: $SFEED_MBOX_CONTENT */
       +static char dtimebuf[32], mtimebuf[32];
       +
       +/* env variables: $SFEED_MBOX_FROM and $SFEED_MBOX_TO */
       +static char *from = "<anonymous@localhost>", *to = "<anonymous@localhost>";
       +/* env variable: $SFEED_MBOX_CONTENT */
       +static int usecontent = 0;
        
        static unsigned long long
        djb2(unsigned char *s, unsigned long long hash)
       @@ -21,18 +24,29 @@ djb2(unsigned char *s, unsigned long long hash)
                return hash;
        }
        
       +static void
       +printtags(const char *s)
       +{
       +        for (; *s; s++) {
       +                if (*s == '|')
       +                        putchar(',');
       +                else
       +                        putchar(*s);
       +        }
       +}
       +
        /* Unescape / decode fields printed by string_print_encoded()
         * "\\" to "\", "\t", to TAB, "\n" to newline. Other escape sequences are
         * ignored: "\z" etc. Mangle "From " in mboxrd style (always prefix >). */
        static void
       -printcontent(const char *s, FILE *fp)
       +printcontent(const char *s)
        {
        escapefrom:
                for (; *s == '>'; s++)
       -                putc('>', fp);
       +                putchar('>');
                /* escape "From ", mboxrd-style. */
                if (!strncmp(s, "From ", 5))
       -                putc('>', fp);
       +                putchar('>');
        
                for (; *s; s++) {
                        switch (*s) {
       @@ -42,15 +56,15 @@ escapefrom:
                                s++;
                                switch (*s) {
                                case 'n':
       -                                putc('\n', fp);
       +                                putchar('\n');
                                        s++;
                                        goto escapefrom;
       -                        case '\\': putc('\\', fp); break;
       -                        case 't':  putc('\t', fp); break;
       +                        case '\\': putchar('\\'); break;
       +                        case 't':  putchar('\t'); break;
                                }
                                break;
                        default:
       -                        putc(*s, fp); break;
       +                        putchar(*s); break;
                        }
                }
        }
       @@ -84,13 +98,13 @@ printfeed(FILE *fp, const char *feedname)
                                printf("Date: %s\n", dtimebuf); /* invalid/missing: use current time */
                        }
        
       -                printf("From: %s <anonymous@>\n", fields[FieldAuthor][0] ? fields[FieldAuthor] : feedname);
       -                printf("To: %s <%s@%s>\n", user, user, host);
       +                printf("From: %s\n", from);
       +                printf("To: %s\n", to);
                        printf("Subject: %s\n", fields[FieldTitle]);
       -                printf("Message-ID: <%s%s%llu@%s>\n",
       +                printf("Message-ID: <%s%s%llu@newsfeed.local>\n",
                               fields[FieldUnixTimestamp],
                               fields[FieldUnixTimestamp][0] ? "." : "",
       -                       hash, feedname);
       +                       hash);
        
                        ishtml = usecontent && !strcmp(fields[FieldContentType], "html");
                        if (ishtml)
       @@ -98,7 +112,21 @@ printfeed(FILE *fp, const char *feedname)
                        else
                                fputs("Content-Type: text/plain; charset=\"utf-8\"\n", stdout);
                        fputs("Content-Transfer-Encoding: binary\n", stdout);
       -                printf("X-Feedname: %s\n", feedname);
       +                if (feedname[0])
       +                        printf("X-Feedname: %s\n", feedname);
       +                if (fields[FieldAuthor][0])
       +                        printf("X-RSS-Author: %s\n", fields[FieldAuthor]);
       +                if (fields[FieldEnclosure][0])
       +                        printf("X-RSS-Enclosure: %s\n", fields[FieldEnclosure]);
       +                if (fields[FieldId][0])
       +                        printf("X-RSS-ID: %s\n", fields[FieldId]);
       +                if (fields[FieldCategory][0]) {
       +                        fputs("X-RSS-TAGS: ", stdout);
       +                        printtags(fields[FieldCategory]);
       +                        fputs("\n", stdout);
       +                }
       +                if (fields[FieldLink][0])
       +                        printf("X-RSS-URL: %s\n", fields[FieldLink]);
                        fputs("\n", stdout);
        
                        if (ishtml) {
       @@ -131,7 +159,7 @@ printfeed(FILE *fp, const char *feedname)
                                        xmlencode(fields[FieldLink], stdout);
                                        fputs("\"/>\n", stdout);
                                }
       -                        printcontent(fields[FieldContent], stdout);
       +                        printcontent(fields[FieldContent]);
                        }
                        fputs("\n\n", stdout);
                }
       @@ -146,15 +174,16 @@ main(int argc, char *argv[])
                char *name, *tmp;
                int i;
        
       -        if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
       +        if (pledge(argc <= 1 ? "stdio" : "stdio rpath", NULL) == -1)
                        err(1, "pledge");
        
                if ((tmp = getenv("SFEED_MBOX_CONTENT")))
                        usecontent = !strcmp(tmp, "1");
       -        if (!(user = getenv("USER")))
       -                user = "you";
       -        if (gethostname(host, sizeof(host)) == -1)
       -                err(1, "gethostname");
       +        if ((tmp = getenv("SFEED_MBOX_FROM")))
       +                from = tmp;
       +        if ((tmp = getenv("SFEED_MBOX_TO")))
       +                to = tmp;
       +
                if ((now = time(NULL)) == (time_t)-1)
                        errx(1, "time");
                if (!gmtime_r(&now, &tmnow))
       @@ -164,7 +193,7 @@ main(int argc, char *argv[])
                if (!strftime(dtimebuf, sizeof(dtimebuf), "%a, %d %b %Y %H:%M:%S +0000", &tmnow))
                        errx(1, "strftime: can't format current time");
        
       -        if (argc == 1) {
       +        if (argc <= 1) {
                        printfeed(stdin, "");
                        checkfileerror(stdin, "<stdin>", 'r');
                } else {