first version of tscrape_html - tscrape - twitter scraper
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 7bdeb05e31e28c4cfaf385dffa48ea80aa476315
 (DIR) parent 4640420521e94158d80f94202ed40f7dc4a66169
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 12 Aug 2017 17:23:35 +0200
       
       first version of tscrape_html
       
       Diffstat:
         M Makefile                            |       1 +
         A tscrape_html.c                      |     164 +++++++++++++++++++++++++++++++
       
       2 files changed, 165 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -4,6 +4,7 @@ NAME = tscrape
        VERSION = 0.1
        BIN = \
                tscrape\
       +        tscrape_html\
                tscrape_plain
        
        SRC = ${BIN:=.c}
 (DIR) diff --git a/tscrape_html.c b/tscrape_html.c
       @@ -0,0 +1,164 @@
       +#include <err.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <time.h>
       +
       +#include "util.h"
       +
       +static struct feed **feeds; /* per-feed state; allocated in main() with argc slots */
       +static int showsidebar; /* set in main() to (argc > 1) */
       +static char *line; /* getline() buffer reused by printfeed() */
       +static size_t linesize; /* size of the getline() buffer above */
       +static unsigned long totalnew; /* new items summed over all feeds */
       +static time_t comparetime; /* items with a timestamp >= this count as new */
       +/* Write feed f to stdout as HTML: an <h2> heading when f->name is non-empty, then one line per item read from fp; updates f->total, f->totalnew and the global totalnew. */
       +static void
       +printfeed(FILE *fp, struct feed *f)
       +{
       +        char *fields[FieldLast];
       +        struct tm *tm;
       +        time_t parsedtime;
       +        unsigned int islink, isnew;
       +        ssize_t linelen;
       +        /* the heading's id is the feed name and it links to "#name"; skipped for an empty name */
       +        if (f->name[0]) {
       +                fputs("<h2 id=\"", stdout);
       +                xmlencode(f->name, stdout);
       +                fputs("\"><a href=\"#", stdout);
       +                xmlencode(f->name, stdout);
       +                fputs("\">", stdout);
       +                xmlencode(f->name, stdout);
       +                fputs("</a></h2>\n", stdout);
       +        }
       +        /* one item per input line; line/linesize are the file-scope getline() buffer */
       +        while ((linelen = getline(&line, &linesize, fp)) > 0) {
       +                if (line[linelen - 1] == '\n')
       +                        line[--linelen] = '\0'; /* strip the trailing newline */
       +                if (!parseline(line, fields))
       +                        break; /* stop reading this feed when parseline() returns 0 */
       +                /* strtotime()'s return value is not checked; parsedtime is preset to 0 */
       +                parsedtime = 0;
       +                strtotime(fields[FieldUnixTimestamp], &parsedtime);
       +                if (!(tm = localtime(&parsedtime)))
       +                        err(1, "localtime");
       +                /* new: timestamp at or after comparetime; link: item id field is non-empty */
       +                isnew = (parsedtime >= comparetime) ? 1 : 0;
       +                islink = fields[FieldItemid][0] ? 1 : 0;
       +                /* update the global and per-feed counters */
       +                totalnew += isnew;
       +                f->totalnew += isnew;
       +                f->total++;
       +                /* local-time prefix: YYYY-MM-DD&nbsp;HH:MM */
       +                fprintf(stdout, "%04d-%02d-%02d&nbsp;%02d:%02d ",
       +                        tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
       +                        tm->tm_hour, tm->tm_min);
       +                if (isnew)
       +                        fputs("<b><u>", stdout);
       +                if (islink) {
       +                        fputs("<a href=\"https://mobile.twitter.com/", stdout);
       +                        xmlencode(fields[FieldUsername], stdout);
       +                        fputs("/status/", stdout);
       +                        xmlencode(fields[FieldItemid], stdout);
       +                        fputs("\">", stdout);
       +                }
       +                xmlencode(fields[FieldText], stdout);
       +                if (islink)
       +                        fputs("</a>", stdout);
       +                if (isnew)
       +                        fputs("</u></b>", stdout);
       +                /* separate [retweet] link when the retweet id field is non-empty */
       +                if (fields[FieldRetweetid][0]) {
       +                        printf(" <a href=\"https://mobile.twitter.com/");
       +                        xmlencode(fields[FieldItemUsername], stdout);
       +                        fputs("/status/", stdout);
       +                        xmlencode(fields[FieldRetweetid], stdout);
       +                        fputs("\">[retweet]</a>", stdout);
       +                }
       +                fputs("\n", stdout);
       +        }
       +}
       +/* Entry point: with no arguments print one unnamed feed read from stdin; otherwise print each argument as a feed file and add a sidebar. The HTML page goes to stdout. */
       +int
       +main(int argc, char *argv[])
       +{
       +        struct feed *f;
       +        char *name;
       +        FILE *fp;
       +        int i;
       +        /* pledge "stdio" when reading stdin only, "stdio rpath" when feed files are given */
       +        if (pledge(argc == 1 ? "stdio" : "stdio rpath", NULL) == -1)
       +                err(1, "pledge");
       +        /* argc slots: enough for the argc - 1 file feeds, or the single stdin feed when argc == 1 */
       +        if (!(feeds = calloc(argc, sizeof(struct feed *))))
       +                err(1, "calloc");
       +        if ((comparetime = time(NULL)) == -1)
       +                err(1, "time");
       +        /* 1 day old is old news */
       +        comparetime -= 86400;
       +        /* fixed page header */
       +        fputs("<!DOCTYPE HTML>\n"
       +              "<html>\n"
       +              "\t<head>\n"
       +              "\t\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
       +              "\t\t<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n"
       +              "\t</head>\n"
       +              "\t<body class=\"noframe\">\n", stdout);
       +        /* the sidebar is only shown when feed files are given on the command line */
       +        showsidebar = (argc > 1);
       +        if (showsidebar)
       +                fputs("\t\t<div id=\"items\">\n", stdout);
       +        else
       +                fputs("\t\t<div id=\"items\" class=\"nosidebar\">\n", stdout);
       +        fputs("<pre>", stdout);
       +        /* no arguments: a single feed with an empty name, read from stdin */
       +        if (argc == 1) {
       +                if (!(feeds[0] = calloc(1, sizeof(struct feed))))
       +                        err(1, "calloc");
       +                feeds[0]->name = "";
       +                printfeed(stdin, feeds[0]);
       +                if (ferror(stdin))
       +                        err(1, "ferror: <stdin>:"); /* NOTE(review): err() appends ": <strerror>", so the trailing ':' looks redundant */
       +        } else {
       +                for (i = 1; i < argc; i++) {
       +                        if (!(feeds[i - 1] = calloc(1, sizeof(struct feed))))
       +                                err(1, "calloc");
       +                        name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i]; /* part after the last '/', else the whole argument */
       +                        feeds[i - 1]->name = name;
       +                        if (!(fp = fopen(argv[i], "r")))
       +                                err(1, "fopen: %s", argv[i]);
       +                        printfeed(fp, feeds[i - 1]);
       +                        if (ferror(fp))
       +                                err(1, "ferror: %s", argv[i]);
       +                        fclose(fp);
       +                }
       +        }
       +        fputs("</pre>\n</div>\n", stdout); /* div items */
       +        /* sidebar: one "#name" link per feed with its count of new items */
       +        if (showsidebar) {
       +                fputs("\t<div id=\"sidebar\">\n\t\t<ul>\n", stdout);
       +
       +                for (i = 1; i < argc; i++) {
       +                        f = feeds[i - 1];
       +                        if (f->totalnew > 0)
       +                                fputs("<li class=\"n\"><a href=\"#", stdout);
       +                        else
       +                                fputs("<li><a href=\"#", stdout);
       +                        xmlencode(f->name, stdout);
       +                        fputs("\">", stdout);
       +                        if (f->totalnew > 0)
       +                                fputs("<b><u>", stdout);
       +                        xmlencode(f->name, stdout);
       +                        fprintf(stdout, " (%lu)", f->totalnew);
       +                        if (f->totalnew > 0)
       +                                fputs("</u></b>", stdout);
       +                        fputs("</a></li>\n", stdout);
       +                }
       +                fputs("\t\t</ul>\n\t</div>\n", stdout);
       +        }
       +        /* NOTE(review): <title> is written after </body>, presumably because totalnew is only final once every feed has been printed; confirm the placement is intended */
       +        fprintf(stdout, "\t</body>\n\t<title>Tweets (%lu)</title>\n</html>\n",
       +                totalnew);
       +
       +        return 0;
       +}