sfeed_web.c - sfeed - RSS and Atom parser
 (HTM) git clone git://git.codemadness.org/sfeed
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       sfeed_web.c (3105B)
       ---
            1 #include <stdio.h>
            2 #include <strings.h>
            3 
            4 #include "util.h"
            5 #include "xml.h"
            6 
            7 /* string and size */
            8 #define STRP(s) s,sizeof(s)-1
            9 
           10 static XMLParser parser;
           11 static int isbasetag, islinktag, ishrefattr, istypeattr;
           12 static char linkhref[4096], linktype[256], basehref[4096];
           13 
           14 static void
           15 printvalue(const char *s)
           16 {
           17         for (; *s; s++)
           18                 if (!ISCNTRL((unsigned char)*s))
           19                         putchar(*s);
           20 }
           21 
           22 static void
           23 xmltagstart(XMLParser *p, const char *t, size_t tl)
           24 {
           25         isbasetag = islinktag = 0;
           26 
           27         if (!strcasecmp(t, "base")) {
           28                 isbasetag = 1;
           29         } else if (!strcasecmp(t, "link")) {
           30                 islinktag = 1;
           31                 linkhref[0] = '\0';
           32                 linktype[0] = '\0';
           33         }
           34 }
           35 
           36 static void
           37 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
           38 {
           39         struct uri baseuri, linkuri, u;
           40         char buf[4096];
           41         int r = -1;
           42 
           43         if (!islinktag)
           44                 return;
           45 
           46         if (strncasecmp(linktype, STRP("application/atom")) &&
           47             strncasecmp(linktype, STRP("application/xml")) &&
           48             strncasecmp(linktype, STRP("application/rss")))
           49                 return;
           50 
           51         /* parse base URI each time: it can change. */
           52         if (basehref[0] &&
           53             uri_parse(linkhref, &linkuri) != -1 && !linkuri.proto[0] &&
           54             uri_parse(basehref, &baseuri) != -1 &&
           55             uri_makeabs(&u, &linkuri, &baseuri) != -1 && u.proto[0])
           56                 r = uri_format(buf, sizeof(buf), &u);
           57 
           58         if (r >= 0 && (size_t)r < sizeof(buf))
           59                 printvalue(buf);
           60         else
           61                 printvalue(linkhref);
           62 
           63         putchar('\t');
           64         printvalue(linktype);
           65         putchar('\n');
           66 }
           67 
           68 static void
           69 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al)
           70 {
           71         ishrefattr = istypeattr = 0;
           72 
           73         if (!isbasetag && !islinktag)
           74                 return;
           75 
           76         if (!strcasecmp(a, "href")) {
           77                 ishrefattr = 1;
           78                 if (isbasetag)
           79                         basehref[0] = '\0';
           80                 else if (islinktag)
           81                         linkhref[0] = '\0';
           82         } else if (!strcasecmp(a, "type") && islinktag) {
           83                 istypeattr = 1;
           84                 linktype[0] = '\0';
           85         }
           86 }
           87 
           88 static void
           89 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
           90         const char *v, size_t vl)
           91 {
           92         if (isbasetag && ishrefattr) {
           93                 strlcat(basehref, v, sizeof(basehref));
           94         } else if (islinktag) {
           95                 if (ishrefattr)
           96                         strlcat(linkhref, v, sizeof(linkhref));
           97                 else if (istypeattr)
           98                         strlcat(linktype, v, sizeof(linktype));
           99         }
          100 }
          101 
          102 static void
          103 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al,
          104               const char *v, size_t vl)
          105 {
          106         char buf[8];
          107         int len;
          108 
          109         if (!ishrefattr && !istypeattr)
          110                 return;
          111 
          112         /* try to translate entity, else just pass as data to
          113          * xmlattr handler. */
          114         if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0)
          115                 xmlattr(p, t, tl, a, al, buf, (size_t)len);
          116         else
          117                 xmlattr(p, t, tl, a, al, v, vl);
          118 }
          119 
          120 int
          121 main(int argc, char *argv[])
          122 {
          123         if (pledge("stdio", NULL) == -1)
          124                 err(1, "pledge");
          125 
          126         if (argc > 1)
          127                 strlcpy(basehref, argv[1], sizeof(basehref));
          128 
          129         parser.xmlattr = xmlattr;
          130         parser.xmlattrentity = xmlattrentity;
          131         parser.xmlattrstart = xmlattrstart;
          132         parser.xmltagstart = xmltagstart;
          133         parser.xmltagstartparsed = xmltagstartparsed;
          134 
          135         /* NOTE: GETNEXT is defined in xml.h for inline optimization */
          136         xml_parse(&parser);
          137 
          138         checkfileerror(stdin, "<stdin>", 'r');
          139         checkfileerror(stdout, "<stdout>", 'w');
          140 
          141         return 0;
          142 }