grabtitle.c - grabtitle - stupid HTML title grabber
 (HTM) git clone git://git.codemadness.org/grabtitle
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       grabtitle.c (2167B)
       ---
            1 #include <ctype.h>
            2 #include <errno.h>
            3 #include <stdio.h>
            4 #include <stdlib.h>
            5 #include <string.h>
            6 #include <strings.h>
            7 
            8 #include "xml.h"
            9 
           10 #ifdef __OpenBSD__
           11 #include <unistd.h>
           12 #else
           13 #define pledge(a,b) 0
           14 #endif
           15 
           16 static XMLParser parser;
           17 static const char *state, *endtag;
           18 static int (*getnext)(void);
           19 
           20 /* return a space for all data until some case-insensitive string occurs. This
           21    is used to parse incorrect HTML/XML that contains unescaped HTML in script
           22    or style tags. If you see some </script> tag in a CDATA or comment
           23    section then e-mail W3C and tell them the web is too complex. */
           24 static inline int
           25 getnext_ignore(void)
           26 {
           27         int c;
           28 
           29         if ((c = getnext()) == EOF)
           30                 return EOF;
           31 
           32         if (tolower(c) == tolower((unsigned char)*state)) {
           33                 state++;
           34                 if (*state == '\0') {
           35                         parser.getnext = getnext; /* restore */
           36                         return c;
           37                 }
           38         } else {
           39                 state = endtag;
           40         }
           41 
           42         return ' ';
           43 }
           44 
           45 static void
           46 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
           47 {
           48         putchar('\n');
           49         exit(0);
           50 }
           51 
           52 /* data and CDATA */
           53 static void
           54 xmldata(XMLParser *p, const char *d, size_t dl)
           55 {
           56         size_t i;
           57 
           58         for (i = 0; *d && i < dl; i++, d++) {
           59                 if (iscntrl((unsigned char)*d))
           60                         putchar(' ');
           61                 else
           62                         putchar(*d);
           63         }
           64 }
           65 
           66 static void
           67 xmldataentity(XMLParser *p, const char *d, size_t dl)
           68 {
           69         char buf[16];
           70         ssize_t len;
           71 
           72         if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0)
           73                 xmldata(p, buf, (size_t)len);
           74         else
           75                 xmldata(p, d, dl);
           76 }
           77 
           78 static void
           79 xmltagstart(XMLParser *p, const char *t, size_t tl)
           80 {
           81         if (tl == 6 && !strcasecmp(t, "script")) {
           82                 state = endtag = "</script>";
           83                 getnext = p->getnext; /* for restore */
           84                 p->getnext = getnext_ignore;
           85         } else if (tl == 5 && !strcasecmp(t, "style")) {
           86                 state = endtag = "</style>";
           87                 getnext = p->getnext; /* for restore */
           88                 p->getnext = getnext_ignore;
           89         } else if (tl == 5 && !strcasecmp(t, "title")) {
           90                 p->xmltagend = xmltagend;
           91                 p->xmlcdata = p->xmldata = xmldata;
           92                 p->xmldataentity = xmldataentity;
           93         }
           94 }
           95 
           96 int
           97 main(void)
           98 {
           99         if (pledge("stdio", NULL) == -1) {
          100                 fprintf(stderr, "pledge: %s\n", strerror(errno));
          101                 return 2;
          102         }
          103 
          104         parser.xmltagstart = xmltagstart;
          105         parser.getnext = getchar;
          106         xml_parse(&parser);
          107 
          108         return 1;
          109 }