grabtitle.c - grabtitle - stupid HTML title grabber
(HTM) git clone git://git.codemadness.org/grabtitle
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
grabtitle.c (2167B)
---
1 #include <ctype.h>
2 #include <errno.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <strings.h>
7
8 #include "xml.h"
9
10 #ifdef __OpenBSD__
11 #include <unistd.h>
12 #else
13 #define pledge(a,b) 0
14 #endif
15
16 static XMLParser parser; /* the single parser instance; main() installs its callbacks and runs it */
17 static const char *state, *endtag; /* endtag: closing tag text being searched for; state: next character of it still to be matched */
18 static int (*getnext)(void); /* input reader saved by xmltagstart() so getnext_ignore() can read from it and later restore it */
19
20 /* return a space for all data until some case-insensitive string occurs. This
21 is used to parse incorrect HTML/XML that contains unescaped HTML in script
22 or style tags. If you see some </script> tag in a CDATA or comment
23 section then e-mail W3C and tell them the web is too complex. */
24 static inline int
25 getnext_ignore(void)
26 {
27 int c;
28
29 if ((c = getnext()) == EOF)
30 return EOF;
31
32 if (tolower(c) == tolower((unsigned char)*state)) {
33 state++;
34 if (*state == '\0') {
35 parser.getnext = getnext; /* restore */
36 return c;
37 }
38 } else {
39 state = endtag;
40 }
41
42 return ' ';
43 }
44
45 static void
46 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
47 {
48 putchar('\n');
49 exit(0);
50 }
51
52 /* data and CDATA */
53 static void
54 xmldata(XMLParser *p, const char *d, size_t dl)
55 {
56 size_t i;
57
58 for (i = 0; *d && i < dl; i++, d++) {
59 if (iscntrl((unsigned char)*d))
60 putchar(' ');
61 else
62 putchar(*d);
63 }
64 }
65
66 static void
67 xmldataentity(XMLParser *p, const char *d, size_t dl)
68 {
69 char buf[16];
70 ssize_t len;
71
72 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0)
73 xmldata(p, buf, (size_t)len);
74 else
75 xmldata(p, d, dl);
76 }
77
78 static void
79 xmltagstart(XMLParser *p, const char *t, size_t tl)
80 {
81 if (tl == 6 && !strcasecmp(t, "script")) {
82 state = endtag = "</script>";
83 getnext = p->getnext; /* for restore */
84 p->getnext = getnext_ignore;
85 } else if (tl == 5 && !strcasecmp(t, "style")) {
86 state = endtag = "</style>";
87 getnext = p->getnext; /* for restore */
88 p->getnext = getnext_ignore;
89 } else if (tl == 5 && !strcasecmp(t, "title")) {
90 p->xmltagend = xmltagend;
91 p->xmlcdata = p->xmldata = xmldata;
92 p->xmldataentity = xmldataentity;
93 }
94 }
95
96 int
97 main(void)
98 {
99 if (pledge("stdio", NULL) == -1) {
100 fprintf(stderr, "pledge: %s\n", strerror(errno));
101 return 2;
102 }
103
104 parser.xmltagstart = xmltagstart;
105 parser.getnext = getchar;
106 xml_parse(&parser);
107
108 return 1;
109 }