sfeed_web.c - sfeed - RSS and Atom parser
(HTM) git clone git://git.codemadness.org/sfeed
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
sfeed_web.c (3105B)
---
1 #include <stdio.h>
2 #include <strings.h>
3
4 #include "util.h"
5 #include "xml.h"
6
/* Expand a string literal (or char array) into two comma-separated
 * arguments: the string itself and its length without the terminating
 * NUL byte. The length comes from sizeof, so this must not be used on
 * a plain char pointer. */
#define STRP(s) (s), sizeof(s) - 1
9
10 static XMLParser parser;
11 static int isbasetag, islinktag, ishrefattr, istypeattr;
12 static char linkhref[4096], linktype[256], basehref[4096];
13
/* Write the NUL-terminated string s to stdout, leaving out every byte for
 * which ISCNTRL() is true (presumably so that embedded control bytes
 * cannot break the TAB/newline separated output). */
static void
printvalue(const char *s)
{
	const char *c = s;

	while (*c) {
		/* cast: the classification macro gets an unsigned char value */
		if (!ISCNTRL((unsigned char)*c))
			putchar(*c);
		c++;
	}
}
21
22 static void
23 xmltagstart(XMLParser *p, const char *t, size_t tl)
24 {
25 isbasetag = islinktag = 0;
26
27 if (!strcasecmp(t, "base")) {
28 isbasetag = 1;
29 } else if (!strcasecmp(t, "link")) {
30 islinktag = 1;
31 linkhref[0] = '\0';
32 linktype[0] = '\0';
33 }
34 }
35
36 static void
37 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
38 {
39 struct uri baseuri, linkuri, u;
40 char buf[4096];
41 int r = -1;
42
43 if (!islinktag)
44 return;
45
46 if (strncasecmp(linktype, STRP("application/atom")) &&
47 strncasecmp(linktype, STRP("application/xml")) &&
48 strncasecmp(linktype, STRP("application/rss")))
49 return;
50
51 /* parse base URI each time: it can change. */
52 if (basehref[0] &&
53 uri_parse(linkhref, &linkuri) != -1 && !linkuri.proto[0] &&
54 uri_parse(basehref, &baseuri) != -1 &&
55 uri_makeabs(&u, &linkuri, &baseuri) != -1 && u.proto[0])
56 r = uri_format(buf, sizeof(buf), &u);
57
58 if (r >= 0 && (size_t)r < sizeof(buf))
59 printvalue(buf);
60 else
61 printvalue(linkhref);
62
63 putchar('\t');
64 printvalue(linktype);
65 putchar('\n');
66 }
67
68 static void
69 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al)
70 {
71 ishrefattr = istypeattr = 0;
72
73 if (!isbasetag && !islinktag)
74 return;
75
76 if (!strcasecmp(a, "href")) {
77 ishrefattr = 1;
78 if (isbasetag)
79 basehref[0] = '\0';
80 else if (islinktag)
81 linkhref[0] = '\0';
82 } else if (!strcasecmp(a, "type") && islinktag) {
83 istypeattr = 1;
84 linktype[0] = '\0';
85 }
86 }
87
88 static void
89 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
90 const char *v, size_t vl)
91 {
92 if (isbasetag && ishrefattr) {
93 strlcat(basehref, v, sizeof(basehref));
94 } else if (islinktag) {
95 if (ishrefattr)
96 strlcat(linkhref, v, sizeof(linkhref));
97 else if (istypeattr)
98 strlcat(linktype, v, sizeof(linktype));
99 }
100 }
101
102 static void
103 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al,
104 const char *v, size_t vl)
105 {
106 char buf[8];
107 int len;
108
109 if (!ishrefattr && !istypeattr)
110 return;
111
112 /* try to translate entity, else just pass as data to
113 * xmlattr handler. */
114 if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0)
115 xmlattr(p, t, tl, a, al, buf, (size_t)len);
116 else
117 xmlattr(p, t, tl, a, al, v, vl);
118 }
119
120 int
121 main(int argc, char *argv[])
122 {
123 if (pledge("stdio", NULL) == -1)
124 err(1, "pledge");
125
126 if (argc > 1)
127 strlcpy(basehref, argv[1], sizeof(basehref));
128
129 parser.xmlattr = xmlattr;
130 parser.xmlattrentity = xmlattrentity;
131 parser.xmlattrstart = xmlattrstart;
132 parser.xmltagstart = xmltagstart;
133 parser.xmltagstartparsed = xmltagstartparsed;
134
135 /* NOTE: GETNEXT is defined in xml.h for inline optimization */
136 xml_parse(&parser);
137
138 checkfileerror(stdin, "<stdin>", 'r');
139 checkfileerror(stdout, "<stdout>", 'w');
140
141 return 0;
142 }