duckduckgo.c - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
duckduckgo.c (4755B)
---
1 #include <sys/types.h>
2
3 #include <ctype.h>
4 #include <err.h>
5 #include <locale.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <unistd.h>
10 #include <wchar.h>
11
12 #include "duckduckgo.h"
13 #include "https.h"
14 #include "util.h"
15 #include "xml.h"
16
17 static XMLParser x;
18
19 static struct duckduckgo_results *results;
20 static struct duckduckgo_result result;
21 static int istitle, isdescription, isurl, isresult;
22
/*
 * Clean up the NUL-terminated string `s` in place:
 *  - strip trailing and leading whitespace,
 *  - replace every remaining control character with a space.
 *
 * `len` is kept for interface compatibility only.  The amount of data to
 * move is derived from the string itself, because the length changes once
 * trailing whitespace has been trimmed and a stale or too small `len` would
 * otherwise make the size computation wrap around.
 */
void
sanitize(char *s, size_t len)
{
	size_t start, end, i;

	(void)len;

	/* trim trailing whitespace */
	for (end = strlen(s); end > 0; end--) {
		if (!isspace((unsigned char)s[end - 1]))
			break;
	}
	s[end] = '\0';

	/* trim leading whitespace; `end` is now the string length */
	for (start = 0; start < end; start++) {
		if (!isspace((unsigned char)s[start]))
			break;
	}
	if (start > 0)
		memmove(s, s + start, end - start + 1); /* +1: include the NUL */

	/* control characters left inside the text become plain spaces */
	for (i = 0; s[i]; i++) {
		if (iscntrl((unsigned char)s[i]))
			s[i] = ' ';
	}
}
47
48 void
49 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
50 const char *v, size_t vl)
51 {
52 if (!strcmp(t, "div") && !strcmp(a, "class") && strstr(v, "results_links"))
53 isresult = 1;
54
55 if (!isresult)
56 return;
57
58 /* clear fix is use in the end of a result */
59 if (!strcmp(t, "div") && !strcmp(a, "style") && strstr(v, "clear: both")) {
60 isresult = 0;
61
62 if (!result.title[0] || !result.url[0])
63 return;
64
65 /* add result */
66 if (results->nitems <= MAX_ITEMS) {
67 memcpy(&(results->items[results->nitems]),
68 &result, sizeof(result));
69 results->nitems++;
70 }
71 memset(&result, 0, sizeof(result));
72 return;
73 }
74
75 if (!strcmp(t, "h2") && !strcmp(a, "class") && strstr(v, "result__title"))
76 istitle = 1;
77 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result__snippet"))
78 isdescription = 1;
79 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result__url"))
80 isurl = 1;
81 if (isurl && !strcmp(t, "a") && !strcmp(a, "href"))
82 strlcpy(result.url, v, sizeof(result.url));
83 }
84
85 void
86 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
87 const char *v, size_t vl)
88 {
89 char buf[16];
90 int len;
91
92 if (!isresult || !istitle || !isdescription || !isurl)
93 return;
94
95 if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0)
96 xmlattr(x, t, tl, a, al, buf, (size_t)len);
97 else
98 xmlattr(x, t, tl, a, al, v, vl);
99 }
100
101 void
102 xmldata(XMLParser *x, const char *d, size_t dl)
103 {
104 if (istitle)
105 strlcat(result.title, d, sizeof(result.title));
106 if (isdescription)
107 strlcat(result.description, d, sizeof(result.description));
108 }
109
110 void
111 xmlcdata(XMLParser *x, const char *d, size_t dl)
112 {
113 xmldata(x, d, dl);
114 }
115
116 void
117 xmldataentity(XMLParser *x, const char *d, size_t dl)
118 {
119 char buf[16];
120 int len;
121
122 if (!isresult || !istitle || !isdescription || !isurl)
123 return;
124
125 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0)
126 xmldata(x, buf, (size_t)len);
127 else
128 xmldata(x, d, dl);
129 }
130
131 void
132 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
133 {
134 char *p;
135
136 if (!isresult)
137 return;
138
139 if (isdescription) {
140 /* highlight */
141 if (!strcmp(t, "b"))
142 strlcat(result.description, "*", sizeof(result.description));
143 }
144
145 if (istitle && !strcmp(t, "h2"))
146 istitle = 0;
147 if (isdescription && !strcmp(t, "a"))
148 isdescription = 0;
149 if (isurl && !strcmp(t, "a"))
150 isurl = 0;
151 if (!strcmp(t, "div")) {
152 /* decode url and remove "tracking"/usage part via DDG */
153 if ((p = strstr(result.url, "uddg="))) {
154 p += sizeof("uddg=") - 1;
155 if (decodeparam(result.urldecoded, sizeof(result.urldecoded), p) == -1)
156 result.urldecoded[0] = '\0';
157 }
158
159 sanitize(result.title, strlen(result.title));
160 sanitize(result.urldecoded, strlen(result.urldecoded));
161 sanitize(result.description, strlen(result.description));
162
163 istitle = isdescription = isurl = 0;
164 }
165 }
166
167 void
168 xmltagstart(XMLParser *x, const char *t, size_t tl)
169 {
170 /* highlight */
171 if (isdescription && !strcmp(t, "b"))
172 strlcat(result.description, "*", sizeof(result.description));
173
174 }
175
/*
 * Builds the request path "/html/?q=<s>" and returns whatever request()
 * yields for it on host "html.duckduckgo.com".
 *
 * `s` is inserted into the path as-is.  Returns NULL without issuing a
 * request when formatting fails or the path does not fit into the buffer.
 */
char *
duckduckgo_search_data(const char *s)
{
	char path[4096];
	int n;

	n = snprintf(path, sizeof(path), "/html/?q=%s", s);
	if (n >= 0 && (size_t)n < sizeof(path))
		return request("html.duckduckgo.com", path, "");

	/* formatting error or truncated path */
	return NULL;
}
188
189 struct duckduckgo_results *
190 duckduckgo_search(const char *s)
191 {
192 struct duckduckgo_results *r;
193 char *data;
194
195 results = NULL; /* global */
196
197 if (!(r = calloc(1, sizeof(*r))))
198 return NULL;
199
200 /* TODO: encodeuri s */
201 if (!(data = duckduckgo_search_data(s))) {
202 free(r);
203 results = NULL;
204 return NULL;
205 }
206
207 // TODO: xmlparser, parse data into struct duckduckgo_results.
208
209 x.xmlattr = xmlattr;
210 x.xmlattrentity = xmlattrentity;
211 x.xmlcdata = xmlcdata;
212 x.xmldata = xmldata;
213 x.xmldataentity = xmldataentity;
214 x.xmltagend = xmltagend;
215 x.xmltagstart = xmltagstart;
216
217 results = r; /* global: store */
218 setxmldata(data, strlen(data));
219 xml_parse(&x);
220
221 free(data);
222
223 return r;
224 }