md-printlinks.c - sites - public wiki contents of suckless.org
(HTM) git clone git://git.suckless.org/sites
(DIR) Log
(DIR) Files
(DIR) Refs
---
md-printlinks.c (9757B)
---
1 /* process Markdown (based on smu code), but only output links */
2 #include <sys/types.h>
3
4 #include <ctype.h>
5 #include <errno.h>
6 #include <stdarg.h>
7 #include <stdio.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <string.h>
11
12 #ifdef __OpenBSD__
13 #include <unistd.h>
14 #else
15 #define pledge(a,b) 0
16 #endif
17
/* Chunk size used both for reading stdin and for growing ADDC() buffers. */
#define READ_BUF_SIZ 16384
/*
 * Number of elements in a true array (not a pointer).  Fully parenthesized
 * so the result composes correctly inside larger expressions.
 */
#define LEN(x) (sizeof(x) / sizeof((x)[0]))
/*
 * Usage: ADDC(buf, idx) = c;
 * Stores into buf[idx]; whenever idx is a multiple of READ_BUF_SIZ the buffer
 * is first grown with realloc() to idx + READ_BUF_SIZ bytes, and eprint() is
 * called if that fails.  The expansion is an `if` statement followed by an
 * lvalue, so use it only as a complete statement inside braces.  Both
 * arguments are evaluated more than once.
 */
#define ADDC(b,i) if ((i) % READ_BUF_SIZ == 0) { (b) = realloc((b), (i) + READ_BUF_SIZ); if (!(b)) eprint("realloc:"); } (b)[(i)]
21
/*
 * Common parser signature: (begin, end, newblock).
 * process() treats a return of 0 as "no match", advances by the absolute
 * value otherwise, and starts a new block after a negative return.
 */
typedef int (*Parser)(const char *, const char *, int);
/* One entry of the lineprefix / underline / surround tables. */
typedef struct {
	char *search;	/* literal marker text compared against the input */
	int process;	/* non-zero: matched text is passed to process(); dolineprefix() passes it as a new block when >= 2 */
	char *before, *after;	/* markup strings from the tables; not read by any function visible in this file */
} Tag;
28
static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
static int dolink(const char *begin, const char *end, int newblock); /* Parser for links and images */
static int dolist(const char *begin, const char *end, int newblock); /* Parser for lists */
static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for <...> short links (URLs are printed, mail addresses only consumed) */
static int dosurround(const char *begin, const char *end, int newblock); /* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */
static void *ereallocz(void *p, size_t size); /* realloc() wrapper that calls eprint() on failure */
static void hprint(const char *begin, const char *end); /* empty stub in this tool: nothing is printed */
static void process(const char *begin, const char *end, int newblock); /* Processes range between begin and end. */
39
/*
 * list of parsers
 * process() tries them in this order at every position and stops at the
 * first one that returns non-zero.
 */
static Parser parsers[] = {
	dounderline, dolineprefix, dolist, doparagraph, dosurround, dolink, doshortlink,
};
44
/*
 * Line prefixes recognised by dolineprefix(), tried in order.
 * process: 0 = collected text is dropped, 1 = passed to process() inline,
 * 2 = passed to process() as a new block.
 * A search string ending in '\n' matches a whole line and is consumed as is.
 */
static Tag lineprefix[] = {
	/* NOTE(review): this prefix is a single space here; the original may have had more spaces that were collapsed - confirm */
	{ " ", 0, "<pre><code>", "</code></pre>" },
	{ "\t", 0, "<pre><code>", "</code></pre>" },
	{ "> ", 2, "<blockquote>", "</blockquote>" },
	{ "###### ", 1, "<h6>", "</h6>" },
	{ "##### ", 1, "<h5>", "</h5>" },
	{ "#### ", 1, "<h4>", "</h4>" },
	{ "### ", 1, "<h3>", "</h3>" },
	{ "## ", 1, "<h2>", "</h2>" },
	{ "# ", 1, "<h1>", "</h1>" },
	{ "- - -\n", 1, "<hr/>", ""},
};
57
/*
 * Underline characters recognised by dounderline(); only the first character
 * of search is compared against the line following a heading.
 */
static Tag underline[] = {
	{ "=", 1, "<h1>", "</h1>\n" },
	{ "-", 1, "<h2>", "</h2>\n" },
};
62
/*
 * Delimiters recognised by dosurround(), tried in order so that longer
 * markers are tested before their shorter prefixes.  Entries with
 * process == 0 hand their content to hprint(), the others to process().
 */
static Tag surround[] = {
	{ "``", 0, "<code>", "</code>" },
	{ "`", 0, "<code>", "</code>" },
	{ "___", 1, "<b><i>", "</i></b>" },
	{ "***", 1, "<b><i>", "</i></b>" },
	{ "__", 1, "<b>", "</b>" },
	{ "**", 1, "<b>", "</b>" },
	{ "_", 1, "<i>", "</i>" },
	{ "*", 1, "<i>", "</i>" },
};
73
/*
 * Print a printf-style message to stderr, followed by a newline, and exit
 * with status 1.  If the format string ends in ':' the strerror() text for
 * the errno value seen on entry is appended to the message.
 * Never returns.
 */
void
eprint(const char *format, ...)
{
	/* capture errno first: vfprintf() below is allowed to overwrite it */
	int saved_errno = errno;
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
	if (format[0] && format[strlen(format) - 1] == ':')
		fputs(strerror(saved_errno), stderr);
	fputc('\n', stderr);
	exit(1);
}
87
88 int
89 dolineprefix(const char *begin, const char *end, int newblock)
90 {
91 unsigned int i, j, l;
92 char *buffer;
93 const char *p;
94
95 if (newblock)
96 p = begin;
97 else if (*begin == '\n')
98 p = begin + 1;
99 else
100 return 0;
101 for (i = 0; i < LEN(lineprefix); i++) {
102 l = strlen(lineprefix[i].search);
103 if (end - p < l)
104 continue;
105 if (strncmp(lineprefix[i].search, p, l))
106 continue;
107 if (lineprefix[i].search[l-1] == '\n') {
108 return l;
109 }
110 if (!(buffer = malloc(BUFSIZ)))
111 eprint("malloc:");
112 buffer[0] = '\0';
113
114 for (j = 0, p += l; p < end; p++, j++) {
115 ADDC(buffer, j) = *p;
116 if (*p == '\n' && p + l < end) {
117 if (strncmp(lineprefix[i].search, p + 1, l) != 0)
118 break;
119 p += l;
120 }
121 }
122
123 ADDC(buffer, j) = '\0';
124 if (lineprefix[i].process)
125 process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
126 free(buffer);
127 return -(p - begin);
128 }
129 return 0;
130 }
131
/*
 * Parser for inline links "[desc](url)" and images "![desc](url)", with an
 * optional quoted title after the url.
 *
 * The url is written to stdout followed by a newline.  For a link (not an
 * image) the description is then passed to process() so that links nested
 * inside it are found as well.
 *
 * Returns the number of bytes consumed, or 0 if begin does not start a link.
 *
 * NOTE(review): the "![" and "](" string literals and the statements between
 * them were restored from damaged text; confirm against the upstream source.
 */
int
dolink(const char *begin, const char *end, int newblock)
{
	int img, len, sep;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title;

	if (*begin == '[')
		img = 0;
	else if (strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	desc = begin + 1 + img;
	if (!(p = strstr(desc, "](")) || p > end)
		return 0;
	/* every image opened inside the description needs its own "](" skipped */
	for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
		if (!(p = strstr(p + 1, "](")) || p > end)
			return 0;
	descend = p;
	link = p + 2;
	if (!(q = strchr(link, ')')) || q > end)
		return 0;
	if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
		sep = p[0]; /* separator: can be " or ' */
		title = p + 1;
		/* strip trailing whitespace; cast keeps isspace() defined for bytes >= 0x80 */
		for (linkend = p; linkend > link && isspace((unsigned char)linkend[-1]); linkend--)
			;
		if (!(p = strchr(title, sep)) || p > q)
			return 0;
		/* consume the closing quote and the character after it */
		len = p + 2 - begin;
	} else {
		linkend = q;
		len = q + 1 - begin;
	}
	fwrite(link, 1, linkend - link, stdout);
	fputs("\n", stdout);
	if (!img)
		process(desc, descend, 0);
	return len;
}
182
/*
 * Parser for unordered ("-", "*", "+") and ordered ("<digits>.") lists.
 *
 * Matching starts at begin when newblock is set, otherwise just after a
 * leading '\n'.  Each list item is copied into a scratch buffer with its
 * marker/indentation removed and then handed to process().
 *
 * Returns 0 if no list starts here, otherwise the negated number of bytes
 * consumed (trailing newlines excluded).
 */
int
dolist(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, indent, run, ul, isblock;
	const char *p, *q;
	char *buffer = NULL;

	isblock = 0;
	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	q = p;
	if (*p == '-' || *p == '*' || *p == '+')
		ul = 1;
	else {
		ul = 0;
		/* ordered list: digits followed by '.' */
		for (; p < end && *p >= '0' && *p <= '9'; p++)
			;
		if (p >= end || *p != '.')
			return 0;
	}
	p++;
	/* the marker must be followed by at least one space or tab */
	if (p >= end || !(*p == ' ' || *p == '\t'))
		return 0;
	for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
		;
	/* width of marker plus following whitespace on the first line */
	indent = p - q;
	buffer = ereallocz(buffer, BUFSIZ);
	run = 1;
	/* outer loop: one iteration per list item */
	for (; p < end && run; p++) {
		/* inner loop: copy the current item into buffer */
		for (i = 0; p < end && run; p++, i++) {
			if (*p == '\n') {
				if (p + 1 == end)
					break;
				else if (p[1] == '\n') {
					/* blank line: stop unless the next line continues the list */
					p++;
					ADDC(buffer, i) = '\n';
					i++;
					run = 0;
					isblock++;
				}
				/* q = start of the next line; j counts how much of it looks like marker/indent */
				q = p + 1;
				j = 0;
				if (ul && (*q == '-' || *q == '*' || *q == '+'))
					j = 1;
				else if (!ul) {
					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++)
						;
					if (q + j == end)
						break;
					if (j > 0 && q[j] == '.')
						j++;
					else
						j = 0;
				}
				if (q + indent < end)
					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++)
						;
				if (j == indent) {
					/* next line belongs to the list: skip its marker/indent */
					ADDC(buffer, i) = '\n';
					i++;
					p += indent;
					run = 1;
					if (*q == ' ' || *q == '\t')
						p++;	/* indented continuation of the same item */
					else
						break;	/* new marker: finish this item */
				}
			}
			ADDC(buffer, i) = *p;
		}
		ADDC(buffer, i) = '\0';
		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
	}
	free(buffer);
	/* back up over trailing newlines so they are not counted as consumed */
	p--;
	while (*(--p) == '\n')
		;
	return -(p - begin + 1);
}
266
/*
 * Parser for paragraphs.  Only applies at the start of a block.
 *
 * The paragraph runs up to the first blank line ("\n\n") or to end, whichever
 * comes first; its text is passed to process() as inline content.
 *
 * Returns 0 when not at a block start or when the paragraph is empty,
 * otherwise the negated paragraph length.
 */
int
doparagraph(const char *begin, const char *end, int newblock)
{
	const char *stop;

	if (newblock == 0)
		return 0;

	stop = strstr(begin, "\n\n");
	if (stop == NULL || stop > end)
		stop = end;

	if (stop != begin) {
		process(begin, stop, 0);
		return -(stop - begin);
	}
	return 0;
}
282
/*
 * Parser for short links of the form "<...>".
 *
 * The text between the angle brackets must not contain a space, tab or
 * newline.  A '#' or ':' marks it as a URL; an '@' seen before any of those
 * marks it as a mail address.  URLs are written to stdout followed by a
 * newline; mail addresses are consumed without output.
 *
 * Returns the number of bytes consumed including both brackets, or 0 when
 * begin does not start a short link.
 */
int
doshortlink(const char *begin, const char *end, int newblock)
{
	const char *cur;
	int kind = 0;	/* 0: undecided, 1: mail address, -1: URL */

	if (begin[0] != '<')
		return 0;

	for (cur = begin + 1; cur != end; cur++) {
		char c = *cur;

		if (c == ' ' || c == '\t' || c == '\n')
			return 0;

		if (c == '#' || c == ':') {
			kind = -1;
		} else if (c == '@') {
			if (kind == 0)
				kind = 1;
		} else if (c == '>') {
			if (kind == 0)
				return 0;
			if (kind != 1) {
				fwrite(begin + 1, 1, cur - begin - 1, stdout);
				fputs("\n", stdout);
			}
			return cur - begin + 1;
		}
	}
	return 0;
}
317
318 int
319 dosurround(const char *begin, const char *end, int newblock)
320 {
321 unsigned int i, l;
322 const char *p, *start, *stop;
323
324 for (i = 0; i < LEN(surround); i++) {
325 l = strlen(surround[i].search);
326 if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
327 continue;
328 start = begin + l;
329 p = start - 1;
330 do {
331 stop = p;
332 p = strstr(p + 1, surround[i].search);
333 } while (p && p[-1] == '\\');
334 if (p && p[-1] != '\\')
335 stop = p;
336 if (!stop || stop < start || stop >= end)
337 continue;
338 if (surround[i].process)
339 process(start, stop, 0);
340 else
341 hprint(start, stop);
342 return stop - begin + l;
343 }
344 return 0;
345 }
346
347 int
348 dounderline(const char *begin, const char *end, int newblock)
349 {
350 unsigned int i, j, l;
351 const char *p;
352
353 if (!newblock)
354 return 0;
355 p = begin;
356 for (l = 0; p + l != end && p[l] != '\n'; l++)
357 ;
358 p += l + 1;
359 if (l == 0)
360 return 0;
361 for (i = 0; i < LEN(underline); i++) {
362 for (j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++)
363 ;
364 if (j >= l) {
365 if (underline[i].process)
366 process(begin, begin + l, 0);
367 else
368 hprint(begin, begin + l);
369 return -(j + p - begin);
370 }
371 }
372 return 0;
373 }
374
/*
 * realloc() wrapper: resizes p to size bytes and returns the new pointer.
 * On failure it reports the error through eprint(), which exits the program.
 * The memory is not zeroed.
 */
void *
ereallocz(void *p, size_t size)
{
	/* %zu is the conversion matching size_t */
	if (!(p = realloc(p, size)))
		eprint("realloc: could not allocate %zu bytes:", size);
	return p;
}
382
/*
 * Intentionally empty: this tool prints nothing for text handed to hprint().
 * The function is kept so the parsers can still call it.
 */
void
hprint(const char *begin, const char *end)
{
	(void)begin;
	(void)end;
}
387
388 void
389 process(const char *begin, const char *end, int newblock)
390 {
391 const char *p, *q;
392 int affected;
393 unsigned int i;
394
395 for (p = begin; p < end;) {
396 if (newblock)
397 while (*p == '\n')
398 if (++p == end)
399 return;
400 affected = 0;
401 for (i = 0; i < LEN(parsers) && !affected; i++)
402 affected = parsers[i](p, end, newblock);
403 p += abs(affected);
404 if (!affected) {
405 p++;
406 }
407 for (q = p; q != end && *q == '\n'; q++)
408 ;
409 if (q == end)
410 return;
411 else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
412 newblock = 1;
413 else
414 newblock = (affected < 0);
415 }
416 }
417
418 int
419 main(int argc, char **argv)
420 {
421 char *buffer;
422 int s;
423 size_t len, bsize;
424
425 if (pledge("stdio", NULL) < 0)
426 eprint("pledge:");
427
428 bsize = 2 * READ_BUF_SIZ;
429 buffer = ereallocz(NULL, bsize);
430 len = 0;
431 while ((s = fread(buffer + len, 1, READ_BUF_SIZ, stdin))) {
432 len += s;
433 if (READ_BUF_SIZ + len + 1 > bsize) {
434 bsize += READ_BUF_SIZ;
435 if (!(buffer = realloc(buffer, bsize)))
436 eprint("realloc:");
437 }
438 }
439 buffer[len] = '\0';
440 process(buffer, buffer + len, 1);
441 free(buffer);
442
443 return 0;
444 }