md-printlinks.c - sites - public wiki contents of suckless.org
 (HTM) git clone git://git.suckless.org/sites
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       md-printlinks.c (9757B)
       ---
            1 /* process Markdown (based on smu code), but only output links */
            2 #include <sys/types.h>
            3 
            4 #include <ctype.h>
            5 #include <errno.h>
            6 #include <stdarg.h>
            7 #include <stdio.h>
            8 #include <stdint.h>
            9 #include <stdlib.h>
           10 #include <string.h>
           11 
           12 #ifdef __OpenBSD__
           13 #include <unistd.h>
           14 #else
           15 #define pledge(a,b) 0
           16 #endif
           17 
           18 #define READ_BUF_SIZ 16384
           19 #define LEN(x)  sizeof(x)/sizeof(x[0])
           20 #define ADDC(b,i)  if (i % READ_BUF_SIZ == 0) { b = realloc(b, (i + READ_BUF_SIZ)); if (!b) eprint("realloc:"); } b[i]
           21 
           22 typedef int (*Parser)(const char *, const char *, int);
           23 typedef struct {
           24         char *search;
           25         int process;
           26         char *before, *after;
           27 } Tag;
           28 
/*
 * Forward declarations. Every do*() function follows the Parser contract:
 * it returns 0 when it does not apply at begin, otherwise a non-zero value
 * whose magnitude is the number of bytes consumed.
 */
static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags (code, quote, headings, rule) */
static int dolink(const char *begin, const char *end, int newblock);      /* Parser for [text](url) links and ![alt](url) images */
static int dolist(const char *begin, const char *end, int newblock);      /* Parser for bulleted and numbered lists */
static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for <url> and <mail@address> short links */
static int dosurround(const char *begin, const char *end, int newblock);  /* Parser for surrounding tags (code, bold, italic) */
static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline-style headings */
static void *ereallocz(void *p, size_t size);                             /* realloc() that exits on failure */
static void hprint(const char *begin, const char *end);                   /* text output hook; defined as an empty function in this file */
static void process(const char *begin, const char *end, int newblock);    /* Processes range between begin and end. */
           39 
           40 /* list of parsers */
           41 static Parser parsers[] = {
           42         dounderline, dolineprefix, dolist, doparagraph, dosurround, dolink, doshortlink,
           43 };
           44  
           45 static Tag lineprefix[] = {
           46         { "   ",        0,        "<pre><code>", "</code></pre>" },
           47         { "\t",                0,        "<pre><code>", "</code></pre>" },
           48         { "> ",                2,        "<blockquote>",        "</blockquote>" },
           49         { "###### ",        1,        "<h6>",                "</h6>" },
           50         { "##### ",        1,        "<h5>",                "</h5>" },
           51         { "#### ",        1,        "<h4>",                "</h4>" },
           52         { "### ",        1,        "<h3>",                "</h3>" },
           53         { "## ",        1,        "<h2>",                "</h2>" },
           54         { "# ",                1,        "<h1>",                "</h1>" },
           55         { "- - -\n",        1,        "<hr/>",        ""},
           56 };
           57 
           58 static Tag underline[] = {
           59         { "=",                1,        "<h1>",                "</h1>\n" },
           60         { "-",                1,        "<h2>",                "</h2>\n" },
           61 };
           62 
           63 static Tag surround[] = {
           64         { "``",                0,        "<code>",        "</code>" },
           65         { "`",                0,        "<code>",        "</code>" },
           66         { "___",        1,        "<b><i>",        "</i></b>" },
           67         { "***",        1,        "<b><i>",        "</i></b>" },
           68         { "__",                1,        "<b>",                "</b>" },
           69         { "**",                1,        "<b>",                "</b>" },
           70         { "_",                1,        "<i>",                "</i>" },
           71         { "*",                1,        "<i>",                "</i>" },
           72 };
           73 
           74 void
           75 eprint(const char *format, ...)
           76 {
           77         va_list ap;
           78 
           79         va_start(ap, format);
           80         vfprintf(stderr, format, ap);
           81         va_end(ap);
           82         if (format[0] && format[strlen(format) - 1] == ':')
           83                 fputs(strerror(errno), stderr);
           84         fputc('\n', stderr);
           85         exit(1);
           86 }
           87 
           88 int
           89 dolineprefix(const char *begin, const char *end, int newblock)
           90 {
           91         unsigned int i, j, l;
           92         char *buffer;
           93         const char *p;
           94 
           95         if (newblock)
           96                 p = begin;
           97         else if (*begin == '\n')
           98                 p = begin + 1;
           99         else
          100                 return 0;
          101         for (i = 0; i < LEN(lineprefix); i++) {
          102                 l = strlen(lineprefix[i].search);
          103                 if (end - p < l)
          104                         continue;
          105                 if (strncmp(lineprefix[i].search, p, l))
          106                         continue;
          107                 if (lineprefix[i].search[l-1] == '\n') {
          108                         return l;
          109                 }
          110                 if (!(buffer = malloc(BUFSIZ)))
          111                         eprint("malloc:");
          112                 buffer[0] = '\0';
          113 
          114                 for (j = 0, p += l; p < end; p++, j++) {
          115                         ADDC(buffer, j) = *p;
          116                         if (*p == '\n' && p + l < end) {
          117                                 if (strncmp(lineprefix[i].search, p + 1, l) != 0)
          118                                         break;
          119                                 p += l;
          120                         }
          121                 }
          122 
          123                 ADDC(buffer, j) = '\0';
          124                 if (lineprefix[i].process)
          125                         process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
          126                 free(buffer);
          127                 return -(p - begin);
          128         }
          129         return 0;
          130 }
          131 
          132 int
          133 dolink(const char *begin, const char *end, int newblock)
          134 {
          135         int img, len, sep;
          136         const char *desc, *link, *p, *q, *descend, *linkend;
          137         const char *title = NULL, *titleend = NULL;
          138 
          139         if (*begin == '[')
          140                 img = 0;
          141         else if (strncmp(begin, "![", 2) == 0)
          142                 img = 1;
          143         else
          144                 return 0;
          145         p = desc = begin + 1 + img;
          146         if (!(p = strstr(desc, "](")) || p > end)
          147                 return 0;
          148         for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
          149                 if (!(p = strstr(p + 1, "](")) || p > end)
          150                         return 0;
          151         descend = p;
          152         link = p + 2;
          153         if (!(q = strchr(link, ')')) || q > end)
          154                 return 0;
          155         if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
          156                 sep = p[0]; /* separator: can be " or ' */
          157                 title = p + 1;
          158                 /* strip trailing whitespace */
          159                 for (linkend = p; linkend > link && isspace(*(linkend - 1)); linkend--)
          160                         ;
          161                 if (!(p = strchr(title, sep)) || q > end || p > q)
          162                         return 0;
          163                 titleend = p;
          164                 len = p + 2 - begin;
          165         }
          166         else {
          167                 linkend = q;
          168                 len = q + 1 - begin;
          169         }
          170         if (img) {
          171                 fwrite(link, 1, linkend - link, stdout);
          172                 fputs("\n", stdout);
          173         }
          174         else {
          175                 fwrite(link, 1, linkend - link, stdout);
          176                 fputs("\n", stdout);
          177 
          178                 process(desc, descend, 0);
          179         }
          180         return len;
          181 }
          182 
/*
 * Parser for unordered ("-", "*", "+") and ordered ("<digits>.") lists.
 *
 * Applies only at the start of a block, or when begin points at a newline.
 * The marker must be followed by at least one space or tab. The text of
 * each list item is copied into a scratch buffer and passed to process().
 *
 * Returns 0 if begin does not start a list, otherwise a negative value
 * whose magnitude is the number of bytes consumed.
 */
int
dolist(const char *begin, const char *end, int newblock)
{
        unsigned int i, j, indent, run, ul, isblock;
        const char *p, *q;
        char *buffer = NULL;

        isblock = 0;
        if (newblock)
                p = begin;
        else if (*begin == '\n')
                p = begin + 1;
        else
                return 0;
        q = p; /* start of the first item's line */
        if (*p == '-' || *p == '*' || *p == '+')
                ul = 1;
        else {
                ul = 0;
                /* ordered list: digits followed by '.' */
                for (; p < end && *p >= '0' && *p <= '9'; p++)
                        ;
                if (p >= end || *p != '.')
                        return 0;
        }
        p++;
        if (p >= end || !(*p == ' ' || *p == '\t'))
                return 0;
        for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
                ;
        /* width of marker plus following whitespace; later lines are compared against it */
        indent = p - q;
        buffer = ereallocz(buffer, BUFSIZ);
        run = 1;
        /* outer loop: one pass per chunk handed to process(); inner loop copies its text */
        for (; p < end && run; p++) {
                for (i = 0; p < end && run; p++, i++) {
                        if (*p == '\n') {
                                if (p + 1 == end)
                                        break;
                                else if (p[1] == '\n') {
                                        /* blank line: tentatively end the list (run = 0) and count it */
                                        p++;
                                        ADDC(buffer, i) = '\n';
                                        i++;
                                        run = 0;
                                        isblock++;
                                }
                                q = p + 1; /* start of the following line */
                                j = 0;     /* number of leading bytes of that line matching marker/indent */
                                if (ul && (*q == '-' || *q == '*' || *q == '+'))
                                        j = 1;
                                else if (!ul) {
                                        for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++)
                                                ;
                                        if (q + j == end)
                                                break;
                                        if (j > 0 && q[j] == '.')
                                                j++;
                                        else
                                                j = 0;
                                }
                                if (q + indent < end)
                                        for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++)
                                                ;
                                if (j == indent) {
                                        /* following line lines up with this item: the list goes on */
                                        ADDC(buffer, i) = '\n';
                                        i++;
                                        p += indent;
                                        run = 1;
                                        if (*q == ' ' || *q == '\t')
                                                p++; /* whitespace-indented continuation of the current item */
                                        else
                                                break; /* a marker: the current item ends here */
                                }
                        }
                        ADDC(buffer, i) = *p;
                }
                ADDC(buffer, i) = '\0';
                /* newblock is set once blank lines have been seen between items */
                process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
        }
        free(buffer);
        /* step back over trailing newlines so they are not counted as consumed */
        p--;
        while (*(--p) == '\n')
                ;
        return -(p - begin + 1);
}
          266 
          267 int
          268 doparagraph(const char *begin, const char *end, int newblock)
          269 {
          270         const char *p;
          271 
          272         if (!newblock)
          273                 return 0;
          274         p = strstr(begin, "\n\n");
          275         if (!p || p > end)
          276                 p = end;
          277         if (p == begin)
          278                 return 0;
          279         process(begin, p, 0);
          280         return -(p - begin);
          281 }
          282 
          283 int
          284 doshortlink(const char *begin, const char *end, int newblock)
          285 {
          286         const char *p;
          287         int ismail = 0;
          288 
          289         if (*begin != '<')
          290                 return 0;
          291         for (p = begin + 1; p != end; p++) {
          292                 switch (*p) {
          293                 case ' ':
          294                 case '\t':
          295                 case '\n':
          296                         return 0;
          297                 case '#':
          298                 case ':':
          299                         ismail = -1;
          300                         break;
          301                 case '@':
          302                         if (ismail == 0)
          303                                 ismail = 1;
          304                         break;
          305                 case '>':
          306                         if (ismail == 0)
          307                                 return 0;
          308                         if (ismail != 1) {
          309                                 fwrite(begin + 1, 1, p - begin - 1, stdout);
          310                                 fputs("\n", stdout);
          311                         }
          312                         return p - begin + 1;
          313                 }
          314         }
          315         return 0;
          316 }
          317 
          318 int
          319 dosurround(const char *begin, const char *end, int newblock)
          320 {
          321         unsigned int i, l;
          322         const char *p, *start, *stop;
          323 
          324         for (i = 0; i < LEN(surround); i++) {
          325                 l = strlen(surround[i].search);
          326                 if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
          327                         continue;
          328                 start = begin + l;
          329                 p = start - 1;
          330                 do {
          331                         stop = p;
          332                         p = strstr(p + 1, surround[i].search);
          333                 } while (p && p[-1] == '\\');
          334                 if (p && p[-1] != '\\')
          335                         stop = p;
          336                 if (!stop || stop < start || stop >= end)
          337                         continue;
          338                 if (surround[i].process)
          339                         process(start, stop, 0);
          340                 else
          341                         hprint(start, stop);
          342                 return stop - begin + l;
          343         }
          344         return 0;
          345 }
          346 
          347 int
          348 dounderline(const char *begin, const char *end, int newblock)
          349 {
          350         unsigned int i, j, l;
          351         const char *p;
          352 
          353         if (!newblock)
          354                 return 0;
          355         p = begin;
          356         for (l = 0; p + l != end && p[l] != '\n'; l++)
          357                 ;
          358         p += l + 1;
          359         if (l == 0)
          360                 return 0;
          361         for (i = 0; i < LEN(underline); i++) {
          362                 for (j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++)
          363                         ;
          364                 if (j >= l) {
          365                         if (underline[i].process)
          366                                 process(begin, begin + l, 0);
          367                         else
          368                                 hprint(begin, begin + l);
          369                         return -(j + p - begin);
          370                 }
          371         }
          372         return 0;
          373 }
          374 
          375 void *
          376 ereallocz(void *p, size_t size)
          377 {
          378         if (!(p = realloc(p, size)))
          379                 eprint("realloc: could not allocate %u bytes:", size);
          380         return p;
          381 }
          382 
          383 void
          384 hprint(const char *begin, const char *end)
          385 {
          386 }
          387 
          388 void
          389 process(const char *begin, const char *end, int newblock)
          390 {
          391         const char *p, *q;
          392         int affected;
          393         unsigned int i;
          394 
          395         for (p = begin; p < end;) {
          396                 if (newblock)
          397                         while (*p == '\n')
          398                                 if (++p == end)
          399                                         return;
          400                 affected = 0;
          401                 for (i = 0; i < LEN(parsers) && !affected; i++)
          402                         affected = parsers[i](p, end, newblock);
          403                 p += abs(affected);
          404                 if (!affected) {
          405                         p++;
          406                 }
          407                 for (q = p; q != end && *q == '\n'; q++)
          408                         ;
          409                 if (q == end)
          410                         return;
          411                 else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
          412                         newblock = 1;
          413                 else
          414                         newblock = (affected < 0);
          415         }
          416 }
          417 
          418 int
          419 main(int argc, char **argv)
          420 {
          421         char *buffer;
          422         int s;
          423         size_t len, bsize;
          424 
          425         if (pledge("stdio", NULL) < 0)
          426                 eprint("pledge:");
          427 
          428         bsize = 2 * READ_BUF_SIZ;
          429         buffer = ereallocz(NULL, bsize);
          430         len = 0;
          431         while ((s = fread(buffer + len, 1, READ_BUF_SIZ, stdin))) {
          432                 len += s;
          433                 if (READ_BUF_SIZ + len + 1 > bsize) {
          434                         bsize += READ_BUF_SIZ;
          435                         if (!(buffer = realloc(buffer, bsize)))
          436                                 eprint("realloc:");
          437                 }
          438         }
          439         buffer[len] = '\0';
          440         process(buffer, buffer + len, 1);
          441         free(buffer);
          442 
          443         return 0;
          444 }