util.c - tscrape - twitter scraper (not working anymore)
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       util.c (2290B)
       ---
            1 #include <sys/types.h>
            2 
            3 #include <ctype.h>
            4 #include <errno.h>
            5 #include <stdio.h>
            6 #include <stdlib.h>
            7 #include <string.h>
            8 #include <time.h>
            9 #include <wchar.h>
           10 
           11 #include "util.h"
           12 
           13 /* Read a field-separated line from 'fp',
           14  * separated by a character 'separator',
           15  * 'fields' is a list of pointers with a size of FieldLast (must be >0).
           16  * 'line' buffer is allocated using malloc, 'size' will contain the allocated
           17  * buffer size.
           18  * returns: amount of fields read (>0) or -1 on error. */
           19 size_t
           20 parseline(char *line, char *fields[FieldLast])
           21 {
           22         char *prev, *s;
           23         size_t i;
           24 
           25         for (prev = line, i = 0;
           26             (s = strchr(prev, '\t')) && i < FieldLast - 1;
           27             i++) {
           28                 *s = '\0';
           29                 fields[i] = prev;
           30                 prev = s + 1;
           31         }
           32         fields[i++] = prev;
           33         /* make non-parsed fields empty. */
           34         for (; i < FieldLast; i++)
           35                 fields[i] = "";
           36 
           37         return i;
           38 }
           39 
           40 /* Parse time to time_t, assumes time_t is signed, ignores fractions. */
           41 int
           42 strtotime(const char *s, time_t *t)
           43 {
           44         long long l;
           45         char *e;
           46 
           47         errno = 0;
           48         l = strtoll(s, &e, 10);
           49         if (errno || *s == '\0' || *e)
           50                 return -1;
           51         /* NOTE: assumes time_t is 64-bit on 64-bit platforms:
           52                  long long (atleast 32-bit) to time_t. */
           53         if (t)
           54                 *t = (time_t)l;
           55 
           56         return 0;
           57 }
           58 
           59 /* Escape characters below as HTML 2.0 / XML 1.0. */
           60 void
           61 xmlencode(const char *s, FILE *fp)
           62 {
           63         for (; *s; s++) {
           64                 switch(*s) {
           65                 case '<':  fputs("&lt;",   fp); break;
           66                 case '>':  fputs("&gt;",   fp); break;
           67                 case '\'': fputs("&#39;",  fp); break;
           68                 case '&':  fputs("&amp;",  fp); break;
           69                 case '"':  fputs("&quot;", fp); break;
           70                 default:   fputc(*s, fp);
           71                 }
           72         }
           73 }
           74 
           75 /* print `len' columns of characters. If string is shorter pad the rest with
           76  * characters `pad`. */
           77 void
           78 printutf8pad(FILE *fp, const char *s, size_t len, int pad)
           79 {
           80         wchar_t wc;
           81         size_t col = 0, i, slen;
           82         int rl, w;
           83 
           84         if (!len)
           85                 return;
           86 
           87         slen = strlen(s);
           88         for (i = 0; i < slen; i += rl) {
           89                 rl = w = 1;
           90                 if ((unsigned char)s[i] < 32)
           91                         continue;
           92                 if ((unsigned char)s[i] >= 127) {
           93                         if ((rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4)) <= 0)
           94                                 break;
           95                         if ((w = wcwidth(wc)) == -1)
           96                                 continue;
           97                 }
           98                 if (col + w > len || (col + w == len && s[i + rl])) {
           99                         fputs("\xe2\x80\xa6", fp);
          100                         col++;
          101                         break;
          102                 }
          103                 fwrite(&s[i], 1, rl, fp);
          104                 col += w;
          105         }
          106         for (; col < len; ++col)
          107                 putc(pad, fp);
          108 }