util.c - tscrape - twitter scraper (not working anymore)
(HTM) git clone git://git.codemadness.org/tscrape
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
util.c (2290B)
---
1 #include <sys/types.h>
2
3 #include <ctype.h>
4 #include <errno.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <time.h>
9 #include <wchar.h>
10
11 #include "util.h"
12
13 /* Read a field-separated line from 'fp',
14 * separated by a character 'separator',
15 * 'fields' is a list of pointers with a size of FieldLast (must be >0).
16 * 'line' buffer is allocated using malloc, 'size' will contain the allocated
17 * buffer size.
18 * returns: amount of fields read (>0) or -1 on error. */
19 size_t
20 parseline(char *line, char *fields[FieldLast])
21 {
22 char *prev, *s;
23 size_t i;
24
25 for (prev = line, i = 0;
26 (s = strchr(prev, '\t')) && i < FieldLast - 1;
27 i++) {
28 *s = '\0';
29 fields[i] = prev;
30 prev = s + 1;
31 }
32 fields[i++] = prev;
33 /* make non-parsed fields empty. */
34 for (; i < FieldLast; i++)
35 fields[i] = "";
36
37 return i;
38 }
39
/* Parse a base-10 integer string 's' as a timestamp.
 * The whole string must be consumed by strtoll(): an empty string, trailing
 * characters (this includes a fractional part such as ".5") or a value that
 * makes strtoll() set errno are all rejected.
 * If 't' is not NULL the parsed value is stored in it, converted to time_t;
 * assumes time_t is signed and wide enough to hold the value.
 * returns: 0 on success, -1 on failure ('*t' is left untouched). */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	/* clear errno first so a failure of strtoll() can be detected. */
	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1;

	/* reject empty input and anything left over after the number. */
	if (s[0] == '\0' || end[0] != '\0')
		return -1;

	/* NOTE: the conversion is unchecked: long long (at least 64-bit) is
	   cast to time_t as-is. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
58
/* Write string 's' to 'fp', escaping the characters below as HTML 2.0 /
 * XML 1.0 so the text cannot be interpreted as markup:
 *   <  ->  &lt;     >  ->  &gt;     &  ->  &amp;
 *   "  ->  &quot;   '  ->  &#39;
 * All other bytes are written unchanged. Write errors are not reported. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; s++) {
		switch (*s) {
		case '<':  fputs("&lt;", fp);   break;
		case '>':  fputs("&gt;", fp);   break;
		/* numeric reference: unlike a named entity it does not depend
		   on the entity being declared by the document type. */
		case '\'': fputs("&#39;", fp);  break;
		case '&':  fputs("&amp;", fp);  break;
		case '"':  fputs("&quot;", fp); break;
		default:   fputc(*s, fp);       break;
		}
	}
}
74
/* Print at most `len' columns of string `s' to `fp'. If the string is shorter
 * the rest is padded with the character `pad'; if it does not fit it is cut
 * off and a one-column ellipsis (U+2026, written as UTF-8) is printed in the
 * last column. Nothing is printed when `len' is 0.
 * Bytes below 32 and characters for which wcwidth() returns -1 are skipped.
 * Bytes >= 127 are decoded with mbtowc(), so the result depends on the
 * current locale; processing of the string stops at the first sequence that
 * cannot be decoded and the remaining columns are padded. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
	wchar_t wc;
	size_t used = 0, pos = 0, total, avail;
	int nbytes, width;
	unsigned char c;

	if (len == 0)
		return;

	total = strlen(s);
	while (pos < total) {
		/* defaults for a plain single-byte character. */
		nbytes = 1;
		width = 1;
		c = (unsigned char)s[pos];

		/* skip control characters. */
		if (c < 32) {
			pos += nbytes;
			continue;
		}

		if (c >= 127) {
			/* hand at most 4 bytes to the decoder. */
			avail = total - pos;
			if (avail > 4)
				avail = 4;
			nbytes = mbtowc(&wc, s + pos, avail);
			if (nbytes <= 0)
				break;
			width = wcwidth(wc);
			if (width == -1) {
				pos += nbytes;
				continue;
			}
		}

		/* the character does not fit, or it would fill the last column
		   while more input follows: print the ellipsis instead. */
		if (used + width > len ||
		    (used + width == len && s[pos + nbytes] != '\0')) {
			fputs("\xe2\x80\xa6", fp);
			used++;
			break;
		}

		fwrite(s + pos, 1, nbytes, fp);
		used += width;
		pos += nbytes;
	}

	/* fill the remaining columns. */
	while (used < len) {
		putc(pad, fp);
		used++;
	}
}