main.c - randomcrap - random crap programs of varying quality
git clone git://git.codemadness.org/randomcrap
---
main.c (2443B)
---
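/*
 * Minimal full-text search over a plain-text index: each line of
 * "index.txt" is expected to be "filename<TAB>terms". The hard-coded
 * query is sanitized, split into tokens and the filename of every line
 * whose terms contain all tokens at the start of a word is printed.
 */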
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TERM_MAX 1024
#define TOKEN_MAX 10

struct token {
	char term[TERM_MAX];
};

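/*
 * Sanitize the query in-place: remove single quotes, lowercase A-Z and
 * replace every other byte that is not a-z or 0-9 with a space.
 */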
void
sanitize_search(char *s)
{
	char *p;

	for (p = s; *s; s++) {
		if (*s == '\'') {
			continue; /* remove */
		} else if ((*s >= 'a' && *s <= 'z') || (*s >= '0' && *s <= '9')) {
			*p = *s; /* keep: copy, p may trail s after a removal */
		} else if (*s >= 'A' && *s <= 'Z') {
			*p = (*s - 'A') + 'a'; /* lowercase */
		} else {
			*p = ' ';
		}
		p++;
	}
	*p = '\0';
}

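/*
 * Split the sanitized query on spaces into tokens, storing at most
 * tokensiz of them. Returns the number of tokens stored.
 */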
size_t
tokenize_search(struct token *tokens, size_t tokensiz, const char *s)
{
	const char *e;
	size_t len, n = 0;

	/* skip initial whitespace */
	for (; *s && *s == ' '; s++)
		;

	for (e = s; *s; e++) {
		if (*e == ' ' || *e == '\0') {
			if (n >= tokensiz)
				return n;

			len = e - s;
			memcpy(tokens[n].term, s, len);
			tokens[n++].term[len] = '\0';

			/* skip white-space */
			for (; *e && *e == ' '; e++)
				;

			s = e;
		}
	}
	return n;
}

/* match all tokens against the terms on a line */
int
matchline(const char *line, struct token *tokens, size_t tokencount)
{
	const char *s;
	size_t n, nmatches = 0;

	for (n = 0; n < tokencount; n++) {
		s = strstr(line, tokens[n].term);
		if (s) {
			/* the token must match at the start of a word (prefix
			 * match); only the first occurrence is checked */
			if (s == line || s[-1] == ' ')
				nmatches++;
		}
	}

	return (nmatches == tokencount);
}

int
main(void)
{
	FILE *fp;
	char *line = NULL;
	size_t linesiz = 0;
	ssize_t n;
	const char *query = "RSS atom sfeed";
	char search[TERM_MAX];
	struct token tokens[TOKEN_MAX];
	size_t tokencount;
	int r;
	const char *terms;

	r = snprintf(search, sizeof(search), "%s", query);
	if (r < 0 || (size_t)r >= sizeof(search)) {
		fprintf(stderr, "please reduce the length of the search terms to max %d bytes\n", TERM_MAX);
		return 1;
	}
	sanitize_search(search);
	tokencount = tokenize_search(tokens, TOKEN_MAX, search);
	if (tokencount >= TOKEN_MAX) {
		fprintf(stderr, "please reduce the search terms to max %d tokens\n", TOKEN_MAX);
		return 1;
	}

	if (!(fp = fopen("index.txt", "r"))) {
		fprintf(stderr, "cannot open the search index database\n");
		return 1;
	}
	while ((n = getline(&line, &linesiz, fp)) > 0) {
		if (line[n - 1] == '\n')
			line[--n] = '\0';

		/* skip to field 2 for the terms; terms points at the '\t' */
		if (!(terms = strchr(line, '\t')))
			continue; /* invalid line */

		/* on a match output the filename (field 1); match against the
		 * text after the '\t' so the first term of a line can also
		 * match at the start of a word */
		if (matchline(terms + 1, tokens, tokencount)) {
			fwrite(line, 1, terms - line, stdout);
			putchar('\n');
		}
	}
	free(line);
	fclose(fp);

	return 0;
}
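
A hypothetical test run; the compiler invocation, binary name and index
contents below are made-up examples, not part of the repository:

	$ cc -o search main.c
	$ printf 'blog/sfeed.txt\tsfeed a simple rss and atom feed parser\n' > index.txt
	$ printf 'blog/json.txt\tparsing json in c\n' >> index.txt
	$ ./search
	blog/sfeed.txt

Only the first index line is printed: its terms contain all three query
tokens ("rss", "atom", "sfeed") at the start of a word.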