main.c - randomcrap - random crap programs of varying quality
       git clone git://git.codemadness.org/randomcrap
       ---
       main.c (2443B)
       ---
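/*
 * Tiny full-text search over a tab-separated index: the query is
 * sanitized and tokenized, and a line of "index.txt" matches when every
 * token matches a word prefix in field 2 (the terms); field 1 (the
 * filename) is printed for each matching line.
 *
 * Build example (an assumption, not part of the original listing):
 *
 *   cc -std=c99 -o search main.c
 */
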
#define _POSIX_C_SOURCE 200809L /* for getline() */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TERM_MAX 1024
#define TOKEN_MAX 10

struct token {
        char term[TERM_MAX];
};

void
sanitize_search(char *s)
{
        char *p;

        for (p = s; *s; s++) {
                if (*s == '\'') {
                        continue; /* remove quotes */
                } else if ((*s >= 'a' && *s <= 'z') || (*s >= '0' && *s <= '9')) {
                        *p = *s; /* keep lowercase letters and digits */
                } else if (*s >= 'A' && *s <= 'Z') {
                        *p = (*s - 'A') + 'a'; /* lowercase */
                } else {
                        *p = ' '; /* anything else becomes a space */
                }
                p++;
        }
        *p = '\0';
}

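/*
 * Illustrative example (not in the original file): the query
 * "RSS & Atom's feed" is sanitized in-place to "rss   atoms feed":
 * quotes are removed, uppercase is lowercased and every other
 * non-alphanumeric character becomes a space.
 */
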
size_t
tokenize_search(struct token *tokens, size_t tokensiz, const char *s)
{
        const char *e;
        size_t len, n = 0;

        /* skip initial whitespace */
        for (; *s && *s == ' '; s++)
                ;

        for (e = s; *s; e++) {
                if (*e == ' ' || *e == '\0') {
                        if (n >= tokensiz)
                                return n; /* no room left, possibly truncated */

                        len = e - s;
                        /* never overflow the term buffer */
                        if (len >= sizeof(tokens[n].term))
                                len = sizeof(tokens[n].term) - 1;
                        memcpy(tokens[n].term, s, len);
                        tokens[n++].term[len] = '\0';

                        /* skip whitespace to the start of the next token */
                        for (; *e && *e == ' '; e++)
                                ;

                        s = e;
                }
        }
        return n;
}

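/*
 * Illustrative example (not in the original file): tokenize_search(tokens,
 * TOKEN_MAX, "rss   atoms feed") stores the tokens "rss", "atoms" and
 * "feed" and returns 3. A return value equal to tokensiz is ambiguous: it
 * can mean exactly tokensiz tokens or a truncated query, which is why
 * main() below rejects it.
 */
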
/* match terms on a line: every token must match at the start of a word */
int
matchline(const char *line, struct token *tokens, size_t tokencount)
{
        const char *s;
        size_t n, nmatches = 0;

        for (n = 0; n < tokencount; n++) {
                /* check every occurrence, not just the first one */
                for (s = line; (s = strstr(s, tokens[n].term)); s++) {
                        /* match at the start of a word (prefix match) */
                        if (s == line || s[-1] == ' ') {
                                nmatches++;
                                break;
                        }
                }
        }

        return (nmatches == tokencount);
}

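/*
 * Illustrative example (not in the original file): with the tokens "rss"
 * and "feed", matchline() returns 1 for "an rss and atom feed parser",
 * but 0 for "xrss feed", because there "rss" only occurs in the middle
 * of a word.
 */
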
int
main(void)
{
        FILE *fp;
        char *line = NULL;
        size_t linesiz = 0;
        ssize_t n;
        const char *query = "RSS atom sfeed";
        char search[TERM_MAX];
        struct token tokens[TOKEN_MAX];
        size_t tokencount;
        int r;
        const char *terms;

        r = snprintf(search, sizeof(search), "%s", query);
        if (r < 0 || (size_t)r >= sizeof(search)) {
                fprintf(stderr, "please reduce the length of the search terms to max %d bytes\n", TERM_MAX - 1);
                return 1;
        }
        sanitize_search(search);
        tokencount = tokenize_search(tokens, TOKEN_MAX, search);
        /* a full token array may mean the query was truncated */
        if (tokencount >= TOKEN_MAX) {
                fprintf(stderr, "please reduce the search terms to max %d tokens\n", TOKEN_MAX);
                return 1;
        }

        if (!(fp = fopen("index.txt", "r"))) {
                fprintf(stderr, "cannot open the search index database\n");
                return 1;
        }
        while ((n = getline(&line, &linesiz, fp)) > 0) {
                /* strip the trailing newline */
                if (line[n - 1] == '\n')
                        line[--n] = '\0';

                /* skip to field 2 for the terms */
                if (!(terms = strchr(line, '\t')))
                        continue; /* invalid line */

                /* output the filename (field 1) on a match; terms + 1 skips the tab itself */
                if (matchline(terms + 1, tokens, tokencount)) {
                        fwrite(line, 1, terms - line, stdout);
                        putchar('\n');
                }
        }
        free(line);
        fclose(fp);

        return 0;
}
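
/*
 * A minimal sketch of the expected "index.txt" format, assumed from the
 * code above: one record per line, the filename in field 1 and the
 * sanitized search terms in field 2, separated by a single tab, e.g.:
 *
 *   blog/sfeed.html<TAB>sfeed a simple rss and atom parser
 *
 * With the query "RSS atom sfeed" every token matches a word prefix in
 * the terms field, so "blog/sfeed.html" is printed.
 */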