merge dbh and dbtext (WIP) - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
 (HTM) git clone git://git.codemadness.org/bmf
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 60b437c6d0bc19fc9f67ca8cfaf6cbfc50d47423
 (DIR) parent 4c3c79f49125ef555fba1df7f6cbab2c7b26ea00
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 27 Oct 2018 19:31:30 +0200
       
       merge dbh and dbtext (WIP)
       
       Diffstat:
         M Makefile                            |       2 --
         M bmf.c                               |       3 +--
         M dbh.c                               |     474 ++++++++++++++++++++++++++++++-
         M dbh.h                               |      45 +++++++++++++++++++++++++------
         D dbtext.c                            |     490 -------------------------------
         D dbtext.h                            |      49 -------------------------------
         M filt.h                              |      14 +++++++++-----
       
       7 files changed, 511 insertions(+), 566 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -9,7 +9,6 @@ SRC = \
                bmf.c \
                dbg.c \
                dbh.c \
       -        dbtext.c \
                filt.c \
                lex.c \
                str.c \
       @@ -21,7 +20,6 @@ HDR = \
                config.h \
                dbg.h \
                dbh.h \
       -        dbtext.h \
                filt.h \
                lex.h \
                str.h \
 (DIR) diff --git a/bmf.c b/bmf.c
       @@ -75,7 +75,6 @@ version(void)
        int
        main(int argc, char **argv)
        {
       -        dbfmt_t dbfmt = db_text;
                char *dbname = NULL;
                bool_t rdonly;
                runmode_t mode = mode_normal;
       @@ -152,7 +151,7 @@ main(int argc, char **argv)
                }
                stats.extrema = (discrim_t *) malloc(stats.keepers * sizeof(discrim_t));
        
       -        pdb = dbh_open(dbfmt, "localhost", dbname, "", "");
       +        pdb = dbh_open(dbname);
                if (pdb == NULL) {
                        fprintf(stderr, "%s: cannot open database\n", argv[0]);
                        exit(2);
 (DIR) diff --git a/dbh.c b/dbh.c
       @@ -16,7 +16,6 @@
        #include "vec.h"
        
        #include "dbh.h"
       -#include "dbtext.h"
        
        /*
         * get count for new (incoming) word.  there may be duplicate entries for the
       @@ -50,17 +49,472 @@ db_getnewcount(veciter_t * piter)
        }
        
        dbh_t *
       -dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
       +dbh_open(cpchar dbname)
        {
       -        dbh_t *pdb;
       +        dbhtext_t *pthis = NULL;
       +        uint dirlen;
       +        cpchar phome;
       +        struct stat st;
        
       -        switch (dbfmt) {
       -        case db_text:
       -                pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass);
       -                break;
       -        default:
       -                break;
       +        if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
       +                perror("malloc()");
       +                goto bail;
                }
        
       -        return pdb;
       +        pthis->close = dbtext_db_close;
       +        pthis->opentable = dbtext_db_opentable;
       +
       +        if (dbname != NULL && *dbname != '\0') {
       +                dirlen = strlen(dbname);
       +                if ((pthis->dir = strdup(dbname)) == NULL) {
       +                        perror("strdup()");
       +                        goto bail;
       +                }
       +                if (dirlen && pthis->dir[dirlen - 1] == '/')
       +                        pthis->dir[--dirlen] = '\0';
       +        } else {
       +                phome = getenv("HOME");
       +                if (phome == NULL || *phome == '\0') {
       +                        phome = ".";
       +                }
       +                dirlen = strlen(phome) + 5 + 1;
       +                if ((pthis->dir = malloc(dirlen)) == NULL)
       +                        goto bail;
       +
       +                /* NOTE: no truncation possible */
       +                snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
       +        }
       +
       +        /* make sure config directory exists */
       +        if (stat(pthis->dir, &st) != 0) {
       +                if (errno != ENOENT ||
       +                    mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
       +                        goto bail;
       +        } else {
       +                if (!S_ISDIR(st.st_mode))
       +                        goto bail;
       +        }
       +
       +        /* unveil(2), TODO: rework later */
       +        /* TODO: permission depending on mode */
       +        char listpath[PATH_MAX];
       +        snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist");
       +        if (unveil(listpath, "rw") == -1) {
       +                perror("unveil()");
       +                exit(2);
       +        }
       +        snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist");
       +        if (unveil(listpath, "rw") == -1) {
       +                perror("unveil()");
       +                exit(2);
       +        }
       +        if (unveil(NULL, NULL) == -1) {
       +                perror("unveil()");
       +                exit(2);
       +        }
       +
       +        return (dbh_t *)pthis;
       +
       +bail:
       +        if (pthis) {
       +                if (pthis->dir)
       +                        free(pthis->dir);
       +                free(pthis);
       +        }
       +
       +        return NULL;
       +}
       +
       +static void
       +dbtext_table_setsize(dbttext_t * pthis, uint nsize)
       +{
       +        uint nnewalloc;
       +        rec_t *pnewitems;
       +        uint n;
       +
       +        if (nsize <= pthis->nalloc)
       +                return;
       +
       +        nnewalloc = pthis->nalloc * 2;
       +        if (nnewalloc < nsize)
       +                nnewalloc = nsize;
       +        pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
       +        if (pnewitems == NULL) {
       +                exit(2);
       +        }
       +        for (n = pthis->nitems; n < nsize; n++) {
       +                str_create(&pnewitems[n].w);
       +                pnewitems[n].n = 0;
       +        }
       +        pthis->pitems = pnewitems;
       +        pthis->nalloc = nnewalloc;
       +}
       +
       +bool_t
       +dbtext_db_close(dbhtext_t * pthis)
       +{
       +        free(pthis->dir);
       +        pthis->dir = NULL;
       +        return true;
       +}
       +
       +dbt_t *
       +dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
       +{
       +        dbttext_t *ptable = NULL;
       +
       +#ifndef NOLOCK
       +        struct flock lock;
       +
       +#endif                                /* ndef NOLOCK */
       +        char szpath[PATH_MAX];
       +        int flags, ret;
       +        struct stat st;
       +        char *pbegin;
       +        char *pend;
       +        rec_t r;
       +        uint pos;
       +
       +        if (pthis->dir == NULL)
       +                goto bail;
       +
       +        if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
       +                perror("malloc()");
       +                goto bail;
       +        }
       +        ptable->close = dbtext_table_close;
       +        ptable->mergeclose = dbtext_table_mergeclose;
       +        ptable->unmergeclose = dbtext_table_unmergeclose;
       +        ptable->getmsgcount = dbtext_table_getmsgcount;
       +        ptable->getcount = dbtext_table_getcount;
       +        ptable->fd = -1;
       +        ptable->pbuf = NULL;
       +        ptable->nmsgs = 0;
       +        ptable->nalloc = 0;
       +        ptable->nitems = 0;
       +        ptable->pitems = NULL;
       +
       +        ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
       +        if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
       +                fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table);
       +                goto bail;
       +        }
       +
       +        flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
       +        if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
       +                perror("open()");
       +                goto bail;
       +        }
       +
       +#ifndef NOLOCK
       +        memset(&lock, 0, sizeof(lock));
       +        lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
       +        lock.l_start = 0;
       +        lock.l_whence = SEEK_SET;
       +        lock.l_len = 0;
       +        fcntl(ptable->fd, F_SETLKW, &lock);
       +#endif                                /* ndef NOLOCK */
       +
       +        if (fstat(ptable->fd, &st) != 0) {
       +                perror("fstat()");
       +                goto bail_uc;
       +        }
       +        if (st.st_size == 0) {
       +                return (dbt_t *) ptable;
       +        }
       +        ptable->pbuf = (char *) malloc(st.st_size);
       +        if (ptable->pbuf == NULL) {
       +                perror("malloc()");
       +                goto bail_uc;
       +        }
       +        if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) {
       +                perror("read()");
       +                goto bail_fuc;
       +        }
       +        /* XXX: bogofilter compatibility */
       +        if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) {
       +                goto bail_fuc;
       +        }
       +        pbegin = ptable->pbuf;
       +        while (*pbegin != '\n')
       +                pbegin++;
       +        pbegin++;
       +
       +        pos = 0;
       +        while (pbegin < ptable->pbuf + st.st_size) {
       +                pend = pbegin;
       +                r.w.p = pbegin;
       +                r.w.len = 0;
       +                r.n = 0;
       +
       +                while (*pend != '\n') {
       +                        if (pend >= ptable->pbuf + st.st_size) {
       +                                goto bail_fuc;
       +                        }
       +                        *pend = tolower(*pend);
       +                        if (*pend == ' ') {
       +                                r.w.len = (pend - pbegin);
       +                                r.n = strtol(pend + 1, NULL, 10);
       +                        }
       +                        pend++;
       +                }
       +                if (pend > pbegin && *pbegin != '#' && *pbegin != ';') {
       +                        if (r.w.len == 0 || r.w.len > MAXWORDLEN) {
       +                                fprintf(stderr, "dbh_loadfile: bad file format\n");
       +                                goto bail_fuc;
       +                        }
       +                        dbtext_table_setsize(ptable, pos + 1);
       +                        ptable->pitems[pos++] = r;
       +                        ptable->nitems = pos;
       +                }
       +                pbegin = pend + 1;
       +        }
       +
       +        if (rdonly) {
       +#ifndef NOLOCK
       +                lock.l_type = F_UNLCK;
       +                fcntl(ptable->fd, F_SETLKW, &lock);
       +#endif                                /* ndef NOLOCK */
       +                close(ptable->fd);
       +                ptable->fd = -1;
       +        }
       +        return (dbt_t *) ptable;
       +
       +bail_fuc:
       +        free(ptable->pbuf);
       +
       +bail_uc:
       +#ifndef NOLOCK
       +        lock.l_type = F_UNLCK;
       +        fcntl(ptable->fd, F_SETLKW, &lock);
       +#endif                                /* ndef NOLOCK */
       +
       +        close(ptable->fd);
       +        ptable->fd = -1;
       +
       +bail:
       +        free(ptable);
       +        return NULL;
       +}
       +
       +bool_t
       +dbtext_table_close(dbttext_t * pthis)
       +{
       +        struct flock lockall;
       +
       +        free(pthis->pbuf);
       +        pthis->pbuf = NULL;
       +        free(pthis->pitems);
       +        pthis->pitems = NULL;
       +
       +        if (pthis->fd != -1) {
       +#ifndef NOLOCK
       +                memset(&lockall, 0, sizeof(lockall));
       +                lockall.l_type = F_UNLCK;
       +                lockall.l_start = 0;
       +                lockall.l_whence = SEEK_SET;
       +                lockall.l_len = 0;
       +                fcntl(pthis->fd, F_SETLKW, &lockall);
       +#endif                                /* ndef NOLOCK */
       +                close(pthis->fd);
       +                pthis->fd = -1;
       +        }
       +        return true;
       +}
       +
       +bool_t
       +dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
       +{
       +        /* note that we require both vectors to be sorted */
       +
       +        uint pos;
       +        rec_t *prec;
       +        veciter_t msgiter;
       +        str_t *pmsgstr;
       +        uint count;
       +        char iobuf[IOBUFSIZE];
       +        char *p;
       +
       +        if (pthis->fd == -1) {
       +                return false;
       +        }
       +        ftruncate(pthis->fd, 0);
       +        lseek(pthis->fd, 0, SEEK_SET);
       +
       +        pthis->nmsgs++;
       +
       +        p = iobuf;
       +        p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
       +
       +        vec_first(pmsg, &msgiter);
       +        pmsgstr = veciter_get(&msgiter);
       +
       +        pos = 0;
       +        while (pos < pthis->nitems || pmsgstr != NULL) {
       +                int cmp = 0;
       +
       +                prec = &pthis->pitems[pos];
       +                if (pmsgstr != NULL && pos < pthis->nitems) {
       +                        cmp = str_casecmp(&prec->w, pmsgstr);
       +                } else {
       +                        /* we exhausted one list or the other (but not both) */
       +                        cmp = (pos < pthis->nitems) ? -1 : 1;
       +                }
       +                if (cmp < 0) {
       +                        /* write existing str */
       +                        count = prec->n;
       +                        strncpylwr(p, prec->w.p, prec->w.len);
       +                        p += prec->w.len;
       +                        *p++ = ' ';
       +                        p += sprintf(p, "%u\n", count);
       +
       +                        pos++;
       +                } else if (cmp == 0) {
       +                        /* same str, merge and write sum */
       +                        count = db_getnewcount(&msgiter);
       +                        count += prec->n;
       +                        strncpylwr(p, prec->w.p, prec->w.len);
       +                        p += prec->w.len;
       +                        *p++ = ' ';
       +                        p += sprintf(p, "%u\n", count);
       +
       +                        pos++;
       +                        veciter_next(&msgiter);
       +                        pmsgstr = veciter_get(&msgiter);
       +                } else {        /* cmp > 0 */
       +                        /* write new str */
       +                        count = db_getnewcount(&msgiter);
       +                        strncpylwr(p, pmsgstr->p, pmsgstr->len);
       +                        p += pmsgstr->len;
       +                        *p++ = ' ';
       +                        p += sprintf(p, "%u\n", count);
       +
       +                        veciter_next(&msgiter);
       +                        pmsgstr = veciter_get(&msgiter);
       +                }
       +
       +                if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
       +                        write(pthis->fd, iobuf, p - iobuf);
       +                        p = iobuf;
       +                }
       +        }
       +        if (p != iobuf) {
       +                write(pthis->fd, iobuf, p - iobuf);
       +        }
       +        veciter_destroy(&msgiter);
       +        return dbtext_table_close(pthis);
       +}
       +
       +bool_t
       +dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
       +{
       +        /* note that we require both vectors to be sorted */
       +
       +        uint pos;
       +        rec_t *prec;
       +        veciter_t msgiter;
       +        str_t *pmsgstr;
       +        uint count;
       +        char iobuf[IOBUFSIZE];
       +        char *p;
       +
       +        if (pthis->fd == -1) {
       +                return false;
       +        }
       +        ftruncate(pthis->fd, 0);
       +        lseek(pthis->fd, 0, SEEK_SET);
       +
       +        pthis->nmsgs--;
       +
       +        p = iobuf;
       +        p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
       +
       +        vec_first(pmsg, &msgiter);
       +        pmsgstr = veciter_get(&msgiter);
       +
       +        pos = 0;
       +        while (pos < pthis->nitems || pmsgstr != NULL) {
       +                int cmp = 0;
       +
       +                prec = &pthis->pitems[pos];
       +                if (pmsgstr != NULL && pos < pthis->nitems) {
       +                        cmp = str_casecmp(&prec->w, pmsgstr);
       +                } else {
       +                        /* we exhausted one list or the other (but not both) */
       +                        cmp = (pos < pthis->nitems) ? -1 : 1;
       +                }
       +                if (cmp < 0) {
       +                        /* write existing str */
       +                        count = prec->n;
       +                        strncpylwr(p, prec->w.p, prec->w.len);
       +                        p += prec->w.len;
       +                        *p++ = ' ';
       +                        p += sprintf(p, "%u\n", count);
       +
       +                        pos++;
       +                } else if (cmp == 0) {
       +                        /* same str, merge and write difference */
       +                        count = db_getnewcount(&msgiter);
       +                        count = (prec->n > count) ? (prec->n - count) : 0;
       +                        strncpylwr(p, prec->w.p, prec->w.len);
       +                        p += prec->w.len;
       +                        *p++ = ' ';
       +                        p += sprintf(p, "%u\n", count);
       +
       +                        pos++;
       +                        veciter_next(&msgiter);
       +                        pmsgstr = veciter_get(&msgiter);
       +                } else {        /* cmp > 0 */
       +                        /* this should not happen, so write with count=0 */
       +                        db_getnewcount(&msgiter);
       +                        count = 0;
       +                        strncpylwr(p, pmsgstr->p, pmsgstr->len);
       +                        p += pmsgstr->len;
       +                        *p++ = ' ';
       +                        p += sprintf(p, "%u\n", count);
       +
       +                        veciter_next(&msgiter);
       +                        pmsgstr = veciter_get(&msgiter);
       +                }
       +
       +                if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
       +                        write(pthis->fd, iobuf, p - iobuf);
       +                        p = iobuf;
       +                }
       +        }
       +        if (p != iobuf) {
       +                write(pthis->fd, iobuf, p - iobuf);
       +        }
       +        veciter_destroy(&msgiter);
       +        return dbtext_table_close(pthis);
       +}
       +
       +uint
       +dbtext_table_getmsgcount(dbttext_t * pthis)
       +{
       +        return pthis->nmsgs;
       +}
       +
       +uint
       +dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
       +{
       +        int lo, hi, mid;
       +
       +        if (pthis->nitems == 0) {
       +                return 0;
       +        }
       +        hi = pthis->nitems - 1;
       +        lo = -1;
       +        while (hi - lo > 1) {
       +                mid = (hi + lo) / 2;
       +                if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0)
       +                        hi = mid;
       +                else
       +                        lo = mid;
       +        }
       +
       +        if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
       +                return 0;
       +        }
       +        return pthis->pitems[hi].n;
        }
 (DIR) diff --git a/dbh.h b/dbh.h
       @@ -10,16 +10,14 @@
        #ifndef _DBH_H
        #define _DBH_H
        
       -/* database formats */
       -typedef enum {
       -        db_text                        /* flat text */
       -}    dbfmt_t;
       +#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
       +#define TEXTDB_MAXLINELEN    (MAXWORDLEN+32)
        
        /* record/field structure */
        typedef struct _rec {
                str_t w;
                uint n;
       -}    rec_t;
       +} rec_t;
        
        /* database table */
        typedef struct _dbt dbt_t;
       @@ -38,11 +36,42 @@ struct _dbh {
                dbt_t *(*opentable) (dbh_t *, cpchar, bool_t);
        };
        
       -dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass);
       +typedef struct _dbttext dbttext_t;
       +struct _dbttext
       +{
       +    bool_t      (*close)(dbttext_t*);
       +    bool_t      (*mergeclose)(dbttext_t*,vec_t*);
       +    bool_t      (*unmergeclose)(dbttext_t*,vec_t*);
       +    uint        (*getmsgcount)(dbttext_t*);
       +    uint        (*getcount)(dbttext_t*,str_t*);
        
       -#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
       -#define TEXTDB_MAXLINELEN    (MAXWORDLEN+32)
       +    int         fd;         /* file descriptor, if currently open */
       +    char*       pbuf;       /* data buffer, if currently open */
       +    uint        nmsgs;      /* number of messages represented in list */
       +    uint        nalloc;     /* items alloced in pitems */
       +    uint        nitems;     /* items available */
       +    rec_t*      pitems;     /* growing vector of items */
       +};
       +
       +typedef struct _dbhtext dbhtext_t;
       +struct _dbhtext
       +{
       +    bool_t      (*close)(dbhtext_t*);
       +    dbt_t*      (*opentable)(dbhtext_t*,cpchar,bool_t);
       +
       +    char*       dir;
       +};
        
        uint db_getnewcount(veciter_t * piter);
        
       +dbh_t*  dbtext_db_open(cpchar dbname);
       +bool_t  dbtext_db_close( dbhtext_t* pthis );
       +dbt_t*  dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly );
       +
       +bool_t  dbtext_table_close( dbttext_t* pthis );
       +bool_t  dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg );
       +bool_t  dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg );
       +uint    dbtext_table_getmsgcount( dbttext_t* pthis );
       +uint    dbtext_table_getcount( dbttext_t* pthis, str_t* pword );
       +
        #endif                                /* ndef _DBH_H */
 (DIR) diff --git a/dbtext.c b/dbtext.c
       @@ -1,490 +0,0 @@
       -/* $Id: dbtext.c,v 1.12 2002/10/19 09:59:35 tommy Exp $ */
       -
       -/*
       - * Copyright (c) 2002 Tom Marshall <tommy@tig-grr.com>
       - *
       - * This program is free software.  It may be distributed under the terms
       - * in the file LICENSE, found in the top level of the distribution.
       - *
       - * dbtext.c: flatfile database handler
       - */
       -
       -#include "config.h"
       -#include "dbg.h"
       -#include "str.h"
       -#include "lex.h"
       -#include "vec.h"
       -
       -#include "dbh.h"
       -#include "dbtext.h"
       -
       -static void
       -dbtext_table_setsize(dbttext_t * pthis, uint nsize)
       -{
       -        uint nnewalloc;
       -        rec_t *pnewitems;
       -        uint n;
       -
       -        if (nsize <= pthis->nalloc)
       -                return;
       -
       -        nnewalloc = pthis->nalloc * 2;
       -        if (nnewalloc < nsize)
       -                nnewalloc = nsize;
       -        pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
       -        if (pnewitems == NULL) {
       -                exit(2);
       -        }
       -        for (n = pthis->nitems; n < nsize; n++) {
       -                str_create(&pnewitems[n].w);
       -                pnewitems[n].n = 0;
       -        }
       -        pthis->pitems = pnewitems;
       -        pthis->nalloc = nnewalloc;
       -}
       -
       -dbh_t *
       -dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
       -{
       -        dbhtext_t *pthis = NULL;
       -        uint dirlen;
       -        cpchar phome;
       -        struct stat st;
       -
       -        if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
       -                perror("malloc()");
       -                goto bail;
       -        }
       -
       -        pthis->close = dbtext_db_close;
       -        pthis->opentable = dbtext_db_opentable;
       -
       -        if (dbname != NULL && *dbname != '\0') {
       -                dirlen = strlen(dbname);
       -                if ((pthis->dir = strdup(dbname)) == NULL) {
       -                        perror("strdup()");
       -                        goto bail;
       -                }
       -                if (dirlen && pthis->dir[dirlen - 1] == '/')
       -                        pthis->dir[--dirlen] = '\0';
       -        } else {
       -                phome = getenv("HOME");
       -                if (phome == NULL || *phome == '\0') {
       -                        phome = ".";
       -                }
       -                dirlen = strlen(phome) + 5 + 1;
       -                if ((pthis->dir = malloc(dirlen)) == NULL)
       -                        goto bail;
       -
       -                /* NOTE: no truncation possible */
       -                snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
       -        }
       -
       -        /* make sure config directory exists */
       -        if (stat(pthis->dir, &st) != 0) {
       -                if (errno != ENOENT ||
       -                    mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
       -                        goto bail;
       -        } else {
       -                if (!S_ISDIR(st.st_mode))
       -                        goto bail;
       -        }
       -
       -        /* unveil(2), TODO: rework later */
       -        /* TODO: permission depending on mode */
       -        char listpath[PATH_MAX];
       -        snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist");
       -        if (unveil(listpath, "rw") == -1) {
       -                perror("unveil()");
       -                exit(2);
       -        }
       -        snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist");
       -        if (unveil(listpath, "rw") == -1) {
       -                perror("unveil()");
       -                exit(2);
       -        }
       -        if (unveil(NULL, NULL) == -1) {
       -                perror("unveil()");
       -                exit(2);
       -        }
       -
       -        return (dbh_t *)pthis;
       -
       -bail:
       -        if (pthis) {
       -                if (pthis->dir)
       -                        free(pthis->dir);
       -                free(pthis);
       -        }
       -
       -        return NULL;
       -}
       -
       -bool_t
       -dbtext_db_close(dbhtext_t * pthis)
       -{
       -        free(pthis->dir);
       -        pthis->dir = NULL;
       -        return true;
       -}
       -
       -dbt_t *
       -dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
       -{
       -        dbttext_t *ptable = NULL;
       -
       -#ifndef NOLOCK
       -        struct flock lock;
       -
       -#endif                                /* ndef NOLOCK */
       -        char szpath[PATH_MAX];
       -        int flags, ret;
       -        struct stat st;
       -        char *pbegin;
       -        char *pend;
       -        rec_t r;
       -        uint pos;
       -
       -        if (pthis->dir == NULL)
       -                goto bail;
       -
       -        if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
       -                perror("malloc()");
       -                goto bail;
       -        }
       -        ptable->close = dbtext_table_close;
       -        ptable->mergeclose = dbtext_table_mergeclose;
       -        ptable->unmergeclose = dbtext_table_unmergeclose;
       -        ptable->getmsgcount = dbtext_table_getmsgcount;
       -        ptable->getcount = dbtext_table_getcount;
       -        ptable->fd = -1;
       -        ptable->pbuf = NULL;
       -        ptable->nmsgs = 0;
       -        ptable->nalloc = 0;
       -        ptable->nitems = 0;
       -        ptable->pitems = NULL;
       -
       -        ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
       -        if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
       -                fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table);
       -                goto bail;
       -        }
       -
       -        flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
       -        if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
       -                perror("open()");
       -                goto bail;
       -        }
       -
       -#ifndef NOLOCK
       -        memset(&lock, 0, sizeof(lock));
       -        lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
       -        lock.l_start = 0;
       -        lock.l_whence = SEEK_SET;
       -        lock.l_len = 0;
       -        fcntl(ptable->fd, F_SETLKW, &lock);
       -#endif                                /* ndef NOLOCK */
       -
       -        if (fstat(ptable->fd, &st) != 0) {
       -                perror("fstat()");
       -                goto bail_uc;
       -        }
       -        if (st.st_size == 0) {
       -                return (dbt_t *) ptable;
       -        }
       -        ptable->pbuf = (char *) malloc(st.st_size);
       -        if (ptable->pbuf == NULL) {
       -                perror("malloc()");
       -                goto bail_uc;
       -        }
       -        if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) {
       -                perror("read()");
       -                goto bail_fuc;
       -        }
       -        /* XXX: bogofilter compatibility */
       -        if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) {
       -                goto bail_fuc;
       -        }
       -        pbegin = ptable->pbuf;
       -        while (*pbegin != '\n')
       -                pbegin++;
       -        pbegin++;
       -
       -        pos = 0;
       -        while (pbegin < ptable->pbuf + st.st_size) {
       -                pend = pbegin;
       -                r.w.p = pbegin;
       -                r.w.len = 0;
       -                r.n = 0;
       -
       -                while (*pend != '\n') {
       -                        if (pend >= ptable->pbuf + st.st_size) {
       -                                goto bail_fuc;
       -                        }
       -                        *pend = tolower(*pend);
       -                        if (*pend == ' ') {
       -                                r.w.len = (pend - pbegin);
       -                                r.n = strtol(pend + 1, NULL, 10);
       -                        }
       -                        pend++;
       -                }
       -                if (pend > pbegin && *pbegin != '#' && *pbegin != ';') {
       -                        if (r.w.len == 0 || r.w.len > MAXWORDLEN) {
       -                                fprintf(stderr, "dbh_loadfile: bad file format\n");
       -                                goto bail_fuc;
       -                        }
       -                        dbtext_table_setsize(ptable, pos + 1);
       -                        ptable->pitems[pos++] = r;
       -                        ptable->nitems = pos;
       -                }
       -                pbegin = pend + 1;
       -        }
       -
       -        if (rdonly) {
       -#ifndef NOLOCK
       -                lock.l_type = F_UNLCK;
       -                fcntl(ptable->fd, F_SETLKW, &lock);
       -#endif                                /* ndef NOLOCK */
       -                close(ptable->fd);
       -                ptable->fd = -1;
       -        }
       -        return (dbt_t *) ptable;
       -
       -bail_fuc:
       -        free(ptable->pbuf);
       -
       -bail_uc:
       -#ifndef NOLOCK
       -        lock.l_type = F_UNLCK;
       -        fcntl(ptable->fd, F_SETLKW, &lock);
       -#endif                                /* ndef NOLOCK */
       -
       -        close(ptable->fd);
       -        ptable->fd = -1;
       -
       -bail:
       -        free(ptable);
       -        return NULL;
       -}
       -
       -/*
       - * Release everything a text table holds: the raw file buffer, the
       - * record vector and, if the file is still open, its lock and
       - * descriptor.  Safe to call on an already closed table (fd == -1).
       - * Always returns true.
       - */
       -bool_t
       -dbtext_table_close(dbttext_t * pthis)
       -{
       -        struct flock unlock_req;
       -
       -        /* drop the in-memory copy of the word list */
       -        free(pthis->pbuf);
       -        free(pthis->pitems);
       -        pthis->pbuf = NULL;
       -        pthis->pitems = NULL;
       -
       -        /* nothing more to do when the file was already closed */
       -        if (pthis->fd == -1) {
       -                return true;
       -        }
       -
       -#ifndef NOLOCK
       -        /* l_start = 0 with l_len = 0 covers the whole file */
       -        memset(&unlock_req, 0, sizeof(unlock_req));
       -        unlock_req.l_whence = SEEK_SET;
       -        unlock_req.l_start = 0;
       -        unlock_req.l_len = 0;
       -        unlock_req.l_type = F_UNLCK;
       -        fcntl(pthis->fd, F_SETLKW, &unlock_req);
       -#endif                                /* ndef NOLOCK */
       -
       -        close(pthis->fd);
       -        pthis->fd = -1;
       -        return true;
       -}
       -
       -/*
       - * Write len bytes from buf to fd, continuing after short writes.
       - * Returns false as soon as write() fails or makes no progress.
       - */
       -static bool_t
       -dbtext_table_flush(int fd, const char *buf, size_t len)
       -{
       -        while (len > 0) {
       -                long nw = (long) write(fd, buf, len);
       -
       -                if (nw <= 0) {
       -                        perror("write()");
       -                        return false;
       -                }
       -                buf += nw;
       -                len -= (size_t) nw;
       -        }
       -        return true;
       -}
       -
       -/*
       - * Rewrite the table file from scratch, combining the loaded records
       - * with the words of one message, then close the table.
       - *
       - *   pthis  table whose file is still open (fd != -1)
       - *   pmsg   words of the message
       - *   add    true:  add the message's counts and increment nmsgs
       - *          false: subtract them (clamped at 0) and decrement nmsgs
       - *
       - * Returns false if the table has no open descriptor or if truncating,
       - * seeking or writing the file fails; true otherwise.  Except in the
       - * "no open descriptor" case the table is always closed on return.
       - *
       - * NOTE: the file is truncated before the new contents are written, so
       - * a failure part-way through leaves an incomplete file on disk.
       - */
       -static bool_t
       -dbtext_table_rewrite(dbttext_t * pthis, vec_t * pmsg, bool_t add)
       -{
       -        /* note that we require both vectors to be sorted */
       -
       -        /*
       -         * Worst case for " <count>\n" plus sprintf()'s terminating NUL:
       -         * a uint never needs more than three decimal digits per byte.
       -         */
       -        const size_t tail = 3 * sizeof(uint) + 3;
       -        char iobuf[IOBUFSIZE];
       -        char *p = iobuf;
       -        veciter_t msgiter;
       -        str_t *pmsgstr;
       -        uint pos = 0;
       -        bool_t ok = true;
       -
       -        if (pthis->fd == -1) {
       -                return false;
       -        }
       -        if (ftruncate(pthis->fd, 0) != 0) {
       -                perror("ftruncate()");
       -                dbtext_table_close(pthis);
       -                return false;
       -        }
       -        if (lseek(pthis->fd, 0, SEEK_SET) == -1) {
       -                perror("lseek()");
       -                dbtext_table_close(pthis);
       -                return false;
       -        }
       -
       -        if (add) {
       -                pthis->nmsgs++;
       -        } else if (pthis->nmsgs > 0) {
       -                /* never let the unsigned message count wrap around */
       -                pthis->nmsgs--;
       -        }
       -        p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
       -
       -        vec_first(pmsg, &msgiter);
       -        pmsgstr = veciter_get(&msgiter);
       -
       -        while (pos < pthis->nitems || pmsgstr != NULL) {
       -                rec_t *prec = NULL;
       -                str_t *pword;
       -                uint count;
       -                bool_t frommsg = false;
       -                int cmp;
       -
       -                if (pos < pthis->nitems) {
       -                        prec = &pthis->pitems[pos];
       -                        /* message exhausted: only table records remain */
       -                        cmp = (pmsgstr != NULL) ? str_casecmp(&prec->w, pmsgstr) : -1;
       -                } else {
       -                        /* table exhausted: only message words remain */
       -                        cmp = 1;
       -                }
       -
       -                if (cmp < 0) {
       -                        /* word only in the table: copy it unchanged */
       -                        pword = &prec->w;
       -                        count = prec->n;
       -                        pos++;
       -                } else if (cmp == 0) {
       -                        /* word in both: write the sum or the clamped difference */
       -                        count = db_getnewcount(&msgiter);
       -                        if (add) {
       -                                count += prec->n;
       -                        } else {
       -                                count = (prec->n > count) ? (prec->n - count) : 0;
       -                        }
       -                        pword = &prec->w;
       -                        pos++;
       -                        frommsg = true;
       -                } else {
       -                        /*
       -                         * word only in the message: new entry when adding;
       -                         * when subtracting this should not happen, so it
       -                         * is written with count=0
       -                         */
       -                        count = db_getnewcount(&msgiter);
       -                        if (!add) {
       -                                count = 0;
       -                        }
       -                        pword = pmsgstr;
       -                        frommsg = true;
       -                }
       -
       -                /* make room first if this record might not fit */
       -                if ((size_t) (p - iobuf) + pword->len + tail > sizeof(iobuf)) {
       -                        if (pword->len + tail > sizeof(iobuf)) {
       -                                fprintf(stderr, "dbtext_table: word too long\n");
       -                                ok = false;
       -                                break;
       -                        }
       -                        ok = dbtext_table_flush(pthis->fd, iobuf, (size_t) (p - iobuf));
       -                        if (!ok) {
       -                                break;
       -                        }
       -                        p = iobuf;
       -                }
       -
       -                strncpylwr(p, pword->p, pword->len);
       -                p += pword->len;
       -                *p++ = ' ';
       -                p += sprintf(p, "%u\n", count);
       -
       -                if (frommsg) {
       -                        veciter_next(&msgiter);
       -                        pmsgstr = veciter_get(&msgiter);
       -                }
       -        }
       -        if (ok && p != iobuf) {
       -                ok = dbtext_table_flush(pthis->fd, iobuf, (size_t) (p - iobuf));
       -        }
       -        veciter_destroy(&msgiter);
       -        if (!dbtext_table_close(pthis)) {
       -                ok = false;
       -        }
       -        return ok;
       -}
       -
       -/*
       - * Add the word counts of message pmsg to the table, increment the
       - * message count, rewrite the file and close the table.
       - * Returns false if the table is not open for writing or on I/O error.
       - */
       -bool_t
       -dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
       -{
       -        return dbtext_table_rewrite(pthis, pmsg, true);
       -}
       -
       -/*
       - * Subtract the word counts of message pmsg from the table (counts and
       - * the message count never drop below 0), rewrite the file and close
       - * the table.
       - * Returns false if the table is not open for writing or on I/O error.
       - */
       -bool_t
       -dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
       -{
       -        return dbtext_table_rewrite(pthis, pmsg, false);
       -}
       -
       -/* Number of messages that have been merged into this table. */
       -uint
       -dbtext_table_getmsgcount(dbttext_t * pthis)
       -{
       -        uint total = pthis->nmsgs;
       -
       -        return total;
       -}
       -
       -/*
       - * Look up pword (case-insensitively) in the sorted record vector with
       - * a binary search.  Returns the stored count, or 0 when the word is
       - * not in the table.
       - */
       -uint
       -dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
       -{
       -        int below;                /* every index <= below sorts before pword */
       -        int cand;                /* lowest index that may still match */
       -        int probe;
       -
       -        if (pthis->nitems == 0) {
       -                return 0;
       -        }
       -
       -        below = -1;
       -        cand = pthis->nitems - 1;
       -        while (cand - below > 1) {
       -                probe = (cand + below) / 2;
       -                if (str_casecmp(pword, &pthis->pitems[probe].w) > 0) {
       -                        below = probe;
       -                } else {
       -                        cand = probe;
       -                }
       -        }
       -
       -        /* cand is the only remaining candidate; it matches or nothing does */
       -        if (str_casecmp(pword, &pthis->pitems[cand].w) == 0) {
       -                return pthis->pitems[cand].n;
       -        }
       -        return 0;
       -}
 (DIR) diff --git a/dbtext.h b/dbtext.h
       @@ -1,49 +0,0 @@
       -/* $Id: dbtext.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */
       -
       -/*
       - * Copyright (c) 2002 Tom Marshall <tommy@tig-grr.com>
       - *
       - * This program is free software.  It may be distributed under the terms
       - * in the file LICENSE, found in the top level of the distribution.
       - */
       -
       -#ifndef _DBTEXT_H
       -#define _DBTEXT_H
       -
       -/*
       - * One word-list table backed by a text file.
       - *
       - * The five function pointers have the same signatures as the
       - * dbtext_table_* functions declared below.  dbtext_db_opentable()
       - * returns this object cast to dbt_t*, so the leading members
       - * presumably have to stay in the order dbt_t expects -- confirm
       - * against dbh.h before reordering anything.
       - *
       - * fd is -1 whenever the file is not open.  pbuf holds the whole file
       - * as read from disk; the words in pitems point into pbuf rather than
       - * owning copies, so pbuf must outlive pitems.  Lookups binary-search
       - * pitems, which therefore has to be sorted by word.
       - */
       -typedef struct _dbttext dbttext_t;
       -struct _dbttext
       -{
       -    bool_t      (*close)(dbttext_t*);
       -    bool_t      (*mergeclose)(dbttext_t*,vec_t*);
       -    bool_t      (*unmergeclose)(dbttext_t*,vec_t*);
       -    uint        (*getmsgcount)(dbttext_t*);
       -    uint        (*getcount)(dbttext_t*,str_t*);
       -
       -    int         fd;         /* file descriptor, if currently open */
       -    char*       pbuf;       /* data buffer, if currently open */
       -    uint        nmsgs;      /* number of messages represented in list */
       -    uint        nalloc;     /* items alloced in pitems */
       -    uint        nitems;     /* items available */
       -    rec_t*      pitems;     /* growing vector of items */
       -};
       -
       -/*
       - * Handle for a text "database".  The two function pointers have the
       - * same signatures as dbtext_db_close() and dbtext_db_opentable()
       - * declared below.
       - *
       - * NOTE(review): dir is presumably the directory that holds the table
       - * files; the code that fills it in is not part of this header --
       - * confirm against dbtext_db_open().
       - */
       -typedef struct _dbhtext dbhtext_t;
       -struct _dbhtext
       -{
       -    bool_t      (*close)(dbhtext_t*);
       -    dbt_t*      (*opentable)(dbhtext_t*,cpchar,bool_t);
       -
       -    char*       dir;
       -};
       -
       -/*
       - * Database-level entry points.
       - * dbtext_db_opentable() loads a table file into memory and returns it
       - * as a dbt_t*, or NULL on failure; with rdonly set, the file is
       - * unlocked and closed again once it has been loaded.
       - * NOTE(review): dbtext_db_open()/dbtext_db_close() are presumably the
       - * matching constructor/destructor for the handle -- confirm in dbtext.c.
       - */
       -dbh_t*  dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass );
       -bool_t  dbtext_db_close( dbhtext_t* pthis );
       -dbt_t*  dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly );
       -
       -/*
       - * Table-level entry points.
       - * close:        free the loaded data, release the lock and close the file.
       - * mergeclose:   add the message's word counts, rewrite the file, close.
       - * unmergeclose: subtract the message's word counts (never below 0),
       - *               rewrite the file, close.
       - *               Both return false if the table has no open descriptor;
       - *               pmsg and the table's records must be sorted.
       - * getmsgcount:  number of messages represented in the table.
       - * getcount:     stored count for pword, or 0 if the word is unknown.
       - */
       -bool_t  dbtext_table_close( dbttext_t* pthis );
       -bool_t  dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg );
       -bool_t  dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg );
       -uint    dbtext_table_getmsgcount( dbttext_t* pthis );
       -uint    dbtext_table_getcount( dbttext_t* pthis, str_t* pword );
       -
       -#endif /* ndef _DBTEXT_H */
 (DIR) diff --git a/filt.h b/filt.h
       @@ -10,17 +10,21 @@
        #ifndef _FILT_H
        #define _FILT_H
        
       +#include "lex.h"
       +#include "str.h"
       +#include "vec.h"
       +
        typedef struct
        {
       -    str_t       key;
       -    double      prob;
       +        str_t       key;
       +        double      prob;
        } discrim_t;
        
        typedef struct
        {
       -    double      spamicity;
       -    uint        keepers;
       -    discrim_t*  extrema;
       +        double      spamicity;
       +        uint        keepers;
       +        discrim_t*  extrema;
        } stats_t;
        
        void statdump( stats_t* pstat, int fd );