many improvements - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
 (HTM) git clone git://git.codemadness.org/bmf
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit f5e56cc70c117352ec5b7a7984065eaa65db162f
 (DIR) parent 20a0f52d5b478e240450fd72fa3bbd3ab5c58c48
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 27 Oct 2018 18:37:01 +0200
       
       many improvements
       
       - update README: only flat files are supported now.
       - remove bmf.spec.in file.
       - remove unused functions.
       - remove some assert() calls.
       - dbtext_db_open() improvements:
         - check strdup call + perror message.
         - malloc + perror message.
         - check for empty directory string (just in case).
         - use snprintf (just in case).
         - free memory on failure condition.
       - dbtext_db_opentable() improvements:
         - use snprintf, error on path truncation.
       - remove unit tests (not maintained).
       - code-style improvements.
       
       Diffstat:
         M Makefile                            |       1 -
         M README                              |       5 +----
         M bmf.c                               |       1 -
         D bmf.spec.in                         |      64 -------------------------------
         M config.h                            |       1 -
         M dbg.c                               |      14 +-------------
         M dbg.h                               |       3 ---
         M dbh.c                               |      58 +++++++++++++++---------------
         M dbh.h                               |      46 ++++++++++++++-----------------
         M dbtext.c                            |     166 +++++++++++--------------------
         M filt.c                              |       8 +++-----
         M lex.c                               |      45 -------------------------------
         M lex.h                               |      22 +++++++++++-----------
         M str.c                               |      51 +++----------------------------
         M str.h                               |       4 ----
         M vec.c                               |     215 -------------------------------
         M vec.h                               |      50 +++++++++++--------------------
       
       17 files changed, 144 insertions(+), 610 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -50,7 +50,6 @@ dist:
                cp -f ${MAN1} ${DOC} ${HDR} \
                        ${SRC} ${SCRIPTS} \
                        Makefile \
       -                bmf.spec.in \
                        "${NAME}-${VERSION}"
                # make tarball
                tar -cf - "${NAME}-${VERSION}" | \
 (DIR) diff --git a/README b/README
       @@ -13,10 +13,7 @@ This project provides features which are not available in other filters:
        
        (1) Independence from external programs and libraries.  Tokens are stored in
        memory using simple vectors which require no heavyweight external data
       -structure libraries.  Multiple token database formats are supported,
       -including flat files, libdb, and mysql.  Conversion between formats will
       -always be possible with the included import/export utility and flat files
       -will always remain an option.
       +structure libraries. The tokens are stored in plain-text "flat" files.
        
        (2) Efficient processing.  Input data is parsed by a handcrafted parser
        which weighs in under 3% of the equivalent code generated by flex.  No
 (DIR) diff --git a/bmf.c b/bmf.c
       @@ -98,7 +98,6 @@ main(int argc, char **argv)
                        err(1, "pledge");
        
                srand(time(NULL));
       -        atexit(dump_alloc_heap);
        
                stats.keepers = DEF_KEEPERS;
                while ((ch = getopt(argc, argv, "NSVd:hk:m:npstv")) != EOF) {
 (DIR) diff --git a/bmf.spec.in b/bmf.spec.in
       @@ -1,64 +0,0 @@
       -Name: bmf
       -Version: VERSION
       -Release: 1
       -URL: http://www.sourceforge.net/projects/bmf
       -Source0: %{name}-%{version}.tar.gz
       -License: GPL
       -Group: Applications/Internet
       -Summary: fast anti-spam filtering by Bayesian statistical analysis
       -Buildroot: %{_tmppath}/%{name}-%{version}-root
       -
       -%description
       -bmf is a Bayesian mail filter.  It takes an email message or other text on
       -stdin, does a statistical check against lists of "good" and "spam" words,
       -and returns a status code indicating whether or not the message is spam. 
       -bmf is efficient, small, and self-contained.
       -
       -%prep
       -
       -%setup
       -
       -%build
       -./configure --with-libdb --without-mysql
       -make
       -
       -%install
       -[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT
       -make DESTDIR=${RPM_BUILD_ROOT} install
       -gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.?
       -
       -
       -%files
       -%{_bindir}/bmf
       -%{_mandir}/man1/bmf.1.gz
       -%{_bindir}/bmfconv
       -%{_mandir}/man1/bmfconv.1.gz
       -%doc README LICENSE
       -
       -%changelog
       -* Mon Oct 14 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.9.3.
       -
       -* Sat Oct 12 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.9.2.
       -
       -* Sat Oct 12 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.9.1.
       -
       -* Wed Oct 09 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.84.
       -
       -* Mon Oct 07 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.83.
       -
       -* Sat Oct 05 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.82.
       -
       -* Thu Oct 03 2002 Tom Marshall <tommy@tig-grr.com>
       -- Update to version 0.81.
       -- Add bmfconv.
       -- Use new configure script.
       -
       -* Fri Sep 27 2002 Tom Marshall <tommy@tig-grr.com>
       -- Initial build.
       -
 (DIR) diff --git a/config.h b/config.h
       @@ -19,7 +19,6 @@
        #include <errno.h>
        #include <math.h>
        #include <ctype.h>
       -#include <assert.h>
        
        /**************************************
         * System headers
 (DIR) diff --git a/dbg.c b/dbg.c
       @@ -15,7 +15,7 @@
        
        uint g_verbose = 0;
        
       -void 
       +void
        verbose(int level, const char *fmt,...)
        {
                va_list v;
       @@ -26,15 +26,3 @@ verbose(int level, const char *fmt,...)
                        va_end(v);
                }
        }
       -
       -void 
       -dbgout(const char *fmt,...)
       -{
       -        /* empty */
       -}
       -
       -void 
       -dump_alloc_heap(void)
       -{
       -        /* empty */
       -}
 (DIR) diff --git a/dbg.h b/dbg.h
       @@ -14,7 +14,4 @@ extern uint g_verbose;
        
        void verbose( int level, const char* fmt, ... );
        
       -void dbgout( const char* fmt, ... );
       -void dump_alloc_heap( void );
       -
        #endif /* ndef _DBG_H */
 (DIR) diff --git a/dbh.c b/dbh.c
       @@ -24,43 +24,43 @@
         *
         * the list referenced in the iterator must be sorted.
         */
       -uint db_getnewcount( veciter_t* piter )
       +uint
       +db_getnewcount(veciter_t * piter)
        {
       -    str_t*      pstr;
       -    uint        count;
       -    veciter_t   curiter;
       -    str_t*      pcurstr;
       +        str_t *pstr;
       +        uint count;
       +        veciter_t curiter;
       +        str_t *pcurstr;
        
       -    pstr = &piter->plist->pitems[piter->index];
       -    count = 0;
       +        pstr = &piter->plist->pitems[piter->index];
       +        count = 0;
        
       -    curiter.plist = piter->plist;
       -    curiter.index = piter->index;
       -    pcurstr = &curiter.plist->pitems[curiter.index];
       +        curiter.plist = piter->plist;
       +        curiter.index = piter->index;
       +        pcurstr = &curiter.plist->pitems[curiter.index];
        
       -    while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr ) == 0 )
       -    {
       -        piter->index = curiter.index;
       -        count = min( MAXFREQ, count + 1 );
       -        veciter_next( &curiter );
       -        pcurstr = &curiter.plist->pitems[curiter.index];
       -    }
       +        while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcurstr) == 0) {
       +                piter->index = curiter.index;
       +                count = min(MAXFREQ, count + 1);
       +                veciter_next(&curiter);
       +                pcurstr = &curiter.plist->pitems[curiter.index];
       +        }
        
       -    return count;
       +        return count;
        }
        
       -dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass )
       +dbh_t *
       +dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
        {
       -    dbh_t* pdb = NULL;
       +        dbh_t *pdb = NULL;        /* stays NULL for an unsupported dbfmt */
        
       -    switch( dbfmt )
       -    {
       -    case db_text:
       -        pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass );
       -        break;
       -    default:
       -        assert(false);
       -    }
       +        switch (dbfmt) {
       +        case db_text:
       +                pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass);
       +                break;
       +        default:
       +                break;
       +        }
        
       -    return pdb;
       +        return pdb;        /* handle from the backend, or NULL on failure */
        }
 (DIR) diff --git a/dbh.h b/dbh.h
       @@ -11,44 +11,40 @@
        #define _DBH_H
        
        /* database formats */
       -typedef enum
       -{
       -    db_text        /* flat text */
       -} dbfmt_t;
       +typedef enum {
       +        db_text                        /* flat text */
       +}    dbfmt_t;
        
        /* record/field structure */
       -typedef struct _rec
       -{
       -    str_t   w;
       -    uint    n;
       -} rec_t;
       +typedef struct _rec {
       +        str_t w;
       +        uint n;
       +}    rec_t;
        
        /* database table */
        typedef struct _dbt dbt_t;
       -struct _dbt
       -{
       -    bool_t      (*close)(dbt_t*);
       -    bool_t      (*mergeclose)(dbt_t*,vec_t*);
       -    bool_t      (*unmergeclose)(dbt_t*,vec_t*);
       -    bool_t      (*import)(dbt_t*,cpchar);
       -    bool_t      (*export)(dbt_t*,cpchar);
       -    uint        (*getmsgcount)(dbt_t*);
       -    uint        (*getcount)(dbt_t*,str_t*);
       +struct _dbt {
       +        bool_t(*close) (dbt_t *);
       +        bool_t(*mergeclose) (dbt_t *, vec_t *);
       +        bool_t(*unmergeclose) (dbt_t *, vec_t *);
       +        bool_t(*import) (dbt_t *, cpchar);
       +        bool_t(*export) (dbt_t *, cpchar);
       +        uint(*getmsgcount) (dbt_t *);
       +        uint(*getcount) (dbt_t *, str_t *);
        };
        
        /* database instance */
        typedef struct _dbh dbh_t;
       -struct _dbh
       -{
       -    bool_t      (*close)(dbh_t*);
       -    dbt_t*      (*opentable)(dbh_t*,cpchar,bool_t);
       +struct _dbh {
       +        bool_t(*close) (dbh_t *);
       +        dbt_t *(*opentable) (dbh_t *, cpchar, bool_t);
        };
        
       -dbh_t*  dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass );
       +dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass);
        
        #define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
        #define TEXTDB_MAXLINELEN    (MAXWORDLEN+32)
        
       -uint db_getnewcount( veciter_t* piter );
       +uint db_getnewcount(veciter_t * piter);
        
       -#endif /* ndef _DBH_H */
       +#endif                                /* ndef _DBH_H */
 (DIR) diff --git a/dbtext.c b/dbtext.c
       @@ -21,78 +21,83 @@
        static void
        dbtext_table_setsize(dbttext_t * pthis, uint nsize)
        {
       -        if (nsize > pthis->nalloc) {
       -                uint nnewalloc;
       -                rec_t *pnewitems;
       -                uint n;
       -
       -                nnewalloc = pthis->nalloc * 2;
       -                if (nnewalloc < nsize)
       -                        nnewalloc = nsize;
       -                pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
       -                if (pnewitems == NULL) {
       -                        exit(2);
       -                }
       -                for (n = pthis->nitems; n < nsize; n++) {
       -                        str_create(&pnewitems[n].w);
       -                        pnewitems[n].n = 0;
       -                }
       -                pthis->pitems = pnewitems;
       -                pthis->nalloc = nnewalloc;
       +        uint nnewalloc;
       +        rec_t *pnewitems;
       +        uint n;
       +
       +        if (nsize <= pthis->nalloc)
       +                return;
       +
       +        nnewalloc = pthis->nalloc * 2;
       +        if (nnewalloc < nsize)
       +                nnewalloc = nsize;
       +        pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
       +        if (pnewitems == NULL) {
       +                exit(2);
                }
       +        for (n = pthis->nitems; n < nsize; n++) {
       +                str_create(&pnewitems[n].w);
       +                pnewitems[n].n = 0;
       +        }
       +        pthis->pitems = pnewitems;
       +        pthis->nalloc = nnewalloc;
        }
        
        dbh_t *
        dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
        {
       -        dbhtext_t *pthis;
       -
       +        dbhtext_t *pthis = NULL;
                uint dirlen;
                cpchar phome;
                struct stat st;
        
       -        pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t));
       -        if (pthis == NULL) {
       +        if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
       +                perror("malloc()");
                        goto bail;
                }
       +
                pthis->close = dbtext_db_close;
                pthis->opentable = dbtext_db_opentable;
       +
                if (dbname != NULL && *dbname != '\0') {
                        dirlen = strlen(dbname);
       -                pthis->dir = strdup(dbname);
       -                if (pthis->dir[dirlen - 1] == '/') {
       -                        pthis->dir[dirlen - 1] = '\0';
       +                if ((pthis->dir = strdup(dbname)) == NULL) {
       +                        perror("strdup()");
       +                        goto bail;
                        }
       +                if (dirlen && pthis->dir[dirlen - 1] == '/')
       +                        pthis->dir[--dirlen] = '\0';
                } else {
                        phome = getenv("HOME");
                        if (phome == NULL || *phome == '\0') {
                                phome = ".";
                        }
       -                pthis->dir = (char *) malloc(strlen(phome) + 5 + 1);
       -                if (pthis->dir == NULL) {
       +                dirlen = strlen(phome) + 5 + 1;
       +                if ((pthis->dir = malloc(dirlen)) == NULL)
                                goto bail;
       -                }
       -                sprintf(pthis->dir, "%s/.bmf", phome);
       +
       +                snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
                }
        
       -        /* ensure config directory exists */
       +        /* make sure config directory exists */
                if (stat(pthis->dir, &st) != 0) {
       -                if (errno == ENOENT) {
       -                        if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) {
       -                                goto bail;
       -                        }
       -                } else {
       +                if (errno != ENOENT ||
       +                    mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
                                goto bail;
       -                }
                } else {
       -                if (!S_ISDIR(st.st_mode)) {
       +                if (!S_ISDIR(st.st_mode))
                                goto bail;
       -                }
                }
        
       -        return (dbh_t *) pthis;
       +        return (dbh_t *)pthis;
        
        bail:
       +        if (pthis) {
       +                if (pthis->dir)
       +                        free(pthis->dir);
       +                free(pthis);
       +        }
       +
                return NULL;
        }
        
       @@ -114,19 +119,17 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
        
        #endif                                /* ndef NOLOCK */
                char szpath[PATH_MAX];
       -        int flags;
       +        int flags, ret;
                struct stat st;
       -
                char *pbegin;
                char *pend;
                rec_t r;
                uint pos;
        
       -        if (pthis->dir == NULL) {
       +        if (pthis->dir == NULL)
                        goto bail;
       -        }
       -        ptable = (dbttext_t *) malloc(sizeof(dbttext_t));
       -        if (ptable == NULL) {
       +
       +        if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
                        perror("malloc()");
                        goto bail;
                }
       @@ -144,13 +147,18 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
                ptable->nitems = 0;
                ptable->pitems = NULL;
        
       -        sprintf(szpath, "%s/%s.txt", pthis->dir, table);
       -        flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT);
       -        ptable->fd = open(szpath, flags, 0644);
       -        if (ptable->fd == -1) {
       +        ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
       +        if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
       +                fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table);
       +                goto bail;
       +        }
       +
       +        flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
       +        if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
                        perror("open()");
                        goto bail;
                }
       +
        #ifndef NOLOCK
                memset(&lock, 0, sizeof(lock));
                lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
       @@ -307,8 +315,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
                        }
                        if (cmp < 0) {
                                /* write existing str */
       -                        assert(prec->w.p != NULL && prec->w.len > 0);
       -                        assert(prec->w.len <= MAXWORDLEN);
                                count = prec->n;
                                strncpylwr(p, prec->w.p, prec->w.len);
                                p += prec->w.len;
       @@ -318,10 +324,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
                                pos++;
                        } else if (cmp == 0) {
                                /* same str, merge and write sum */
       -                        assert(prec->w.p != NULL && prec->w.len > 0);
       -                        assert(pmsgstr->p != NULL && pmsgstr->len > 0);
       -                        assert(prec->w.len <= MAXWORDLEN);
       -                        assert(pmsgstr->len <= MAXWORDLEN);
                                count = db_getnewcount(&msgiter);
                                count += prec->n;
                                strncpylwr(p, prec->w.p, prec->w.len);
       @@ -334,8 +336,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
                                pmsgstr = veciter_get(&msgiter);
                        } else {        /* cmp > 0 */
                                /* write new str */
       -                        assert(pmsgstr->p != NULL && pmsgstr->len > 0);
       -                        assert(pmsgstr->len <= MAXWORDLEN);
                                count = db_getnewcount(&msgiter);
                                strncpylwr(p, pmsgstr->p, pmsgstr->len);
                                p += pmsgstr->len;
       @@ -398,8 +398,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
                        }
                        if (cmp < 0) {
                                /* write existing str */
       -                        assert(prec->w.p != NULL && prec->w.len > 0);
       -                        assert(prec->w.len <= MAXWORDLEN);
                                count = prec->n;
                                strncpylwr(p, prec->w.p, prec->w.len);
                                p += prec->w.len;
       @@ -409,10 +407,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
                                pos++;
                        } else if (cmp == 0) {
                                /* same str, merge and write difference */
       -                        assert(prec->w.p != NULL && prec->w.len > 0);
       -                        assert(pmsgstr->p != NULL && pmsgstr->len > 0);
       -                        assert(prec->w.len <= MAXWORDLEN);
       -                        assert(pmsgstr->len <= MAXWORDLEN);
                                count = db_getnewcount(&msgiter);
                                count = (prec->n > count) ? (prec->n - count) : 0;
                                strncpylwr(p, prec->w.p, prec->w.len);
       @@ -425,8 +419,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
                                pmsgstr = veciter_get(&msgiter);
                        } else {        /* cmp > 0 */
                                /* this should not happen, so write with count=0 */
       -                        assert(pmsgstr->p != NULL && pmsgstr->len > 0);
       -                        assert(pmsgstr->len <= MAXWORDLEN);
                                db_getnewcount(&msgiter);
                                count = 0;
                                strncpylwr(p, pmsgstr->p, pmsgstr->len);
       @@ -485,53 +477,9 @@ dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
                        else
                                lo = mid;
                }
       -        assert(hi >= 0 && hi < pthis->nitems);
        
                if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
                        return 0;
                }
                return pthis->pitems[hi].n;
        }
       -
       -#ifdef UNIT_TEST
       -int
       -main(int argc, char **argv)
       -{
       -        dbh_t *pdb;
       -        veciter_t iter;
       -        str_t *pstr;
       -        uint n;
       -
       -        if (argc != 2) {
       -                fprintf(stderr, "usage: %s <file>\n", argv[0]);
       -                return 1;
       -        }
       -        for (n = 0; n < 100; n++) {
       -                pdb = dbh_open("testlist", true);
       -
       -                vec_first(&db, &iter);
       -                while ((pstr = veciter_get(&iter)) != NULL) {
       -                        char buf[MAXWORDLEN + 32];
       -                        char *p;
       -
       -                        if (pstr->len > 200) {
       -                                fprintf(stderr, "str too long: %u chars\n", pstr->len);
       -                                break;
       -                        }
       -                        p = buf;
       -                        strcpy(buf, "str: ");
       -                        p += 6;
       -                        memcpy(p, pstr->p, pstr->len);
       -                        p += pstr->len;
       -                        sprintf(p, " %u", pstr->count);
       -                        puts(buf);
       -
       -                        veciter_next(&iter);
       -                }
       -
       -                dbh_close(&db);
       -        }
       -
       -        return 0;
       -}
       -#endif                                /* def UNIT_TEST */
 (DIR) diff --git a/filt.c b/filt.c
       @@ -21,7 +21,7 @@
        #define DEVIATION(n)    fabs((n)-0.5f)
        
        /* Dump the contents of a statistics structure */
       -void 
       +void
        statdump(stats_t * pstat, int fd)
        {
                char iobuf[IOBUFSIZE];
       @@ -49,7 +49,7 @@ statdump(stats_t * pstat, int fd)
                }
        }
        
       -void 
       +void
        bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
        {
                veciter_t iter;
       @@ -95,8 +95,6 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
                                double goodprob = goodtotal ? min(1.0, (goodness / goodtotal)) : 0.0;
                                double spamprob = spamtotal ? min(1.0, (spamness / spamtotal)) : 0.0;
        
       -                        assert(goodtotal > 0 || spamtotal > 0);
       -
        #ifdef NON_EQUIPROBABLE
                                prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spamprob * msg_prob));
        #else
       @@ -146,7 +144,7 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
                pstats->spamicity = product / (product + invproduct);
        }
        
       -bool_t 
       +bool_t
        bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok)
        {
                str_t w;
 (DIR) diff --git a/lex.c b/lex.c
       @@ -561,8 +561,6 @@ lex_nexttoken(lex_t * pthis, tok_t * ptok)
                uint len;
                uint toklen;
        
       -        assert(pthis->pbuf != NULL);
       -
                if (pthis->pos == pthis->eom) {
                        pthis->bom = pthis->pos;
                }
       @@ -637,9 +635,6 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits)
                char szbuf[256];
                bool_t in_headers = true;
        
       -        assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen);
       -        assert(pthis->bom <= pthis->eom);
       -
                pthis->pos = pthis->bom;
                if (is_spam) {
                        sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n"
       @@ -682,43 +677,3 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits)
                }
                pthis->bom = pthis->eom;
        }
       -
       -#ifdef UNIT_TEST
       -
       -int
       -main(int argc, char **argv)
       -{
       -        int fd;
       -        lex_t lex;
       -        tok_t tok;
       -
       -        fd = STDIN_FILENO;
       -        if (argc == 2) {
       -                fd = open(argv[1], O_RDONLY);
       -        }
       -        lex_create(&lex);
       -        if (!lex_load(&lex, fd)) {
       -                fprintf(stderr, "cannot load file\n");
       -                exit(1);
       -        }
       -        lex_nexttoken(&lex, &tok);
       -        while (tok.tt != eof) {
       -                char sztok[64];
       -
       -                if (tok.len > MAXWORDLEN) {
       -                        printf("*** token too long! ***\n");
       -                        exit(1);
       -                }
       -                memcpy(sztok, tok.p, tok.len);
       -                strlwr(sztok);
       -                sztok[tok.len] = '\0';
       -                printf("get_token: %d '%s'\n", tok.tt, sztok);
       -
       -                lex_nexttoken(&lex, &tok);
       -        }
       -
       -        lex_destroy(&lex);
       -        return 0;
       -}
       -
       -#endif                                /* def UNIT_TEST */
 (DIR) diff --git a/lex.h b/lex.h
       @@ -14,23 +14,23 @@ typedef enum { from, eof, word } toktype_t;
        
        typedef struct _tok
        {
       -    toktype_t   tt;         /* token type */
       -    char*       p;
       -    uint        len;
       +        toktype_t tt; /* token type */
       +        char *p;
       +        uint len;
        } tok_t;
        
        typedef enum { envelope, hdrs, body } msgsec_t;
        
        typedef struct _lex
        {
       -    mbox_t      mboxtype;
       -    msgsec_t    section;    /* current section (envelope, headers, body) */
       -    uint        pos;        /* current position */
       -    uint        bom;        /* beginning of message */
       -    uint        eom;        /* end of current message (start of next) */
       -    uint        lineend;    /* line end (actually, start of next line) */
       -    uint        buflen;     /* length of buffer */
       -    char*       pbuf;
       +        mbox_t                mboxtype;
       +        msgsec_t        section;    /* current section (envelope, headers, body) */
       +        uint                pos;        /* current position */
       +        uint                bom;        /* beginning of message */
       +        uint                eom;        /* end of current message (start of next) */
       +        uint                lineend;    /* line end (actually, start of next line) */
       +        uint                buflen;     /* length of buffer */
       +        char                *pbuf;
        } lex_t;
        
        void    lex_create   ( lex_t* plex, mbox_t mboxtype );
 (DIR) diff --git a/str.c b/str.c
       @@ -12,23 +12,6 @@
        #include "str.h"
        
        void
       -strlwr(char *s)
       -{
       -        while (*s != '\0') {
       -                *s = tolower(*s);
       -                s++;
       -        }
       -}
       -
       -void
       -strcpylwr(char *d, const char *s)
       -{
       -        while (*s != '\0') {
       -                *d++ = tolower(*s++);
       -        }
       -}
       -
       -void
        strncpylwr(char *d, const char *s, int n)
        {
                while (n--) {
       @@ -37,46 +20,20 @@ strncpylwr(char *d, const char *s, int n)
        }
        
        void
       -str_create(str_t * pstr)
       +str_create(str_t *pstr)
        {
                pstr->p = NULL;
                pstr->len = 0;
        }
        
       -void
       -str_destroy(str_t * pstr)
       -{
       -        /* empty */
       -}
       -
       -int
       -str_cmp(const str_t * pthis, const str_t * pother)
       -{
       -        uint minlen = min(pthis->len, pother->len);
       -        int cmp;
       -
       -        assert(pthis->p != NULL && pother->p != NULL && minlen != 0);
       -
       -        cmp = strncmp(pthis->p, pother->p, minlen);
       -
       -        if (cmp == 0 && pthis->len != pother->len) {
       -                cmp = (pthis->len < pother->len) ? -1 : 1;
       -        }
       -        return cmp;
       -}
       -
        int
        str_casecmp(const str_t * pthis, const str_t * pother)
        {
       -        uint minlen = min(pthis->len, pother->len);
                int cmp;
        
       -        assert(pthis->p != NULL && pother->p != NULL && minlen != 0);
       -
       -        cmp = strncasecmp(pthis->p, pother->p, minlen);
       -
       -        if (cmp == 0 && pthis->len != pother->len) {
       +        cmp = strncasecmp(pthis->p, pother->p, min(pthis->len, pother->len));
       +        if (cmp == 0 && pthis->len != pother->len)
                        cmp = (pthis->len < pother->len) ? -1 : 1;
       -        }
       +
                return cmp;
        }
 (DIR) diff --git a/str.h b/str.h
       @@ -11,8 +11,6 @@
        #define _STR_H
        
        /* a couple of generic string functions... */
       -void strlwr( char* s );
       -void strcpylwr( char* d, const char* s );
        void strncpylwr( char* d, const char* s, int n );
        
        typedef struct _str
       @@ -22,9 +20,7 @@ typedef struct _str
        } str_t;
        
        void    str_create ( str_t* pthis );
       -void    str_destroy( str_t* pthis );
        
       -int     str_cmp    ( const str_t* pthis, const str_t* pother );
        int     str_casecmp( const str_t* pthis, const str_t* pother );
        
        #endif /* ndef _STR_H */
 (DIR) diff --git a/vec.c b/vec.c
       @@ -61,55 +61,20 @@ vec_setsize(vec_t * pthis, uint nsize)
        }
        
        void
       -vec_addhead(vec_t * pthis, str_t * pstr)
       -{
       -        assert(pstr->p != NULL && pstr->len > 0);
       -
       -        vec_setsize(pthis, pthis->nitems + 1);
       -        memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(str_t));
       -        pthis->pitems[0] = *pstr;
       -        pthis->nitems++;
       -}
       -
       -void
        vec_addtail(vec_t * pthis, str_t * pstr)
        {
       -        assert(pstr->p != NULL && pstr->len > 0);
       -
                vec_setsize(pthis, pthis->nitems + 1);
                pthis->pitems[pthis->nitems] = *pstr;
                pthis->nitems++;
        }
        
        void
       -vec_delhead(vec_t * pthis)
       -{
       -        assert(pthis->nitems > 0);
       -        pthis->nitems--;
       -        memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(str_t));
       -}
       -
       -void
       -vec_deltail(vec_t * pthis)
       -{
       -        assert(pthis->nitems > 0);
       -        pthis->nitems--;
       -}
       -
       -void
        vec_first(vec_t * pthis, veciter_t * piter)
        {
                piter->plist = pthis;
                piter->index = 0;
        }
        
       -void
       -vec_last(vec_t * pthis, veciter_t * piter)
       -{
       -        piter->plist = pthis;
       -        piter->index = pthis->nitems;
       -}
       -
        /*****************************************************************************
         * sorted vector
         */
       @@ -121,66 +86,6 @@ svec_compare(const void *p1, const void *p2)
        }
        
        void
       -svec_add(vec_t * pthis, str_t * pstr)
       -{
       -        int lo, hi, mid;
       -        veciter_t iter;
       -
       -        if (pthis->nitems == 0) {
       -                vec_addtail(pthis, pstr);
       -                return;
       -        }
       -        if (str_casecmp(pstr, &pthis->pitems[0]) < 0) {
       -                vec_addhead(pthis, pstr);
       -                return;
       -        }
       -        hi = pthis->nitems - 1;
       -        lo = -1;
       -        while (hi - lo > 1) {
       -                mid = (hi + lo) / 2;
       -                if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0)
       -                        hi = mid;
       -                else
       -                        lo = mid;
       -        }
       -        assert(hi < pthis->nitems);
       -
       -        iter.plist = pthis;
       -        iter.index = hi;
       -
       -        if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) {
       -                veciter_addbefore(&iter, pstr);
       -        } else {
       -                veciter_addafter(&iter, pstr);
       -        }
       -}
       -
       -str_t *
       -svec_find(vec_t * pthis, str_t * pstr)
       -{
       -        int lo, hi, mid;
       -
       -        if (pthis->nitems == 0) {
       -                return NULL;
       -        }
       -        hi = pthis->nitems - 1;
       -        lo = -1;
       -        while (hi - lo > 1) {
       -                mid = (hi + lo) / 2;
       -                if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0)
       -                        hi = mid;
       -                else
       -                        lo = mid;
       -        }
       -        assert(hi >= 0 && hi < pthis->nitems);
       -
       -        if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) {
       -                return NULL;
       -        }
       -        return &pthis->pitems[hi];
       -}
       -
       -void
        svec_sort(vec_t * pthis)
        {
                if (pthis->nitems > 1) {
       @@ -208,35 +113,6 @@ veciter_get(veciter_t * pthis)
        }
        
        bool_t
       -veciter_equal(veciter_t * pthis, veciter_t * pthat)
       -{
       -        if (pthis->plist != pthat->plist ||
       -            pthis->index != pthat->index) {
       -                return false;
       -        }
       -        return true;
       -}
       -
       -bool_t
       -veciter_hasitem(veciter_t * pthis)
       -{
       -        if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) {
       -                return false;
       -        }
       -        return true;
       -}
       -
       -bool_t
       -veciter_prev(veciter_t * pthis)
       -{
       -        if (pthis->index == 0) {
       -                return false;
       -        }
       -        pthis->index--;
       -        return true;
       -}
       -
       -bool_t
        veciter_next(veciter_t * pthis)
        {
                pthis->index++;
       @@ -245,94 +121,3 @@ veciter_next(veciter_t * pthis)
                }
                return true;
        }
       -
       -void
       -veciter_addafter(veciter_t * pthis, str_t * pstr)
       -{
       -        str_t *pitems;
       -
       -        vec_setsize(pthis->plist, pthis->plist->nitems + 1);
       -        assert(pthis->index < pthis->plist->nitems);
       -        pitems = pthis->plist->pitems;
       -
       -        if (pthis->index != pthis->plist->nitems - 1) {
       -                memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1],
       -                 (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t));
       -        }
       -        pitems[pthis->index + 1] = *pstr;
       -        pthis->plist->nitems++;
       -}
       -
       -void
       -veciter_addbefore(veciter_t * pthis, str_t * pstr)
       -{
       -        str_t *pitems;
       -
       -        vec_setsize(pthis->plist, pthis->plist->nitems + 1);
       -        assert(pthis->index < pthis->plist->nitems);
       -        pitems = pthis->plist->pitems;
       -
       -        memmove(&pitems[pthis->index + 1], &pitems[pthis->index],
       -                (pthis->plist->nitems - pthis->index) * sizeof(str_t));
       -
       -        pitems[pthis->index] = *pstr;
       -        pthis->plist->nitems++;
       -}
       -
       -void
       -veciter_del(veciter_t * pthis)
       -{
       -        str_t *pitems;
       -
       -        assert(pthis->plist->nitems > 0);
       -        pthis->plist->nitems--;
       -        if (pthis->index < pthis->plist->nitems) {
       -                pitems = pthis->plist->pitems;
       -                memmove(&pitems[pthis->index], &pitems[pthis->index + 1],
       -                     (pthis->plist->nitems - pthis->index) * sizeof(str_t));
       -        }
       -}
       -
       -#ifdef UNIT_TEST
       -int
       -main(int argc, char **argv)
       -{
       -        vec_t vl;
       -        veciter_t iter;
       -        str_t *pstr;
       -        uint n;
       -
       -        if (argc != 2) {
       -                fprintf(stderr, "usage: %s <file>\n", argv[0]);
       -                return 1;
       -        }
       -        for (n = 0; n < 100; n++) {
       -                vec_create(&vl);
       -                vec_load(&vl, argv[1]);
       -
       -                vec_first(&vl, &iter);
       -                while ((pstr = veciter_get(&iter)) != NULL) {
       -                        char buf[256];
       -                        char *p;
       -
       -                        if (pstr->len > 200) {
       -                                fprintf(stderr, "str too long: %u chars\n", pstr->len);
       -                                break;
       -                        }
       -                        p = buf;
       -                        strcpy(buf, "str: ");
       -                        p += 6;
       -                        memcpy(p, pstr->p, pstr->len);
       -                        p += pstr->len;
       -                        sprintf(p, " %u", pstr->count);
       -                        puts(buf);
       -
       -                        veciter_next(&iter);
       -                }
       -
       -                vec_destroy(&vl);
       -        }
       -
       -        return 0;
       -}
       -#endif                                /* def UNIT_TEST */
 (DIR) diff --git a/vec.h b/vec.h
       @@ -13,46 +13,30 @@
        /* item count for initial alloc */
        #define VEC_INITIAL_SIZE    256
        
       -typedef struct _vec
       -{
       -    uint        nalloc;     /* items alloced in pitems */
       -    uint        nitems;     /* items available */
       -    str_t*      pitems;     /* growing vector of items */
       +typedef struct _vec {
       +        uint nalloc;                /* items allocated in pitems */
       +        uint nitems;                /* items available */
       +        str_t *pitems;                /* growing vector of items */
        } vec_t;
        
       -typedef struct _veciter
       -{
       -    struct _vec*        plist;
       -    uint                index;
       +typedef struct _veciter {
       +        struct _vec *plist;
       +        uint index;
        } veciter_t;
        
        /* class vector */
       -void    vec_create       ( vec_t* pthis );
       -void    vec_destroy      ( vec_t* pthis );
       +void vec_create(vec_t * pthis);
       +void vec_destroy(vec_t * pthis);
       +void vec_addtail(vec_t * pthis, str_t * pstr);
       +void vec_first(vec_t * pthis, veciter_t * piter);
        
       -void    vec_addhead      ( vec_t* pthis, str_t* pstr );
       -void    vec_addtail      ( vec_t* pthis, str_t* pstr );
       -void    vec_delhead      ( vec_t* pthis );
       -void    vec_deltail      ( vec_t* pthis );
       +/* class sorted_vector */
       +void svec_sort(vec_t * ptthis);
        
       -void    vec_first        ( vec_t* pthis, veciter_t* piter );
       -void    vec_last         ( vec_t* pthis, veciter_t* piter );
       +/* veciter_create not needed */
       +void veciter_destroy(veciter_t * pthis);
        
       -/* class sorted_vector */
       -void    svec_add         ( vec_t* pthis, str_t* pstr );
       -str_t*  svec_find        ( vec_t* pthis, str_t* pstr );
       -void    svec_sort        ( vec_t* ptthis );
       -
       -/*      veciter_create not needed */
       -void    veciter_destroy  ( veciter_t* pthis );
       -
       -str_t*  veciter_get      ( veciter_t* pthis );
       -bool_t  veciter_equal    ( veciter_t* pthis, veciter_t* pthat );
       -bool_t  veciter_hasitem  ( veciter_t* pthis );
       -bool_t  veciter_prev     ( veciter_t* pthis );
       -bool_t  veciter_next     ( veciter_t* pthis );
       -void    veciter_addafter ( veciter_t* pthis, str_t* pstr );
       -void    veciter_addbefore( veciter_t* pthis, str_t* pstr );
       -void    veciter_del      ( veciter_t* pthis );
       +str_t *veciter_get(veciter_t * pthis);
       +bool_t veciter_next(veciter_t * pthis);
        
        #endif /* ndef _VEC_H */