many improvements - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
(HTM) git clone git://git.codemadness.org/bmf
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit f5e56cc70c117352ec5b7a7984065eaa65db162f
(DIR) parent 20a0f52d5b478e240450fd72fa3bbd3ab5c58c48
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 27 Oct 2018 18:37:01 +0200
many improvements
- update README: only flat files are supported now.
- remove bmf.spec.in file.
- remove unused functions.
- remove some assert() calls.
- dbtext_db_open() improvements:
  - check strdup call + perror message.
  - malloc + perror message.
  - check for empty directory string (just in case).
  - use snprintf (just in case).
  - free memory on failure condition.
- dbtext_db_opentable() improvements:
  - use snprintf, error on path truncation.
- remove unit tests (not maintained).
- code-style improvements.
Diffstat:
M Makefile | 1 -
M README | 5 +----
M bmf.c | 1 -
D bmf.spec.in | 64 -------------------------------
M config.h | 1 -
M dbg.c | 14 +-------------
M dbg.h | 3 ---
M dbh.c | 58 +++++++++++++++---------------
M dbh.h | 46 ++++++++++++++-----------------
M dbtext.c | 166 +++++++++++--------------------
M filt.c | 8 +++-----
M lex.c | 45 -------------------------------
M lex.h | 22 +++++++++++-----------
M str.c | 51 +++----------------------------
M str.h | 4 ----
M vec.c | 215 -------------------------------
M vec.h | 50 +++++++++++--------------------
17 files changed, 144 insertions(+), 610 deletions(-)
---
(DIR) diff --git a/Makefile b/Makefile
@@ -50,7 +50,6 @@ dist:
cp -f ${MAN1} ${DOC} ${HDR} \
${SRC} ${SCRIPTS} \
Makefile \
- bmf.spec.in \
"${NAME}-${VERSION}"
# make tarball
tar -cf - "${NAME}-${VERSION}" | \
(DIR) diff --git a/README b/README
@@ -13,10 +13,7 @@ This project provides features which are not available in other filters:
(1) Independence from external programs and libraries. Tokens are stored in
memory using simple vectors which require no heavyweight external data
-structure libraries. Multiple token database formats are supported,
-including flat files, libdb, and mysql. Conversion between formats will
-always be possible with the included import/export utility and flat files
-will always remain an option.
+structure libraries. The tokens are stored in plain-text "flat" files.
(2) Efficient processing. Input data is parsed by a handcrafted parser
which weighs in under 3% of the equivalent code generated by flex. No
(DIR) diff --git a/bmf.c b/bmf.c
@@ -98,7 +98,6 @@ main(int argc, char **argv)
err(1, "pledge");
srand(time(NULL));
- atexit(dump_alloc_heap);
stats.keepers = DEF_KEEPERS;
while ((ch = getopt(argc, argv, "NSVd:hk:m:npstv")) != EOF) {
(DIR) diff --git a/bmf.spec.in b/bmf.spec.in
@@ -1,64 +0,0 @@
-Name: bmf
-Version: VERSION
-Release: 1
-URL: http://www.sourceforge.net/projects/bmf
-Source0: %{name}-%{version}.tar.gz
-License: GPL
-Group: Applications/Internet
-Summary: fast anti-spam filtering by Bayesian statistical analysis
-Buildroot: %{_tmppath}/%{name}-%{version}-root
-
-%description
-bmf is a Bayesian mail filter. It takes an email message or other text on
-stdin, does a statistical check against lists of "good" and "spam" words,
-and returns a status code indicating whether or not the message is spam.
-bmf is efficient, small, and self-contained.
-
-%prep
-
-%setup
-
-%build
-./configure --with-libdb --without-mysql
-make
-
-%install
-[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT
-make DESTDIR=${RPM_BUILD_ROOT} install
-gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.?
-
-
-%files
-%{_bindir}/bmf
-%{_mandir}/man1/bmf.1.gz
-%{_bindir}/bmfconv
-%{_mandir}/man1/bmfconv.1.gz
-%doc README LICENSE
-
-%changelog
-* Mon Oct 14 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.9.3.
-
-* Sat Oct 12 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.9.2.
-
-* Sat Oct 12 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.9.1.
-
-* Wed Oct 09 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.84.
-
-* Mon Oct 07 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.83.
-
-* Sat Oct 05 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.82.
-
-* Thu Oct 03 2002 Tom Marshall <tommy@tig-grr.com>
-- Update to version 0.81.
-- Add bmfconv.
-- Use new configure script.
-
-* Fri Sep 27 2002 Tom Marshall <tommy@tig-grr.com>
-- Initial build.
-
(DIR) diff --git a/config.h b/config.h
@@ -19,7 +19,6 @@
#include <errno.h>
#include <math.h>
#include <ctype.h>
-#include <assert.h>
/**************************************
* System headers
(DIR) diff --git a/dbg.c b/dbg.c
@@ -15,7 +15,7 @@
uint g_verbose = 0;
-void
+void
verbose(int level, const char *fmt,...)
{
va_list v;
@@ -26,15 +26,3 @@ verbose(int level, const char *fmt,...)
va_end(v);
}
}
-
-void
-dbgout(const char *fmt,...)
-{
- /* empty */
-}
-
-void
-dump_alloc_heap(void)
-{
- /* empty */
-}
(DIR) diff --git a/dbg.h b/dbg.h
@@ -14,7 +14,4 @@ extern uint g_verbose;
void verbose( int level, const char* fmt, ... );
-void dbgout( const char* fmt, ... );
-void dump_alloc_heap( void );
-
#endif /* ndef _DBG_H */
(DIR) diff --git a/dbh.c b/dbh.c
@@ -24,43 +24,43 @@
*
* the list referenced in the iterator must be sorted.
*/
-uint db_getnewcount( veciter_t* piter )
+uint
+db_getnewcount(veciter_t * piter)
{
- str_t* pstr;
- uint count;
- veciter_t curiter;
- str_t* pcurstr;
+ str_t *pstr;
+ uint count;
+ veciter_t curiter;
+ str_t *pcurstr;
- pstr = &piter->plist->pitems[piter->index];
- count = 0;
+ pstr = &piter->plist->pitems[piter->index];
+ count = 0;
- curiter.plist = piter->plist;
- curiter.index = piter->index;
- pcurstr = &curiter.plist->pitems[curiter.index];
+ curiter.plist = piter->plist;
+ curiter.index = piter->index;
+ pcurstr = &curiter.plist->pitems[curiter.index];
- while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr ) == 0 )
- {
- piter->index = curiter.index;
- count = min( MAXFREQ, count + 1 );
- veciter_next( &curiter );
- pcurstr = &curiter.plist->pitems[curiter.index];
- }
+ while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcurstr) == 0) {
+ piter->index = curiter.index;
+ count = min(MAXFREQ, count + 1);
+ veciter_next(&curiter);
+ pcurstr = &curiter.plist->pitems[curiter.index];
+ }
- return count;
+ return count;
}
-dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass )
+dbh_t *
+dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
{
- dbh_t* pdb = NULL;
+ dbh_t *pdb = NULL; /* stays NULL unless a supported format opens successfully */
- switch( dbfmt )
- {
- case db_text:
- pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass );
- break;
- default:
- assert(false);
- }
+ switch (dbfmt) {
+ case db_text:
+ pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass);
+ break;
+ default:
+ break; /* unsupported format: pdb remains NULL */
+ }
- return pdb;
+ return pdb; /* handle from dbtext_db_open(), or NULL on failure */
}
(DIR) diff --git a/dbh.h b/dbh.h
@@ -11,44 +11,40 @@
#define _DBH_H
/* database formats */
-typedef enum
-{
- db_text /* flat text */
-} dbfmt_t;
+typedef enum {
+ db_text /* flat text */
+} dbfmt_t;
/* record/field structure */
-typedef struct _rec
-{
- str_t w;
- uint n;
-} rec_t;
+typedef struct _rec {
+ str_t w;
+ uint n;
+} rec_t;
/* database table */
typedef struct _dbt dbt_t;
-struct _dbt
-{
- bool_t (*close)(dbt_t*);
- bool_t (*mergeclose)(dbt_t*,vec_t*);
- bool_t (*unmergeclose)(dbt_t*,vec_t*);
- bool_t (*import)(dbt_t*,cpchar);
- bool_t (*export)(dbt_t*,cpchar);
- uint (*getmsgcount)(dbt_t*);
- uint (*getcount)(dbt_t*,str_t*);
+struct _dbt {
+ bool_t(*close) (dbt_t *);
+ bool_t(*mergeclose) (dbt_t *, vec_t *);
+ bool_t(*unmergeclose) (dbt_t *, vec_t *);
+ bool_t(*import) (dbt_t *, cpchar);
+ bool_t(*export) (dbt_t *, cpchar);
+ uint(*getmsgcount) (dbt_t *);
+ uint(*getcount) (dbt_t *, str_t *);
};
/* database instance */
typedef struct _dbh dbh_t;
-struct _dbh
-{
- bool_t (*close)(dbh_t*);
- dbt_t* (*opentable)(dbh_t*,cpchar,bool_t);
+struct _dbh {
+ bool_t(*close) (dbh_t *);
+ dbt_t *(*opentable) (dbh_t *, cpchar, bool_t);
};
-dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass );
+dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass);
#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
#define TEXTDB_MAXLINELEN (MAXWORDLEN+32)
-uint db_getnewcount( veciter_t* piter );
+uint db_getnewcount(veciter_t * piter);
-#endif /* ndef _DBH_H */
+#endif /* ndef _DBH_H */
(DIR) diff --git a/dbtext.c b/dbtext.c
@@ -21,78 +21,83 @@
static void
dbtext_table_setsize(dbttext_t * pthis, uint nsize)
{
- if (nsize > pthis->nalloc) {
- uint nnewalloc;
- rec_t *pnewitems;
- uint n;
-
- nnewalloc = pthis->nalloc * 2;
- if (nnewalloc < nsize)
- nnewalloc = nsize;
- pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
- if (pnewitems == NULL) {
- exit(2);
- }
- for (n = pthis->nitems; n < nsize; n++) {
- str_create(&pnewitems[n].w);
- pnewitems[n].n = 0;
- }
- pthis->pitems = pnewitems;
- pthis->nalloc = nnewalloc;
+ uint nnewalloc;
+ rec_t *pnewitems;
+ uint n;
+
+ if (nsize <= pthis->nalloc)
+ return;
+
+ nnewalloc = pthis->nalloc * 2;
+ if (nnewalloc < nsize)
+ nnewalloc = nsize;
+ pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
+ if (pnewitems == NULL) {
+ exit(2);
}
+ for (n = pthis->nitems; n < nsize; n++) {
+ str_create(&pnewitems[n].w);
+ pnewitems[n].n = 0;
+ }
+ pthis->pitems = pnewitems;
+ pthis->nalloc = nnewalloc;
}
dbh_t *
dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
{
- dbhtext_t *pthis;
-
+ dbhtext_t *pthis = NULL;
uint dirlen;
cpchar phome;
struct stat st;
- pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t));
- if (pthis == NULL) {
+ if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
+ perror("malloc()");
goto bail;
}
+
pthis->close = dbtext_db_close;
pthis->opentable = dbtext_db_opentable;
+
if (dbname != NULL && *dbname != '\0') {
dirlen = strlen(dbname);
- pthis->dir = strdup(dbname);
- if (pthis->dir[dirlen - 1] == '/') {
- pthis->dir[dirlen - 1] = '\0';
+ if ((pthis->dir = strdup(dbname)) == NULL) {
+ perror("strdup()");
+ goto bail;
}
+ if (dirlen && pthis->dir[dirlen - 1] == '/')
+ pthis->dir[--dirlen] = '\0';
} else {
phome = getenv("HOME");
if (phome == NULL || *phome == '\0') {
phome = ".";
}
- pthis->dir = (char *) malloc(strlen(phome) + 5 + 1);
- if (pthis->dir == NULL) {
+ dirlen = strlen(phome) + 5 + 1;
+ if ((pthis->dir = malloc(dirlen)) == NULL)
goto bail;
- }
- sprintf(pthis->dir, "%s/.bmf", phome);
+
+ snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
}
- /* ensure config directory exists */
+ /* make sure config directory exists */
if (stat(pthis->dir, &st) != 0) {
- if (errno == ENOENT) {
- if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) {
- goto bail;
- }
- } else {
+ if (errno != ENOENT ||
+ mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
goto bail;
- }
} else {
- if (!S_ISDIR(st.st_mode)) {
+ if (!S_ISDIR(st.st_mode))
goto bail;
- }
}
- return (dbh_t *) pthis;
+ return (dbh_t *)pthis;
bail:
+ if (pthis) {
+ if (pthis->dir)
+ free(pthis->dir);
+ free(pthis);
+ }
+
return NULL;
}
@@ -114,19 +119,17 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
#endif /* ndef NOLOCK */
char szpath[PATH_MAX];
- int flags;
+ int flags, ret;
struct stat st;
-
char *pbegin;
char *pend;
rec_t r;
uint pos;
- if (pthis->dir == NULL) {
+ if (pthis->dir == NULL)
goto bail;
- }
- ptable = (dbttext_t *) malloc(sizeof(dbttext_t));
- if (ptable == NULL) {
+
+ if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
perror("malloc()");
goto bail;
}
@@ -144,13 +147,18 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
ptable->nitems = 0;
ptable->pitems = NULL;
- sprintf(szpath, "%s/%s.txt", pthis->dir, table);
- flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT);
- ptable->fd = open(szpath, flags, 0644);
- if (ptable->fd == -1) {
+ ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
+ if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
+ fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table);
+ goto bail;
+ }
+
+ flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
+ if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
perror("open()");
goto bail;
}
+
#ifndef NOLOCK
memset(&lock, 0, sizeof(lock));
lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
@@ -307,8 +315,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
}
if (cmp < 0) {
/* write existing str */
- assert(prec->w.p != NULL && prec->w.len > 0);
- assert(prec->w.len <= MAXWORDLEN);
count = prec->n;
strncpylwr(p, prec->w.p, prec->w.len);
p += prec->w.len;
@@ -318,10 +324,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
pos++;
} else if (cmp == 0) {
/* same str, merge and write sum */
- assert(prec->w.p != NULL && prec->w.len > 0);
- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
- assert(prec->w.len <= MAXWORDLEN);
- assert(pmsgstr->len <= MAXWORDLEN);
count = db_getnewcount(&msgiter);
count += prec->n;
strncpylwr(p, prec->w.p, prec->w.len);
@@ -334,8 +336,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
pmsgstr = veciter_get(&msgiter);
} else { /* cmp > 0 */
/* write new str */
- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
- assert(pmsgstr->len <= MAXWORDLEN);
count = db_getnewcount(&msgiter);
strncpylwr(p, pmsgstr->p, pmsgstr->len);
p += pmsgstr->len;
@@ -398,8 +398,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
}
if (cmp < 0) {
/* write existing str */
- assert(prec->w.p != NULL && prec->w.len > 0);
- assert(prec->w.len <= MAXWORDLEN);
count = prec->n;
strncpylwr(p, prec->w.p, prec->w.len);
p += prec->w.len;
@@ -409,10 +407,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
pos++;
} else if (cmp == 0) {
/* same str, merge and write difference */
- assert(prec->w.p != NULL && prec->w.len > 0);
- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
- assert(prec->w.len <= MAXWORDLEN);
- assert(pmsgstr->len <= MAXWORDLEN);
count = db_getnewcount(&msgiter);
count = (prec->n > count) ? (prec->n - count) : 0;
strncpylwr(p, prec->w.p, prec->w.len);
@@ -425,8 +419,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
pmsgstr = veciter_get(&msgiter);
} else { /* cmp > 0 */
/* this should not happen, so write with count=0 */
- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
- assert(pmsgstr->len <= MAXWORDLEN);
db_getnewcount(&msgiter);
count = 0;
strncpylwr(p, pmsgstr->p, pmsgstr->len);
@@ -485,53 +477,9 @@ dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
else
lo = mid;
}
- assert(hi >= 0 && hi < pthis->nitems);
if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
return 0;
}
return pthis->pitems[hi].n;
}
-
-#ifdef UNIT_TEST
-int
-main(int argc, char **argv)
-{
- dbh_t *pdb;
- veciter_t iter;
- str_t *pstr;
- uint n;
-
- if (argc != 2) {
- fprintf(stderr, "usage: %s <file>\n", argv[0]);
- return 1;
- }
- for (n = 0; n < 100; n++) {
- pdb = dbh_open("testlist", true);
-
- vec_first(&db, &iter);
- while ((pstr = veciter_get(&iter)) != NULL) {
- char buf[MAXWORDLEN + 32];
- char *p;
-
- if (pstr->len > 200) {
- fprintf(stderr, "str too long: %u chars\n", pstr->len);
- break;
- }
- p = buf;
- strcpy(buf, "str: ");
- p += 6;
- memcpy(p, pstr->p, pstr->len);
- p += pstr->len;
- sprintf(p, " %u", pstr->count);
- puts(buf);
-
- veciter_next(&iter);
- }
-
- dbh_close(&db);
- }
-
- return 0;
-}
-#endif /* def UNIT_TEST */
(DIR) diff --git a/filt.c b/filt.c
@@ -21,7 +21,7 @@
#define DEVIATION(n) fabs((n)-0.5f)
/* Dump the contents of a statistics structure */
-void
+void
statdump(stats_t * pstat, int fd)
{
char iobuf[IOBUFSIZE];
@@ -49,7 +49,7 @@ statdump(stats_t * pstat, int fd)
}
}
-void
+void
bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
{
veciter_t iter;
@@ -95,8 +95,6 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
double goodprob = goodtotal ? min(1.0, (goodness / goodtotal)) : 0.0;
double spamprob = spamtotal ? min(1.0, (spamness / spamtotal)) : 0.0;
- assert(goodtotal > 0 || spamtotal > 0);
-
#ifdef NON_EQUIPROBABLE
prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spamprob * msg_prob));
#else
@@ -146,7 +144,7 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
pstats->spamicity = product / (product + invproduct);
}
-bool_t
+bool_t
bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok)
{
str_t w;
(DIR) diff --git a/lex.c b/lex.c
@@ -561,8 +561,6 @@ lex_nexttoken(lex_t * pthis, tok_t * ptok)
uint len;
uint toklen;
- assert(pthis->pbuf != NULL);
-
if (pthis->pos == pthis->eom) {
pthis->bom = pthis->pos;
}
@@ -637,9 +635,6 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits)
char szbuf[256];
bool_t in_headers = true;
- assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen);
- assert(pthis->bom <= pthis->eom);
-
pthis->pos = pthis->bom;
if (is_spam) {
sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n"
@@ -682,43 +677,3 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits)
}
pthis->bom = pthis->eom;
}
-
-#ifdef UNIT_TEST
-
-int
-main(int argc, char **argv)
-{
- int fd;
- lex_t lex;
- tok_t tok;
-
- fd = STDIN_FILENO;
- if (argc == 2) {
- fd = open(argv[1], O_RDONLY);
- }
- lex_create(&lex);
- if (!lex_load(&lex, fd)) {
- fprintf(stderr, "cannot load file\n");
- exit(1);
- }
- lex_nexttoken(&lex, &tok);
- while (tok.tt != eof) {
- char sztok[64];
-
- if (tok.len > MAXWORDLEN) {
- printf("*** token too long! ***\n");
- exit(1);
- }
- memcpy(sztok, tok.p, tok.len);
- strlwr(sztok);
- sztok[tok.len] = '\0';
- printf("get_token: %d '%s'\n", tok.tt, sztok);
-
- lex_nexttoken(&lex, &tok);
- }
-
- lex_destroy(&lex);
- return 0;
-}
-
-#endif /* def UNIT_TEST */
(DIR) diff --git a/lex.h b/lex.h
@@ -14,23 +14,23 @@ typedef enum { from, eof, word } toktype_t;
typedef struct _tok
{
- toktype_t tt; /* token type */
- char* p;
- uint len;
+ toktype_t tt; /* token type */
+ char *p;
+ uint len;
} tok_t;
typedef enum { envelope, hdrs, body } msgsec_t;
typedef struct _lex
{
- mbox_t mboxtype;
- msgsec_t section; /* current section (envelope, headers, body) */
- uint pos; /* current position */
- uint bom; /* beginning of message */
- uint eom; /* end of current message (start of next) */
- uint lineend; /* line end (actually, start of next line) */
- uint buflen; /* length of buffer */
- char* pbuf;
+ mbox_t mboxtype;
+ msgsec_t section; /* current section (envelope, headers, body) */
+ uint pos; /* current position */
+ uint bom; /* beginning of message */
+ uint eom; /* end of current message (start of next) */
+ uint lineend; /* line end (actually, start of next line) */
+ uint buflen; /* length of buffer */
+ char *pbuf;
} lex_t;
void lex_create ( lex_t* plex, mbox_t mboxtype );
(DIR) diff --git a/str.c b/str.c
@@ -12,23 +12,6 @@
#include "str.h"
void
-strlwr(char *s)
-{
- while (*s != '\0') {
- *s = tolower(*s);
- s++;
- }
-}
-
-void
-strcpylwr(char *d, const char *s)
-{
- while (*s != '\0') {
- *d++ = tolower(*s++);
- }
-}
-
-void
strncpylwr(char *d, const char *s, int n)
{
while (n--) {
@@ -37,46 +20,20 @@ strncpylwr(char *d, const char *s, int n)
}
void
-str_create(str_t * pstr)
+str_create(str_t *pstr)
{
pstr->p = NULL;
pstr->len = 0;
}
-void
-str_destroy(str_t * pstr)
-{
- /* empty */
-}
-
-int
-str_cmp(const str_t * pthis, const str_t * pother)
-{
- uint minlen = min(pthis->len, pother->len);
- int cmp;
-
- assert(pthis->p != NULL && pother->p != NULL && minlen != 0);
-
- cmp = strncmp(pthis->p, pother->p, minlen);
-
- if (cmp == 0 && pthis->len != pother->len) {
- cmp = (pthis->len < pother->len) ? -1 : 1;
- }
- return cmp;
-}
-
int
str_casecmp(const str_t * pthis, const str_t * pother)
{
- uint minlen = min(pthis->len, pother->len);
int cmp;
- assert(pthis->p != NULL && pother->p != NULL && minlen != 0);
-
- cmp = strncasecmp(pthis->p, pother->p, minlen);
-
- if (cmp == 0 && pthis->len != pother->len) {
+ cmp = strncasecmp(pthis->p, pother->p, min(pthis->len, pother->len));
+ if (cmp == 0 && pthis->len != pother->len)
cmp = (pthis->len < pother->len) ? -1 : 1;
- }
+
return cmp;
}
(DIR) diff --git a/str.h b/str.h
@@ -11,8 +11,6 @@
#define _STR_H
/* a couple of generic string functions... */
-void strlwr( char* s );
-void strcpylwr( char* d, const char* s );
void strncpylwr( char* d, const char* s, int n );
typedef struct _str
@@ -22,9 +20,7 @@ typedef struct _str
} str_t;
void str_create ( str_t* pthis );
-void str_destroy( str_t* pthis );
-int str_cmp ( const str_t* pthis, const str_t* pother );
int str_casecmp( const str_t* pthis, const str_t* pother );
#endif /* ndef _STR_H */
(DIR) diff --git a/vec.c b/vec.c
@@ -61,55 +61,20 @@ vec_setsize(vec_t * pthis, uint nsize)
}
void
-vec_addhead(vec_t * pthis, str_t * pstr)
-{
- assert(pstr->p != NULL && pstr->len > 0);
-
- vec_setsize(pthis, pthis->nitems + 1);
- memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(str_t));
- pthis->pitems[0] = *pstr;
- pthis->nitems++;
-}
-
-void
vec_addtail(vec_t * pthis, str_t * pstr)
{
- assert(pstr->p != NULL && pstr->len > 0);
-
vec_setsize(pthis, pthis->nitems + 1);
pthis->pitems[pthis->nitems] = *pstr;
pthis->nitems++;
}
void
-vec_delhead(vec_t * pthis)
-{
- assert(pthis->nitems > 0);
- pthis->nitems--;
- memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(str_t));
-}
-
-void
-vec_deltail(vec_t * pthis)
-{
- assert(pthis->nitems > 0);
- pthis->nitems--;
-}
-
-void
vec_first(vec_t * pthis, veciter_t * piter)
{
piter->plist = pthis;
piter->index = 0;
}
-void
-vec_last(vec_t * pthis, veciter_t * piter)
-{
- piter->plist = pthis;
- piter->index = pthis->nitems;
-}
-
/*****************************************************************************
* sorted vector
*/
@@ -121,66 +86,6 @@ svec_compare(const void *p1, const void *p2)
}
void
-svec_add(vec_t * pthis, str_t * pstr)
-{
- int lo, hi, mid;
- veciter_t iter;
-
- if (pthis->nitems == 0) {
- vec_addtail(pthis, pstr);
- return;
- }
- if (str_casecmp(pstr, &pthis->pitems[0]) < 0) {
- vec_addhead(pthis, pstr);
- return;
- }
- hi = pthis->nitems - 1;
- lo = -1;
- while (hi - lo > 1) {
- mid = (hi + lo) / 2;
- if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0)
- hi = mid;
- else
- lo = mid;
- }
- assert(hi < pthis->nitems);
-
- iter.plist = pthis;
- iter.index = hi;
-
- if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) {
- veciter_addbefore(&iter, pstr);
- } else {
- veciter_addafter(&iter, pstr);
- }
-}
-
-str_t *
-svec_find(vec_t * pthis, str_t * pstr)
-{
- int lo, hi, mid;
-
- if (pthis->nitems == 0) {
- return NULL;
- }
- hi = pthis->nitems - 1;
- lo = -1;
- while (hi - lo > 1) {
- mid = (hi + lo) / 2;
- if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0)
- hi = mid;
- else
- lo = mid;
- }
- assert(hi >= 0 && hi < pthis->nitems);
-
- if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) {
- return NULL;
- }
- return &pthis->pitems[hi];
-}
-
-void
svec_sort(vec_t * pthis)
{
if (pthis->nitems > 1) {
@@ -208,35 +113,6 @@ veciter_get(veciter_t * pthis)
}
bool_t
-veciter_equal(veciter_t * pthis, veciter_t * pthat)
-{
- if (pthis->plist != pthat->plist ||
- pthis->index != pthat->index) {
- return false;
- }
- return true;
-}
-
-bool_t
-veciter_hasitem(veciter_t * pthis)
-{
- if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) {
- return false;
- }
- return true;
-}
-
-bool_t
-veciter_prev(veciter_t * pthis)
-{
- if (pthis->index == 0) {
- return false;
- }
- pthis->index--;
- return true;
-}
-
-bool_t
veciter_next(veciter_t * pthis)
{
pthis->index++;
@@ -245,94 +121,3 @@ veciter_next(veciter_t * pthis)
}
return true;
}
-
-void
-veciter_addafter(veciter_t * pthis, str_t * pstr)
-{
- str_t *pitems;
-
- vec_setsize(pthis->plist, pthis->plist->nitems + 1);
- assert(pthis->index < pthis->plist->nitems);
- pitems = pthis->plist->pitems;
-
- if (pthis->index != pthis->plist->nitems - 1) {
- memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1],
- (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t));
- }
- pitems[pthis->index + 1] = *pstr;
- pthis->plist->nitems++;
-}
-
-void
-veciter_addbefore(veciter_t * pthis, str_t * pstr)
-{
- str_t *pitems;
-
- vec_setsize(pthis->plist, pthis->plist->nitems + 1);
- assert(pthis->index < pthis->plist->nitems);
- pitems = pthis->plist->pitems;
-
- memmove(&pitems[pthis->index + 1], &pitems[pthis->index],
- (pthis->plist->nitems - pthis->index) * sizeof(str_t));
-
- pitems[pthis->index] = *pstr;
- pthis->plist->nitems++;
-}
-
-void
-veciter_del(veciter_t * pthis)
-{
- str_t *pitems;
-
- assert(pthis->plist->nitems > 0);
- pthis->plist->nitems--;
- if (pthis->index < pthis->plist->nitems) {
- pitems = pthis->plist->pitems;
- memmove(&pitems[pthis->index], &pitems[pthis->index + 1],
- (pthis->plist->nitems - pthis->index) * sizeof(str_t));
- }
-}
-
-#ifdef UNIT_TEST
-int
-main(int argc, char **argv)
-{
- vec_t vl;
- veciter_t iter;
- str_t *pstr;
- uint n;
-
- if (argc != 2) {
- fprintf(stderr, "usage: %s <file>\n", argv[0]);
- return 1;
- }
- for (n = 0; n < 100; n++) {
- vec_create(&vl);
- vec_load(&vl, argv[1]);
-
- vec_first(&vl, &iter);
- while ((pstr = veciter_get(&iter)) != NULL) {
- char buf[256];
- char *p;
-
- if (pstr->len > 200) {
- fprintf(stderr, "str too long: %u chars\n", pstr->len);
- break;
- }
- p = buf;
- strcpy(buf, "str: ");
- p += 6;
- memcpy(p, pstr->p, pstr->len);
- p += pstr->len;
- sprintf(p, " %u", pstr->count);
- puts(buf);
-
- veciter_next(&iter);
- }
-
- vec_destroy(&vl);
- }
-
- return 0;
-}
-#endif /* def UNIT_TEST */
(DIR) diff --git a/vec.h b/vec.h
@@ -13,46 +13,30 @@
/* item count for initial alloc */
#define VEC_INITIAL_SIZE 256
-typedef struct _vec
-{
- uint nalloc; /* items alloced in pitems */
- uint nitems; /* items available */
- str_t* pitems; /* growing vector of items */
+typedef struct _vec {
+ uint nalloc; /* items allocated in pitems */
+ uint nitems; /* items available */
+ str_t *pitems; /* growing vector of items */
} vec_t;
-typedef struct _veciter
-{
- struct _vec* plist;
- uint index;
+typedef struct _veciter {
+ struct _vec *plist;
+ uint index;
} veciter_t;
/* class vector */
-void vec_create ( vec_t* pthis );
-void vec_destroy ( vec_t* pthis );
+void vec_create(vec_t * pthis);
+void vec_destroy(vec_t * pthis);
+void vec_addtail(vec_t * pthis, str_t * pstr);
+void vec_first(vec_t * pthis, veciter_t * piter);
-void vec_addhead ( vec_t* pthis, str_t* pstr );
-void vec_addtail ( vec_t* pthis, str_t* pstr );
-void vec_delhead ( vec_t* pthis );
-void vec_deltail ( vec_t* pthis );
+/* class sorted_vector */
+void svec_sort(vec_t * pthis);
-void vec_first ( vec_t* pthis, veciter_t* piter );
-void vec_last ( vec_t* pthis, veciter_t* piter );
+/* veciter_create not needed */
+void veciter_destroy(veciter_t * pthis);
-/* class sorted_vector */
-void svec_add ( vec_t* pthis, str_t* pstr );
-str_t* svec_find ( vec_t* pthis, str_t* pstr );
-void svec_sort ( vec_t* ptthis );
-
-/* veciter_create not needed */
-void veciter_destroy ( veciter_t* pthis );
-
-str_t* veciter_get ( veciter_t* pthis );
-bool_t veciter_equal ( veciter_t* pthis, veciter_t* pthat );
-bool_t veciter_hasitem ( veciter_t* pthis );
-bool_t veciter_prev ( veciter_t* pthis );
-bool_t veciter_next ( veciter_t* pthis );
-void veciter_addafter ( veciter_t* pthis, str_t* pstr );
-void veciter_addbefore( veciter_t* pthis, str_t* pstr );
-void veciter_del ( veciter_t* pthis );
+str_t *veciter_get(veciter_t * pthis);
+bool_t veciter_next(veciter_t * pthis);
#endif /* ndef _VEC_H */