/* $Id: match.c,v 1.5 2000/05/14 13:25:18 malekith Exp $ */

#include "h.h"

#ifdef USE_PATTERNS

/*
 * A pattern is compiled into a sequence of variable-sized atoms; each atom
 * is reached from the previous one by adding that atom's size in bytes.
 * The selector values below tell what a given atom matches.
 */

#define A_NULL		0	/* terminator: marks the end of the sequence */
#define A_STRING	1	/* data holds a NUL-terminated string to match */
#define A_MASK		2	/* data is a table of the chars that may match */
#define A_STAR		3	/* shell's ``*'', no data present */
#define A_QMARK		4	/* shell's ``?'', no data present */

/* A_QMARK behaves like an A_MASK whose table is all ones, but since it
 * carries no data it is smaller. */

/*
 * One element of a compiled pattern. Atoms are variable-sized: the fixed
 * header below is followed directly by selector-dependent data.
 */
struct atom {
	int size;		/* distance in bytes from this atom to the
				 * next one in the sequence */
	int selector;		/* one of the A_* constants: what this atom
				 * actually is */
	int min_length;		/* minimal number of chars needed by this
				 * and all following atoms for a string to
				 * match. if the remaining length drops
				 * below this we know that we can stop */
	char data[];		/* selector dependent. for A_MASK a 256 char
				 * table of allowed chars, for A_STRING a
				 * NUL-terminated string. otherwise empty.
				 * (C99 flexible array member; same layout
				 * as the former GNU zero-length array) */
};

/*
 * Advance atom pointer `a' to the next atom in the sequence, i.e. move it
 * forward by a->size bytes. `a' must be a modifiable lvalue and is
 * evaluated more than once, so pass a plain variable.
 *
 * Written as an ordinary assignment: the former ``((char*)a) += ...'' form
 * assigned to a cast expression, which is not valid C.
 */
#define MOVE(a) ((a) = (struct atom *) ((char *) (a) + (a)->size))

static void count_min_length(struct atom *a)
{
	int ml;
	struct atom *h;

	h = a;

	for (ml = 0; a->selector != A_NULL; MOVE(a))
		switch (a->selector) {
		case A_STRING:
			ml += strlen(a->data);
			break;
		case A_MASK:
		case A_QMARK:
			ml++;
			break;
		case A_STAR:
			break;
		default:
			oops();
		}

	for (a = h; a->selector != A_NULL; MOVE(a)) {
		a->min_length = ml;
		switch (a->selector) {
		case A_STRING:
			ml -= strlen(a->data);
			break;
		case A_MASK:
		case A_QMARK:
			ml--;
			break;
		case A_STAR:
			break;
		default:
			oops();
		}
	}
	a->min_length = 0;
}

/*
 * Compile shell-style pattern `pat' into a sequence of atoms.
 *
 * Understood syntax:
 *   *        any run of characters (A_STAR)
 *   ?        any single character (A_QMARK)
 *   [abc]    one character out of a set, ranges like a-z allowed (A_MASK)
 *   [!abc]   negated set; ``^'' is accepted in place of ``!''
 *   []abc]   a ``]'' right after the opening (or the negation marker) is
 *            taken literally
 *   \c       character c taken literally; a backslash that is the very
 *            last character of the pattern stands for itself
 *
 * Atom storage is obtained with alloc(). The code writes atom data right
 * behind the atom header and grows it with further alloc() calls, so it
 * depends on consecutive alloc() results being contiguous --
 * NOTE(review): confirm against the allocator behind h.h.
 *
 * Returns the first atom of the sequence, or 0 when the pattern is
 * malformed (unterminated ``['' or a descending range such as [z-a]).
 * Nothing is released here on failure; the callers in this file pop the
 * allocator state themselves.
 */
static struct atom *compile_pattern(const char *pat)
{
	struct atom *a, *r, *tmp;
	const unsigned char *p;

	/* scan as unsigned chars so that they can index the 256 entry mask */
	p = (const unsigned char *) pat;
	r = a = alloc(sizeof(struct atom));

	while (*p) {
		if (*p == '*' || *p == '?') {
			a->selector = *p == '*' ? A_STAR : A_QMARK;
			p++;
		} else if (*p == '[') {
			/* value stored for listed chars: 1 for a plain
			 * set, 0 for a negated one */
			int set = 1;

			/* room for the mask, right behind the header */
			alloc(256);
			a->selector = A_MASK;
			p++;
			if (*p == '^' || *p == '!') {
				/* negated set: allow everything, then
				 * knock out the listed chars */
				set = 0;
				memset(a->data, 1, 256);
				/* step over the marker, it is not a member
				 * of the set */
				p++;
			} else
				memset(a->data, 0, 256);

			/* handle '[]abc]' */
			if (*p == ']')
				a->data[*p++] = set;

			for (; *p && *p != ']'; p++) {
				/* only treat the backslash as an escape when
				 * there is a char left to escape, so that we
				 * never step over the terminating NUL */
				if (*p == '\\' && p[1])
					p++;

				if (p[1] == '-' && p[2] != ']') {
					int s = *p++;
					/* a-\z construct */
					if (*++p == '\\' && p[1])
						p++;
					if (s > *p)
						/* wrong char range (also
						 * hit when the pattern ends
						 * right after the '-') */
						return 0;
					while (s <= *p)
						a->data[s++] = set;
				} else
					a->data[*p] = set;
			}
			if (*p)
				p++;
			else
				/* unmatched [ */
				return 0;
		} else {
			/* add string */
			char *d;

			a->selector = A_STRING;
			d = a->data;
			/* note, that strchr() also checks for eos */
			while (strchr("*?[", *p) == 0) {
				/* see above: never skip past the NUL */
				if (*p == '\\' && p[1])
					p++;
				alloc(1);
				*d++ = *p++;
			}
			alloc(1);
			*d = 0;
		}
		align();
		tmp = alloc(sizeof(struct atom));
		a->size = (char *) tmp - (char *) a;
		a = tmp;
	}

	a->selector = A_NULL;
	/* give the terminator a defined size, so that stepping from it with
	 * MOVE() never reads an uninitialized field */
	a->size = 0;

	count_min_length(r);
	return r;
}

/*
 * Match string `v' against the atom sequence starting at `a'.
 *
 * Works recursively: the current atom consumes its share of `v' and the
 * rest of the sequence is matched against what is left. For A_STAR every
 * possible split point is tried, longest star match first.
 *
 * Returns 1 when the whole of `v' matches the whole sequence, 0 otherwise.
 */
static int match_pattern(struct atom *a, const char *v)
{
	struct atom *n;
	size_t len;
	int l, skip;

	/* the terminator is handled before stepping to the next atom, so
	 * that its size field is never relied upon */
	if (a->selector == A_NULL)
		return *v ? 0 : 1;

	n = a;
	MOVE(n);

	l = (int) strlen(v);

	/* too short for this and the following atoms: give up early. this
	 * also guarantees that *v is a real char for A_QMARK and A_MASK */
	if (l < a->min_length)
		return 0;

	switch (a->selector) {
	case A_STAR:		/* see below */
		break;
	case A_STRING:
		len = strlen(a->data);
		if (strncmp(v, a->data, len))
			return 0;
		return match_pattern(n, v + len);
	case A_QMARK:
		return match_pattern(n, v + 1);
	case A_MASK:
		if (!a->data[(unsigned char) *v])
			return 0;
		return match_pattern(n, v + 1);
	default:
		oops();
	}

	/* star */
	if (n->min_length == 0)	/* optimization for ``something*'' */
		return 1;

	/* let the star swallow `skip' chars, starting with as many as the
	 * rest of the pattern can spare and going down to none. indexing
	 * from v keeps every pointer inside the string */
	for (skip = l - n->min_length; skip >= 0; skip--)
		if (match_pattern(n, v + skip))
			return 1;

	return 0;
}

/* pattern compiled by match_start() and used by match(); reset to 0 by
 * match_free() */
static struct atom *am;

int match_start(const char *pattern)
{
	assert(am == 0);
		
	a_push_state(0);
	align();
	
	am = compile_pattern(pattern);
	if (!am) {
		a_pop_state(0);
		return -1;
	}
	
	return 0;
}

/*
 * Match `val' against the pattern stored by match_start().
 * Returns 1 when it matches, 0 otherwise.
 */
int match(const char *val)
{
	/* a pattern has to be active; checked the same way match_free()
	 * does, instead of dereferencing a null pointer further down */
	assert(am);
	return match_pattern(am, val);
}

/*
 * Forget the pattern stored by match_start() and pop the allocator state
 * that match_start() pushed. A pattern must be active (asserted).
 */
void match_free(void)
{
	assert(am);
	am = 0;
	a_pop_state(0);
}

/*
 * One-shot convenience: compile `pattern', match `val' against it and
 * throw the compiled form away again by popping the allocator state.
 *
 * Returns the result of the match; a pattern that fails to compile is
 * silently treated as "no match" (0).
 */
int matches(const char *val, const char *pattern)
{
	struct atom *compiled;
	int result = 0;

	a_push_state(0);
	align();

	compiled = compile_pattern(pattern);
	/* on a bad pattern result simply stays 0, nothing is reported */
	if (compiled)
		result = match_pattern(compiled, val);

	a_pop_state(0);
	return result;
}

#if 0				/* not implemented */
/* don't need these, we don't support too complicated patterns */
#define A_MASK_P	3	/* [mask]+ -- in perl nomenclature ;) */
#define A_MASK_S	4	/* [mask]* -- " */
/* shell's '*' is A_MASK_S, data = { 1, 1 ... 1 }
 * shell's '?' is A_MASK, data = { 1, 1 ... 1 } */
#endif

#endif /* USE_PATTERNS */
