/*
 * TADS vocabulary checker
 * 23-May-94  Dave Baggett <dmb@ai.mit.edu>
 *
 * This program examines TADS code vocabulary properties and gives warnings
 * when multiple vocabulary words are identical given six characters of
 * significance and case-independence.  E.g., the definition
 *
 *     noun = 'transmogrifier' 'Transmogrifyer'
 *
 * will trigger a warning because, ignoring case, the two words do not
 * differ in the first six characters as the TADS parser requires.
 *
 * Definitions like the above cause the now-infamous
 *
 * Which transmogrifier do you mean: the transmogrifier, or the transmogrifier?
 *
 * problem, where TADS assigns two different (but identical to six characters)
 * vocabulary items to the same object.
 *
 * ----------------------------------------------------------------------
 * HOW TO USE THIS PROGRAM
 *
 * This is a Unix filter.  You pipe stuff to it and it outputs warnings
 * if it sees things wrong.  E.g.,
 *
 * % vocab < cheez.t > errors.out
 *
 * or
 *
 * % cat cheez.t | something | vocab | more
 *
 * etc.
 * ----------------------------------------------------------------------
 * HOW TO COMPILE THIS PROGRAM
 *
 * To compile this program, you need either the "lex" or "flex" lexical
 * analyzer generators.  Fortunately, these are standard Unix tools, so
 * on most machines the following should work:
 *
 * % flex vocab.l	*OR*	% lex vocab.l
 *
 * Then run your system's C compiler on the resulting lex.yy.c file.  One
 * of the following commands should accomplish this:
 *
 * % cc lex.yy.c -o vocab -ll  *OR*  % gcc lex.yy.c -o vocab -ll
 *
 * That oughta do it!  If it doesn't, ask your system administrator
 * for help, or send me email.
 *
 * ----------------------------------------------------------------------
 * BUT I DON'T HAVE UNIX!
 *
 * Well, too bad.  This would have been a pain to write in straight C,
 * so you get what you pay for.  However, if you want to run this program
 * on a 386 or later PC compatible, you're in luck -- send me email for
 * details.  Otherwise, you'll have to ask people who know how to get
 * flex running on your computer how to do it.
 * ----------------------------------------------------------------------
 * TERMS
 *
 * This program is in the public domain.
 *
 */
/*
 * Named patterns used by the rules below.
 *
 * BW:  one whitespace character (space, tab, or newline).
 * SQ:  a single quote, which delimits a vocabulary word.
 * CO:  a comment -- either "//" through the end of the line, or a
 *      slash-star comment through its closing star-slash.
 * CH:  one element of a vocabulary word -- any character other than a
 *      newline, tab, or single quote, or a backslash followed by a
 *      single quote.
 */
BW [ 	\n]
SQ [']
CO (("//".*\n)|("/*"([^*]|"\n"|("*"[^/]))*"*"+"/"))
CH ([^\n\t']|(\\'))

/*
 * PROP: the property names whose word lists are checked.
 */
PROP ("noun"|"plural")

%{
#include <ctype.h>
#include <stdio.h>
#include <string.h>

	int check(char *s);	/* defined in the user code section below */

	int line = 1;		/* current input line number, starting at 1 */
%}

%%

{PROP}{BW}+"="(({BW}|{CO})*{SQ}{CH}+{SQ})+ {
	/*
	 * A vocabulary property: "noun" or "plural", at least one
	 * whitespace character, "=", and then one or more single-quoted
	 * words, each optionally preceded by whitespace and comments.
	 * check() reports words that collide and adds the newlines
	 * contained in the matched text to the line count.
	 */
	check(yytext);
}

\n {
	/* A newline outside a matched property: advance the line count. */
	line++;
}

. {
	/* Any other character is ignored. */

}

%%

#define MAX_WORDS 256	/* max # vocabulary words for single property */
#define MAX_LENGTH 128  /* max vocab word length */

/*
 * quote(c): the value stored for the character c that follows a
 * backslash inside a vocabulary word.  A double quote or a single quote
 * maps to itself; any other character maps to (c) - 'a'.
 *
 * NOTE(review): the (c) - 'a' result for other escapes (e.g. 13 for
 * backslash-n in ASCII) looks unintended -- confirm what was meant.
 */
#define quote(c) ((c) == '\"' ? '\"' : ((c) == '\'' ? '\'' : (c) - 'a'))

/*
 * skip_comment -- step over one comment.
 *
 * s points at the '/' that opens a comment, and s[1] is '/' or '*'.
 * For a "//" comment the result points at the newline (or NUL) that
 * ends it.  For a slash-star comment the result points just past the
 * closing star-slash, or at the NUL if the comment is unterminated.
 */
static char *
skip_comment(char *s)
{
	if (s[1] == '/') {
		while (*s && *s != '\n')
			s++;
		return s;
	}

	for (s += 2; *s; s++)
		if (s[0] == '*' && s[1] == '/')
			return s + 2;

	return s;
}

/*
 * check -- report vocabulary words that the TADS parser cannot tell apart.
 *
 * s is the text of one matched vocabulary property: the property name,
 * "=", and a list of single-quoted words with optional whitespace and
 * comments between them.  Each word is reduced to a key made of its
 * first six characters, lower-cased; for every word whose key equals
 * the key of a later word, one "duplicate vocabulary" line is printed
 * on standard output, tagged with the global line number at which the
 * property starts.
 *
 * Comments are skipped only outside quoted words, so a word containing
 * "//" or slash-star is kept intact.  The text in s is not modified.
 *
 * Words longer than MAX_LENGTH - 1 characters are truncated in the
 * message (the key is unaffected).  If the property has more than
 * MAX_WORDS words, a warning is printed and the extra words are not
 * checked.
 *
 * Side effect: the global line counter is advanced by the number of
 * newlines in s.  Always returns 0.
 */
int
check(char *s)
{
	enum { SIGNIFICANT = 6 };	/* characters the TADS parser compares */

	static char word[MAX_WORDS][SIGNIFICANT + 1];	/* comparison keys */
	static char orig[MAX_WORDS][MAX_LENGTH];	/* words as written */

	char	*p;
	int	i, j, w = 0;
	int	lineadd = 0;

	/*
	 * Count newlines in this string.  The global counter is only
	 * updated at the end so that messages carry the line on which
	 * the property starts.
	 */
	for (p = s; *p; p++)
		if (*p == '\n')
			lineadd++;

	/*
	 * Build word list, significant to six characters.
	 */
	for (p = s; *p; ) {
		int	len = 0;	/* characters stored in orig[w] */
		int	sig = 0;	/* characters stored in word[w] */

		if (*p == '/' && (p[1] == '/' || p[1] == '*')) {
			p = skip_comment(p);
			continue;
		}
		if (*p != '\'') {
			p++;
			continue;
		}

		/*
		 * p is at an opening quote.  Refuse to run off the end of
		 * the word tables.
		 */
		if (w == MAX_WORDS) {
			printf("line %d: more than %d vocabulary words;", line, MAX_WORDS);
			printf(" the rest are not checked\n");
			break;
		}

		for (p++; *p && *p != '\''; ) {
			char	c;

			/*
			 * A backslash escapes the next character.  A backslash
			 * at the very end of the text is taken literally so
			 * the scan never steps past the terminating NUL.
			 */
			if (*p == '\\' && p[1]) {
				c = quote(p[1]);
				p += 2;
			}
			else
				c = *p++;

			if (len < MAX_LENGTH - 1)
				orig[w][len++] = c;

			/*
			 * The <ctype.h> functions need an unsigned char value;
			 * a negative plain char is undefined behavior.
			 */
			if (sig < SIGNIFICANT)
				word[w][sig++] = (char) tolower((unsigned char) c);
		}

		if (*p == '\'')
			p++;		/* step over the closing quote */

		orig[w][len] = 0;
		word[w][sig] = 0;
		w++;
	}

	/*
	 * Compare each word to every later word.  O(n^2), which is fine
	 * for at most MAX_WORDS entries.  Only the first collision for
	 * each word is reported.
	 */
	for (i = 0; i < w - 1; i++) {
		for (j = i + 1; j < w; j++) {
			if (!strcmp(word[i], word[j])) {
				printf("line %d: duplicate vocabulary:", line);
				printf(" [%s], [%s] -> [%s]\n",	orig[i], orig[j], word[i]);
				break;
			}
		}
	}

	line += lineadd;
	return 0;
}

/*
 * Run the scanner until it reaches the end of its input.  All output is
 * produced by check() as vocabulary properties are matched.  Returns 0
 * so the process has a well-defined exit status.
 */
int
main(void)
{
	yylex();
	return 0;
}