/*	This file is part of the software similarity tester SIM.
	Written by Dick Grune, Vrije Universiteit, Amsterdam.
*/

#include	"buff.h"
#include	"text.h"
#include	"top.p"
#include	"top.h"

/* from the Language Department: */
/*	MayBeStartOfRun() is called on buff[i0]; when it yields zero,
	lcs() does not search from that position.
*/
extern int MayBeStartOfRun();
/*	CheckRun() is called with the address of the first element of a
	run and the size of the run; its result replaces that size.
*/
extern unsigned int CheckRun();

/* indexed by option character; lcs() consults options['s'] */
extern char options[];
/* compare() visits text[0] up to but not including text[ntexts] */
extern int ntexts;
extern struct text *text;
/* runs smaller than this are not recorded */
extern int min_run_size;
/*	May be a null pointer, in which case lcs() tries every following
	position in turn; otherwise hash_table[i] supplies the position
	to try after i.
*/
extern unsigned int *hash_table;
/* called by lcs() as add_run(txt0, i0, txt_best, i_best, size_best) */
extern add_run();

/* local routines, declared here old-style (without parameter lists) */
static struct text *txt_at();
static unsigned int lcs();

compare()
{
	/*	Visit each text in turn and, for every position in it that
		leaves room for at least min_run_size elements before the
		limit of the text, call lcs(); the result of lcs() is the
		distance to advance.
	*/
	int n = 0;

	InitTop();
	while (n < ntexts)	{
		struct text *txt0 = &text[n++];
		unsigned int i0;

		/*	tx_limit is tested first, so the subtraction in the
			loop bound below is only evaluated when
			tx_limit >= min_run_size
		*/
		if (txt0->tx_limit >= min_run_size)	{
			for (	i0 = txt0->tx_start;
				i0 < txt0->tx_limit - min_run_size + 1;
				i0 += lcs(txt0, i0)
			)	{
				/* all the work is done by lcs() */
			}
		}
	}
}

static unsigned int
lcs(txt0, i0)
	struct text *txt0;
	unsigned int i0;
{
	/*	Search for the longest run that starts precisely at i0 in
		txt0 and occurs again at one of the candidate positions
		after i0.  If a run of at least min_run_size is found, the
		best one is handed to add_run() and its size is returned;
		otherwise the result is 1.  The caller adds the result
		to i0.
	*/
	struct text *txt1 = txt0;
	unsigned int i1 = i0;
	struct text *txt_best;		/* meaningful only if size_best != 0 */
	unsigned int i_best;		/* meaningful only if size_best != 0 */
	unsigned int size_best = 0;

	if (!MayBeStartOfRun(buff[i0]))
		return 1;

	for (;;)	{
		unsigned int gap;
		unsigned int size;
		unsigned int limit0;
		unsigned int limit1;

		/*	step to the next candidate: through hash_table if
			there is one, else simply to the next position
		*/
		if (hash_table)
			i1 = hash_table[i1];
		else
			i1 = i1 + 1;
		txt1 = txt_at(txt1, i1);
		if (!txt1)
			break;		/* no more candidates */

		/*	option 's': do not compare a file to itself, so
			skip candidates that lie before the limit of txt0
		*/
		if (options['s'] && i1 < txt0->tx_limit)
			continue;

		/*	both positions are preceded by equal elements:
			this is the middle of a run, not its start
		*/
		if (	i0 != txt0->tx_start && i1 != txt1->tx_start &&
			buff[i0-1] == buff[i1-1]
		)
			continue;

		/*	count the equal elements; the count never exceeds
			the distance between the two starting positions and
			stops at the limit of either text
		*/
		gap = i1 - i0;
		limit0 = txt0->tx_limit;
		limit1 = txt1->tx_limit;
		size = 0;
		while (	size < gap &&
			i0 + size < limit0 && i1 + size < limit1 &&
			buff[i0 + size] == buff[i1 + size]
		)
			size++;

		if (size < min_run_size)
			continue;	/* too short to be offered */

		/*	offer the run to the Language Department, whose
			answer becomes the size of the run
		*/
		size = CheckRun(&buff[i0], size);

		/* remember it only if it beats what we have so far */
		if (size >= min_run_size && size > size_best)	{
			txt_best = txt1;
			i_best = i1;
			size_best = size;
		}
	}

	if (size_best == 0)
		return 1;

	add_run(txt0, i0, txt_best, i_best, size_best);
	return size_best;
}

static struct text *
txt_at(txt, i)
	struct text *txt;
	unsigned int i;
{
	/*	Yield the first text, counting from txt onwards, whose
		tx_limit lies beyond position i.  The result is 0 if i is 0
		or if i is not below text_length().
	*/
	struct text *cursor;

	if (i == 0)
		return 0;
	if (i >= text_length())
		return 0;

	for (cursor = txt; cursor->tx_limit <= i; cursor++)	{
		/* position i is not in this text; try the next one */
	}
	return cursor;
}
