/*****************************************************************************
 * File:	jugheadVctl.c
 *
 * Author:	Rhett "Jonzy" Jones
 *		jonzy@cc.utah.edu
 *
 * Date:	September 20, 1994
 *
 * Modified:	September 22, 1994, by Rhett "Jonzy" Jones.
 *		Finished up fixing the problem of determining end-of-file
 *		with the veronica.ctl file.
 *
 *		September 23, 1994, by Rhett "Jonzy" Jones.
 *		Added PrintTheList() for debugging assistance. 
 *
 *		September 24, 1994, by Rhett "Jonzy" Jones.
 *		Added code to prevent waiting for ever for a server reply,
 *		which includes the TooMuchTime4Read() routine.
 *
 *		September 27, 1994, by Rhett "Jonzy" Jones.
 *		Removed all use of rm.
 *
 *		October 9, 1994, by Rhett "Jonzy" Jones.
 *		Replaced the call to unlink() with Unlink() to ensure
 *		core won't dump if the file to unlink does not exist.
 *
 * Description:	Contains routines needed to handle the "veronica.ctl" file,
 *		which is the control file for veronica, by handling the
 *		"Disallow" gopher paths and creating a file to pass to
 *		veronica which does not contain these "Disallow" paths.
 *
 * Routines:	static void	TooMuchTime4Read();
 *		static int	ReadCtlFile(char *hostStr,char *portStr);
 *		static void	BuildDisallowList(TreeType *node);
 *		static int	StrInStr(char *s1,char *s2);
 *		static short	DisallowItem(char *sStr,char *hStr,char *pStr,
 *						List *list);
 *		static short	Item4Veronica(char *sStr,char *hStr,char *pStr);
 *		       void	PrintTheList(void);
 *		       void	CreateVeronicaFile(TreeType *root);
 *
 * Bugs:	No known bugs.
 *
 * Copyright:	Copyright 1993, 1994, University of Utah Computer Center.
 *		JUGHEAD (TM) is a trademark of Archie Comic Publications, Inc.
 *		and is used pursuant to license.  This source may be used and
 *		freely distributed as long as this copyright notice remains
 *		intact, and is in no way used for any monetary gain, by any
 *		institution, business, person, or persons.
 *
 ****************************************************************************/

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <signal.h>
#include <setjmp.h>
#include <errno.h>

#include "jughead.h"
#include "sockets.h"
#include "tree.h"

/* Size, in bytes, of the buffer CreateVeronicaFile() reads each data line into. */
#define BUFFERLIMIT	2048

/* Seconds handed to alarm() while ReadCtlFile() waits on a server reply. */
#define READTIMEOUT	(5 * 60)
/* Jump target set by ReadCtlFile() and jumped to by TooMuchTime4Read(). */
static jmp_buf		env;

/*
 * Selectors ReadCtlFile() requests from a server, tried in array order
 * until one reply qualifies.  The list is terminated by a 0 entry.
 * The first ordering below is compiled out.
 */
#if(0)		/* which one to use?  Check with Steve. */
static	char	*ctlName[] = {	"0/etc/veronica.ctl",
				"0/veronica.ctl",
				"0/robots.txt",
				0 };
#else
static	char	*ctlName[] = {	"0/veronica.ctl",
				"0/robots.txt",
				"0/etc/veronica.ctl",
				0 };
#endif

/* Not static: visible to other files.  Walked here by PrintTheList() and Item4Veronica(). */
List	*disallowHead = (List *)NIL,	/* Head of the "Disallow" paths list. */
	*disallowTail = (List *)NIL;	/* Tail of the "Disallow" paths list. */

/*
 * BuildDisallowList() points these at the current host and port only for
 * the duration of its ReadConfFile() call, then resets them to EMPTYSTRING.
 */
char	*vctlHost = EMPTYSTRING,	/* The veronica control file host. */
	*vctlPort = EMPTYSTRING;	/* The veronica control file port. */

/* Old-style declarations: no parameter information is given to the compiler. */
extern short	Host2Search();		/* Defined in "jughead.c". */
extern int	ReadConfFile();		/* Defined in "jugheadConf.c". */
extern char	*GetString();		/* Defined in "sockets.c". */

/* These variables are defined in "jughead.c". */
/* In this file rdPtr is read via GetString() and rptPtr receives all */
/* error and debugging messages; wtPtr is not referenced directly here. */
extern	FILE	*wtPtr,
		*rdPtr,
		*rptPtr;

/*****************************************************************************
 * TooMuchTime4Read is the SIGALRM handler installed by ReadCtlFile().  It
 * runs when the read timeout expires and transfers control back to the
 * setjmp(env) call in ReadCtlFile(), so this routine never returns to
 * the code that was interrupted.
 * NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
 * functions; here it is only reached when 'debug' is set.
 ****************************************************************************/
static void TooMuchTime4Read()
{
	/* Leave a trace of the timeout when debugging is switched on. */
	if (debug)
		{
		(void)fprintf(rptPtr,"In TooMuchTime4Read\n");
		}

	/* Resume at setjmp(env), which will now return 1. */
	longjmp(env,1);

}	/* TooMuchTime4Read */

/*****************************************************************************
 * ReadCtlFile checks if the gopher server "hostStr" running out port
 * "portStr" has "0/etc/veronica.ctl", "0/veronica.ctl", or "0/robots.txt"
 * and if so acquires this veronica control file, adds the host/port to
 * the table of control files we have checked and/or acquired, and then
 * adds the "Disallow" lines to the list of gopher paths to disallow.
 ****************************************************************************/
static int ReadCtlFile(hostStr,portStr)
	char	*hostStr,	/* The host to get the control file from. */
		*portStr;	/* The port the host is listening to. */
{	int	error = 0,	/* Did we get an error? */
		gotIt = 0;	/* Did we get the control file? */
	FILE	*fPtr;		/* Pointer to the control file. */
	char	*rsltLine,	/* The resultant line of data. */
		**controlFile;	/* The control file to retrieve. */

	for (controlFile = ctlName; !error && *controlFile && !gotIt; controlFile++)
		if (!(error = ContactHost(hostStr,Str2Int(portStr))))
			if (fPtr = fopen(tmpfilename,"w"))
				{
				(void)SendString(*controlFile);
				(void)SendString("\r\n");

				/* Set things up so we don't wait for ever waiting to read. */
				(void)signal(SIGALRM,TooMuchTime4Read);
				(void)alarm(READTIMEOUT);
				if (setjmp(env))
					{
					if (debug)
						(void)fprintf(rptPtr,"Too much time waiting to read\n");
					CloseReadNwriter();
					(void)fclose(fPtr);
					return(0);
					}

				while (rsltLine = GetString(rdPtr))
					{
					(void)fprintf(fPtr,"%s",rsltLine);
					if (strstr(rsltLine,"User-agent") && strstr(rsltLine,"veronica"))
						gotIt = 1;
					}

				/* We got our request to deactivate the alarm. */
				(void)alarm(0);
				(void)signal(SIGALRM,SIG_IGN);

				CloseReadNwriter();
				(void)fclose(fPtr);
				if (gotIt)
					return(1);
				}
			else
				error = fprintf(rptPtr,"error: ReadCtlFile cannot create [%s]\n",tmpfilename);
		else
			PostContactHostError(error,hostStr,portStr);
	return(0);

}	/* ReadCtlFile */

/*****************************************************************************
 * BuildDisallowList builds the 'disallow' list with gopher paths to
 * not include in the data base for veronica.
 ****************************************************************************/
static void BuildDisallowList(node)
	TreeType	*node;		/* The node to have printed. */
{	char		*tab,		/* Position of the tab. */
			*hStr,		/* The host string. */
			*pStr;		/* The port string. */

	if (!node)
		return;
	else
		{
		BuildDisallowList(node->left);

		/* Break the string up into the host and port parts. */
		hStr = node->word;
		tab = strchr(hStr,'\t');
		*tab = '\0';
		pStr = (char *)(tab + 1);

		if (debug)
			(void)fprintf(rptPtr,"Processing [%s]\n",hStr);

		if (Host2Search(hStr))
			{
			if (ReadCtlFile(hStr,pStr))
				{
				if (debug)
					(void)fprintf(rptPtr," SUCCESS with ctl");
				vctlHost = hStr;
				vctlPort = pStr;
				(void)ReadConfFile(tmpfilename);
				if (Unlink(tmpfilename))
					(void)fprintf(rptPtr,"error: %d could delete [%s]\n",tmpfilename,errno);
				vctlHost = vctlPort = EMPTYSTRING;
				}
			else if (debug)
				(void)fprintf(rptPtr," FAILURE with ctl");
			if (debug)
				(void)fprintf(rptPtr,"\n");
			}
		else if (debug)
			(void)fprintf(rptPtr," NEVER MIND\n");

		/* Restore the information in case we want to use it again. */
		*tab = '\t';

		BuildDisallowList(node->right);
		}

}	/* BuildDisallowList */

/*****************************************************************************
 * StrInStr returns true if 's1' is the first part of 's2', and false
 * otherwise.  This routine is used to cull the gopher pathway 's2' if
 * it is a pathway under the pathway 's1'.
 *
 * "First part" means that either the two strings are identical, or all
 * of 's1' matches the start of 's2' and the character of 's2' that
 * follows is neither alphanumeric nor white space.  Hence "1/foo"
 * covers "1/foo" and "1/foo/bar", but not "1/foobar", "1/foo bar" or
 * the shorter "1/fo".
 ****************************************************************************/
static int StrInStr(s1,s2)
	char	*s1,	/* The first string. */
		*s2;	/* The second string. */
{

	/* Step over the part both strings have in common. */
	for ( ;*s1 == *s2; s1++, s2++)
		if (!*s1)
			return(1);	/* Both ended together: identical. */

	/* The strings differ here.  Unless 's1' has been used up it */
	/* cannot be the leading part of 's2'.                       */
	if (*s1)
		return(0);

	/* Accept only if 's2' continues with a separator-like character. */
	/* The casts keep the <ctype.h> calls defined for negative chars. */
	if (!isalnum((unsigned char)*s2) && !isspace((unsigned char)*s2))
		return(1);

	return(0);

}	/* StrInStr */

/*****************************************************************************
 * DisallowItem reports whether the gopher item given by 'sStr', 'hStr',
 * and 'pStr' is covered by an entry of 'list'.  An entry covers the item
 * when StrInStr() accepts the entry's selector against 'sStr' and both
 * the host and the port compare equal under StrCmp().  An item with an
 * empty selector is never disallowed.  Returns 1 if covered, else 0.
 ****************************************************************************/
static short DisallowItem(sStr,hStr,pStr,list)
	char	*sStr,		/* The selector string. */
		*hStr,		/* The host string. */
		*pStr;		/* The port string. */
	List	*list;		/* Head of the list. */
{	List	*entry;		/* The list entry being examined. */

	if (*sStr == '\0')
		return(0);

	for (entry = list; entry; entry = entry->next)
		{
		/* Tests run in this order: selector, then host, then port. */
		if (!StrInStr(entry->info.sStr,sStr))
			continue;
		if (StrCmp(entry->info.hStr,hStr))
			continue;
		if (StrCmp(entry->info.pStr,pStr))
			continue;
		return(1);
		}

	return(0);

}	/* DisallowItem */

/*****************************************************************************
 * Item4Veronica decides whether the gopher item 'sStr', 'hStr', 'pStr'
 * belongs in the data base for veronica.  It returns true only when
 * both of the following hold:
 *  1) Host2Search() accepts the host 'hStr'.
 *  2) DisallowItem() does not find the item in the 'disallowHead' list.
 ****************************************************************************/
static short Item4Veronica(sStr,hStr,pStr)
	char		*sStr,
			*hStr,
			*pStr;
{
	/* A host we were not told to search rules the item out at once. */
	if (!Host2Search(hStr))
		return(0);

	return(!DisallowItem(sStr,hStr,pStr,disallowHead));

}	/* Item4Veronica */

/*****************************************************************************
 * PrintTheList writes every entry of the 'disallowHead' list to 'rptPtr'
 * as "[selector] [host] [port]", one entry per line.  It does nothing
 * unless 'debug' is set, and exists solely as a debugging aid.
 ****************************************************************************/
void PrintTheList()
{	List	*item;		/* The entry being printed. */

	if (!debug)
		return;

	(void)fprintf(rptPtr,"Printing the disallow items ...\n");

	item = disallowHead;
	while (item)
		{
		(void)fprintf(rptPtr,"[%s] [%s] [%s]\n",
			item->info.sStr,item->info.hStr,item->info.pStr);
		item = item->next;
		}

}	/* PrintTheList */

/*****************************************************************************
 * CreateVeronicaFile builds the data file which will be passed off
 * to veronica.  It first builds the disallow list from the servers in
 * 'root', then copies the file 'fileName' to the file 'veronica', keeping
 * only the lines Item4Veronica() accepts.  Each input line is expected to
 * hold display string, selector, host and port separated by tabs, with
 * optional further fields after the port.  Lines lacking any of the four
 * fields are skipped and counted as culled.
 *
 * The program exits with status 1 if either file name is null or empty.
 * Failure to open or to finish writing a file is reported on 'rptPtr'.
 ****************************************************************************/
void CreateVeronicaFile(root)
	TreeType	*root;			/* Servers we need to get the veronica.ctl file from. */
{	int		dataLines = 0,		/* Number of lines in the data file. */
			veronicaLines = 0;	/* Number of lines in the veronica file. */
	FILE		*inFp,			/* Pointer to the data file. */
			*outFp;			/* Pointer to the veronica file. */
	char		line[BUFFERLIMIT],	/* Line from the data file. */
			*dStr,			/* Display string from data file. */
			*sStr,			/* Selector string from data file. */
			*hStr,			/* Host string from data file. */
			*pStr,			/* Port string from data file. */
			*gPlus,			/* Non-null if a tab follows the port. */
			*remainder;		/* Any gopher plus specs. */

	if (debug)
		{
		int	i;	/* A loop counter. */
		(void)fprintf(rptPtr,"In CreateVeronicaFile()\n");
		PrintTree(root,2);
		(void)fprintf(rptPtr,"Printing searchHosts ...\n");
		for (i = 0; i < numSearchHosts; i++)
			(void)fprintf(rptPtr,"searchHosts[%2i] = [%s]\n",i,searchHosts[i]);
		}

	BuildDisallowList(root);

	if (debug)
		PrintTheList();

	if (!fileName || !*fileName || !veronica || !*veronica)
		{
		(void)fprintf(rptPtr,"Major problems: null file names.  Aborting\n");
		exit(1);
		}

	if ((inFp = fopen(fileName,"r")) == NULL)
		{
		(void)fprintf(rptPtr,"error: could not open [%s] for reading.\n",fileName);
		return;
		}

	if ((outFp = fopen(veronica,"w")) == NULL)
		{
		(void)fprintf(rptPtr,"error: could not open [%s] for writing.\n",veronica);
		(void)fclose(inFp);
		return;
		}

	while (ReadLine(inFp,line,BUFFERLIMIT) != EOF)
		{
		dataLines++;

		/* Parse the line. */
		dStr = MyStrTok(line,'\t');
		sStr = MyStrTok((char *)NULL,'\t');
		hStr = MyStrTok((char *)NULL,'\t');
		remainder = MyStrTok((char *)NULL,'\0');

		/* A line without all four fields cannot be judged or written. */
		/* NOTE(review): assumes MyStrTok() yields NULL once the line  */
		/* has no further token -- confirm against its definition.     */
		if (!dStr || !sStr || !hStr || !remainder)
			{
			if (debug)
				(void)fprintf(rptPtr,"Skipping malformed line %d of %s\n",dataLines,fileName);
			continue;
			}

		gPlus = strchr(remainder,'\t');
		pStr = OnlyDigits(remainder);
		/* Step past the port and the character after it.  Used only */
		/* when 'gPlus' is set.  NOTE(review): presumably OnlyDigits */
		/* ends the string right after the digits -- confirm.        */
		remainder = pStr + strlen(pStr) + 1;

		if (Item4Veronica(sStr,hStr,pStr))
			{
			if (gPlus)
				(void)fprintf(outFp,"%s\t%s\t%s\t%s\t%s\n",dStr,sStr,hStr,pStr,remainder);
			else
				(void)fprintf(outFp,"%s\t%s\t%s\t%s\r\n",dStr,sStr,hStr,pStr);
			veronicaLines++;
			}
		}

	/* fclose() flushes buffered output, so a failure here means lost data. */
	if (fclose(outFp) == EOF)
		(void)fprintf(rptPtr,"error: could not finish writing [%s]\n",veronica);

	if (debug)
		(void)fprintf(rptPtr,"%s contains %d items, %s contains %d items, with %d items culled\n",
				fileName,dataLines,veronica,veronicaLines,dataLines - veronicaLines);

	(void)fclose(inFp);

}	/* CreateVeronicaFile */
