/*
    Gn: A Server for the Internet Gopher Protocol(*).
    File: waisgn/waisgn.c
    Version 2.08
    
    Copyright (C) 1994  <by John Franks>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 1, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    (*) Gopher is a registered trademark of the Univ. of Minn.
*/


#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <math.h>
#include "ir/cutil.h"   
#include "ir/irext.h"
#include "ir/irsearch.h"
#include "ir/docid.h"
#include "ir/irtfiles.h"
#include "waisgn.h"

/*
 * Minimum value of argc that main() accepts when the optional -d flag
 * is absent (main() raises the requirement by one when -d is given).
 */
#define	NUMARGS		(9)

/*
 * String helpers defined outside this file.  In this file they are only
 * ever applied, in place, to selector paths and item names before output.
 */
extern void	www_escape(),
		www_replace(),
		www_unreplace();

/* Forward declarations of the file-local functions defined below. */
static void	AskWais(),
		getcacheline(),
		doline(),
		toobad(),
		httpintro();

/*
 * Settings taken from the command line by main().  cachefile is the
 * exception: doline() builds it for each hit and getcacheline() opens it.
 */
char	host[MIDLEN],
	dbname[MIDLEN],
	gn_root[MIDLEN],
	cachefile[MIDLEN],
	cfname[SMALLLEN],
	gntype[SMALLLEN],
	words[MAXLEN],
	gntitle[MAXLEN],
	port[SMALLLEN];

/*
 * Stream that receives debugging output; main() redirects it to
 * LOG_DEBUG_FILE when LOG_DEBUG is set.
 * NOTE(review): ISO C does not require stderr to be a constant
 * expression, so this initializer is rejected by some C libraries --
 * confirm against the supported platforms.
 */
static FILE	*dfp = stderr;

#ifndef FREEWAIS_03
/* These aren't used by waisgn, but old WAIS libs want them */
char *log_file_name = (char *) NULL;
FILE* logfile = (FILE *) NULL;
#endif

/*
 * Mode flags.  http is set from the protocol argument and debug from
 * -d or LOG_DEBUG; the others are derived from the characters that
 * follow "7w" in the type argument (see the switch in main()).
 */
int	http = FALSE,
	usecache = FALSE,
	usedir = FALSE,
	userange = FALSE,
	is0h = FALSE,
	debug = FALSE;
	

main(argc,argv)
int argc;
char *argv[];
{
	register char	*cp;

	int	i = 1,
		argsneeded = NUMARGS;
	
	if ( LOG_DEBUG) {
		debug = TRUE;
		if ( (dfp = fopen( LOG_DEBUG_FILE, "w")) == (FILE *) NULL ) {
			senderr( "Can't open cache file (waisgn)");
			exit( 2);
		}
		fprintf( dfp, "Starting debugging log output\n");
	}		
	if ( (argc > 1) && (streq( argv[1], "-d"))) {
		debug = TRUE;
		i++;
		argsneeded++;
	}

	if ( argc < argsneeded ) {
		printf( "Usage: waisgn [-d] index root_dir cachefile");
		printf( " host port type title protocol words...\n");
		exit( 0);
	}

	strcpy( dbname, argv[i++]);
	if ( (cp = strrchr( dbname, '.')) != NULL) 
		*cp = '\0';
	strcpy( gn_root, argv[i++]);

	if ( argv[i] && *argv[i] ) {
		strcpy( cfname, argv[i]);
	}
	i++;
	strcpy( host, argv[i++]);
	strcpy( port, argv[i++]);
	strcpy( gntype, argv[i++]);
	if ( argv[i])
		strcpy( gntitle, argv[i]);
	i++;
	if (streq( argv[i++], "http"))
		http = TRUE;

	*words =  '\0';
	while( i < argc) {
		strcat(words, argv[i]);
		if ( i < argc - 1 )
			strcat(words, " ");
		i++;
	}

	if ( debug) {
		fprintf( dfp, "Data base should be complete path\n", dbname);
		fprintf( dfp, "ending with \"index\" (no .inv).\n", dbname);
		fprintf( dfp, "Data base is:\n \t%s\n", dbname);
		fprintf( dfp, "Root directory is %s\n", gn_root);
		fprintf( dfp, "Cfname is %s\n", cfname);
		fprintf( dfp, "Host is %s\n", host);
		fprintf( dfp, "Port is %s\n", port);
		fprintf( dfp, "Type is %s\n", gntype);
		fprintf( dfp, "Title is %s\n", gntitle);
		fprintf( dfp, "Protocol: %s\n", (http ? "http" : "gopher"));
		fprintf( dfp, "Search term is %s\n", words);
	}

	/*
	 * gntype is "7w", "7wr", "7wh" or "7wc" according to whether 
	 * it is plain text, a range, type 0h, or using a .cache for names.  
	 * If "7w" or "7wc"  has a 'd' appended it means that instead
	 * of returning the selector of the file matched, the selector of
	 * the directory containing it should be returned.  It can also
	 * be "7wd" or "7wcd" indicating that the search should return
	 * the directory containing the matching item rather than the
	 * item itself.
	 */

	switch ( gntype[2]) {
	case 'd':
		usedir = TRUE;
		break;
	case 'r':
		userange = TRUE;
		break;
	case 'c':
		usecache = TRUE;
		usedir = (gntype[3] == 'd') ? TRUE : FALSE;
		break;
	case 'h':
		is0h = TRUE;
		break;
	}

	AskWais( words, MAXHITS_RETURNED);
	exit(0);
}
/*
 * This function is based very loosely on a function of the same
 * name in Don Gilbert's Go_Ask_WAIS utility.  I am very grateful
 * for the help in dealing with WAIS that his routine has provided
 * and for his kind permission to use it here.  Any errors are
 * mine and not Don's.  JMF
 */
 
/*
 * Run a WAIS search for SearchWords against the database named by the
 * global dbname and write the result to stdout.
 *
 * SearchWords	the search term handed to search_for_words()
 * maxhits	upper limit on the number of hits; values <= 0 select 256
 *
 * Every hit with a positive weight is passed to doline().  In http
 * mode the list is wrapped by httpintro() and a closing HTML trailer,
 * and toobad() is printed when nothing matched; otherwise the output
 * is a menu terminated by ".\r\n".  Failure to open the database or to
 * run the search is reported through senderr() and exits with status 2.
 */
static void
AskWais( SearchWords, maxhits)
  char *SearchWords;
  int   maxhits;
{ 
	char			words_used[MAXLEN];
	database		*db;
	long			shown;
	query_parameter_type	parameters;
	boolean			searchResult;
	hit			theHit;
          
	*words_used = '\0';

	if ( debug)
		fprintf( dfp, "Opening data base %s\n", dbname);
	if ( (db = openDatabase(dbname, false, true)) == (database *) NULL) {
		senderr( "Failed to open database\n");
		exit( 2);
	}
 
	parameters.max_hit_retrieved = (long)((maxhits > 0) ? maxhits : 256);
	set_query_parameter(SET_MAX_RETRIEVED_MASK, &parameters);

#ifdef FREEWAIS_03
	searchResult = search_for_words(SearchWords, db, 0, words_used);
#else
	searchResult = search_for_words(SearchWords, db, 0);
#endif

	if (searchResult != true) {
		senderr( "The database search failed.");
		exit( 2);
	}

	finished_search_word(db);
	if ( debug) 
		fprintf( dfp, "Dbase search successful\n");

	if (next_best_hit(&theHit, db) != 0) {
		/*
		 * No hit at all.  theHit has not been filled in here,
		 * so none of its fields may be printed.
		 */
		if ( debug)
			fprintf( dfp, "No hits were returned\n");
		if ( http)
			toobad();
		else
			printf( ".\r\n");
	}
	else {
		if ( http)
			httpintro();

		/*
		 * shown counts the hits already handled, so that exactly
		 * max_hit_retrieved hits can be listed; a hit is only
		 * fetched when there is still room for it.
		 */
		shown = 0;
		do {
			if (theHit.weight > 0)
				doline( &theHit);

			shown++;
		} while ( shown < parameters.max_hit_retrieved  && 
			(next_best_hit(&theHit, db) == 0));

		if ( http) {
			printf( "</ul>\n<P>You may repeat your search with\n");
			printf( "a new search term. <P> <ISINDEX></BODY>\n");
		}
		else
			printf( ".\r\n");
	}

	finished_best_hit(db);
	closeDatabase(db);
	return;
}



/*
 * Write the output line for one search hit.
 *
 * match	the hit; its filename, headline, type, start_character
 *		and end_character fields are read
 *
 * The leading characters that the hit's file name shares with gn_root
 * are removed to form the selector path.  When usedir is set the path
 * is cut at its last '/', so that the containing directory is returned.
 * Without usecache the line is built here from the headline: an HTML
 * list item in http mode, a gopher menu line otherwise.  With usecache
 * the name is looked up by getcacheline() in the file named cfname that
 * lies in the same directory as the path.
 *
 * Strings that are too long for the local buffers are truncated.
 */
static void
doline( match)
hit	*match;
{
	char	*cp = gn_root,
		*relpath,
		type1,
		typebuf[MIDLEN],
		pathbuf[MAXLEN],
		relpathbuf[MAXLEN],
		name[MAXLEN];

	strncpy( pathbuf, match->filename, sizeof pathbuf - 1);
	pathbuf[sizeof pathbuf - 1] = '\0';
	relpath = pathbuf;

	/* Step over the prefix that the path has in common with gn_root */
	while ( *cp && *relpath && ( *cp == *relpath)) {
		cp++;
		relpath++;
	}

	if ( usedir && (( cp = strrchr( pathbuf, '/')) != NULL))
		*cp = '\0';
		/* Truncate relpath if dir should be returned */

	/* relpath points into pathbuf, which has the size of relpathbuf */
	strcpy( relpathbuf, relpath);

	if ( http)
		www_escape( relpathbuf);

	if ( !usecache ) {
		strncpy( name, match->headline, sizeof name - 1);
		name[sizeof name - 1] = '\0';

		/* Tabs and line breaks would break the output line */
		for ( cp = name; *cp; cp++) {
			if ( isspace( (unsigned char) *cp))
				*cp = ' ';
		}

		if ( userange) {
			type1 = '0';
			sprintf( typebuf, "R%ld-%ld-range",
				match->start_character, match->end_character);
		}

		else if ( *(match->type) == 'D') {  /* DVI type */
			type1 = '9';
			strcpy( typebuf, "9");
		}
		else if ( is0h) {  /* 0h type */
			type1 = '0';
			strcpy( typebuf, "0h");
		}
		else if ( usedir) {  /* Return directory containing file */
			type1 = '1';
			strcpy( typebuf, "1");
		}
		else {
			type1 = '0';
			strcpy( typebuf, "0");
			
		}
		if ( http) {
			printf( "<li> <a href=\"http://%s:%s", host, port);
			printf( "/%s%s\">%s</a>\n", typebuf, relpathbuf, name);
		}
		else
			printf( "%c%s\t%s%s\t%s\t%s\r\n",
				type1, name, typebuf, relpathbuf, host, port);
	}
	else {
		/* cachefile is smaller than pathbuf, so bound the copy */
		strncpy( cachefile, pathbuf, sizeof cachefile - 1);
		cachefile[sizeof cachefile - 1] = '\0';
		if ( ( cp = strrchr( cachefile, '/')) != NULL ) {
			/* Replace the last path component by cfname */
			size_t	room = sizeof cachefile
					- (size_t) (cp + 1 - cachefile);

			strncpy( cp + 1, cfname, room - 1);
			cachefile[sizeof cachefile - 1] = '\0';
		}
		
		getcacheline( relpathbuf);
	}
}

/*
 * Look up the selector path relp in the cache file named by the global
 * cachefile and print the matching entries.
 *
 * relp		selector path; an entry matches when its path field
 *		starts with this string
 *
 * The file stays open between calls and is only reopened when
 * cachefile changes; otherwise it is rewound.  Each line is taken as
 * an optional "hname:" or "onlygopher:" keyword, one further character,
 * the item name up to the first TAB, a type that runs up to the first
 * '/', and the path.  Other lines that start with 'h', 'o', 'i' or TAB
 * are skipped.
 *
 * In http mode every matching line that is not marked "onlygopher:" is
 * printed as an HTML list item.  In gopher mode the first matching line
 * is printed with a "\r\n" ending and the function returns.
 *
 * A cache file that cannot be opened, or a line that lacks the TAB, the
 * '/' or the final newline, is reported through senderr() and exits
 * with status 2.
 */
static void
getcacheline( relp)
char	*relp;
{
	register char	*cp,
			*cp2;
	char		*namestop,
			*ptr,
			*endanchor = "",
			type[SMALLLEN],
			buf[MAXLEN + 2];  /* room to turn "\n" into "\r\n" */

	int		namestart,
			ishname,
			gopheronly;
	size_t		relplen = strlen( relp);

	static FILE	*fp = (FILE *) NULL;
	static char	lastcache[MIDLEN];

	if ( !streq( cachefile, lastcache) ) {
		strcpy( lastcache, cachefile);
		if ( fp != (FILE *) NULL )
			fclose( fp);

		if ( debug) {
			fprintf( dfp, "Opening cachefile %s\n", 
				lastcache);
		}

		if ( (fp = fopen( lastcache, "r")) == (FILE *) NULL ) {
			senderr( "Can't open cache file (waisgn)");
			exit( 2);
		}
	}
	else
		fseek( fp, 0L, 0);


	while ( fgets( buf, MAXLEN, fp) != NULL) {

		ishname = gopheronly = FALSE;
		switch (*buf) {
		case 'h':
			if ( strncmp( buf, "hname:", 6) == 0) {
				ishname = TRUE;
				endanchor = "";
				namestart = 7;
				break;
			}
			if ( strncmp( buf, "http:", 5) == 0)
				continue;
			/* fall through: any other 'h' line is skipped below */
		case 'o':
			if ( strncmp( buf, "onlygopher:", 11) == 0) {
				gopheronly = TRUE;
				namestart = 12;
				break;
			}
			if ( strncmp( buf, "owner:", 6) == 0)
				continue;
			/* fall through: any other 'o' line is skipped below */
		case '\t':
		case 'i':
			continue;
		default:
			endanchor = "</A>";
			namestart = 1;
		}

		/* The name ends at the first TAB */
		cp = buf;
		while ( *cp && (*cp != '\t'))
			cp++;
		if ( *cp == '\0' ) {
			/* stepping past the terminator would leave the line */
			senderr( "Corrupt cache file (waisgn)");
			exit( 2);
		}
		namestop = cp++;

		/* The type runs up to the '/' that starts the path */
		ptr = type;
		while ( *cp && (*cp != '/') && (ptr < type + sizeof type - 1))
			*ptr++ = *cp++;
		*ptr ='\0';

		cp2 = cp;
		while ( *cp2 && (*cp2 != '\n'))
			cp2++;

		/* no path (or a type too long for type[]), or no newline */
		if ( (*cp != '/') || (*cp2 == '\0')) {
			senderr( "Corrupt cache file (waisgn)");
			exit( 2);
		}

		if ( strncmp( cp, relp, relplen) != 0 )
			continue;

		if ( http) {
			if ( gopheronly)
				continue;
			*namestop = '\0';
			if ( !ishname )
				www_replace( buf + namestart);
			printf( "<li> <a href=\"http://%s:%s",
					host, port);
			printf( "/%s%s\">%s%s\n", type, relp,
					buf + namestart, endanchor);
		}
		else {
			if ( ishname ) {
				www_unreplace( buf + namestart);
				/*
				 * NOTE(review): www_unreplace() works in
				 * place and presumably can change the
				 * length of the line -- confirm.  The
				 * newline is therefore located again.
				 */
				cp2 = strchr( buf + namestart, '\n');
				if ( cp2 == NULL ) {
					senderr( "Corrupt cache file (waisgn)");
					exit( 2);
				}
			}
			*cp2++ = '\r';
			*cp2++ = '\n';
			*cp2++ = '\0';
			/* file contents must never serve as a format string */
			fputs( buf + namestart - 1, stdout);
			return;
		}
	}
}


/*
 * Report an error to the client on stdout.
 *
 * msg	text of the error message
 *
 * In http mode a small HTML page with msg as title and heading is
 * written, otherwise a single gopher item of type 3 followed by the
 * terminating ".\r\n".  The caller decides whether to exit.
 *
 * Always returns 0.
 */
int
senderr( msg)
char	*msg;
{
	if (http) {
		printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", msg);
		printf( "<BODY><HR><H2>%s</H2>\n", msg );
		printf( "Sorry, an error has occurred in");
		printf( " the WAIS index search.\n<HR></BODY>\n");
	}
	else
		printf(	"3Server error: %s\t\terror.host\t0\r\n.\r\n", msg);

	return 0;
}


/*
 * Write the string s to stdout with the characters that are special in
 * HTML ('<', '>', '&' and '"') replaced by character entities.
 */
static void
puthtmltext( s)
char	*s;
{
	for ( ; *s; s++) {
		switch ( *s) {
		case '<':
			fputs( "&lt;", stdout);
			break;
		case '>':
			fputs( "&gt;", stdout);
			break;
		case '&':
			fputs( "&amp;", stdout);
			break;
		case '"':
			fputs( "&quot;", stdout);
			break;
		default:
			putchar( *s);
		}
	}
}


/*
 * Write the head of the HTML result page: title and heading taken from
 * gntitle, a sentence that quotes the search term and the opening tag
 * of the result list.  The search term comes from the command line
 * (presumably supplied by the client), so it is written through
 * puthtmltext() and cannot inject markup into the page.
 */
static void
httpintro()
{
	printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", gntitle);
	printf( "<BODY><HR><H2>%s</H2>\n", gntitle);
	printf( "The following items were returned as matches\n");
	printf( "for <B>`");
	puthtmltext( words);
	printf( "'</B> by the WAIS index search.\n");
	printf( "They are ordered with the best matches first.\n");
	printf( "<P>\n<UL>\n");
}


/*
 * Write the complete HTML page that is returned when the search had no
 * hits.  As in httpintro() the search term is written through
 * puthtmltext().
 */
static void
toobad()
{
	printf( "<HEAD> <TITLE>%s</TITLE> </HEAD>\n", gntitle);
	printf( "<BODY><HR><H2>%s</H2>\n", gntitle);
	printf( "Sorry, no matches for <B>`");
	puthtmltext( words);
	printf( "'</B> were returned\n");
	printf( "by the WAIS index search.  You may try again with\n");
	printf( "different search terms.\n <ISINDEX> </BODY>\n");
}

