/*
    Gn: A Server for the Internet Gopher Protocol(*).
    File: gn/pselect.c
    Version 2.14
    
    Copyright (C) 1993  <by John Franks>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 1, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    (*) Gopher is a registered trademark of the Univ. of Minn.
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "gn.h"


/* Routines defined outside this file. */
extern long	atol();
extern void	www_unescape();

/*
 * Forward declarations of the file-local helpers defined below.
 * parse_selector() calls most of them directly; enterrange() is reached
 * through entergtype() and security() through enterfilepath() and
 * entercachepath().
 */
static	void	entergtype(),
		enterextra(),
		enterfilepath(),
		entercachepath(),
		enteraccdir(),
		enterrange(),
 		fixhitems(),
		security();

/*
 * Text found between a trailing "(" and ")" of the selector.  Filled in by
 * parse_selector() (only when EXPLICIT_CACHE is defined) and read by
 * entercachepath().
 */
static	char	explicit_cache[PATHLEN];

/*
 * Reset to FALSE on every parse_selector() call and set to TRUE when a
 * "(...)" part was found; consulted by enterfilepath() and entercachepath().
 */
static int	hasparens;


/*
 * parse_selector( ip, select, full) fills in entries of the Item struct
 * pointed to by ip.  It destroys the line pointed to by select.  If
 * full == FULL it does a full parse, as needed for selectors received from
 * a client.  Otherwise it does a partial parse, providing what is needed to
 * parse selectors which are in our cache files.  Here's the struct:
 *
 * typedef struct Item {
 *	char	selector[PATHLEN],	* The original selector
 *		name[PATHLEN],		* entered in chkcache()
 *		gtype[PATHLEN],		
 *					 * Typically gopher type 0,1, etc.
 *					 * but really everything up to first
 *                                       * '/' in the selector
 *
 * 		filepath[PATHLEN],	* Complete pathname of file  *
 * 		relpath[PATHLEN],	* Relative pathname of file  *
 * 		cachepath[PATHLEN],	* Complete pathname of cache
 *					* which contains this item 
 * 		accdir[PATHLEN],	* path of access file directory *
 *		extra[PATHLEN],		* Stuff after the last tab *
 *		args[PATHLEN],		* Args for exec *
 *		content_type[MIDLEN],	* MIME content type *
 *		mod_date[SMALLLEN],	* File modification date *
 *		length[SMALLLEN],	* File length *
 *		encoding[MIDLEN],	* MIME content-transfer-encoding *
 *		suffix[SMALLLEN];	* File name suffix *
 *
 *	long	range_start,		* Start and end of text file range *
 * 		range_end;
 *
 *	int	compressed,		* True if file is compressed *
 * } Item;
 *
 *
 *   A selector looks like
 *   0/dir1/dir2/file(explicit_cache) or
 *   1/dir/dir/the_dir or 1/dir/dir/the_dir/cachefile(explicit_cache) or
 *   exec:args:/dir1/dir2/script(explicit_cache)  or 
 *   7/dir1/dir2/script <tab> searchterm
 * The (explicit_cache) is optional in all cases.  If present it means 
 * the cachefile containing 0/dir1/dir2/file  has name given by the 
 * contents of explicit_cache appended.  Note that for menus (directories) 
 * the syntax of the selector is different depending on whether 
 * (explicit_cache) is present.  If absent the selector includes
 * the pathname to the directory, e.g. /dir1/dir2/the_dir. But if 
 * (explicit_cache) is present, the pathname is to the cachefile in
 * the directory, e.g. /dir1/dir2/the_dir/cachefile.
 *
 *      BEWARE: THE ARRAY POINTED TO BY 'select' IS DESTROYED!!
 */


void
parse_selector( ip, select, full)
Item	*ip;
char	*select;
int	full;
{
	register char	*p;
	char		saved[PATHLEN];
	int		is_root;

	/* The logfile must not contain tabs: blank them out of ip->selector */
	for ( p = ip->selector; *p != '\0'; p++) {
		if ( *p == '\t')
			*p = ' ';
	}

	/*
	 * The root directory is a special case: an empty selector, one
	 * starting with a tab, or exactly "1/", "7g/" or "/".
	 */
	is_root = ( (*select == '\0') ||
			(*select == '\t') ||
			streq( select, "1/") ||
			streq( select, "7g/") ||
			streq( select, "/") );

	if ( is_root) {
		if ( *select != '7')
			strcpy( ip->gtype, (SEARCHABLE_ROOT ? "1s" : "1"));
		else
			strcpy( ip->gtype, "7g");

		strcpy( ip->relpath, "/");
		sprintf( ip->filepath, "%s/%s", rootdir, cfname);
		sprintf( ip->accdir, "%s", rootdir);
		ip->cachepath[0] = '\0';  /* root not contained IN a cache */
		return;
	}

	/*
	 * Top level http selectors (e.g. /docs or /docs/) arrive here with
	 * the leading '/' already removed, so they contain either no '/'
	 * at all or only a final one.  Prefix those with "XRETRY/".
	 */
	if ( http) {
		p = strchr( select, '/');
		if ( (p == NULL) || (p[1] == '\0')) {
			strcpy( saved, select);
			strcpy( select, "XRETRY/");
			strcat( select, saved);
		}
	}

	if ( full == FULL) {
		/*
		 * Move a trailing <tab>stuff (or ?stuff for www) into
		 * ip->extra and cut it off the selector.
		 */
		enterextra( ip, select);

		if ( http) {
			/* URL decode selector (we encoded it before sending) */
			www_unescape( select, '+');

			/* Cut the selector at the first newline, if any */
			p = strchr( select, '\n');
			if ( p != NULL)
				*p = '\0';
		}
	}

	/* Fill in gtype, colon bracketed args, ranges and ip->relpath */
	entergtype( ip, select);

	/*
	 * A relpath ending in "(stuff)" names an explicit cache file:
	 * remember stuff in explicit_cache and cut the parenthesized part
	 * off relpath.
	 */
	hasparens = FALSE;

#ifdef	EXPLICIT_CACHE
	p = strrchr( ip->relpath, '(');
	if ( p != NULL) {
		hasparens = TRUE;
		*p = '\0';
		strcpy( explicit_cache, p + 1);

		p = strchr( explicit_cache, ')');
		if ( p == NULL) {
			senderr( "Selector syntax error", ip);
			exit( 2);
		}
		*p = '\0';
	}
#endif

	/* fix suffixes on 0h and 1h items */
	fixhitems( ip);

	enterfilepath( ip);
	entercachepath( ip);

	/* The access directory and compression flag need the full parse */
	if ( full == FULL) {
		enteraccdir( ip);
		ip->compressed = (ip->gtype[1] == 'Z');
	}
}

/*
 * enterextra( ip, sel) cuts sel at its last '?' (when http is set) or at
 * its last tab (otherwise) and copies whatever followed that character
 * into ip->extra.  If the character does not occur, ip->extra is set to
 * the empty string and sel is left untouched.
 */
static void
enterextra( ip, sel)
Item	*ip;
char	*sel;
{
	register char	*split;

	split = strrchr( sel, (http ? '?' : '\t'));
	if ( split == NULL) {
		ip->extra[0] = '\0';
		return;
	}

	*split = '\0';			/* terminate the selector proper */
	strcpy( ip->extra, split + 1);
}


/*
 * entergtype( ip, sel) fills in the struct fields ip->gtype, ip->args and
 * ip->relpath, and the fields ip->range_start and ip->range_end.  If the
 * selector starts with a type followed by ':' bracketed items, these items
 * are put in ip->args.
 *  A "1m" range gtype
 *  like "Rr1-r2-1m" has the values r1, r2 put in range_start and range_end
 *  respectively and gtype is left alone.  Other range gtypes have the
 *  Rr1-r2- stripped.  This is because for "1m" we want to check the range
 *  in the .cache file, but for others we don't.
 */

static void
entergtype( ip, sel)
Item	*ip;
char	*sel;
{
	register char	*src,
			*dst,
			*sep;
	int		escaped = FALSE;

	/*
	 * Work on a copy of the selector kept in ip->gtype.  The type is
	 * everything in front of the first ':' or '/'; sep ends up on that
	 * character, or on the terminating null if there is neither.
	 */
	strcpy( ip->gtype, sel);
	sep = ip->gtype + strcspn( ip->gtype, "/:");

	if ( *sep == ':') {
		/*
		 * type:args:/path -- copy args into ip->args.  A backslash
		 * lets the following ':' be taken literally; the first
		 * unescaped ':' ends the args and what follows is relpath.
		 */
		*sep = '\0';
		src = sep + 1;
		dst = ip->args;

		for ( ;;) {
			if ( *src == '\0') {
				senderr( "Syntax error [arg] in selector", ip);
				exit( 2);
			}
			if ( *src == '\\') {
				escaped = TRUE;
				src++;
				continue;
			}
			if ( (*src == ':') && !escaped) {
				*dst = '\0';
				strcpy( ip->relpath, src + 1);
				break;
			}
			*dst++ = *src++;
			escaped = FALSE;
		}
	}
	else if ( *sep == '/') {
		/* type/path -- no args; relpath keeps its leading '/' */
		strcpy( ip->relpath, sep);
		*sep = '\0';
		ip->args[0] = '\0';
	}
	else {
		senderr( "Syntax error [no '/'] in selector [pselect]", ip);
		exit( 2);
	}

	if ( ip->relpath[0] != '/') {
		senderr( "Syntax error [no '/' in relpath] in selector", ip);
		exit( 2);
	}

	/*
	 * Get args for CGI if any.  They occur at end after ".cgi": the
	 * first ".cgi/" found past the leading '/' splits relpath, and
	 * everything from that '/' on becomes ip->args.
	 */
	if ( streq( ip->gtype, "CGI")) {
		src = strstr( ip->relpath + 1, ".cgi/");
		if ( src != NULL) {
			strcpy( ip->args, src + 4);
			src[4] = '\0';
		}
	}

	ip->range_start = 0L;
	ip->range_end = 0L;
	if ( ip->gtype[0] == 'R')
		enterrange( ip);
}


/*
 * force_html_suffix( path, oldsuf) makes sure path ends in ".html".
 * A path already ending in ".html" is left alone.  Otherwise, if it ends
 * in oldsuf that suffix is replaced by ".html", else ".html" is appended.
 * The suffix comparisons are only made when path is at least as long as
 * the suffix, so nothing in front of the start of path is ever read.
 *
 * NOTE(review): like the code it replaces, this does not check that the
 * array holding path has room for the longer name.
 */
static void
force_html_suffix( path, oldsuf)
char	*path;
char	*oldsuf;
{
	size_t	len = strlen( path);
	size_t	oldlen = strlen( oldsuf);

	if ( (len >= 5) && streq( ".html", path + len - 5))
		return;

	if ( (len >= oldlen) && streq( oldsuf, path + len - oldlen))
		strcpy( path + len - oldlen, ".html");
	else
		strcat( path, ".html");
}

/*
 * fixhitems( ip) -- under http, make sure ip->relpath has an .html suffix
 * for "0h" items (replacing a ".txt" suffix if present) and for "1h" items
 * (replacing a ".cache" suffix if present).  Does nothing when http is
 * not set or for any other gtype.
 */
static void   /* Thanks to Jurgen Botz for this! */
fixhitems( ip)
Item    *ip;
{
	if ( !http)
		return;

	if ( streq( ip->gtype, "0h"))
		force_html_suffix( ip->relpath, ".txt");
	else if ( streq( ip->gtype, "1h"))
		force_html_suffix( ip->relpath, ".cache");
}
  
/*
 * enterrange( ip) handles a range gtype of the form Rr1-r2-type, which
 * denotes the text file range from byte r1 to byte r2.  r1 is stored in
 * ip->range_start and r2 in ip->range_end.  If the trailing type is "1m"
 * ip->gtype is left as it is; otherwise the "Rr1-r2-" prefix is removed
 * so that ip->gtype holds only the trailing type.
 *
 * If either '-' is missing the error is reported through senderr() and
 * the process exits with status 2.
 */
static void
enterrange( ip)
Item	*ip;
{
	register char	*first,
			*second,
			*rest;

	first = strchr( ip->gtype, '-');
	if ( first == NULL) {
		senderr( "Malformed range type", ip);
		exit( 2);
	}
	/* Cut at the '-' while converting r1, then put it back */
	*first = '\0';
	ip->range_start = atol( ip->gtype + 1);
	*first = '-';

	second = strchr( first + 1, '-');
	if ( second == NULL) {
		senderr( "Malformed range type", ip);
		exit( 2);
	}
	/* Same again for r2 */
	*second = '\0';
	ip->range_end = atol( first + 1);
	*second = '-';

	/*
	 * rest points into ip->gtype itself, so source and destination
	 * overlap: strcpy() is undefined for that, memmove() is not.
	 */
	rest = second + 1;
	if ( !streq( rest, "1m"))
		memmove( ip->gtype, rest, strlen( rest) + 1);
}


/*
 * enterfilepath( ip) builds ip->filepath as rootdir followed by
 * ip->relpath.  A trailing '/' is first removed from ip->relpath, and the
 * result is passed through security() before it is used.
 *
 * When the selector carried no "(explicit_cache)" part (hasparens is
 * FALSE), menu style items get the cache file name added so that filepath
 * names the cache file:
 *	gtype "1m..."		filepath + "." + cfname
 *	other gtype "1..."	filepath + "/" + cfname
 *				(but nothing is added for "1h..." under http)
 *	gtype "7g"		filepath + "/" + cfname
 */
static void
enterfilepath( ip)
Item	*ip;
{
	register char	*last;

	/* Remove a '/' at end of relpath, if it's there */
	last = ip->relpath + strlen( ip->relpath) - 1;
	if ( *last == '/')
		*last = '\0';

	security( ip->relpath);

	strcpy( ip->filepath, rootdir);
	strcat( ip->filepath, ip->relpath);

	/* With an explicit cache, filepath is used exactly as it stands */
	if ( hasparens != FALSE)
		return;

	if ( ip->gtype[0] == '1') {
		/* Don't do it for type "1h" and http */
		if ( (ip->gtype[1] == 'h') && http)
			return;

		strcat( ip->filepath, ((ip->gtype[1] == 'm') ? "." : "/"));
		strcat( ip->filepath, cfname);
	}
	else if ( streq( ip->gtype, "7g")) {
		/* grep search: also point at the cache file */
		strcat( ip->filepath, "/");
		strcat( ip->filepath, cfname);
	}
}



/*
 * entercachepath( ip) puts in ip->cachepath the complete path of the
 * cache file containing this item:
 *
 *  - If hasparens is set, the relative path was given explicitly and is
 *    in explicit_cache; it is checked by security() and appended to
 *    rootdir.
 *  - If gtype still starts with 'R' (a "1m" range type, since
 *    enterrange() strips the range prefix from all others), the cache
 *    path is ip->filepath + "." + cfname.
 *  - Otherwise the cache file is assumed to be named cfname and to live
 *    in the directory containing the item: rootdir + relpath with the
 *    last path component replaced by cfname.
 */
static void
entercachepath( ip)
Item	*ip;
{
	register char	*slash;

	if ( hasparens) {
		security( explicit_cache);
		strcpy( ip->cachepath, rootdir);
		strcat( ip->cachepath, explicit_cache);
		return;
	}

	if ( ip->gtype[0] == 'R') {
		sprintf( ip->cachepath, "%s.%s", ip->filepath, cfname);
		return;
	}

	strcpy( ip->cachepath, rootdir);
	strcat( ip->cachepath, ip->relpath);

	/* Overwrite everything after the last '/' with the cache name */
	slash = strrchr( ip->cachepath, '/');
	strcpy( slash + 1, cfname);
}

/*
 * enteraccdir( ip) puts in ip->accdir the complete path of the directory
 * containing the access file for this item, depending on accesstype:
 *
 *	FREE	 accdir is the empty string
 *	ROOTCHK	 accdir is rootdir
 *	DIRCHK	 accdir is ip->filepath cut at its last '/'
 *
 * For any other accesstype ip->accdir is left untouched.
 */
static void
enteraccdir( ip)
Item	*ip;
{
	register char	*slash;

	if ( accesstype == FREE) {
		ip->accdir[0] = '\0';
	}
	else if ( accesstype == ROOTCHK) {
		sprintf( ip->accdir, "%s", rootdir);
	}
	else if ( accesstype == DIRCHK) {
		strcpy( ip->accdir, ip->filepath);
		slash = strrchr( ip->accdir, '/');
		*slash = '\0';
	}
}


/*
 * security( path) is the security check applied to paths built from a
 * selector.  It logs a SECURITY entry through writelog() and terminates
 * the process with exit( 2) when path contains
 *
 *  - a backslash, a single quote or a double quote, or
 *  - ".." followed by '/' or by the end of the string.
 *
 * The end-of-string case matters because enterfilepath() strips a
 * trailing '/' before calling here, so a path like "/.." would otherwise
 * get through and point above rootdir.
 *
 * It returns normally, without modifying path, if nothing is found.
 */
static void
security( path)
char	*path;
{
	register char *cp;

	for ( cp = path; *cp != '\0'; cp++) {
		switch ( *cp) {
		case '\\':
		case '\'':
		case '"':
			writelog( "", "SECURITY", "Found bad char in path");
			exit( 2);
		case '.':
			if ( (cp[1] == '.') &&
					((cp[2] == '/') || (cp[2] == '\0'))) {
				writelog( "", "SECURITY", "Found ../ in path");
				exit( 2);
			}
			break;
		default:
			break;
		}
	}
}
