/*
 * Copyright (c) 1991 by the University of Washington
 * Copyright (c) 1993 by the University of Southern California
 *
 * For copying and distribution information, please see the files
 * <uw-copyright.h> and <usc-copyr.h>.
 */

#include <uw-copyright.h>
#include <usc-copyr.h>

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/dir.h>
#include <stdio.h>
#include <sgtty.h>
#include <strings.h>

/* archie definitions */

#include "archie_src/include/typedef.h"
#include "archie_src/include/ar_search.h"
#include "archie_src/include/master.h"
#include "archie_src/include/files.h"
#include "archie_src/include/host_db.h"
#include "archie_src/include/db_ops.h"
#include "archie_src/include/ar_attrib.h"
#include "archie_src/include/error.h"

/*
 * Handles for the archie database files.  Each one is allocated with
 * create_finfo() the first time arch_dsdb() runs.  strings_idx, strings
 * and strings_hash are opened together by open_files_db(); the remaining
 * four are opened by open_host_dbs().  On every later call arch_dsdb()
 * closes and reopens hostbyaddr, hostdb and hostaux_db, but leaves
 * domaindb open.
 */
static file_info_t	*strings_idx;
static file_info_t	*strings;
static file_info_t	*strings_hash;
static file_info_t	*domaindb;
static file_info_t      *hostdb;
static file_info_t      *hostbyaddr;
static file_info_t	*hostaux_db;

/* End archie definitions */

#include <pserver.h>
#include <pfs.h>
#include <psrv.h>
#include <plog.h>
#include <pprot.h>
#include <perrno.h>
#include <pmachine.h>

/* Not referenced in the code visible here; presumably the archie release
   this module was written against -- TODO confirm against its users. */
int archie_supported_version = 3;

/* Defined elsewhere in the server.  hostname is not referenced in the code
   visible here; hostwport is stored as the host of the pseudo-directory
   links that arch_dsdb() creates. */
extern char	hostname[];
extern char	hostwport[];
/* Leading component of every name handled by arch_dsdb(); it is skipped
   when parsing a query and prepended when building link names. */
char		archie_prefix[] = "ARCHIE";
/* Forward declarations of the helpers defined at the end of this file. */
static int num_slashes(char *s);
static int tkllength(TOKEN tkl);
/*
 * arch_dsdb - Make an archie database query as if it were a directory lookup
 *
 * NAME, *COMPONENTSP and every token on the *RCOMPP list are joined with
 * '/' into one query string of the form
 *
 *        ARCHIE/COMMAND(PARAMETERS)/ARGS
 *
 * where COMMAND is either MATCH (search the files database) or HOST
 * (list sites, or list a directory on a site).  Results are added to DIR.
 *
 * Returns:
 *   PSUCCESS               - the query ran, or there was nothing to do
 *                            (DSDB_VERIFY set, or the query had no
 *                            command separator / no search string).
 *   PFAILURE               - a component contained a '/', additional
 *                            components were given without a first one,
 *                            a database could not be opened, or the
 *                            archie search/list routine failed.
 *   DSRDIR_NOT_A_DIRECTORY - the query did not fit in MAXPATHLEN, the
 *                            command is not MATCH or HOST, NAME had too
 *                            many components for MATCH, or the site was
 *                            not found.
 *   DIRSRV_TOO_MANY        - a HOST site listing matched too many sites.
 *
 * When the end of the function is reached (i.e. not on the early returns
 * above) *componentsp and *rcompp are advanced past the components that
 * were consumed by the query.
 */
int
arch_dsdb(RREQ  req,           /* Request pointer (unused)                  */
          char  *name,         /* Name of the directory                     */
          char  **componentsp, /* Next component of name (may be modified)  */
          TOKEN *rcompp,       /* Additional components (may be modified)   */
          VDIR  dir,           /* Directory to be filled in                 */
          int   options,       /* Options to list command                   */
          const char *rattrib, /* Requested attributes ( plus-separated)    */
          FILTER filters)      /* Filters to be applied (if applied, their
                                  'pre_or_post' members are set to FIL_ALREADY
                                  */
{
    /* Note that componentsp and rcompp are pointers to  */
    /* pointers.  This is necessary because              */
    /* this routine must be able to update these values  */
    /* if more than one component of the name is         */
    /* resolved.                                         */
    char        *components = NULL;
    int         num_unresolvedcomps = 0;
    static int  dbopen = 0;     /* non-zero once the databases are open */
    char        fullquery[MAXPATHLEN];
    char        *dbpart;
    char        dbquery[MAXPATHLEN];
    char        dbargs[MAXPATHLEN];
    char        dbarg1[MAXPATHLEN];
    char        dbarg2[MAXPATHLEN];
    char        dbarg3[MAXPATHLEN];
    char        dirlinkname[MAXPATHLEN];
    char        sep;
    int         tmp;
    VLINK       dirlink = NULL;
    TOKEN       tkl_tmp;

    /* Make sure NAME, COMPONENTSP, and RCOMPP arguments are correct. */

    /* Name components with slashes in them are malformed inputs to the
       ARCHIE database. */
    if(componentsp && (components = *componentsp)) {
        if(index(components, '/'))
            return PFAILURE;
        for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next)
            if (index(tkl_tmp->token, '/'))
                return PFAILURE;
    } else {
        if (*rcompp) return PFAILURE; /* ridiculous to specify additional comps
                                         and no initial comps.*/
    }

    /* Directory already initialized, but note that this */
    /* is not a real directory                           */
    dir->version = -1;
    dir->inc_native = 3;           /* Not really a directory */

    /* Note that if we are resolving multiple components  */
    /* (*rcompp != NULL) the directory will already be    */
    /* empty since had anything been in it dirsrv would   */
    /* have already cleared it and moved on to the next   */
    /* component                                          */

    /* First call: locate and open every database.  Any failure resets */
    /* dbopen so that the next call retries the whole sequence.        */
    if(!dbopen++) {
        if(set_master_db_dir("") == (char *) NULL){
            dbopen = 0;
            plog(L_DB_ERROR,NOREQ,"Can't set archie master db directory",0);
            return(PFAILURE);
        }
        if(set_files_db_dir("") == (char *) NULL){
            dbopen = 0;
            plog(L_DB_ERROR,NOREQ,"Can't set archie files db directory",0);
            return(PFAILURE);
        }
        if(set_host_db_dir("") == (char *) NULL){
            dbopen = 0;
            plog(L_DB_ERROR,NOREQ,"Can't set archie host db directory",0);
            return(PFAILURE);
        }
        open_alog(PSRV_LOGFILE, 0, "dirsrv");
        error(A_INFO, "dirsrv", "Prospero server opened Archie database %s",
              cvt_to_usertime(time((time_t *) NULL), 1));
        strings_idx = create_finfo();
        strings = create_finfo();
        strings_hash = create_finfo();
        domaindb = create_finfo();
        hostdb = create_finfo();
        hostbyaddr = create_finfo();
        hostaux_db = create_finfo();

        if(open_files_db(strings_idx,strings,strings_hash,O_RDONLY)!=A_OK){
            dbopen = 0;
            plog(L_DB_ERROR,NOREQ,"Can't open archie files database",0);
            return(PFAILURE);
        }

        /* The domain database doesn't change much. */
        if(open_host_dbs(hostbyaddr,hostdb,domaindb,
                         hostaux_db,O_RDONLY) != A_OK) {
            dbopen = 0;
            plog(L_DB_ERROR,NOREQ,"Can't open archie host database",0);
            return(PFAILURE);
        }
    } else {
        /* Later calls: close and reopen the host databases (but not the */
        /* domain database, which is passed as NULL) before each query.  */
        close_host_dbs(hostbyaddr, hostdb, (file_info_t *) NULL, hostaux_db);

        if(open_host_dbs(hostbyaddr,hostdb,(file_info_t *) NULL,
                         hostaux_db,O_RDONLY) != A_OK) {
            plog(L_DB_ERROR,NOREQ,"Can't open archie host database",0);
            dbopen = 0;
            return(PFAILURE);
        }
    }

    /* For now, if only verifying, indicate success */
    /* We don't want to do a DB search.  Eventually */
    /* we might actually check that the directory   */
    /* is valid.                                    */
    if(options&DSDB_VERIFY) return(PSUCCESS);

    /* Construct the full query from the pieces passed to us.  The code  */
    /* treats qsprintf()'s return value as the number of bytes needed    */
    /* including the trailing NUL (hence the -1), so TMP is the length   */
    /* of the query so far.  The overflow test is made after every       */
    /* append so that the next write never starts beyond the buffer.     */
    tmp = -1 + qsprintf(fullquery,sizeof fullquery, "%s%s%s",name,
                        ((components && *components) ? "/" : ""),
                        ((components && *components) ? components : ""));
    if (tmp + 1 > sizeof fullquery) return DSRDIR_NOT_A_DIRECTORY;
    for (tkl_tmp = *rcompp; tkl_tmp; tkl_tmp = tkl_tmp->next) {
        /* Append THIS token, not the head of the list. */
        tmp += -1 + qsprintf(fullquery + tmp, sizeof fullquery - tmp,
                             "/%s", tkl_tmp->token);
        if (tmp + 1 > sizeof fullquery) return DSRDIR_NOT_A_DIRECTORY;
    }


    /* The format for the queries is            */
    /* ARCHIE/COMMAND(PARAMETERS)/ARGS          */

    /* Strip off the database prefix and the separator that follows it. */
    /* If the query is no longer than the prefix there is no command at */
    /* all; return nothing rather than parse past the terminating NUL.  */
    {
        size_t prefixlen = strlen(archie_prefix);

        if (strlen(fullquery) <= prefixlen) return(PSUCCESS);
        dbpart = fullquery + prefixlen + 1;
    }

    /* Find the query (up to the next /), determine if the */
    /* / exists and then read the args                     */
    tmp = sscanf(dbpart,"%[^/]%c%s",dbquery,&sep,dbargs);

    /* If no separator, for now return nothing         */
    /* Eventually, we might return a list of the query */
    /* types supported                                 */
    if(tmp < 2) return(PSUCCESS);

    /* Check query type */
    if(strncmp(dbquery,"MATCH",5)==0) {
        search_req_t search_req; /* search request          */
        char    stype = 's';     /* search type             */
        int     maxhits = 100;   /* max entries to return   */
        int     maxmatch = 100;  /* max strings to match    */
        int     maxhitpm = 100;  /* max hits per match      */
        int     offset = 0;      /* entries to skip         */
        int     onlystr = 0;     /* Just return strings     */
        FILTER  cfil = NULL;     /* To step through filters */

        SET_LINK_SIZE(search_req.attrib_list);
        SET_LK_LAST_MOD(search_req.attrib_list);
        SET_LK_UNIX_MODES(search_req.attrib_list);
        SET_AR_H_IP_ADDR(search_req.attrib_list);
        SET_AR_H_LAST_MOD(search_req.attrib_list);

        search_req.orig_type = S_E_SUB_NCASE_STR ;
        search_req.no_matches = 0;

        /* In the MATCH querytype, the directory part of the query (the
           argument named NAME) may have no more than 3 components.
           There are 3 possible formats:
           1) DATABASE_PREFIX (one component)
           2) (1)/MATCH(...)
           3) (2)/query-term (3 total components)
           */
        if (num_slashes(name) > 2) return DSRDIR_NOT_A_DIRECTORY;
        /* if no strings to match, return nothing */
        if(tmp < 3) return(PSUCCESS);

        /* Get arguments: first try the five-parameter form ... */
        tmp = sscanf(dbquery,"MATCH(%d,%d,%d,%d,%c",&maxhits,
                     &maxmatch,&maxhitpm,&offset,&stype);

        /* ... and fall back to the three-parameter form, in which */
        /* maxhits also serves as maxmatch and maxhitpm.            */
        if(tmp < 3) {
            sscanf(dbquery,"MATCH(%d,%d,%c",&maxhits,&offset,&stype);
            maxmatch = maxhits;
            maxhitpm = maxhits;
        }
        /* Note: in maxhits, 0 means use default, -1 means use max */

        /* Map the one-letter search type onto the archie search code. */
        switch(stype) {
          case '=':
            search_req.orig_type = S_EXACT ;
            break;
          case 'R':
            search_req.orig_type = S_FULL_REGEX ;
            break;
          case 'r':
            search_req.orig_type = S_E_FULL_REGEX ;
            break;
          case 'X':
            search_req.orig_type = S_X_REGEX;
            break;
          case 'x':
            search_req.orig_type = S_E_X_REGEX;
            break;
          case 'C':
            search_req.orig_type = S_SUB_CASE_STR ;
            break;
          case 'c':
            search_req.orig_type = S_E_SUB_CASE_STR ;
            break;
          case 'K':
            search_req.orig_type = S_SUB_KASE;
            break;
          case 'k':
            search_req.orig_type = S_E_SUB_KASE;
            break;
          case 'S':
            search_req.orig_type = S_SUB_NCASE_STR ;
            break;
          case 'Z':
            search_req.orig_type = S_ZUB_NCASE;
            break;
          case 'z':
            search_req.orig_type = S_E_ZUB_NCASE;
            break;
          case 'n':
            search_req.orig_type = S_NOATTRIB_EXACT;
            break;
          case 's':
          default:
            search_req.orig_type = S_E_SUB_NCASE_STR ;
            break;
        }

        *dbarg1 = *dbarg2 = *dbarg3 = '\0';

        /* Split ARGS into search-string / next component / the rest. */
        tmp = sscanf(dbargs,"%[^/]%c%[^/]%c%s",dbarg1,&sep,dbarg2,
                     &sep,dbarg3);

        if(tmp < 2) {
            /* This specifies a directory, but not a link within it  */
            /* create a pseudo directory and return a pointer        */
            /* In other words, listing a MATCH directory by itself yields
               an empty directory. */
            if(*dbarg1 && (strcmp(dbarg1,"*")!= 0)) {
                dirlink = vlalloc();
                dirlink->target = stcopyr("DIRECTORY",dirlink->target);
                dirlink->name = stcopyr(dbarg1,dirlink->name);
                dirlink->host = stcopyr(hostwport,dirlink->host);
                /* The three pieces are a prefix of FULLQUERY, which  */
                /* was already checked to fit in MAXPATHLEN bytes.    */
                sprintf(dirlinkname,"%s/%s/%s",archie_prefix,dbquery,
                        dbarg1);
                dirlink->hsoname = stcopyr(dirlinkname,dirlink->hsoname);
                vl_insert(dirlink,dir,VLI_ALLOW_CONF);
            }
        }
        else {
            if(tmp > 4) {
                /* There are remaining components */
                num_unresolvedcomps = num_slashes(dbarg3);
            }

            search_req.maxhits = maxhits;
            search_req.maxmatch = maxmatch;
            search_req.maxhitpm = maxhitpm;
            /* NOTE(review): unbounded copy; the size of search_str is
               declared in the archie headers -- confirm it can hold
               MAXPATHLEN bytes. */
            strcpy(search_req.search_str,dbarg1);

            /* Domains to restrict search on.
               Colon separated list of domain names:

               eg "usa:mcgill.ca:.fi"

               The actual domain names are resolved internally to
               archie so you don't need to do anything other
               than format them (if necessary) and pass them along.
               I assume that the clients will just send them
               preformatted */


            search_req.domains = (struct token *) NULL;
            for(cfil = filters; cfil; cfil = cfil->next) {
                if(cfil->name && (strcmp(cfil->name,"AR_DOMAIN") == 0) &&
                   (cfil->type == FIL_DIRECTORY) &&
                   (cfil->execution_location == FIL_SERVER) &&
                   (cfil->pre_or_post == FIL_PRE)) {
                    search_req.domains = cfil->args;
                    cfil->pre_or_post = FIL_ALREADY;
                    /* Can't apply two AR_DOMAIN filters; ARCHIE won't
                       support this. */
                    /* Note that there is special purpose error handling code
                       in server/list.c to handle this case, too.  Look at it.
                       */
                    break;
                }
            }

            /* Offset: For exact matches it is
               the number of the link on the chain for that unique
               filename. For others it is the record number in the
               index file of the last hit returned (in previous search).
               A negative value returned in this variable means that all
               hits have been found. */

            search_req.orig_offset = offset;

            /* Same in format as "domains". The list contains the
               pathname components that must exist to make a valid hit.
               For the moment all comparisons are done with a case
               insensitive substring match and it is performed as a
               logical "or". */

            search_req.comp_restrict = (struct token *) NULL;
            for(cfil = filters; cfil ; cfil = cfil->next) {
                if(cfil->name && (strcmp(cfil->name,"AR_PATHCOMP") == 0) &&
                   ((cfil->type == FIL_DIRECTORY) ||
                    (cfil->type == FIL_HIERARCHY)) &&
                   (cfil->execution_location == FIL_SERVER) &&
                   (cfil->pre_or_post == FIL_PRE)) {
                    search_req.comp_restrict = cfil->args;
                    cfil->pre_or_post = FIL_ALREADY;
                    /* If there are two AR_PATHCOMP filters, only the first one
                       should be applied.   If two are sent, list() will return
                       an error. */
                    /* Note that there is special purpose error handling code
                       in server/list.c to handle this case, too.  Look at it.
                       */
                    break;
                }
            }

            /* Any user errors (bad regular expression etc)
               generated will set this to an appropriate message. Not used
               at the moment */

            search_req.error_string =  (char *) NULL;

            if(parchie_search_files_db(strings, strings_idx, strings_hash,
                                domaindb, hostdb, hostaux_db, hostbyaddr,
                                &search_req, dir) == ERROR) {
                if(search_req.error_string)
                    strcpy(p_err_string,search_req.error_string);
                return(PFAILURE);
            }
            /* A message on a successful search is passed on as a warning. */
            if(search_req.error_string)
                strcpy(p_warn_string,search_req.error_string);

            plog(L_DB_INFO,NOREQ,"matches: %d", search_req.no_matches, 0);
        }
    }
    else if (strncmp(dbquery,"HOST",4)==0) {
        attrib_list_t attrib_list;

        /* First component of args is the site name    */
        /* remaining components are the directory name */

        *dbarg1 = *dbarg2 = '\0';

        tmp = sscanf(dbargs,"%[^/]%c%s",dbarg1,&sep,dbarg2);

        /* If first component is null, return an empty directory */
        if(tmp < 1) return(PSUCCESS);

        /* if first component exists, but is last component, */
        /* then it is the name of the subdirectory for the   */
        /* host, create a pseudo directory and return a      */
        /* pointer, If first component is a wildcard, and no */
        /* additional components, then return matching list  */
        /* of sites.                                         */

        if(tmp == 1) {
            SET_AR_H_LAST_MOD(attrib_list);
            SET_AR_H_IP_ADDR(attrib_list);
            tmp = parchie_list_host(dbarg1,NULL,attrib_list,dir,
                                    hostdb,hostaux_db, strings);
            if(tmp == PRARCH_TOO_MANY) return(DIRSRV_TOO_MANY);
            if(tmp) return(PFAILURE);
        }
        /* More than one component, Look up the requested directory  */
        /* Note that the since the full query is passed to us, it    */
        /* includes the component name, thus the directory name is   */
        /* what you get when you strip off the last component of the */
        /* name                                                      */
        else {
            char *lastsep;
            SET_LINK_SIZE(attrib_list);
            SET_LK_LAST_MOD(attrib_list);
            SET_LK_UNIX_MODES(attrib_list);
            /* Cut DBARG2 at its last '/'; with no '/' the directory */
            /* name is empty.                                         */
            lastsep = rindex(dbarg2,'/');
            if(lastsep) *lastsep = '\0';
            else *dbarg2 = '\0';
            tmp = parchie_host_dir(dbarg1, attrib_list, dbarg2,
                                dir,hostdb, hostaux_db, strings);
            if(tmp == PRARCH_SITE_NOT_FOUND)
                return(DSRDIR_NOT_A_DIRECTORY);
            if(tmp) return(PFAILURE);
        }
    }
    else {
        /* Query type not supported */
        return(DSRDIR_NOT_A_DIRECTORY);
    }

    /* We are done, but we need to figure out if we resolved multiple
       components and reset *componentsp and *rcompp appropriately. */

    if (num_unresolvedcomps) {
        /* Consume everything except the trailing unresolved components. */
        int skip = tkllength(*rcompp) - num_unresolvedcomps;
        if (skip < 0) return DSRDIR_NOT_A_DIRECTORY; /* shouldn't happen. */
        while(skip-- > 0) {
            assert(*rcompp);
            *componentsp = (*rcompp)->token;
            *rcompp = (*rcompp)->next;
        }
    } else {
        /* Everything was consumed: leave *componentsp on the last token */
        /* and *rcompp empty.                                            */
        while (*rcompp) {
            *componentsp = (*rcompp)->token;
            *rcompp = (*rcompp)->next;
        }
    }
    return PSUCCESS;
}

/*
 * tkllength - count the entries on a token list.
 *
 * Follows the `next' links starting at TKL until a null link is found
 * and returns how many nodes were visited; a null TKL yields 0.
 */
static int
tkllength(TOKEN tkl)
{
    int count = 0;

    while (tkl) {
        count++;
        tkl = tkl->next;
    }
    return count;
}


/*
 * num_slashes - count the '/' characters in the NUL-terminated string S.
 *
 * S must not be a null pointer.  Returns 0 for an empty string.
 */
static
int
num_slashes(char *s)
{
    int count = 0;
    char c;

    while ((c = *s++) != '\0')
        count += (c == '/');
    return count;
}


