/* rename.c - renaming of files to end with appropriate extensions,
              and to prevent metacharacters in filenames */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "web.h"

/* One node of the singly linked list that maps a content type to the
 * filename extensions accepted for it. */
struct rename_contenttype {
    char *pszContentType;            /* content type name */
    char **ppszExtensions;           /* valid extensions; entry 0 is the
                                        preferred one */
    int nExtensions;                 /* number of entries in ppszExtensions */
    struct rename_contenttype *next; /* following node, NULL at list end */
};

/* Head of the content type list. */
struct rename_contenttype *typelist;

/* Characters that rename_object() replaces by a hex-quoted sequence. */
char *pszMetachars = "?&*%=#";

/* Character that introduces a hex-quoted sequence; it is itself quoted
 * whenever it occurs in a name. */
char cHexquote = '@';

/* Digit table used when writing hex-quoted sequences. */
static const char *hexchars = "0123456789ABCDEF";

/* Copy the len bytes starting at s into a freshly allocated, NUL-terminated
 * string.  Terminates the program if memory cannot be obtained. */
static char * addcontenttype_copy(const char * s, size_t len)
{
    char * copy = malloc(len + 1);

    if (copy == NULL) {
        fprintf(stderr, "addcontenttype: out of memory\n");
        exit(1);
    }
    memcpy(copy, s, len);
    copy[len] = 0;
    return copy;
}

/* Define (or redefine) the list of valid extensions for a content type.
 *
 * typename - content type name; an existing definition with the same name
 *            has its extension list replaced, otherwise a new entry is
 *            appended to the end of typelist.
 * extlist  - extensions separated by one or more spaces; the first one is
 *            the preferred extension.  Leading and trailing spaces are
 *            ignored.  The string is copied, not modified or retained.
 *
 * A list containing no extension at all is rejected with a warning and
 * leaves typelist untouched, so every entry always has at least one
 * extension.  Running out of memory terminates the program.
 */
void addcontenttype(char * typename, char * extlist)
{
    struct rename_contenttype * * link;
    struct rename_contenttype * node;
    char * * exts;
    const char * scan;
    int count, i;

    /* pass 1: count the words, so that the final word (which has no
       space after it) is included */
    count = 0;
    scan = extlist;
    while (*scan) {
        while (*scan == ' ') scan++;
        if (!*scan) break;
        count++;
        while (*scan && *scan != ' ') scan++;
    }

    if (count == 0) {
        fprintf(stderr, "type %s: no extensions given, definition ignored\n",
                typename);
        return;
    }

    exts = malloc((size_t) count * sizeof *exts);
    if (exts == NULL) {
        fprintf(stderr, "addcontenttype: out of memory\n");
        exit(1);
    }

    /* pass 2: copy each word */
    scan = extlist;
    for (i = 0; i < count; i++) {
        const char * end;

        while (*scan == ' ') scan++;
        end = scan;
        while (*end && *end != ' ') end++;
        exts[i] = addcontenttype_copy(scan, (size_t) (end - scan));
        scan = end;
    }

    /* find the type name if it exists already; link ends up pointing at
       the matching node's slot, or at the NULL slot terminating the list
       (which also covers a list that is still empty) */
    link = &typelist;
    while (*link != NULL && strcmp(typename, (*link)->pszContentType) != 0)
        link = &(*link)->next;

    node = *link;
    if (node != NULL) {
        /* override previous definition: free its extension list */
        for (i = 0; i < node->nExtensions; i++)
            free(node->ppszExtensions[i]);
        free(node->ppszExtensions);
    }
    else {
        node = malloc(sizeof *node);
        if (node == NULL) {
            fprintf(stderr, "addcontenttype: out of memory\n");
            exit(1);
        }
        node->pszContentType = addcontenttype_copy(typename, strlen(typename));
        node->next = NULL;
        *link = node;
    }

    node->ppszExtensions = exts;
    node->nExtensions = count;
}
/* Reads a configuration file section for renaming. Accepts the following
 * syntax:
 *
 * meta string                                   - sets metacharacter list
 * type content/type preferred [extra extra ...] - sets extensions for type
 * quote char                                    - sets quote character
 *
 * section ended by line beginning with [, which will be returned in buffer,
 * or by eof.
 */

/* Read the renaming section of a configuration file.
 *
 * fp       - stream positioned at the first line of the section
 * filename - name of the configuration file, used in error messages only
 * lineno   - line counter; incremented for every line consumed here (the
 *            terminating '[' line is not counted)
 * buf      - line buffer of buflen bytes; when the section is ended by a
 *            line beginning with '[', that line is left in buf for the
 *            caller.  At end of file (or on a read error) buf is not
 *            overwritten by the failed read.
 *
 * Blank lines are skipped.  Any malformed or unrecognised line prints a
 * message to stderr and terminates the program.
 */
void rename_readconfig(FILE * fp, const char * filename, int * lineno,
		       char * buf, int buflen)
{
    /* Testing fgets() itself (rather than feof() beforehand) makes sure a
       final line without a newline is still processed and that a read
       error ends the loop. */
    while (fgets(buf, buflen, fp) != NULL)
    {
        size_t len;

        if (buf[0] == '[') return;  /* start of next section */
        (*lineno)++;

        /* remove trailing control characters (newline etc.); compare as
           unsigned char so bytes >= 0x80 are not mistaken for them */
        len = strlen(buf);
        while (len > 0 && (unsigned char) buf[len - 1] < 32)
            buf[--len] = 0;
        if (len == 0) continue;

        if (strncmp(buf, "meta ", 5) == 0)
        {
            char * meta = strdup(buf + 5);

            if (meta == NULL) {
                fprintf(stderr, "%s, line %d: out of memory\n",
                        filename, *lineno);
                exit(1);
            }
            pszMetachars = meta;
        }
        else if (strcmp(buf, "meta") == 0)
        {
            pszMetachars = ""; /* metacharacter processing disabled */
        }
        else if (strncmp(buf, "type ", 5) == 0)
        {
            char * typename = buf + 5;
            char * extlist = strchr(typename, ' ');

            /* require at least one non-blank character after the type */
            if (! extlist || extlist[strspn(extlist, " ")] == 0) {
                fprintf (stderr, "%s, line %d: type must have extensions\n",
                         filename, *lineno);
                exit(1);
            }
            *extlist = 0;
            extlist ++;
            addcontenttype(typename, extlist);
        }
        else if (strncmp(buf, "quote ", 6) == 0)
        {
            if ((unsigned char) buf[6] <= 32) {
                fprintf(stderr, "%s, line %d: quote character must be "
                        "non-whitespace\n", filename, *lineno);
                exit(1);
            }
            cHexquote = buf[6];
        }
        else {
            fprintf(stderr, "%s, line %d: I can't understand this\n",
                    filename, *lineno);
            exit(1);
        }
    }
}

void rename_init()
{
    typelist = malloc(sizeof(*typelist));
    typelist->pszContentType = "text/html";
    typelist->ppszExtensions = malloc(sizeof (char *) * 2);
    typelist->ppszExtensions[0] = strdup("html");
    typelist->ppszExtensions[1] = strdup("htm");
    typelist->nExtensions = 2;
    typelist->next = NULL;
}

/* Return a saner name for an object. Given the same input name & directory
 * it must always return the same output for each run of the program. Because
 * the content type may not be known at all points where the file is
 * referenced, we must build up a table of all files whose extensions are
 * modified, and consult it whenever contenttype is NULL.
 *
 * A newly computed filename is returned in static storage; copy it before
 * calling again if it is still required. */

/* TODO: determine whether or not we need to discard pszObjectname, and
   fix memory leak if one exists. */


/* One logged renaming: the object identified by host, port and original
 * name, together with the name it was given. */
struct renamed_table_ent {
    char *pszHost;                  /* host the object was fetched from */
    int port;                       /* port on that host */
    char *pszObjectname;            /* object name before renaming */
    char *pszResult;                /* name produced by rename_object() */
    struct renamed_table_ent *next; /* following entry, NULL at list end */
};

/* Head of the list of logged renamings; new entries go in at the front. */
struct renamed_table_ent *renamed_table;


/* Find the entry logged in renamed_table for pszHost:port/pszObjectname.
 * Returns NULL when no renaming has been logged for that object. */
static struct renamed_table_ent * rename_object_lookup(const char * pszHost,
                                                       int port,
                                                       const char * pszObjectname)
{
    struct renamed_table_ent * r;

    for (r = renamed_table; r != NULL; r = r->next) {
        if (r->port == port && !strcmp(r->pszHost, pszHost) &&
            !strcmp(r->pszObjectname, pszObjectname))
            return r;
    }
    return NULL;
}

/* Compute the local name for an object.
 *
 * pszHost, port - where the object lives; used as key for the renamed table
 * pszObjectname - object name as referenced
 * contenttype   - content type of the object, or NULL if not known
 *
 * Metacharacters (pszMetachars) and the quote character itself are replaced
 * by cHexquote followed by two hex digits; a quoted '?' is additionally
 * followed by four hex digits of the process id when options.bQueryAddPid
 * is set.  If the content type is listed in typelist and the text after the
 * last unquoted '.' is not one of its valid extensions, the preferred
 * extension is appended and the renaming is logged in renamed_table, so a
 * later call with contenttype == NULL gives the same answer.
 *
 * Returns pszObjectname itself when renaming is disabled, or the name is
 * empty or ends in '/'; the logged result (owned by the table) when
 * contenttype is NULL and the object was logged before; otherwise a pointer
 * to static storage that the next call overwrites.
 */
char * rename_object(const char * pszHost, int port,
		     char * pszObjectname, const char * contenttype)
{
    /* Quoting stops once NAME_LIMIT characters have been produced.  One
       input character expands to at most 7 output characters, so the
       quoted name always fits the buffer with its terminator. */
    enum { NAME_LIMIT = 230 };
    static char namebuf[256];
    size_t namelen;
    int i, j, p;
    int known;
    const char * lastdot = NULL;
    struct rename_contenttype * t;
    struct renamed_table_ent * r;

    /* returns a saner name if Options.bRename == 1 */
    if (!options.bRename) return pszObjectname;
    namelen = strlen(pszObjectname);
    if (namelen == 0) return pszObjectname;
    if (pszObjectname[namelen - 1] == '/') return pszObjectname;

    if (options.bVerbose >= 4)
        printf ("[renaming %s:%d/%s (%s)", pszHost, port, pszObjectname,
                contenttype ? contenttype : "NULL");

    if (! contenttype) /* check in table for previous renaming */
    {
        r = rename_object_lookup(pszHost, port, pszObjectname);
        if (r) {
            if (options.bVerbose >= 4)
                printf (" - cached %s]\n", r->pszResult);
            return r->pszResult;
        }
        if (options.bVerbose >= 4) {
            printf (" - not cached");
            fflush(stdout);
        }
    }

    /* no cached version; calculate it ourselves! */
    i = j = 0;
    while (pszObjectname[i] && j < NAME_LIMIT)
    {
        char c = pszObjectname[i];

        if (strchr(pszMetachars, c) || c == cHexquote)
        {
            namebuf[j++] = cHexquote;
            namebuf[j++] = hexchars[((unsigned char) c >> 4) & 0x0F];
            namebuf[j++] = hexchars[(unsigned char) c & 0x0F];
            if (c == '?' && options.bQueryAddPid)
            {
                /* query - personalise with the low 16 bits of the pid */
                p = getpid();
                namebuf[j++] = hexchars[(p >> 12) & 0x0F];
                namebuf[j++] = hexchars[(p >> 8) & 0x0F];
                namebuf[j++] = hexchars[(p >> 4) & 0x0F];
                namebuf[j++] = hexchars[p  & 0x0F];
            }
        }
        else {
            if (c == '.') lastdot = pszObjectname + i;
            namebuf[j++] = c;
        }
        i++;
    }

    namebuf[j] = 0;

    /* if we have no content type, then we can't check the extension, so
     * just return here!
     */
    if (!contenttype) {
        if (options.bVerbose >= 4)
            printf (" - result %s]\n", namebuf);
        return namebuf;
    }

    /* check extension - first find content type in list, if it exists */
    for (t = typelist; t != NULL; t = t->next)
        if (!strcmp(t->pszContentType, contenttype)) break;

    /* a type without any extension gives us nothing to append */
    if (t && t->nExtensions > 0) {
        /* okay, we have the type. now, see if our extension [if we have
         * one] is in the list of valid extensions.
         */
        known = 0;
        if (lastdot)
        {
            for (i = 0; i < t->nExtensions; i++)
                if (!strcmp(t->ppszExtensions[i], lastdot + 1)) {
                    known = 1;
                    break;
                }
        }

        if (!known) {
            /* append the preferred extension; snprintf keeps an overlong
             * extension from running past the end of namebuf
             */
            snprintf(namebuf + j, sizeof namebuf - (size_t) j, ".%s",
                     t->ppszExtensions[0]);

            /* as we've changed the extension, which requires content
             * type knowledge that isn't always available, we must now
             * store the result in the renamed table.  Adding at the
             * head of the list is a) efficient to add, and b) likely
             * to give good search results due to reference locality.
             * An identical entry that is already present is not added
             * again, so repeated calls do not grow the table.
             */
            r = rename_object_lookup(pszHost, port, pszObjectname);
            if (r == NULL || strcmp(r->pszResult, namebuf) != 0) {
                r = malloc (sizeof *r);
                if (r != NULL) {
                    r->pszHost = strdup(pszHost);
                    r->pszObjectname = strdup(pszObjectname);
                    r->pszResult = strdup(namebuf);
                }
                if (r == NULL || r->pszHost == NULL ||
                    r->pszObjectname == NULL || r->pszResult == NULL) {
                    fprintf(stderr, "rename_object: out of memory\n");
                    exit(1);
                }
                r->port = port;
                r->next = renamed_table;
                renamed_table = r;
                if (options.bVerbose >= 4) printf (" - logged");
            }
        }
    }
    if (options.bVerbose >= 4)
        printf (" - result %s]\n", namebuf);
    return namebuf;
}
