/***************************************
  $Header: /home/amb/wwwoffle/RCS/document.c 1.6 1999/02/19 19:55:09 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.4c.
  Document parsing functions.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1998,99 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/

#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <ctype.h>

#include "wwwoffle.h"
#include "document.h"
#include "errors.h"
#include "misc.h"
#include "config.h"


/*+ The lists of references, one per reference type.  Each list is a heap
    allocated array of heap allocated strings that is grown by AddReference();
    an entry is NULL while its list is empty (static storage, and free_list()
    resets it to NULL). +*/
static char **references[NRefTypes];

/*+ The number of slots in use in each list.  AddReference() also counts the
    NULL terminator that FinishReferences() appends. +*/
static int nreferences[NRefTypes];

/*+ The base URL from which references are related.  Only the pointer is
    stored (by ParseDocument() and SetBaseURL()); it is read by
    GetReferences() when the links are made absolute. +*/
static URL *baseUrl;


/* Local helper that releases one reference list. */
static void free_list(RefType type);

/* Local helpers used by ParseDocument() to identify the document. */
static DocType GetDocumentType(char *mimetype);
static char *GetMIMEType(int fd, URL *Url);


/*++++++++++++++++++++++++++++++++++++++
  Identify the type of a document and, when it is a type that is understood,
  parse it so that the reference lists are filled in.

  DocType ParseDocument Returns the document type that was detected
  (DocUnknown when nothing was parsed).

  int fd The file descriptor to read the document from.

  URL *Url The URL of the document; it also becomes the base URL for references.
  ++++++++++++++++++++++++++++++++++++++*/

DocType ParseDocument(int fd,URL *Url)
{
 DocType doctype=DocUnknown;
 char *mimetype;

 baseUrl=Url;

 /* First choice: the Content-Type given in the reply header. */

 mimetype=GetMIMEType(fd,Url);

 if(mimetype!=NULL)
    doctype=GetDocumentType(mimetype);

 /* Second choice: the MIME type implied by the path of the URL. */

 if(doctype==DocUnknown)
   {
    mimetype=WhatMIMEType(Url->path);
    doctype=GetDocumentType(mimetype);
   }

 /* Nothing more to do for a document that is not understood. */

 if(doctype==DocUnknown)
    return(doctype);

 PrintMessage(Debug,"Parsing document of MIME Type '%s'.",mimetype);

 /* Discard the references from any previous document. */

 ResetReferences();

 switch(doctype)
   {
   case DocHTML:
    ParseHTML(fd,Url);
    break;

   case DocJavaClass:
    InspectJavaClass(fd,Url);
    break;

   case DocXML:
    ParseXML(fd,Url);
    break;

   case DocVRML:
    ParseVRML(fd,Url);
    break;

   default:
    break;
   }

 /* Terminate each of the reference lists with a NULL. */

 FinishReferences();

 return(doctype);
}

/* Table that maps a MIME type string onto the document type used to choose a
   parser.  GetDocumentType() walks every element of the array (its length is
   taken with sizeof), including the last one.

   The last element has an empty MIME type and no explicit doctype, so its
   doctype member is zero-initialised; an empty MIME type string therefore
   matches it.  NOTE(review): presumably DocUnknown has the value 0 so that
   this entry means "unknown" - confirm against the DocType declaration. */
static struct {
  char *mimetype;   /* MIME type as "type/subtype" */
  DocType doctype;  /* document type to report for that MIME type */
} docTypeList[] = {
	{"text/html",DocHTML},
	{"application/java",DocJavaClass},
	{"text/xml",DocXML},
	{"application/xml",DocXML},
	{"x-world/x-vrml",DocVRML},
	{"model/vrml",DocVRML},
	{"",},
};


/*++++++++++++++++++++++++++++++++++++++
  Decide the current document type based on the mime type.

  The comparison ignores case because MIME type and subtype names are
  case-insensitive ("Text/HTML" is the same type as "text/html").

  DocType GetDocumentType Returns the document type, DocUnknown if the mime
  type is NULL or is not in the table.

  char *mimetype The mime type to be tested (may be NULL).
  ++++++++++++++++++++++++++++++++++++++*/

DocType GetDocumentType(char *mimetype)
{
 const size_t ntypes=sizeof(docTypeList)/sizeof(docTypeList[0]);
 size_t i;

 /* No type at all cannot match anything (and must not reach strcasecmp). */

 if(!mimetype)
    return(DocUnknown);

 for(i=0;i<ntypes;i++)
   {
    if(!strcasecmp(mimetype,docTypeList[i].mimetype))
       return(docTypeList[i].doctype);
   }

 return(DocUnknown);
}


/*++++++++++++++++++++++++++++++++++++++
  Decide the mime type of a document based on the header.

  The reply header is read with ParseReply() and the Content-Type line is
  looked up in it.  The value is cut down to the bare "type/subtype" token by
  writing a terminating zero over the first white space or ';' that follows
  it, so the text returned by GetHTTPHeader() is modified in place.

  char *GetMIMEType Returns the mime type, or NULL if there is no header or no
  Content-Type line in it.

  int fd The file descriptor to read the header from.

  URL *Url The URL we are looking at.
  ++++++++++++++++++++++++++++++++++++++*/

char *GetMIMEType(int fd, URL *Url)
{
 char *docheader=NULL,*contenttype;
 char *mimetype=NULL;

 /* Get the header and examine it. */

 ParseReply(fd,Url,&docheader);

 /* docheader starts as NULL so that a reply that stored no header is
    detected here instead of an indeterminate pointer being used. */

 if(!docheader)
    return(NULL);

 contenttype=GetHTTPHeader(docheader,"Content-Type:");

 if(contenttype)
   {
    char *p;

    /* Step over the header name itself. */

    mimetype=contenttype+(sizeof("Content-Type:")-1);

    /* The <ctype.h> functions need an unsigned char value. */

    while(isspace((unsigned char)*mimetype))
       mimetype++;

    /* Find the end of the token; stop at the end of the string as well so
       that a value with no trailing white space or ';' cannot run the scan
       past the terminating zero. */

    p=mimetype;
    while(*p && !isspace((unsigned char)*p) && *p!=';')
       p++;
    *p=0;
   }

 /* NOTE(review): the header obtained from ParseReply() is not released here;
    the returned pointer presumably points into it - confirm who owns it. */

 return(mimetype);
}
 

/*++++++++++++++++++++++++++++++++++++++
  A function to add a reference to a list.

  A copy of the name is stored, the caller keeps ownership of its own string.
  References that start with "mailto:" or "news:" are ignored.  A NULL name
  appends the NULL terminator, but only to a list that already exists.

  If memory cannot be allocated the reference is dropped and the list is left
  as it was; there is no way to report this to the caller.

  char* name The name to add (or NULL to terminate the list).

  RefType type The type of reference.
  ++++++++++++++++++++++++++++++++++++++*/

void AddReference(char* name,RefType type)
{
 enum {chunk=16};               /* the lists grow by this many slots at a time */
 char **list;
 char *copy=NULL;
 int count;

 if(name && (!strncasecmp("mailto:",name,7) || !strncasecmp("news:",name,5)))
    return;

 list=references[type];
 count=nreferences[type];

 /* A terminator is not added to a list that has never been started. */

 if(!name && !list)
    return;

 /* Make the copy first so that a failure leaves the list untouched. */

 if(name)
   {
    copy=malloc(strlen(name)+1);
    if(!copy)
       return;
    strcpy(copy,name);
   }

 /* Every slot is in use when the count is a multiple of the chunk size
    (this includes the empty list, where realloc(NULL,...) acts as malloc). */

 if((count%chunk)==0)
   {
    char **grown=realloc(list,((size_t)count+chunk)*sizeof(char*));

    if(!grown)
      {
       if(copy)
          free(copy);
       else if(count>0)
         {
          /* There is no room for the terminator; give up the last entry so
             that the list still ends in NULL for the functions that scan
             for it (GetReferences() and free_list()). */

          free(list[count-1]);
          list[count-1]=NULL;
         }

       return;
      }

    list=grown;
    references[type]=grown;
   }

 list[count]=copy;              /* NULL when the terminator is being added */
 nreferences[type]=count+1;
}


/*++++++++++++++++++++++++++++++++++++++
  Finish the lists of references by asking AddReference() to append the NULL
  terminator to the list of every reference type.
  ++++++++++++++++++++++++++++++++++++++*/

void FinishReferences(void)
{
 RefType type=0;

 while(type<NRefTypes)
   {
    AddReference(NULL,type);
    type++;
   }
}


/*++++++++++++++++++++++++++++++++++++++
  Set another base URL; a NULL pointer is ignored and the current base URL
  is kept.

  URL *Url The new base URL (the pointer is stored, no copy is made).
  ++++++++++++++++++++++++++++++++++++++*/

void SetBaseURL(URL *Url)
{
 if(Url==NULL)
    return;

 baseUrl=Url;
}


/*++++++++++++++++++++++++++++++++++++++
  Get a list of the references of the specified type.

  The stored list is processed in place before it is returned: each link is
  passed through LinkURL() with the base URL and then later duplicates of an
  entry are removed, keeping the first one.  The list must already be NULL
  terminated.

  char **GetReferences Returns the list of URLs (the internal list, not a
  copy) or NULL if there is no list of this type.

  RefType type The type of list that is required.
  ++++++++++++++++++++++++++++++++++++++*/

char **GetReferences(RefType type)
{
 char **list=references[type];
 int first,later,shift;

 if(list==NULL)
    return(NULL);

 /* Canonicalise the links; LinkURL() gives back either the same string or
    a replacement for it. */

 for(first=0;list[first]!=NULL;first++)
   {
    char *linked=LinkURL(baseUrl,list[first]);

    if(linked==list[first])
       continue;

    free(list[first]);
    list[first]=linked;
   }

 /* Remove the duplicates; for each entry look at everything after it. */

 for(first=0;list[first]!=NULL;first++)
   {
    later=first+1;

    while(list[later]!=NULL)
      {
       if(strcmp(list[first],list[later])!=0)
         {
          later++;
          continue;
         }

       /* Drop the duplicate and close the gap, moving the terminating NULL
          down as well.  The same slot is then tested again because it now
          holds the entry that followed. */

       free(list[later]);

       for(shift=later;;shift++)
         {
          list[shift]=list[shift+1];
          if(list[shift]==NULL)
             break;
         }

       nreferences[type]--;
      }
   }

 return(list);
}


/*++++++++++++++++++++++++++++++++++++++
  Reset all of the reference lists by freeing the list of each reference type.
  ++++++++++++++++++++++++++++++++++++++*/

void ResetReferences(void)
{
 RefType type=0;

 while(type<NRefTypes)
   {
    free_list(type);
    type++;
   }
}


/*++++++++++++++++++++++++++++++++++++++
  Free up a list of references: every string up to the NULL terminator, then
  the list itself.  The list pointer and its count are cleared afterwards,
  whether or not there was a list to free.

  RefType type The type of list that is to be freed.
  ++++++++++++++++++++++++++++++++++++++*/

static void free_list(RefType type)
{
 char **list=references[type];

 if(list!=NULL)
   {
    char **entry;

    for(entry=list;*entry!=NULL;entry++)
       free(*entry);

    free(list);
   }

 references[type]=NULL;
 nreferences[type]=0;
}
