/***************************************
  $Header: /home/amb/wwwoffle/RCS/document.c 1.2 1998/12/05 09:28:38 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.4.
  Document parsing functions.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1998 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/

#include <string.h>
#include <stdlib.h>

#include "wwwoffle.h"
#include "document.h"
#include "misc.h"


/*+ The type of document we think it is; reset and then set by ParseDocument() and
    read back by DocumentType(). +*/
static DocType doctype=DocUnknown;

/*+ The list of references; one dynamically allocated array of strings for each type
    of reference (NULL while that type has no entries). +*/
static char **references[NRefTypes];

/*+ The number of references; this is the number of array slots in use for each type
    and so includes the NULL terminator once FinishReferences() has appended it. +*/
static int nreferences[NRefTypes];

/*+ The base URL from which references are related; the URL given to ParseDocument()
    unless FinishReferences() was given a replacement. +*/
static URL *baseUrl;


/* Local function that releases one list of references and resets its count. */
static void free_list(RefType type);


/*++++++++++++++++++++++++++++++++++++++
  Parse a document.

  DocType ParseDocument Returns the type of document that was detected; this is DocUnknown
                        when the document is not of a type that is parsed here.

  int fd The file descriptor to read the document from.

  URL *Url The URL of the document, this also becomes the base URL for the references.
  ++++++++++++++++++++++++++++++++++++++*/

DocType ParseDocument(int fd,URL *Url)
{
 char *docheader=NULL,*mimetype=NULL;
 size_t pathlen;

 baseUrl=Url;
 doctype=DocUnknown;

 /* Get the header and examine it. */

 ParseReply(fd,Url,&docheader);

 /* docheader starts as NULL so that a reply that could not be parsed is never examined. */
 /* NOTE(review): docheader is never released in this function - confirm whether
    ParseReply() passes ownership of the header to its caller. */

 if(docheader)
    mimetype=GetHTTPHeader(docheader,"Content-Type:");

 if(mimetype)
   {
    char *eol=strchr(mimetype,'\n');
    char *p;

    /* Without a newline the header line runs to the end of the string. */

    if(!eol)
       eol=mimetype+strlen(mimetype);

    /* Only a match before the end of the Content-Type line counts.  The search starts at
       the pointer that was returned instead of skipping 13 characters; neither pattern can
       match inside the text "Content-Type:" so the result is the same and a short string
       is never stepped over. */

    if((p=strstr(mimetype,"text/html")) && p<eol)
       doctype=DocHTML;
    else if((p=strstr(mimetype,"application/java")) && p<eol)
       doctype=DocJavaClass;
   }

 /* Check the file extension, this overrides the Content-Type header.
    The length is tested first so that a path shorter than ".class" is never indexed
    before its first character. */

 pathlen=strlen(Url->path);

 if(pathlen>=6 && !strcmp(Url->path+pathlen-6,".class"))
    doctype=DocJavaClass;

 /* Parse the document. */

 if(doctype==DocHTML)
    ParseHTML(fd,Url);
 else if(doctype==DocJavaClass)
    InspectJavaClass(fd,Url);

 return(doctype);
}


/*++++++++++++++++++++++++++++++++++++++
  Report the type of the document that was most recently examined.

  DocType DocumentType Returns the type stored by the last call to ParseDocument(),
                       or DocUnknown if nothing has been stored yet.
  ++++++++++++++++++++++++++++++++++++++*/

DocType DocumentType(void)
{
 DocType current=doctype;

 return current;
}


/*++++++++++++++++++++++++++++++++++++++
  A function to add a reference to a list.

  char* name The name to add (it is copied), or NULL to append the terminator to a list
             that already has entries.

  RefType type The type of reference.

  References that start with "mailto:" or "news:" (in any case) are ignored.  If memory
  cannot be allocated the reference is dropped and the list is left usable.
  ++++++++++++++++++++++++++++++++++++++*/

void AddReference(char* name,RefType type)
{
 char *copy=NULL;
 int n;

 if(name && (!strncasecmp("mailto:",name,7) || !strncasecmp("news:",name,5)))
    return;

 /* A terminator is only appended to a list that exists, an empty list stays NULL. */

 if(!name && !references[type])
    return;

 n=nreferences[type];

 /* Take the private copy first so that a failure leaves the list untouched. */

 if(name)
   {
    copy=malloc(strlen(name)+1);

    if(!copy)
       return;

    strcpy(copy,name);
   }

 /* The array grows in steps of 16 entries; realloc() of the NULL pointer that an empty
    list holds acts as the first malloc(). */

 if((n%16)==0)
   {
    char **grown=realloc(references[type],(n+16)*sizeof(char*));

    if(!grown)
      {
       /* The old array is still valid.  When it was the terminator that could not be
          stored the last entry is given up in its favour so that the list is still
          NULL terminated for the functions that walk it. */

       if(!name && n>0)
         {
          free(references[type][n-1]);
          references[type][n-1]=NULL;
         }

       free(copy);
       return;
      }

    references[type]=grown;
   }

 /* copy is NULL here when it is the terminator that is being added. */

 references[type][n]=copy;
 nreferences[type]=n+1;
}


/*++++++++++++++++++++++++++++++++++++++
  Terminate every list of references and optionally replace the base URL.

  URL *Url The new base URL, or NULL to keep the current one.
  ++++++++++++++++++++++++++++++++++++++*/

void FinishReferences(URL *Url)
{
 RefType type=0;

 /* The base URL is not used while terminating the lists so it can be updated first. */

 if(Url)
    baseUrl=Url;

 /* A NULL name makes AddReference() append the terminator to each list that has entries. */

 while(type<NRefTypes)
   {
    AddReference(NULL,type);
    type++;
   }
}


/*++++++++++++++++++++++++++++++++++++++
  Get the list of references of one type, made canonical and without duplicates.

  char **GetReferences Returns the NULL terminated list of URLs (the list held in this file,
                       not a copy) or NULL if there is no list of that type.

  RefType type The type of list that is required.

  The list is walked up to its NULL entry, which is the one that FinishReferences() appends.
  ++++++++++++++++++++++++++++++++++++++*/

char **GetReferences(RefType type)
{
 char **list=references[type];
 int keep,scan;

 if(list==NULL)
    return(NULL);

 /* Pass 1: make each link canonical with respect to the base URL.  LinkURL() may hand
    back the string it was given; only a different pointer replaces the stored one. */

 for(keep=0;list[keep]!=NULL;keep++)
   {
    char *linked=LinkURL(baseUrl,list[keep]);

    if(linked==list[keep])
       continue;

    free(list[keep]);
    list[keep]=linked;
   }

 /* Pass 2: for each entry that is kept, compact the rest of the list in place; later
    entries that are equal to it are freed and the others slide down in their original order. */

 for(keep=0;list[keep]!=NULL;keep++)
   {
    int out=keep+1;

    for(scan=keep+1;list[scan]!=NULL;scan++)
      {
       if(strcmp(list[keep],list[scan])==0)
         {
          free(list[scan]);
          nreferences[type]--;
         }
       else
          list[out++]=list[scan];
      }

    list[out]=NULL;
   }

 return(list);
}


/*++++++++++++++++++++++++++++++++++++++
  Discard the lists of references of every type.
  ++++++++++++++++++++++++++++++++++++++*/

void ResetReferences(void)
{
 RefType type=0;

 while(type<NRefTypes)
   {
    free_list(type);
    type++;
   }
}


/*++++++++++++++++++++++++++++++++++++++
  Release one list of references and mark it as empty.

  RefType type The type of list that is to be freed.
  ++++++++++++++++++++++++++++++++++++++*/

static void free_list(RefType type)
{
 char **list=references[type];

 if(list!=NULL)
   {
    char **entry;

    /* The strings up to the NULL entry are released first and then the array itself. */

    for(entry=list;*entry!=NULL;entry++)
       free(*entry);

    free(list);
   }

 nreferences[type]=0;
 references[type]=NULL;
}
