/***************************************
  $Header: /home/amb/wwwoffle/RCS/parse.c 2.39 1998/11/21 13:18:45 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.4.
  Functions to parse the HTTP requests.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1996,97,98 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>

#include "wwwoffle.h"
#include "misc.h"
#include "proto.h"
#include "errors.h"
#include "config.h"

/*+ The time that the program went online; RequestChanges() compares the
    modification time of spooled files against it. +*/
time_t OnlineTime=0;

/*+ Headers from a request that can be re-used in automatically generated requests.
    ParseRequest() matches these as case-sensitive prefixes of each request line,
    so "Accept" also selects lines such as "Accept-Language". +*/
static char *reusable_headers[]={"User-Agent",
                                 "Accept"};

/*+ The headers from the request that are re-usable: the matching request lines
    concatenated together (NULL until the first one is seen).  RequestURL()
    appends this string verbatim to the requests that it builds. +*/
static char *reusable_header=NULL;


/*++++++++++++++++++++++++++++++++++++++
  Parse the request to the server.

  char *ParseRequest Returns the URL (malloc'd, owned by the caller) or NULL if it failed.

  int fd The file descriptor to read the request from.

  char **request_head Return the header of the request (malloc'd); the Authorization
                      header line is not copied into it, its credentials are put in the URL.

  char **request_body Return the body of the request (POST only, else NULL).

  For POST requests the returned URL has '!POST:<hash of body>' added to its arguments
  so that different POSTed bodies have different URLs.
  ++++++++++++++++++++++++++++++++++++++*/

char *ParseRequest(int fd,char **request_head,char **request_body)
{
 char *url=NULL,*line=NULL;
 int length=-1;
 size_t i;

 *request_head=NULL;
 *request_body=NULL;

 while((line=read_line_or_timeout(fd,line)))
   {
    if(!strncasecmp("Content-Length:",line,15))
       length=atoi(&line[15]);

    /* Find re-usable headers (for recursive requests).
       The match is a case-sensitive prefix match and the matching lines
       accumulate in the file-scope reusable_header string. */

    for(i=0;i<sizeof(reusable_headers)/sizeof(reusable_headers[0]);i++)
       if(!strncmp(line,reusable_headers[i],strlen(reusable_headers[i])))
         {
          if(reusable_header)
            {
             reusable_header=(char*)realloc((void*)reusable_header,strlen(reusable_header)+strlen(line)+1);
             strcat(reusable_header,line);
            }
          else
            {
             reusable_header=(char*)malloc(strlen(line)+1);
             strcpy(reusable_header,line);
            }
         }

    /* Check for firewall operation (a server-style request '/path' plus a Host header). */

    if(!strncasecmp("Host:",line,5) && url && *url=='/')
      {
       /* The value starts straight after the colon; the optional whitespace
          is skipped below so 'Host:name' and 'Host: name' both work. */
       char *p=line+5,*host=(char*)malloc(strlen(line));

       while(*p==' ' || *p=='\t')
          p++;
       strcpy(host,p);

       /* Trim the line ending without ever stepping in front of the buffer. */
       p=host+strlen(host);
       while(p>host && (p[-1]=='\r' || p[-1]=='\n' || p[-1]==' ' || p[-1]=='\t'))
          *--p=0;

       if(*host)                /* an empty Host header cannot make a URL */
         {
          p=url;
          url=(char*)malloc(strlen(url)+strlen(host)+8);
          strcpy(url,"http://");
          strcat(url,host);
          strcat(url,p);
          free(p);
         }

       free(host);
      }

    /* Check for passwords; the header is consumed and never reaches request_head. */

    if(!strncasecmp("Authorization:",line,14))
      {
       char *p=line+strlen(line),*userpass=NULL;
       int l;

       while(p>line && (p[-1]=='\n' || p[-1]=='\r' || p[-1]==' '))
          *--p=0;

       p=line+14;
       while(*p==' ') p++;              /* before the scheme */
       while(*p && *p!=' ') p++;        /* the scheme, e.g. 'Basic' */
       while(*p==' ') p++;              /* before the credentials */

       /* Without a request line or without credentials there is nothing to do. */
       if(url && *p)
          userpass=Base64Decode(p,&l);  /* NOTE(review): ownership of the result is not visible here - confirm whether it must be freed */

       if(userpass)
         {
          char *copy=NULL,*rest=url;

          if(*url=='/')
            {
             char *localhost=GetLocalHost(1);

             copy=(char*)malloc(strlen(url)+strlen(userpass)+strlen(localhost)+10);

             strcpy(copy,"http://");
             strcat(copy,userpass);
             strcat(copy,"@");
             strcat(copy,localhost);
            }
          else
            {
             /* Find the end of 'scheme://' so the credentials go before the host. */
             while(*rest && *rest!=':') rest++;

             if(*rest)
               {
                rest++;
                while(*rest=='/') rest++;

                copy=(char*)malloc(strlen(url)+strlen(userpass)+2);

                memcpy(copy,url,(size_t)(rest-url));
                copy[rest-url]=0;
                strcat(copy,userpass);
                strcat(copy,"@");
               }
            }

          if(copy)              /* a URL without a scheme is left untouched */
            {
             strcat(copy,rest);

             free(url);
             url=copy;
            }
         }

       continue;
      }

    /* Create the request head */

    if(!*request_head)          /* first line */
      {
       *request_head=(char*)malloc(strlen(line)+1);
       strcpy(*request_head,line);

       url=(char*)malloc(strlen(line)+1);
       if(sscanf(line,"%*s %s",url)!=1)
         {free(url);free(line);return(NULL);}
      }
    else
      {
       *request_head=(char*)realloc((void*)*request_head,strlen(line)+strlen(*request_head)+1);
       strcat(*request_head,line);
      }

    if(*line=='\r' || *line=='\n')      /* blank line, end of the header */
       break;
   }

 if(line)
    free(line);

 if(!*request_head)
    return(NULL);

 if(!strncasecmp("POST",*request_head,4))
   {
    /* A missing or negative Content-Length cannot describe a body. */
    if(length<0)
      {free(url);return(NULL);}

    *request_body=(char*)malloc((size_t)length+1);
    if(!*request_body)
      {free(url);return(NULL);}

    if(length)
      {
       int m,l=length;

       do
         {
          m=read_data_or_timeout(fd,&(*request_body)[length-l],l);
         }
       while(m>0 && (l-=m));

       if(l)                    /* short read */
         {free(url);return(NULL);}

       (*request_body)[length]=0;

       empty_buffer(fd);
      }
    else
       *(*request_body)=0;

    /* Room for the '?' or '!' separator, '!POST:' and the hash. */
    url=(char*)realloc((void*)url,strlen(url)+32);

    if(strchr(url,'?'))
      {
       /* Shift the arguments up by one to turn 'url?args' into 'url?!args'. */
       char *from=url+strlen(url),*to=from+1;
       while(*from!='?')
          *to--=*from--;
       *to='!';
      }
    else
       strcat(url,"?");

    sprintf(url+strlen(url),"!POST:%s",MakeHash(*request_body));
   }

 return(url);
}


/*++++++++++++++++++++++++++++++++++++++
  Modify the request to ask for changes since the spooled file.

  int RequestChanges Returns 1 if the file needs changes made, 0 if not, or -1 in case of an error
                     (the request head has no end of line to insert the new header after).

  int fd The file descriptor of the spooled file.

  char **request_head The head of the HTTP request to modify; when an If-Modified-Since
                      header is added the old string is freed and replaced by a new one.
  ++++++++++++++++++++++++++++++++++++++*/

int RequestChanges(int fd,char **request_head)
{
 struct stat buf;
 char *reply;
 int status=0,retval=0;

 /* The first line of the spooled file holds the status of the stored reply. */

 reply=read_line(fd,NULL);

 if(reply)
   {
    sscanf(reply,"%*s %d",&status);
    free(reply);
   }

 if(status==0)
   {
    PrintMessage(Debug,"Requesting changes (Empty or no status).");
    retval=1;
   }
 else if(status>=200 && status<400 && !fstat(fd,&buf))
   {
    time_t age=time(NULL)-buf.st_mtime;

    if(RequestChangedOnce && buf.st_mtime>OnlineTime)
      {
       PrintMessage(Debug,"Not requesting changes (Once per online session).");
       retval=0;
      }
    else if(age<RequestChanged)
      {
       /* time_t need not be an int, convert it to match the %d format. */
       PrintMessage(Debug,"Not requesting changes (Last changed %d seconds ago).",(int)age);
       retval=0;
      }
    else
      {
       char *eol=strchr(*request_head,'\n');

       if(!eol)
         {
          PrintMessage(Debug,"Not requesting changes (Malformed request head).");
          retval=-1;
         }
       else
         {
          char *date=RFC822Date(buf.st_mtime,1);
          size_t first=(size_t)(eol-*request_head)+1;  /* request line including its '\n' */
          char *copy=(char*)malloc(strlen(*request_head)+strlen(date)+32);

          /* request line, then the new header, then the rest of the old head. */

          memcpy(copy,*request_head,first);
          sprintf(copy+first,"If-Modified-Since: %s\r\n%s",date,eol+1);

          free(*request_head);

          *request_head=copy;

          PrintMessage(Debug,"Requesting changes.");
          retval=1;
         }
      }
   }

 return(retval);
}


/*++++++++++++++++++++++++++++++++++++++
  Return the location that the URL has been moved to.

  char *MovedLocation Returns the new URL (from LinkURL() or a malloc'd copy of the
                      header value) or NULL if there is no Location header.

  URL *Url The original URL.

  char *reply_head The head of the original HTTP reply (temporarily modified but restored).
  ++++++++++++++++++++++++++++++++++++++*/

char *MovedLocation(URL *Url,char *reply_head)
{
 char *location,*eol,oldeol;
 char *new;

 location=GetHTTPHeader(reply_head,"Location:");

 if(!location)
    return(NULL);

 /* Step over the header name, then any amount of white space (including none). */

 location+=9;
 while(*location==' ' || *location=='\t')
    location++;

 /* Temporarily terminate the value at the end of its line. */

 eol=strchr(location,'\n');
 if(!eol)
    eol=location+strlen(location);
 if(eol>location && eol[-1]=='\r')
    eol--;
 oldeol=*eol;
 *eol=0;

 new=LinkURL(Url,location);
 if(new==location)              /* not changed; take a copy that outlives reply_head */
   {
    new=(char*)malloc(strlen(location)+1);
    strcpy(new,location);
   }

 *eol=oldeol;

 return(new);
}


/*++++++++++++++++++++++++++++++++++++++
  Create a new request for a page.

  char *RequestURL Returns a malloc'd HTTP/1.0 GET request head for the page.

  URL *Url The URL to get.

  char *referer The referring URL or NULL if none.

  The request includes a Basic Authorization header if the URL has a username
  and the re-usable headers remembered by ParseRequest() if there are any.
  ++++++++++++++++++++++++++++++++++++++*/

char *RequestURL(URL *Url,char *referer)
{
 char *new,*authheader=NULL;

 if(Url->user)
   {
    char *userpass=(char*)malloc(strlen(Url->user)+(Url->pass?strlen(Url->pass):0)+2);
    char *encoded;

    strcpy(userpass,Url->user);
    strcat(userpass,":");
    if(Url->pass)
       strcat(userpass,Url->pass);

    /* The header must be sized from the encoded text, which is longer than
       the plain 'user:pass' that it encodes. */

    encoded=Base64Encode(userpass,strlen(userpass)); /* NOTE(review): ownership of the result is not visible here - confirm whether it must be freed */
    authheader=(char*)malloc(strlen(encoded)+32);

    sprintf(authheader,"Authorization: Basic %s\r\n",encoded);

    free(userpass);
   }

 new=(char*)malloc(strlen(Url->name)+32+
                   (referer?strlen(referer)+16:0)+
                   (authheader?strlen(authheader):0)+
                   (reusable_header?strlen(reusable_header):0));

 sprintf(new,"GET %s HTTP/1.0\r\n",Url->name);
 if(referer)
    sprintf(&new[strlen(new)],"Referer: %s\r\n",referer);
 if(authheader)
   {
    strcat(new,authheader);
    free(authheader);           /* only needed while building the request */
   }
 if(reusable_header)
    strcat(new,reusable_header);
 strcat(new,"\r\n");

 return(new);
}


/*++++++++++++++++++++++++++++++++++++++
  Modify the request taking into account censoring of header and modified URL.

  char *ModifyRequest Return the new request (malloc'd).

  URL *Url The actual URL.

  char *request_head The original head of the HTTP request possibly with a different URL;
                     it is modified while being parsed and is freed before returning.

  The new head has the request line rewritten for Url->name, a new Host header,
  'Connection: close' for HTTP/1.1, an Authorization header if the URL has a username
  and then the remaining original headers after passing them through CensoredHeader().
  ++++++++++++++++++++++++++++++++++++++*/

char *ModifyRequest(URL *Url,char *request_head)
{
 char *new;
 char *hostheader=(char*)malloc(strlen(Url->host)+16),*closeheader,*authheader="";
 char *bol,*to,http[16];

 /* Make up the new headers. */

 sprintf(hostheader,"Host: %s\r\n",Url->host);
 closeheader="Connection: close\r\n";

 if(Url->user)
   {
    char *userpass=(char*)malloc(strlen(Url->user)+(Url->pass?strlen(Url->pass):0)+2);
    char *encoded;

    strcpy(userpass,Url->user);
    strcat(userpass,":");
    if(Url->pass)
       strcat(userpass,Url->pass);

    /* Size the header from the encoded text, it is longer than 'user:pass'. */

    encoded=Base64Encode(userpass,strlen(userpass)); /* NOTE(review): ownership of the result is not visible here - confirm whether it must be freed */
    authheader=(char*)malloc(strlen(encoded)+32);

    sprintf(authheader,"Authorization: Basic %s\r\n",encoded);

    free(userpass);
   }

 /* NOTE(review): the space for the censored headers relies on the 2*strlen(request_head)
    slack; the length of a replacement from CensoredHeader() is not visible here. */

 new=(char*)malloc(2*strlen(request_head)+strlen(closeheader)+strlen(hostheader)+strlen(authheader)+strlen(Url->name)+16);

 /* Parse the old header and create a new one.
    The version is limited to the size of http[] and defaults to HTTP/1.0
    when the request line does not have one. */

 new[0]=0;
 strcpy(http,"HTTP/1.0");

 sscanf(request_head,"%s %*s %15s",new,http);
 strcat(new," ");
 strcat(new,Url->name);

 /* Remove the false arguments from POSTed URLs:
    'url?!args!POST:hash' becomes 'url?args' and 'url?!POST:hash' becomes 'url'. */

 if(!strncasecmp(new,"POST",4))
   {
    char *pling=strstr(new,"?!");

    if(pling)
      {
       char *pling2=strchr(++pling+1,'!');

       if(pling2)
          for(;pling<pling2;pling++)
             *pling=*(pling+1);

       *(pling-1)=0;
      }
   }

 strcat(new," ");
 strcat(new,http);
 strcat(new,"\r\n");

 strcat(new,hostheader);

 /* Check for HTTP 1.1 and add a Connection header */

 if(!strncmp(http,"HTTP/1.1",8))
    strcat(new,closeheader);

 /* Add an authentication header. */

 if(*authheader)
   {
    strcat(new,authheader);
    free(authheader);
   }

 /* Censor the header, one line at a time starting after the request line. */

 to=new+strlen(new);
 bol=strchr(request_head,'\n');
 bol=bol?bol+1:request_head+strlen(request_head);

 while(*bol)
   {
    char *censor;
    char *eol=strchr(bol,'\n');
    char *next=eol?eol+1:bol+strlen(bol);       /* an unterminated last line ends the loop */

    if(strncasecmp("Host:",bol,5) &&
       strncasecmp("Connection:",bol,11) &&
       strncasecmp("Proxy-Connection:",bol,17) &&
       strncasecmp("Proxy-Authorization:",bol,20) &&
       strncasecmp("Pragma: wwwoffle",bol,16))
      {
       /* Not one of the headers that are always dropped; terminate the line. */

       if(eol)
         {
          if(eol>bol && *(eol-1)=='\r')
             *(eol-1)=0;
          else
             *eol=0;
         }

       if((censor=CensoredHeader(Url->name,bol)))
         {
          char *bolc=censor;

          while(*bolc)
             *to++=*bolc++;
          *to++='\r';
          *to++='\n';

          if(censor!=bol)
            {
             PrintMessage(Debug,"CensorHeader replaced '%s' by '%s'",bol,censor);
             free(censor);
            }
         }
       else if(!strncasecmp("Referer:",bol,8))
         {
          /* NOTE(review): this branch is only reached when CensoredHeader() returned NULL,
             so a Referer is always sent (with the POST arguments removed) - confirm this is intended. */

          char *pling=strstr(bol,"?!");

          if(pling)
            {
             char *pling2=strchr(++pling+1,'!');

             if(pling2)
                for(;pling<pling2;pling++)
                   *pling=*(pling+1);

             *(pling-1)=0;
            }

          while(*bol)
             *to++=*bol++;
          *to++='\r';
          *to++='\n';
         }
       else
          PrintMessage(Debug,"CensorHeader Removed '%s'",bol);
      }

    bol=next;
   }

 *to=0;

 /* tidy up and exit. */

 free(hostheader);
 free(request_head);

 return(new);
}


/*++++++++++++++++++++++++++++++++++++++
  Change the request to one that contains an authorisation string if required.

  char *MakeRequestAuthorised Returns a new (malloc'd) request with the authorisation if required or else the old request.

  char *proxy The name of the proxy.

  char *request_head The old HTTP request head (not freed here).

  An existing Proxy-Authorization header is replaced, otherwise the new header
  is inserted directly after the request line.
  ++++++++++++++++++++++++++++++++++++++*/

char *MakeRequestAuthorised(char *proxy,char *request_head)
{
 char *new=request_head;
 char *userpass=WhatProxyAuth(proxy);

 if(userpass)
   {
    char *bol,*eol;

    /* bol is where the new header goes, eol is where the kept text resumes. */

    if((bol=GetHTTPHeader(request_head,"Proxy-Authorization:")))
      {
       /* Replace the whole old line, including its first character. */
       eol=strchr(bol,'\n');
       eol=eol?eol+1:bol+strlen(bol);
      }
    else
      {
       bol=strchr(request_head,'\n');
       eol=bol=bol?bol+1:NULL;
      }

    if(bol)                     /* no end of line at all: leave the request alone */
      {
       char *userpassencoded=Base64Encode(userpass,strlen(userpass));
       char *auth=(char*)malloc(strlen(userpassencoded)+32);
       size_t before=(size_t)(bol-request_head);

       sprintf(auth,"Proxy-Authorization: Basic %s\r\n",userpassencoded);

       new=(char*)malloc(strlen(request_head)+strlen(auth)+1);

       memcpy(new,request_head,before);
       strcpy(new+before,auth);
       strcat(new,eol);

       free(auth);
      }
   }

 return(new);
}


/*++++++++++++++++++++++++++++++++++++++
  Change the request from one to a proxy to a normal one.

  char *MakeRequestNonProxy Return a new (malloc'd) request that is suitable for a non-proxy server,
                            or NULL if there is no memory.

  char *request_head The buffer containing the head of the HTTP request (not modified).

  'GET http://www.host.domain/path HTTP/1.0' becomes 'GET /path HTTP/1.0'; everything
  after the URL is copied unchanged.  A request line that is not of the form
  'method scheme://host/path' is returned as an unmodified copy.
  ++++++++++++++++++++++++++++++++++++++*/

char *MakeRequestNonProxy(char *request_head)
{
 char *new=(char*)malloc(strlen(request_head)+1);
 char *space;

 if(!new)
    return(NULL);

 /* The URL is already in canonical form because of the ModifyRequest() function,
    but every search is still limited to the URL so that nothing can run past it. */

 space=strchr(request_head,' ');                /* 'GET ' */

 if(space)
   {
    char *url=space+1;
    char *url_end=url+strcspn(url," \r\n");
    char *colon=memchr(url,':',(size_t)(url_end-url));         /* 'http://' */

    if(colon && url_end-colon>=3 && colon[1]=='/' && colon[2]=='/')
      {
       char *host=colon+3;
       char *path=memchr(host,'/',(size_t)(url_end-host));      /* 'www.host.domain/' */

       if(path)
         {
          size_t method=(size_t)(url-request_head);

          memcpy(new,request_head,method);
          strcpy(new+method,path);

          return(new);
         }
      }
   }

 strcpy(new,request_head);

 return(new);
}


/*++++++++++++++++++++++++++++++++++++++
  Parse the reply from the server.

  int ParseReply Return the numeric status of the reply, or 0 if the input ended
                 before the blank line that ends the head or the status cannot be read.

  int fd The file descriptor to read from (or -1 to use the Protocol's method).

  URL *Url The URL that we are reading.

  char **reply_head Return the head of the HTTP reply (malloc'd, NULL if nothing was read).
  ++++++++++++++++++++++++++++++++++++++*/

int ParseReply(int fd,URL *Url,char **reply_head)
{
 char *buffer=NULL;
 int code=0;

 *reply_head=NULL;

 for(;;)
   {
    size_t extra;

    /* Get the next line, handing the previous one back to the reader. */

    if(fd==-1)
       buffer=(Url->Protocol->readhead)(buffer);
    else
       buffer=read_line(fd,buffer);

    if(!buffer)                 /* ran out of input inside the head */
       return(0);

    /* Add the line (and its terminator) to the end of the head. */

    extra=strlen(buffer);

    if(*reply_head)
      {
       size_t sofar=strlen(*reply_head);

       *reply_head=(char*)realloc((void*)*reply_head,extra+sofar+1);
       memcpy(*reply_head+sofar,buffer,extra+1);
      }
    else
      {
       *reply_head=(char*)malloc(extra+1);
       memcpy(*reply_head,buffer,extra+1);
      }

    /* A blank line ends the head.
       NOTE(review): the last line buffer is not released here - confirm who owns it. */

    if(*buffer=='\r' || *buffer=='\n')
       break;
   }

 if(sscanf(*reply_head,"%*s %d",&code)!=1)
    code=0;

 return(code);
}


/*++++++++++++++++++++++++++++++++++++++
  Find the status of a spooled page.

  int SpooledPageStatus Returns the status number from the first line of the spooled
                        file, or 0 if the file cannot be opened or has no status.

  URL *Url The URL to check.
  ++++++++++++++++++++++++++++++++++++++*/

int SpooledPageStatus(URL *Url)
{
 int code=0;
 int file=OpenWebpageSpoolFile(1,Url);
 char *first;

 if(file==-1)
    return(code);

 init_buffer(file);

 /* Only the first line is needed, the number follows the first word. */

 first=read_line(file,NULL);

 if(first)
   {
    sscanf(first,"%*s %d",&code);
    free(first);
   }

 close(file);

 return(code);
}


/*++++++++++++++++++++++++++++++++++++++
  Search through a HTTP header for a specified line.

  char *GetHTTPHeader Returns a pointer into head at the start of the first line that
                      begins with the text (ignoring case) or NULL if none.

  char *head The header to search through (may be NULL).

  char* line The start of the line to look for, e.g. "Location:".
  ++++++++++++++++++++++++++++++++++++++*/

char *GetHTTPHeader(char *head,char* line)
{
 char *bol=head;
 size_t n=strlen(line);

 while(bol && *bol)
   {
    if(!strncasecmp(bol,line,n))
       return(bol);

    /* Move to the start of the next line; a last line without a '\n'
       ends the search instead of producing an invalid pointer. */

    bol=strchr(bol,'\n');
    if(bol)
       bol++;
   }

 return(NULL);
}
