/***************************************
  $Header: /home/amb/wwwoffle/RCS/refresh.c 1.2 1997/03/25 20:56:01 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 1.1.
  The HTML interactive page to refresh a URL.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unistd.h>

#include "wwwoffle.h"
#include "config.h"
#include "sockets.h"
#include "errors.h"


/* Local functions that generate the pages that RefreshPage() replies with. */

static void RefreshFormPage(int fd);
static void RefreshFormError(int fd,char *body);
static void RefreshFormSingle(int fd,char *url);
static void RefreshFormRecursive(int fd,char *url,int method);
static void IllegalRefreshPage(int fd,char *path);


/*++++++++++++++++++++++++++++++++++++++
  Convert one hexadecimal digit into its numerical value.

  int refresh_hex_value Returns the value (0 to 15) or -1 if the character is not a hexadecimal digit.

  char digit The character to convert.
  ++++++++++++++++++++++++++++++++++++++*/

static int refresh_hex_value(char digit)
{
 if(digit>='0' && digit<='9') return(digit-'0');
 if(digit>='a' && digit<='f') return(digit-'a'+10);
 if(digit>='A' && digit<='F') return(digit-'A'+10);

 return(-1);
}


/*++++++++++++++++++++++++++++++++++++++
  Decode the %XX escapes of a URL-encoded string in place.

  int refresh_url_decode Returns 0 on success or -1 if an escape is truncated,
                         is not hexadecimal or would decode to a NUL byte.

  char *string The string to decode, it is overwritten by the decoded version (which is never longer).
  ++++++++++++++++++++++++++++++++++++++*/

static int refresh_url_decode(char *string)
{
 int i,j;

 for(i=0,j=0;string[i];i++)
    if(string[i]=='%')
      {
       int high,low;

       /* A NUL is not a hex digit so the first test failing stops the second
          one from looking beyond the end of the string. */

       if((high=refresh_hex_value(string[i+1]))<0 ||
          (low=refresh_hex_value(string[i+2]))<0 ||
          (high==0 && low==0))
          return(-1);

       string[j++]=(char)(high*16+low);
       i+=2;
      }
    else
       string[j++]=string[i];

 string[j]=0;

 return(0);
}


/*++++++++++++++++++++++++++++++++++++++
  Send to the client a page to allow refreshes using HTML.

  An empty path gives the form itself, the path "refresh" handles the reply from
  the form (the "method" and "url" fields of the request body) and any other
  path gives the illegal page message.

  int fd The file descriptor of the client.

  char *path The path that was specified by the user.

  char *request The HTTP request sent by the browser.
  ++++++++++++++++++++++++++++++++++++++*/

void RefreshPage(int fd,char *path,char *request)
{
 if(!*path)
    RefreshFormPage(fd);
 else if(!strcmp("refresh",path))
   {
    size_t i,length;
    int method=-100;            /* -100 means that no method field was seen. */
    char *body,*copy,*url=NULL;

    /* Find the blank line that ends the headers (any mix of CR LF and LF)
       and step over the two line feeds to reach the body. */

    if((body=strstr(request,"\n\n")) ||
       (body=strstr(request,"\r\n\n")) ||
       (body=strstr(request,"\n\r\n")) ||
       (body=strstr(request,"\r\n\r\n")))
      {
       body=strchr(body,'\n');
       body++;
       body=strchr(body,'\n');
       body++;
      }
    else
      {
       RefreshFormError(fd,NULL);
       return;
      }

    /* Work on a copy so that the original body can be shown in an error page. */

    copy=(char*)malloc(strlen(body)+1);
    if(!copy)
      {
       RefreshFormError(fd,body);
       return;
      }
    strcpy(copy,body);

    /* Split the copy into NUL terminated fields at every '&' (including a
       trailing one) and then look at each field in turn. */

    length=strlen(copy);

    for(i=0;i<length;i++)
       if(copy[i]=='&')
          copy[i]=0;

    for(i=0;i<length;i+=strlen(&copy[i])+1)
      {
       char *field=&copy[i];

       if(!strncmp("method=",field,7))
          method=atoi(field+7);
       if(!strncmp("url=",field,4))
          url=field+4;
      }

    /* A '+' (an encoded space) is not allowed in the URL, the decoding is done
       last so that it is only applied to an otherwise acceptable URL. */

    if(url==NULL || *url==0 || method==-100 || strchr(url,'+') || refresh_url_decode(url))
      {
       RefreshFormError(fd,body);
       free(copy);
       return;
      }

    if(method==0)
       RefreshFormSingle(fd,url);
    else
       RefreshFormRecursive(fd,url,method);

    free(copy);
   }
 else
    IllegalRefreshPage(fd,path);
}


/*++++++++++++++++++++++++++++++++++++++
  The form that the user enters the details on.

  The page is sent in three parts; the form, a note that depends on the
  FetchImages option and the description of the recursive fetching.

  int fd The file descriptor.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormPage(int fd)
{
 char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Form\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Interactive Refresh Form\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form</H1>\n"
 "You can use this form to refresh or fetch any URL, either a single one or by following links recursively.\n"
 "<p>\n"
 "<form action=\"/refresh/refresh\" method=post>\n"
 "Fetch <select name=\"method\">\n"
 "<option value= \"0\" selected>this URL only\n"
 "<option value= \"1\">recursively to depth 1 on same host\n"
 "<option value= \"2\">recursively to depth 2 on same host\n"
 "<option value= \"3\">recursively to depth 3 on same host\n"
 "<option value= \"4\">recursively to depth 4 on same host\n"
 "<option value= \"5\">recursively to depth 5 on same host\n"
 "<option value=\"-1\">recursively to depth 1 on any host\n"
 "<option value=\"-2\">recursively to depth 2 on any host\n"
 "<option value=\"-3\">recursively to depth 3 on any host\n"
 "</select>\n"
 "<br>\n"
 "http://<input type=\"text\" name=\"url\" size=40>\n"
 "<br>\n"
 "<input type=\"submit\" value=\"Fetch Now\">\n"
 "<p>\n"
 "<b>Notes</b><br>\n";
 char *middle1=
 "Images in the specified page will be fetched because the <i>fetch-images</i> option is set in the configuration file.\n";
 char *middle2=
 "Images in the specified page will not be fetched because the <i>fetch-images</i> option is not set in the configuration file.\n";
 /* The definition list must be closed with </dl>, not a second <dl>. */
 char *tail=
 "<p>\n"
 "Depending on whether you are online or offline the behaviour of the recursive fetching is different.\n"
 "<dl compact>\n"
 "<dt>Online\n"
 "<dd>The specified page is fetched and all links from it (to the specified depth) are also fetched.\n"
 "<dt>Offline\n"
 "<dd>The specified page is marked to be fetched and if it exists in the cache then all links from it (to the specified depth) are also marked for fetching.\n"
 "</dl>\n"
 "\n"
 "</form>\n"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 /* Tell the user which way the images option is set. */

 if(FetchImages)
    write(fd,middle1,strlen(middle1));
 else
    write(fd,middle2,strlen(middle2));

 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Write a string to a file descriptor with the characters that are special in HTML replaced by entities.

  int fd The file descriptor to write to.

  const char *text The string to write.
  ++++++++++++++++++++++++++++++++++++++*/

static void refresh_write_escaped(int fd,const char *text)
{
 const char *start=text,*p;

 for(p=text;*p;p++)
   {
    const char *entity=NULL;

    switch(*p)
      {
      case '<': entity="&lt;";   break;
      case '>': entity="&gt;";   break;
      case '&': entity="&amp;";  break;
      case '"': entity="&quot;"; break;
      default:                   break;
      }

    if(entity)
      {
       /* Flush the ordinary characters seen so far, then the entity. */
       if(p>start)
          write(fd,start,(size_t)(p-start));
       write(fd,entity,strlen(entity));
       start=p+1;
      }
   }

 if(p>start)
    write(fd,start,(size_t)(p-start));
}


/*++++++++++++++++++++++++++++++++++++++
  An error with the form.

  int fd The file descriptor.

  char *body The browser reply that the user entered, or NULL if the request had no body.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormError(int fd,char *body)
{
 char *head=
 "HTTP/1.0 404 WWWOFFLE Refresh Form Error\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Interactive Refresh Form Error\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form Error</H1>\n"
 "<p align=center>\n";
 char *middle1=
 "The reply from the form that your browser sent did not have a body.\n";
 char *middle2=
 "The reply from the form that your browser sent\n"
 "<br><b><tt>\n";
 char *middle3=
 "\n"
 "</tt></b><br>\n"
 "had an error and could not be parsed.\n";
 char *tail=
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));
 if(!body)
    write(fd,middle1,strlen(middle1));
 else
   {
    write(fd,middle2,strlen(middle2));
    /* The body comes straight from the client so it must not be sent back as HTML markup. */
    refresh_write_escaped(fd,body);
    write(fd,middle3,strlen(middle3));
   }
 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Fetch a single page and point the user to the real URL.

  The page is requested (no images, no recursion, no progress output) before
  the reply is written, the reply contains a link to the URL.

  int fd The file descriptor to send the message to.

  char *url The url we are going to get (without the leading "http://").
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormSingle(int fd,char *url)
{
 char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Single Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Refresh Single Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Refresh Single Page</H1>\n"
 "<p align=center>\n"
 "Your requested URL\n"
 "<br><b><tt>\n";
 char *middle=
 "\n"
 "</tt></b><br>\n"
 "has been refreshed and can be viewed by selecting the link below.\n"
 "<br>\n";
 char *link_start="<a href=\"http://";
 char *link_middle="\">";
 char *link_end="</a>";
 char *tail=
 "\n"
 "<p>\n"
 "<a href=\"/refresh/\">[Return to the refresh page]</a>"
 "</BODY>\n"
 "</HTML>\n";

 RecurseFetchPages(-1,url,
                   0,0,0,
                   "localhost",HTTP_Port);

 write(fd,head,strlen(head));
 write(fd,url,strlen(url));
 write(fd,middle,strlen(middle));

 /* The link is written in pieces so that no temporary buffer (that could fail
    to be allocated) is needed, the bytes sent are the same.
    NOTE(review): the URL is sent without HTML escaping - confirm if that matters here. */

 write(fd,link_start,strlen(link_start));
 write(fd,url,strlen(url));
 write(fd,link_middle,strlen(link_middle));
 write(fd,url,strlen(url));
 write(fd,link_end,strlen(link_end));

 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively, and show progress.

  The top of the page is sent first, then the fetching is done (writing to the
  same file descriptor) and finally the bottom of the page is sent.

  int fd The file descriptor to write into.

  char *url The URL to fetch.

  int method The method to use, the size is the depth and a negative value means fetch from any host.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormRecursive(int fd,char *url,int method)
{
 static const char page_top[]=
 "HTTP/1.0 200 WWWOFFLE Refresh Recursive Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Refresh Recursive Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Refresh Recursive Page</H1>\n"
 "Your requested URL\n"
 "<br><b><tt>\n";
 static const char page_middle[]=
 "\n"
 "</tt></b><br>\n"
 "and the links to the specified depth are being fetched.\n"
 "<pre>\n";
 static const char page_bottom[]=
 "</pre>\n"
 "<a href=\"/refresh/\">[Return to the refresh page]</a>"
 "</BODY>\n"
 "</HTML>\n";
 int depth,any_host;

 /* Separate the sign (which hosts) from the size (how deep) of the method. */

 if(method<0)
   {
    depth=-method;
    any_host=1;
   }
 else
   {
    depth=method;
    any_host=0;
   }

 write(fd,page_top,sizeof(page_top)-1);
 write(fd,url,strlen(url));
 write(fd,page_middle,sizeof(page_middle)-1);

 RecurseFetchPages(fd,url,
                   FetchImages,depth,any_host,
                   "localhost",HTTP_Port);

 write(fd,page_bottom,sizeof(page_bottom)-1);
}


/*++++++++++++++++++++++++++++++++++++++
  Inform the user that the specified refresh page is illegal.

  int fd The file descriptor to write to.

  char *path The specified path (the part after "/refresh/").
  ++++++++++++++++++++++++++++++++++++++*/

static void IllegalRefreshPage(int fd,char *path)
{
 char *head=
 "HTTP/1.0 404 WWWOFFLE Illegal Refresh Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Illegal Interactive Refresh Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Illegal Interactive Refresh Page</H1>\n"
 "<p align=center>\n"
 "Your request for the refresh URL\n"
 "<br><b><tt>\n";
 char *prefix="/refresh/";
 char *tail=
 "\n"
 "</tt></b><br>\n"
 "is illegal, select the link below for the main interactive refresh page.\n"
 "<br>\n"
 "<a href=\"/refresh/\">/refresh/</a>\n"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 /* The prefix and the path are written one after the other so that no
    temporary buffer (that could fail to be allocated) is needed.
    NOTE(review): the path is sent without HTML escaping - confirm if that matters here. */

 write(fd,prefix,strlen(prefix));
 write(fd,path,strlen(path));

 write(fd,tail,strlen(tail));
}


/* Local function used by RecurseFetchPages() to build its list of pages. */
static void add_page(char ***pages,int *npages,char *page);

/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively.

  Each page in the list is requested from the proxy using the /outgoing/ URL.
  When images or links are wanted the reply is parsed and the images and links
  that are found are added to the end of the list, so the list grows while it
  is being worked through.

  int fd The file descriptor to output to (no progress messages are written if it is negative).

  char *url The url to start at.

  int images Set to true to fetch images.

  int recursive The depth to recurse to.

  int recursive_any Set to true if pages are to be fetched from any host, else only same.

  char *host The hostname of the server.

  int port The port number of the proxy on the server.
  ++++++++++++++++++++++++++++++++++++++*/

void RecurseFetchPages(int fd,char *url,int images,int recursive,int recursive_any,char *host,int port)
{
 int i;
 int socket;
 FILE *socket_f;
 char buffer[256];
 int parsed=0;
 char *page_host=NULL,**list;
 int npages=0,npages_recursive=1;
 char **pages=NULL;

 add_page(&pages,&npages,url);

 for(i=0;i<npages;i++)
   {
    char *request;
    int j;

    /* All of the pages at the current depth have been done, go one level deeper. */

    if(recursive && i==npages_recursive)
      {
       recursive--;
       npages_recursive=npages;
      }

    /* Big enough for the longest of the strings that are formatted below. */

    request=(char*)malloc(strlen(pages[i])+48);
    if(!request)
      {
       PrintMessage(Warning,"Cannot allocate memory for the request for %s.",pages[i]);
       continue;
      }

    socket=OpenClientSocket(host,port);
    if(socket==-1)
      {
       PrintMessage(Warning,"Cannot open connection to http proxy %s port %d.",host,port);
       free(request);
       continue;
      }

    socket_f=fdopen(socket,"r");
    if(!socket_f)
      {
       PrintMessage(Warning,"Cannot fdopen the server connection [%!s].");
       close(socket);
       free(request);
       continue;
      }

    if(fd>=0)
      {
       sprintf(request,"Getting http://%s\n",pages[i]);
       write(fd,request,strlen(request));
      }

    sprintf(request,"GET /outgoing/%s HTTP/1.0\r\nAccept: */*\r\n\r\n",pages[i]);
    write(socket,request,strlen(request));

    if(images || recursive)
      {
       char *path,*args;
       char *head="HTTP/1.0 301 WWWOFFLE Refresh Redirect\r\n"; /* This line must not be changed (see messages.c). */
       char *line=NULL;
       int redirected;

       /* No line at all (end of file) is treated as not being a redirect.
          The line is only needed for this comparison so it is freed straight away;
          fgets_realloc() was given NULL so presumably the result is allocated
          by it and ours to free - TODO confirm against its definition. */

       line=fgets_realloc(line,socket_f);

       redirected=(line && !strcmp(head,line));

       free(line);

       if(redirected)
         {
          /* Ask again for the real URL on a new connection so that the page itself can be parsed. */

          fclose(socket_f);

          socket=OpenClientSocket(host,port);
          if(socket==-1)
            {
             PrintMessage(Warning,"Cannot open connection to http proxy %s port %d.",host,port);
             free(request);
             continue;
            }

          socket_f=fdopen(socket,"r");
          if(!socket_f)
            {
             PrintMessage(Warning,"Cannot fdopen the server connection [%!s].");
             close(socket);
             free(request);
             continue;
            }

          sprintf(request,"GET http://%s HTTP/1.0\r\nAccept: */*\r\n\r\n",pages[i]);
          write(socket,request,strlen(request));
         }

       /* NOTE(review): the strings that SplitURL() returns are not freed here - confirm who owns them. */

       SplitURL(pages[i],&page_host,&path,&args);

       parsed=ParseHTML(socket_f,page_host,path);
      }

    /* Read and discard whatever is left of the reply. */

    while(fread(buffer,1,sizeof(buffer),socket_f)>0);

    /* The +7 steps over the first 7 characters of each entry, presumably the
       leading "http://" since the pages are stored without it. */

    if(images && parsed && (list=ListImages()))
       for(j=0;list[j];j++)
          add_page(&pages,&npages,list[j]+7);

    if(recursive && parsed && (list=ListLinks()))
       for(j=0;list[j];j++)
         {
          char *link_host=NULL,*path,*args;

          if(!recursive_any)
             SplitURL(list[j],&link_host,&path,&args);

          if(recursive_any || (page_host && link_host && !strcmp(page_host,link_host)))
             add_page(&pages,&npages,list[j]+7);
         }

    fclose(socket_f);
    free(request);
   }

 /* Each page is a separate copy made by add_page() so free them as well as the list. */

 for(i=0;i<npages;i++)
    free(pages[i]);

 free(pages);
}


/*++++++++++++++++++++++++++++++++++++++
  Add a page to the list of pages.

  A page that is already in the list is not added again.  The list grows in
  steps of 8 entries and holds its own copy of each page.  If memory cannot
  be allocated then the page is not added and the list is left as it was.

  char ***pages A pointer to the list of pages

  int *npages A pointer to the number of pages.

  char *page The page to add.
  ++++++++++++++++++++++++++++++++++++++*/

static void add_page(char ***pages,int *npages,char *page)
{
 int i;
 char *copy;

 for(i=0;i<*npages;i++)
    if(!strcmp((*pages)[i],page))
       return;

 /* The list is full every 8 pages (and is empty to start with). */

 if(!(*npages%8))
   {
    /* Keep the old pointer until the new one is known to be good. */
    char **bigger=(char**)realloc(*pages,(8+*npages)*sizeof(char*));

    if(!bigger)
       return;

    *pages=bigger;
   }

 copy=(char*)malloc(strlen(page)+1);
 if(!copy)
    return;

 strcpy(copy,page);

 (*pages)[*npages]=copy;
 (*npages)++;
}
