/***************************************
  $Header: /home/amb/wwwoffle/RCS/refresh.c 1.9 1997/08/03 09:24:17 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 1.2c.
  The HTML interactive page to refresh a URL.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unistd.h>

#include "wwwoffle.h"
#include "config.h"
#include "sockets.h"
#include "errors.h"


/* Local functions that are defined later in this file. */

static void RefreshFormPage(int fd,char *args);                    /* Sends the interactive refresh form. */
static void ParseFormRequest(int fd,char *request,int is_online);  /* Handles the reply posted from the form. */
static void RefreshFormError(int fd,char *body);                   /* Sends the page reporting a bad form reply. */
static void RefreshFormRecursive(int fd,char *url,char *method);   /* Sends a progress page around a recursive fetch. */
static void IllegalRefreshPage(int fd,char *path);                 /* Sends the page reporting an illegal refresh URL. */
static void RecurseFetchPages(int fd,char *url,char *method);      /* Fetches a URL and the pages that it refers to. */


/*++++++++++++++++++++++++++++++++++++++
  Send to the client a page to allow refreshes using HTML.

  char *RefreshPage Returns the modified URL for a simple refresh ("refresh/host/path"),
                    or NULL in every other case.

  int fd The file descriptor of the client.

  char *path The path that was specified by the user.

  char *args The argument that was appended to the URL.

  char *url The url that was requested, this is freed when a modified URL is returned.

  char **request A pointer to the HTTP request sent by the browser, the request is
                 freed and replaced when a modified URL is returned.

  int is_online Set to true if we are online.
  ++++++++++++++++++++++++++++++++++++++*/

char *RefreshPage(int fd,char *path,char *args,char *url,char **request,int is_online)
{
 char *newurl=NULL;

 if(!strcmp("refresh/",path))
    RefreshFormPage(fd,args);
 else if(!strcmp("refresh-request/",path))
    ParseFormRequest(fd,*request,is_online);
 else if(!strncmp("refresh/",path,8))
   {
    /* A simple refresh; replace everything from the start of the URL up to the end
       of "refresh/" with "http://" in both the request and the URL. */

    char *req_url=strstr(*request,url);
    char *req_tail=req_url?strstr(req_url,path):NULL;
    char *url_tail=strstr(url,path);

    if(!req_tail || !url_tail)
       IllegalRefreshPage(fd,path); /* The request does not contain the URL that we were given. */
    else
      {
       size_t prefix=req_url-*request; /* The part of the request in front of the URL. */
       char *new_request,*new_url;

       req_tail+=8;             /* Skip over "refresh/", checked by the strncmp() above. */
       url_tail+=8;

       new_request=(char*)malloc(prefix+7+strlen(req_tail)+1);
       new_url=(char*)malloc(7+strlen(url_tail)+1);

       if(!new_request || !new_url)
         {
          PrintMessage(Warning,"Cannot allocate memory for the refresh request.");
          free(new_request);
          free(new_url);
         }
       else
         {
          memcpy(new_request,*request,prefix);
          strcpy(new_request+prefix,"http://");
          strcat(new_request,req_tail);
          free(*request);
          *request=new_request;

          strcpy(new_url,"http://");
          strcat(new_url,url_tail);
          free(url);
          newurl=new_url;
         }
      }
   }
 else
   {
    /* Anything else is "<method>/<url>", the method is the part before the first '/'. */

    char *slash=strchr(path,'/');

    if(!slash)
       IllegalRefreshPage(fd,path);
    else if(is_online)
      {
       char *fetch_url=(char*)malloc(strlen(slash+1)+(args?strlen(args):0)+2);

       if(!fetch_url)
          PrintMessage(Warning,"Cannot allocate memory for the URL to fetch.");
       else
         {
          if(args)
             sprintf(fetch_url,"%s?%s",slash+1,args);
          else
             strcpy(fetch_url,slash+1);

          *slash=0;             /* Terminate the method that is at the start of the path. */
          RecurseFetchPages(fd,fetch_url,path);
          free(fetch_url);
         }
      }
    else
      {
       /* Not online; write a request for the same URL into a new outgoing spool file. */

       char *new_request=RequestURL(url,NULL);
       int new_outgoing=OpenOutgoingSpoolFile(0);

       if(new_outgoing==-1)
          PrintMessage(Warning,"Cannot open the new outgoing request to write.");
       else
         {
          write(new_outgoing,new_request,strlen(new_request));
          CloseOutgoingSpoolFile(new_outgoing);
         }

       free(new_request);
      }
   }

 return(newurl);
}


/*++++++++++++++++++++++++++++++++++++++
  Write a string to the client for use inside a double quoted HTML attribute value;
  the characters & < > and " are written as character entity references.

  int fd The file descriptor.

  const char *string The string to write.
  ++++++++++++++++++++++++++++++++++++++*/

static void write_escaped_value(int fd,const char *string)
{
 const char *start=string,*p;

 for(p=string;*p;p++)
   {
    const char *entity=NULL;

    switch(*p)
      {
      case '&': entity="&amp;";  break;
      case '<': entity="&lt;";   break;
      case '>': entity="&gt;";   break;
      case '"': entity="&quot;"; break;
      default:                   break;
      }

    if(entity)
      {
       if(p>start)              /* Flush the ordinary characters seen so far. */
          write(fd,start,p-start);
       write(fd,entity,strlen(entity));
       start=p+1;
      }
   }

 if(p>start)
    write(fd,start,p-start);
}


/*++++++++++++++++++++++++++++++++++++++
  The form that the user enters the details on.

  int fd The file descriptor.

  char *args The arguments that were on the request for this URL, if there are any then
             they are URL-decoded and used as the initial contents of the URL field.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormPage(int fd,char *args)
{
 char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Form\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Interactive Refresh Form\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form</H1>\n"
 "You can use this form to refresh or fetch any URL, either a single one or by following links recursively.\n"
 "<p>\n"
 "<form action=\"/refresh-request/\" method=post>\n"
 "Fetch <select name=\"method\">\n"
 "<option value=\"refresh\" selected>this URL only\n"
 "<option value=\"refresh-dir-1\"   >recursively to depth 1 in the same directory\n"
 "<option value=\"refresh-dir-2\"   >recursively to depth 2 in the same directory\n"
 "<option value=\"refresh-dir-3\"   >recursively to depth 3 in the same directory\n"
 "<option value=\"refresh-dir-4\"   >recursively to depth 4 in the same directory\n"
 "<option value=\"refresh-dir-5\"   >recursively to depth 5 in the same directory\n"
 "<option value=\"refresh-host-1\"  >recursively to depth 1 on the same host\n"
 "<option value=\"refresh-host-2\"  >recursively to depth 2 on the same host\n"
 "<option value=\"refresh-host-3\"  >recursively to depth 3 on the same host\n"
 "<option value=\"refresh-host-4\"  >recursively to depth 4 on the same host\n"
 "<option value=\"refresh-any-1\"   >recursively to depth 1 on any host\n"
 "<option value=\"refresh-any-2\"   >recursively to depth 2 on any host\n"
 "<option value=\"refresh-any-3\"   >recursively to depth 3 on any host\n"
 "</select>\n"
 "<br>\n"
 "<input type=\"text\" name=\"url\" value=\"";
 /* There must be whitespace between the end of the value and the next attribute. */
 char *middle1=
 "\" size=60>\n"
 "<br>\n"
 "<input name=\"images\" type=\"checkbox\" value=\"-images\" ";
 char *middle2=
 ">Fetch images in the pages\n"
 "<br>\n"
 "<input name=\"frames\" type=\"checkbox\" value=\"-frames\" ";
 char *tail=
 ">Fetch frames in the pages\n"
 "<br>\n"
 "<input name=\"force\" type=\"checkbox\" value=\"-force\">Force refresh even if already cached"
 "<br>\n"
 "<input type=\"submit\" value=\"Fetch Now\">\n"
 "</form>\n"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));
 if(args)
   {
    char *copy=UrlDecode(args);

    if(copy)
      {
       /* The text comes from the request and goes inside value="...", so it is escaped
          to stop a quote or angle bracket in it from ending the attribute. */
       write_escaped_value(fd,copy);
       free(copy);
      }
   }
 write(fd,middle1,strlen(middle1));
 if(FetchImages)                /* The checkboxes start ticked when the option is set. */
    write(fd,"checked",7);
 write(fd,middle2,strlen(middle2));
 if(FetchFrames)
    write(fd,"checked",7);
 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Parse the reply from the form.

  The "method", "images", "frames" and "force" values from the form are joined together
  to make the full method (e.g. "refresh-dir-1-images-force").  When online the URL is
  fetched with that method straight away, otherwise a request for "/<method>/<url>" is
  written to a new outgoing spool file.

  int fd The file descriptor of the client.

  char *request The HTTP request sent by the browser.

  int is_online Set to true if we are online.
  ++++++++++++++++++++++++++++++++++++++*/

static void ParseFormRequest(int fd,char *request,int is_online)
{
 int i;
 char *body,*copy;
 char *url=NULL,*method=NULL,*images="",*frames="",*force="";
 char *decoded,*target,*full_method;

 if((body=strstr(request,"\n\n")) ||
    (body=strstr(request,"\r\n\n")) ||
    (body=strstr(request,"\n\r\n")) ||
    (body=strstr(request,"\r\n\r\n")))
   {
    /* Step over the two line feeds of the blank line to get to the body. */
    body=strchr(body,'\n');
    body++;
    body=strchr(body,'\n');
    body++;
   }
 else
   {
    RefreshFormError(fd,NULL);
    return;
   }

 copy=(char*)malloc(strlen(body)+1);
 if(!copy)
   {
    PrintMessage(Warning,"Cannot allocate memory to parse the refresh form.");
    RefreshFormError(fd,body);
    return;
   }
 strcpy(copy,body);

 /* Split the copy into "name=value" strings at each '&' and remember where the values start. */

 for(i=0;copy[i];i++)
   {
    if(i!=0 && copy[i-1]=='&')
       copy[i-1]=0;
    if(i==0 || copy[i-1]==0)
      {
       if(!strncmp("method=",&copy[i],7))
          method=&copy[i+7];
       if(!strncmp("images=",&copy[i],7))
          images=&copy[i+7];
       if(!strncmp("frames=",&copy[i],7))
          frames=&copy[i+7];
       if(!strncmp("force=",&copy[i],6))
          force=&copy[i+6];
       if(!strncmp("url=",&copy[i],4))
          url=&copy[i+4];
      }
   }

 if(url==NULL || *url==0 || method==NULL || strchr(url,'+'))
   {
    RefreshFormError(fd,body);
    free(copy);
    return;
   }

 /* The decoded URL is a new string (it is freed below), the other values point into the copy. */

 decoded=UrlDecode(url);
 full_method=(char*)malloc(strlen(method)+strlen(images)+strlen(frames)+strlen(force)+1);

 if(!decoded || !full_method)
   {
    RefreshFormError(fd,body);
    free(full_method);
    free(decoded);
    free(copy);
    return;
   }

 /* The option values already start with '-' (e.g. "-images") so they just get appended. */

 sprintf(full_method,"%s%s%s%s",method,images,frames,force);

 target=decoded;
 if(!strncasecmp(target,"http://",7))
    target+=7;

 if(is_online)
    RefreshFormRecursive(fd,target,full_method);
 else
   {
    char *new_url=(char*)malloc(strlen(full_method)+strlen(target)+3);

    if(!new_url)
       PrintMessage(Warning,"Cannot allocate memory for the new outgoing request.");
    else
      {
       char *new_request,*outgoing_filename=NULL;
       int new_outgoing;

       sprintf(new_url,"/%s/%s",full_method,target);
       new_request=RequestURL(new_url,NULL);
       new_outgoing=OpenOutgoingSpoolFile(0);

       if(new_outgoing==-1)
          PrintMessage(Warning,"Cannot open the new outgoing request to write.");
       else
         {
          write(new_outgoing,new_request,strlen(new_request));
          /* NOTE(review): it is not visible here who owns the returned filename - confirm. */
          outgoing_filename=CloseOutgoingSpoolFile(new_outgoing);

          WillGetURL(fd,target,outgoing_filename,0);
         }

       free(new_request);
       free(new_url);
      }
   }

 free(full_method);
 free(decoded);
 free(copy);
}


/*++++++++++++++++++++++++++++++++++++++
  Tell the user that the reply from the refresh form could not be used.

  int fd The file descriptor to write the page to.

  char *body The body of the form reply from the browser, it is copied into the page
             exactly as it was received; NULL if the reply had no body at all.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormError(int fd,char *body)
{
 static const char page_start[]=
 "HTTP/1.0 404 WWWOFFLE Refresh Form Error\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Interactive Refresh Form Error\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form Error</H1>\n"
 "<p align=center>\n";
 static const char no_body[]=
 "The reply from the form that your browser sent did not have a body.\n";
 static const char before_body[]=
 "The reply from the form that your browser sent\n"
 "<br><b><tt>\n";
 static const char after_body[]=
 "\n"
 "</tt></b><br>\n"
 "had an error and could not be parsed.\n";
 static const char page_end[]=
 "</BODY>\n"
 "</HTML>\n";

 /* The sizeof() of each array includes the terminating zero, which is not sent. */

 write(fd,page_start,sizeof(page_start)-1);

 if(body)
   {
    write(fd,before_body,sizeof(before_body)-1);
    write(fd,body,strlen(body)); /* NOTE(review): written without any HTML escaping. */
    write(fd,after_body,sizeof(after_body)-1);
   }
 else
    write(fd,no_body,sizeof(no_body)-1);

 write(fd,page_end,sizeof(page_end)-1);
}


/*++++++++++++++++++++++++++++++++++++++
  Send a page to the client that names the URL being fetched, does the recursive
  fetch (whose progress messages go into a <pre> section of the page) and then
  finishes the page with a link back to the refresh form.

  int fd The file descriptor to write into.

  char *url The URL to fetch.

  char *method The method to use.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormRecursive(int fd,char *url,char *method)
{
 static const char before_url[]=
 "HTTP/1.0 200 WWWOFFLE Refresh Recursive Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Refresh Recursive Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Refresh Recursive Page</H1>\n"
 "Your requested URL\n"
 "<br><b><tt>\n";
 static const char after_url[]=
 "\n"
 "</tt></b><br>\n"
 "and the links to the specified depth are being fetched.\n"
 "<pre>\n";
 static const char page_end[]=
 "</pre>\n"
 "<a href=\"/refresh/\">[Return to the refresh page]</a>"
 "</BODY>\n"
 "</HTML>\n";

 /* The top of the page with the URL in it; sizeof() counts the terminating zero, which is not sent. */

 write(fd,before_url,sizeof(before_url)-1);
 write(fd,url,strlen(url));
 write(fd,after_url,sizeof(after_url)-1);

 /* The fetching is given the same file descriptor to write to. */

 RecurseFetchPages(fd,url,method);

 write(fd,page_end,sizeof(page_end)-1);
}


/*++++++++++++++++++++++++++++++++++++++
  Inform the user that the specified refresh page is illegal.

  int fd The file descriptor to write to.

  char *path The specified path, it is shown in the page with a '/' in front of it.
  ++++++++++++++++++++++++++++++++++++++*/

static void IllegalRefreshPage(int fd,char *path)
{
 char *head=
 "HTTP/1.0 404 WWWOFFLE Illegal Refresh Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Illegal Interactive Refresh Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Illegal Interactive Refresh Page</H1>\n"
 "<p align=center>\n"
 "Your request for the refresh URL\n"
 "<br><b><tt>\n";
 char *tail=
 "\n"
 "</tt></b><br>\n"
 "is illegal, select the link below for the main interactive refresh page.\n"
 "<br>\n"
 "<a href=\"/refresh/\">/refresh/</a>\n"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 /* The '/' and the path are written one after the other so that no temporary
    buffer (and so no allocation that could fail) is needed to join them. */

 write(fd,"/",1);
 write(fd,path,strlen(path));   /* NOTE(review): written without any HTML escaping. */

 write(fd,tail,strlen(tail));
}


/* Adds a copy of a page to a list of pages unless it is already in it; used by RecurseFetchPages() and defined below. */
static void add_page(char ***pages,int *npages,char *page);

/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively.

  Each page is requested through a connection to the HTTP port on localhost and a
  "Getting <url>" line is written to the client for each one.  The images, frames
  and links that are found in a page are added to the end of the list of pages to
  fetch, depending on the options in the method.

  int fd The file descriptor to output to (mode==Real).

  char *url The url to start at (without the "http://").

  char *method The method to use, encoding the depth and other options; a list of
               words separated by '-' from "refresh", "none", "any", "host", "dir",
               "images", "frames", "force" and a number for the depth.  The string
               is modified (the '-' are overwritten) and nothing is fetched if any
               other word is found.
  ++++++++++++++++++++++++++++++++++++++*/

static void RecurseFetchPages(int fd,char *url,char *method)
{
 int i;
 int recursive_depth=0,recursive_mode=0,recursive_images=0,recursive_frames=0,force_fetch=0;
 int npages=0,npages_recursive=1;
 char **pages=NULL;
 char buffer[256];
 char *first;

 if(*method=='-')
    method++;

 /* Decode the options; nothing has been allocated yet so it is safe to return from here. */

 while(method)
   {
    char *dash=strchr(method,'-');

    if(dash)
       *dash=0;

    if(!strcmp(method,"refresh"))
       ;
    else if(!strcmp(method,"none"))
       ;
    else if(!strcmp(method,"any"))
       recursive_mode=1;
    else if(!strcmp(method,"host"))
       recursive_mode=0;
    else if(!strcmp(method,"dir"))
       recursive_mode=-1;
    else if(!strcmp(method,"images"))
       recursive_images=1;
    else if(!strcmp(method,"frames"))
       recursive_frames=1;
    else if(!strcmp(method,"force"))
       force_fetch=1;
    else
      {
       int depth=atoi(method);

       if(!depth)
          return;               /* Not a known word and not a number. */

       recursive_depth=depth;
      }

    method=dash?dash+1:NULL;
   }

 /* The list of pages starts with just the requested URL. */

 first=(char*)malloc(strlen(url)+8);
 if(!first)
   {
    PrintMessage(Warning,"Cannot allocate memory for the URL to fetch.");
    return;
   }
 sprintf(first,"http://%s",url);
 add_page(&pages,&npages,first);
 free(first);

 /* The list grows while it is being worked through. */

 for(i=0;i<npages;i++)
   {
    int socket;
    FILE *socket_f;
    int parsed=0;
    char *page_host,*page_path,*page_args;
    char *request,*msg;
    char **list;
    int j;

    /* The pages up to npages_recursive are all at the current depth, when they have
       all been done the ones that they added are one level deeper. */

    if(recursive_depth && i==npages_recursive)
      {
       recursive_depth--;
       npages_recursive=npages;
      }

    socket=OpenClientSocket("localhost",HTTP_Port);
    if(socket==-1)
      {
       PrintMessage(Warning,"Cannot open connection to wwwoffle proxy.");
       continue;
      }

    socket_f=fdopen(socket,"r");
    if(!socket_f)
      {
       PrintMessage(Warning,"Cannot fdopen the server connection [%!s].");
       close(socket);
       continue;
      }

    msg=(char*)malloc(strlen(pages[i])+16);
    if(msg)
      {
       sprintf(msg,"Getting %s\n",pages[i]);
       write(fd,msg,strlen(msg));
       free(msg);
      }

    request=RequestURL(pages[i],NULL);

    if(force_fetch)
      {
       /* Put a "Pragma: no-cache" header straight after the first line of the request;
          the request is left as it is if it has no end of line or there is no memory. */

       char *eol=strchr(request,'\n');
       char *forced=eol?(char*)malloc(strlen(request)+24):NULL;

       if(forced)
         {
          *eol=0;eol++;
          strcpy(forced,request);
          strcat(forced,"\nPragma: no-cache\r\n");
          strcat(forced,eol);

          free(request);
          request=forced;
         }
      }

    write(socket,request,strlen(request));

    /* NOTE(review): it is not visible here whether the strings from SplitURL() need to be freed - confirm. */

    SplitURL(pages[i],&page_host,&page_path,&page_args);

    parsed=ParseHTML(socket_f,page_host,page_path);

    /* Read and discard whatever is left of the reply. */

    while(fread(buffer,1,sizeof(buffer),socket_f)>0);

    if(recursive_images && parsed && (list=ListImages()))
       for(j=0;list[j];j++)
         {
          PrintMessage(Debug,"Image=%s",list[j]);

          add_page(&pages,&npages,list[j]);
         }

    if(recursive_frames && parsed && (list=ListFrames()))
       for(j=0;list[j];j++)
         {
          PrintMessage(Debug,"Frame=%s",list[j]);

          add_page(&pages,&npages,list[j]);
         }

    if(recursive_depth && parsed && (list=ListLinks()))
      {
       for(j=0;list[j];j++)
         {
          PrintMessage(Debug,"Link=%s",list[j]);

          if(recursive_mode==1) /* any host */
             add_page(&pages,&npages,list[j]);
          else
            {
             char *link_host=NULL,*link_path,*link_args;

             SplitURL(list[j],&link_host,&link_path,&link_args);

             if(link_host && !strcmp(page_host,link_host))
               {
                if(recursive_mode==0) /* same host */
                   add_page(&pages,&npages,list[j]);
                else            /* same directory */
                  {
                   /* Cut the path of the page off after its last '/' (after its first
                      character if there is none) and only take the links whose path
                      starts with what is left. */

                   char *end=page_path+strlen(page_path);

                   while(end>page_path)
                      if(*end=='/')
                         break;
                      else
                         end--;
                   if(*end)
                      *++end=0;
                   if(end==page_path || !strncmp(page_path,link_path,end-page_path))
                      add_page(&pages,&npages,list[j]);
                  }
               }
            }
         }
      }

    fclose(socket_f);
    free(request);
   }

 for(i=0;i<npages;i++)
    free(pages[i]);
 free(pages);
}


/*++++++++++++++++++++++++++++++++++++++
  Add a page to the list of pages.

  A copy of the page is added to the end of the list unless a page with the same
  name is already in it.  The list is made bigger 8 entries at a time.  If the
  memory that is needed cannot be allocated then the page is not added and the
  list and the number of pages are left as they were.

  char ***pages A pointer to the list of pages (the list may be NULL when it is empty).

  int *npages A pointer to the number of pages.

  char *page The page to add.
  ++++++++++++++++++++++++++++++++++++++*/

static void add_page(char ***pages,int *npages,char *page)
{
 int i;
 char *copy;

 for(i=0;i<*npages;i++)
    if(!strcmp((*pages)[i],page))
       return;                  /* Already in the list. */

 if(!(*npages%8))
   {
    /* Keep the old pointer until the realloc() has worked so that the list is not lost. */

    char **bigger=(char**)realloc(*pages,(8+*npages)*sizeof(char*));

    if(!bigger)
       return;

    *pages=bigger;
   }

 copy=(char*)malloc(strlen(page)+1);
 if(!copy)
    return;

 strcpy(copy,page);
 (*pages)[*npages]=copy;
 (*npages)++;
}
