/***************************************
  $Header: /home/amb/wwwoffle/RCS/refresh.c 1.14 1997/09/23 18:08:51 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 1.3.
  The HTML interactive page to refresh a URL.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unistd.h>

#include "wwwoffle.h"
#include "config.h"
#include "sockets.h"
#include "errors.h"


/* Functions local to this file; all are reached, directly or indirectly, from RefreshPage(). */

static void RefreshFormPage(int fd,char *args);                   /* sends the HTML form */
static int  ParseFormRequest(int fd,char *request,int is_online); /* handles the posted form */
static void RefreshFormError(int fd,char *body);                  /* error page for a bad form reply */
static int  RefreshFormRecursive(int fd,char *url,char *method);  /* progress page around RecurseFetchPages() */
static void IllegalRefreshPage(int fd,char *path);                /* error page for a bad refresh path */
static int  RecurseFetchPages(int fd,char *url,char *method);     /* fetches a page and queues what it links to */


/*++++++++++++++++++++++++++++++++++++++
  Handle a request for one of the refresh URLs, sending a page to the client where needed.

  char *RefreshPage Returns the rewritten URL (newly allocated) for a simple refresh,
                    the dummy non-NULL value (char*)1 if there are more pages to get,
                    or NULL in all other cases.

  int fd The file descriptor of the client.

  char *path The path that was specified by the user (cut short in place at the first '/'
             when it is of the form method/url and we are online).

  char *args The argument that was appended to the URL (may be NULL).

  char *url The url that was requested (freed here when a rewritten URL is returned).

  char **request A pointer to the HTTP request sent by the browser (freed and replaced
                 when a rewritten URL is returned).

  int is_online Set to true if we are online.
  ++++++++++++++++++++++++++++++++++++++*/

char *RefreshPage(int fd,char *path,char *args,char *url,char **request,int is_online)
{
 char *newurl=NULL;

 if(!strcmp("refresh/",path))
   {
    /* The refresh form itself. */

    RefreshFormPage(fd,args);
   }
 else if(!strcmp("refresh-request/",path))
   {
    /* The reply that the browser posts from the form. */

    if(ParseFormRequest(fd,*request,is_online))
       newurl=(char*)1;
   }
 else if(!strncmp("refresh/",path,8))
   {
    /* A simple refresh, "refresh/<rest>": rewrite the request and the URL so that
       both name "http://<rest>".  Everything is located and allocated before
       anything is changed so that a failure leaves the caller's data untouched. */

    char *in_request=strstr(*request,url);
    char *in_url=strstr(url,"refresh/");

    if(!in_request || !in_url)
       IllegalRefreshPage(fd,path);
    else
      {
       size_t skip=(size_t)(in_url-url)+8;          /* the URL up to and including "refresh/" */
       size_t before=(size_t)(in_request-*request); /* the request in front of the URL */
       char *new_request=(char*)malloc(before+strlen(in_request+skip)+8);
       char *new_url=(char*)malloc(strlen(url+skip)+8);

       if(!new_request || !new_url)
         {
          PrintMessage(Warning,"Cannot allocate memory to rewrite the refresh request.");
          free(new_request);
          free(new_url);
         }
       else
         {
          memcpy(new_request,*request,before);
          strcpy(new_request+before,"http://");
          strcat(new_request,in_request+skip);

          strcpy(new_url,"http://");
          strcat(new_url,url+skip);

          /* The old strings are only released once both copies are complete. */

          free(*request);
          *request=new_request;

          free(url);
          newurl=new_url;
         }
      }
   }
 else
   {
    /* Anything else must be "<method>/<url>". */

    char *slash=strchr(path,'/');

    if(!slash)
       IllegalRefreshPage(fd,path);
    else if(is_online)
      {
       size_t args_len=args?strlen(args):0;
       char *fetch_url=(char*)malloc(strlen(slash+1)+args_len+2);

       if(!fetch_url)
          PrintMessage(Warning,"Cannot allocate memory for the URL to refresh.");
       else
         {
          if(args)
             sprintf(fetch_url,"%s?%s",slash+1,args);
          else
             strcpy(fetch_url,slash+1);

          *slash=0;             /* path now holds only the method */

          if(RecurseFetchPages(fd,fetch_url,path))
             newurl=(char*)1;

          free(fetch_url);
         }
      }
    else
      {
       /* Not online: store the request in the outgoing spool instead. */

       char *new_request=RequestURL(url,NULL);
       int new_outgoing=OpenOutgoingSpoolFile(0);

       if(new_outgoing==-1)
          PrintMessage(Warning,"Cannot open the new outgoing request to write.");
       else
         {
          write(new_outgoing,new_request,strlen(new_request));
          CloseOutgoingSpoolFile(new_outgoing);
         }

       free(new_request);
      }
   }

 return(newurl);
}


/*++++++++++++++++++++++++++++++++++++++
  Send the form that the user enters the details on.

  int fd The file descriptor to write the page to.

  char *args The arguments that were on the request for this URL (may be NULL); they are
             used as the initial contents of the URL text field.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormPage(int fd,char *args)
{
 const char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Form\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Interactive Refresh Form\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form</H1>\n"
 "You can use this form to refresh or fetch any URL, either a single one or by following links recursively.\n"
 "<p>\n"
 "<form action=\"/refresh-request/\" method=post>\n"
 "Fetch <select name=\"method\">\n"
 "<option value=\"-none\" selected>this URL only\n"
 "<option value=\"-dir-1\"   >recursively to depth 1 in the same directory\n"
 "<option value=\"-dir-2\"   >recursively to depth 2 in the same directory\n"
 "<option value=\"-dir-3\"   >recursively to depth 3 in the same directory\n"
 "<option value=\"-dir-4\"   >recursively to depth 4 in the same directory\n"
 "<option value=\"-dir-5\"   >recursively to depth 5 in the same directory\n"
 "<option value=\"-host-1\"  >recursively to depth 1 on the same host\n"
 "<option value=\"-host-2\"  >recursively to depth 2 on the same host\n"
 "<option value=\"-host-3\"  >recursively to depth 3 on the same host\n"
 "<option value=\"-host-4\"  >recursively to depth 4 on the same host\n"
 "<option value=\"-any-1\"   >recursively to depth 1 on any host\n"
 "<option value=\"-any-2\"   >recursively to depth 2 on any host\n"
 "<option value=\"-any-3\"   >recursively to depth 3 on any host\n"
 "</select>\n"
 "<br>\n"
 "<input type=\"text\" name=\"url\" value=\"";
 const char *middle1=
 "\" size=60>\n"                /* the space keeps 'size' a separate attribute */
 "<br>\n"
 "<input name=\"images\" type=\"checkbox\" value=\"-images\" ";
 const char *middle2=
 ">Fetch images in the pages\n"
 "<br>\n"
 "<input name=\"frames\" type=\"checkbox\" value=\"-frames\" ";
 const char *tail=
 ">Fetch frames in the pages\n"
 "<br>\n"
 "<input name=\"force\" type=\"checkbox\" value=\"-force\">Force refresh even if already cached"
 "<br>\n"
 "<input type=\"submit\" value=\"Fetch Now\">\n"
 "</form>\n"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 /* The arguments come from the requested URL and land inside value="...", so the
    characters that could end the attribute or start a tag are sent as entities. */

 if(args)
   {
    const char *text=args;

    while(*text)
      {
       size_t plain=strcspn(text,"&<>\"");

       if(plain)
         {
          write(fd,text,plain);
          text+=plain;
         }
       else
         {
          const char *entity=(*text=='&')?"&amp;":
                             (*text=='<')?"&lt;":
                             (*text=='>')?"&gt;":"&quot;";

          write(fd,entity,strlen(entity));
          text++;
         }
      }
   }

 write(fd,middle1,strlen(middle1));

 /* The two checkboxes start ticked when the corresponding FetchImages / FetchFrames
    value (declared outside this file) is non-zero. */

 if(FetchImages)
    write(fd,"checked",7);
 write(fd,middle2,strlen(middle2));
 if(FetchFrames)
    write(fd,"checked",7);
 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Parse the reply from the form and act on it.

  When online the URL is fetched straight away through RefreshFormRecursive(); otherwise a
  request for "/<method>/<url>" is written to the outgoing spool and WillGetURL() is called.
  An error page is sent if the reply has no body or lacks the url or method fields.

  int ParseFormRequest Returns a true value if there are more pages to get.

  int fd The file descriptor of the client.

  char *request The HTTP request sent by the browser.

  int is_online Set to true if we are online.
  ++++++++++++++++++++++++++++++++++++++*/

static int ParseFormRequest(int fd,char *request,int is_online)
{
 int i;
 int more=0;
 char *body,*copy,*decoded,*url=NULL,*method=NULL,*total_method,*images="",*frames="",*force="";

 /* The body follows the first blank line, whichever mixture of line endings is used. */

 if((body=strstr(request,"\n\n")) ||
    (body=strstr(request,"\r\n\n")) ||
    (body=strstr(request,"\n\r\n")) ||
    (body=strstr(request,"\r\n\r\n")))
   {
    /* Step over the two newlines that make up the blank line. */

    body=strchr(body,'\n');
    body++;
    body=strchr(body,'\n');
    body++;
   }
 else
   {
    RefreshFormError(fd,NULL);
    return(0);
   }

 /* Work on a private copy: each '&' is overwritten with a terminator so that the
    field pointers below refer to separate strings. */

 copy=(char*)malloc(strlen(body)+1);
 if(!copy)
   {
    PrintMessage(Warning,"Cannot allocate memory to parse the refresh form.");
    RefreshFormError(fd,body);
    return(0);
   }
 strcpy(copy,body);

 for(i=0;copy[i];i++)
   {
    if(i!=0 && copy[i-1]=='&')
       copy[i-1]=0;
    if(i==0 || copy[i-1]==0)    /* the start of a field */
      {
       if(!strncmp("method=",&copy[i],7))
          method=&copy[i+7];
       if(!strncmp("images=",&copy[i],7))
          images=&copy[i+7];
       if(!strncmp("frames=",&copy[i],7))
          frames=&copy[i+7];
       if(!strncmp("force=",&copy[i],6))
          force=&copy[i+6];
       if(!strncmp("url=",&copy[i],4))
          url=&copy[i+4];
      }
   }

 if(url==NULL || *url==0 || method==NULL)
   {
    RefreshFormError(fd,body);
    free(copy);
    return(0);
   }

 /* NOTE(review): assumes UrlDecode() returns malloc'd storage that the caller must
    free - confirm against its definition.  The original pointer is kept in 'decoded'
    because 'url' may be moved past the "http://" prefix. */

 decoded=UrlDecode(url,1);
 url=decoded;

 if(!strncasecmp(url,"http://",7))
    url+=7;

 /* "refresh" followed by whichever option strings the form supplied. */

 total_method=(char*)malloc(strlen(method)+strlen(images)+strlen(frames)+strlen(force)+8);
 if(!total_method)
   {
    PrintMessage(Warning,"Cannot allocate memory for the refresh method.");
    free(decoded);
    free(copy);
    return(0);
   }
 strcpy(total_method,"refresh");
 strcat(total_method,method);
 strcat(total_method,images);
 strcat(total_method,frames);
 strcat(total_method,force);

 if(is_online)
    more=RefreshFormRecursive(fd,url,total_method);
 else
   {
    char *new_url=(char*)malloc(strlen(total_method)+strlen(url)+3);

    if(!new_url)
       PrintMessage(Warning,"Cannot allocate memory for the URL to refresh.");
    else
      {
       char *new_request,*outgoing_filename=NULL;
       int new_outgoing;

       sprintf(new_url,"/%s/%s",total_method,url);
       new_request=RequestURL(new_url,NULL);
       new_outgoing=OpenOutgoingSpoolFile(0);

       if(new_outgoing==-1)
          PrintMessage(Warning,"Cannot open the new outgoing request to write.");
       else
         {
          write(new_outgoing,new_request,strlen(new_request));
          outgoing_filename=CloseOutgoingSpoolFile(new_outgoing);

          WillGetURL(fd,url,outgoing_filename,0);
         }

       free(new_request);
       free(new_url);
      }
   }

 free(total_method);
 free(decoded);
 free(copy);

 return(more);
}


/*++++++++++++++++++++++++++++++++++++++
  Send a page reporting an error with the reply from the form.

  int fd The file descriptor to write the page to.

  char *body The body of the reply that the browser sent, or NULL if there was none.
             It is shown in the page with the HTML special characters escaped.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormError(int fd,char *body)
{
 const char *head=
 "HTTP/1.0 404 WWWOFFLE Refresh Form Error\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Interactive Refresh Form Error\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form Error</H1>\n"
 "<p align=center>\n";
 const char *no_body=
 "The reply from the form that your browser sent did not have a body.\n";
 const char *before_body=
 "The reply from the form that your browser sent\n"
 "<br><b><tt>\n";
 const char *after_body=
 "\n"
 "</tt></b><br>\n"
 "had an error and could not be parsed.\n";
 const char *tail=
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 if(!body)
    write(fd,no_body,strlen(no_body));
 else
   {
    const char *text=body;

    write(fd,before_body,strlen(before_body));

    /* The body is whatever the client posted, so it must not be able to add
       markup of its own: runs of ordinary characters are copied as they are
       and each special character is replaced by its entity. */

    while(*text)
      {
       size_t plain=strcspn(text,"&<>\"");

       if(plain)
         {
          write(fd,text,plain);
          text+=plain;
         }
       else
         {
          const char *entity=(*text=='&')?"&amp;":
                             (*text=='<')?"&lt;":
                             (*text=='>')?"&gt;":"&quot;";

          write(fd,entity,strlen(entity));
          text++;
         }
      }

    write(fd,after_body,strlen(after_body));
   }

 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively, and show progress.

  The page is sent in three parts: a heading that names the URL, whatever
  RecurseFetchPages() writes to the same file descriptor, and a closing link
  back to the refresh form.

  int RefreshFormRecursive Returns a true value if there are more pages to get.

  int fd The file descriptor to write into.

  char *url The URL to fetch; it is shown in the page with the HTML special characters escaped.

  char *method The method to use.
  ++++++++++++++++++++++++++++++++++++++*/

static int RefreshFormRecursive(int fd,char *url,char *method)
{
 int more;
 const char *text=url;
 const char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Recursive Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Refresh Recursive Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Refresh Recursive Page</H1>\n"
 "Your requested URL\n"
 "<br><b><tt>\n";
 const char *middle=
 "\n"
 "</tt></b><br>\n"
 "and the links to the specified depth are being fetched in the background.\n"
 "<pre>\n";
 const char *tail=
 "</pre>\n"
 "<a href=\"/refresh/\">[Return to the refresh page]</a>"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 /* The URL was typed in by the user, so its special characters are sent as
    entities; the unescaped string is still what gets fetched below. */

 while(*text)
   {
    size_t plain=strcspn(text,"&<>\"");

    if(plain)
      {
       write(fd,text,plain);
       text+=plain;
      }
    else
      {
       const char *entity=(*text=='&')?"&amp;":
                          (*text=='<')?"&lt;":
                          (*text=='>')?"&gt;":"&quot;";

       write(fd,entity,strlen(entity));
       text++;
      }
   }

 write(fd,middle,strlen(middle));

 more=RecurseFetchPages(fd,url,method);

 write(fd,tail,strlen(tail));

 return(more);
}


/*++++++++++++++++++++++++++++++++++++++
  Inform the user that the specified refresh page is illegal.

  int fd The file descriptor to write to.

  char *path The specified path; it is shown in the page with a leading '/' and with the
             HTML special characters escaped.
  ++++++++++++++++++++++++++++++++++++++*/

static void IllegalRefreshPage(int fd,char *path)
{
 const char *text=path;
 const char *head=
 "HTTP/1.0 404 WWWOFFLE Illegal Refresh Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>\n"
 "WWWOFFLE - Illegal Interactive Refresh Page\n"
 "</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Illegal Interactive Refresh Page</H1>\n"
 "<p align=center>\n"
 "Your request for the refresh URL\n"
 "<br><b><tt>\n";
 const char *tail=
 "\n"
 "</tt></b><br>\n"
 "is illegal, select the link below for the main interactive refresh page.\n"
 "<br>\n"
 "<a href=\"/refresh/\">/refresh/</a>\n"
 "</BODY>\n"
 "</HTML>\n";

 write(fd,head,strlen(head));

 /* The path is written piece by piece, so no temporary buffer (and no allocation
    that could fail) is needed.  It was chosen by the client, so each special
    character is replaced by its entity. */

 write(fd,"/",1);

 while(*text)
   {
    size_t plain=strcspn(text,"&<>\"");

    if(plain)
      {
       write(fd,text,plain);
       text+=plain;
      }
    else
      {
       const char *entity=(*text=='&')?"&amp;":
                          (*text=='<')?"&lt;":
                          (*text=='>')?"&gt;":"&quot;";

       write(fd,entity,strlen(entity));
       text++;
      }
   }

 write(fd,tail,strlen(tail));
}


/*++++++++++++++++++++++++++++++++++++++
  Test whether a link lies in the directory that holds a page (or below it).

  int LinkInPageDirectory Returns true if link_path starts with the directory part of page_path.

  const char *page_path The path of the page that was fetched.

  const char *link_path The path of the link that was found in it.
  ++++++++++++++++++++++++++++++++++++++*/

static int LinkInPageDirectory(const char *page_path,const char *link_path)
{
 const char *slash=strrchr(page_path,'/');

 /* A path without a '/' has no directory part, so no link is excluded by it. */

 if(!slash)
    return(1);

 /* Compare everything up to and including the last '/'. */

 return(!strncmp(page_path,link_path,(size_t)(slash-page_path)+1));
}


/*++++++++++++++++++++++++++++++++++++++
  Build the leading "/refresh<options>/" part of a refresh URL.

  char *prefix The buffer to fill in; 64 bytes is enough for every combination of options.

  int images, int frames, int force Set to true to include "-images", "-frames" and "-force".

  int recursive_mode The recursion mode: 1 gives "-dir", 2 gives "-host", anything else "-any".

  int depth The depth to encode; no mode or depth is added when this is zero.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshOptionsPrefix(char *prefix,int images,int frames,int force,int recursive_mode,int depth)
{
 strcpy(prefix,"/refresh");

 if(images)
    strcat(prefix,"-images");
 if(frames)
    strcat(prefix,"-frames");
 if(force)
    strcat(prefix,"-force");

 if(depth)
   {
    if(recursive_mode==1)
       strcat(prefix,"-dir");
    else if(recursive_mode==2)
       strcat(prefix,"-host");
    else
       strcat(prefix,"-any");
    sprintf(&prefix[strlen(prefix)],"-%d",depth);
   }

 if(!images && !frames && !depth && !force)
    strcat(prefix,"-none");

 strcat(prefix,"/");
}


/*++++++++++++++++++++++++++++++++++++++
  Build a complete refresh URL, "<prefix><host>/<path>[?<args>]".

  char *RefreshLinkURL Returns a newly allocated string, or NULL if there is no memory for it.

  const char *prefix The string made by RefreshOptionsPrefix().

  const char *host The host of the link.

  const char *path The path of the link; one leading '/' is dropped.

  const char *args The arguments of the link, or NULL if there are none.
  ++++++++++++++++++++++++++++++++++++++*/

static char *RefreshLinkURL(const char *prefix,const char *host,const char *path,const char *args)
{
 /* The extra two bytes are the '/' after the host and the terminator. */

 char *refresh=(char*)malloc(strlen(prefix)+strlen(host)+strlen(path)+(args?strlen(args)+1:0)+2);

 if(!refresh)
    return(NULL);

 strcpy(refresh,prefix);
 strcat(refresh,host);
 strcat(refresh,"/");
 strcat(refresh,(*path=='/')?path+1:path);

 if(args)
   {
    strcat(refresh,"?");
    strcat(refresh,args);
   }

 return(refresh);
}


/*++++++++++++++++++++++++++++++++++++++
  Write a request for a URL into a new outgoing spool file.

  int QueueRefreshRequest Returns true if the request was written.

  char *request_url The URL to ask for.

  char *from_url The URL of the page that the link came from (second argument to RequestURL()).
  ++++++++++++++++++++++++++++++++++++++*/

static int QueueRefreshRequest(char *request_url,char *from_url)
{
 int queued=0;
 char *new_request=RequestURL(request_url,from_url);
 int new_outgoing=OpenOutgoingSpoolFile(0);

 if(new_outgoing==-1)
    PrintMessage(Warning,"Cannot open the new outgoing request to write.");
 else
   {
    write(new_outgoing,new_request,strlen(new_request));
    CloseOutgoingSpoolFile(new_outgoing);
    queued=1;
   }

 free(new_request);

 return(queued);
}


/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively.

  The page is requested through the proxy on localhost, its reply is read and parsed, and
  requests for the images, frames and links found in it are written to the outgoing spool
  as the method asks.

  The method is a list of words separated by '-': "dir", "host" or "any" choose how far
  links may stray, "images", "frames" and "force" switch on those options, a non-zero
  number sets the depth, and "refresh", "none" and unknown words change nothing.

  int RecurseFetchPages Returns a true value if there are more pages to get.

  int fd The file descriptor to output progress to (nothing is written if it is negative).

  char *url The url to start at, without the leading "http://".

  char *method The method to use, encoding the depth and other options (each '-' in it
               is overwritten with a terminator while it is parsed).
  ++++++++++++++++++++++++++++++++++++++*/

static int RecurseFetchPages(int fd,char *url,char *method)
{
 int recursive_depth=0,recursive_mode=0,images=0,frames=0,force=0;
 int more=0,status;
 int proxy;
 FILE *proxy_f;
 int parsed=0;
 char *full_url;
 char *page_host,*page_path,*page_args;
 char *request,*line;
 char **list;
 char prefix[64];
 char drain[256];
 int j;

 PrintMessage(Debug,"Refresh method='%s'",method);

 /* Decode the method, one '-' separated word at a time. */

 if(*method=='-')
    method++;

 while(method)
   {
    char *dash=strchr(method,'-');

    if(dash)
       *dash=0;

    if(!strcmp(method,"refresh"))
       ;
    else if(!strcmp(method,"none"))
       ;
    else if(!strcmp(method,"dir"))
       recursive_mode=1;
    else if(!strcmp(method,"host"))
       recursive_mode=2;
    else if(!strcmp(method,"any"))
       recursive_mode=3;
    else if(!strcmp(method,"images"))
       images=1;
    else if(!strcmp(method,"frames"))
       frames=1;
    else if(!strcmp(method,"force"))
       force=1;
    else
      {
       int depth=atoi(method);

       if(depth)
          recursive_depth=depth;
      }

    method=dash?dash+1:NULL;
   }

 full_url=(char*)malloc(strlen(url)+8);
 if(!full_url)
   {
    PrintMessage(Warning,"Cannot allocate memory for the URL to refresh.");
    return(0);
   }
 strcpy(full_url,"http://");
 strcat(full_url,url);

 /* Get the page */

 proxy=OpenClientSocket("localhost",HTTP_Port);
 if(proxy==-1)
   {
    PrintMessage(Warning,"Cannot open connection to wwwoffle proxy.");
    free(full_url);
    return(0);
   }

 proxy_f=fdopen(proxy,"r");
 if(!proxy_f)
   {
    PrintMessage(Warning,"Cannot fdopen the server connection [%!s].");
    close(proxy);
    free(full_url);
    return(0);
   }

 if(fd>=0)
   {
    write(fd,"Getting ",8);
    write(fd,full_url,strlen(full_url));
    write(fd,"\n",1);
   }

 request=RequestURL(full_url,NULL);

 if(force)
   {
    /* Insert a "Pragma: no-cache" header after the first line of the request. */

    char *eol=strchr(request,'\n');
    char *copy=eol?(char*)malloc(strlen(request)+24):NULL;

    if(!copy)
       PrintMessage(Warning,"Cannot add 'Pragma: no-cache' to the request.");
    else
      {
       *eol=0;eol++;
       strcpy(copy,request);
       strcat(copy,"\nPragma: no-cache\r\n");
       strcat(copy,eol);

       free(request);
       request=copy;
      }
   }

 write(proxy,request,strlen(request));

 SplitURL(full_url,&page_host,&page_path,&page_args);

 /* The status is the number after the first word of the first line of the reply;
    a reply without one is treated as a 404. */

 line=fgets_realloc(NULL,proxy_f);
 if(!line || sscanf(line,"%*s %d",&status)!=1)
    status=404;

 parsed=ParseHTML(proxy_f,page_host,page_path);

 /* Read and discard whatever is left of the reply. */

 while(fread(drain,1,sizeof(drain),proxy_f)>0)
    ;

 fclose(proxy_f);

 free(line);
 free(request);

 /* Images are requested as they are. */

 if(images && parsed && (list=ListImages()))
    for(j=0;list[j];j++)
      {
       PrintMessage(Debug,"Image=%s",list[j]);

       if(QueueRefreshRequest(list[j],full_url))
          more=1;
      }

 /* Frames are requested with the same options as this page when they are within the
    allowed area and this page was fetched successfully, and as they are otherwise. */

 if(frames && parsed && (list=ListFrames()))
   {
    RefreshOptionsPrefix(prefix,images,frames,force,recursive_mode,recursive_depth);

    for(j=0;list[j];j++)
      {
       char *refresh=NULL;
       int recurse=1;
       char *link_host=NULL,*link_path=NULL,*link_args=NULL;

       SplitURL(list[j],&link_host,&link_path,&link_args);

       PrintMessage(Debug,"Frame=%s",list[j]);

       if(recursive_mode!=3)
         {
          if(strcmp(page_host,link_host))
             recurse=0;
          else if(recursive_mode!=2 && !LinkInPageDirectory(page_path,link_path))
             recurse=0;
         }

       if(recurse && status<300)
          refresh=RefreshLinkURL(prefix,link_host,link_path,link_args);

       if(QueueRefreshRequest(refresh?refresh:list[j],full_url))
          more=1;

       free(refresh);
      }
   }

 /* Links are requested with one less level of depth, and only those within the allowed area. */

 if(status<400 && recursive_depth && parsed && (list=ListLinks()))
   {
    RefreshOptionsPrefix(prefix,images,frames,force,recursive_mode,recursive_depth-1);

    for(j=0;list[j];j++)
      {
       char *refresh;
       char *link_host=NULL,*link_path=NULL,*link_args=NULL;

       SplitURL(list[j],&link_host,&link_path,&link_args);

       PrintMessage(Debug,"Link=%s",list[j]);

       if(recursive_mode!=3)
         {
          if(strcmp(page_host,link_host))
             continue;
          if(recursive_mode!=2 && !LinkInPageDirectory(page_path,link_path))
             continue;
         }

       if(IsNotGotRecursive(link_host,link_path))
          continue;

       refresh=RefreshLinkURL(prefix,link_host,link_path,link_args);
       if(!refresh)
         {
          PrintMessage(Warning,"Cannot allocate memory for the URL to refresh.");
          continue;
         }

       if(QueueRefreshRequest(refresh,full_url))
          more=1;

       free(refresh);
      }
   }

 free(full_url);

 return(more);
}
