/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1995 Silicon Graphics, Inc.                *
 *                                                                        *
 *  These coded instructions, statements, and computer programs were      *
 *  developed by SGI for public use.  If any changes are made to this code*
 *  please try to get the changes back to the author.  Feel free to make  *
 *  modifications and changes to the code and release it.                 *
 *                                                                        *
 **************************************************************************/

/* ========================== BEWARE ==================================== */
/* =================== HERE BE CONRAD VEIDT ============================= */
/* The code herein is a horrible, ugly hack of code that originally was   *
 * a part of SGI's WebStone.  It has spent a goodly portion of its life   *
 * in the Laboratory of Dr. Frankenstein, who unmercifully sliced & diced *
 * it the most unmentionable, despicable fashion.  In an attempt to remove*
 * this code from its befouled birthing pen, management has given it to   *
 * me, Dr. Caligari.  My somnambulist has been doing well, as I have been *
 * applying salves & ointments to heal its horrid wounds.  However, I     *
 * recognize my only mortal talents, and thus before your gaze rests      *
 * upon my Cesare,  be forewarned that your wits and faculties may        *
 * be so repulsed by the sight that you may never recover status quo      *
 * ante bellum.  Five cents, please, and you may enter the tent.          */
/* ========================== BEWARE ==================================== */
 
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>


#ifndef WIN32
#include <netdb.h>
#include <netinet/in.h>
#include <strings.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/socket.h>
#endif /* WIN32 */

#ifdef WIN32
#include <io.h>
#include <winsock.h>
#include <windows.h>
#endif /* WIN32 */

#ifdef SUNOS
#include <unistd.h>
#endif

#include "checksum.h"
#include "cookie.h"
#include "errexit.h"
#include "fetchpage.h"
#include "generic.h"
#include "parse.h"
#include "socket.h"
#include "sysdep.h"
#include "timefunc.h"
#include "webopts.h"


/* ====================================================== */
/* 
 * extract_gif_url() digs through the haystack looking for the next
 * "IMG SRC=<url>" reference (matched case-insensitively).  When it
 * finds one it mallocs a buffer and copies the url into it.  If the
 * url does not begin with '/', it is treated as relative: a leading
 * "./" is dropped and 'basename' is prepended.  A null 'basename' is
 * treated as the empty string.
 *
 * Calling extract_gif_url() with a null haystack tells it to resume
 * the search where it last left off (just like 'strtok()').  The
 * resume position lives in the file-static 'gif_cursor', so only one
 * scan can be in progress at a time.
 *
 * Returns malloced memory that must be freed by the caller, or null
 * when no further url is found.  Null is also returned if malloc
 * fails; in that case the cursor is left on the url that could not
 * be copied.
 */
static char * gif_cursor = 0x0;

static char *
extract_gif_url (char * haystack, const char * basename)
{
   static const char blanks[] = " \t\v\r\n\f";

   if (haystack) gif_cursor = haystack; 
   if (!gif_cursor) return 0x0;
   if (!basename) basename = "";

   /* Each pass examines one candidate "IMG"; any 'continue' below
    * rejects the candidate and hunts for the next "IMG" starting at
    * the current cursor position. */
   for (gif_cursor = strcasestr (gif_cursor, "IMG");
        gif_cursor;
        gif_cursor = strcasestr (gif_cursor, "IMG"))
   {
      char *tail, *retval;
      char delim = 0x0;
      size_t len;
      size_t baselen = 0;

      gif_cursor += 3;

      /* skipping whitespace, we expect the three letters "SRC" to follow */
      gif_cursor += strspn (gif_cursor, blanks);
      if (0 != strncasecmp (gif_cursor, "SRC", 3)) continue;
      gif_cursor += 3;

      /* now we expect to see an equals sign */
      gif_cursor += strspn (gif_cursor, blanks);
      if ('=' != *gif_cursor) continue;
      gif_cursor ++;

      /* skip whitespace */
      gif_cursor += strspn (gif_cursor, blanks);

      /* we need to be able to handle the following cases:
       * IMG SRC=/some/where.gif>                  (no delimiter)
       * IMG SRC=/some/where.gif OTHERSTUFF=       (no delimiter)
       * IMG SRC="/some/where.gif"                 (match delimiters)
       * IMG SRC='/some/where.gif'                 (match delimiters)
       * IMG SRC=\"/some/where.gif\"               (match delimiters)
       * IMG SRC=\"" + javascriptvar + "\""        (not a valid url)
       * IMG SRC='" + javascriptvar + "'"          (not a valid url)
       */
      if (('\'' == *gif_cursor) || ('\"' == *gif_cursor)) {
         delim = *gif_cursor;
         gif_cursor ++;
      } else if ('\\' == *gif_cursor) {
         /* A backslash is assumed to be followed by the quote it
          * escapes, so two characters are skipped.  If the text ends
          * right after the backslash there is nothing to skip, and
          * stepping over the terminator would run off the buffer. */
         if (0x0 == gif_cursor[1]) { gif_cursor = 0x0; return 0x0; }
         delim = '\\';
         gif_cursor += 2;
      }

      /* ran out of text before any url started */
      if (0x0 == *gif_cursor) { gif_cursor = 0x0; return 0x0; }

      if (delim) {
         tail = strchr (gif_cursor, delim);
      } else {
         tail = strpbrk (gif_cursor, " >\t\v\r\n\f");
      }
      if (!tail) continue;

      /* ok, by now, gif_cursor should point to the beginning of a 
       * valid URL fragment, and tail should point to the end of it.
       *
       * If the url looks relative, then prepend the basename to it.
       * A relative url may look like "./imagename", in which case
       * the "./" is dropped first.
       */
      if ('/' != gif_cursor[0]) {
         if (('.' == gif_cursor[0]) && ('/' == gif_cursor[1])) gif_cursor += 2;
         baselen = strlen (basename);
      }
      len = tail - gif_cursor;

      retval = (char *) malloc (baselen + len + 1);
      if (!retval) return 0x0;

      /* [gif_cursor, tail) holds no NUL, so plain memcpy is enough */
      memcpy (retval, basename, baselen);
      memcpy (retval + baselen, gif_cursor, len);
      retval[baselen + len] = 0x0;

      /* Make sure that the result doesn't contain any garbage
       * characters.  If it does contain garbage, then assume its
       * some sort of Javascript thing, discard it, and try again. */
      if (strpbrk (retval, "\"\'\\ \t\v\r\n\f>")) { 
         free (retval);
         continue;
      }

      /* if we got to here, I think we have something valid */
      gif_cursor = tail;
      return (retval);
   }

   return 0x0; 
}


/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* Functions that manage cookie-like embedded handles in the web 
 * pages. 
 *
 * The extract_handles() routine scans 'text' for every handle name
 * listed in opts.handle_field[] (the list ends at the first null
 * entry) and grabs the value that follows each occurrence.
 * It is assumed that handles always follow a URL-encoded format, 
 * i.e. are of the form 'handle=value', with &, >, a quote mark or
 * whitespace terminating the value.
 *
 * Each value found is strdup'ed into the matching slot of
 * opts.handle_value[], freeing whatever was stored there before;
 * when a handle occurs several times the last occurrence wins.
 * 'text' is modified only momentarily (a NUL is planted at the end of
 * the value and then restored).
 *
 * Does nothing if either opts.handle_field or opts.handle_value is
 * null.
 */

void
extract_handles (wlString &text, wlOpts &opts)
{
   /* if no handles to look for, do nothing */
   if ((!opts.handle_field) || (!opts.handle_value)) return;

   for (int nhandle = 0; opts.handle_field[nhandle]; nhandle++)
   {
      char *handle = (char *) opts.handle_field[nhandle];
      size_t hlen = strlen (handle);

      /* strstr() with an empty needle returns its haystack argument
       * unchanged, so an empty handle name would never advance the
       * scan below.  There is nothing meaningful to match; skip it. */
      if (0 == hlen) continue;

      for (char *hp = strstr ((char *) text, handle);
           hp;
           hp = strstr (hp, handle))
      {
         hp += hlen;
         hp += strspn (hp, " \t\v\n\r");

         /* handle must be delimited by an equals sign */
         if ('=' != hp[0]) continue;
         hp += strspn (hp, "= \t\v\n\r");

         /* Find the tail end of the value string.
          * terminators include quote marks and angle bracket so
          * that stuff embedded in an href will work; viz:
          * <a href="http://blob.com/blob?thing=xxx&handle=value">  */
         size_t tail = strcspn (hp, "&>\"\' \t\v\r\n");
         char tailv = hp[tail];
         hp[tail] = 0x0;  /* null-terminate so strdup copies just the value */

         /* store the new value */
         if (opts.handle_value[nhandle]) free (opts.handle_value[nhandle]);
         opts.handle_value[nhandle] = strdup (hp);
         hp[tail] = tailv;   /* put the page text back as it was */

         hp += tail;
      }
   }
}


/* url_replace_handles() rewrites the url-encoded parameters (the
 * part after the '?') of 'url', handing 'handles' and 'values' to
 * wlURLEncoding::ReplaceFields() to do the substitution.
 *
 * The url is left untouched when:
 *   - 'handles' or 'values' is null,
 *   - the parsed url has no path or the path has no '?',
 *   - ReplaceFields() reports that nothing was replaced.
 *
 * The routine does excessive string copying, it would 
 * be better replaced by a full-fledged url class. */

void 
url_replace_handles (wlString &url, char **handles, char **values)
{
   if ((!handles) || (!values)) return;

   wlURI uri;
   uri.uri = url;
   uri.Parse();

   // Parse() can leave the path unset (other callers in this file
   // test for that), and strchr() must not be handed a null pointer.
   char * enc = uri.path;
   if (!enc) return;

   // extract the url-encoded parameters out of the url
   enc = strchr (enc, '?');
   if (!enc) return;
   enc ++;

   wlURLEncoding ue;
   ue = enc;
   int didrepl = ue.ReplaceFields (handles, values);
   if (!didrepl) return;

   // splice together the new URL: chop the old parameters off just
   // after the '?', then append the re-encoded ones.
   // NOTE(review): this relies on uri.path pointing into url's own
   // buffer, so that the truncation is visible through 'url' --
   // confirm against wlURI.
   *enc = 0x0;
   wlString ns;
   ns = (char *) url;
   ns += ue;
   url = ns;
}

/* body_replace_handles() treats the whole request body as
 * url-encoded data and lets wlURLEncoding::ReplaceFields() substitute
 * 'handles'/'values' into it.  The body is overwritten only when
 * ReplaceFields() reports a replacement; a null 'handles' or 'values'
 * makes this a no-op. */
void 
body_replace_handles (wlString &body, char **handles, char **values)
{
   if (handles && values)
   {
      wlURLEncoding encoded;
      encoded = body;

      const int replaced = encoded.ReplaceFields (handles, values);
      if (replaced)
      {
         // hand the re-encoded text back as the new body
         body = encoded;
      }
   }
}

/* linky() prints the text in [start, end) as a url via prt().
 * The character at 'end' is overwritten with a NUL for the duration
 * of the print and then restored.  'page' is not used. */
void linky (wlString &page, char *start, char*end)
{
   const char held = end[0];

   end[0] = 0x0;
   prt ("duude url is %s\n", start);
   end[0] = held;
}

/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* =============================================================== */

/* A freshly constructed timer starts out in the cleared state. */
wlPageTimer :: wlPageTimer (void)
{
   this->Clear ();
}

/* Reset every statistic held by this page timer: the object is
 * zero-filled byte for byte, after which the three request-size
 * records are put through INIT_RQSIZE. */
void
wlPageTimer :: Clear (void)
{
   // wipe the whole object first ...
   memset (this, 0, sizeof (*this));

   // ... then initialize the request-size records
   INIT_RQSIZE (setup);
   INIT_RQSIZE (last);
   INIT_RQSIZE (gif);
}

/* Initialize the printable names for statistics.
 * Each label is right-padded with spaces.
 * NOTE(review): presumably there is one label per time_struct member
 * of wlPageTimer, listed in the same order as the members are
 * declared -- verify against the class declaration before adding or
 * reordering entries. */
char * wlPageTimer::timer_names[NPTIMER_TIME_STRUCT] =
{
   "getpage_entry                   ",
   "getpage_exit                    ",
   "getpage_time                    ",
   "getpage_overhead                ",
   "sum_of_response_times           ",
   "sum_of_connect_times            ",
   "sum_of_header_delays            ",
   "sum_of_transfer_times           ",
   "sum_of_get_times                ",
   "sum_of_get_overhead_times       ",
   "sum_of_html_get_times           ",
   "sum_of_html_ovhd_times          ",
   "sum_of_html_response_times      ",
   "sum_of_gif_get_times            ",
   "sum_of_gif_ovhd_times           ",
   "sum_of_gif_response_times       ",
   "first_data_response_time        ",
   "sum_of_tcp_connect_times        ",
   "sum_of_ssl_connect_ovhd         ",
   "sum_of_ssl_net_delay_connect    ",
   "sum_of_ssl_header_ovhd          ",
   "sum_of_ssl_net_delay_header     ",
   "sum_of_ssl_transfer_ovhd        ",
   "sum_of_ssl_net_delay_transfer   ",
   "sum_of_html_net_delay           ",
   "sum_of_gif_net_delay            ",
   "sum_of_net_delay                ",
   "sum_of_html_ssl_ovhd            ",
   "sum_of_gif_ssl_ovhd             ",
   "sum_of_ssl_ovhd                 ",
   "end_to_end_time                 ",
   "think_time                      ",
   "request_time                    "
};

/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* =============================================================== */
/* =============================================================== */

/* Build a page fetcher with 'nthreads' url fetchers and a thread
 * pool constructed from pointers to those fetchers.  The pointer
 * array handed to the pool is only a temporary and is released as
 * soon as the pool has been constructed. */
wlFetchPage :: wlFetchPage (int nthreads)
{
   num_threads = nthreads;
   furl = new wlFetchURL [nthreads];

   // collect the address of every fetcher for the pool constructor
   wlThread **workers = new wlThread* [nthreads];
   int slot = 0;
   while (slot < nthreads) {
      workers[slot] = &furl[slot];
      slot ++;
   }

   pool = new wlThreadPool (nthreads, workers);
   delete [] workers;
}

/* Tear down in the reverse order of construction: the pool goes
 * first, then the fetcher array; the members are nulled afterwards. */
wlFetchPage :: ~wlFetchPage ()
{
   delete pool;
   pool = 0;

   delete [] furl;
   furl = 0;

   num_threads = 0;
}

/* Forward ResetSessionCounts() to each of the num_threads fetchers. */
void 
wlFetchPage :: ResetSessionCounts (void)
{
   int which = 0;
   while (which < num_threads)
   {
      furl[which].ResetSessionCounts();
      which ++;
   }
}

/* Zero the five page-level network time accumulators. */
void 
wlFetchPage :: ResetNetTimes (void)
{
   void * const totals[] = {
      &TCPConnectTime,
      &SSLConnectOvhd,
      &SSLNetDelayConnect,
      &NetDelayHeader,
      &NetDelayTransfer
   };
   const size_t ntotals = sizeof (totals) / sizeof (totals[0]);

   for (size_t k = 0; k < ntotals; k++) {
      memset (totals[k], 0, sizeof(time_struct));
   }
}


/* Empty the gif cache and the cookie jar, then close the socket of
 * every fetcher. */
void 
wlFetchPage :: ClearCache (void)
{
   gif_cache.ClearCache();
   cookie_jar.ClearCache();

   int which = 0;
   while (which < num_threads) {
      furl[which].sock.Close();
      which ++;
   }
}

/* *************************************************************** */
/* getpage()                                                       */
/*     wrapper function for get that also handles:                 */
/*     (1) following of redirects                                  */
/*     (2) management of cookie cache                              */
/*     (3) fetching of gifs (and client side javascript)           */
/*                                                                 */
/* *************************************************************** */
int
wlFetchPage :: FetchPage (
        char *server, NETPORT port, 
        char *proxy_server, NETPORT proxy_port, 
        wlOpts & opts) 
{
   wlString current_method;
   wlString current_server;
   wlString current_url;
   char *start, *end;
   int  getrc;
   int  first_file = 1;
   time_struct tmptime1;
   time_struct gettime;
   time_struct firstdata;
   time_struct ovhd1_sum;
   time_struct connecttime;
   time_struct transfer;
   time_struct headerdelay;
   time_struct send_to_end;
   time_struct first_connect;

   time_struct stamp_entry;
   time_struct stamp_exit;

   int i;
   int pending_error_rc = 0;
   double dta, dtb;

   TIMESTAMP(&stamp_entry);

   /* initalize the page stats */
   ptimer.Clear();

   /* initialize running sums here. ..............................*/
   memset(&ovhd1_sum,0,sizeof(ovhd1_sum));
   ResetNetTimes ();
   ResetSessionCounts();

   /* set the http protocol version to use */
   for (i=0; i<num_threads; i++) {
      furl[i].request.version = opts.http_version;
   }

   /* make substitution for handles embedded in the request */
   url_replace_handles (request.url, opts.handle_field, opts.handle_value);
   body_replace_handles (request.body, opts.handle_field, opts.handle_value);

   /* Allow complex urls, such as ............................... */
   /* GET https://some.where.com/some/url.html .................. */
   /* to work correctly. ........................................ */
   if ((0 == strncasecmp (request.url, HTTP_STRING, strlen (HTTP_STRING))) ||
       (0 == strncasecmp (request.url, HTTPS_STRING, strlen (HTTPS_STRING))))
   {
      wlURI uri;
      uri.uri = request.url;

      uri.Parse ();

      if (!uri.host || !uri.portno || !uri.path )
      {
         PERR("can't parse complex URL \n"
            "\tURL was %s\n", (char *) request.url);
         return(GET_URL_ERROR);
      }

      /* make a local copy of the host, in case we get redirected... */
      current_server.Memcpy (uri.host, uri.host_len);

      port = uri.portno;
      opts.client_ssl_opts.use_ssl = uri.encrypt;

      /* make a local copy of the URL, in case we get redirected.... */
      if (uri.path_len >= MAXPATHLENGTH) return(GET_URL_LENGTH_ERROR);
      current_url.Memcpy (uri.path, uri.path_len);

   } else {

      /* make a local copy of the host & URL, in case we get redirected... */
      current_server = server;
      current_url = request.url;
   }

   /* make a local copy of the method, in case we get redirected. */
   current_method = request.method;

redirect_target:
   /* if we do get redirected, we start over again here.......... */
   /* *********************************************************** */
   /* handle cookie processing for the request                    */
   /* *********************************************************** */

   /* *********************************************************** */
   /* make the request                                            */
   /* *********************************************************** */
   PDBG("fetching %s\n", (char *)current_url);
   furl[0].ResetNetTimes();
   furl[0].request.method = current_method;
   furl[0].request.url = current_url;
   furl[0].request.header = request.header;
   char *got_cookie = (char *) cookie_jar.GetCookie (current_url);
   if (got_cookie && *got_cookie) 
   {
      furl[0].request.header += "Cookie: ";
      furl[0].request.header += got_cookie;
      furl[0].request.header += "\r\n";
   }
   if (0 == strcmp (current_method,"POST")) 
   {
      furl[0].request.body = request.body;
      PDBG1("POST data=%s\n", (char *)request.body);
   }
   else 
   {
      // if we got redirected from a POST to a GET, don't set up a body
      furl[0].request.body = 0x0;
   }

   furl[0].request.bug_compat = request.bug_compat;
   furl[0].www_server = current_server;
   furl[0].www_port = port;
   furl[0].proxy_server = proxy_server;
   furl[0].proxy_port = proxy_port;
   furl[0].keep_alive = opts.keep_alive;
   furl[0].use_ssl = opts.client_ssl_opts.use_ssl;
   furl[0].opts = &opts;

   getrc = furl[0].GoFetch ();

   addtime (&TCPConnectTime,      &furl[0].TCPConnectTime);
   addtime (&SSLNetDelayConnect,  &furl[0].SSLNetDelayConnect);
   addtime (&SSLConnectOvhd,      &furl[0].SSLConnectOvhd);
   addtime (&NetDelayHeader,      &furl[0].NetDelayHeader);
   addtime (&NetDelayTransfer,    &furl[0].NetDelayTransfer);

   if (first_file) 
   {
      /* save the first file connect time for first data response */
      /* time calculation ....................................... */
      first_connect = furl[0].beforeconnect;
      DT_PP("first_connect=",first_connect);
   }

   if (0>getrc) {
      if (!first_file && close_io && (GET_SIGHUP_ERROR == getrc)) {
         getrc = GET_SIGFIN_ERROR;
      }
      return(getrc);
   }

   first_file = 0;

   if ((HTTP_OK         != getrc) && 
       (HTTP_MOVED_PERM != getrc) &&
       (HTTP_MOVED_TEMP != getrc) &&
       (HTTP_AUTH_REQ   != getrc)) {
      PERR("Unexpected HTTP status code %d\n", getrc);
      return(getrc);
   }

   /* *********************************************************** */
   /* update statistics for the page due to this request          */
   /* *********************************************************** */

   ptimer.number_of_connects += furl[0].num_connects;
   ptimer.number_of_conn_reuses += furl[0].num_connection_reused;
   ptimer.number_of_reuse_fails += furl[0].num_failed_reuses;
   ACCUM_RQSIZE (ptimer.setup, furl[0].rqsize);

   /* gettime is the total time in the get subroutine. .......... */
   wl_difftime(&furl[0].exittime, &furl[0].entertime, &gettime);

   wl_difftime(&furl[0].afterconnect, &furl[0].beforeconnect, &connecttime);

   /* headerdelay is the time from send to header arrival ....... */
   wl_difftime(&furl[0].afterheader, &furl[0].beforesend, &headerdelay);

   /* body transfer time is time from header to end of messge.... */
   wl_difftime(&furl[0].afterbody, &furl[0].afterheader, &transfer);

   /* send_to_end time is time from request send to end of message*/
   /* this is used to calculate SSL overhead times during msg xfer*/
   wl_difftime(&furl[0].afterbody, &furl[0].beforesend, &send_to_end);
   DT_PP("getpage: send_to_end:",send_to_end);

    /* various overheads */
   time_struct ovhd0;
   time_struct ovhd1;
   time_struct ovhd2;
   wl_difftime (&furl[0].beforeconnect, &furl[0].entertime, &ovhd0);
   wl_difftime (&furl[0].beforesend, &furl[0].afterconnect, &ovhd1);
   wl_difftime (&furl[0].exittime,   &furl[0].afterbody,    &ovhd2);
   addtime(&ovhd1_sum, &ovhd1);

   /* add the times into the running totals in the page structure.*/
   addtime(&ptimer.sum_of_connect_times, &connecttime);
   addtime(&ptimer.sum_of_get_overhead_times,&ovhd0);
   addtime(&ptimer.sum_of_get_overhead_times,&ovhd1);
   addtime(&ptimer.sum_of_get_overhead_times,&ovhd2);
   addtime(&ptimer.sum_of_get_times,&gettime);
   addtime(&ptimer.sum_of_header_delays,&headerdelay);
   addtime(&ptimer.sum_of_transfer_times,&transfer);

   /* *********************************************************** */
   /* handle cookie processing on the response                    */
   /* *********************************************************** */
   start = strcasestr(furl[0].reply.header, SET_COOKIE);
   if(start) 
   {
      /* AddCookie takes all cookies from the messagebuffer...*/
      cookie_jar.AddCookie (furl[0].reply.header, current_url);
   } 
   else 
   {
      /* Were we asking for a path that should return a cookie?  */
      /* If so, and we didn't get one back, its an error.        */
      /* However, for debugging purposes, we won't find out why  */
      /* we didn't get a cookie until we follow the redirect     */
      /* chain.  So what we do is flag this as a "pending error" */
      /* and then continue. .................................... */

      char * cpath = opts.path_requires_cookie.OccursIn (current_url);
      if (cpath)
      {
         PERR("Cookie! Cookie!  Want Cookie! Want Cookie!\n"
             "\tNo cookie received for page %s\n", cpath);
         if (0 == pending_error_rc) 
         {
            pending_error_rc = GET_MISSING_COOKIE;
         }
         else 
         {
            /* some other error was already pending, so give up*/
            return(pending_error_rc);
         }
      }
   }

   /* *********************************************************** */
   /* did we get a redirect response?  If so, go fetch that       */
   /* *********************************************************** */
   if ((HTTP_MOVED_PERM == getrc) ||
       (HTTP_MOVED_TEMP == getrc))
    {
      size_t len = 0;
      PDBG ("begin processing redirect \n");
      ptimer.number_of_redirects++;

      /* get the redirect location out of the header. .......... */
      start = furl[0].reply.header.GetValue ("Location");
      if(start) {

         /* find the end of the location */
         /* Note that some cgi-bins (especially netscape        */
         /* live-wire serverlets) return garbage at the end of  */
         /* the location string, such as cookies delimited with */
         /* semicolons tabs, whitespace, etc.  So strip these   */
         /* out from the tail-end.  ........................... */
            
         end = strpbrk (start, "; \t\v\r\n\f");
         len = end - start;

         /* if the new location starts with "http:", then we    */
         /* may have to go to a new server.....  pull out the   */
         /* server name and update it. ........................ */
         /* if SSL is being used, then the new location will    */
         /* start with https:. ................................ */
         /* if this really is a new server, then we will update */
         /* the global webserver name, global web port number,  */
         /* redo address resolution so that from now one we will*/
         /* use the new location (until we get redirected again)*/
         /* (can get a redirect with a host name that is the    */
         /* same as the host we are currently connected to) ... */
         if (((0 == opts.client_ssl_opts.use_ssl) && 
              (strncasecmp(start, "http://",strlen("http://"))==0))
            || (strncasecmp(start, "https://",strlen("https://")) == 0)) 
         {
            wlURI uri;

            uri.uri = start;
            uri.Parse();

            if (!uri.host || !uri.portno || !uri.path )
            {
               PERR("can't parse location string \n"
                  "\tLocation was %s\n", start);
               return(GET_LOCATION_ERROR2);
            }

            /* make a local copy of the host, in case we get redirected. */
            current_server.Memcpy (uri.host, uri.host_len);

            port = uri.portno;

            start = uri.path;
            len = uri.path_len;

            /* ************************************************* */
            /* was this a REAL server redirect?  That is, did the*/
            /* server name or port number really change?  (we can*/
            /* get what looks like a full server redirect, but in*/
            /* actuality, it just points at a page on the server */
            /* we were talking to, anyway) so check to see if it */
            /* really changed or not. .......................... */
            /* ************************************************* */
            /* Note that the first time we get redirected, its   */
            /* probably to the same server; yet, the name        */
            /* changes from a partially to a fully qualified     */
            /* domain name.  Accept that and move on.            */
            if ((0 != strcasecmp(current_server, opts.webserver)) 
               || (opts.web_portnum != port)) 
            {
               /* by golly, we were redirected to a new server  */
               /* name or port number. update globals.........  */
               opts.web_portnum = port;
               opts.webserver = current_server;
            }
         }

         /* if the new location doesn't start with a /, it is a */
         /* relative redirect and we have to prepend the current*/
         /* directory and file information......................*/
         if ('/' != *start) 
         {
            /* find the very last / in the current url, and    */
            /* strip out everything that comes after it        */
            /* but first, trash any bogus ?&=#% in the url     */
            char *curl = (char *) current_url;
            size_t tmplen = strcspn (curl, "?&=#%");
            curl[tmplen] = 0x0;
            char *basetail = strrchr (curl, '/');
            basetail ++;
            tmplen = basetail - curl;

            wlString newurl;
            newurl.Memcpy (curl, tmplen);
            newurl.Memcat (start, len);
            current_url = newurl;
         } else {
            /*copy the new location into the current_url string*/
            current_url.Memcpy (start, len);
         }

      } else {

         PERR("Got redirected, but couldn't find 'Location:' in header.\n");
         return(GET_LOCATION_ERROR);

      }

      /* change the method to GET. ............................. */
      current_method = "GET";

      /* and go fetch the redirected page. ......................*/
      PDBG("redirected to url %s\n", (char *) current_url);

      /* indicate that this is not the first file we have fetched*/
      first_file = 0;
      goto redirect_target;
   }

   /* *********************************************************** */
   /* we got a "final" page                                       */
   /* *********************************************************** */

   /* make a copy of the final page ... */
   reply = furl[0].reply;

   /* *********************************************************** */
   /* build the referer string for the next request. ............ */
   /* *********************************************************** */
   wlString path_part_of_url = request.url;
   char * qmark = strchr (request.url,'?');
   if (qmark) {
      path_part_of_url[qmark-(char *)request.url] = '\0';
   }

   wlString rurl;
   rurl = (opts.client_ssl_opts.use_ssl) ? "https://" : "http://";
   rurl += opts.webserver;
   char ports[10]; sprintf (ports, ":%hu", opts.web_portnum);
   rurl += ports;
   rurl += path_part_of_url;
   /* NB referer is mispleled but that's what the spec wants ... */
   request.header.ReplaceOrAddField ("Referer", rurl);

   /* *********************************************************** */
   /* record html statistics for the page. ...................... */
   /* *********************************************************** */
   /* *********************************************************** */
   /* response time is the amount of time spend in the get routine*/
   /* minus the overhead spent in that routine. ................. */
   /* *********************************************************** */
   ptimer.sum_of_html_get_times = ptimer.sum_of_get_times;
   ptimer.sum_of_html_ovhd_times = ptimer.sum_of_get_overhead_times;
   wl_difftime(&ptimer.sum_of_html_get_times,
         &ptimer.sum_of_html_ovhd_times,
         &ptimer.sum_of_html_response_times);

   /* html_net_delay is the sum of the Delays to this point ......*/
   ptimer.sum_of_html_net_delay = TCPConnectTime;
   addtime(&ptimer.sum_of_html_net_delay,&SSLNetDelayConnect);
   addtime(&ptimer.sum_of_html_net_delay,&NetDelayHeader);
   addtime(&ptimer.sum_of_html_net_delay,&NetDelayTransfer);

   INIT_RQSIZE (ptimer.last);
   ACCUM_RQSIZE (ptimer.last, furl[0].rqsize);

   DT_PP1("sum_of_html_get_times",ptimer.sum_of_html_get_times);
   DT_PP1("sum_of_html_ovhd_times",ptimer.sum_of_html_ovhd_times);
   DT_PP1("sum_of_html_response_times",ptimer.sum_of_html_response_times);

   /* *********************************************************** */
   /* firstdata response time is the time from connect until the  */
   /* first message containing HTML is received. ................ */
   /* *********************************************************** */
   DT_PP("furl.firstdata=", furl[0].firstdata);
   wl_difftime(&furl[0].firstdata,&first_connect,&firstdata);
   DT_PP("firstdata response time (noadj):",firstdata);
   /* subtract out the getpage overhead between afterconnect and  */
   /* before header. ............................................ */
   wl_difftime(&firstdata,&ovhd1_sum,&firstdata);
   ptimer.first_data_response_time = firstdata;
   DT_PP("firstdata response time (adj):",firstdata);

   /* calculate the simple checksum we use for error detection... */
   /* (unless this has been turned off for this run) ............ */
   if (! (opts.ignore_checksum_errors)) {
      simple_check_sum = calc_simple_check_sum(furl[0].reply.message, cs_info);
   }

   /* *********************************************************** */
   /* scan the page and the header for embedded handles;          */
   /* update our cached values for these handles.                 */
   /* *********************************************************** */
   extract_handles (furl[0].reply.message, opts);

/* xxxxxxxxxxxx */
/*
we've got this disabled for now, but it is an opportunity to scan the
web page for links, and grab anything out of it that we might need.
Its also an opportunity to blank out portions of the page so that
we can compute a more stable checksum.

char *msg = (char *) furl[0].reply.message;
int msglen = furl[0].reply.message.Memlen();
wl_scan_for_links (furl[0].reply.message, msg, msglen, "href", "=", NULL,
linky);
*/

   /* *********************************************************** */
   /* handle fetching gifs if required. ........................  */
   /* *********************************************************** */
   if (opts.fetch_gifs) 
   {
      char basename[MAXPATHLEN];
      char * pimg;
      size_t len;

      /* *************************************************** */
      /* In case we do relative addressing for the gifs,     */
      /* figure out the base name that addressing is         */
      /* relative to.  Note that the base name ends with the */
      /* last slash in the url.  However, be careful with    */
      /* slashes embedded in method=GET urls; trim the       */
      /* non-path part i.e. trim & ? = # % etc.............. */
      /* *************************************************** */
      /* put the base name into basename.................... */

      strcpy (basename, current_url);
      len = strcspn (basename, "?&=#%");
      basename[len] = 0x0;
      end = strrchr(basename, '/');
      if (end) {end ++; *end = 0x0; }

      PDBG1("gif basename=%s\n", basename);

      /* ***************************************** */
      /* create a list of the (unique) image files */
      /* ***************************************** */
      /* ***************************************** */
      /* Note well: this code will also fetch the  */
      /* client side javascript since this is also */
      /* flagged by an SRC= tag. ................. */
      /* ***************************************** */
      wlCache images_to_fetch;
      pimg = extract_gif_url (furl[0].reply.message, basename);
      while (pimg) {

         PDBG1("found image %s \n", pimg);
         if (opts.do_cache_emulation) {
            if (gif_cache.AddToCache(pimg)) {
               // cache-miss, will need to get this image
               images_to_fetch.PutInCache (pimg);
            } else {
               // record a cache-hit statistic.
               ptimer.num_gifs_cached ++;
               free (pimg);
            }
         } else {
            images_to_fetch.PutInCache (pimg);
         }

         pimg = extract_gif_url (NULL, basename);
      }

      PDBG1("found %d unique images on page\n",
         images_to_fetch.NumEntries(), (char *) current_url);

      for(i=0; i<images_to_fetch.NumEntries(); i++) {
         PDBG1("images_to_fetch[%d]=%s\n",
            i,images_to_fetch.GetEntry (i));
      }

      /* ***************************** */
      /* Now go get each of the images */
      /* ***************************** */

      for (i=0; i<num_threads; i++) 
      {
         furl[i].ResetNetTimes();
         furl[i].ResetSessionCounts();
         furl[i].status = HTTP_OK;
         furl[i].www_server = 0;
         furl[i].www_port = 0;
      }


/* ------------------------------------------------------------------ */
/* CHECK_STATUS(urp)                                                   */
/*                                                                     */
/* Abort the enclosing function if the fetch object pointed to by     */
/* `urp` did not finish with HTTP_OK.  On failure the macro:          */
/*   - prints an error through PERR, unless close_io is set;          */
/*   - rewrites GET_SIGHUP_ERROR to GET_SIGFIN_ERROR when close_io    */
/*     is set;                                                        */
/*   - calls urp->NeverMind() and then pool->JoinAll();               */
/*   - frees and clears www_server/www_port on every furl[] entry     */
/*     (indices 0 .. num_threads-1);                                  */
/*   - executes `return urp->status` in the *calling* function.       */
/*                                                                     */
/* The macro relies on close_io, pool, furl and num_threads being     */
/* visible at the point of use, and it evaluates `urp` several times, */
/* so the argument must be free of side effects.                      */
/*                                                                     */
/* A single (HTTP_OK != status) comparison is sufficient: it already  */
/* covers negative status values as well as any other non-OK code.    */
/*                                                                     */
/* The body is wrapped in do { } while (0) so that `CHECK_STATUS(x);` */
/* is exactly one statement and stays safe inside if/else.            */
/* ------------------------------------------------------------------ */
#define CHECK_STATUS(urp) do {					\
   if (HTTP_OK != (urp)->status) {				\
      /* the message is suppressed when close_io is set */	\
      if (!close_io) {						\
         PERR("unexpected HTTP status code %d "			\
            "while fetching %s\n", 				\
            (urp)->status, (char *) ((urp)->request.url));	\
      }								\
      if (close_io && (GET_SIGHUP_ERROR == (urp)->status)) {	\
         (urp)->status = GET_SIGFIN_ERROR;			\
      }								\
      (urp)->NeverMind();					\
      /* let every outstanding worker finish before cleanup */	\
      pool->JoinAll();						\
      for (int j=0; j<num_threads; j++) 			\
      {								\
         if (furl[j].www_server) free(furl[j].www_server);	\
         furl[j].www_server = 0;				\
         furl[j].www_port = 0;					\
      }								\
      return ((urp)->status);					\
   }								\
} while (0)


/* ------------------------------------------------------------------ */
/* UPDATE_STATS(urp)                                                   */
/*                                                                     */
/* Fold the timing data recorded by one fetch object (`urp`) into the */
/* per-page totals.  The macro:                                       */
/*   - adds the worker's network timers into the local accumulators   */
/*     TCPConnectTime, SSLNetDelayConnect, SSLConnectOvhd,            */
/*     NetDelayHeader and NetDelayTransfer;                           */
/*   - bumps ptimer.num_gifs_fetched and accumulates the request size */
/*     into ptimer.gif;                                               */
/*   - derives the per-request intervals (ovhd0/1/2, gettime,         */
/*     connecttime, firstdata, headerdelay, transfer, send_to_end)    */
/*     into scratch variables of the enclosing function;              */
/*   - adds connect, overhead, get, header-delay and transfer times   */
/*     into the matching ptimer.sum_of_* fields.                      */
/*                                                                     */
/* NOTE(review): wl_difftime(a, b, out) appears to store (a - b) in   */
/* out -- confirm against its definition.  `firstdata` is computed    */
/* here but is not added to any ptimer total inside this macro, and   */
/* `send_to_end` is only passed to DT_PP.                             */
/*                                                                     */
/* All scratch variables and accumulators must be visible at the      */
/* point of use; `urp` is evaluated many times, so it must be free of */
/* side effects.                                                      */
/*                                                                     */
/* The body is wrapped in do { } while (0) so that                    */
/* `if (cond) UPDATE_STATS(x);` is exactly one statement and does not */
/* break if an else branch is ever added.                             */
/* ------------------------------------------------------------------ */
#define UPDATE_STATS(urp) do {						\
   /* update statistics. */						\
   addtime (&TCPConnectTime,      &(urp)->TCPConnectTime);		\
   addtime (&SSLNetDelayConnect,  &(urp)->SSLNetDelayConnect);		\
   addtime (&SSLConnectOvhd,      &(urp)->SSLConnectOvhd);		\
   addtime (&NetDelayHeader,      &(urp)->NetDelayHeader);		\
   addtime (&NetDelayTransfer,    &(urp)->NetDelayTransfer);		\
									\
   ptimer.num_gifs_fetched++;						\
   ACCUM_RQSIZE (ptimer.gif, (urp)->rqsize);				\
									\
   /* various overheads */						\
   wl_difftime (&(urp)->beforeconnect, &(urp)->entertime, &ovhd0);	\
   wl_difftime (&(urp)->beforesend, &(urp)->afterconnect, &ovhd1);	\
   wl_difftime (&(urp)->exittime,   &(urp)->afterbody,    &ovhd2);	\
									\
   /* gettime is the total time in the get subroutine. .......... */	\
   wl_difftime(&(urp)->exittime, &(urp)->entertime, &gettime);		\
									\
   wl_difftime(&(urp)->afterconnect, &(urp)->beforeconnect, &connecttime);\
									\
   /* firstdata arrival time is time from connect to firstdata... */	\
   wl_difftime(&(urp)->firstdata, &(urp)->beforeconnect, &firstdata);	\
   /* minus the overhead time from afterconnect to beforesend.... */	\
   wl_difftime(&firstdata, &ovhd1, &firstdata);				\
									\
   /* headerdelay is the time from send to header arrival ....... */	\
   wl_difftime(&(urp)->afterheader, &(urp)->beforesend, &headerdelay);	\
									\
   /* body transfer time is time from header to end of message... */	\
   wl_difftime(&(urp)->afterbody, &(urp)->afterheader, &transfer);	\
									\
   /* start to end time is time from send request to end of message */	\
   /* this is used to calculate SSL overhead times during msg xfer */	\
   wl_difftime(&(urp)->afterbody, &(urp)->beforesend, &send_to_end);	\
   DT_PP("getpage(): gifs send_to_end:",send_to_end);			\
									\
   /* add the times into the running totals in the page structure.*/	\
   addtime(&ptimer.sum_of_connect_times,&connecttime);			\
   addtime(&ptimer.sum_of_get_overhead_times,&ovhd0);			\
   addtime(&ptimer.sum_of_get_overhead_times,&ovhd1);			\
   addtime(&ptimer.sum_of_get_overhead_times,&ovhd2);			\
   addtime(&ptimer.sum_of_get_times,&gettime);				\
   addtime(&ptimer.sum_of_header_delays,&headerdelay);			\
   addtime(&ptimer.sum_of_transfer_times,&transfer);			\
} while (0)

      // Each image is fetched by dispatching it to a separate thread.
      // Each thread is one copy of wlFetchURL class, which contains
      // one keep-alive socket.  The threads are yanked out of a pool
      // of idle threads.
      for(i=0; i<images_to_fetch.NumEntries(); i++) 
      {
         // get the next available worker 
         wlFetchURL *worker = (wlFetchURL *) pool->GetThread();

         // Did this worker have an error condition from 
         // the last time it ran?
         if (!worker) return GET_THREAD_ERROR;
         CHECK_STATUS (worker);

         // Does this worker have timing stats from a previous 
         // run we haven't yet collected?
         if (worker->timer_ran) UPDATE_STATS (worker);

         // figure out the server, the file path, etc to the image
         wlURI uri;
         uri.uri = images_to_fetch.GetEntry (i);
         uri.path = uri.uri;
         uri.host = current_server;
         uri.host_len = current_server.Memlen();
         uri.portno = port;
         uri.encrypt = opts.client_ssl_opts.use_ssl;
         uri.Parse();

         PDBG("fetching %s\n", uri.path);

         // set up the server hostname
         if (worker->www_server) free(worker->www_server);
         worker->www_server = (char *) malloc (uri.host_len +1);
         strncpy (worker->www_server, uri.host, uri.host_len);
         worker->www_server[uri.host_len] = 0x0;

         worker->ResetNetTimes();
         worker->request.method = "GET";
         worker->request.url = uri.path;
         worker->request.header = request.header;

         char *got_cookie = (char *) cookie_jar.GetCookie (uri.path);
         if (got_cookie && *got_cookie) 
         {      
            worker->request.header += "Cookie: ";
            worker->request.header += got_cookie;
            worker->request.header += "\r\n";
         }
         worker->request.body = 0;
         worker->request.bug_compat = request.bug_compat;
         worker->www_port = uri.portno;
         worker->proxy_server = proxy_server;
         worker->proxy_port = proxy_port;
         worker->keep_alive = opts.keep_alive;
         worker->use_ssl = uri.encrypt;
         worker->opts = &opts;

         worker->Go();
      }

      // wait for all workers to complete
      pool -> JoinAll();

      PDBG ("all image threads just finished\n");
      // collect up any remaining stats the workers have
      for (i=0; i<num_threads; i++) 
      {
         CHECK_STATUS (&furl[i]);

         if (furl[i].timer_ran) UPDATE_STATS (&furl[i]);
         furl[i].status = HTTP_OK;
         if (furl[i].www_server) free(furl[i].www_server);
         furl[i].www_server = 0;
         furl[i].www_port = 0;

         ptimer.number_of_connects += furl[i].num_connects;
         ptimer.number_of_conn_reuses += furl[i].num_connection_reused;
         ptimer.number_of_reuse_fails += furl[i].num_failed_reuses;
         furl[i].ResetNetTimes();
         furl[i].ResetSessionCounts();
      }

   }
   TIMESTAMP(&stamp_exit);

   /* convert PPC timebase to secs & nanoseconds................. */
   CONVERTTIME (&stamp_entry);
   CONVERTTIME (&stamp_exit);

   DT_PP1("Getpage Myentry", stamp_entry);
   DT_PP1("Getpage MyExitTime", stamp_exit);

   /* ***************************************************************** */
   /* calculate time spent fetching gifs. ............................. */
   /* ***************************************************************** */
   wl_difftime(&ptimer.sum_of_get_times,&ptimer.sum_of_html_get_times,
               &ptimer.sum_of_gif_get_times);
   wl_difftime(&ptimer.sum_of_get_overhead_times,&ptimer.sum_of_html_ovhd_times,
               &ptimer.sum_of_gif_ovhd_times);
   wl_difftime(&ptimer.sum_of_gif_get_times,&ptimer.sum_of_gif_ovhd_times,
               &ptimer.sum_of_gif_response_times);

   /* ***************************************************************** */
   /* calculate the time spent in skREAD/skWRITE while fetching page. . */
   /* ***************************************************************** */
   ptimer.sum_of_net_delay = TCPConnectTime;
   addtime(&ptimer.sum_of_net_delay,&SSLNetDelayConnect);
   addtime(&ptimer.sum_of_net_delay,&NetDelayHeader);
   addtime(&ptimer.sum_of_net_delay,&NetDelayTransfer);

   /* ***************************************************************** */
   /* time spent in skREAD/skWrite fetching gifs is excess of this sum  */
   /* above the time spent in skREAD/skWRITE fetching html. ........... */
   /* ***************************************************************** */
   wl_difftime(&ptimer.sum_of_net_delay,&ptimer.sum_of_html_net_delay,
          &ptimer.sum_of_gif_net_delay);

   /* ****************************************************************** */
   /* Record the total elapsed time for this page.  It runs from before  */
   /* the beginning of the first request, to after the last byte of the  */
   /* last gif.  Note that this value should be almost exactly equal to  */
   /* the end_to_end_time statistic, to within a microsecond or so.      */
   /* These two statistics measure the same thing; it's just that the    */
   /* other one is taken slightly higher up in the code.                 */
   /* ****************************************************************** */
   wl_difftime(&stamp_exit, &stamp_entry, &tmptime1);
   ptimer.getpage_entry = stamp_entry;
   ptimer.getpage_exit  = stamp_exit;
   ptimer.getpage_time  = tmptime1;

   /* ****************************************************************** */
   /* XXX this is wrong/not applicable to multi-threaded fetching        */
   /* now calculate the overhead times for the getpage subroutine itself:*/
   /*    getpage_overhead = total time in getpage - sum_of_get_times. .. */
   /* XXX hack alert the above formula is fundamentally incorrect when   */
   /* gifs are fetched in parallel.                                      */
   /* ****************************************************************** */
   // This does not give the correct getpage overhead, since the get_times
   // are collected as if the gifs are fetched serially, and they are not,
   // they're fetched in parallel.  
   // wl_difftime(&ptimer.getpage_time, &ptimer.sum_of_get_times,
   //         &ptimer.sum_of_getpage_overhead);


   /* ****************************************************************** */
   /* pick up statistics values calculated below us. ................... */
   /* ****************************************************************** */
   ptimer.sum_of_tcp_connect_times      = TCPConnectTime;
   ptimer.sum_of_ssl_connect_ovhd       = SSLConnectOvhd;
   ptimer.sum_of_ssl_net_delay_connect  = SSLNetDelayConnect;
   ptimer.sum_of_ssl_net_delay_header   = NetDelayHeader;
   ptimer.sum_of_ssl_net_delay_transfer = NetDelayTransfer;

   /* ***************************************************************** */
   /* calculate SSL overhead times .................................... */
   /* ***************************************************************** */
   wl_difftime(&ptimer.sum_of_header_delays,   
          &NetDelayHeader,
          &ptimer.sum_of_ssl_header_ovhd);
   wl_difftime(&ptimer.sum_of_transfer_times,   
          &NetDelayTransfer,
            &ptimer.sum_of_ssl_transfer_ovhd);

   /* ***************************************************************** */
   /* calculate the overall page response time. ....................... */
   /* sum_of_response_times = sum_of_get_times - sum_of_get_overhead_times */
   /* ***************************************************************** */
   wl_difftime(&ptimer.sum_of_get_times,&ptimer.sum_of_get_overhead_times,
            &ptimer.sum_of_response_times);

   /* ****************************************************************** */
   /* calc SSL overhead : response times minus time spent in skREAD/WRITE*/
   /* ****************************************************************** */
   wl_difftime(&ptimer.sum_of_response_times,&ptimer.sum_of_net_delay,
          &ptimer.sum_of_ssl_ovhd);
   wl_difftime(&ptimer.sum_of_html_response_times,&ptimer.sum_of_html_net_delay,
          &ptimer.sum_of_html_ssl_ovhd);
   wl_difftime(&ptimer.sum_of_gif_response_times,&ptimer.sum_of_gif_net_delay,
          &ptimer.sum_of_gif_ssl_ovhd);

   /* ****************************************************************** */
   /* check invariant:                                                   */
   /* sum_of_response_times = sum_of_connect_times+sum_of_header_delays+ */
   /*                            sum_of_xfer_times. .................... */
   /* ****************************************************************** */
   tmptime1 = ptimer.sum_of_connect_times;
   addtime(&tmptime1,&ptimer.sum_of_header_delays);
   addtime(&tmptime1,&ptimer.sum_of_transfer_times);

   dta = timevaldouble(&tmptime1);
   dtb = timevaldouble(&ptimer.sum_of_response_times);
   if (! FEQ (dta,dtb)) {
      PERR("Timing invariant check failed AAA \n");
      perr("\tsum_of_response_times (lhs)",ptimer.sum_of_response_times);
      perr("\tsum of rhs",tmptime1);
   }

   /* ***************************************************************** */
   /* check the invariant: sum_of_response_times=sum_of_html_response_times+*/
   /*                           sum_of_gif_response_times               */
   /* ***************************************************************** */
   tmptime1 = ptimer.sum_of_html_response_times;
   addtime(&tmptime1,&ptimer.sum_of_gif_response_times);
   dta = timevaldouble(&tmptime1);
   dtb = timevaldouble(&ptimer.sum_of_response_times);
   if (! FEQ(dta,dtb)) {
      PERR("Timing invariant check failed BBB \n");
      perr("\tsum_of_response_times (lhs)",ptimer.sum_of_response_times);
      perr("\tsum of rhs",tmptime1);
   }

   /* **************************************************************** */
   /* if we had an error that we'd like to report, but not stop        */
   /* fetching of redirected pages, we made it a pending error.        */
   /* Report it now, if needed.  ......................................*/
   /* **************************************************************** */
   PDBG1("returns.  pending_error_rc=%d\n",pending_error_rc);
   return (pending_error_rc);
}

/* ======================== END OF FILE ======================= */
