/* Named character classes used by the rules below. */

/* W: one whitespace character (space, tab, CR or LF). */
W               [ \t\r\n]
/* Q: one quote character, double or single. */
Q               [\"\']
/* F: one character accepted as part of an href value by the BASE_HREF and
   ANCHOR_HREF rules.  Only lowercase letters are listed and the rules spell
   tag and attribute names in lowercase too, so the scanner is presumably
   generated case-insensitively - TODO confirm against the build rules. */
F               [-a-z0-9$_.!*(),%;/?:@&=+~|]

/* Exclusive start conditions (%x): while one is active only rules tagged
   with it (and the untagged EOF rule) apply.
   ANGLE_START - just after "<", deciding which tag this is.
   ANGLE       - inside a tag that is copied through unchanged.
   ANCHOR      - inside an "<a ...>" tag;    ANCHOR_HREF - at its href value.
   BASE        - inside a "<base ...>" tag;  BASE_HREF   - at its href value.
   COMMENT     - inside an HTML comment. */
%x ANGLE_START ANGLE
%x ANCHOR ANCHOR_HREF
%x BASE BASE_HREF
%x COMMENT

%{
/***************************************
  $Header: /home/amb/wwwoffle/RCS/htmlmodify.l 1.2 1999/03/13 16:55:23 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.4c.
  Parse the HTML and modify the source.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997,98,99 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <sys/stat.h>
#include <unistd.h>
#include <time.h>

#include "wwwoffle.h"
#include "document.h"
#include "config.h"
#include "misc.h"


/*+ The generated scanner entry point; it is called once per document by
    OutputHTMLWithModifications() and returns only from the EOF rule. +*/
extern int htmlmodify_yylex(void);
/*+ Replacement for the scanner's yywrap hook that always yields 1, which
    tells a flex scanner that there is no further input after end of file. +*/
#define htmlmodify_yywrap() 1


/*+ The file descriptor to output to (set from the 'client' argument before
    each scan and used by the YY_OUTPUT macro). +*/
static int output_fd=-1;

/*+ The add-cache-info optional footer; built before the scan when
    AddCacheInfo is set, written by whichever of the "/body", "/html" or
    end-of-file rules is reached first, then freed and reset to NULL. +*/
static char *cache_info=NULL;

/*+ The file descriptor that we are reading from (set from the 'spool'
    argument before each scan and used by the YY_INPUT macro). +*/
static int htmlmodify_yyfd=-1;

/*+ The base URL of this page; starts as the URL being parsed and is replaced
    when a base tag with an href is seen, and is used to resolve the href of
    each anchor. +*/
static URL *baseUrl=NULL;


/*++++++++++++++++++++++++++++++++++++++
  Output the file with the modifications if it is HTML, else just output.

  The header lines are copied from the spool to the client (without any
  Content-Length line), then the remainder is run through the scanner which
  writes the possibly modified body to the client.

  int client The file descriptor to write to.

  int spool The file descriptor to read from.

  URL *Url The URL that we are parsing; it is the initial base URL for
           resolving links and is not freed here.
  ++++++++++++++++++++++++++++++++++++++*/

void OutputHTMLWithModifications(int client,int spool,URL *Url)
{
 char *line=NULL;

 /* Copy the header up to and including the blank line that ends it.  The
    Content-Length line is dropped because the scanner may insert text into
    the body, so the stored length may no longer match what is sent. */

 /* NOTE(review): when the loop ends via 'break' the last line is still held
    in 'line'; whether the caller of read_line() must free it is not visible
    in this file - confirm. */

 while((line=read_line(spool,line)))
   {
    if(strncasecmp("Content-Length:",line,15))
       write_string(client,line);

    if(*line=='\r' || *line=='\n')
       break;
   }

 if(AddCacheInfo)
   {
    struct stat buf;

    /* The footer reports the age of the spool file, so it is only built when
       the modification time could actually be read. */

    if(fstat(spool,&buf)==0)
      {
       time_t t_ago=time(NULL)-buf.st_mtime;
       char *date=RFC822Date(buf.st_mtime,0);
       char *timeunit="";
       char timeago[24];
       long count=0;

       if(t_ago<0)
          strcpy(timeago,"?");          /* modified in the future */
       else
         {
          if(t_ago<3600)
            {count=(long)(t_ago/60);           timeunit=(count==1?"min":"mins");}
          else if(t_ago<(24*3600))
            {count=(long)(t_ago/3600);         timeunit=(count==1?"hour":"hours");}
          else if(t_ago<(14*24*3600))
            {count=(long)(t_ago/(24*3600));    timeunit=(count==1?"day":"days");}
          else if(t_ago<(30*24*3600))
            {count=(long)(t_ago/(7*24*3600));  timeunit=(count==1?"week":"weeks");}
          else
            {count=(long)(t_ago/(30*24*3600)); timeunit=(count==1?"month":"months");}

          /* time_t need not be 'long', so the count is converted explicitly
             and the write is bounded by the buffer size. */

          snprintf(timeago,sizeof(timeago),"%ld",count);
         }

       cache_info=HTMLMessageBody(-1,"AddCacheInfo",
                                  "url",Url->name,
                                  "date",date,
                                  "time",timeago,
                                  "unit",timeunit,
                                  NULL);
      }
   }

 baseUrl=Url;

 output_fd=client;
 htmlmodify_yyfd=spool;
 htmlmodify_yyrestart(NULL);
 htmlmodify_yylex();

 /* The scanner frees the footer once it has written it; this covers the case
    where it was never reached. */

 free(cache_info);
 cache_info=NULL;

 /* A base tag makes the scanner replace baseUrl with a URL of its own from
    SplitURL(); release that one, but never the caller's Url, and do not keep
    a pointer to either after returning. */

 if(baseUrl && baseUrl!=Url)
    FreeURL(baseUrl);
 baseUrl=NULL;
}

/*+ The scanner's input hook: fetch up to max_size bytes from the file
    descriptor being read into buf and store the count in result; a
    read_data() result of -1 is passed on to the scanner as a count of zero. +*/
#define YY_INPUT(buf,result,max_size) \
        { \
         (result)=read_data(htmlmodify_yyfd,(buf),(max_size)); \
         if((result)==-1) \
            (result)=0; \
        }

/*+ Write a string to the output file descriptor; used by every rule that
    passes text through or inserts text. +*/
#define YY_OUTPUT(text) \
           write_string(output_fd,(text))

%}

%%
 /* State of the anchor currently being processed, set from its href:
    0 = no marker text, 1 = in the web page spool or on a local host (or a
    bare "#" link), 2 = in the outgoing spool, -1 = none of these. */
 int url_cached=0;

 /* Each call scans one whole document, so start it in the INITIAL state.
    Restarting the scanner does not reset the start condition, so without
    this a previous document that ended inside a tag or a comment would leave
    this one being scanned in that leftover state. */
 BEGIN(INITIAL);

 /* Handle comments and other angle brackets */

 /* Text outside of tags is copied through unchanged.  A "<" (with any
    whitespace after it) is held back, not written, until the ANGLE_START
    rules have looked at the tag name. */

[^<]+                                   { YY_OUTPUT(htmlmodify_yytext); }
"<!--"                                  { YY_OUTPUT(htmlmodify_yytext); BEGIN(COMMENT); }
"<"{W}*                                 { BEGIN(ANGLE_START); }

 /* Comments */

 /* The contents of a comment are copied through unchanged; only the closing
    "-->" returns to the INITIAL state.  A lone "-" or ">" is matched one
    character at a time so that the longer "-->" is always seen. */

<COMMENT>"-->"                          { YY_OUTPUT(htmlmodify_yytext); BEGIN(INITIAL); }
<COMMENT>[->]                           |
<COMMENT>[^->]+                         { YY_OUTPUT(htmlmodify_yytext); }

 /* Angle brackets */

 /* ANGLE_START is entered with the "<" consumed but not yet written, so
    each rule here writes the "<" itself followed by what it matched. */

<ANGLE_START>">"                        { YY_OUTPUT("<"); YY_OUTPUT(htmlmodify_yytext); BEGIN(INITIAL); }
<ANGLE_START>"a"{W}                     { YY_OUTPUT("<"); YY_OUTPUT(htmlmodify_yytext); BEGIN(ANCHOR); }

 /* End of an anchor: write the end marker chosen by url_cached in front of
    the tag.  The trailing context requires whitespace or ">" after the name
    so that other end tags whose names begin with "a" are not taken for the
    end of the anchor; they fall through to the catch-all rule instead. */

<ANGLE_START>"/a"/[ \t\r\n>]            { if(url_cached==1) {if(AnchorModifyEnd[0]) YY_OUTPUT(AnchorModifyEnd[0]);}
                                          else if(url_cached==2) {if(AnchorModifyEnd[1]) YY_OUTPUT(AnchorModifyEnd[1]);}
                                          else if(url_cached==-1) {if(AnchorModifyEnd[2]) YY_OUTPUT(AnchorModifyEnd[2]);}
                                          url_cached=0;
                                          YY_OUTPUT("<"); YY_OUTPUT(htmlmodify_yytext); BEGIN(ANGLE); }
<ANGLE_START>"base"{W}                  { YY_OUTPUT("<"); YY_OUTPUT(htmlmodify_yytext); BEGIN(BASE); }

 /* The cache-info footer, if there is one, goes in front of the first
    "/body" or "/html" tag and is then discarded so it is written only once. */

<ANGLE_START>"/body"                    |
<ANGLE_START>"/html"                    { if(cache_info)
                                            {YY_OUTPUT(cache_info); free(cache_info); cache_info=NULL;}
                                          YY_OUTPUT("<"); YY_OUTPUT(htmlmodify_yytext); BEGIN(ANGLE); }

 /* Any other tag is copied through unchanged up to its closing ">". */

<ANGLE_START>.|\r|\n                    { YY_OUTPUT("<"); YY_OUTPUT(htmlmodify_yytext); BEGIN(ANGLE); }

<ANGLE>">"                              { YY_OUTPUT(htmlmodify_yytext); BEGIN(INITIAL); }
<ANGLE>[^>]+                            { YY_OUTPUT(htmlmodify_yytext); }

 /* Base */

 /* A base tag is copied through unchanged; in addition the value of its
    href is split into a URL which becomes the base for resolving the links
    that follow. */

<BASE>">"                               { YY_OUTPUT(htmlmodify_yytext); BEGIN(INITIAL); }
<BASE>"href"{W}*"="{W}*{Q}*             { YY_OUTPUT(htmlmodify_yytext); BEGIN(BASE_HREF); }
<BASE>.|\r|\n                           { YY_OUTPUT(htmlmodify_yytext); }

<BASE_HREF>">"                          { YY_OUTPUT(htmlmodify_yytext); BEGIN(INITIAL); }
<BASE_HREF>{F}+                         { URL *newBase;
                                          YY_OUTPUT(htmlmodify_yytext); BEGIN(BASE);
                                          /* Keep the existing base if the value cannot be split, so
                                             that later links never get resolved against NULL. */
                                          /* NOTE(review): the base that is replaced here is not freed
                                             by this rule - confirm who owns it. */
                                          newBase=SplitURL(htmlmodify_yytext);
                                          if(newBase) baseUrl=newBase; }
<BASE_HREF>.|\r|\n                      { YY_OUTPUT(htmlmodify_yytext); BEGIN(BASE); }

 /* Links */

 /* An anchor tag is copied through unchanged.  The value of its href sets
    url_cached, and that selects the marker text written just after the
    closing ">" of the tag. */

<ANCHOR>">"                             { YY_OUTPUT(htmlmodify_yytext);
                                          BEGIN(INITIAL);
                                          switch(url_cached)
                                            {
                                             case 1:
                                              if(AnchorModifyBegin[0]) YY_OUTPUT(AnchorModifyBegin[0]);
                                              break;
                                             case 2:
                                              if(AnchorModifyBegin[1]) YY_OUTPUT(AnchorModifyBegin[1]);
                                              break;
                                             case -1:
                                              if(AnchorModifyBegin[2]) YY_OUTPUT(AnchorModifyBegin[2]);
                                              break;
                                             default:
                                              break;
                                            }
                                        }
<ANCHOR>"href"{W}*"="{W}*{Q}*           { YY_OUTPUT(htmlmodify_yytext); BEGIN(ANCHOR_HREF); }
<ANCHOR>.|\r|\n                         { YY_OUTPUT(htmlmodify_yytext); }

 /* A ">" directly after "href=" is pushed back for the ANCHOR rule above. */

<ANCHOR_HREF>">"                        { unput('>'); BEGIN(ANCHOR); }

 /* Resolve the href against the base URL and classify it: mailto:, news:
    and unusable URLs get no marker (0), pages in the web page spool or on a
    local host are 1, pages in the outgoing spool are 2, all others are -1. */

<ANCHOR_HREF>{F}*                       { char *resolved=LinkURL(baseUrl,htmlmodify_yytext);
                                          URL *target=SplitURL(resolved);
                                          if(!strncasecmp("mailto:",htmlmodify_yytext,7) ||
                                             !strncasecmp("news:",htmlmodify_yytext,5))
                                             url_cached=0;
                                          else if(!target || !target->Protocol)
                                             url_cached=0;
                                          else if(ExistsWebpageSpoolFile(target) || IsLocalNetHost(target->host))
                                             url_cached=1;
                                          else if(ExistsOutgoingSpoolFile(target))
                                             url_cached=2;
                                          else
                                             url_cached=-1;
                                          /* The resolved link is only freed when it is not the
                                             scanner's own text buffer. */
                                          if(resolved!=htmlmodify_yytext)
                                             free(resolved);
                                          if(target)
                                             FreeURL(target);
                                          YY_OUTPUT(htmlmodify_yytext);
                                          BEGIN(ANCHOR);
                                        }

 /* An href that starts with "#" is treated like a cached page. */

<ANCHOR_HREF>"#"                        { url_cached=1;
                                          YY_OUTPUT(htmlmodify_yytext);
                                          BEGIN(ANCHOR);
                                        }
<ANCHOR_HREF>.|\r|\n                    { YY_OUTPUT(htmlmodify_yytext);
                                          BEGIN(ANCHOR);
                                        }

 /* End of file */

 /* If neither a "/body" nor a "/html" tag was seen the cache-info footer is
    still pending, so write it at the very end, then finish the scan. */

<<EOF>>                                 {
                                          if(cache_info!=NULL)
                                            {
                                             YY_OUTPUT(cache_info);
                                             free(cache_info);
                                             cache_info=NULL;
                                            }
                                          return(EOF);
                                        }

%%
