 /* Character classes: W is a whitespace character, Q is a single or double quote. */
W               [ \t\r\n]
Q               [\"\']
 /* F is the set of characters accepted inside a URL value.
    FA is the same set without the comma; it is used in the ARCHIVE state where
    the comma is matched by a separate rule between the archive names. */
F               [-a-z0-9$_.!*(),%;/?:@&=+~|]
FA              [-a-z0-9$_.!*()%;/?:@&=+~|]

 /* Exclusive start conditions, in general one for the inside of each tag that is
    examined and one for the value of each attribute that is extracted from it. */
%x ANGLE_START ANGLE
%x ANCHOR ANCHOR_HREF
%x AREA AREA_HREF
%x BASE BASE_HREF
%x BODY BODY_BACK
%x COMMENT
%x FRAME FRAME_SRC
%x HEADER
%x IMAGE IMAGE_SRC
%x INPUT INPUT_SRC
%x LINK LINK_STYLE LINK_STYLE_HREF
%x META META_REFRESH META_REFRESH_URL
%x OBJECT OBJECT_PART ARCHIVE
%x PARAM PARAM_VALUE
%x SCRIPT SCRIPT_SRC

%{
/***************************************
  $Header: /home/amb/wwwoffle/RCS/html.l 2.20 1998/06/21 09:09:02 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.2a.
  Parse the HTML and look for the images, links and end of body.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997,98 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#define DEBUG_HTML 0

#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <unistd.h>

#include "wwwoffle.h"
#include "misc.h"
#if DEBUG_HTML
#include "errors.h"
#endif


extern int html_yylex(void);
#define html_yywrap() 1
#define YY_NO_UNPUT


/*+ The type of reference that has been found. +*/
typedef enum _RefType
{
 StyleSheet,                    /*+ A style sheet. +*/
 Image,                         /*+ An image. +*/
 Frame,                         /*+ The contents of a frame. +*/
 Script,                        /*+ An included script. +*/
 Object,                        /*+ An included object (e.g. Java). +*/
 Link,                          /*+ A link to another page. +*/
 NRefTypes                      /*+ The number of different reference types. +*/
}
RefType;


static void free_list(RefType type);
static char **get_list(RefType type);
static void append_list(char* name,RefType type);


/*+ The refresh content of a Meta tag. +*/
static char *meta_refresh=NULL;

/*+ The content of a Base tag. +*/
static char *base_url=NULL;

/*+ The list of references. +*/
static char **references[NRefTypes];

/*+ The number of references. +*/
static int nreferences[NRefTypes];

/*+ The file descriptor that we are reading from. +*/
static int html_yyfd=-1;

/*+ If the file is parsed as an HTML file. +*/
static int is_html=0;

/*+ Just before the end of body tag (or the end of html tag (or end of file)). +*/
static int body_or_html_end=0;

/*+ The base URL of this page. +*/
static URL *baseUrl=NULL;


/*++++++++++++++++++++++++++++++++++++++
  Parse the HTML and look for references to image/links/frames.

  All of the results of the previous parse (reference lists, Meta refresh URL
  and Base tag URL) are discarded before the new file is scanned.

  int ParseHTML Returns non-zero if it was parsed as an HTML file.

  int fd The file descriptor of the file to parse.

  URL *Url The reference URL to use.

  int html For files without a header, force html selection.
  ++++++++++++++++++++++++++++++++++++++*/

int ParseHTML(int fd,URL *Url,int html)
{
 RefType i;

 baseUrl=Url;

 /* The Base tag string is allocated by the lexer, release the one left over
    from the previous parse instead of just forgetting the pointer. */

 if(base_url)
    free(base_url);
 base_url=NULL;

 is_html=html;
 body_or_html_end=0;

 if(meta_refresh)
    free(meta_refresh);
 meta_refresh=NULL;

 for(i=0;i<NRefTypes;i++)
    free_list(i);

 html_yyfd=fd;
 html_yyrestart(NULL);
 html_yylex();

 /* Terminate each list that has had something added to it. */

 for(i=0;i<NRefTypes;i++)
    append_list(NULL,i);

 /* A Base tag in the page replaces the reference URL that was passed in. */
 /* NOTE(review): the URL returned by SplitURL() is never released here since
    baseUrl may also point at the caller's Url - confirm who owns it. */

 if(base_url)
    baseUrl=SplitURL(base_url);

 return(is_html);
}


/*++++++++++++++++++++++++++++++++++++++
  Get the URL that was found in a Meta Refresh tag, resolved against the base URL.

  char *MetaRefresh Returns the URL or NULL if no Meta Refresh tag was seen.
  ++++++++++++++++++++++++++++++++++++++*/

char *MetaRefresh(void)
{
 char *absolute;

 if(!meta_refresh)
    return(NULL);

 absolute=LinkURL(baseUrl,meta_refresh);

 /* Only swap the strings if LinkURL() handed back a different one. */

 if(absolute!=meta_refresh)
   {
    free(meta_refresh);
    meta_refresh=absolute;
   }

 return(meta_refresh);
}


/*++++++++++++++++++++++++++++++++++++++
  Get the style sheets that the parsed page refers to.

  char **ListStyleSheets Returns the NULL terminated list of style sheets (NULL if there is no list).
  ++++++++++++++++++++++++++++++++++++++*/

char **ListStyleSheets(void)
{
 char **sheets=get_list(StyleSheet);

 return(sheets);
}


/*++++++++++++++++++++++++++++++++++++++
  Return the list of image references.

  char **ListImages Returns a null terminated list of images.
  ++++++++++++++++++++++++++++++++++++++*/

char **ListImages(void)
{
 return(get_list(Image));
}


/*++++++++++++++++++++++++++++++++++++++
  List the frames found in the HTML file.

  char **ListFrames Returns a null terminated list of frames.
  ++++++++++++++++++++++++++++++++++++++*/

char **ListFrames(void)
{
 return(get_list(Frame));
}


/*++++++++++++++++++++++++++++++++++++++
  Return the list of script references.

  char **ListScripts Returns a null terminated list of scripts.
  ++++++++++++++++++++++++++++++++++++++*/

char **ListScripts(void)
{
 return(get_list(Script));
}


/*++++++++++++++++++++++++++++++++++++++
  List the objects found in the HTML file.

  char **ListObjects Returns a null terminated list of objects.
  ++++++++++++++++++++++++++++++++++++++*/

char **ListObjects(void)
{
 return(get_list(Object));
}


/*++++++++++++++++++++++++++++++++++++++
  Get the links that the parsed page refers to.

  char **ListLinks Returns the NULL terminated list of links (NULL if there is no list).
  ++++++++++++++++++++++++++++++++++++++*/

char **ListLinks(void)
{
 char **links=get_list(Link);

 return(links);
}


/*++++++++++++++++++++++++++++++++++++++
  Report where the lexer decided that the html ends.

  int GetHTMLEnd Returns the position that was stored at the end of the last parse.
  ++++++++++++++++++++++++++++++++++++++*/

int GetHTMLEnd(void)
{
 int end=body_or_html_end;

 return(end);
}


/*++++++++++++++++++++++++++++++++++++++
  Release one of the lists of references and all of the strings in it.

  RefType type The type of list that is to be emptied.
  ++++++++++++++++++++++++++++++++++++++*/

static void free_list(RefType type)
{
 char **list=references[type];

 if(list)
   {
    char **entry;

    /* The strings are released up to the NULL terminator, then the array itself. */

    for(entry=list;*entry;entry++)
       free(*entry);

    free(list);
   }

 nreferences[type]=0;
 references[type]=NULL;
}


/*++++++++++++++++++++++++++++++++++++++
  Get one of the lists of references with each entry passed through LinkURL()
  and with repeated entries removed (the first occurrence is the one kept).

  char **get_list Returns the NULL terminated list of URLs or NULL if there is no list.

  RefType type The type of list that is required.
  ++++++++++++++++++++++++++++++++++++++*/

static char **get_list(RefType type)
{
 char **list=references[type];
 int from,to,seen;

 if(!list)
    return(NULL);

 /* Resolve every entry against the base URL. */

 for(from=0;list[from];from++)
   {
    char *resolved=LinkURL(baseUrl,list[from]);

    if(resolved!=list[from])
      {
       free(list[from]);
       list[from]=resolved;
      }
   }

 /* Compact the list in place, an entry is dropped if an equal one has already been kept. */

 to=0;

 for(from=0;list[from];from++)
   {
    for(seen=0;seen<to;seen++)
       if(!strcmp(list[seen],list[from]))
          break;

    if(seen<to)
      {
       free(list[from]);
       nreferences[type]--;
      }
    else
       list[to++]=list[from];
   }

 list[to]=NULL;

 return(list);
}


/*++++++++++++++++++++++++++++++++++++++
  A function to add a name to a list.

  A copy of the name is stored; names starting "mailto:" or "news:" are ignored.
  A NULL name stores the list terminator, but only if the list already exists.
  The array grows 16 entries at a time.  If memory cannot be allocated the name
  is dropped; if the terminator cannot be added the last entry is given up to
  make room for it so that the list always ends in NULL.

  char* name The name to add (or NULL to terminate the list).

  RefType type The type of reference.
  ++++++++++++++++++++++++++++++++++++++*/

static void append_list(char* name,RefType type)
{
 int count=nreferences[type];

 if(name && (!strncmp("mailto:",name,7) || !strncmp("news:",name,5)))
    return;

 if(!name && !references[type])
    return;

 /* Make room when the current block of 16 entries is full (or not yet allocated). */

 if((count%16)==0)
   {
    char **grown=(char**)realloc(references[type],(count+16)*sizeof(char*));

    if(!grown)
      {
       /* The old array is still valid; keep it terminated at the cost of the last entry. */

       if(!name && count>0)
         {
          free(references[type][count-1]);
          references[type][count-1]=NULL;
         }

       return;
      }

    references[type]=grown;
   }

 if(name)
   {
    char *copy=(char*)malloc(strlen(name)+1);

    if(!copy)
       return;

    strcpy(copy,name);
    references[type][count]=copy;
   }
 else
    references[type][count]=NULL;

 nreferences[type]=count+1;
}


/*++++++++++++++++++++++++++++++++++++++
  Object and Param treatment:
  this is the attempt to extract all 
  valid URIs from the OBJECT or PARAM tag
  defined in HTML 4.0.

  usemap, cite, longdesc URIs are not yet supported.

  (c) Walter Pfannenmüller
  ++++++++++++++++++++++++++++++++++++++*/

/* no more than obj_archives_max are accepted: seems enough */
#define obj_archives_max 32

/* Object */
enum {
    obj_classid = 0,
    obj_codetype,
    obj_codebase,
    obj_code,
    obj_object,
    obj_data,
    obj_usemap,
    obj_type,

    obj_archives_start,
    obj_archives_end = obj_archives_start + obj_archives_max,
    obj_parts_size
};

static int obj_codetype_Type = Object; 
static int obj_type_Type = Object; 
static int obj_narchives = 0;

static char *obj_parts[obj_parts_size] = { NULL, }; 

/* Param */

enum {
   param_type = 0,
   param_value,
   param_parts_size
};

static int param_valuetype_is_ref = 0;
static char *param_parts[param_parts_size] = { NULL, }; 

/*+ The slot (an element of obj_parts or param_parts) that op_free() and op_malloc() work on. +*/
static char **op_ = NULL;

/*+++++++++++++++++++++++++++++++++++++++++++++++
  release the string in the slot that op_ points
  at and leave the slot empty
  +++++++++++++++++++++++++++++++++++++++++++++++*/
static void op_free()
{
    char **slot = op_;

    free(*slot);                /* nothing happens if the slot is already empty */
    *slot = NULL;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++
  store a copy of text in the slot that op_ points
  at, releasing whatever the slot held before.
  if the copy cannot be allocated the slot is left
  empty (NULL).
  +++++++++++++++++++++++++++++++++++++++++++++++*/
static void op_malloc(char *text)
{
    char *copy;

    op_free();

    copy = (char *)malloc(strlen(text) + 1);
    if(copy == NULL)
    {
        return;
    }

    strcpy(copy,text);
    *op_ = copy;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++
  turn dots to slash and add .class to Applets

  int part is the index in obj_parts of the name to
  normalise, the entry must not be NULL.

  returns the normalised name, which is also what
  obj_parts[part] holds afterwards.  if the longer
  string cannot be allocated the name is returned
  unchanged.
  +++++++++++++++++++++++++++++++++++++++++++++++*/
static const char class_suffix[] = ".class";
static char *norm_applet_class(int part)
{
    const size_t suffix_len = sizeof(class_suffix) - 1;
    char *applet = obj_parts[part];
    size_t len = strlen(applet);
    size_t i;

    /* a name shorter than the suffix cannot end with it; test the length
       first so that the comparison never starts before the string */
    if(len < suffix_len || strcmp(applet + len - suffix_len,class_suffix))
    {
        char *suffixed = (char *)malloc(len + suffix_len + 1);
        if(suffixed == NULL)
        {
            return applet;
        }
        memcpy(suffixed,applet,len);
        memcpy(suffixed + len,class_suffix,suffix_len + 1);
        op_ = &obj_parts[part];
        op_free();
        *op_ = suffixed;
        applet = suffixed;
        len += suffix_len;
    }

    /* every dot in front of the final ".class" is a package separator */
    for(i = 0; i + suffix_len < len; i++)
    {
        if(applet[i] == '.')
        {
            applet[i] = '/';
        }
    }
    return applet;
}
 
/*+++++++++++++++++++++++++++++++++++++++++++++++
  add codebase

  int part is the index in obj_parts of the entry
  to put the codebase in front of.

  if there is a non-empty codebase and the entry is
  set then the entry is replaced by codebase + "/"
  + entry (the slash is only added when the codebase
  does not already end in one).  otherwise, or if
  memory cannot be allocated, the entry is left as
  it is.

  returns obj_parts[part], which may be NULL.
  +++++++++++++++++++++++++++++++++++++++++++++++*/
static char *add_obj_codebase(int part)
{
    const char *base = obj_parts[obj_codebase];
    const char *rel = obj_parts[part];

    if(base && *base && rel)
    {
        size_t base_len = strlen(base);
        size_t rel_len = strlen(rel);
        int need_slash = (base[base_len - 1] != '/');
        char *url = (char *)malloc(base_len + need_slash + rel_len + 1);

        if(url)
        {
            memcpy(url,base,base_len);
            if(need_slash)
            {
                url[base_len] = '/';
            }
            /* copies the terminating NUL as well */
            memcpy(url + base_len + need_slash,rel,rel_len + 1);

            op_ = &obj_parts[part];
            op_free();
            *op_ = url;
        }
    }
    return obj_parts[part];
}
 
/*++++++++++++++++++++++++++++++++++++++
  if the given part of the object is set then
  normalise it as an applet class name, put the
  codebase in front and record it as an Object
  ++++++++++++++++++++++++++++++++++++++*/
static void applet_url(int part)
{
    if(!obj_parts[part])
        return;

    norm_applet_class(part);
    add_obj_codebase(part);
    append_list(obj_parts[part],Object);
}
/*++++++++++++++++++++++++++++++++++++++
  if the given part of the object is set then
  put the codebase in front and record it as
  an Object
  ++++++++++++++++++++++++++++++++++++++*/
static void codebase_url(int part)
{
    if(!obj_parts[part])
        return;

    add_obj_codebase(part);
    append_list(obj_parts[part],Object);
}
/*++++++++++++++++++++++++++++++++++++++
  take the object's info and build urls

  called when the end of the tag is reached: the
  attributes collected in obj_parts are turned
  into references and then all of the per-tag
  state is cleared ready for the next tag.

  classid is recorded as an Image if the codetype
  started with "image", otherwise as an applet
  class.  data is recorded as an Image if the type
  started with "image", otherwise as an Object
  relative to the codebase.  code and object are
  recorded as applet classes and each archive as
  an Object relative to the codebase.
  ++++++++++++++++++++++++++++++++++++++*/
static void build_obj_urls()
{
    int i;

    /* the attribute must be present before it is used: a NULL name given to
       append_list() would terminate the Image list part way through the page */
    if(obj_codetype_Type == Image)
    {
        if(obj_parts[obj_classid])
        {
            add_obj_codebase(obj_classid);
            append_list(obj_parts[obj_classid],Image);
        }
    }
    else
    {
        applet_url(obj_classid);
    }

    if(obj_type_Type == Image)
    {
        if(obj_parts[obj_data])
        {
            add_obj_codebase(obj_data);
            append_list(obj_parts[obj_data],Image);
        }
    }
    else
    {
        /* this branch is about the data attribute; classid has been dealt
           with above and must not get the codebase added a second time.
           NOTE(review): data is taken as a plain URL relative to the codebase
           and not as a class name - confirm that this is what was intended */
        codebase_url(obj_data);
    }

    applet_url(obj_code);
    applet_url(obj_object);
    for(i = 0; i < obj_narchives; i++)
    {
        codebase_url(i + obj_archives_start);
    }

#if DEBUG_HTML
    for(i = 0; i < obj_parts_size; i++)
    {
       op_ = &obj_parts[i];
       if(*op_)
       {
           PrintMessage(Debug,"object %d: %s\n",i,*op_);
       }
    }
#endif

    /* forget everything about this tag */
    for(i = 0; i < obj_parts_size; i++)
    {
       op_ = &obj_parts[i];
       op_free();
    }
    obj_codetype_Type = Object;
    obj_type_Type = Object;
    obj_narchives = 0;
}

/*+++++++++++++++
  keep a copy of one archive name in the next
  free archive slot of obj_parts; names beyond
  obj_archives_max are ignored
  +++++++++++++++*/
static void add_obj_archive(char *text)
{
    if(obj_narchives >= obj_archives_max)
        return;

    op_ = &obj_parts[obj_archives_start + obj_narchives];
    obj_narchives++;
    op_malloc(text);
}

/*+++++++++++++++++++++++++++++++++++++++++
  called at the end of a param tag: the value
  is recorded as an Object if the valuetype
  was "ref", then the per-tag state is cleared
  +++++++++++++++++++++++++++++++++++++++++*/
static void build_param_urls()
{
    int i;
    char *value = param_parts[param_value];

    if(param_valuetype_is_ref && value)
        append_list(value,Object);

    param_valuetype_is_ref = 0;

#if DEBUG_HTML
    for(i = 0; i < param_parts_size; i++)
    {
       op_ = &param_parts[i];
       if(*op_)
           PrintMessage(Debug,"param %d: %s\n",i,*op_);
    }
#endif

    /* empty every slot ready for the next tag */
    i = 0;
    while(i < param_parts_size)
    {
       op_ = &param_parts[i++];
       op_free();
    }
}


/*+ A macro to read data that can be used by the lexer. +*/
/* The data comes from read_data() on the file descriptor stored by ParseHTML();
   a return value of -1 is reported to the lexer as a result of 0. */
#define YY_INPUT(buf,result,max_size) \
        if((result=read_data(html_yyfd,buf,max_size))==-1) \
           result=0;

%}

%%
 /* Local state of the lexer function:
    position   - the total length of the text matched by the rules that add to it,
    open_angle - the length of the "<" (and following whitespace) of the current tag,
    body_end   - the position of the "<" of the end of body tag (0 if not seen),
    html_end   - the position of the "<" of the end of html tag (0 if not seen). */
 int open_angle=0,position=0,html_end=0,body_end=0;
 /* Start in the body if the caller forced html, else look at the header first. */
 if(is_html)
    BEGIN(INITIAL);
 else
    BEGIN(HEADER);

 /* Decide if HTML */
 /* A Content-Type of text/html (with or without parameters) marks the file as html.
    At the empty line the scan continues into the body if it is html and stops if not.
    The header lines are not added to position. */

<HEADER>\r*\n                                           { if(is_html) BEGIN(INITIAL); else return(EOF); }
<HEADER>"Content-Type:"[ \t]+"text/html"[ \t]*\r*\n     { is_html=1; }
<HEADER>"Content-Type:"[ \t]+"text/html"[ \t]*;.+\r*\n  { is_html=1; }
<HEADER>.+\r*\n                                         { }

 /* Handle comments and other angle brackets */
 /* Plain text is skipped, "<!--" starts a comment and any other "<" starts a tag;
    open_angle remembers how long the "<" and the whitespace after it were. */

[^<]+                                   { position+=html_yyleng; }
"<!--"                                  { position+=html_yyleng; BEGIN(COMMENT); }
"<"{W}*                                 { position+=html_yyleng; BEGIN(ANGLE_START); open_angle=html_yyleng; }

 /* Comments */
 /* Everything is skipped until "-->"; a lone ">" or "-" does not end the comment. */

<COMMENT>"-->"                          { position+=html_yyleng; BEGIN(INITIAL); }
<COMMENT>">"                            { position+=html_yyleng; }
<COMMENT>"-"                            { position+=html_yyleng; }
<COMMENT>[^->]+                         { position+=html_yyleng; }

 /* Angle brackets */
 /* The tag name (followed by whitespace) selects the state that looks for the
    interesting attributes of that tag; any other tag is skipped in the ANGLE state.
    The alternatives are grouped in brackets because "|" binds less tightly than
    concatenation, without them only the last name would need the whitespace and
    "frame" would also match the start of a longer name like "frameset". */

<ANGLE_START>">"                        { position+=html_yyleng; BEGIN(INITIAL); }
<ANGLE_START>"a"{W}                     { position+=html_yyleng; BEGIN(ANCHOR); }
<ANGLE_START>"area"{W}                  { position+=html_yyleng; BEGIN(AREA); }
<ANGLE_START>"base"{W}                  { position+=html_yyleng; BEGIN(BASE); }
<ANGLE_START>"body"{W}                  { position+=html_yyleng; BEGIN(BODY); }
<ANGLE_START>("frame"|"iframe"){W}      { position+=html_yyleng; BEGIN(FRAME); }
<ANGLE_START>"img"{W}                   { position+=html_yyleng; BEGIN(IMAGE); }
<ANGLE_START>"input"{W}                 { position+=html_yyleng; BEGIN(INPUT); }
<ANGLE_START>"link"{W}                  { position+=html_yyleng; BEGIN(LINK); }
<ANGLE_START>"meta"{W}                  { position+=html_yyleng; BEGIN(META); }
<ANGLE_START>("object"|"applet"|"embed"){W} { position+=html_yyleng; BEGIN(OBJECT); }
<ANGLE_START>"param"{W}                 { position+=html_yyleng; BEGIN(PARAM); }
<ANGLE_START>"script"{W}                { position+=html_yyleng; BEGIN(SCRIPT); }
 /* For the end tags remember the position of the "<" that started the tag. */
<ANGLE_START>"/body"                    { position+=html_yyleng; BEGIN(ANGLE); body_end=position-html_yyleng-open_angle; }
<ANGLE_START>"/html"                    { position+=html_yyleng; BEGIN(ANGLE); html_end=position-html_yyleng-open_angle; }
<ANGLE_START>.|\n                       { position+=html_yyleng; BEGIN(ANGLE); }

<ANGLE>">"                              { position+=html_yyleng; BEGIN(INITIAL); }
<ANGLE>[^>]+                            { position+=html_yyleng; }

 /* Base */
 /* The href of a Base tag is copied into base_url.  If the page has more than one
    then the last one is kept and the earlier copy is released, not leaked. */

<BASE>">"                               { position+=html_yyleng; BEGIN(INITIAL); }
<BASE>"href"{W}*"="{W}*{Q}*             { position+=html_yyleng; BEGIN(BASE_HREF); }
<BASE>.|\n                              { position+=html_yyleng; }

<BASE_HREF>">"                          { position+=html_yyleng; BEGIN(INITIAL); }
<BASE_HREF>{F}+                         { position+=html_yyleng; BEGIN(BASE);
                                          if(base_url) free(base_url);
                                          base_url=(char*)malloc(strlen(html_yytext)+1);
                                          if(base_url) strcpy(base_url,html_yytext); }
<BASE_HREF>.|\n                         { position+=html_yyleng; BEGIN(BASE); }

 /* Meta refresh */
 /* Once http-equiv=refresh has been seen in a Meta tag the text after "URL=" is
    copied into meta_refresh.  If there is more than one then the last one is kept
    and the earlier copy is released, not leaked. */

<META>">"                               { position+=html_yyleng; BEGIN(INITIAL); }
<META>"HTTP-EQUIV"{W}*"="{W}*{Q}*"Refresh"{Q}* { position+=html_yyleng; BEGIN(META_REFRESH); }
<META>.|\n                              { position+=html_yyleng; }

<META_REFRESH>">"                       { position+=html_yyleng; BEGIN(INITIAL); }
<META_REFRESH>"URL"{W}*"="{W}*{Q}*      { position+=html_yyleng; BEGIN(META_REFRESH_URL); }
<META_REFRESH>.|\n                      { position+=html_yyleng; }

 /* A ">" where the URL should be ends the tag; it must not be swallowed by the
    catch-all rule or the text after the tag would be scanned as part of it. */
<META_REFRESH_URL>">"                   { position+=html_yyleng; BEGIN(INITIAL); }
<META_REFRESH_URL>{F}+                  { position+=html_yyleng; BEGIN(META);
                                          if(meta_refresh) free(meta_refresh);
                                          meta_refresh=(char*)malloc(strlen(html_yytext)+1);
                                          if(meta_refresh) strcpy(meta_refresh,html_yytext); }
<META_REFRESH_URL>.|\n                  { position+=html_yyleng; BEGIN(META); }

 /* Stylesheets */
 /* Once rel=stylesheet has been seen in a Link tag its href is recorded as a StyleSheet. */

<LINK>">"                               { position+=html_yyleng; BEGIN(INITIAL); }
<LINK>"REL"{W}*"="{W}*{Q}*"Stylesheet"{Q}* { position+=html_yyleng; BEGIN(LINK_STYLE); }
<LINK>.|\n                              { position+=html_yyleng; }

<LINK_STYLE>">"                         { position+=html_yyleng; BEGIN(INITIAL); }
<LINK_STYLE>"href"{W}*"="{W}*{Q}*       { position+=html_yyleng; BEGIN(LINK_STYLE_HREF); }
<LINK_STYLE>.|\n                        { position+=html_yyleng; }

 /* A ">" where the URL should be ends the tag; it must not be swallowed by the
    catch-all rule or the text after the tag would be scanned as part of it. */
<LINK_STYLE_HREF>">"                    { position+=html_yyleng; BEGIN(INITIAL); }
<LINK_STYLE_HREF>{F}+                   { position+=html_yyleng; BEGIN(LINK); append_list(html_yytext,StyleSheet); }
<LINK_STYLE_HREF>.|\n                   { position+=html_yyleng; BEGIN(LINK); }

 /* Images */
 /* The src of an Img tag, the src of an Input tag and the background of a Body tag
    are all recorded as Image references.  In each case the state for the attribute
    value returns to the state for the tag after the value (or after a character that
    cannot start a value) and a ">" ends the tag. */

<IMAGE>">"                              { position+=html_yyleng; BEGIN(INITIAL); }
<IMAGE>"src"{W}*"="{W}*{Q}*             { position+=html_yyleng; BEGIN(IMAGE_SRC); }
<IMAGE>.|\n                             { position+=html_yyleng; }

<IMAGE_SRC>">"                          { position+=html_yyleng; BEGIN(INITIAL); }
<IMAGE_SRC>{F}+                         { position+=html_yyleng; BEGIN(IMAGE); append_list(html_yytext,Image); }
<IMAGE_SRC>.|\n                         { position+=html_yyleng; BEGIN(IMAGE); }

<INPUT>">"                              { position+=html_yyleng; BEGIN(INITIAL); }
<INPUT>"src"{W}*"="{W}*{Q}*             { position+=html_yyleng; BEGIN(INPUT_SRC); }
<INPUT>.|\n                             { position+=html_yyleng; }

<INPUT_SRC>">"                          { position+=html_yyleng; BEGIN(INITIAL); }
<INPUT_SRC>{F}+                         { position+=html_yyleng; BEGIN(INPUT); append_list(html_yytext,Image); }
<INPUT_SRC>.|\n                         { position+=html_yyleng; BEGIN(INPUT); }

<BODY>">"                               { position+=html_yyleng; BEGIN(INITIAL); }
<BODY>"background"{W}*"="{W}*{Q}*       { position+=html_yyleng; BEGIN(BODY_BACK); }
<BODY>.|\n                              { position+=html_yyleng; }

<BODY_BACK>">"                          { position+=html_yyleng; BEGIN(INITIAL); }
<BODY_BACK>{F}+                         { position+=html_yyleng; BEGIN(BODY); append_list(html_yytext,Image); }
<BODY_BACK>.|\n                         { position+=html_yyleng; BEGIN(BODY); }

 /* Frames */
 /* The src of a tag that selected the FRAME state is recorded as a Frame reference. */

<FRAME>">"                              { position+=html_yyleng; BEGIN(INITIAL); }
<FRAME>"src"{W}*"="{W}*{Q}*             { position+=html_yyleng; BEGIN(FRAME_SRC); }
<FRAME>.|\n                             { position+=html_yyleng; }

<FRAME_SRC>">"                          { position+=html_yyleng; BEGIN(INITIAL); }
<FRAME_SRC>{F}+                         { position+=html_yyleng; BEGIN(FRAME); append_list(html_yytext,Frame); }
<FRAME_SRC>.|\n                         { position+=html_yyleng; BEGIN(FRAME); }

 /* Scripts */
 /* The src of a Script tag is recorded as a Script reference. */

<SCRIPT>">"                             { position+=html_yyleng; BEGIN(INITIAL); }
<SCRIPT>"src"{W}*"="{W}*{Q}*            { position+=html_yyleng; BEGIN(SCRIPT_SRC); }
<SCRIPT>.|\n                            { position+=html_yyleng; }

<SCRIPT_SRC>">"                         { position+=html_yyleng; BEGIN(INITIAL); }
<SCRIPT_SRC>{F}+                        { position+=html_yyleng; BEGIN(SCRIPT); append_list(html_yytext,Script); }
<SCRIPT_SRC>.|\n                        { position+=html_yyleng; BEGIN(SCRIPT); }

 /* Objects */
 /* Each interesting attribute points op_ at the slot of obj_parts for its value and
    the OBJECT_PART state stores the value there; the references are built from the
    collected values by build_obj_urls() when the ">" of the tag is reached.
    A codetype or type that starts with "image" marks the classid or data as an image. */

<OBJECT>">"                             { position+=html_yyleng; BEGIN(INITIAL); build_obj_urls(); }
<OBJECT>"code"{W}*"="{W}*{Q}*           { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_code]; }
<OBJECT>"classid"{W}*"="{W}*{Q}*        { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_classid]; }
<OBJECT>"codetype"{W}*"="{W}*{Q}*"image" { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_codetype];
                                          obj_codetype_Type = Image; }
<OBJECT>"codetype"{W}*"="{W}*{Q}*       { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_codetype]; }
<OBJECT>"codebase"{W}*"="{W}*{Q}*       { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_codebase]; }
<OBJECT>"object"{W}*"="{W}*{Q}*         { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_object]; }
<OBJECT>"data"{W}*"="{W}*{Q}*           { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_data]; }
<OBJECT>"type"{W}*"="{W}*{Q}*"image"    { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_type];
                                          obj_type_Type = Image; }
<OBJECT>"type"{W}*"="{W}*{Q}*           { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_type]; }
<OBJECT>"usemap"{W}*"="{W}*{Q}*         { position+=html_yyleng; BEGIN(OBJECT_PART); op_ = &obj_parts[obj_usemap]; }
<OBJECT>"archive"{W}*"="{W}*{Q}*        { position+=html_yyleng; BEGIN(ARCHIVE); }
<OBJECT>.|\n                            { position+=html_yyleng; }

 /* As for the other tags a ">" in place of the value ends the tag and any other
    character that cannot start a value goes back to looking for attributes; without
    this an empty value made the text that followed be taken as the value. */
<OBJECT_PART>">"                        { position+=html_yyleng; BEGIN(INITIAL); build_obj_urls(); }
<OBJECT_PART>{F}+                       { position+=html_yyleng; BEGIN(OBJECT); op_malloc(html_yytext); }
<OBJECT_PART>.|\n                       { position+=html_yyleng; BEGIN(OBJECT); }

 /* The archive attribute is a list of names; commas and whitespace separate them and
    anything else (a closing quote or nothing at all) goes back to the OBJECT state. */
<ARCHIVE>">"                            { position+=html_yyleng; BEGIN(INITIAL); build_obj_urls(); }
<ARCHIVE>{W}*","{W}*                    { position+=html_yyleng; }
<ARCHIVE>{W}+                           { position+=html_yyleng; }
<ARCHIVE>{FA}+                          { position+=html_yyleng; add_obj_archive(html_yytext); }
<ARCHIVE>{W}*|{Q}                       { position+=html_yyleng; BEGIN(OBJECT); }

 /* The type and value of a Param tag are stored in param_parts in the same way; the
    value is turned into a reference by build_param_urls() if valuetype was "ref". */
<PARAM>">"                              { position+=html_yyleng; BEGIN(INITIAL); build_param_urls(); }
<PARAM>"type"{W}*"="{W}*{Q}*            { position+=html_yyleng; BEGIN(PARAM_VALUE); op_ = &param_parts[param_type]; }
<PARAM>"valuetype"{W}*"="{W}*{Q}*"ref"  { position+=html_yyleng; param_valuetype_is_ref = 1; }
<PARAM>"value"{W}*"="{W}*{Q}*           { position+=html_yyleng; BEGIN(PARAM_VALUE); op_ = &param_parts[param_value]; }
<PARAM>.|\n                             { position+=html_yyleng; }

<PARAM_VALUE>">"                        { position+=html_yyleng; BEGIN(INITIAL); build_param_urls(); }
<PARAM_VALUE>{F}+                       { position+=html_yyleng; BEGIN(PARAM); op_malloc(html_yytext); }
<PARAM_VALUE>.|\n                       { position+=html_yyleng; BEGIN(PARAM); }

 /* Links */
 /* The href of an A tag or an Area tag is recorded as a Link reference
    (append_list() itself ignores the mailto: and news: ones). */

<ANCHOR>">"                             { position+=html_yyleng; BEGIN(INITIAL); }
<ANCHOR>"href"{W}*"="{W}*{Q}*           { position+=html_yyleng; BEGIN(ANCHOR_HREF); }
<ANCHOR>.|\n                            { position+=html_yyleng; }

<ANCHOR_HREF>">"                        { position+=html_yyleng; BEGIN(INITIAL); }
<ANCHOR_HREF>{F}+                       { position+=html_yyleng; BEGIN(ANCHOR); append_list(html_yytext,Link); }
<ANCHOR_HREF>.|\n                       { position+=html_yyleng; BEGIN(ANCHOR); }

<AREA>">"                               { position+=html_yyleng; BEGIN(INITIAL); }
<AREA>"href"{W}*"="{W}*{Q}*             { position+=html_yyleng; BEGIN(AREA_HREF); }
<AREA>.|\n                              { position+=html_yyleng; }

<AREA_HREF>">"                          { position+=html_yyleng; BEGIN(INITIAL); }
<AREA_HREF>{F}+                         { position+=html_yyleng; BEGIN(AREA); append_list(html_yytext,Link); }
<AREA_HREF>.|\n                         { position+=html_yyleng; BEGIN(AREA); }

 /* End of file */
 /* Choose the position that GetHTMLEnd() will report.  An end tag only counts if
    it is less than 16 characters from what follows it:
    - the end of body tag, if the end of html tag is that close after it and the end
      of the file is that close after the end of html tag,
    - else the end of body tag, if the end of the file is that close after it,
    - else the end of html tag, if the end of the file is that close after it,
    - else the end of the file itself. */

<<EOF>>                                 { if(body_end && html_end && (html_end-body_end)<16 && (position-html_end)<16)
                                             body_or_html_end=body_end;
                                          else if(body_end && (position-body_end)<16)
                                             body_or_html_end=body_end;
                                          else if(html_end && (position-html_end)<16)
                                             body_or_html_end=html_end;
                                          else
                                             body_or_html_end=position;
                                          return(EOF); }

%%
