/*		HTML Parser
**		===========
*/
#include <ctype.h>
#include <stdio.h>

#include "HTUtils.h"
#include "SGML.h"
#include "HTAtom.h"
#include "HTChunk.h"
#include "HText.h"
#include "HTStyle.h"
#include "HTML.h"


/*				SPECIAL HTML CODE
**				=================
*/

extern HTStyleSheet * styleSheet;	/* Application-wide */

/*	The document being parsed: node_anchor is stored by HTML_begin and
**	text is created from it in begin_document.
*/
PRIVATE HTParentAnchor * node_anchor;
PRIVATE HText * text;

/*	Styles which are not attached to one particular tag.
**	All five are looked up by name in get_styles.
*/
PRIVATE HTStyle * list_style;
PRIVATE HTStyle * list_compact_style;
PRIVATE HTStyle * list_dir_style;
PRIVATE HTStyle * glossary_style;
PRIVATE HTStyle * glossary_compact_style;

/*	Buffer used by the TITLE handlers (clear_string, accumulate_string,
**	set_title) to collect the document title.
*/
PRIVATE HTChunk title = { 0, 128, 0, 0 };	/* Grow by 128 */


/*	Forward declarations of routines for DTD
**
**	These handlers are referenced by the tag tables before their
**	definitions appear further down the file.
*/
PRIVATE void begin_litteral PARAMS((HTTag * t, HTElement * e));
PRIVATE void no_change PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_element PARAMS((HTTag * t, HTElement * e));
PRIVATE void end_element PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_document PARAMS((HTTag * t, HTElement * e));
PRIVATE void end_document PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_hidden PARAMS((HTTag * t, HTElement * e));
PRIVATE void end_hidden PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_anchor PARAMS((HTTag * t, HTElement * e));
PRIVATE void end_anchor PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_list PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_listc PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_listcd PARAMS((HTTag * t, HTElement * e));
PRIVATE void end_list PARAMS((HTTag * t, HTElement * e));
PRIVATE void begin_glossary PARAMS((HTTag * t, HTElement * e));
PRIVATE void end_glossary PARAMS((HTTag * t, HTElement * e));

PRIVATE int got_styles = 0;		/* Set by get_styles; tested in begin_document */
PRIVATE void get_styles NOPARAMS;

/*	State of the buffered style change (see change_style, UPDATE_STYLE)
*/
PRIVATE	BOOL		style_change;	/* YES while new_style awaits HText_setStyle */
PRIVATE HTStyle *	new_style;	/* Style most recently passed to change_style */
PRIVATE HTStyle *	old_style;	/* Style last applied; 0 at document start */

/*	Style buffering avoids dummy paragraph begin/ends.
**
**	If a style change is pending, apply new_style to the text object,
**	remember it in old_style and clear the pending flag.  Otherwise do
**	nothing.
**
**	The body is wrapped in do { } while (0) so that "UPDATE_STYLE;" is
**	exactly one statement: it can be used as the unbraced body of an
**	if/else without capturing the else or leaving a stray ';'.
*/
#define UPDATE_STYLE do { if (style_change) { \
	HText_setStyle(text, new_style); \
	old_style = new_style; \
	style_change = NO; } } while (0)

/*	Request a change of style
**
**	Records style as the pending style and raises style_change when it
**	differs from the style already pending.  The text object is not
**	touched here; that happens when UPDATE_STYLE next runs.
*/
PRIVATE void change_style ARGS1(HTStyle *,style)
{
    if (new_style == style) return;	/* Same style already requested */

    new_style = style;
    style_change = YES;
}


/*		TITLE
*/

/*	Accumulate a character of title
*/
#ifdef __STDC__
static void accumulate_string(char c)
#else
static void accumulate_string(c)
    char c;
#endif
{
    HTChunkPutc(&title, c);
}


/*		Reset the title buffer
**
**	Begin handler of the TITLE tag: calls HTChunkClear on the title
**	buffer.  Neither argument is used.
*/
PRIVATE  void clear_string ARGS2(HTTag *,t, HTElement *,e)
{
    HTChunk * buffer = &title;

    HTChunkClear(buffer);
}

/*		Hand the collected title to the anchor
**
**	End handler of the TITLE tag: terminates the title buffer and passes
**	its data to HTAnchor_setTitle for the document's anchor.  Neither
**	argument is used.
*/
PRIVATE void set_title ARGS2(HTTag *,t, HTElement *,e)
{
    HTChunk * buffer = &title;

    HTChunkTerminate(buffer);
    HTAnchor_setTitle(node_anchor, buffer->data);
}

/*		Character handling
*/
/*	Begin handler of the ISINDEX tag
**
**	Calls HTAnchor_setIndex on the anchor of the document being parsed.
**	Neither argument is used.
*/
PRIVATE void set_index ARGS2(HTTag *,t, HTElement *,e)
{
    HTParentAnchor * document = node_anchor;

    HTAnchor_setIndex(document);
}

/*	Pass one character of ordinary text to the text object
**
**	While a style change is pending, newlines and spaces are dropped;
**	the first other character applies the pending style.  A newline
**	which is not dropped is appended as a space.
*/
PRIVATE void pass_character ARGS1(char, c)
{
    int is_newline = (c == '\n');

    if (style_change) {
	if (is_newline || c == ' ') return;	/* Ignore it */
	UPDATE_STYLE;
    }
    HText_appendCharacter(text, is_newline ? ' ' : c);
}

/*	Pass one character of literal text to the text object unchanged
**
**	No style update is done here: begin_litteral has already applied
**	the style before any character arrives.
*/
PRIVATE void litteral_text ARGS1(char, c)
{
    HText * target = text;

    HText_appendCharacter(target, c);		/* @@@@@ */
}

/*	Pass one character to the text object as hidden text
**
**	Character handler of the S and VOBJ tags.
*/
PRIVATE void hidden_text ARGS1(char, c)
{
    HText * target = text;

    HText_appendHiddenCharacter(target, c);	/* @@@@@ */
}

/*	Discard one character
**
**	Character handler of the COMMENT tag: the character is dropped.
*/
PRIVATE void ignore_text ARGS1(char, c)
{
    return;			/* Nothing is kept */
}

/*	Begin handler of the NEXTID tag
**
**	Deliberately empty: the tag is accepted and ignored.
*/
PRIVATE void set_next_id  ARGS2(HTTag *,t, HTElement *,e)
{
    return;			/* @@@@@  Bad SGML anyway */
}

/*	Begin handler of the P tag
**
**	Applies any pending style first, then appends a paragraph break to
**	the text object.  Neither argument is used.
*/
PRIVATE void new_paragraph  ARGS2(HTTag *,t, HTElement *,e)
{
    UPDATE_STYLE;

    HText_appendParagraph(text);
}

/*	Begin handler of the LI tag
**
**	Separates list items: a tab is appended when the style last applied
**	has tabs, a newline otherwise (including when no style has been
**	applied yet).  Neither argument is used.
*/
PRIVATE void list_element  ARGS2(HTTag *,t, HTElement *,e)
{
    char separator = '\n';

    if (old_style && old_style->tabs) separator = '\t';

    HText_appendCharacter(text, separator);
}

/*	Begin handler of the DT tag
**
**	Appends a paragraph break, except while a style change is still
**	pending.  Neither argument is used.
*/
PRIVATE void term ARGS2(HTTag *,t, HTElement *,e)
{
    if (style_change) return;

    HText_appendParagraph(text);
}

/*	Begin handler of the DD tag
**
**	Applies any pending style and then tabs out one stop.  Once the
**	style is up to date a tab goes to the text object unchanged, so it
**	is appended directly.  Neither argument is used.
*/
PRIVATE void definition ARGS2(HTTag *,t, HTElement *,e)
{
    UPDATE_STYLE;

    HText_appendCharacter(text, '\t');	/* Just tab out one stop */
}

/*		Our Static DTD for HTML
**		-----------------------
*/

/*	Entity table of the DTD: pairs of entity name and replacement
**	string, ended by an all-zero entry.
*/
static entity entities[] = {
	{ "lt",	"<" },
	{ "gt", ">" },
	{ "amp", "&" },
	{ "bullet" , "\267" },			/* @@@ NeXT only */
	{ 0,	0 }  /* Terminate list */
};

/*	Empty attribute list (terminator only) for tags without attributes
*/
static attr no_attr[] = {{ 0, 0 , 0}};

/*	Attribute list of the A tag.  The A_* indices are used by
**	begin_anchor, which reads the present and value fields.
*/
static attr a_attr[] = {				/* Anchor attributes */
#define A_ID 0
	{ "NAME", 0, 0 },				/* Should be ID */
#define A_TYPE 1
	{ "TYPE", 0, 0 },
#define A_HREF 2
	{ "HREF", 0, 0 },
	{ 0, 0 , 0}	/* Terminate list */
};	

/*	Attribute list shared by the list tags.  begin_list tests the
**	present field of the COMPACT entry to choose between the list
**	styles.
*/
static attr list_attr[] = {
#define LIST_COMPACT 0
	{ "COMPACT", 0, 0 },
	{ 0, 0, 0 }	/* Terminate list */
};

/* like list_attr, but compact by default: the second field of COMPACT
** starts as 1 and its value as the empty string.  Used by the ULC tag.
*/
static attr listc_attr[] = { 
	{ "COMPACT", 1, "" },
	{ 0, 0, 0 }	/* Terminate list */
};

/*	Attribute list for glossaries.  begin_glossary tests the present
**	field of the COMPACT entry to choose between the glossary styles.
*/
static attr glossary_attr[] = {
#define GLOSSARY_COMPACT 0
	{ "COMPACT", 0, 0 },
	{ 0, 0, 0 }	/* Terminate list */
};

/*	The tag entry for the document as a whole, passed to the SGML
**	parser through HTML_dtd.  begin_document and end_document open and
**	close the text object; pass_character receives the characters.
**	Its style is set to "Normal" in get_styles.
*/
static HTTag default_tag =
    { "DOCUMENT", no_attr , 0, 0, begin_document, pass_character, end_document };
    
/*	The element table of the DTD
**
**	Each entry gives the tag name, its attribute list, two fields which
**	are not interpreted in this file (presumably the style slot filled
**	in by get_styles and a literal-text flag -- confirm against SGML.h),
**	and then the begin, character and end handlers.  An all-zero entry
**	terminates the table.
**
**	The *_TAG constants are indices into this table and are used by
**	get_styles: they must be kept in step with the order of the entries.
**
**	DL uses glossary_attr, not list_attr, because its begin handler
**	begin_glossary tests glossary_attr[GLOSSARY_COMPACT]: the attribute
**	list named here has to be the one the handler reads, as it is for
**	A (a_attr, begin_anchor) and UL/OL (list_attr, begin_list).
*/
static HTTag tags[] = {
#define TITLE_TAG 0
    { "TITLE", no_attr, 0, 0, clear_string, accumulate_string, set_title},
#define ISINDEX_TAG 1
    { "ISINDEX", no_attr, 0, 0, set_index, 0 , 0},
#define NEXTID_TAG 2
    { "NEXTID", no_attr, 0, 0, set_next_id, 0, 0},
#define ADDRESS_TAG 3
    { "ADDRESS"	, no_attr, 0, 0, begin_element, pass_character, end_element},
#define H1_TAG 4
    { "H1"	, no_attr, 0, 0, begin_element, pass_character, end_element},
    { "H2"	, no_attr, 0, 0, begin_element, pass_character, end_element},
    { "H3"	, no_attr, 0, 0, begin_element, pass_character, end_element},
    { "H4"	, no_attr, 0, 0, begin_element, pass_character, end_element},
    { "H5"	, no_attr, 0, 0, begin_element, pass_character, end_element},
    { "H6"	, no_attr, 0, 0, begin_element, pass_character, end_element},
    { "H7"	, no_attr, 0, 0, begin_element, pass_character, end_element},
#define UL_TAG 11
    { "UL"	, list_attr, 0, 0, begin_list, pass_character, end_list},
#define ULC_TAG 12
    { "ULC"	, listc_attr, 0, 0, begin_listc, pass_character, end_list},
#define OL_TAG 13
    { "OL"	, list_attr, 0, 0, begin_list, pass_character, end_list},
#define OLC_TAG 14
    { "OLC"	, list_attr, 0, 0, begin_listc, pass_character, end_list},
#define DIR_TAG 15
    { "DIR"	, list_attr, 0, 0, begin_listcd, pass_character, end_list},
#define MENU_TAG 16
    { "MENU"	, list_attr, 0, 0, begin_listc, pass_character, end_list},
#define LI_TAG 17
    { "LI"	, no_attr, 0, 0, list_element, pass_character, end_list},
#define DL_TAG 18
    { "DL"	, glossary_attr, 0, 0, begin_glossary, pass_character, end_glossary },
#define DLC_TAG 19
    { "DLC"	, list_attr, 0, 0, begin_listc, pass_character, end_list},
    { "DT"	, no_attr, 0, 0, term, pass_character, 0},
    { "DD"	, no_attr, 0, 0, definition, pass_character, 0},
    { "A"	, a_attr,  0, 0, begin_anchor, pass_character, end_anchor},
#define P_TAG 23
    { "P"	, no_attr, 0, 0, new_paragraph, pass_character, 0},
#define XMP_TAG 24
    { "XMP"	, no_attr, 0, 1, begin_litteral, litteral_text, end_element},
#define LISTING_TAG 25
    { "LISTING"	, no_attr, 0, 1, begin_litteral, litteral_text, end_element},
#define PLAINTEXT_TAG 26
    { "PLAINTEXT", no_attr, 0, 1, begin_litteral, litteral_text, end_element},
#define HP_TAG 27
    { "HP"	, no_attr,  0, 0, begin_element, pass_character, end_element},
#define ST_TAG 28 /* PYW: ScriptTitle  */
    { "ST"	, no_attr, 0, 0, begin_anchor, pass_character, end_anchor},
#define S_TAG 29 /* PYW: Script body */
    { "S"	, no_attr, 0, 0, begin_hidden, hidden_text, end_hidden},
#define VOBJ_TAG 30 /* PYW: Viols Object Description */
    { "VOBJ"	, no_attr, 0, 0, begin_hidden, hidden_text, end_hidden},
#define XMPA_TAG 31 /* PYW: anchorable scripts */
    { "XMPA"	, no_attr, 0, 0, begin_litteral, litteral_text, end_element},
#define COMMENT_TAG 32
    { "COMMENT", no_attr, 0, 1, no_change, ignore_text, no_change },
    { 0, 0, 0, 0,  0, 0 , 0}	/* Terminate list */
};

/*	The complete DTD handed to the SGML parser (see HTML_Parse): the
**	tag table, the default tag and the entity table.
*/
PUBLIC SGML_dtd HTML_dtd = { tags, &default_tag, entities };

/*		Flattening the style structure
**		------------------------------
**
On the NeXT, and on any read-only browser, it is simpler for the text to have
a sequence of styles, rather than a nested tree of styles. In this
case we have to flatten the structure as it arrives from SGML tags into
a sequence of styles.
*/

/*	Anchor handling
**	---------------
*/
/*	Begin handler of the A and ST tags
**
**	Finds or creates a child anchor of the document's anchor from the
**	attributes in a_attr, each passed only if marked present and as 0
**	otherwise: NAME as the tag, HREF as the address, and TYPE (turned
**	into an atom) as the link type.  Any pending style is then applied
**	and the anchor is opened in the text object.  Neither argument is
**	used.
*/
PRIVATE void begin_anchor ARGS2(HTTag *,t, HTElement *,e)
{
    HTLinkType * link_type = 0;
    HTChildAnchor * source;

    if (a_attr[A_TYPE].present)
	link_type = (HTLinkType*)HTAtom_for(a_attr[A_TYPE].value);

    source = HTAnchor_findChildAndLink(
	node_anchor,						/* parent */
	a_attr[A_ID].present	? a_attr[A_ID].value : 0,	/* Tag */
	a_attr[A_HREF].present	? a_attr[A_HREF].value : 0,	/* Address */
	link_type);						/* Link type */

    UPDATE_STYLE;
    HText_beginAnchor(text, source);
}

/*	End handler of the A and ST tags
**
**	Applies any pending style, then closes the anchor in the text
**	object.  Neither argument is used.
*/
PRIVATE void end_anchor ARGS2(HTTag *,t, HTElement *,e)
{
    UPDATE_STYLE;

    HText_endAnchor(text);
}


/*	General SGML Element Handling
**	-----------------------------
*/
/*	Generic begin handler
**
**	Requests the style recorded for tag t.  The change is buffered by
**	change_style; nothing is sent to the text object here.
*/
PRIVATE void begin_element ARGS2(HTTag *,t, HTElement *,e)
{
    HTStyle * wanted = t->style;

    change_style(wanted);
}
/*	Handler which leaves the style alone
**
**	Begin and end handler of the COMMENT tag.
*/
PRIVATE void no_change ARGS2(HTTag *,t, HTElement *,e)
{
    return;			/* Nothing to do */
}
/*	Begin handler of the literal-text tags
**
**	Requests the style recorded for tag t and applies it at once, since
**	litteral_text appends characters without checking for a pending
**	style change.
*/
PRIVATE void begin_litteral ARGS2(HTTag *,t, HTElement *,e)
{
    HTStyle * wanted = t->style;

    change_style(wanted);
    UPDATE_STYLE;
}

/*	Generic end handler
**
**	If e is given, requests the style recorded for e's tag; with a
**	null e nothing happens.  The tag t itself is not consulted.
*/
PRIVATE void end_element ARGS2(HTTag *,t, HTElement *,e)
{
    if (!e) return;

    change_style(e->tag->style);
}

/*	Begin handler of the UL and OL tags
**
**	Requests the compact list style when the COMPACT entry of list_attr
**	is marked present, and the ordinary list style otherwise.  Neither
**	argument is used.
*/
PRIVATE void begin_list ARGS2(HTTag *,t, HTElement *,e)
{
    if (list_attr[LIST_COMPACT].present) {
	change_style(list_compact_style);
    } else {
	change_style(list_style);
    }
}

/*	Begin handler of the lists which are always compact
**
**	Requests the compact list style whatever the attributes say.
**	Neither argument is used.
*/
PRIVATE void begin_listc ARGS2(HTTag *,t, HTElement *,e)
{
    HTStyle * wanted = list_compact_style;

    change_style(wanted);
}

/*	Begin handler of the DIR tag
**
**	Requests the directory list style.  Neither argument is used.
*/
PRIVATE void begin_listcd ARGS2(HTTag *,t, HTElement *,e)
{
    HTStyle * wanted = list_dir_style;

    change_style(wanted);
}

/*	End handler of the list tags
**
**	Requests the style recorded for e's tag.  The tag t itself is not
**	consulted.
**
**	A null e is ignored, as in end_element, rather than dereferenced.
*/
PRIVATE void end_list ARGS2(HTTag *,t, HTElement *,e)
{
	if (e) change_style(e->tag->style);
}

/*	Begin handler of the DL tag
**
**	Requests the compact glossary style when the COMPACT entry of
**	glossary_attr is marked present, and the ordinary glossary style
**	otherwise.  Neither argument is used.
*/
PRIVATE void begin_glossary ARGS2(HTTag *,t, HTElement *,e)
{
    if (glossary_attr[GLOSSARY_COMPACT].present) {
	change_style(glossary_compact_style);
    } else {
	change_style(glossary_style);
    }
}

/*	End handler of the DL tag
**
**	Requests the style recorded for e's tag.  The tag t itself is not
**	consulted.
**
**	A null e is ignored, as in end_element, rather than dereferenced.
*/
PRIVATE void end_glossary ARGS2(HTTag *,t, HTElement *,e)
{
    if (e) change_style(e->tag->style);
}


/*	Begin and End document
**	----------------------
*/
/*	Select the anchor of the document about to be parsed
**
**	On entry,
**		anchor	is the parent anchor for the document.
**
**	The anchor is only remembered here; the text object is created from
**	it later, in begin_document.
*/
PUBLIC void HTML_begin ARGS1(HTParentAnchor *,anchor)
{
    node_anchor = anchor;	/* Used by the handlers from now on */
}

/*	Begin handler of the default (DOCUMENT) tag
**
**	Loads the styles on first use, creates the text object for the
**	current anchor, opens it for appending and resets the buffered
**	style state.  Neither argument is used.
**
**	new_style is reset together with old_style and style_change.
**	change_style ignores a request equal to new_style, so a value left
**	over from a previous document would make the first request for that
**	same style in this document a no-op, and the new text object would
**	never be given it.
*/
PRIVATE void begin_document ARGS2(HTTag *, t, HTElement *, e)
{
    if (!got_styles) get_styles();
    text = HText_new(node_anchor);
    HText_beginAppend(text);
    old_style = 0;
    new_style = 0;
    style_change = NO;
}

/*	End handler of the default (DOCUMENT) tag
**
**	Closes the append which begin_document opened on the text object.
**	Neither argument is used.
*/
PRIVATE void end_document ARGS2(HTTag *, t, HTElement *, e)
{
    HText * finished = text;

    HText_endAppend(finished);
}

/*	Begin handler of the S and VOBJ tags
**
**	Opens a hidden append on the text object; the characters which
**	follow arrive through hidden_text.  Neither argument is used.
*/
PRIVATE void begin_hidden ARGS2(HTTag *, t, HTElement *, e)
{
    HText * target = text;

    HText_beginHiddenAppend(target);
}

/*	End handler of the S and VOBJ tags
**
**	Closes the hidden append opened by begin_hidden.  Neither argument
**	is used.
*/
PRIVATE void end_hidden ARGS2(HTTag *, t, HTElement *, e)
{
    HText * target = text;

    HText_endHiddenAppend(target);
}

/*	Get Styles from style sheet
**	---------------------------
*/
/*	Look up every style used by the parser
**
**	Marks the styles as loaded, then fetches each style by name from
**	the application's style sheet and stores it either in the style
**	field of the matching tag or in one of the file-level list and
**	glossary style variables.  Called from begin_document when
**	got_styles is not yet set.
*/
PRIVATE void get_styles NOARGS
{
    static char * heading_names[] = {
	"Heading1", "Heading2", "Heading3", "Heading4",
	"Heading5", "Heading6", "Heading7"
    };
    HTStyle * normal;
    HTStyle * example;
    int level;

    got_styles = YES;

    /* The document as a whole and P share one style */
    normal = HTStyleNamed(styleSheet, "Normal");
    default_tag.style = normal;
    tags[P_TAG].style = normal;

    /* H1 to H7 occupy consecutive entries starting at H1_TAG */
    for (level = 0; level < 7; level++) {
	tags[H1_TAG + level].style =
		HTStyleNamed(styleSheet, heading_names[level]);
    }

    tags[DIR_TAG].style = HTStyleNamed(styleSheet, "Directory");
    tags[MENU_TAG].style = HTStyleNamed(styleSheet, "Menu");
    tags[DL_TAG].style = HTStyleNamed(styleSheet, "Glossary");

    /* Styles chosen by the list and glossary begin handlers */
    list_style = HTStyleNamed(styleSheet, "List");
    list_compact_style = HTStyleNamed(styleSheet, "ListCompact");
    list_dir_style = HTStyleNamed(styleSheet, "ListDirectory");
    glossary_style = HTStyleNamed(styleSheet, "Glossary");
    glossary_compact_style = HTStyleNamed(styleSheet, "GlossaryCompact");

    tags[ADDRESS_TAG].style = HTStyleNamed(styleSheet, "Address");

    /* XMP and PLAINTEXT share one style */
    example = HTStyleNamed(styleSheet, "Example");
    tags[XMP_TAG].style = example;
    tags[PLAINTEXT_TAG].style = example;

    tags[LISTING_TAG].style = HTStyleNamed(styleSheet, "Listing");
    tags[ST_TAG].style = HTStyleNamed(styleSheet, "ScriptTitle");
    tags[S_TAG].style = HTStyleNamed(styleSheet, "Hidden");
    tags[VOBJ_TAG].style = HTStyleNamed(styleSheet, "Hidden");
}


/*	Parse an HTML file
**	------------------
**
**	This version takes a pointer to the routine to call
**	to get each character.
*/
BOOL HTML_Parse
#ifdef __STDC__
  (HTParentAnchor * anchor, char (*next_char)() )
#else
  (anchor, next_char)
    HTParentAnchor * anchor;
    char (*next_char)();
#endif
/*
**	On entry,
**		anchor		is the parent anchor of the document.
**		next_char	is called once for each character wanted.
**	On exit,
**		returns		YES, always.
**
**	Each character obtained is fed to the SGML parser with the HTML
**	DTD.  Parsing stops at the first character equal to (char)EOF.
**	NOTE(review): because next_char returns a char, a data byte with
**	that same value cannot be told apart from end of input; curing this
**	needs a callback which returns int.
*/
{
    char character;

    HTML_begin(anchor);
    SGML_begin(&HTML_dtd);

    while ((character = (*next_char)()) != (char)EOF) {
	SGML_character(&HTML_dtd, character);
    }

    SGML_end(&HTML_dtd);
    return YES;
}
