tImport version of libhtml that might actually work with ANSI C. - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 7cf289ca89a7416999ae02330236042b0d37e3db
 (DIR) parent 3e3817f7c86658f60715dd93768eaf8285807985
 (HTM) Author: wkj <devnull@localhost>
       Date:   Tue,  6 Apr 2004 19:06:52 +0000
       
       Import version of libhtml that might actually work with ANSI C.
       
       Diffstat:
         A src/cmd/htmlfmt/dat.h               |      50 +++++++++++++++++++++++++++++++
         A src/cmd/htmlfmt/html.c              |     331 +++++++++++++++++++++++++++++++
         A src/cmd/htmlfmt/main.c              |      71 +++++++++++++++++++++++++++++++
         A src/cmd/htmlfmt/mkfile              |      30 ++++++++++++++++++++++++++++++
         A src/cmd/htmlfmt/util.c              |     120 +++++++++++++++++++++++++++++++
         A src/libhtml/build.c                 |    4238 +++++++++++++++++++++++++++++++
         A src/libhtml/impl.h                  |     163 +++++++++++++++++++++++++++++++
         A src/libhtml/lex.c                   |    1384 +++++++++++++++++++++++++++++++
         A src/libhtml/mkfile                  |      22 ++++++++++++++++++++++
         A src/libhtml/runetab.c               |      83 +++++++++++++++++++++++++++++++
         A src/libhtml/runetab.h               |      59 +++++++++++++++++++++++++++++++
         A src/libhtml/strinttab.c             |      64 +++++++++++++++++++++++++++++++
         A src/libhtml/utils.c                 |     591 +++++++++++++++++++++++++++++++
       
       13 files changed, 7206 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/htmlfmt/dat.h b/src/cmd/htmlfmt/dat.h
       t@@ -0,0 +1,50 @@
       +typedef struct Bytes Bytes;
       +typedef struct URLwin URLwin;
       +
       +enum
       +{
       +        STACK                = 8192,        /* NOTE(review): not referenced in the visible htmlfmt sources - confirm it is still needed */
       +        EVENTSIZE        = 256,        /* NOTE(review): likewise unused in the visible sources */
       +};
       +
       +struct Bytes        /* growable byte buffer, appended to by growbytes() */
       +{
       +        uchar        *b;        /* data; growbytes() keeps it NUL-terminated, nil until the first append */
       +        long                n;        /* bytes in use, not counting the terminating NUL */
       +        long                nalloc;        /* bytes currently allocated for b */
       +};
       +
       +struct URLwin        /* one document being formatted; created by loadhtml() */
       +{
       +        int                infd;        /* descriptor the HTML was read from */
       +        int                outfd;        /* descriptor rerender() writes the text to (loadhtml sets 1) */
       +        int                type;        /* media type handed to parsehtml (loadhtml sets TextHtml) */
       +
       +        char                *url;        /* document URL; fullurl() resolves relative links against it */
       +        Item                *items;        /* item list returned by parsehtml */
       +        Docinfo        *docinfo;        /* filled in by parsehtml; render() walks its anchor list */
       +};
       +
       +extern        char*        url;
       +extern        int                aflag;
       +extern        int                width;
       +extern        int                defcharset;
       +
       +extern        char*        loadhtml(int);
       +
       +extern        char*        readfile(char*, char*, int*);
       +extern        int        charset(char*);
       +extern        void*        emalloc(ulong);
       +extern        char*        estrdup(char*);
       +extern        char*        estrstrdup(char*, char*);
       +extern        char*        egrow(char*, char*, char*);
       +extern        char*        eappend(char*, char*, char*);
       +extern        void                error(char*, ...);
       +
       +extern        void                growbytes(Bytes*, char*, long);
       +
       +extern        void                rendertext(URLwin*, Bytes*);
       +extern        void                rerender(URLwin*);
       +extern        void                freeurlwin(URLwin*);
       +
       +#pragma        varargck        argpos        error        1
 (DIR) diff --git a/src/cmd/htmlfmt/html.c b/src/cmd/htmlfmt/html.c
       t@@ -0,0 +1,331 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include <draw.h>
       +#include <regexp.h>
       +#include <html.h>
       +#include <ctype.h>
       +#include "dat.h"
       +
       +char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";        /* leading scheme://host prefix of an absolute URL */
       +Reprog        *urlprog;        /* compiled urlexpr; nil until baseurl() first needs it */
       +
       +int inword = 0;        /* nonzero while renderrunes() has a word that is not yet emitted */
       +int col = 0;        /* runes already emitted on the current output line */
       +int wordi = 0;        /* index in the rune string where the pending word starts */
       +
       +/*
       + * Read all of fd, parse it as HTML and write the formatted text
       + * to standard output.  Always returns nil; main() treats the
       + * result as an optional error string.
       + */
       +char*
       +loadhtml(int fd)
       +{
       +        URLwin *u;
       +        Bytes *b;
       +        int n;
       +        char buf[4096];
       +
       +        u = emalloc(sizeof(URLwin));
       +        u->infd = fd;
       +        u->outfd = 1;
       +        u->url = estrdup(url);
       +        u->type = TextHtml;
       +
       +        b = emalloc(sizeof(Bytes));
       +        while((n = read(fd, buf, sizeof buf)) > 0)
       +                growbytes(b, buf, n);
       +        if(b->b == nil){
       +                /* empty file: nothing was parsed, so only u and b need releasing */
       +                free(u->url);
       +                free(u);
       +                free(b);
       +                return nil;
       +        }
       +        rendertext(u, b);
       +        /* release the URL copy and clear it so no stale pointer is left in u */
       +        free(u->url);
       +        u->url = nil;
       +        freeurlwin(u);
       +        /* the parsed items are gone, so the raw input can go as well */
       +        free(b->b);
       +        free(b);
       +        return nil;
       +}
       +
       +/*
       + * Convert the first n runes of r into a freshly allocated UTF-8
       + * string.  n == 0 yields an empty string, never nil; a failed
       + * allocation exits through error().
       + */
       +char*
       +runetobyte(Rune *r, int n)
       +{
       +        char *utf;
       +
       +        if(n != 0){
       +                utf = smprint("%.*S", n, r);
       +                if(utf == nil)
       +                        error("malloc failed");
       +                return utf;
       +        }
       +        return emalloc(1);
       +}
       +
       +/*
       + * Report whether c is an ASCII closing punctuation mark, that is,
       + * one that emitword() should not separate from the preceding word
       + * with a space.  c may be any rune; only ASCII values can match.
       + */
       +int
       +closingpunct(int c)
       +{
       +        /*
       +         * strchr converts c to char, so without this guard a rune
       +         * such as U+012E would alias '.', and 0 would match the
       +         * string's terminating NUL.
       +         */
       +        if(c <= 0 || c >= 0x80)
       +                return 0;
       +        return strchr(".,:;'\")]}>!?", c) != nil;
       +}
       +
       +/*
       + * Append the nr-rune word at r to b, preceded by a space or a
       + * line break as needed to keep lines within width columns.
       + * Updates col and marks the pending word as finished.
       + */
       +void
       +emitword(Bytes *b, Rune *r, int nr)
       +{
       +        char *s;
       +        int space;
       +
       +        if(nr == 0)
       +                return;
       +        /* runetobyte checks the allocation; a bare smprint could hand nil to strlen */
       +        s = runetobyte(r, nr);
       +        /* a separating space is wanted unless the output already ends in white space or the word opens with closing punctuation */
       +        space = (b->n>0) && !isspace(b->b[b->n-1]) && !closingpunct(r[0]);
       +        if(col>0 && col+space+nr > width){
       +                /* word would overflow the line: break first */
       +                growbytes(b, "\n", 1);
       +                space = 0;
       +                col = 0;
       +        }
       +        if(space && col>0){
       +                growbytes(b, " ", 1);
       +                col++;
       +        }
       +        growbytes(b, s, strlen(s));
       +        col += nr;
       +        free(s);
       +        inword = 0;
       +}
       +
       +/*
       + * Append the rune string r to b as filled text.  Spaces and
       + * newlines end the pending word; a newline also resets the column
       + * and is copied through unless b is empty or already ends in two
       + * newlines.
       + */
       +void
       +renderrunes(Bytes *b, Rune *r)
       +{
       +        int i, n;
       +        Rune c;
       +
       +        n = runestrlen(r);
       +        for(i=0; i<n; i++){
       +                c = r[i];
       +                if(c != '\n' && c != ' '){
       +                        /* ordinary rune: open a word unless one is already pending */
       +                        if(!inword){
       +                                wordi = i;
       +                                inword = 1;
       +                        }
       +                        continue;
       +                }
       +                /* white space terminates the pending word */
       +                if(inword)
       +                        emitword(b, r+wordi, i-wordi);
       +                if(c == ' ')
       +                        continue;
       +                /* newline */
       +                col = 0;
       +                if(b->n == 0)
       +                        continue;        /* don't start with blank lines */
       +                if(b->n<2 || b->b[b->n-1]!='\n' || b->b[b->n-2]!='\n')
       +                        growbytes(b, "\n", 1);
       +        }
       +        /* flush a word that runs up to the end of the string */
       +        if(inword)
       +                emitword(b, r+wordi, i-wordi);
       +}
       +
       +/*
       + * Format the arguments print-style into a rune string and feed
       + * the result through renderrunes().  Exits through error() if the
       + * formatted string cannot be allocated.
       + */
       +void
       +renderbytes(Bytes *b, char *fmt, ...)
       +{
       +        Rune *r;
       +        va_list arg;
       +
       +        va_start(arg, fmt);
       +        r = runevsmprint(fmt, arg);
       +        va_end(arg);
       +        /* renderrunes would dereference a nil string */
       +        if(r == nil)
       +                error("malloc failed");
       +        renderrunes(b, r);
       +        free(r);
       +}
       +
       +/*
       + * Return a newly allocated copy of url cut back to its directory:
       + * the last '/' and everything after it are dropped, unless that
       + * slash lies inside the scheme://host prefix matched by urlexpr,
       + * in which case only the prefix is kept.  Returns nil if url is
       + * nil or does not match urlexpr.
       + */
       +char*
       +baseurl(char *url)
       +{
       +        char *copy, *last;
       +        long plen;
       +        Resub match[10];
       +
       +        if(url == nil)
       +                return nil;
       +        /* compile the expression the first time it is needed */
       +        if(urlprog == nil && (urlprog = regcomp(urlexpr)) == nil)
       +                error("can't compile URL regexp");
       +        memset(match, 0, sizeof match);
       +        if(regexec(urlprog, url, match, nelem(match)) == 0)
       +                return nil;
       +        copy = estrdup(url);
       +        plen = match[0].e.p - match[0].s.p;        /* length of the matched prefix */
       +        last = strrchr(copy, '/');
       +        if(last == nil || last < &copy[plen])
       +                copy[plen] = '\0';
       +        else
       +                *last = '\0';
       +        return copy;
       +}
       +
       +/*
       + * Return a newly allocated string holding the link rhref, made
       + * absolute against u->url when rhref does not itself begin with
       + * a scheme://host prefix.  A nil rhref yields "NULL URL".  The
       + * caller frees the result.
       + */
       +char*
       +fullurl(URLwin *u, Rune *rhref)
       +{
       +        char *base, *href, *hrefbase;
       +        char *result;
       +
       +        if(rhref == nil)
       +                return estrdup("NULL URL");
       +        href = runetobyte(rhref, runestrlen(rhref));
       +        hrefbase = baseurl(href);
       +        result = nil;
       +        if(hrefbase==nil && (base = baseurl(u->url))!=nil){
       +                /* relative link: start from the document's directory */
       +                result = base;        /* baseurl's copy is ours to keep */
       +                if(base[strlen(base)-1]!='/' && href[0]!='/')
       +                        result = eappend(result, "/", "");
       +        }
       +        if(result)
       +                result = eappend(result, "", href);
       +        else
       +                result = estrdup(href);
       +        free(hrefbase);
       +        /* eappend and estrdup both copy href, so the original must be released here */
       +        free(href);
       +        return result;
       +}
       +
       +void
       +render(URLwin *u, Bytes *t, Item *items, int curanchor)        /* append the text form of the item list to t; curanchor is the anchor id already announced, 0 for none */
       +{
       +        Item *il;
       +        Itext *it;
       +        Ifloat *ifl;
       +        Ispacer *is;
       +        Itable *ita;
       +        Iimage *im;
       +        Anchor *a;
       +        Table *tab;
       +        Tablecell *cell;
       +        char *href;
       +
       +        inword = 0;        /* fresh word/column state; this also happens on every recursive call */
       +        col = 0;
       +        wordi = 0;
       +
       +        for(il=items; il!=nil; il=il->next){
       +                if(il->state & IFbrk)        /* each of the two break flags asks for one newline */
       +                        renderbytes(t, "\n");
       +                if(il->state & IFbrksp)
       +                        renderbytes(t, "\n");
       +
       +                switch(il->tag){
       +                case Itexttag:
       +                        it = (Itext*)il;
       +                        renderrunes(t, it->s);
       +                        break;
       +                case Iruletag:        /* rule: a line of '=' on a line of its own */
       +                        if(t->n>0 && t->b[t->n-1]!='\n')
       +                                renderbytes(t, "\n");
       +                        renderbytes(t, "=======\n");
       +                        break;
       +                case Iimagetag:        /* images are only mentioned when aflag is set */
       +                        if(!aflag)
       +                                break;
       +                        im = (Iimage*)il;
       +                        if(im->imsrc){
       +                                href = fullurl(u, im->imsrc);
       +                                renderbytes(t, "[image %s]", href);
       +                                free(href);
       +                        }
       +                        break;
       +                case Iformfieldtag:
       +                        if(aflag)
       +                                renderbytes(t, "[formfield]");
       +                        break;
       +                case Itabletag:        /* tables are flattened: every cell's content is rendered in list order */
       +                        ita = (Itable*)il;
       +                        tab = ita->table;
       +                        for(cell=tab->cells; cell!=nil; cell=cell->next){
       +                                render(u, t, cell->content, curanchor);
       +                        }
       +                        if(t->n>0 && t->b[t->n-1]!='\n')
       +                                renderbytes(t, "\n");
       +                        break;
       +                case Ifloattag:
       +                        ifl = (Ifloat*)il;
       +                        render(u, t, ifl->item, curanchor);
       +                        break;
       +                case Ispacertag:
       +                        is = (Ispacer*)il;
       +                        if(is->spkind != ISPnull)
       +                                renderbytes(t, " ");
       +                        break;
       +                default:
       +                        error("unknown item tag %d\n", il->tag);
       +                }
       +                if(il->anchorid != 0 && il->anchorid!=curanchor){        /* item belongs to an anchor not yet announced */
       +                        for(a=u->docinfo->anchors; a!=nil; a=a->next)
       +                                if(aflag && a->index == il->anchorid){        /* print the link target once, only with aflag */
       +                                        href = fullurl(u, a->href);
       +                                        renderbytes(t, "[%s]", href);
       +                                        free(href);
       +                                        break;
       +                                }
       +                        curanchor = il->anchorid;
       +                }
       +        }
       +        if(t->n>0 && t->b[t->n-1]!='\n')        /* make sure the output ends with a newline */
       +                renderbytes(t, "\n");
       +}
       +
       +/*
       + * Render u's parsed items into a temporary buffer and write the
       + * result to u->outfd.  A failed or short write is fatal.
       + */
       +void
       +rerender(URLwin *u)
       +{
       +        Bytes *t;
       +
       +        t = emalloc(sizeof(Bytes));
       +
       +        render(u, t, u->items, 0);
       +
       +        /* the formatted text is the program's whole output, so don't lose it silently */
       +        if(t->n && write(u->outfd, (char*)t->b, t->n) != t->n)
       +                error("write error: %r");
       +        free(t->b);
       +        free(t);
       +}
       +
       +/*
       + * Guess the character set of the document in s.  Somewhat of a
       + * hack: not a full parse, just a search for "charset=" at or
       + * after the first <meta tag.  Returns UTF_8 when the value starts
       + * with utf-8 or utf8 (case-insensitively), otherwise defcharset,
       + * which is set to ISO_8859_1 if it has not been set yet.
       + */
       +int
       +charset(char *s)
       +{
       +        char *meta, *cs;
       +
       +        if(defcharset == 0)
       +                defcharset = ISO_8859_1;
       +        meta = cistrstr(s, "<meta");
       +        if(meta == nil)
       +                return defcharset;
       +        cs = cistrstr(meta, "charset=");
       +        if(cs == nil)
       +                return defcharset;
       +        cs += 8;        /* skip "charset=" */
       +        if(*cs == '"')
       +                cs++;
       +        /* cistrncmp returns 0 on a match; testing for non-zero made nearly every value count as UTF-8 */
       +        if(cistrncmp(cs, "utf-8", 5) == 0 || cistrncmp(cs, "utf8", 4) == 0)
       +                return UTF_8;
       +        return defcharset;
       +}
       +
       +void
       +rendertext(URLwin *u, Bytes *b)        /* parse the HTML bytes in b into u->items and u->docinfo, then write the formatted text */
       +{
       +        Rune *rurl;
       +
       +        rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);        /* the URL converted with toStr, in the form passed to parsehtml */
       +        u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
       +        /* NOTE(review): rurl is never freed; a free(rurl) here was left commented out - confirm whether parsehtml keeps the pointer */
       +
       +        rerender(u);
       +}
       +
       +
       +/*
       + * Release u and everything it owns: the parsed items, the
       + * document info and the URL copy made by loadhtml().  A nil u is
       + * ignored.
       + */
       +void
       +freeurlwin(URLwin *u)
       +{
       +        if(u == nil)
       +                return;
       +        freeitems(u->items);
       +        u->items = nil;
       +        freedocinfo(u->docinfo);
       +        u->docinfo = nil;
       +        /* url was estrdup'ed when u was set up; free(nil) is harmless if it is already gone */
       +        free(u->url);
       +        u->url = nil;
       +        free(u);
       +}
 (DIR) diff --git a/src/cmd/htmlfmt/main.c b/src/cmd/htmlfmt/main.c
       t@@ -0,0 +1,71 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include <draw.h>
       +#include <html.h>
       +#include "dat.h"
       +
       +char *url = "";
       +int aflag;
       +int width = 70;
       +int defcharset;
       +
       +void
       +usage(void)        /* print the usage line on descriptor 2 and exit with status "usage" */
       +{
       +        fprint(2, "usage: htmlfmt [-c charset] [-u URL] [-a] [-l length] [file ...]\n");
       +        exits("usage");
       +}
       +
       +/*
       + * htmlfmt: format each named HTML file, or standard input when
       + * no file is given, as plain text on standard output.  Stops at
       + * the first file that cannot be opened or processed and exits
       + * with that error string.
       + */
       +void
       +main(int argc, char *argv[])
       +{
       +        int i, fd;
       +        char *p, *err, *file;
       +        char errbuf[ERRMAX];
       +
       +        ARGBEGIN{
       +        case 'a':
       +                aflag++;
       +                break;
       +        case 'c':
       +                /* reuse charset()'s <meta> scan to interpret the name */
       +                p = smprint("<meta charset=\"%s\">", EARGF(usage()));
       +                defcharset = charset(p);
       +                free(p);
       +                break;
       +        case 'l':
       +        case 'w':
       +                p = EARGF(usage());
       +                width = atoi(p);
       +                if(width <= 0)
       +                        usage();
       +                break;
       +        case 'u':
       +                url = EARGF(usage());
       +                aflag++;
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +
       +        file = "<stdin>";
       +        err = nil;
       +        if(argc == 0)
       +                err = loadhtml(0);
       +        for(i=0; i<argc && err==nil; i++){
       +                file = argv[i];
       +                fd = open(file, OREAD);
       +                if(fd < 0){
       +                        errstr(errbuf, sizeof errbuf);
       +                        err = errbuf;
       +                }else{
       +                        err = loadhtml(fd);
       +                        close(fd);
       +                }
       +        }
       +        if(err)
       +                fprint(2, "htmlfmt: processing %s: %s\n", file, err);
       +        exits(err);
       +}
 (DIR) diff --git a/src/cmd/htmlfmt/mkfile b/src/cmd/htmlfmt/mkfile
       t@@ -0,0 +1,30 @@
       +<$SYS9/$systype/$objtype/mkfile
       +
       +TARG=htmlfmt
       +OFILES=\
       +        main.$O\
       +        html.$O\
       +        util.$O\
       +
       +HFILES=\
       +        dat.h\
       +        $SYS9/sys/include/html.h\
       +
       +LIB=$SYS9/$systype/$objtype/lib/libbio.a\
       +        $SYS9/$systype/$objtype/lib/libregexp.a\
       +        $SYS9/$systype/$objtype/lib/libhtml.a\
       +        $SYS9/$systype/$objtype/lib/lib9c.a
       +
       +BIN=$SYS9/$systype/$objtype/bin
       +
       +UPDATE=\
       +        mkfile\
       +        $HFILES\
       +        ${OFILES:%.$O=%.c}
       +
       +<$SYS9/sys/src/cmd/mkone
       +
       +CFLAGS=$CFLAGS
       +
       +#$O.out: $OFILES
       +#        $LD -o $target  $LDFLAGS $OFILES
 (DIR) diff --git a/src/cmd/htmlfmt/util.c b/src/cmd/htmlfmt/util.c
       t@@ -0,0 +1,120 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <bio.h>
       +#include <draw.h>
       +#include <html.h>
       +#include "dat.h"
       +
       +/*
       + * Allocate n zeroed bytes; exits through error() if malloc fails.
       + */
       +void*
       +emalloc(ulong n)
       +{
       +        void *mem;
       +
       +        if((mem = malloc(n)) == nil)
       +                error("can't malloc: %r");
       +        memset(mem, 0, n);
       +        return mem;
       +}
       +
       +/*
       + * Resize the allocation at p to n bytes; exits through error()
       + * if realloc fails.
       + */
       +void*
       +erealloc(void *p, ulong n)
       +{
       +        p = realloc(p, n);
       +        if(p == nil)
       +                error("can't realloc: %r");        /* the message used to blame malloc */
       +        return p;
       +}
       +
       +/*
       + * Return a newly allocated copy of the string s; allocation
       + * failure is fatal (see emalloc).
       + */
       +char*
       +estrdup(char *s)
       +{
       +        char *copy;
       +        long len;
       +
       +        len = strlen(s)+1;        /* include the terminating NUL */
       +        copy = emalloc(len);
       +        memmove(copy, s, len);
       +        return copy;
       +}
       +
       +/*
       + * Return a newly allocated string holding s followed by t.
       + * Neither argument is freed; allocation failure is fatal.
       + */
       +char*
       +estrstrdup(char *s, char *t)
       +{
       +        long slen, tlen;
       +        char *joined, *p;
       +
       +        slen = strlen(s);
       +        tlen = strlen(t);
       +        /* plain malloc: every byte is written below, so zeroing would be wasted */
       +        joined = malloc(slen+tlen+1);
       +        if(joined == nil)
       +                error("can't malloc: %r");
       +        p = joined;
       +        memmove(p, s, slen);
       +        p += slen;
       +        memmove(p, t, tlen);
       +        p[tlen] = '\0';
       +        return joined;
       +}
       +
       +/*
       + * Return a newly allocated string holding s, sep and t joined in
       + * that order (t may be nil and is then left out).  s is freed;
       + * sep and t are not.
       + */
       +char*
       +eappend(char *s, char *sep, char *t)
       +{
       +        long slen, seplen, tlen;
       +        char *joined;
       +
       +        if(t != nil){
       +                slen = strlen(s);
       +                seplen = strlen(sep);
       +                tlen = strlen(t);
       +                /* plain malloc: every byte is written below */
       +                joined = malloc(slen+seplen+tlen+1);
       +                if(joined == nil)
       +                        error("can't malloc: %r");
       +                memmove(joined, s, slen);
       +                memmove(joined+slen, sep, seplen);
       +                memmove(joined+slen+seplen, t, tlen);
       +                joined[slen+seplen+tlen] = '\0';
       +        }else
       +                joined = estrstrdup(s, sep);
       +        free(s);
       +        return joined;
       +}
       +
       +/*
       + * Like eappend, but takes ownership of t as well: both s and t
       + * are freed and the joined string is returned.
       + */
       +char*
       +egrow(char *s, char *sep, char *t)
       +{
       +        char *joined;
       +
       +        joined = eappend(s, sep, t);
       +        free(t);
       +        return joined;
       +}
       +
       +/*
       + * Print a formatted diagnostic, prefixed with the program name
       + * and followed by a newline, on descriptor 2 and exit.  The exit
       + * status is the unexpanded format string.
       + */
       +void
       +error(char *fmt, ...)
       +{
       +        va_list arg;
       +        char buf[256];
       +        Fmt f;
       +
       +        fmtfdinit(&f, 2, buf, sizeof buf);
       +        /* the prefix used to be "Mail: ", which does not name this program */
       +        fmtprint(&f, "htmlfmt: ");
       +        va_start(arg, fmt);
       +        fmtvprint(&f, fmt, arg);
       +        va_end(arg);
       +        fmtprint(&f, "\n");
       +        fmtfdflush(&f);
       +        exits(fmt);
       +}
       +
       +/*
       + * Append the ns bytes at s to b, enlarging the buffer when it is
       + * too small, and keep b->b NUL-terminated.  Allocation failure
       + * is fatal.
       + */
       +void
       +growbytes(Bytes *b, char *s, long ns)
       +{
       +        long need;
       +
       +        need = b->n + ns + 1;        /* data plus terminating NUL */
       +        if(need > b->nalloc){
       +                /* leave slack so that small appends rarely reallocate */
       +                b->nalloc = b->n + ns + 8000;
       +                /* realloc rather than emalloc: no need to zero the new space */
       +                b->b = realloc(b->b, b->nalloc);
       +                if(b->b == nil)
       +                        error("growbytes: can't realloc: %r");
       +        }
       +        memmove(b->b + b->n, s, ns);
       +        b->n += ns;
       +        b->b[b->n] = '\0';
       +}
 (DIR) diff --git a/src/libhtml/build.c b/src/libhtml/build.c
       t@@ -0,0 +1,4238 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <draw.h>
       +#include <ctype.h>
       +#include <html.h>
       +#include "impl.h"
       +
       +// A stack for holding integer values
       +enum {
       +        Nestmax = 40        // max nesting level of lists, font styles, etc.
       +};
       +
       +struct Stack {
       +        int                n;                                // next available slot (top of stack is stack[n-1])
       +        int                slots[Nestmax];        // stack entries
       +};
       +
       +// Parsing state
       +struct Pstate
       +{
       +        Pstate*        next;                        // in stack of Pstates
       +        int                skipping;                // true when we shouldn't add items
       +        int                skipwhite;                // true when we should strip leading space
       +        int                curfont;                // font index for current font
       +        int                curfg;                // current foreground color
       +        Background        curbg;        // current background
       +        int                curvoff;                // current baseline offset
       +        uchar        curul;                // current underline/strike state
       +        uchar        curjust;                // current justify state
       +        int                curanchor;        // current (href) anchor id (if in one), or 0
       +        int                curstate;                // current value of item state
       +        int                literal;                // current literal state
       +        int                inpar;                // true when in a paragraph-like construct
       +        int                adjsize;                // current font size adjustment
       +        Item*        items;                // dummy head of item list we're building
       +        Item*        lastit;                // tail of item list we're building
       +        Item*        prelastit;                // item before lastit
       +        Stack        fntstylestk;        // style stack
       +        Stack        fntsizestk;                // size stack
       +        Stack        fgstk;                // text color stack
       +        Stack        ulstk;                // underline stack
       +        Stack        voffstk;                // vertical offset stack
       +        Stack        listtypestk;        // list type stack
       +        Stack        listcntstk;                // list counter stack
       +        Stack        juststk;                // justification stack
       +        Stack        hangstk;                // hanging stack
       +};
       +
       +struct ItemSource
       +{
       +        Docinfo*                doc;
       +        Pstate*                psstk;
       +        int                        nforms;
       +        int                        ntables;
       +        int                        nanchors;
       +        int                        nframes;
       +        Form*                curform;
       +        Map*                curmap;
       +        Table*                tabstk;
       +        Kidinfo*                kidstk;
       +};
       +
       +// Some layout parameters
       +enum {
       +        FRKIDMARGIN = 6,        // default margin around kid frames
       +        IMGHSPACE = 0,        // default hspace for images (0 matches IE, Netscape)
       +        IMGVSPACE = 0,        // default vspace for images
       +        FLTIMGHSPACE = 2,        // default hspace for float images
       +        TABSP = 5,                // default cellspacing for tables
       +        TABPAD = 1,                // default cell padding for tables
       +        LISTTAB = 1,                // number of tabs to indent lists
       +        BQTAB = 1,                // number of tabs to indent blockquotes
       +        HRSZ = 2,                        // thickness of horizontal rules
       +        SUBOFF = 4,                // vertical offset for subscripts
       +        SUPOFF = 6,                // vertical offset for superscripts
       +        NBSP = 160                // non-breaking space character
       +};
       +
       +// These tables must be sorted.
       +// Each table has an AsciiInt array (_xxx_tab) and a StringInt
       +// pointer (xxx_tab) with the same name minus the underscore.
       +// The N...TAB counts are computed from the arrays: sizeof applied
       +// to the pointers would give the size of a pointer, not of a table.
       +static StringInt *align_tab;
       +static AsciiInt _align_tab[] = {
       +        {"baseline",        ALbaseline},
       +        {"bottom",        ALbottom},
       +        {"center",        ALcenter},
       +        {"char",                ALchar},
       +        {"justify",        ALjustify},
       +        {"left",                ALleft},
       +        {"middle",        ALmiddle},
       +        {"right",                ALright},
       +        {"top",                ALtop}
       +};
       +#define NALIGNTAB (sizeof(_align_tab)/sizeof(_align_tab[0]))
       +
       +static StringInt *input_tab;
       +static AsciiInt _input_tab[] = {
       +        {"button",        Fbutton},
       +        {"checkbox",        Fcheckbox},
       +        {"file",                Ffile},
       +        {"hidden",        Fhidden},
       +        {"image",        Fimage},
       +        {"password",        Fpassword},
       +        {"radio",                Fradio},
       +        {"reset",                Freset},
       +        {"submit",        Fsubmit},
       +        {"text",                Ftext}
       +};
       +#define NINPUTTAB (sizeof(_input_tab)/sizeof(_input_tab[0]))
       +
       +static StringInt *clear_tab;
       +static AsciiInt _clear_tab[] = {
       +        {"all",        IFcleft|IFcright},
       +        {"left",        IFcleft},
       +        {"right",        IFcright}
       +};
       +#define NCLEARTAB (sizeof(_clear_tab)/sizeof(_clear_tab[0]))
       +
       +static StringInt *fscroll_tab;
       +static AsciiInt _fscroll_tab[] = {
       +        {"auto",        FRhscrollauto|FRvscrollauto},
       +        {"no",        FRnoscroll},
       +        {"yes",        FRhscroll|FRvscroll},
       +};
       +#define NFSCROLLTAB (sizeof(_fscroll_tab)/sizeof(_fscroll_tab[0]))
       +
       +static StringInt *shape_tab;
       +static AsciiInt _shape_tab[] = {
       +        {"circ",                SHcircle},
       +        {"circle",                SHcircle},
       +        {"poly",                SHpoly},
       +        {"polygon",        SHpoly},
       +        {"rect",                SHrect},
       +        {"rectangle",        SHrect}
       +};
       +#define NSHAPETAB (sizeof(_shape_tab)/sizeof(_shape_tab[0]))
       +
       +static StringInt *method_tab;
       +static AsciiInt _method_tab[] = {
       +        {"get",                HGet},
       +        {"post",                HPost}
       +};
       +#define NMETHODTAB (sizeof(_method_tab)/sizeof(_method_tab[0]))
       +
       +static Rune** roman;
       +static char* _roman[15]= {
       +        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
       +        "XI", "XII", "XIII", "XIV", "XV"
       +};
       +#define NROMAN 15
       +
       +// List number types
       +enum {
       +        LTdisc, LTsquare, LTcircle, LT1, LTa, LTA, LTi, LTI
       +};
       +
       +enum {
       +        SPBefore = 2,
       +        SPAfter = 4,
       +        BL = 1,
       +        BLBA = (BL|SPBefore|SPAfter)
       +};
       +
       +// blockbrk[tag] is break info for a block level element, or one
       +// of a few others that get the same treatment re ending open paragraphs
       +// and requiring a line break / vertical space before them.
       +// If we want a line of space before the given element, SPBefore is OR'd in.
       +// If we want a line of space after the given element, SPAfter is OR'd in.
       +
       +static uchar blockbrk[Numtags]= {
       +        [Taddress] BLBA, [Tblockquote] BLBA, [Tcenter] BL,
       +        [Tdir] BLBA, [Tdiv] BL, [Tdd] BL, [Tdl] BLBA,
       +        [Tdt] BL, [Tform] BLBA,
       +        // headings and tables get breaks added manually
       +        [Th1] BL, [Th2] BL, [Th3] BL,
       +        [Th4] BL, [Th5] BL, [Th6] BL,
       +        [Thr] BL, [Tisindex] BLBA, [Tli] BL, [Tmenu] BLBA,
       +        [Tol] BLBA, [Tp] BLBA, [Tpre] BLBA,
       +        [Tul] BLBA
       +};
       +
       +enum {
       +        AGEN = 1
       +};
       +
       +// attrinfo is information about attributes.
       +// The AGEN value means that the attribute is generic (applies to almost all elements)
       +static uchar attrinfo[Numattrs]= {
       +        [Aid] AGEN, [Aclass] AGEN, [Astyle] AGEN, [Atitle] AGEN,
       +        [Aonblur] AGEN, [Aonchange] AGEN, [Aonclick] AGEN,
       +        [Aondblclick] AGEN, [Aonfocus] AGEN, [Aonkeypress] AGEN,
       +        [Aonkeyup] AGEN, [Aonload] AGEN, [Aonmousedown] AGEN,
       +        [Aonmousemove] AGEN, [Aonmouseout] AGEN, [Aonmouseover] AGEN,
       +        [Aonmouseup] AGEN, [Aonreset] AGEN, [Aonselect] AGEN,
       +        [Aonsubmit] AGEN, [Aonunload] AGEN
       +};
       +
       +static uchar scriptev[Numattrs]= {
       +        [Aonblur] SEonblur, [Aonchange] SEonchange, [Aonclick] SEonclick,
       +        [Aondblclick] SEondblclick, [Aonfocus] SEonfocus, [Aonkeypress] SEonkeypress,
       +        [Aonkeyup] SEonkeyup, [Aonload] SEonload, [Aonmousedown] SEonmousedown,
       +        [Aonmousemove] SEonmousemove, [Aonmouseout] SEonmouseout, [Aonmouseover] SEonmouseover,
       +        [Aonmouseup] SEonmouseup, [Aonreset] SEonreset, [Aonselect] SEonselect,
       +        [Aonsubmit] SEonsubmit, [Aonunload] SEonunload
       +};
       +
       +// Color lookup table: color name to 0xRRGGBB value, sorted by name.
       +// NCOLORS is computed from the _color_tab array; sizeof applied to
       +// the color_tab pointer would give the size of a pointer instead.
       +static StringInt *color_tab;
       +static AsciiInt _color_tab[] = {
       +        {"aqua", 0x00FFFF},
       +        {"black",  0x000000},
       +        {"blue", 0x0000CC},
       +        {"fuchsia", 0xFF00FF},
       +        {"gray", 0x808080},
       +        {"green", 0x008000},
       +        {"lime", 0x00FF00},
       +        {"maroon", 0x800000},
       +        {"navy", 0x000080},
       +        {"olive", 0x808000},
       +        {"purple", 0x800080},
       +        {"red", 0xFF0000},
       +        {"silver", 0xC0C0C0},
       +        {"teal", 0x008080},
       +        {"white", 0xFFFFFF},
       +        {"yellow", 0xFFFF00}
       +};
       +#define NCOLORS (sizeof(_color_tab)/sizeof(_color_tab[0]))
       +
       +// Target-name table and its bookkeeping counters.
       +// NOTE(review): presumably set up by targetmapinit() and grown on demand,
       +// with targetmapsize as capacity and ntargets as the used count -- confirm.
       +static StringInt                 *targetmap;
       +static int                        targetmapsize;
       +static int                        ntargets;
       +
       +// Set to 1 at the end of buildinit(); getitems checks it to run the
       +// one-time setup only once.
       +static int buildinited = 0;
       +
       +// NOTE(review): buffer sizes, presumably for scratch buffers used
       +// elsewhere in this file -- confirm against their users.
       +#define SMALLBUFSIZE 240
       +#define BIGBUFSIZE 2000
       +
       +// dbgbuild: when > 1, getitems prints each token it processes to fd 2.
       +// warn: when nonzero, the builder prints "warning: ..." diagnostics to fd 2.
       +int        dbgbuild = 0;
       +int        warn = 0;
       +
       +// Forward declarations of the file-local (static) helpers used by the
       +// builder, so that they can be called before their definitions appear.
       +static Align                aalign(Token* tok);
       +static int                        acolorval(Token* tok, int attid, int dflt);
       +static void                        addbrk(Pstate* ps, int sp, int clr);
       +static void                        additem(Pstate* ps, Item* it, Token* tok);
       +static void                        addlinebrk(Pstate* ps, int clr);
       +static void                        addnbsp(Pstate* ps);
       +static void                        addtext(Pstate* ps, Rune* s);
       +static Dimen                adimen(Token* tok, int attid);
       +static int                        aflagval(Token* tok, int attid);
       +static int                        aintval(Token* tok, int attid, int dflt);
       +static Rune*                astrval(Token* tok, int attid, Rune* dflt);
       +static int                        atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt);
       +static int                        atargval(Token* tok, int dflt);
       +static int                        auintval(Token* tok, int attid, int dflt);
       +static Rune*                aurlval(Token* tok, int attid, Rune* dflt, Rune* base);
       +static Rune*                aval(Token* tok, int attid);
       +static void                        buildinit(void);
       +static Pstate*                cell_pstate(Pstate* oldps, int ishead);
       +static void                        changehang(Pstate* ps, int delta);
       +static void                        changeindent(Pstate* ps, int delta);
       +static int                        color(Rune* s, int dflt);
       +static void                        copystack(Stack* tostk, Stack* fromstk);
       +static int                        dimprint(char* buf, int nbuf, Dimen d);
       +static Pstate*                finishcell(Table* curtab, Pstate* psstk);
       +static void                        finish_table(Table* t);
       +static void                        freeanchor(Anchor* a);
       +static void                        freedestanchor(DestAnchor* da);
       +static void                        freeform(Form* f);
       +static void                        freeformfield(Formfield* ff);
       +static void                        freeitem(Item* it);
       +static void                        freepstate(Pstate* p);
       +static void                        freepstatestack(Pstate* pshead);
       +static void                        freescriptevents(SEvent* ehead);
       +static void                        freetable(Table* t);
       +static Map*                getmap(Docinfo* di, Rune* name);
       +static Rune*                getpcdata(Token* toks, int tokslen, int* ptoki);
       +static Pstate*                lastps(Pstate* psl);
       +static Rune*                listmark(uchar ty, int n);
       +static int                        listtyval(Token* tok, int dflt);
       +static Align                makealign(int halign, int valign);
       +static Background        makebackground(Rune* imgurl, int color);
       +static Dimen                makedimen(int kind, int spec);
       +static Anchor*                newanchor(int index, Rune* name, Rune* href, int target, Anchor* link);
       +static Area*                newarea(int shape, Rune* href, int target, Area* link);
       +static DestAnchor*        newdestanchor(int index, Rune* name, Item* item, DestAnchor* link);
       +static Docinfo*                newdocinfo(void);
       +static Genattr*                newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events);
       +static Form*                newform(int formid, Rune* name, Rune* action,
       +                                        int target, int method, Form* link);
       +static Formfield*        newformfield(int ftype, int fieldid, Form* form, Rune* name,
       +                                        Rune* value, int size, int maxlength, Formfield* link);
       +static Item*                newifloat(Item* it, int side);
       +static Item*                newiformfield(Formfield* ff);
       +static Item*                newiimage(Rune* src, Rune* altrep, int align, int width, int height,
       +                                        int hspace, int vspace, int border, int ismap, Map* map);
       +static Item*                newirule(int align, int size, int noshade, Dimen wspec);
       +static Item*                newispacer(int spkind);
       +static Item*                newitable(Table* t);
       +static ItemSource*        newitemsource(Docinfo* di);
       +static Item*                newitext(Rune* s, int fnt, int fg, int voff, int ul);
       +static Kidinfo*                newkidinfo(int isframeset, Kidinfo* link);
       +static Option*                newoption(int selected, Rune* value, Rune* display, Option* link);
       +static Pstate*                newpstate(Pstate* link);
       +static SEvent*                newscriptevent(int type, Rune* script, SEvent* link);
       +static Table*                newtable(int tableid, Align align, Dimen width, int border,
       +                                        int cellspacing, int cellpadding, Background bg, Token* tok, Table* link);
       +static Tablecell*        newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec,
       +                                        int hspec, Background bg, int flags, Tablecell* link);
       +static Tablerow*        newtablerow(Align align, Background bg, int flags, Tablerow* link);
       +static Dimen                parsedim(Rune* s, int ns);
       +static void                        pop(Stack* stk);
       +static void                        popfontsize(Pstate* ps);
       +static void                        popfontstyle(Pstate* ps);
       +static void                        popjust(Pstate* ps);
       +static int                        popretnewtop(Stack* stk, int dflt);
       +static int                        push(Stack* stk, int val);
       +static void                        pushfontsize(Pstate* ps, int sz);
       +static void                        pushfontstyle(Pstate* ps, int sty);
       +static void                        pushjust(Pstate* ps, int j);
       +static Item*                textit(Pstate* ps, Rune* s);
       +static Rune*                removeallwhite(Rune* s);
       +static void                        resetdocinfo(Docinfo* d);
       +static void                        setcurfont(Pstate* ps);
       +static void                        setcurjust(Pstate* ps);
       +static void                        setdimarray(Token* tok, int attid, Dimen** pans, int* panslen);
       +static Rune*                stringalign(int a);
       +static void                        targetmapinit(void);
       +static int                        toint(Rune* s);
       +static int                        top(Stack* stk, int dflt);
       +static void                        trim_cell(Tablecell* c);
       +static int                        validalign(Align a);
       +static int                        validdimen(Dimen d);
       +static int                        validformfield(Formfield* f);
       +static int                        validhalign(int a);
       +static int                        validptr(void* p);
       +static int                        validStr(Rune* s);
       +static int                        validtable(Table* t);
       +static int                        validtablerow(Tablerow* r);
       +static int                        validtablecol(Tablecol* c);
       +static int                        validtablecell(Tablecell* c);
       +static int                        validvalign(int a);
       +static int                        Iconv(Fmt *f);
       +
       +// One-time setup for the builder.  getitems calls this when buildinited
       +// is still 0.  It calls runetabinit(), passes each static _xxx source table
       +// through cvtstringtab()/cvtstringinttab() and stores the result in the
       +// matching lookup-table variable, installs Iconv as the handler for the
       +// 'I' print verb, calls targetmapinit(), and finally sets buildinited.
       +static void
       +buildinit(void)
       +{
       +        runetabinit();
       +        roman = cvtstringtab(_roman, nelem(_roman));
       +        color_tab = cvtstringinttab(_color_tab, nelem(_color_tab));
       +        method_tab = cvtstringinttab(_method_tab, nelem(_method_tab));
       +        shape_tab = cvtstringinttab(_shape_tab, nelem(_shape_tab));
       +        fscroll_tab = cvtstringinttab(_fscroll_tab, nelem(_fscroll_tab));
       +        clear_tab = cvtstringinttab(_clear_tab, nelem(_clear_tab));
       +        input_tab = cvtstringinttab(_input_tab, nelem(_input_tab));
       +        align_tab = cvtstringinttab(_align_tab, nelem(_align_tab));
       +
       +        // register the 'I' verb so print-style formatting can use Iconv
       +        fmtinstall('I', Iconv);
       +        targetmapinit();
       +        // mark setup as done so callers checking buildinited skip it next time
       +        buildinited = 1;
       +}
       +
       +// Create the parsing context for document di: a fresh Pstate (with no
       +// enclosing state) plus a newly allocated ItemSource whose counters are
       +// zero and whose current-form/map and table/kid stacks are nil.
       +// For any media type other than TextHtml the initial state is switched
       +// to literal mode: the IFwrap bit is cleared and FntT is pushed as the
       +// font style.
       +static ItemSource*
       +newitemsource(Docinfo* di)
       +{
       +        Pstate*        state;
       +        ItemSource*        src;
       +
       +        state = newpstate(nil);
       +        if(di->mediatype != TextHtml) {
       +                // not html: show the text as-is
       +                state->curstate &= ~IFwrap;
       +                state->literal = 1;
       +                pushfontstyle(state, FntT);
       +        }
       +
       +        src = (ItemSource*)emalloc(sizeof(ItemSource));
       +
       +        // what is being parsed, and the state stack to parse it with
       +        src->doc = di;
       +        src->psstk = state;
       +
       +        // nothing in progress yet
       +        src->curform = nil;
       +        src->curmap = nil;
       +        src->tabstk = nil;
       +        src->kidstk = nil;
       +
       +        // nothing counted yet
       +        src->nforms = 0;
       +        src->ntables = 0;
       +        src->nanchors = 0;
       +        src->nframes = 0;
       +
       +        return src;
       +}
       +
       +static Item *getitems(ItemSource* is, uchar* data, int datalen);
       +
       +// Parse the html document held in data[0..datalen) and return the
       +// resulting list of layout items.
       +// A new Docinfo is allocated and stored in *pdi; its src and base are
       +// both copies of pagesrc, and its media type and character set are
       +// taken from mtype and chset.
       +// When the caller is finished with the items it should call freeitems
       +// on the returned list, and then freedocinfo(*pdi).
       +Item*
       +parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi)
       +{
       +        Docinfo*        doc;
       +        ItemSource*        source;
       +        Item*        items;
       +
       +        // describe the document and hand it to the caller
       +        doc = newdocinfo();
       +        doc->mediatype = mtype;
       +        doc->chset = chset;
       +        doc->src = _Strdup(pagesrc);
       +        doc->base = _Strdup(pagesrc);
       +        *pdi = doc;
       +
       +        // the item source is only needed while the items are being built
       +        source = newitemsource(doc);
       +        items = getitems(source, data, datalen);
       +        freepstatestack(source->psstk);
       +        free(source);
       +
       +        return items;
       +}
       +
       +// Get a group of tokens for lexer, parse them, and create
       +// a list of layout items.
       +// When caller is done with the items, it should call
       +// freeitems on the returned result.
       +static Item*
       +getitems(ItemSource* is, uchar* data, int datalen)
       +{
       +        int        i;
       +        int        j;
       +        int        nt;
       +        int        pt;
       +        int        doscripts;
       +        int        tokslen;
       +        int        toki;
       +        int        h;
       +        int        sz;
       +        int        method;
       +        int        n;
       +        int        nblank;
       +        int        norsz;
       +        int        bramt;
       +        int        sty;
       +        int        nosh;
       +        int        oldcuranchor;
       +        int        dfltbd;
       +        int        v;
       +        int        hang;
       +        int        isempty;
       +        int        tag;
       +        int        brksp;
       +        int        target;
       +        uchar        brk;
       +        uchar        flags;
       +        uchar        align;
       +        uchar        al;
       +        uchar        ty;
       +        uchar        ty2;
       +        Pstate*        ps;
       +        Pstate*        nextps;
       +        Pstate*        outerps;
       +        Table*        curtab;
       +        Token*        tok;
       +        Token*        toks;
       +        Docinfo*        di;
       +        Item*        ans;
       +        Item*        img;
       +        Item*        ffit;
       +        Item*        tabitem;
       +        Rune*        s;
       +        Rune*        t;
       +        Rune*        name;
       +        Rune*        enctype;
       +        Rune*        usemap;
       +        Rune*        prompt;
       +        Rune*        equiv;
       +        Rune*        val;
       +        Rune*        nsz;
       +        Rune*        script;
       +        Map*        map;
       +        Form*        frm;
       +        Iimage*        ii;
       +        Kidinfo*        kd;
       +        Kidinfo*        ks;
       +        Kidinfo*        pks;
       +        Dimen        wd;
       +        Option*        option;
       +        Table*        tab;
       +        Tablecell*        c;
       +        Tablerow*        tr;
       +        Formfield*        field;
       +        Formfield*        ff;
       +        Rune*        href;
       +        Rune*        src;
       +        Rune*        scriptsrc;
       +        Rune*        bgurl;
       +        Rune*        action;
       +        Background        bg;
       +
       +        if(!buildinited)
       +                buildinit();
       +        doscripts = 0;        // for now
       +        ps = is->psstk;
       +        curtab = is->tabstk;
       +        di = is->doc;
       +        toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen);
       +        toki = 0;
       +        for(; toki < tokslen; toki++) {
       +                tok = &toks[toki];
       +                if(dbgbuild > 1)
       +                        fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok);
       +                tag = tok->tag;
       +                brk = 0;
       +                brksp = 0;
       +                if(tag < Numtags) {
       +                        brk = blockbrk[tag];
       +                        if(brk&SPBefore)
       +                                brksp = 1;
       +                }
       +                else if(tag < Numtags + RBRA) {
       +                        brk = blockbrk[tag - RBRA];
       +                        if(brk&SPAfter)
       +                                brksp = 1;
       +                }
       +                if(brk) {
       +                        addbrk(ps, brksp, 0);
       +                        if(ps->inpar) {
       +                                popjust(ps);
       +                                ps->inpar = 0;
       +                        }
       +                }
       +                // check common case first (Data), then switch statement on tag
       +                if(tag == Data) {
       +                        // Lexing didn't pay attention to SGML record boundary rules:
       +                        // \n after start tag or before end tag to be discarded.
       +                        // (Lex has already discarded all \r's).
       +                        // Some pages assume this doesn't happen in <PRE> text,
       +                        // so we won't do it if literal is true.
       +                        // BUG: won't discard \n before a start tag that begins
       +                        // the next bufferful of tokens.
       +                        s = tok->text;
       +                        n = _Strlen(s);
       +                        if(!ps->literal) {
       +                                i = 0;
       +                                j = n;
       +                                if(toki > 0) {
       +                                        pt = toks[toki - 1].tag;
       +                                        // IE and Netscape both ignore this rule (contrary to spec)
       +                                        // if previous tag was img
       +                                        if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
       +                                                i++;
       +                                }
       +                                if(toki < tokslen - 1) {
       +                                        nt = toks[toki + 1].tag;
       +                                        if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
       +                                                j--;
       +                                }
       +                                if(i > 0 || j < n) {
       +                                        t = s;
       +                                        s = _Strsubstr(s, i, j);
       +                                        free(t);
       +                                        n = j-i;
       +                                }
       +                        }
       +                        if(ps->skipwhite) {
       +                                _trimwhite(s, n, &t, &nt);
       +                                if(t == nil) {
       +                                        free(s);
       +                                        s = nil;
       +                                }
       +                                else if(t != s) {
       +                                        t = _Strndup(t, nt);
       +                                        free(s);
       +                                        s = t;
       +                                }
       +                                if(s != nil)
       +                                        ps->skipwhite = 0;
       +                        }
       +                        tok->text = nil;                // token doesn't own string anymore
       +                        if(s != nil)
       +                                addtext(ps, s);
       +                }
       +                else
       +                        switch(tag) {
       +                        // Some abbrevs used in following DTD comments
       +                        // %text =         #PCDATA
       +                        //                | TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
       +                        //                | EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
       +                        //                | A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
       +                        //                | INPUT | SELECT | TEXTAREA
       +                        // %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
       +                        //                | BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
       +                        // %flow = (%text | %block)*
       +                        // %body.content = (%heading | %text | %block | ADDRESS)*
       +
       +                        // <!ELEMENT A - - (%text) -(A)>
       +                        // Anchors are not supposed to be nested, but you sometimes see
       +                        // href anchors inside destination anchors.
       +                        case Ta:
       +                                if(ps->curanchor != 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: nested <A> or missing </A>\n");
       +                                        ps->curanchor = 0;
       +                                }
       +                                name = aval(tok, Aname);
       +                                href = aurlval(tok, Ahref, nil, di->base);
       +                                // ignore rel, rev, and title attrs
       +                                if(href != nil) {
       +                                        target = atargval(tok, di->target);
       +                                        di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
       +                                        if(name != nil)
       +                                                name = _Strdup(name);        // for DestAnchor construction, below
       +                                        ps->curanchor = is->nanchors;
       +                                        ps->curfg = push(&ps->fgstk, di->link);
       +                                        ps->curul = push(&ps->ulstk, ULunder);
       +                                }
       +                                if(name != nil) {
       +                                        // add a null item to be destination
       +                                        additem(ps, newispacer(ISPnull), tok);
       +                                        di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
       +                                }
       +                                break;
       +
       +                        case Ta+RBRA :
       +                                if(ps->curanchor != 0) {
       +                                        ps->curfg = popretnewtop(&ps->fgstk, di->text);
       +                                        ps->curul = popretnewtop(&ps->ulstk, ULnone);
       +                                        ps->curanchor = 0;
       +                                }
       +                                break;
       +
       +                        // <!ELEMENT APPLET - - (PARAM | %text)* >
       +                        // We can't do applets, so ignore PARAMS, and let
       +                        // the %text contents appear for the alternative rep
       +                        case Tapplet:
       +                        case Tapplet+RBRA:
       +                                if(warn && tag == Tapplet)
       +                                        fprint(2, "warning: <APPLET> ignored\n");
       +                                break;
       +
       +                        // <!ELEMENT AREA - O EMPTY>
       +                        case Tarea:
       +                                map = di->maps;
       +                                if(map == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <AREA> not inside <MAP>\n");
       +                                        continue;
       +                                }
       +                                map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
       +                                        aurlval(tok, Ahref, nil, di->base),
       +                                        atargval(tok, di->target),
       +                                        map->areas);
       +                                setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
       +                                break;
       +
       +                        // <!ELEMENT (B|STRONG) - - (%text)*>
       +                        case Tb:
       +                        case Tstrong:
       +                                pushfontstyle(ps, FntB);
       +                                break;
       +
       +                        case Tb+RBRA:
       +                        case Tcite+RBRA:
       +                        case Tcode+RBRA:
       +                        case Tdfn+RBRA:
       +                        case Tem+RBRA:
       +                        case Tkbd+RBRA:
       +                        case Ti+RBRA:
       +                        case Tsamp+RBRA:
       +                        case Tstrong+RBRA:
       +                        case Ttt+RBRA:
       +                        case Tvar+RBRA :
       +                        case Taddress+RBRA:
       +                                popfontstyle(ps);
       +                                break;
       +
       +                        // <!ELEMENT BASE - O EMPTY>
       +                        case Tbase:
       +                                t = di->base;
       +                                di->base = aurlval(tok, Ahref, di->base, di->base);
       +                                if(t != nil)
       +                                        free(t);
       +                                di->target = atargval(tok, di->target);
       +                                break;
       +
       +                        // <!ELEMENT BASEFONT - O EMPTY>
       +                        case Tbasefont:
       +                                ps->adjsize = aintval(tok, Asize, 3) - 3;
       +                                break;
       +
       +                        // <!ELEMENT (BIG|SMALL) - - (%text)*>
       +                        case Tbig:
       +                        case Tsmall:
       +                                sz = ps->adjsize;
       +                                if(tag == Tbig)
       +                                        sz += Large;
       +                                else
       +                                        sz += Small;
       +                                pushfontsize(ps, sz);
       +                                break;
       +
       +                        case Tbig+RBRA:
       +                        case Tsmall+RBRA:
       +                                popfontsize(ps);
       +                                break;
       +
       +                        // <!ELEMENT BLOCKQUOTE - - %body.content>
       +                        case Tblockquote:
       +                                changeindent(ps, BQTAB);
       +                                break;
       +
       +                        case Tblockquote+RBRA:
       +                                changeindent(ps, -BQTAB);
       +                                break;
       +
       +                        // <!ELEMENT BODY O O %body.content>
       +                        case Tbody:
       +                                ps->skipping = 0;
       +                                bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
       +                                bgurl = aurlval(tok, Abackground, nil, di->base);
       +                                if(bgurl != nil) {
       +                                        if(di->backgrounditem != nil)
       +                                                freeitem((Item*)di->backgrounditem);
       +                                                // really should remove old item from di->images list,
       +                                                // but there should only be one BODY element ...
       +                                        di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
       +                                        di->backgrounditem->nextimage = di->images;
       +                                        di->images = di->backgrounditem;
       +                                }
       +                                ps->curbg = bg;
       +                                di->background = bg;
       +                                di->text = acolorval(tok, Atext, di->text);
       +                                di->link = acolorval(tok, Alink, di->link);
       +                                di->vlink = acolorval(tok, Avlink, di->vlink);
       +                                di->alink = acolorval(tok, Aalink, di->alink);
       +                                if(di->text != ps->curfg) {
       +                                        ps->curfg = di->text;
       +                                        ps->fgstk.n = 0;
       +                                }
       +                                break;
       +
       +                        case Tbody+RBRA:
       +                                // HTML spec says ignore things after </body>,
       +                                // but IE and Netscape don't
       +                                // ps.skipping = 1;
       +                                break;
       +
       +                        // <!ELEMENT BR - O EMPTY>
       +                        case Tbr:
       +                                addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
       +                                break;
       +
       +                        // <!ELEMENT CAPTION - - (%text;)*>
       +                        case Tcaption:
       +                                if(curtab == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <CAPTION> outside <TABLE>\n");
       +                                        continue;
       +                                }
       +                                if(curtab->caption != nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: more than one <CAPTION> in <TABLE>\n");
       +                                        continue;
       +                                }
       +                                ps = newpstate(ps);
       +                                curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop);
       +                                break;
       +
       +                        case Tcaption+RBRA:
       +                                nextps = ps->next;
       +                                if(curtab == nil || nextps == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </CAPTION>\n");
       +                                        continue;
       +                                }
       +                                curtab->caption = ps->items->next;
       +                                free(ps);
       +                                ps = nextps;
       +                                break;
       +
       +                        case Tcenter:
       +                        case Tdiv:
       +                                if(tag == Tcenter)
       +                                        al = ALcenter;
       +                                else
       +                                        al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust);
       +                                pushjust(ps, al);
       +                                break;
       +
       +                        case Tcenter+RBRA:
       +                        case Tdiv+RBRA:
       +                                popjust(ps);
       +                                break;
       +
       +                        // <!ELEMENT DD - O  %flow >
       +                        case Tdd:
       +                                if(ps->hangstk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <DD> not inside <DL\n");
       +                                        continue;
       +                                }
       +                                h = top(&ps->hangstk, 0);
       +                                if(h != 0)
       +                                        changehang(ps, -10*LISTTAB);
       +                                else
       +                                        addbrk(ps, 0, 0);
       +                                push(&ps->hangstk, 0);
       +                                break;
       +
       +                        //<!ELEMENT (DIR|MENU) - - (LI)+ -(%block) >
       +                        //<!ELEMENT (OL|UL) - - (LI)+>
       +                        case Tdir:
       +                        case Tmenu:
       +                        case Tol:
       +                        case Tul:
       +                                changeindent(ps, LISTTAB);
       +                                push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc));
       +                                push(&ps->listcntstk, aintval(tok, Astart, 1));
       +                                break;
       +
       +                        case Tdir+RBRA:
       +                        case Tmenu+RBRA:
       +                        case Tol+RBRA:
       +                        case Tul+RBRA:
       +                                if(ps->listtypestk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: %T ended no list\n", tok);
       +                                        continue;
       +                                }
       +                                addbrk(ps, 0, 0);
       +                                pop(&ps->listtypestk);
       +                                pop(&ps->listcntstk);
       +                                changeindent(ps, -LISTTAB);
       +                                break;
       +
       +                        // <!ELEMENT DL - - (DT|DD)+ >
       +                        case Tdl:
       +                                changeindent(ps, LISTTAB);
       +                                push(&ps->hangstk, 0);
       +                                break;
       +
       +                        case Tdl+RBRA:
       +                                if(ps->hangstk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </DL>\n");
       +                                        continue;
       +                                }
       +                                changeindent(ps, -LISTTAB);
       +                                if(top(&ps->hangstk, 0) != 0)
       +                                        changehang(ps, -10*LISTTAB);
       +                                pop(&ps->hangstk);
       +                                break;
       +
       +                        // <!ELEMENT DT - O (%text)* >
       +                        case Tdt:
       +                                if(ps->hangstk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <DT> not inside <DL>\n");
       +                                        continue;
       +                                }
       +                                h = top(&ps->hangstk, 0);
       +                                pop(&ps->hangstk);
       +                                if(h != 0)
       +                                        changehang(ps, -10*LISTTAB);
       +                                changehang(ps, 10*LISTTAB);
       +                                push(&ps->hangstk, 1);
       +                                break;
       +
       +                        // <!ELEMENT FONT - - (%text)*>
       +                        case Tfont:
       +                                sz = top(&ps->fntsizestk, Normal);
       +                                if(_tokaval(tok, Asize, &nsz, 0)) {
       +                                        if(_prefix(L(Lplus), nsz))
       +                                                sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize;
       +                                        else if(_prefix(L(Lminus), nsz))
       +                                                sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize;
       +                                        else if(nsz != nil)
       +                                                sz = Normal + (_Strtol(nsz, nil, 10) - 3);
       +                                }
       +                                ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg));
       +                                pushfontsize(ps, sz);
       +                                break;
       +
       +                        case Tfont+RBRA:
       +                                if(ps->fgstk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </FONT>\n");
       +                                        continue;
       +                                }
       +                                ps->curfg = popretnewtop(&ps->fgstk, di->text);
       +                                popfontsize(ps);
       +                                break;
       +
       +                        // <!ELEMENT FORM - - %body.content -(FORM) >
       +                        case Tform:
       +                                if(is->curform != nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <FORM> nested inside another\n");
       +                                        continue;
       +                                }
       +                                action = aurlval(tok, Aaction, di->base, di->base);
       +                                s = aval(tok, Aid);
       +                                name = astrval(tok, Aname, s);
       +                                if(s)
       +                                        free(s);
       +                                target = atargval(tok, di->target);
       +                                method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet);
       +                                if(warn && _tokaval(tok, Aenctype, &enctype, 0) &&
       +                                                _Strcmp(enctype, L(Lappl_form)))
       +                                        fprint(2, "form enctype %S not handled\n", enctype);
       +                                frm = newform(++is->nforms, name, action, target, method, di->forms);
       +                                di->forms = frm;
       +                                is->curform = frm;
       +                                break;
       +
       +                        case Tform+RBRA:
       +                                if(is->curform == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </FORM>\n");
       +                                        continue;
       +                                }
       +                                // put fields back in input order
       +                                is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields);
       +                                is->curform = nil;
       +                                break;
       +
       +                        // <!ELEMENT FRAME - O EMPTY>
       +                        case Tframe:
       +                                ks = is->kidstk;
       +                                if(ks == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <FRAME> not in <FRAMESET>\n");
       +                                        continue;
       +                                }
       +                                ks->kidinfos = kd = newkidinfo(0, ks->kidinfos);
       +                                kd->src = aurlval(tok, Asrc, nil, di->base);
       +                                kd->name = aval(tok, Aname);
       +                                if(kd->name == nil) {
       +                                        s = _ltoStr(++is->nframes);
       +                                        kd->name = _Strdup2(L(Lfr), s);
       +                                        free(s);
       +                                }
       +                                kd->marginw = auintval(tok, Amarginwidth, 0);
       +                                kd->marginh = auintval(tok, Amarginheight, 0);
       +                                kd->framebd = auintval(tok, Aframeborder, 1);
       +                                kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags);
       +                                norsz = aflagval(tok, Anoresize);
       +                                if(norsz)
       +                                        kd->flags |= FRnoresize;
       +                                break;
       +
       +                        // <!ELEMENT FRAMESET - - (FRAME|FRAMESET)+>
       +                        case Tframeset:
       +                                ks = newkidinfo(1, nil);
       +                                pks = is->kidstk;
       +                                if(pks == nil)
       +                                        di->kidinfo = ks;
       +                                else  {
       +                                        ks->next = pks->kidinfos;
       +                                        pks->kidinfos = ks;
       +                                }
       +                                ks->nextframeset = pks;
       +                                is->kidstk = ks;
       +                                setdimarray(tok, Arows, &ks->rows, &ks->nrows);
       +                                if(ks->nrows == 0) {
       +                                        ks->rows = (Dimen*)emalloc(sizeof(Dimen));
       +                                        ks->nrows = 1;
       +                                        ks->rows[0] = makedimen(Dpercent, 100);
       +                                }
       +                                setdimarray(tok, Acols, &ks->cols, &ks->ncols);
       +                                if(ks->ncols == 0) {
       +                                        ks->cols = (Dimen*)emalloc(sizeof(Dimen));
       +                                        ks->ncols = 1;
       +                                        ks->cols[0] = makedimen(Dpercent, 100);
       +                                }
       +                                break;
       +
       +                        case Tframeset+RBRA:
       +                                if(is->kidstk == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </FRAMESET>\n");
       +                                        continue;
       +                                }
       +                                ks = is->kidstk;
       +                                // put kids back in original order
       +                                // and add blank frames to fill out cells
       +                                n = ks->nrows*ks->ncols;
       +                                nblank = n - _listlen((List*)ks->kidinfos);
       +                                while(nblank-- > 0)
       +                                        ks->kidinfos = newkidinfo(0, ks->kidinfos);
       +                                ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos);
       +                                is->kidstk = is->kidstk->nextframeset;
       +                                if(is->kidstk == nil) {
       +                                        // end input
       +                                        ans = nil;
       +                                        goto return_ans;
       +                                }
       +                                break;
       +
       +                        // <!ELEMENT H1 - - (%text;)*>, etc.
       +                        case Th1:
       +                        case Th2:
       +                        case Th3:
       +                        case Th4:
       +                        case Th5:
       +                        case Th6:
       +                                bramt = 1;
       +                                if(ps->items == ps->lastit)
       +                                        bramt = 0;
       +                                addbrk(ps, bramt, IFcleft|IFcright);
       +                                sz = Verylarge - (tag - Th1);
       +                                if(sz < Tiny)
       +                                        sz = Tiny;
       +                                pushfontsize(ps, sz);
       +                                sty = top(&ps->fntstylestk, FntR);
       +                                if(tag == Th1)
       +                                        sty = FntB;
       +                                pushfontstyle(ps, sty);
       +                                pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
       +                                ps->skipwhite = 1;
       +                                break;
       +
       +                        case Th1+RBRA:
       +                        case Th2+RBRA:
       +                        case Th3+RBRA:
       +                        case Th4+RBRA:
       +                        case Th5+RBRA:
       +                        case Th6+RBRA:
       +                                addbrk(ps, 1, IFcleft|IFcright);
       +                                popfontsize(ps);
       +                                popfontstyle(ps);
       +                                popjust(ps);
       +                                break;
       +
       +                        case Thead:
       +                                // HTML spec says ignore regular markup in head,
       +                                // but Netscape and IE don't
       +                                // ps.skipping = 1;
       +                                break;
       +
       +                        case Thead+RBRA:
       +                                ps->skipping = 0;
       +                                break;
       +
       +                        // <!ELEMENT HR - O EMPTY>
       +                        case Thr:
       +                                al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter);
       +                                sz = auintval(tok, Asize, HRSZ);
       +                                wd = adimen(tok, Awidth);
       +                                if(dimenkind(wd) == Dnone)
       +                                        wd = makedimen(Dpercent, 100);
       +                                nosh = aflagval(tok, Anoshade);
       +                                additem(ps, newirule(al, sz, nosh, wd), tok);
       +                                addbrk(ps, 0, 0);
       +                                break;
       +
       +                        case Ti:
       +                        case Tcite:
       +                        case Tdfn:
       +                        case Tem:
       +                        case Tvar:
       +                        case Taddress:
       +                                pushfontstyle(ps, FntI);
       +                                break;
       +
       +                        // <!ELEMENT IMG - O EMPTY>
       +                        case Timg:
       +                                map = nil;
       +                                oldcuranchor = ps->curanchor;
       +                                if(_tokaval(tok, Ausemap, &usemap, 0)) {
       +                                        if(!_prefix(L(Lhash), usemap)) {
       +                                                if(warn)
       +                                                        fprint(2, "warning: can't handle non-local map %S\n", usemap);
       +                                        }
       +                                        else {
       +                                                map = getmap(di, usemap+1);
       +                                                if(ps->curanchor == 0) {
       +                                                        di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors);
       +                                                        ps->curanchor = is->nanchors;
       +                                                }
       +                                        }
       +                                }
       +                                align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom);
       +                                dfltbd = 0;
       +                                if(ps->curanchor != 0)
       +                                        dfltbd = 2;
       +                                src = aurlval(tok, Asrc, nil, di->base);
       +                                if(src == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <img> has no src attribute\n");
       +                                        ps->curanchor = oldcuranchor;
       +                                        continue;
       +                                }
       +                                img = newiimage(src,
       +                                                aval(tok, Aalt),
       +                                                align,
       +                                                auintval(tok, Awidth, 0),
       +                                                auintval(tok, Aheight, 0),
       +                                                auintval(tok, Ahspace, IMGHSPACE),
       +                                                auintval(tok, Avspace, IMGVSPACE),
       +                                                auintval(tok, Aborder, dfltbd),
       +                                                aflagval(tok, Aismap),
       +                                                map);
       +                                if(align == ALleft || align == ALright) {
       +                                        additem(ps, newifloat(img, align), tok);
       +                                        // if no hspace specified, use FLTIMGHSPACE
       +                                        if(!_tokaval(tok, Ahspace, &val, 0))
       +                                                ((Iimage*)img)->hspace = FLTIMGHSPACE;
       +                                }
       +                                else {
       +                                        ps->skipwhite = 0;
       +                                        additem(ps, img, tok);
       +                                }
       +                                if(!ps->skipping) {
       +                                        ((Iimage*)img)->nextimage = di->images;
       +                                        di->images = (Iimage*)img;
       +                                }
       +                                ps->curanchor = oldcuranchor;
       +                                break;
       +
       +                        // <!ELEMENT INPUT - O EMPTY>
       +                        case Tinput:
       +                                ps->skipwhite = 0;
       +                                if(is->curform == nil) {
       +                                        if(warn)
       +                                                fprint(2, "<INPUT> not inside <FORM>\n");
       +                                        continue;
       +                                }
       +                                is->curform->fields = field = newformfield(
       +                                                atabval(tok, Atype, input_tab, NINPUTTAB, Ftext),
       +                                                ++is->curform->nfields,
       +                                                is->curform,
       +                                                aval(tok, Aname),
       +                                                aval(tok, Avalue),
       +                                                auintval(tok, Asize, 0),
       +                                                auintval(tok, Amaxlength, 1000),
       +                                                is->curform->fields);
       +                                if(aflagval(tok, Achecked))
       +                                        field->flags = FFchecked;
       +
       +                                switch(field->ftype) {
       +                                case Ftext:
       +                                case Fpassword:
       +                                case Ffile:
       +                                        if(field->size == 0)
       +                                                field->size = 20;
       +                                        break;
       +
       +                                case Fcheckbox:
       +                                        if(field->name == nil) {
       +                                                if(warn)
       +                                                        fprint(2, "warning: checkbox form field missing name\n");
       +                                                continue;
       +                                        }
       +                                        if(field->value == nil)
       +                                                field->value = _Strdup(L(Lone));
       +                                        break;
       +
       +                                case Fradio:
       +                                        if(field->name == nil || field->value == nil) {
       +                                                if(warn)
       +                                                        fprint(2, "warning: radio form field missing name or value\n");
       +                                                continue;
       +                                        }
       +                                        break;
       +
       +                                case Fsubmit:
       +                                        if(field->value == nil)
       +                                                field->value = _Strdup(L(Lsubmit));
       +                                        if(field->name == nil)
       +                                                field->name = _Strdup(L(Lnoname));
       +                                        break;
       +
       +                                case Fimage:
       +                                        src = aurlval(tok, Asrc, nil, di->base);
       +                                        if(src == nil) {
       +                                                if(warn)
       +                                                        fprint(2, "warning: image form field missing src\n");
       +                                                continue;
       +                                        }
       +                                        // width and height attrs aren't specified in HTML 3.2,
       +                                        // but some people provide them and they help avoid
       +                                        // a relayout
       +                                        field->image = newiimage(src,
       +                                                astrval(tok, Aalt, L(Lsubmit)),
       +                                                atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom),
       +                                                auintval(tok, Awidth, 0), auintval(tok, Aheight, 0),
       +                                                0, 0, 0, 0, nil);
       +                                        ii = (Iimage*)field->image;
       +                                        ii->nextimage = di->images;
       +                                        di->images = ii;
       +                                        break;
       +
       +                                case Freset:
       +                                        if(field->value == nil)
       +                                                field->value = _Strdup(L(Lreset));
       +                                        break;
       +
       +                                case Fbutton:
       +                                        if(field->value == nil)
       +                                                field->value = _Strdup(L(Lspace));
       +                                        break;
       +                                }
       +                                ffit = newiformfield(field);
       +                                additem(ps, ffit, tok);
       +                                if(ffit->genattr != nil)
       +                                        field->events = ffit->genattr->events;
       +                                break;
       +
       +                        // <!ELEMENT ISINDEX - O EMPTY>
       +                        case Tisindex:
       +                                ps->skipwhite = 0;
       +                                prompt = astrval(tok, Aprompt, L(Lindex));
       +                                target = atargval(tok, di->target);
       +                                additem(ps, textit(ps, prompt), tok);
       +                                frm = newform(++is->nforms,
       +                                                nil,
       +                                                di->base,
       +                                                target,
       +                                                HGet,
       +                                                di->forms);
       +                                di->forms = frm;
       +                                ff = newformfield(Ftext,
       +                                                1,
       +                                                frm,
       +                                                _Strdup(L(Lisindex)),
       +                                                nil,
       +                                                50,
       +                                                1000,
       +                                                nil);
       +                                frm->fields = ff;
       +                                frm->nfields = 1;
       +                                additem(ps, newiformfield(ff), tok);
       +                                addbrk(ps, 1, 0);
       +                                break;
       +
       +                        // <!ELEMENT LI - O %flow>
       +                        case Tli:
       +                                if(ps->listtypestk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "<LI> not in list\n");
       +                                        continue;
       +                                }
       +                                ty = top(&ps->listtypestk, 0);
       +                                ty2 = listtyval(tok, ty);
       +                                if(ty != ty2) {
       +                                        ty = ty2;
       +                                        push(&ps->listtypestk, ty2);
       +                                }
       +                                v = aintval(tok, Avalue, top(&ps->listcntstk, 1));
       +                                if(ty == LTdisc || ty == LTsquare || ty == LTcircle)
       +                                        hang = 10*LISTTAB - 3;
       +                                else
       +                                        hang = 10*LISTTAB - 1;
       +                                changehang(ps, hang);
       +                                addtext(ps, listmark(ty, v));
       +                                push(&ps->listcntstk, v + 1);
       +                                changehang(ps, -hang);
       +                                ps->skipwhite = 1;
       +                                break;
       +
       +                        // <!ELEMENT MAP - - (AREA)+>
       +                        case Tmap:
       +                                if(_tokaval(tok, Aname, &name, 0))
       +                                        is->curmap = getmap(di, name);
       +                                break;
       +
       +                        case Tmap+RBRA:
       +                                map = is->curmap;
       +                                if(map == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </MAP>\n");
       +                                        continue;
       +                                }
       +                                map->areas = (Area*)_revlist((List*)map->areas);
       +                                break;
       +
       +                        case Tmeta:
       +                                if(ps->skipping)
       +                                        continue;
       +                                if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) {
       +                                        val = aval(tok, Acontent);
       +                                        n = _Strlen(equiv);
       +                                        if(!_Strncmpci(equiv, n, L(Lrefresh)))
       +                                                di->refresh = val;
       +                                        else if(!_Strncmpci(equiv, n, L(Lcontent))) {
       +                                                n = _Strlen(val);
       +                                                if(!_Strncmpci(val, n, L(Ljavascript))
       +                                                   || !_Strncmpci(val, n, L(Ljscript1))
       +                                                   || !_Strncmpci(val, n, L(Ljscript)))
       +                                                        di->scripttype = TextJavascript;
       +                                                else {
       +                                                        if(warn)
       +                                                                fprint(2, "unimplemented script type %S\n", val);
       +                                                        di->scripttype = UnknownType;
       +                                                }
       +                                        }
       +                                }
       +                                break;
       +
       +                        // Nobr is NOT in HTML 4.0, but it is ubiquitous on the web
       +                        case Tnobr:
       +                                ps->skipwhite = 0;
       +                                ps->curstate &= ~IFwrap;
       +                                break;
       +
       +                        case Tnobr+RBRA:
       +                                ps->curstate |= IFwrap;
       +                                break;
       +
       +                        // We do frames, so skip stuff in noframes
       +                        case Tnoframes:
       +                                ps->skipping = 1;
       +                                break;
       +
       +                        case Tnoframes+RBRA:
       +                                ps->skipping = 0;
       +                                break;
       +
       +                        // We do scripts (if enabled), so skip stuff in noscripts
       +                        case Tnoscript:
       +                                if(doscripts)
       +                                        ps->skipping = 1;
       +                                break;
       +
       +                        case Tnoscript+RBRA:
       +                                if(doscripts)
       +                                        ps->skipping = 0;
       +                                break;
       +
       +                        // <!ELEMENT OPTION - O (#PCDATA)>
       +                        case Toption:
       +                                if(is->curform == nil || is->curform->fields == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <OPTION> not in <SELECT>\n");
       +                                        continue;
       +                                }
       +                                field = is->curform->fields;
       +                                if(field->ftype != Fselect) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <OPTION> not in <SELECT>\n");
       +                                        continue;
       +                                }
       +                                val = aval(tok, Avalue);
       +                                option = newoption(aflagval(tok, Aselected), val, nil, field->options);
       +                                field->options = option;
       +                                option->display =  getpcdata(toks, tokslen, &toki);
       +                                if(val == nil)
       +                                        option->value = _Strdup(option->display);
       +                                break;
       +
       +                        // <!ELEMENT P - O (%text)* >
       +                        case Tp:
       +                                pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
       +                                ps->inpar = 1;
       +                                ps->skipwhite = 1;
       +                                break;
       +
       +                        case Tp+RBRA:
       +                                break;
       +
       +                        // <!ELEMENT PARAM - O EMPTY>
       +                        // Do something when we do applets...
       +                        case Tparam:
       +                                break;
       +
       +                        // <!ELEMENT PRE - - (%text)* -(IMG|BIG|SMALL|SUB|SUP|FONT) >
       +                        case Tpre:
       +                                ps->curstate &= ~IFwrap;
       +                                ps->literal = 1;
       +                                ps->skipwhite = 0;
       +                                pushfontstyle(ps, FntT);
       +                                break;
       +
       +                        case Tpre+RBRA:
       +                                ps->curstate |= IFwrap;
       +                                if(ps->literal) {
       +                                        popfontstyle(ps);
       +                                        ps->literal = 0;
       +                                }
       +                                break;
       +
       +                        // <!ELEMENT SCRIPT - - CDATA>
       +                        case Tscript:
       +                                if(doscripts) {
       +                                        if(!di->hasscripts) {
       +                                                if(di->scripttype == TextJavascript) {
       +                                                        // TODO: initialize script if nec.
       +                                                        // initjscript(di);
       +                                                        di->hasscripts = 1;
       +                                                }
       +                                        }
       +                                }
       +                                if(!di->hasscripts) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <SCRIPT> ignored\n");
       +                                        ps->skipping = 1;
       +                                }
       +                                else {
       +                                        scriptsrc = aurlval(tok, Asrc, nil, di->base);
       +                                        script = nil;
       +                                        if(scriptsrc != nil) {
       +                                                if(warn)
       +                                                        fprint(2, "warning: non-local <SCRIPT> ignored\n");
       +                                                free(scriptsrc);
       +                                        }
       +                                        else {
       +                                                script = getpcdata(toks, tokslen, &toki);
       +                                        }
       +                                        if(script != nil) {
       +                                                if(warn)
       +                                                        fprint(2, "script ignored\n");
       +                                                free(script);
       +                                        }
       +                                }
       +                                break;
       +
       +                        case Tscript+RBRA:
       +                                ps->skipping = 0;
       +                                break;
       +
       +                        // <!ELEMENT SELECT - - (OPTION+)>
       +                        case Tselect:
       +                                if(is->curform == nil) {
       +                                        if(warn)
       +                                                fprint(2, "<SELECT> not inside <FORM>\n");
       +                                        continue;
       +                                }
       +                                field = newformfield(Fselect,
       +                                        ++is->curform->nfields,
       +                                        is->curform,
       +                                        aval(tok, Aname),
       +                                        nil,
       +                                        auintval(tok, Asize, 0),
       +                                        0,
       +                                        is->curform->fields);
       +                                is->curform->fields = field;
       +                                if(aflagval(tok, Amultiple))
       +                                        field->flags = FFmultiple;
       +                                ffit = newiformfield(field);
       +                                additem(ps, ffit, tok);
       +                                if(ffit->genattr != nil)
       +                                        field->events = ffit->genattr->events;
       +                                // throw away stuff until next tag (should be <OPTION>)
       +                                s = getpcdata(toks, tokslen, &toki);
       +                                if(s != nil)
       +                                        free(s);
       +                                break;
       +
       +                        case Tselect+RBRA:
       +                                if(is->curform == nil || is->curform->fields == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </SELECT>\n");
       +                                        continue;
       +                                }
       +                                field = is->curform->fields;
       +                                if(field->ftype != Fselect)
       +                                        continue;
       +                                // put options back in input order
       +                                field->options = (Option*)_revlist((List*)field->options);
       +                                break;
       +
       +                        // <!ELEMENT (STRIKE|U) - - (%text)*>
       +                        case Tstrike:
       +                        case Tu:
       +                                ps->curul = push(&ps->ulstk, (tag==Tstrike)? ULmid : ULunder);
       +                                break;
       +
       +                        case Tstrike+RBRA:
       +                        case Tu+RBRA:
       +                                if(ps->ulstk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected %T\n", tok);
       +                                        continue;
       +                                }
       +                                ps->curul = popretnewtop(&ps->ulstk, ULnone);
       +                                break;
       +
       +                        // <!ELEMENT STYLE - - CDATA>
       +                        case Tstyle:
       +                                if(warn)
       +                                        fprint(2, "warning: unimplemented <STYLE>\n");
       +                                ps->skipping = 1;
       +                                break;
       +
       +                        case Tstyle+RBRA:
       +                                ps->skipping = 0;
       +                                break;
       +
       +                        // <!ELEMENT (SUB|SUP) - - (%text)*>
       +                        case Tsub:
       +                        case Tsup:
       +                                if(tag == Tsub)
       +                                        ps->curvoff += SUBOFF;
       +                                else
       +                                        ps->curvoff -= SUPOFF;
       +                                push(&ps->voffstk, ps->curvoff);
       +                                sz = top(&ps->fntsizestk, Normal);
       +                                pushfontsize(ps, sz - 1);
       +                                break;
       +
       +                        case Tsub+RBRA:
       +                        case Tsup+RBRA:
       +                                if(ps->voffstk.n == 0) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected %T\n", tok);
       +                                        continue;
       +                                }
       +                                ps->curvoff = popretnewtop(&ps->voffstk, 0);
       +                                popfontsize(ps);
       +                                break;
       +
       +                        // <!ELEMENT TABLE - - (CAPTION?, TR+)>
       +                        case Ttable:
       +                                ps->skipwhite = 0;
       +                                tab = newtable(++is->ntables,
       +                                                aalign(tok),
       +                                                adimen(tok, Awidth),
       +                                                aflagval(tok, Aborder), 
       +                                                auintval(tok, Acellspacing, TABSP),
       +                                                auintval(tok, Acellpadding, TABPAD),
       +                                                makebackground(nil, acolorval(tok, Abgcolor, ps->curbg.color)),
       +                                                tok,
       +                                                is->tabstk);
       +                                is->tabstk = tab;
       +                                curtab = tab;
       +                                break;
       +
       +                        case Ttable+RBRA:
       +                                if(curtab == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </TABLE>\n");
       +                                        continue;
       +                                }
       +                                isempty = (curtab->cells == nil);
       +                                if(isempty) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <TABLE> has no cells\n");
       +                                }
       +                                else {
       +                                        ps = finishcell(curtab, ps);
       +                                        if(curtab->rows != nil)
       +                                                curtab->rows->flags = 0;
       +                                        finish_table(curtab);
       +                                }
       +                                ps->skipping = 0;
       +                                if(!isempty) {
       +                                        tabitem = newitable(curtab);
       +                                        al = curtab->align.halign;
       +                                        switch(al) {
       +                                        case ALleft:
       +                                        case ALright:
       +                                                additem(ps, newifloat(tabitem, al), tok);
       +                                                break;
       +                                        default:
       +                                                if(al == ALcenter)
       +                                                        pushjust(ps, ALcenter);
       +                                                addbrk(ps, 0, 0);
       +                                                if(ps->inpar) {
       +                                                        popjust(ps);
       +                                                        ps->inpar = 0;
       +                                                }
       +                                                additem(ps, tabitem, curtab->tabletok);
       +                                                if(al == ALcenter)
       +                                                        popjust(ps);
       +                                                break;
       +                                        }
       +                                }
       +                                if(is->tabstk == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: table stack is wrong\n");
       +                                }
       +                                else
       +                                        is->tabstk = is->tabstk->next;
       +                                curtab->next = di->tables;
       +                                di->tables = curtab;
       +                                curtab = is->tabstk;
       +                                if(!isempty)
       +                                        addbrk(ps, 0, 0);
       +                                break;
       +
       +                        // <!ELEMENT (TH|TD) - O %body.content>
       +                        // Cells for a row are accumulated in reverse order.
       +                        // We push ps on a stack, and use a new one to accumulate
       +                        // the contents of the cell.
       +                        case Ttd:
       +                        case Tth:
       +                                if(curtab == nil) {
       +                                        if(warn)
       +                                                fprint(2, "%T outside <TABLE>\n", tok);
       +                                        continue;
       +                                }
       +                                if(ps->inpar) {
       +                                        popjust(ps);
       +                                        ps->inpar = 0;
       +                                }
       +                                ps = finishcell(curtab, ps);
       +                                tr = nil;
       +                                if(curtab->rows != nil)
       +                                        tr = curtab->rows;
       +                                if(tr == nil || !tr->flags) {
       +                                        if(warn)
       +                                                fprint(2, "%T outside row\n", tok);
       +                                        tr = newtablerow(makealign(ALnone, ALnone),
       +                                                        makebackground(nil, curtab->background.color),
       +                                                        TFparsing,
       +                                                        curtab->rows);
       +                                        curtab->rows = tr;
       +                                }
       +                                ps = cell_pstate(ps, tag == Tth);
       +                                flags = TFparsing;
       +                                if(aflagval(tok, Anowrap)) {
       +                                        flags |= TFnowrap;
       +                                        ps->curstate &= ~IFwrap;
       +                                }
       +                                if(tag == Tth)
       +                                        flags |= TFisth;
       +                                c = newtablecell(curtab->cells==nil? 1 : curtab->cells->cellid+1,
       +                                                auintval(tok, Arowspan, 1),
       +                                                auintval(tok, Acolspan, 1), 
       +                                                aalign(tok), 
       +                                                adimen(tok, Awidth),
       +                                                auintval(tok, Aheight, 0),
       +                                                makebackground(nil, acolorval(tok, Abgcolor, tr->background.color)),
       +                                                flags,
       +                                                curtab->cells);
       +                                curtab->cells = c;
       +                                ps->curbg = c->background;
       +                                if(c->align.halign == ALnone) {
       +                                        if(tr->align.halign != ALnone)
       +                                                c->align.halign = tr->align.halign;
       +                                        else if(tag == Tth)
       +                                                c->align.halign = ALcenter;
       +                                        else
       +                                                c->align.halign = ALleft;
       +                                }
       +                                if(c->align.valign == ALnone) {
       +                                        if(tr->align.valign != ALnone)
       +                                                c->align.valign = tr->align.valign;
       +                                        else
       +                                                c->align.valign = ALmiddle;
       +                                }
       +                                c->nextinrow = tr->cells;
       +                                tr->cells = c;
       +                                break;
       +
       +                        case Ttd+RBRA:
       +                        case Tth+RBRA:
       +                                if(curtab == nil || curtab->cells == nil) {
       +                                        if(warn)
       +                                                fprint(2, "unexpected %T\n", tok);
       +                                        continue;
       +                                }
       +                                ps = finishcell(curtab, ps);
       +                                break;
       +
       +                        // <!ELEMENT TEXTAREA - - (#PCDATA)>
       +                        case Ttextarea:
       +                                if(is->curform == nil) {
       +                                        if(warn)
       +                                                fprint(2, "<TEXTAREA> not inside <FORM>\n");
       +                                        continue;
       +                                }
       +                                field = newformfield(Ftextarea,
       +                                        ++is->curform->nfields,
       +                                        is->curform,
       +                                        aval(tok, Aname),
       +                                        nil,
       +                                        0,
       +                                        0,
       +                                        is->curform->fields);
       +                                is->curform->fields = field;
       +                                field->rows = auintval(tok, Arows, 3);
       +                                field->cols = auintval(tok, Acols, 50);
       +                                field->value = getpcdata(toks, tokslen, &toki);
       +                                if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttextarea + RBRA)
       +                                        fprint(2, "warning: <TEXTAREA> data ended by %T\n", &toks[toki + 1]);
       +                                ffit = newiformfield(field);
       +                                additem(ps, ffit, tok);
       +                                if(ffit->genattr != nil)
       +                                        field->events = ffit->genattr->events;
       +                                break;
       +
       +                        // <!ELEMENT TITLE - - (#PCDATA)* -(%head.misc)>
       +                        case Ttitle:
       +                                di->doctitle = getpcdata(toks, tokslen, &toki);
       +                                if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttitle + RBRA)
       +                                        fprint(2, "warning: <TITLE> data ended by %T\n", &toks[toki + 1]);
       +                                break;
       +
       +                        // <!ELEMENT TR - O (TH|TD)+>
       +                        // rows are accumulated in reverse order in curtab->rows
       +                        case Ttr:
       +                                if(curtab == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: <TR> outside <TABLE>\n");
       +                                        continue;
       +                                }
       +                                if(ps->inpar) {
       +                                        popjust(ps);
       +                                        ps->inpar = 0;
       +                                }
       +                                ps = finishcell(curtab, ps);
       +                                if(curtab->rows != nil)
       +                                        curtab->rows->flags = 0;
       +                                curtab->rows = newtablerow(aalign(tok),
       +                                        makebackground(nil, acolorval(tok, Abgcolor, curtab->background.color)),
       +                                        TFparsing,
       +                                        curtab->rows);
       +                                break;
       +
       +                        case Ttr+RBRA:
       +                                if(curtab == nil || curtab->rows == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: unexpected </TR>\n");
       +                                        continue;
       +                                }
       +                                ps = finishcell(curtab, ps);
       +                                tr = curtab->rows;
       +                                if(tr->cells == nil) {
       +                                        if(warn)
       +                                                fprint(2, "warning: empty row\n");
       +                                        curtab->rows = tr->next;
       +                                        tr->next = nil;
       +                                }
       +                                else
       +                                        tr->flags = 0;
       +                                break;
       +
       +                        // <!ELEMENT (TT|CODE|KBD|SAMP) - - (%text)*>
       +                        case Ttt:
       +                        case Tcode:
       +                        case Tkbd:
       +                        case Tsamp:
       +                                pushfontstyle(ps, FntT);
       +                                break;
       +
       +                        // Tags that have empty action
       +                        case Tabbr:
       +                        case Tabbr+RBRA:
       +                        case Tacronym:
       +                        case Tacronym+RBRA:
       +                        case Tarea+RBRA:
       +                        case Tbase+RBRA:
       +                        case Tbasefont+RBRA:
       +                        case Tbr+RBRA:
       +                        case Tdd+RBRA:
       +                        case Tdt+RBRA:
       +                        case Tframe+RBRA:
       +                        case Thr+RBRA:
       +                        case Thtml:
       +                        case Thtml+RBRA:
       +                        case Timg+RBRA:
       +                        case Tinput+RBRA:
       +                        case Tisindex+RBRA:
       +                        case Tli+RBRA:
       +                        case Tlink:
       +                        case Tlink+RBRA:
       +                        case Tmeta+RBRA:
       +                        case Toption+RBRA:
       +                        case Tparam+RBRA:
       +                        case Ttextarea+RBRA:
       +                        case Ttitle+RBRA:
       +                                break;
       +
       +
       +                        // Tags not implemented
       +                        case Tbdo:
       +                        case Tbdo+RBRA:
       +                        case Tbutton:
       +                        case Tbutton+RBRA:
       +                        case Tdel:
       +                        case Tdel+RBRA:
       +                        case Tfieldset:
       +                        case Tfieldset+RBRA:
       +                        case Tiframe:
       +                        case Tiframe+RBRA:
       +                        case Tins:
       +                        case Tins+RBRA:
       +                        case Tlabel:
       +                        case Tlabel+RBRA:
       +                        case Tlegend:
       +                        case Tlegend+RBRA:
       +                        case Tobject:
       +                        case Tobject+RBRA:
       +                        case Toptgroup:
       +                        case Toptgroup+RBRA:
       +                        case Tspan:
       +                        case Tspan+RBRA:
       +                                if(warn) {
       +                                        if(tag > RBRA)
       +                                                tag -= RBRA;
       +                                        fprint(2, "warning: unimplemented HTML tag: %S\n", tagnames[tag]);
       +                                }
       +                                break;
       +
       +                        default:
       +                                if(warn)
       +                                        fprint(2, "warning: unknown HTML tag: %S\n", tok->text);
       +                                break;
       +                        }
       +        }
       +        // some pages omit trailing </table>
       +        while(curtab != nil) {
       +                if(warn)
       +                        fprint(2, "warning: <TABLE> not closed\n");
       +                if(curtab->cells != nil) {
       +                        ps = finishcell(curtab, ps);
       +                        if(curtab->cells == nil) {
       +                                if(warn)
       +                                        fprint(2, "warning: empty table\n");
       +                        }
       +                        else {
       +                                if(curtab->rows != nil)
       +                                        curtab->rows->flags = 0;
       +                                finish_table(curtab);
       +                                ps->skipping = 0;
       +                                additem(ps, newitable(curtab), curtab->tabletok);
       +                                addbrk(ps, 0, 0);
       +                        }
       +                }
       +                if(is->tabstk != nil)
       +                        is->tabstk = is->tabstk->next;
       +                curtab->next = di->tables;
       +                di->tables = curtab;
       +                curtab = is->tabstk;
       +        }
       +        outerps = lastps(ps);
       +        ans = outerps->items->next;
       +        // note: ans may be nil and di->kids not nil, if there's a frameset!
       +        outerps->items = newispacer(ISPnull);
       +        outerps->lastit = outerps->items;
       +        is->psstk = ps;
       +        if(ans != nil && di->hasscripts) {
       +                // TODO evalscript(nil);
       +                ;
       +        }
       +
       +return_ans:
       +        if(dbgbuild) {
       +                assert(validitems(ans));
       +                if(ans == nil)
       +                        fprint(2, "getitems returning nil\n");
       +                else
       +                        printitems(ans, "getitems returning:");
       +        }
       +        return ans;
       +}
       +
       +// Gather the text of the run of consecutive Data tokens that begins at
       +// toks[*ptoki + 1] into one string, then strip it with _trimwhite.
       +//
       +//   toks, tokslen: the token array and its length
       +//   ptoki: in:  index of the token just before the run
       +//          out: index of the last Data token consumed (unchanged from
       +//               the input value when no Data token follows)
       +//
       +// Returns nil when the run is empty or holds no characters; otherwise a
       +// newly allocated string (from _newstr, or from _Strndup when trimming
       +// shortened it).  Callers in this file free() the result when they do
       +// not keep it.
       +// The lexer is expected to guarantee that the run is followed either by
       +// a non-Data token or by end of input.
       +static Rune*
       +getpcdata(Token* toks, int tokslen, int* ptoki)
       +{
       +        Rune*        buf;
       +        Rune*        w;
       +        Rune*        trimmed;
       +        int        first;
       +        int        end;
       +        int        i;
       +        int        total;
       +        int        ntrimmed;
       +
       +        first = (*ptoki) + 1;
       +
       +        // pass 1: find where the Data run stops and how long its text is
       +        total = 0;
       +        for(end = first; end < tokslen && toks[end].tag == Data; end++)
       +                total += _Strlen(toks[end].text);
       +
       +        buf = nil;
       +        if(total > 0) {
       +                // pass 2: copy every token's text into one buffer
       +                buf = _newstr(total);
       +                w = buf;
       +                for(i = first; i < end; i++)
       +                        w = _Stradd(w, toks[i].text, _Strlen(toks[i].text));
       +                *w = 0;
       +
       +                // if trimming removed anything, swap in a right-sized copy
       +                _trimwhite(buf, total, &trimmed, &ntrimmed);
       +                if(ntrimmed != total) {
       +                        w = buf;
       +                        buf = _Strndup(trimmed, ntrimmed);
       +                        free(w);
       +                }
       +        }
       +
       +        // end is one past the last Data token consumed
       +        *ptoki = end - 1;
       +        return buf;
       +}
       +
       +// Close off the cell at the head of curtab->cells if it is still flagged
       +// TFparsing: its content becomes the items collected on the top Pstate
       +// (everything after that state's leading item), the flag is cleared, and
       +// the top Pstate is freed.
       +// Returns the parse-state stack to use from now on: the popped stack if a
       +// cell was finished, otherwise psstk unchanged.
       +static Pstate*
       +finishcell(Table* curtab, Pstate* psstk)
       +{
       +        Tablecell*        cell;
       +        Pstate*        rest;
       +
       +        cell = curtab->cells;
       +        if(cell == nil || !(cell->flags&TFparsing))
       +                return psstk;
       +
       +        rest = psstk->next;
       +        if(rest == nil) {
       +                // nothing underneath to pop back to; leave everything alone
       +                if(warn)
       +                        fprint(2, "warning: parse state stack is wrong\n");
       +                return psstk;
       +        }
       +
       +        cell->content = psstk->items->next;
       +        cell->flags &= ~TFparsing;
       +        freepstate(psstk);
       +        return rest;
       +}
       +
       +// Create the Pstate used while parsing a table cell.
       +// The new state is pushed in front of oldps and inherits oldps's anchor,
       +// font, colours, size adjustment and the font-style, font-size and
       +// foreground stacks.  For a header cell (ishead nonzero) the font is
       +// switched to the FntB style while keeping the current size slot.
       +static Pstate*
       +cell_pstate(Pstate* oldps, int ishead)
       +{
       +        Pstate*        cellps;
       +        int        szslot;
       +
       +        cellps = newpstate(oldps);
       +        cellps->skipwhite = 1;
       +
       +        // scalar state carried over from the enclosing context
       +        cellps->curanchor = oldps->curanchor;
       +        cellps->curfont = oldps->curfont;
       +        cellps->curfg = oldps->curfg;
       +        cellps->curbg = oldps->curbg;
       +        cellps->adjsize = oldps->adjsize;
       +
       +        // stacks carried over from the enclosing context
       +        copystack(&cellps->fntstylestk, &oldps->fntstylestk);
       +        copystack(&cellps->fntsizestk, &oldps->fntsizestk);
       +        copystack(&cellps->fgstk, &oldps->fgstk);
       +
       +        if(ishead) {
       +                szslot = cellps->curfont%NumSize;
       +                cellps->curfont = FntB*NumSize + szslot;
       +        }
       +        return cellps;
       +}
       +
       +// Allocate a Pstate initialised to the default starting state: default
       +// font, black on white with no background image, no underline, left
       +// justification, wrapping on, and an item list holding only a null
       +// spacer.  link becomes the new state's next pointer, so passing the
       +// current stack head pushes the new state (nil starts a fresh list).
       +static Pstate*
       +newpstate(Pstate* link)
       +{
       +        Pstate*        fresh;
       +
       +        fresh = (Pstate*)emalloc(sizeof(Pstate));
       +        fresh->next = link;
       +
       +        // item list starts out as just the placeholder spacer
       +        fresh->items = newispacer(ISPnull);
       +        fresh->lastit = fresh->items;
       +        fresh->prelastit = nil;
       +
       +        // rendering defaults
       +        fresh->curfont = DefFnt;
       +        fresh->curfg = Black;
       +        fresh->curbg.image = nil;
       +        fresh->curbg.color = White;
       +        fresh->curul = ULnone;
       +        fresh->curjust = ALleft;
       +        fresh->curstate = IFwrap;
       +        return fresh;
       +}
       +
       +// Follow the next pointers from psl and return the final Pstate in the
       +// list.  psl must not be nil.
       +static Pstate*
       +lastps(Pstate* psl)
       +{
       +        Pstate*        tail;
       +
       +        assert(psl != nil);
       +        for(tail = psl; tail->next != nil; tail = tail->next)
       +                ;
       +        return tail;
       +}
       +
       +// Add it to end of ps item chain, adding in current state from ps.
       +// Also, if tok is not nil, scan it for generic attributes and assign
       +// the genattr field of the item accordingly.
       +//
       +// If ps->skipping is set the item is not added at all (only a warning
       +// is printed when warn is set).
       +// After the item is linked in, the one-shot break/clear flags
       +// (IFbrk, IFbrksp, IFnobrk, IFcleft, IFcright) are removed from
       +// ps->curstate, and ps->prelastit remembers the previous tail.
       +static void
       +additem(Pstate* ps, Item* it, Token* tok)
       +{
       +        int        aid;
       +        int        any;
       +        Rune*        i;
       +        Rune*        c;
       +        Rune*        s;
       +        Rune*        t;
       +        Attr*        a;
       +        SEvent*        e;
       +
       +        if(ps->skipping) {
       +                if(warn)
       +                        fprint(2, "warning: skipping item: %I\n", it);
       +                return;
       +        }
       +        // stamp the item with the current anchor and layout state
       +        it->anchorid = ps->curanchor;
       +        it->state |= ps->curstate;
       +        if(tok != nil) {
       +                // i, c, s, t collect the id, class, style and title values;
       +                // e accumulates a list of script events
       +                any = 0;
       +                i = nil;
       +                c = nil;
       +                s = nil;
       +                t = nil;
       +                e = nil;
       +                for(a = tok->attr; a != nil; a = a->next) {
       +                        aid = a->attid;
       +                        // attrinfo[] marks which attribute ids are handled here
       +                        if(!attrinfo[aid])
       +                                continue;
       +                        switch(aid) {
       +                        case Aid:
       +                                i = a->value;
       +                                break;
       +
       +                        case Aclass:
       +                                c = a->value;
       +                                break;
       +
       +                        case Astyle:
       +                                s = a->value;
       +                                break;
       +
       +                        case Atitle:
       +                                t = a->value;
       +                                break;
       +
       +                        default:
       +                                // anything else flagged in attrinfo must be an event attribute
       +                                assert(aid >= Aonblur && aid <= Aonunload);
       +                                e = newscriptevent(scriptev[a->attid], a->value, e);
       +                                break;
       +                        }
       +                        // the value pointer now lives in i/c/s/t/e; clear it in the Attr
       +                        // (presumably so the token does not free it later -- confirm)
       +                        a->value = nil;
       +                        any = 1;
       +                }
       +                if(any)
       +                        it->genattr = newgenattr(i, c, s, t, e);
       +        }
       +        ps->curstate &= ~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright);
       +        // append to the chain, keeping track of the previous tail
       +        ps->prelastit = ps->lastit;
       +        ps->lastit->next = it;
       +        ps->lastit = it;
       +}
       +
       +// Make a text item out of s,
       +// using current font, foreground, vertical offset and underline state.
       +// s must not be nil; the pointer itself (not a copy) is passed on
       +// to newitext.
       +static Item*
       +textit(Pstate* ps, Rune* s)
       +{
       +        assert(s != nil);
       +        // the vertical offset is passed biased by Voffbias
       +        return newitext(s, ps->curfont, ps->curfg, ps->curvoff + Voffbias, ps->curul);
       +}
       +
       +// Add text item or items for s, paying attention to
       +// current font, foreground, baseline offset, underline state,
       +// and literal mode.  Unless we're in literal mode, compress
       +// whitespace to single blank, and, if curstate has a break,
       +// trim any leading whitespace.  Whether in literal mode or not,
       +// turn nonbreaking spaces into spacer items with IFnobrk set.
       +//
       +// In literal mode, break up s at newlines and add breaks instead.
       +// Also replace tabs with the appropriate number of spaces.
       +// In nonliteral mode, break up the items every 100 or so characters
       +// just to make the layout algorithm not go quadratic.
       +//
       +// addtext assumes ownership of s: on every path s is either handed
       +// over to a text item unchanged or freed before returning.
       +static void
       +addtext(Pstate* ps, Rune* s)
       +{
       +        int        n;
       +        int        i;
       +        int        j;
       +        int        k;
       +        int        col;
       +        int        c;
       +        int        nsp;
       +        Item*        it;
       +        Rune*        ss;
       +        Rune*        p;
       +        Rune        buf[SMALLBUFSIZE];
       +
       +        assert(s != nil);
       +        n = runestrlen(s);
       +        // i scans s; j marks the start of the text not yet emitted
       +        i = 0;
       +        j = 0;
       +        if(ps->literal) {
       +                col = 0;
       +                while(i < n) {
       +                        if(s[i] == '\n') {
       +                                if(i > j) {
       +                                        // trim trailing blanks from line
       +                                        for(k = i; k > j; k--)
       +                                                if(s[k - 1] != ' ')
       +                                                        break;
       +                                        if(k > j)
       +                                                additem(ps, textit(ps, _Strndup(s+j, k-j)), nil);
       +                                }
       +                                addlinebrk(ps, 0);
       +                                j = i + 1;
       +                                col = 0;
       +                        }
       +                        else {
       +                                if(s[i] == '\t') {
       +                                        col += i - j;
       +                                        nsp = 8 - (col%8);
       +                                        // make ss = s[j:i] + nsp spaces
       +                                        ss = _newstr(i-j+nsp);
       +                                        p = _Stradd(ss, s+j, i-j);
       +                                        p = _Stradd(p, L(Ltab2space), nsp);
       +                                        *p = 0;
       +                                        additem(ps, textit(ps, ss), nil);
       +                                        col += nsp;
       +                                        j = i + 1;
       +                                }
       +                                else if(s[i] == NBSP) {
       +                                        if(i > j)
       +                                                additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
       +                                        addnbsp(ps);
       +                                        col += (i - j) + 1;
       +                                        j = i + 1;
       +                                }
       +                        }
       +                        i++;
       +                }
       +                if(i > j && j == 0) {
       +                        // nothing was split off (j never moved), so s is used
       +                        // whole: just transfer s over
       +                        additem(ps, textit(ps, s), nil);
       +                }
       +                else {
       +                        if(i > j)
       +                                additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
       +                        // s was only ever copied from here; free it even when no
       +                        // tail remains (e.g. s ended in a newline, tab or NBSP,
       +                        // or was empty) so it is not leaked
       +                        free(s);
       +                }
       +        }
       +        else {        // not literal mode
       +                // after a break, or at the very start of the item list,
       +                // skip leading whitespace
       +                if((ps->curstate&IFbrk) || ps->lastit == ps->items)
       +                        while(i < n) {
       +                                c = s[i];
       +                                if(c >= 256 || !isspace(c))
       +                                        break;
       +                                i++;
       +                        }
       +                // buf accumulates the whitespace-compressed output;
       +                // p is the next free position in buf
       +                p = buf;
       +                for(j = i; i < n; i++) {
       +                        assert(p+i-j < buf+SMALLBUFSIZE-1);
       +                        c = s[i];
       +                        if(c == NBSP) {
       +                                if(i > j)
       +                                        p = _Stradd(p, s+j, i-j);
       +                                if(p > buf)
       +                                        additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
       +                                p = buf;
       +                                addnbsp(ps);
       +                                j = i + 1;
       +                                continue;
       +                        }
       +                        if(c < 256 && isspace(c)) {
       +                                if(i > j)
       +                                        p = _Stradd(p, s+j, i-j);
       +                                *p++ = ' ';
       +                                // swallow the rest of this whitespace run
       +                                while(i < n - 1) {
       +                                        c = s[i + 1];
       +                                        if(c >= 256 || !isspace(c))
       +                                                break;
       +                                        i++;
       +                                }
       +                                j = i + 1;
       +                        }
       +                        if(i - j >= 100) {
       +                                p = _Stradd(p, s+j, i+1-j);
       +                                j = i + 1;
       +                        }
       +                        if(p-buf >= 100) {
       +                                additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
       +                                p = buf;
       +                        }
       +                }
       +                if(i > j && j < n) {
       +                        assert(p+i-j < buf+SMALLBUFSIZE-1);
       +                        p = _Stradd(p, s+j, i-j);
       +                }
       +                // don't add a space if previous item ended in a space
       +                if(p-buf == 1 && buf[0] == ' ' && ps->lastit != nil) {
       +                        it = ps->lastit;
       +                        if(it->tag == Itexttag) {
       +                                ss = ((Itext*)it)->s;
       +                                k = _Strlen(ss);
       +                                // the last character is ss[k-1]; ss[k] is the terminator
       +                                if(k > 0 && ss[k - 1] == ' ')
       +                                        p = buf;
       +                        }
       +                }
       +                if(p > buf)
       +                        additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
       +                free(s);
       +        }
       +}
       +
       +// Add a break to ps->curstate, with extra space if sp is true.
       +// If there was a previous break, combine this one's parameters
       +// with that to make the amt be the max of the two and the clr
       +// be the most general. (amt will be 0 or 1)
       +// Also, if the immediately preceding item was a text item,
       +// trim any whitespace from the end of it, if not in literal mode.
       +// Finally, if this is at the very beginning of the item list
       +// (the only thing there is a null spacer), then don't add the space.
       +static void
       +addbrk(Pstate* ps, int sp, int clr)
       +{
       +        int        state;
       +        Rune*        l;
       +        int                nl;
       +        Rune*        r;
       +        int                nr;
       +        Itext*        t;
       +        Rune*        s;
       +
       +        state = ps->curstate;
       +        // merge the requested clear flags with any already pending
       +        clr = clr|(state&(IFcleft|IFcright));
       +        // extra space only when something besides the initial item is on the list
       +        if(sp && !(ps->lastit == ps->items))
       +                sp = IFbrksp;
       +        else
       +                sp = 0;
       +        ps->curstate = IFbrk|sp|(state&~(IFcleft|IFcright))|clr;
       +        if(ps->lastit != ps->items) {
       +                if(!ps->literal && ps->lastit->tag == Itexttag) {
       +                        t = (Itext*)ps->lastit;
       +                        // l/nl receive the part of t->s that is kept;
       +                        // presumably r/nr are the trailing whitespace -- confirm against _splitr
       +                        _splitr(t->s, _Strlen(t->s), notwhitespace, &l, &nl, &r, &nr);
       +                        // try to avoid making empty items
       +                        // but not crucial if the occasional one gets through
       +                        if(nl == 0 && ps->prelastit != nil) {
       +                                // unlink the now-empty text item from the chain
       +                                // (the item itself is not freed here)
       +                                ps->lastit = ps->prelastit;
       +                                ps->lastit->next = nil;
       +                                ps->prelastit = nil;
       +                        }
       +                        else {
       +                                s = t->s;
       +                                if(nl == 0) {
       +                                        // need a non-nil pointer to empty string
       +                                        // (_Strdup(L(Lempty)) returns nil)
       +                                        t->s = emalloc(sizeof(Rune));
       +                                        t->s[0] = 0;
       +                                }
       +                                else
       +                                        t->s = _Strndup(l, nl);
       +                                // release the old, untrimmed string
       +                                if(s)
       +                                        free(s);
       +                        }
       +                }
       +        }
       +}
       +
       +// Add break due to a <br> or a newline within a preformatted section.
       +// We add a null item first, with current font's height and ascent, to make
       +// sure that the current line takes up at least that amount of vertical space.
       +// This ensures that <br>s on empty lines cause blank lines, and that
       +// multiple <br>s in a row give multiple blank lines.
       +// However don't add the spacer if the previous item was something that
       +// takes up space itself.
       +// clr is passed through to addbrk.
       +static void
       +addlinebrk(Pstate* ps, int clr)
       +{
       +        int        obrkstate;
       +        int        b;
       +        int        addit;
       +
       +        // don't want break before our null item unless the previous item
       +        // was also a null item for the purposes of line breaking
       +        obrkstate = ps->curstate&(IFbrk|IFbrksp);
       +        b = IFnobrk;
       +        addit = 0;
       +        // the vline spacer is added only after a spacer or a float item
       +        if(ps->lastit != nil) {
       +                if(ps->lastit->tag == Ispacertag) {
       +                        if(((Ispacer*)ps->lastit)->spkind == ISPvline)
       +                                b = IFbrk;
       +                        addit = 1;
       +                }
       +                else if(ps->lastit->tag == Ifloattag)
       +                        addit = 1;
       +        }
       +        if(addit) {
       +                // temporarily replace the pending break bits with b for the
       +                // spacer item, then put the saved break bits back
       +                ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|b;
       +                additem(ps, newispacer(ISPvline), nil);
       +                ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|obrkstate;
       +        }
       +        addbrk(ps, 0, clr);
       +}
       +
       +// Append a horizontal-space spacer item standing for a nonbreaking space.
       +static void
       +addnbsp(Pstate* ps)
       +{
       +        Item*        sp;
       +
       +        // When a break is already pending right here, let it happen: the
       +        // nbsp is then being used for undiscardable space, not to glue
       +        // things together.  Otherwise forbid a break before the spacer.
       +        if(!(ps->curstate&IFbrk))
       +                ps->curstate |= IFnobrk;
       +
       +        sp = newispacer(ISPhspace);
       +        additem(ps, sp, nil);
       +
       +        // in every case, no break before whatever item comes next
       +        ps->curstate |= IFnobrk;
       +}
       +
       +// Adjust the hang field of ps->curstate by delta.
       +// Hang is measured in 1/10ths of a tab and says how far the current
       +// contiguous run of hanging items is shifted left of its normal
       +// (indented) position.  A result below zero is clamped to zero, with
       +// a warning if warn is set.
       +static void
       +changehang(Pstate* ps, int delta)
       +{
       +        int        hang;
       +
       +        hang = (ps->curstate&IFhangmask) + delta;
       +        if(hang < 0) {
       +                if(warn)
       +                        fprint(2, "warning: hang went negative\n");
       +                hang = 0;
       +        }
       +        // swap the old hang bits for the new value
       +        ps->curstate &= ~IFhangmask;
       +        ps->curstate |= hang;
       +}
       +
       +// Adjust the indent field of ps->curstate by delta.
       +// A result below zero is clamped to zero, with a warning if warn is set.
       +static void
       +changeindent(Pstate* ps, int delta)
       +{
       +        int        indent;
       +
       +        indent = (ps->curstate&IFindentmask) >> IFindentshift;
       +        indent += delta;
       +        if(indent < 0) {
       +                if(warn)
       +                        fprint(2, "warning: indent went negative\n");
       +                indent = 0;
       +        }
       +        // swap the old indent bits for the new value
       +        ps->curstate &= ~IFindentmask;
       +        ps->curstate |= indent << IFindentshift;
       +}
       +
       +// Put val on top of stk and hand val back to the caller.
       +// If stk already holds Nestmax entries the value is not stored (a
       +// warning is printed when warn is set); val is still returned.
       +static int
       +push(Stack* stk, int val)
       +{
       +        if(stk->n != Nestmax) {
       +                stk->slots[stk->n] = val;
       +                stk->n++;
       +                return val;
       +        }
       +        if(warn)
       +                fprint(2, "warning: build stack overflow\n");
       +        return val;
       +}
       +
       +// Discard the top entry of stk; does nothing if stk is empty.
       +static void
       +pop(Stack* stk)
       +{
       +        if(stk->n <= 0)
       +                return;
       +        stk->n--;
       +}
       +
       +// Peek at the top entry of stk without removing it;
       +// yields dflt when stk is empty.
       +static int
       +top(Stack* stk, int dflt)
       +{
       +        return (stk->n == 0)? dflt : stk->slots[stk->n-1];
       +}
       +
       +// Remove the top entry of stk (if there is one) and return the entry
       +// that is on top afterwards, or dflt if nothing is left.
       +static int
       +popretnewtop(Stack* stk, int dflt)
       +{
       +        if(stk->n != 0)
       +                stk->n--;
       +        if(stk->n == 0)
       +                return dflt;
       +        return stk->slots[stk->n-1];
       +}
       +
       +// Make tostk hold the same entries as fromstk
       +// (same count, same slot values).
       +static void
       +copystack(Stack* tostk, Stack* fromstk)
       +{
       +        int        cnt;
       +
       +        cnt = fromstk->n;
       +        memmove(tostk->slots, fromstk->slots, cnt*sizeof(int));
       +        tostk->n = cnt;
       +}
       +
       +// Pop the font style stack of ps and recompute ps->curfont.
       +static void
       +popfontstyle(Pstate* ps)
       +{
       +        pop(&ps->fntstylestk);
       +        setcurfont(ps);
       +}
       +
       +// Push sty on the font style stack of ps and recompute ps->curfont.
       +static void
       +pushfontstyle(Pstate* ps, int sty)
       +{
       +        push(&ps->fntstylestk, sty);
       +        setcurfont(ps);
       +}
       +
       +// Pop the font size stack of ps and recompute ps->curfont.
       +static void
       +popfontsize(Pstate* ps)
       +{
       +        pop(&ps->fntsizestk);
       +        setcurfont(ps);
       +}
       +
       +// Push sz on the font size stack of ps and recompute ps->curfont.
       +// (setcurfont clamps the size to the range Tiny..Verylarge.)
       +static void
       +pushfontsize(Pstate* ps, int sz)
       +{
       +        push(&ps->fntsizestk, sz);
       +        setcurfont(ps);
       +}
       +
       +// Recompute ps->curfont from the tops of the font style and font size
       +// stacks.  An empty style stack means FntR and an empty size stack
       +// means Normal; the size is forced into the range Tiny..Verylarge.
       +// The font number is style*NumSize + size.
       +static void
       +setcurfont(Pstate* ps)
       +{
       +        int        style;
       +        int        size;
       +
       +        style = top(&ps->fntstylestk, FntR);
       +        size = top(&ps->fntsizestk, Normal);
       +
       +        // clamp the size
       +        if(size < Tiny)
       +                size = Tiny;
       +        if(size > Verylarge)
       +                size = Verylarge;
       +
       +        ps->curfont = style*NumSize + size;
       +}
       +
       +// Pop the justification stack of ps and update the current justification.
       +static void
       +popjust(Pstate* ps)
       +{
       +        pop(&ps->juststk);
       +        setcurjust(ps);
       +}
       +
       +// Push j on the justification stack of ps and update the current
       +// justification.
       +static void
       +pushjust(Pstate* ps, int j)
       +{
       +        push(&ps->juststk, j);
       +        setcurjust(ps);
       +}
       +
       +// Bring ps->curjust and the justification bits of ps->curstate in line
       +// with the top of the justification stack (ALleft when the stack is
       +// empty).  Nothing is touched if the justification has not changed.
       +static void
       +setcurjust(Pstate* ps)
       +{
       +        int        just;
       +
       +        just = top(&ps->juststk, ALleft);
       +        if(just == ps->curjust)
       +                return;
       +
       +        ps->curjust = just;
       +        // clear both justification bits, then set the one that applies
       +        // (neither bit set for any other justification)
       +        ps->curstate &= ~(IFrjust|IFcjust);
       +        if(just == ALcenter)
       +                ps->curstate |= IFcjust;
       +        else if(just == ALright)
       +                ps->curstate |= IFrjust;
       +}
       +
       +// Do final rearrangement after table parsing is finished
       +// and assign cells to grid points.
       +//
       +// On entry t->rows is a linked list; it is replaced by an array of
       +// t->nrow rows, filled from the last slot backwards so that the head
       +// of the list ends up as the last row.  Each row's cell list is
       +// reversed along nextinrow, t->cells is reversed and each cell is
       +// trimmed, and t->cols and t->grid are allocated.  Finally every cell
       +// gets its (row, col) set to its upper left grid point and every grid
       +// point it spans is pointed at it.
       +static void
       +finish_table(Table* t)
       +{
       +        int        ncol;
       +        int        nrow;
       +        int        r;
       +        Tablerow*        rl;
       +        Tablecell*        cl;
       +        int*        rowspancnt;
       +        Tablecell**        rowspancell;
       +        int        ri;
       +        int        ci;
       +        Tablecell*        c;
       +        Tablecell*        cnext;
       +        Tablerow*        row;
       +        Tablerow*        rownext;
       +        int        rcols;
       +        int        newncol;
       +        int        k;
       +        int        j;
       +        int        cspan;
       +        int        rspan;
       +        int        i;
       +
       +        rl = t->rows;
       +        t->nrow = nrow = _listlen((List*)rl);
       +        t->rows = (Tablerow*)emalloc(nrow * sizeof(Tablerow));
       +        ncol = 0;
       +        r = nrow - 1;
       +        for(row = rl; row != nil; row = rownext) {
       +                // copy the data from the allocated Tablerow into the array slot
       +                t->rows[r] = *row;
       +                rownext = row->next;
       +                row = &t->rows[r];
       +                r--;
       +                rcols = 0;
       +                c = row->cells;
       +
       +                // If rowspan is > 1 but this is the last row,
       +                // reset the rowspan
       +                // (r has already been decremented, so nrow-2 here means
       +                // the row just stored in slot nrow-1)
       +                if(c != nil && c->rowspan > 1 && r == nrow-2)
       +                                c->rowspan = 1;
       +
       +                // reverse row->cells list (along nextinrow pointers)
       +                row->cells = nil;
       +                while(c != nil) {
       +                        cnext = c->nextinrow;
       +                        c->nextinrow = row->cells;
       +                        row->cells = c;
       +                        rcols += c->colspan;
       +                        c = cnext;
       +                }
       +                if(rcols > ncol)
       +                        ncol = rcols;
       +        }
       +        t->ncol = ncol;
       +        t->cols = (Tablecol*)emalloc(ncol * sizeof(Tablecol));
       +
       +        // Reverse cells just so they are drawn in source order.
       +        // Also, trim their contents so they don't end in whitespace.
       +        t->cells = (Tablecell*)_revlist((List*)t->cells);
       +        for(c = t->cells; c != nil; c= c->next)
       +                trim_cell(c);
       +        t->grid = (Tablecell***)emalloc(nrow * sizeof(Tablecell**));
       +        for(i = 0; i < nrow; i++)
       +                t->grid[i] = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
       +
       +        // The following arrays keep track of cells that are spanning
       +        // multiple rows;  rowspancnt[i] is the number of rows left
       +        // to be spanned in column i.
       +        // When done, cell's (row,col) is upper left grid point.
       +        rowspancnt = (int*)emalloc(ncol * sizeof(int));
       +        rowspancell = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
       +        for(ri = 0; ri < nrow; ri++) {
       +                row = &t->rows[ri];
       +                cl = row->cells;
       +                ci = 0;
       +                while(ci < ncol || cl != nil) {
       +                        if(ci < ncol && rowspancnt[ci] > 0) {
       +                                // column still covered by a cell from an earlier row
       +                                t->grid[ri][ci] = rowspancell[ci];
       +                                rowspancnt[ci]--;
       +                                ci++;
       +                        }
       +                        else {
       +                                if(cl == nil) {
       +                                        // no more cells in this row; leave the grid point empty
       +                                        ci++;
       +                                        continue;
       +                                }
       +                                c = cl;
       +                                cl = cl->nextinrow;
       +                                cspan = c->colspan;
       +                                rspan = c->rowspan;
       +                                if(ci + cspan > ncol) {
       +                                        // because of row spanning, we calculated
       +                                        // ncol incorrectly; adjust it
       +                                        newncol = ci + cspan;
       +                                        t->cols = (Tablecol*)erealloc(t->cols, newncol * sizeof(Tablecol));
       +                                        rowspancnt = (int*)erealloc(rowspancnt, newncol * sizeof(int));
       +                                        rowspancell = (Tablecell**)erealloc(rowspancell, newncol * sizeof(Tablecell*));
       +                                        // zero only the k newly added columns, which start at
       +                                        // index ncol; the existing entries must be preserved
       +                                        k = newncol-ncol;
       +                                        memset(t->cols+ncol, 0, k*sizeof(Tablecol));
       +                                        memset(rowspancnt+ncol, 0, k*sizeof(int));
       +                                        memset(rowspancell+ncol, 0, k*sizeof(Tablecell*));
       +                                        for(j = 0; j < nrow; j++) {
       +                                                t->grid[j] = (Tablecell**)erealloc(t->grid[j], newncol * sizeof(Tablecell*));
       +                                                memset(t->grid[j]+ncol, 0, k*sizeof(Tablecell*));
       +                                        }
       +                                        t->ncol = ncol = newncol;
       +                                }
       +                                c->row = ri;
       +                                c->col = ci;
       +                                for(i = 0; i < cspan; i++) {
       +                                        t->grid[ri][ci] = c;
       +                                        if(rspan > 1) {
       +                                                // remember that c also covers this column
       +                                                // in the next rspan-1 rows
       +                                                rowspancnt[ci] = rspan - 1;
       +                                                rowspancell[ci] = c;
       +                                        }
       +                                        ci++;
       +                                }
       +                        }
       +                }
       +        }
       +}
       +
       +// Remove tail of cell content until it isn't whitespace.
       +//
       +// As written, only the last item of c->content is examined: if it is a
       +// text item without IFnobrk, its string is replaced by the leading part
       +// reported by _splitr (when both parts are non-empty).
       +// NOTE(review): dropping is cleared inside the loop and never set again,
       +// so the item-removal branch at the bottom is never taken and the loop
       +// body runs at most once -- confirm whether whole-item dropping was
       +// intended.
       +static void
       +trim_cell(Tablecell* c)
       +{
       +        int        dropping;
       +        Rune*        s;
       +        Rune*        x;
       +        Rune*        y;
       +        int                nx;
       +        int                ny;
       +        Item*        p;
       +        Itext*        q;
       +        Item*        pprev;
       +
       +        dropping = 1;
       +        while(c->content != nil && dropping) {
       +                // find the last item (p) and the one before it (pprev)
       +                p = c->content;
       +                pprev = nil;
       +                while(p->next != nil) {
       +                        pprev = p;
       +                        p = p->next;
       +                }
       +                dropping = 0;
       +                if(!(p->state&IFnobrk)) {
       +                        if(p->tag == Itexttag) {
       +                                q = (Itext*)p;
       +                                s = q->s;
       +                                // x/nx: part to keep; y/ny: presumably the trailing
       +                                // whitespace -- confirm against _splitr
       +                                _splitr(s, _Strlen(s), notwhitespace, &x, &nx, &y, &ny);
       +                                if(nx != 0 && ny != 0) {
       +                                        // replace the string by its trimmed copy
       +                                        q->s = _Strndup(x, nx);
       +                                        free(s);
       +                                }
       +                                break;
       +                        }
       +                }
       +                if(dropping) {
       +                        // unlink and free the last item
       +                        if(pprev == nil)
       +                                c->content = nil;
       +                        else
       +                                pprev->next = nil;
       +                        freeitem(p);
       +                }
       +        }
       +}
       +
       +// Build the marker string for item number n of a list whose type is ty:
       +// a single glyph for LTdisc/LTsquare/LTcircle, otherwise a decimal,
       +// letter or roman label followed by '.'.
       +// Returns nil when ty is none of the handled list types.
       +// Caller must free answer (eventually).
       +static Rune*
       +listmark(uchar ty, int n)
       +{
       +        Rune*        s;
       +        Rune*        t;
       +        Rune*        e;
       +        int        n2;
       +        int        i;
       +        int        base;
       +
       +        s = nil;
       +        switch(ty) {
       +        case LTdisc:
       +        case LTsquare:
       +        case LTcircle:
       +                s = _newstr(1);
       +                s[0] = (ty == LTdisc)? 0x2022                // bullet
       +                        : ((ty == LTsquare)? 0x220e        // filled square
       +                            : 0x2218);                                // degree
       +                s[1] = 0;
       +                break;
       +
       +        case LT1:
       +                t = _ltoStr(n);
       +                n2 = _Strlen(t);
       +                s = _newstr(n2+1);
       +                e = _Stradd(s, t, n2);
       +                *e++ = '.';
       +                *e = 0;
       +                // the digit string was only a temporary; it used to be
       +                // leaked because t was overwritten with _Stradd's result
       +                // (assumes _ltoStr returns heap memory -- confirm in utils.c)
       +                free(t);
       +                break;
       +
       +        case LTa:
       +        case LTA:
       +                // Pick the base letter once.  The old form
       +                //        n + (ty == LTa)? 'a' : 'A'
       +                // parsed as (n + (ty == LTa)) ? 'a' : 'A', so every item
       +                // was labelled with the bare base letter.
       +                base = (ty == LTa)? 'a' : 'A';
       +                n--;
       +                i = 0;
       +                if(n < 0)
       +                        n = 0;
       +                s = _newstr((n <= 25)? 2 : 3);
       +                if(n > 25) {
       +                        n2 = n%26;
       +                        n /= 26;
       +                        // n%26 is always in range; it is the quotient that
       +                        // can run past the alphabet, so clamp that instead
       +                        if(n > 25)
       +                                n = 25;
       +                        // NOTE(review): the remainder is emitted before the
       +                        // quotient, as in the original; confirm the intended
       +                        // letter order for items past the 26th
       +                        s[i++] = n2 + base;
       +                }
       +                s[i++] = n + base;
       +                s[i++] = '.';
       +                s[i] = 0;
       +                break;
       +
       +        case LTi:
       +        case LTI:
       +                if(n >= NROMAN) {
       +                        if(warn)
       +                                fprint(2, "warning: unimplemented roman number > %d\n", NROMAN);
       +                        n = NROMAN;
       +                }
       +                // roman[] is indexed by n-1; keep n <= 0 from reading before the table
       +                if(n < 1)
       +                        n = 1;
       +                t = roman[n - 1];
       +                n2 = _Strlen(t);
       +                s = _newstr(n2+1);
       +                for(i = 0; i < n2; i++)
       +                        s[i] = (ty == LTi)? tolower(t[i]) : t[i];
       +                s[i++] = '.';
       +                s[i] = 0;
       +                break;
       +        }
       +        return s;
       +}
       +
       +// Look up the map called name on di->maps.
       +// When none matches, make a new Map holding its own copy of name and
       +// no areas, push it on the front of di->maps, and return that.
       +// The di->maps list keeps ownership of whatever is returned.
       +static Map*
       +getmap(Docinfo* di, Rune* name)
       +{
       +        Map*        cur;
       +        Map*        fresh;
       +
       +        cur = di->maps;
       +        while(cur != nil) {
       +                if(_Strcmp(name, cur->name) == 0)
       +                        return cur;
       +                cur = cur->next;
       +        }
       +
       +        // not found: create and link at the head
       +        fresh = (Map*)emalloc(sizeof(Map));
       +        fresh->next = di->maps;
       +        fresh->areas = nil;
       +        fresh->name = _Strdup(name);
       +        di->maps = fresh;
       +        return fresh;
       +}
       +
       +// Allocate an Area with the given shape, href and target, chained in
       +// front of link.  The Area takes over ownership of href.
       +static Area*
       +newarea(int shape, Rune* href, int target, Area* link)
       +{
       +        Area*        area;
       +
       +        area = (Area*)emalloc(sizeof(Area));
       +        area->next = link;
       +        area->target = target;
       +        area->href = href;
       +        area->shape = shape;
       +        return area;
       +}
       +
       +// Fetch the string value of attribute attid from tok (nil if none).
       +// The string is handed over to the caller, who must free it (eventually).
       +static Rune*
       +aval(Token* tok, int attid)
       +{
       +        Rune*        val;
       +
       +        // final argument 1: string ownership moves from the token to val
       +        _tokaval(tok, attid, &val, 1);
       +        return val;
       +}
       +
       +// Same as aval, except that a copy of dflt is returned when tok has
       +// no attribute attid.
       +// Caller must free the result (eventually).
       +static Rune*
       +astrval(Token* tok, int attid, Rune* dflt)
       +{
       +        Rune*        val;
       +
       +        if(!_tokaval(tok, attid, &val, 1))
       +                return _Strdup(dflt);
       +        // string ownership has moved from the token to val
       +        return val;
       +}
       +
       +// Integer value of attribute attid in tok, converted with toint.
       +// dflt is returned when the attribute is missing or has no value.
       +static int
       +aintval(Token* tok, int attid, int dflt)
       +{
       +        Rune*        text;
       +
       +        if(_tokaval(tok, attid, &text, 0) && text != nil)
       +                return toint(text);
       +        return dflt;
       +}
       +
       +// Same as aintval, but a negative converted value is returned as 0.
       +// (dflt itself is returned unchanged when the attribute is missing
       +// or has no value.)
       +static int
       +auintval(Token* tok, int attid, int dflt)
       +{
       +        Rune*        text;
       +        int        val;
       +
       +        if(!_tokaval(tok, attid, &text, 0) || text == nil)
       +                return dflt;
       +        val = toint(text);
       +        if(val < 0)
       +                val = 0;
       +        return val;
       +}
       +
       +// Convert s to an int with _Strtol (base 10).
       +// When warn is set and the conversion stopped before the end of s,
       +// print a warning if _Strclass finds a notwhitespace character in
       +// the unconverted remainder.
       +static int
       +toint(Rune* s)
       +{
       +        Rune*        rest;
       +        int        val;
       +
       +        val = _Strtol(s, &rest, 10);
       +        if(!warn || *rest == 0)
       +                return val;
       +        if(_Strclass(rest, notwhitespace) != nil)
       +                fprint(2, "warning: expected integer, got %S\n", s);
       +        return val;
       +}
       +
       +// Translate the string value of attribute attid through the table
       +// tab[0:ntab].  Returns dflt when the attribute is absent or its
       +// value is not in the table (warning in the latter case if warn is set).
       +static int
       +atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt)
       +{
       +        Rune*        name;
       +        int        found;
       +
       +        if(!_tokaval(tok, attid, &name, 0))
       +                return dflt;
       +        found = dflt;
       +        if(_lookup(tab, ntab, name, _Strlen(name), &found))
       +                return found;
       +        if(warn)
       +                fprint(2, "warning: name not found in table lookup: %S\n", name);
       +        return dflt;
       +}
       +
       +// Color value of attribute attid: the attribute string passed
       +// through color(), or dflt when tok has no such attribute.
       +static int
       +acolorval(Token* tok, int attid, int dflt)
       +{
       +        Rune*        spec;
       +
       +        if(!_tokaval(tok, attid, &spec, 0))
       +                return dflt;
       +        return color(spec, dflt);
       +}
       +
       +// Target frame id for tok: targetid() of its Atarget attribute,
       +// or dflt when that attribute is absent.
       +static int
       +atargval(Token* tok, int dflt)
       +{
       +        Rune*        name;
       +
       +        if(!_tokaval(tok, Atarget, &name, 0))
       +                return dflt;
       +        return targetid(name);
       +}
       +
       +// special for list types, where "i" and "I" are different,
       +// but "square" and "SQUARE" are the same.
       +// Returns the LT* code for tok's Atype attribute, or dflt when the
       +// attribute is absent or unrecognized (with a warning if warn is set).
       +// One-character values are matched exactly; longer values go through
       +// _Strncmpci against circle/disc/square.
       +static int
       +listtyval(Token* tok, int dflt)
       +{
       +        Rune*        aval;
       +        int        ans;
       +        int        n;
       +
       +        ans = dflt;
       +        if(_tokaval(tok, Atype, &aval, 0)) {
       +                n = _Strlen(aval);
       +                if(n == 1) {
       +                        switch(aval[0]) {
       +                        case '1':
       +                                ans = LT1;
       +                                break;
       +                        case 'A':
       +                                ans = LTA;
       +                                break;
       +                        case 'I':
       +                                ans = LTI;
       +                                break;
       +                        case 'a':
       +                                ans = LTa;
       +                                break;
       +                        case 'i':
       +                                ans = LTi;
       +                                // this break was missing, so a valid "i" fell
       +                                // into default and was reported as unknown
       +                                break;
       +                        default:
       +                                if(warn)
       +                                        fprint(2, "warning: unknown list element type %c\n", aval[0]);
       +                                break;
       +                        }
       +                }
       +                else {
       +                        if(!_Strncmpci(aval, n, L(Lcircle)))
       +                                ans = LTcircle;
       +                        else if(!_Strncmpci(aval, n, L(Ldisc)))
       +                                ans = LTdisc;
       +                        else if(!_Strncmpci(aval, n, L(Lsquare)))
       +                                ans = LTsquare;
       +                        else {
       +                                if(warn)
       +                                        fprint(2, "warning: unknown list element type %S\n", aval);
       +                        }
       +                }
       +        }
       +        return ans;
       +}
       +
       +// URL-valued attribute attid of tok, with all whitespace removed.
       +// base is accepted but unused: FOR NOW the url is left relative.
       +// A copy of dflt is returned when the attribute is absent, has no
       +// value, or removeallwhite returns nil.
       +// Caller must free the result (eventually).
       +static Rune*
       +aurlval(Token* tok, int attid, Rune* dflt, Rune* base)
       +{
       +        Rune*        raw;
       +        Rune*        cleaned;
       +
       +        USED(base);
       +        if(_tokaval(tok, attid, &raw, 0) && raw != nil) {
       +                cleaned = removeallwhite(raw);
       +                if(cleaned != nil)
       +                        return cleaned;
       +        }
       +        return _Strdup(dflt);
       +}
       +
       +// Return a new string equal to s with every whitespace character
       +// (leading, trailing and internal) taken out.  Characters with
       +// value >= 256 are always kept; the rest are tested with isspace.
       +// Used to repair sloppy URL attribute values.
       +static Rune*
       +removeallwhite(Rune* s)
       +{
       +        int        len;
       +        int        keep;
       +        int        k;
       +        int        ch;
       +        Rune*        out;
       +        Rune*        dst;
       +
       +        // first pass: count the characters that survive
       +        len = _Strlen(s);
       +        keep = 0;
       +        for(k = 0; k < len; k++) {
       +                ch = s[k];
       +                if(ch >= 256 || !isspace(ch))
       +                        keep++;
       +        }
       +
       +        // nothing to strip: a plain duplicate will do
       +        if(keep >= len)
       +                return _Strdup(s);
       +
       +        // second pass: copy the survivors
       +        out = _newstr(keep);
       +        dst = out;
       +        for(k = 0; k < len; k++) {
       +                ch = s[k];
       +                if(ch >= 256 || !isspace(ch))
       +                        *dst++ = ch;
       +        }
       +        *dst = 0;
       +        return out;
       +}
       +
       +// Flag-style attribute: 0 when attid is absent from tok, 1 when it
       +// is present without a value, otherwise toint() of its value.
       +static int
       +aflagval(Token* tok, int attid)
       +{
       +        Rune*        text;
       +
       +        if(!_tokaval(tok, attid, &text, 0))
       +                return 0;
       +        if(text == nil)
       +                return 1;
       +        return toint(text);
       +}
       +
       +// Pack a horizontal and a vertical alignment into an Align value.
       +static Align
       +makealign(int halign, int valign)
       +{
       +        Align        a;
       +
       +        a.valign = valign;
       +        a.halign = halign;
       +        return a;
       +}
       +
       +// Build an Align from tok's Aalign (horizontal) and Avalign (vertical)
       +// attributes, each looked up in align_tab with ALnone as the default.
       +static Align
       +aalign(Token* tok)
       +{
       +        int        h;
       +        int        v;
       +
       +        h = atabval(tok, Aalign, align_tab, NALIGNTAB, ALnone);
       +        v = atabval(tok, Avalign, align_tab, NALIGNTAB, ALnone);
       +        return makealign(h, v);
       +}
       +
       +// Dimen for attribute attid of tok: parsedim() of its value, or a
       +// Dnone dimension of 0 when the attribute is absent.
       +static Dimen
       +adimen(Token* tok, int attid)
       +{
       +        Rune*        text;
       +
       +        if(!_tokaval(tok, attid, &text, 0))
       +                return makedimen(Dnone, 0);
       +        return parsedim(text, _Strlen(text));
       +}
       +
       +// Parse s[0:n] as num[.[num]][unit][%|*]
       +// Returns makedimen(kind, spec).
       +// While parsing, spec is kept in thousandths: 1000 times the leading
       +// number plus up to three fraction digits (later digits add nothing
       +// because mul has reached 0), then scaled by the unit, and finally
       +// divided by 1000.
       +// kind is Dpixels once a leading number was found (nl != 0), changed
       +// to Dpercent or Drelative if the next remaining character is '%' or
       +// '*'.  With no leading number, a lone "*" gives Drelative with spec 1;
       +// anything else gives Dnone with spec 0.
       +// NOTE(review): the unit scaling is plain int arithmetic on a value
       +// already multiplied by 1000, so large inputs may overflow.
       +static Dimen
       +parsedim(Rune* s, int ns)
       +{
       +        int        kind;
       +        int        spec;
       +        Rune*        l;
       +        int        nl;
       +        Rune*        r;
       +        int        nr;
       +        int        mul;
       +        int        i;
       +        Rune*        f;
       +        int        nf;
       +        int        Tkdpi;
       +        Rune*        units;
       +
       +        kind = Dnone;
       +        spec = 0;
       +        // l[0:nl] is the piece before the split point, r[0:nr] the rest
       +        // (presumably the leading digits and what follows them)
       +        _splitl(s, ns, L(Lnot0to9), &l, &nl, &r, &nr);
       +        if(nl != 0) {
       +                spec = 1000*_Strtol(l, nil, 10);
       +                if(nr > 0 && r[0] == '.') {
       +                        // fraction: split again just past the '.'
       +                        _splitl(r+1, nr-1, L(Lnot0to9), &f, &nf, &r, &nr);
       +                        if(nf != 0) {
       +                                // first digit is worth 100 thousandths, then 10, then 1
       +                                mul = 100;
       +                                for(i = 0; i < nf; i++) {
       +                                        spec = spec + mul*(f[i]-'0');
       +                                        mul = mul/10;
       +                                }
       +                        }
       +                }
       +                kind = Dpixels;
       +                if(nr != 0) {
       +                        if(nr >= 2) {
       +                                // the next two characters are consumed as a unit
       +                                // name whether or not they match one below
       +                                Tkdpi = 100;        // presumably the dots-per-inch used for conversion
       +                                units = r;
       +                                r = r+2;
       +                                nr -= 2;
       +                                if(!_Strncmpci(units, 2, L(Lpt)))
       +                                        spec = (spec*Tkdpi)/72;
       +                                else if(!_Strncmpci(units, 2, L(Lpi)))
       +                                        spec = (spec*12*Tkdpi)/72;
       +                                else if(!_Strncmpci(units, 2, L(Lin)))
       +                                        spec = spec*Tkdpi;
       +                                else if(!_Strncmpci(units, 2, L(Lcm)))
       +                                        spec = (spec*100*Tkdpi)/254;
       +                                else if(!_Strncmpci(units, 2, L(Lmm)))
       +                                        spec = (spec*10*Tkdpi)/254;
       +                                else if(!_Strncmpci(units, 2, L(Lem)))
       +                                        spec = spec*15;
       +                                else {
       +                                        if(warn)
       +                                                fprint(2, "warning: unknown units %C%Cs\n", units[0], units[1]);
       +                                }
       +                        }
       +                        // whatever character is next decides percent/relative
       +                        if(nr >= 1) {
       +                                if(r[0] == '%')
       +                                        kind = Dpercent;
       +                                else if(r[0] == '*')
       +                                        kind = Drelative;
       +                        }
       +                }
       +                // back from thousandths to whole units
       +                spec = spec/1000;
       +        }
       +        else if(nr == 1 && r[0] == '*') {
       +                spec = 1;
       +                kind = Drelative;
       +        }
       +        return makedimen(kind, spec);
       +}
       +
       +// Split the value of attribute attid with _splitall (separator set
       +// Lcommaspace, at most SMALLBUFSIZE pieces) and parse each piece with
       +// parsedim.  On success *pans is a newly allocated array of *panslen
       +// Dimens; otherwise *pans is nil and *panslen is 0.
       +static void
       +setdimarray(Token* tok, int attid, Dimen** pans, int* panslen)
       +{
       +        Rune*        text;
       +        Dimen*        dims;
       +        int        i;
       +        int        count;
       +        Rune* piece[SMALLBUFSIZE];
       +        int        piecelen[SMALLBUFSIZE];
       +
       +        // start from the "nothing found" answer
       +        *pans = nil;
       +        *panslen = 0;
       +
       +        if(!_tokaval(tok, attid, &text, 0))
       +                return;
       +        count = _splitall(text, _Strlen(text), L(Lcommaspace), piece, piecelen, SMALLBUFSIZE);
       +        if(count <= 0)
       +                return;
       +
       +        dims = (Dimen*)emalloc(count * sizeof(Dimen));
       +        for(i = 0; i < count; i++)
       +                dims[i] = parsedim(piece[i], piecelen[i]);
       +        *pans = dims;
       +        *panslen = count;
       +}
       +
       +// Pack an image url and a color into a Background value.
       +// The url pointer is stored as is, not copied.
       +static Background
       +makebackground(Rune* imageurl, int color)
       +{
       +        Background        b;
       +
       +        b.color = color;
       +        b.image = imageurl;
       +        return b;
       +}
       +
       +// Allocate a text item holding string s (must not be nil; the
       +// pointer is stored, not copied) with the given font, foreground
       +// color, vertical offset and underline setting.
       +static Item*
       +newitext(Rune* s, int fnt, int fg, int voff, int ul)
       +{
       +        Itext*        txt;
       +
       +        assert(s != nil);
       +        txt = (Itext*)emalloc(sizeof(Itext));
       +        txt->item.tag = Itexttag;
       +        txt->ul = ul;
       +        txt->voff = voff;
       +        txt->fg = fg;
       +        txt->fnt = fnt;
       +        txt->s = s;
       +        return (Item*)txt;
       +}
       +
       +// Allocate a rule item with the given alignment, size, noshade
       +// flag and width specification.
       +static Item*
       +newirule(int align, int size, int noshade, Dimen wspec)
       +{
       +        Irule*        rule;
       +
       +        rule = (Irule*)emalloc(sizeof(Irule));
       +        rule->item.tag = Iruletag;
       +        rule->wspec = wspec;
       +        rule->noshade = noshade;
       +        rule->size = size;
       +        rule->align = align;
       +        return (Item*)rule;
       +}
       +
       +// Allocate an image item from the given attributes.
       +// The item state is IFsmap when ismap is set, 0 otherwise, and
       +// ctlid starts out as -1.  src, altrep and map are stored as
       +// pointers; map is owned elsewhere.
       +static Item*
       +newiimage(Rune* src, Rune* altrep, int align, int width, int height,
       +                int hspace, int vspace, int border, int ismap, Map* map)
       +{
       +        Iimage*        img;
       +
       +        img = (Iimage*)emalloc(sizeof(Iimage));
       +        img->item.tag = Iimagetag;
       +        img->item.state = ismap? IFsmap : 0;
       +
       +        img->imsrc = src;
       +        img->altrep = altrep;
       +        img->map = map;
       +
       +        img->align = align;
       +        img->imwidth = width;
       +        img->imheight = height;
       +        img->hspace = hspace;
       +        img->vspace = vspace;
       +        img->border = border;
       +
       +        img->ctlid = -1;
       +        return (Item*)img;
       +}
       +
       +// Allocate a form-field item that points at ff.
       +static Item*
       +newiformfield(Formfield* ff)
       +{
       +        Iformfield*        fld;
       +
       +        fld = (Iformfield*)emalloc(sizeof(Iformfield));
       +        fld->formfield = ff;
       +        fld->item.tag = Iformfieldtag;
       +        return (Item*)fld;
       +}
       +
       +// Allocate a table item that points at tab.
       +static Item*
       +newitable(Table* tab)
       +{
       +        Itable*        ti;
       +
       +        ti = (Itable*)emalloc(sizeof(Itable));
       +        ti->table = tab;
       +        ti->item.tag = Itabletag;
       +        return (Item*)ti;
       +}
       +
       +// Allocate a float item wrapping it, floated to the given side.
       +// The new item's state is set to IFwrap.
       +static Item*
       +newifloat(Item* it, int side)
       +{
       +        Ifloat*        flt;
       +
       +        flt = (Ifloat*)emalloc(sizeof(Ifloat));
       +        flt->side = side;
       +        flt->item = it;
       +        flt->_item.state = IFwrap;
       +        flt->_item.tag = Ifloattag;
       +        return (Item*)flt;
       +}
       +
       +// Allocate a spacer item of kind spkind.
       +static Item*
       +newispacer(int spkind)
       +{
       +        Ispacer*        sp;
       +
       +        sp = (Ispacer*)emalloc(sizeof(Ispacer));
       +        sp->spkind = spkind;
       +        sp->item.tag = Ispacertag;
       +        return (Item*)sp;
       +}
       +
       +// Free one item (caller must deal with next pointer).
       +// Releases what the item's tag says it owns (text string, image
       +// strings, form field, table, or the item wrapped by a float), then
       +// its generic attributes, then the item itself.  nil is ignored.
       +static void
       +freeitem(Item* it)
       +{
       +        Iimage* ii;
       +        Genattr* ga;
       +
       +        if(it == nil)
       +                return;
       +
       +        switch(it->tag) {
       +        case Itexttag:
       +                free(((Itext*)it)->s);
       +                break;
       +        case Iimagetag:
       +                ii = (Iimage*)it;
       +                free(ii->imsrc);
       +                free(ii->altrep);
       +                break;
       +        case Iformfieldtag:
       +                freeformfield(((Iformfield*)it)->formfield);
       +                break;
       +        case Itabletag:
       +                freetable(((Itable*)it)->table);
       +                break;
       +        case Ifloattag:
       +                freeitem(((Ifloat*)it)->item);
       +                break;
       +        }
       +        ga = it->genattr;
       +        if(ga != nil) {
       +                free(ga->id);
       +                free(ga->class);
       +                free(ga->style);
       +                free(ga->title);
       +                freescriptevents(ga->events);
       +                // the Genattr record itself used to be leaked
       +                free(ga);
       +        }
       +        free(it);
       +}
       +
       +// Free every item on the list that starts at ithead and is linked
       +// through the next pointers.
       +void
       +freeitems(Item* ithead)
       +{
       +        Item*        cur;
       +        Item*        following;
       +
       +        for(cur = ithead; cur != nil; cur = following) {
       +                // grab the link before the item goes away
       +                following = cur->next;
       +                freeitem(cur);
       +        }
       +}
       +
       +// Free a form field: its name and value strings, every Option on its
       +// options list (strings and node), and the field itself.  nil is ignored.
       +static void
       +freeformfield(Formfield* ff)
       +{
       +        Option* o;
       +        Option* onext;
       +
       +        if(ff == nil)
       +                return;
       +
       +        free(ff->name);
       +        free(ff->value);
       +        for(o = ff->options; o != nil; o = onext) {
       +                onext = o->next;
       +                free(o->value);
       +                free(o->display);
       +                // the Option node itself used to be leaked
       +                free(o);
       +        }
       +        free(ff);
       +}
       +
       +// Free a table: each cell (content items and the cell record), the
       +// grid rows and the grid, the row and column arrays, the caption
       +// items, and the table itself.  nil is ignored.
       +static void
       +freetable(Table* t)
       +{
       +        int i;
       +        Tablecell* c;
       +        Tablecell* cnext;
       +
       +        if(t == nil)
       +                return;
       +
       +        // We'll find all the unique cells via t->cells and next pointers.
       +        // (Other pointers to cells in the table are duplicates of these)
       +        for(c = t->cells; c != nil; c = cnext) {
       +                cnext = c->next;
       +                freeitems(c->content);
       +                // the cell record itself used to be leaked
       +                free(c);
       +        }
       +        // grid entries only point at cells freed above; free the arrays
       +        if(t->grid != nil) {
       +                for(i = 0; i < t->nrow; i++)
       +                        free(t->grid[i]);
       +                free(t->grid);
       +        }
       +        free(t->rows);
       +        free(t->cols);
       +        freeitems(t->caption);
       +        free(t);
       +}
       +
       +// Free a form record together with its name and action strings.
       +// The form's fields are not touched: they belong to the Iformfield
       +// items.  nil is ignored.
       +static void
       +freeform(Form* f)
       +{
       +        if(f != nil) {
       +                free(f->action);
       +                free(f->name);
       +                free(f);
       +        }
       +}
       +
       +// Free every form on the list starting at fhead.
       +static void
       +freeforms(Form* fhead)
       +{
       +        Form*        cur;
       +        Form*        following;
       +
       +        cur = fhead;
       +        while(cur != nil) {
       +                following = cur->next;
       +                freeform(cur);
       +                cur = following;
       +        }
       +}
       +
       +// Free an anchor record together with its name and href strings.
       +// nil is ignored.
       +static void
       +freeanchor(Anchor* a)
       +{
       +        if(a != nil) {
       +                free(a->href);
       +                free(a->name);
       +                free(a);
       +        }
       +}
       +
       +// Free every anchor on the list starting at ahead.
       +static void
       +freeanchors(Anchor* ahead)
       +{
       +        Anchor*        cur;
       +        Anchor*        following;
       +
       +        cur = ahead;
       +        while(cur != nil) {
       +                following = cur->next;
       +                freeanchor(cur);
       +                cur = following;
       +        }
       +}
       +
       +// Free a destination anchor record and its name string.
       +// nil is ignored.
       +static void
       +freedestanchor(DestAnchor* da)
       +{
       +        if(da != nil) {
       +                free(da->name);
       +                free(da);
       +        }
       +}
       +
       +// Free every destination anchor on the list starting at dahead.
       +static void
       +freedestanchors(DestAnchor* dahead)
       +{
       +        DestAnchor*        cur;
       +        DestAnchor*        following;
       +
       +        cur = dahead;
       +        while(cur != nil) {
       +                following = cur->next;
       +                freedestanchor(cur);
       +                cur = following;
       +        }
       +}
       +
       +// Free an area record together with its href string and coords array.
       +// nil is ignored.
       +static void
       +freearea(Area* a)
       +{
       +        if(a == nil)
       +                return;
       +        free(a->href);
       +        free(a->coords);
       +        // freemap relies on this routine to release the Area itself;
       +        // previously only the members were freed and the record leaked
       +        free(a);
       +}
       +
       +static void freekidinfos(Kidinfo* khead);
       +
       +// Free one Kidinfo.  A frameset entry releases its rows and cols
       +// arrays and, recursively, its list of kids; any other entry releases
       +// its src and name strings.  k must not be nil.
       +static void
       +freekidinfo(Kidinfo* k)
       +{
       +        if(!k->isframeset) {
       +                free(k->name);
       +                free(k->src);
       +        }
       +        else {
       +                freekidinfos(k->kidinfos);
       +                free(k->cols);
       +                free(k->rows);
       +        }
       +        free(k);
       +}
       +
       +// Free every Kidinfo on the list starting at khead.
       +static void
       +freekidinfos(Kidinfo* khead)
       +{
       +        Kidinfo*        cur;
       +        Kidinfo*        following;
       +
       +        cur = khead;
       +        while(cur != nil) {
       +                following = cur->next;
       +                freekidinfo(cur);
       +                cur = following;
       +        }
       +}
       +
       +// Free a map: its name string, each area on its areas list (via
       +// freearea), and the map record.  nil is ignored.
       +static void
       +freemap(Map* m)
       +{
       +        Area*        cur;
       +        Area*        following;
       +
       +        if(m == nil)
       +                return;
       +
       +        free(m->name);
       +        cur = m->areas;
       +        while(cur != nil) {
       +                following = cur->next;
       +                freearea(cur);
       +                cur = following;
       +        }
       +        free(m);
       +}
       +
       +// Free every map on the list starting at mhead.
       +static void
       +freemaps(Map* mhead)
       +{
       +        Map*        cur;
       +        Map*        following;
       +
       +        cur = mhead;
       +        while(cur != nil) {
       +                following = cur->next;
       +                freemap(cur);
       +                cur = following;
       +        }
       +}
       +
       +// Free a Docinfo and what it owns: the src, base and refresh
       +// strings, the background item, and the kidinfo, anchor, destination
       +// anchor, form and map lists.  nil is ignored.
       +void
       +freedocinfo(Docinfo* d)
       +{
       +        if(d == nil)
       +                return;
       +
       +        // plain strings
       +        free(d->src);
       +        free(d->base);
       +        free(d->refresh);
       +
       +        freeitem((Item*)d->backgrounditem);
       +
       +        // linked lists hanging off the docinfo
       +        freekidinfos(d->kidinfo);
       +        freeanchors(d->anchors);
       +        freedestanchors(d->dests);
       +        freeforms(d->forms);
       +        freemaps(d->maps);
       +
       +        // tables, images, and formfields are not handled here: they
       +        // go away when the items pointing at them are freed
       +        free(d);
       +}
       +
       +// Free a Pstate record.  Only the record itself is released:
       +// for now, everything a Pstate points to is owned by someone else.
       +static void
       +freepstate(Pstate* p)
       +{
       +        free(p);
       +}
       +
       +// Free every Pstate record on the stack starting at pshead,
       +// following the next pointers.  Only the records are released.
       +static void
       +freepstatestack(Pstate* pshead)
       +{
       +        Pstate*        cur;
       +        Pstate*        below;
       +
       +        cur = pshead;
       +        while(cur != nil) {
       +                below = cur->next;
       +                free(cur);
       +                cur = below;
       +        }
       +}
       +
       +static int
       +Iconv(Fmt *f)
       +{
       +        Item*        it;
       +        Itext*        t;
       +        Irule*        r;
       +        Iimage*        i;
       +        Ifloat*        fl;
       +        int        state;
       +        Formfield*        ff;
       +        Rune*        ty;
       +        Tablecell*        c;
       +        Table*        tab;
       +        char*        p;
       +        int        cl;
       +        int        hang;
       +        int        indent;
       +        int        bi;
       +        int        nbuf;
       +        char        buf[BIGBUFSIZE];
       +
       +        it = va_arg(f->args, Item*);
       +        bi = 0;
       +        nbuf = sizeof(buf);
       +        state = it->state;
       +        nbuf = nbuf-1;
       +        if(state&IFbrk) {
       +                cl = state&(IFcleft|IFcright);
       +                p = "";
       +                if(cl) {
       +                        if(cl == (IFcleft|IFcright))
       +                                p = " both";
       +                        else if(cl == IFcleft)
       +                                p = " left";
       +                        else
       +                                p = " right";
       +                }
       +                bi = snprint(buf, nbuf, "brk(%d%s)", (state&IFbrksp)? 1 : 0, p);
       +        }
       +        if(state&IFnobrk)
       +                bi += snprint(buf+bi, nbuf-bi, " nobrk");
       +        if(!(state&IFwrap))
       +                bi += snprint(buf+bi, nbuf-bi, " nowrap");
       +        if(state&IFrjust)
       +                bi += snprint(buf+bi, nbuf-bi, " rjust");
       +        if(state&IFcjust)
       +                bi += snprint(buf+bi, nbuf-bi, " cjust");
       +        if(state&IFsmap)
       +                bi += snprint(buf+bi, nbuf-bi, " smap");
       +        indent = (state&IFindentmask) >> IFindentshift;
       +        if(indent > 0)
       +                bi += snprint(buf+bi, nbuf-bi, " indent=%d", indent);
       +        hang = state&IFhangmask;
       +        if(hang > 0)
       +                bi += snprint(buf+bi, nbuf-bi, " hang=%d", hang);
       +
       +        switch(it->tag) {
       +        case Itexttag:
       +                t = (Itext*)it;
       +                bi += snprint(buf+bi, nbuf-bi, " Text '%S', fnt=%d, fg=%x", t->s, t->fnt, t->fg);
       +                break;
       +
       +        case Iruletag:
       +                r = (Irule*)it;
       +                bi += snprint(buf+bi, nbuf-bi, "Rule size=%d, al=%S, wspec=", r->size, stringalign(r->align));
       +                bi += dimprint(buf+bi, nbuf-bi, r->wspec);
       +                break;
       +
       +        case Iimagetag:
       +                i = (Iimage*)it;
       +                bi += snprint(buf+bi, nbuf-bi,
       +                        "Image src=%S, alt=%S, al=%S, w=%d, h=%d hsp=%d, vsp=%d, bd=%d, map=%S",
       +                        i->imsrc, i->altrep? i->altrep : L(Lempty), stringalign(i->align), i->imwidth, i->imheight,
       +                        i->hspace, i->vspace, i->border, i->map?i->map->name : L(Lempty));
       +                break;
       +
       +        case Iformfieldtag:
       +                ff = ((Iformfield*)it)->formfield;
       +                if(ff->ftype == Ftextarea)
       +                        ty = L(Ltextarea);
       +                else if(ff->ftype == Fselect)
       +                        ty = L(Lselect);
       +                else {
       +                        ty = _revlookup(input_tab, NINPUTTAB, ff->ftype);
       +                        if(ty == nil)
       +                                ty = L(Lnone);
       +                }
       +                bi += snprint(buf+bi, nbuf-bi, "Formfield %S, fieldid=%d, formid=%d, name=%S, value=%S",
       +                        ty, ff->fieldid, ff->form->formid, ff->name?  ff->name : L(Lempty),
       +                        ff->value? ff->value : L(Lempty));
       +                break;
       +
       +        case Itabletag:
       +                tab = ((Itable*)it)->table;
       +                bi += snprint(buf+bi, nbuf-bi, "Table tableid=%d, width=", tab->tableid);
       +                bi += dimprint(buf+bi, nbuf-bi, tab->width);
       +                bi += snprint(buf+bi, nbuf-bi, ", nrow=%d, ncol=%d, ncell=%d, totw=%d, toth=%d\n",
       +                        tab->nrow, tab->ncol, tab->ncell, tab->totw, tab->toth);
       +                for(c = tab->cells; c != nil; c = c->next)
       +                        bi += snprint(buf+bi, nbuf-bi, "Cell %d.%d, at (%d,%d) ",
       +                                        tab->tableid, c->cellid, c->row, c->col);
       +                bi += snprint(buf+bi, nbuf-bi, "End of Table %d", tab->tableid);
       +                break;
       +
       +        case Ifloattag:
       +                fl = (Ifloat*)it;
       +                bi += snprint(buf+bi, nbuf-bi, "Float, x=%d y=%d, side=%S, it=%I",
       +                        fl->x, fl->y, stringalign(fl->side), fl->item);
       +                bi += snprint(buf+bi, nbuf-bi, "\n\t");
       +                break;
       +
       +        case Ispacertag:
       +                p = "";
       +                switch(((Ispacer*)it)->spkind) {
       +                case ISPnull:
       +                        p = "null";
       +                        break;
       +                case ISPvline:
       +                        p = "vline";
       +                        break;
       +                case ISPhspace:
       +                        p = "hspace";
       +                        break;
       +                }
       +                bi += snprint(buf+bi, nbuf-bi, "Spacer %s ", p);
       +                break;
       +        }
       +        bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%d\n",
       +                        it->width, it->height, it->ascent, it->anchorid);
       +        buf[bi] = 0;
       +        return fmtstrcpy(f, buf);
       +}
       +
       +// Name of alignment code 'a', looked up in align_tab.
       +// Yields L(Lnone) when the table has no entry for 'a'.
       +static Rune*
       +stringalign(int a)
       +{
       +        Rune*        name;
       +
       +        name = _revlookup(align_tab, NALIGNTAB, a);
       +        if(name != nil)
       +                return name;
       +        return L(Lnone);
       +}
       +
       +// Format dimension d into buf, which has room for nbuf bytes:
       +// the decimal spec value, followed by '%' for Dpercent or
       +// '*' for Drelative.
       +// Returns the number of characters stored, not counting the
       +// terminating NUL.  The result is always NUL-terminated and the
       +// return value never exceeds nbuf-1, so a caller can keep
       +// appending at buf+n; the suffix is dropped if it does not fit.
       +static int
       +dimprint(char* buf, int nbuf, Dimen d)
       +{
       +        int        n;
       +        int        k;
       +        char        suffix;
       +
       +        if(nbuf <= 0)
       +                return 0;
       +        n = snprint(buf, nbuf, "%d", dimenspec(d));
       +        // Clamp so that n is a valid index into buf whatever
       +        // snprint reports on error or truncation.
       +        if(n < 0)
       +                n = 0;
       +        if(n > nbuf-1)
       +                n = nbuf-1;
       +        buf[n] = 0;
       +
       +        k = dimenkind(d);
       +        suffix = 0;
       +        if(k == Dpercent)
       +                suffix = '%';
       +        else if(k == Drelative)
       +                suffix = '*';
       +        // Only append when there is still room for the suffix
       +        // and the terminator after it.
       +        if(suffix != 0 && n < nbuf-1) {
       +                buf[n++] = suffix;
       +                buf[n] = 0;
       +        }
       +        return n;
       +}
       +
       +// Debugging aid: write msg on a line of its own to file
       +// descriptor 2, then every item on the list, each formatted
       +// with the %I verb.
       +void
       +printitems(Item* items, char* msg)
       +{
       +        Item*        it;
       +
       +        fprint(2, "%s\n", msg);
       +        for(it = items; it != nil; it = it->next)
       +                fprint(2, "%I", it);
       +}
       +
       +// Allocate a Genattr and fill it in from the arguments.
       +// The pointers are stored as given, not copied.
       +static Genattr*
       +newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events)
       +{
       +        Genattr*        ga;
       +
       +        ga = (Genattr*)emalloc(sizeof *ga);
       +        ga->events = events;
       +        ga->title = title;
       +        ga->style = style;
       +        ga->class = class;
       +        ga->id = id;
       +        return ga;
       +}
       +
       +// Allocate a Formfield and fill it in from the arguments.
       +// ctlid starts out as -1, and link becomes the new field's
       +// next pointer.  name and value are stored as given, not copied.
       +static Formfield*
       +newformfield(int ftype, int fieldid, Form* form, Rune* name,
       +                Rune* value, int size, int maxlength, Formfield* link)
       +{
       +        Formfield*        field;
       +
       +        field = (Formfield*)emalloc(sizeof *field);
       +        field->next = link;
       +        field->form = form;
       +        field->ftype = ftype;
       +        field->fieldid = fieldid;
       +        field->ctlid = -1;
       +        field->name = name;
       +        field->value = value;
       +        field->size = size;
       +        field->maxlength = maxlength;
       +        return field;
       +}
       +
       +// Allocate an Option that precedes link on its list.
       +// The Option takes over ownership of value and display.
       +static Option*
       +newoption(int selected, Rune* value, Rune* display, Option* link)
       +{
       +        Option*        opt;
       +
       +        opt = (Option*)emalloc(sizeof *opt);
       +        opt->next = link;
       +        opt->display = display;
       +        opt->value = value;
       +        opt->selected = selected;
       +        return opt;
       +}
       +
       +// Allocate a Form and fill it in from the arguments.
       +// The new form has no fields yet (nfields 0, fields nil);
       +// link becomes its next pointer.
       +static Form*
       +newform(int formid, Rune* name, Rune* action, int target, int method, Form* link)
       +{
       +        Form*        form;
       +
       +        form = (Form*)emalloc(sizeof *form);
       +        form->next = link;
       +        form->fields = nil;
       +        form->nfields = 0;
       +        form->formid = formid;
       +        form->name = name;
       +        form->action = action;
       +        form->target = target;
       +        form->method = method;
       +        return form;
       +}
       +
       +static Table*        // allocates a Table, fills it in from the arguments and returns it
       +newtable(int tableid, Align align, Dimen width, int border,
       +        int cellspacing, int cellpadding, Background bg, Token* tok, Table* link)
       +{
       +        Table* t;
       +
       +        t = (Table*)emalloc(sizeof(Table));
       +        t->tableid = tableid;
       +        t->align = align;
       +        t->width = width;
       +        t->border = border;
       +        t->cellspacing = cellspacing;
       +        t->cellpadding = cellpadding;
       +        t->background = bg;
       +        t->caption_place = ALbottom;        // caption placement starts as ALbottom
       +        t->caption_lay = nil;
       +        t->tabletok = tok;        // NOTE(review): dead store, overwritten by the next line
       +        t->tabletok = nil;        // net effect: tok is not retained -- confirm which of the two was intended
       +        t->next = link;        // link becomes the new table's next pointer
       +        return t;
       +}
       +
       +// Allocate a Tablerow with the given alignment, background and
       +// flags; link becomes its next pointer.
       +static Tablerow*
       +newtablerow(Align align, Background bg, int flags, Tablerow* link)
       +{
       +        Tablerow*        row;
       +
       +        row = (Tablerow*)emalloc(sizeof *row);
       +        row->next = link;
       +        row->flags = flags;
       +        row->background = bg;
       +        row->align = align;
       +        return row;
       +}
       +
       +// Allocate a Tablecell and fill it in from the arguments.
       +// The cell starts with lay set to nil; link becomes its
       +// next pointer.
       +static Tablecell*
       +newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, int hspec,
       +                Background bg, int flags, Tablecell* link)
       +{
       +        Tablecell*        cell;
       +
       +        cell = (Tablecell*)emalloc(sizeof *cell);
       +        cell->next = link;
       +        cell->lay = nil;
       +        cell->cellid = cellid;
       +        cell->rowspan = rowspan;
       +        cell->colspan = colspan;
       +        cell->wspec = wspec;
       +        cell->hspec = hspec;
       +        cell->align = align;
       +        cell->background = bg;
       +        cell->flags = flags;
       +        return cell;
       +}
       +
       +// Allocate an Anchor and fill it in from the arguments.
       +// name and href are stored as given; link becomes the
       +// anchor's next pointer.
       +static Anchor*
       +newanchor(int index, Rune* name, Rune* href, int target, Anchor* link)
       +{
       +        Anchor*        anc;
       +
       +        anc = (Anchor*)emalloc(sizeof *anc);
       +        anc->next = link;
       +        anc->target = target;
       +        anc->href = href;
       +        anc->name = name;
       +        anc->index = index;
       +        return anc;
       +}
       +
       +// Allocate a DestAnchor recording index, name and item;
       +// link becomes its next pointer.
       +static DestAnchor*
       +newdestanchor(int index, Rune* name, Item* item, DestAnchor* link)
       +{
       +        DestAnchor*        dest;
       +
       +        dest = (DestAnchor*)emalloc(sizeof *dest);
       +        dest->next = link;
       +        dest->item = item;
       +        dest->name = name;
       +        dest->index = index;
       +        return dest;
       +}
       +
       +// Allocate an SEvent of the given type holding script;
       +// link becomes its next pointer.  script is stored as given
       +// (freescriptevents later frees it along with the event).
       +static SEvent*
       +newscriptevent(int type, Rune* script, SEvent* link)
       +{
       +        SEvent*        ev;
       +
       +        ev = (SEvent*)emalloc(sizeof *ev);
       +        ev->next = link;
       +        ev->script = script;
       +        ev->type = type;
       +        return ev;
       +}
       +
       +// Free every SEvent on the list that starts at ehead,
       +// together with each event's script string.
       +static void
       +freescriptevents(SEvent* ehead)
       +{
       +        SEvent*        cur;
       +        SEvent*        following;
       +
       +        for(cur = ehead; cur != nil; cur = following) {
       +                // remember the link before the node goes away
       +                following = cur->next;
       +                free(cur->script);
       +                free(cur);
       +        }
       +}
       +
       +// Build a Dimen whose kindspec is kind or'ed with spec.
       +// A spec with any Dkindmask bits set cannot be represented:
       +// it is replaced by 0, with a warning on file descriptor 2
       +// when 'warn' is set.
       +static Dimen
       +makedimen(int kind, int spec)
       +{
       +        Dimen        dim;
       +        int        toobig;
       +
       +        toobig = (spec&Dkindmask) != 0;
       +        if(toobig && warn)
       +                fprint(2, "warning: dimension spec too big: %d\n", spec);
       +        if(toobig)
       +                spec = 0;
       +        dim.kindspec = kind|spec;
       +        return dim;
       +}
       +
       +// Kind part of d: the bits of d.kindspec selected by Dkindmask.
       +int
       +dimenkind(Dimen d)
       +{
       +        int        kind;
       +
       +        kind = d.kindspec&Dkindmask;
       +        return kind;
       +}
       +
       +// Spec part of d: the bits of d.kindspec selected by Dspecmask.
       +int
       +dimenspec(Dimen d)
       +{
       +        int        spec;
       +
       +        spec = d.kindspec&Dspecmask;
       +        return spec;
       +}
       +
       +// Allocate a Kidinfo; link becomes its next pointer.
       +// When isframeset is zero the entry also gets default
       +// scrolling flags, FRKIDMARGIN margins and a framebd of 1;
       +// otherwise those fields are left as emalloc returned them.
       +static Kidinfo*
       +newkidinfo(int isframeset, Kidinfo* link)
       +{
       +        Kidinfo*        kid;
       +
       +        kid = (Kidinfo*)emalloc(sizeof *kid);
       +        kid->isframeset = isframeset;
       +        kid->next = link;
       +        if(isframeset)
       +                return kid;
       +
       +        kid->flags = FRhscrollauto|FRvscrollauto;
       +        kid->marginw = FRKIDMARGIN;
       +        kid->marginh = FRKIDMARGIN;
       +        kid->framebd = 1;
       +        return kid;
       +}
       +
       +// Allocate a Docinfo and give it the defaults that
       +// resetdocinfo establishes.
       +static Docinfo*
       +newdocinfo(void)
       +{
       +        Docinfo*        info;
       +
       +        info = (Docinfo*)emalloc(sizeof *info);
       +        resetdocinfo(info);
       +        return info;
       +}
       +
       +// Put *d back into its default state: everything zeroed, then
       +// white background, black text, blue for all three link colors,
       +// target FTself, charset ISO_8859_1, script type TextJavascript
       +// and frameid -1.
       +static void
       +resetdocinfo(Docinfo* d)
       +{
       +        memset(d, 0, sizeof *d);
       +
       +        // colors
       +        d->background = makebackground(nil, White);
       +        d->text = Black;
       +        d->link = d->vlink = d->alink = Blue;
       +
       +        // everything else that is not simply zero
       +        d->frameid = -1;
       +        d->scripttype = TextJavascript;
       +        d->chset = ISO_8859_1;
       +        d->target = FTself;
       +}
       +
       +// Set up the targetmap array, which records the
       +// name <-> targetid mapping: room for 10 entries (obtained
       +// from emalloc, then zeroed), of which the first four are the
       +// predefined names for FTtop, FTself, FTparent and FTblank.
       +// Nothing in this function frees the array or its keys.
       +static void
       +targetmapinit(void)
       +{
       +        StringInt*        e;
       +
       +        targetmapsize = 10;
       +        targetmap = (StringInt*)emalloc(targetmapsize*sizeof(StringInt));
       +        memset(targetmap, 0, targetmapsize*sizeof(StringInt));
       +
       +        e = targetmap;
       +        e->key = _Strdup(L(L_top));
       +        e->val = FTtop;
       +        e++;
       +        e->key = _Strdup(L(L_self));
       +        e->val = FTself;
       +        e++;
       +        e->key = _Strdup(L(L_parent));
       +        e->val = FTparent;
       +        e++;
       +        e->key = _Strdup(L(L_blank));
       +        e->val = FTblank;
       +
       +        ntargets = 4;
       +}
       +
       +// Return the target id for name s.
       +// An empty name maps to FTself.  A name already in targetmap
       +// yields its recorded value.  Any other name is copied into the
       +// next free slot (growing the array by 10 entries when it is
       +// full) and its slot index becomes its id.
       +int
       +targetid(Rune* s)
       +{
       +        int        len;
       +        int        slot;
       +        Rune*        copy;
       +
       +        len = _Strlen(s);
       +        if(len == 0)
       +                return FTself;
       +
       +        slot = 0;
       +        while(slot < ntargets) {
       +                if(_Strcmp(s, targetmap[slot].key) == 0)
       +                        return targetmap[slot].val;
       +                slot++;
       +        }
       +
       +        // not found: slot is now the first unused index
       +        if(slot >= targetmapsize) {
       +                targetmapsize += 10;
       +                targetmap = (StringInt*)erealloc(targetmap, targetmapsize*sizeof(StringInt));
       +        }
       +        copy = (Rune*)emalloc((len+1)*sizeof(Rune));
       +        memmove(copy, s, (len+1)*sizeof(Rune));
       +        targetmap[slot].key = copy;
       +        targetmap[slot].val = slot;
       +        ntargets++;
       +        return slot;
       +}
       +
       +// Return the name recorded in targetmap for target id targid,
       +// or L(Lquestion) when no entry has that id.  The string
       +// returned is the table's own, not a copy.
       +Rune*
       +targetname(int targid)
       +{
       +        int        slot;
       +
       +        slot = 0;
       +        while(slot < ntargets) {
       +                if(targetmap[slot].val == targid)
       +                        return targetmap[slot].key;
       +                slot++;
       +        }
       +        return L(Lquestion);
       +}
       +
       +// Convert an HTML color spec to an RGB value.
       +// s is first looked up as a name in color_tab; failing that it
       +// is parsed as hexadecimal digits, with one optional leading '#'.
       +// Returns dflt when s is nil, when there are no digits at all
       +// ("" or "#"), or when something other than hex digits follows.
       +static int
       +color(Rune* s, int dflt)
       +{
       +        int v;
       +        Rune* rest;
       +
       +        if(s == nil)
       +                return dflt;
       +        if(_lookup(color_tab, NCOLORS, s, _Strlen(s), &v))
       +                return v;
       +        if(s[0] == '#')
       +                s++;
       +        // An empty digit string is not a color.  Reject it here:
       +        // passed on to _Strtol it would presumably leave rest at
       +        // the terminator and so be accepted by the test below.
       +        if(s[0] == 0)
       +                return dflt;
       +        v = _Strtol(s, &rest, 16);
       +        // accept only if the whole string was consumed
       +        if(*rest == 0)
       +                return v;
       +        return dflt;
       +}
       +
       +// Debugging
       +
       +#define HUGEPIX 10000        // exclusive upper bound the valid* checks apply to widths, heights, ascents and similar fields
       +
       +// A "shallow" validitem, that doesn't follow next links
       +// or descend into tables.  Returns nonzero if item i looks sane; i must not be nil.
       +static int
       +validitem(Item* i)
       +{
       +        int ok;
       +        Itext* ti;
       +        Irule* ri;
       +        Iimage* ii;
       +        Ifloat* fi;
       +        int a;
       +
       +        ok = (i->tag >= Itexttag && i->tag <= Ispacertag) &&        // fields common to every Item come first
       +                (i->next == nil || validptr(i->next)) &&
       +                (i->width >= 0 && i->width < HUGEPIX) &&
       +                (i->height >= 0 && i->height < HUGEPIX) &&
       +                (i->ascent > -HUGEPIX && i->ascent < HUGEPIX) &&
       +                (i->anchorid >= 0) &&
       +                (i->genattr == nil || validptr(i->genattr));
       +        // also, could check state for ridiculous combinations
       +        // also, could check anchorid for within-doc-range
       +        if(ok)
       +                switch(i->tag) {        // then the fields specific to the item's tag
       +                case Itexttag:
       +                        ti = (Itext*)i;
       +                        ok = validStr(ti->s) &&
       +                                (ti->fnt >= 0 && ti->fnt < NumStyle*NumSize) &&
       +                                (ti->ul == ULnone || ti->ul == ULunder || ti->ul == ULmid);
       +                        break;
       +                case Iruletag:
       +                        ri = (Irule*)i;
       +                        ok = (validvalign(ri->align) || validhalign(ri->align)) &&        // either kind of alignment is accepted
       +                                (ri->size >=0 && ri->size < HUGEPIX);
       +                        break;
       +                case Iimagetag:
       +                        ii = (Iimage*)i;
       +                        ok = (ii->imsrc == nil || validptr(ii->imsrc)) &&
       +                                (ii->item.width >= 0 && ii->item.width < HUGEPIX) &&
       +                                (ii->item.height >= 0 && ii->item.height < HUGEPIX) &&
       +                                (ii->imwidth >= 0 && ii->imwidth < HUGEPIX) &&
       +                                (ii->imheight >= 0 && ii->imheight < HUGEPIX) &&
       +                                (ii->altrep == nil || validStr(ii->altrep)) &&
       +                                (ii->map == nil || validptr(ii->map)) &&
       +                                (validvalign(ii->align) || validhalign(ii->align)) &&
       +                                (ii->nextimage == nil || validptr(ii->nextimage));
       +                        break;
       +                case Iformfieldtag:
       +                        ok = validformfield(((Iformfield*)i)->formfield);
       +                        break;
       +                case Itabletag:
       +                        ok = validptr((Itable*)i);        // the item only; validitems is what runs validtable on the table
       +                        break;
       +                case Ifloattag:
       +                        fi = (Ifloat*)i;
       +                        ok = (fi->side == ALleft || fi->side == ALright) &&
       +                                validitem(fi->item) &&        // the one recursion here; fi->item is used without a nil check
       +                                (fi->item->tag == Iimagetag || fi->item->tag == Itabletag);
       +                        break;
       +                case Ispacertag:
       +                        a = ((Ispacer*)i)->spkind;
       +                        ok = a==ISPnull || a==ISPvline || a==ISPhspace || a==ISPgeneral;
       +                        break;
       +                default:
       +                        ok = 0;        // not reached when the tag range test above passed, assuming the seven tags handled here are the whole Itexttag..Ispacertag range
       +                }
       +        return ok;
       +}
       +
       +// Validate a whole list of items ("deep" check): each item
       +// goes through validitem, and tables -- whether directly on the
       +// list or held by a float -- also go through validtable.
       +// Stops at the first bad item, which is reported on file
       +// descriptor 2.  A nil list is valid.
       +int
       +validitems(Item* i)
       +{
       +        int        good;
       +        Item*        inner;
       +
       +        good = 1;
       +        for(; i != nil && good; i = i->next) {
       +                good = validitem(i);
       +                if(good && i->tag == Itabletag)
       +                        good = validtable(((Itable*)i)->table);
       +                else if(good && i->tag == Ifloattag) {
       +                        inner = ((Ifloat*)i)->item;
       +                        if(inner->tag == Itabletag)
       +                                good = validtable(((Itable*)inner)->table);
       +                }
       +                if(!good)
       +                        fprint(2, "invalid item: %I\n", i);
       +        }
       +        return good;
       +}
       +
       +// Sanity-check one Formfield; returns 1 if it looks valid,
       +// 0 otherwise.  Pointer members may be nil; non-nil ones are
       +// probed with validptr/validStr, and a non-nil image with
       +// validitem.
       +// When all built, f->fieldid < f->form->nfields should hold,
       +// but that is not checked since this may run during the build.
       +static int
       +validformfield(Formfield* f)
       +{
       +        if(f->next != nil && !validptr(f->next))
       +                return 0;
       +        if(f->ftype < 0 || f->ftype > Ftextarea)
       +                return 0;
       +        if(f->fieldid < 0)
       +                return 0;
       +        if(f->form != nil && !validptr(f->form))
       +                return 0;
       +        if(f->name != nil && !validStr(f->name))
       +                return 0;
       +        if(f->value != nil && !validStr(f->value))
       +                return 0;
       +        if(f->options != nil && !validptr(f->options))
       +                return 0;
       +        if(f->image != nil && !validitem(f->image))
       +                return 0;
       +        if(f->events != nil && !validptr(f->events))
       +                return 0;
       +        return 1;
       +}
       +
       +// "deep" validation -- checks cell contents too (validtablecell runs validitems on each cell's content)
       +static int
       +validtable(Table* t)
       +{
       +        int ok;
       +        int i, j;
       +        Tablecell* c;
       +
       +        ok = (t->next == nil || validptr(t->next)) &&        // scalar and pointer fields of the Table itself
       +                t->nrow >= 0 &&
       +                t->ncol >= 0 &&
       +                t->ncell >= 0 &&
       +                validalign(t->align) &&
       +                validdimen(t->width) &&
       +                (t->border >= 0 && t->border < HUGEPIX) &&
       +                (t->cellspacing >= 0 && t->cellspacing < HUGEPIX) &&
       +                (t->cellpadding >= 0 && t->cellpadding < HUGEPIX) &&
       +                validitems(t->caption) &&        // validitems accepts nil, so a missing caption is fine
       +                (t->caption_place == ALtop || t->caption_place == ALbottom) &&
       +                (t->totw >= 0 && t->totw < HUGEPIX) &&
       +                (t->toth >= 0 && t->toth < HUGEPIX) &&
       +                (t->tabletok == nil || validptr(t->tabletok));
       +        // during parsing, t->rows has list;
       +        // only when parsing is done is t->nrow set > 0
       +        if(ok && t->nrow > 0 && t->ncol > 0) {
       +                // table is "finished"
       +                for(i = 0; i < t->nrow && ok; i++)         // rows is indexed as an array of nrow entries here
       +                        ok = validtablerow(t->rows+i);
       +                for(j = 0; j < t->ncol && ok; j++)        // likewise cols, with ncol entries
       +                        ok = validtablecol(t->cols+j);
       +                for(c = t->cells; c != nil && ok; c = c->next)        // cells is walked as a linked list
       +                        ok = validtablecell(c);
       +                for(i = 0; i < t->nrow && ok; i++)
       +                        for(j = 0; j < t->ncol && ok; j++)
       +                                ok = validptr(t->grid[i][j]);        // every grid entry is dereferenced; validptr has no nil check
       +        }
       +        return ok;
       +}
       +
       +// Is 'a' one of the alignment codes accepted as a vertical
       +// alignment (ALnone, ALmiddle, ALbottom, ALtop, ALbaseline)?
       +static int
       +validvalign(int a)
       +{
       +        if(a == ALnone || a == ALmiddle || a == ALbottom)
       +                return 1;
       +        if(a == ALtop || a == ALbaseline)
       +                return 1;
       +        return 0;
       +}
       +
       +// Is 'a' one of the alignment codes accepted as a horizontal
       +// alignment (ALnone, ALleft, ALcenter, ALright, ALjustify,
       +// ALchar)?
       +static int
       +validhalign(int a)
       +{
       +        if(a == ALnone || a == ALleft || a == ALcenter)
       +                return 1;
       +        if(a == ALright || a == ALjustify || a == ALchar)
       +                return 1;
       +        return 0;
       +}
       +
       +// An Align is valid when its halign passes validhalign and
       +// its valign passes validvalign.  Returns 1 or 0.
       +static int
       +validalign(Align a)
       +{
       +        if(!validhalign(a.halign))
       +                return 0;
       +        return validvalign(a.valign) != 0;
       +}
       +
       +// Check that the spec part of d makes sense for its kind:
       +// Dnone needs a spec of 0, Dpixels a spec below HUGEPIX, and
       +// Dpercent and Drelative accept any spec.  Any other kind
       +// is invalid.
       +static int
       +validdimen(Dimen d)
       +{
       +        int        spec;
       +
       +        spec = d.kindspec&Dspecmask;
       +        switch(d.kindspec&Dkindmask) {
       +        case Dnone:
       +                return spec == 0;
       +        case Dpixels:
       +                return spec < HUGEPIX;
       +        case Dpercent:
       +        case Drelative:
       +                return 1;
       +        }
       +        return 0;
       +}
       +
       +// Sanity-check one Tablerow: its cells pointer (nil allowed),
       +// height in [0, HUGEPIX), ascent strictly between -HUGEPIX
       +// and HUGEPIX, and its alignment.  Returns 1 or 0.
       +static int
       +validtablerow(Tablerow* r)
       +{
       +        if(r->cells != nil && !validptr(r->cells))
       +                return 0;
       +        if(r->height < 0 || r->height >= HUGEPIX)
       +                return 0;
       +        if(r->ascent <= -HUGEPIX || r->ascent >= HUGEPIX)
       +                return 0;
       +        return validalign(r->align) != 0;
       +}
       +
       +// Sanity-check one Tablecol: width in [0, HUGEPIX) and a
       +// valid alignment.  Returns 1 or 0.
       +static int
       +validtablecol(Tablecol* c)
       +{
       +        if(c->width < 0 || c->width >= HUGEPIX)
       +                return 0;
       +        return validalign(c->align) != 0;
       +}
       +
       +// Sanity-check one Tablecell: its pointers (each may be nil),
       +// non-negative spans and position, alignment and width spec.
       +// If those pass and the cell has content, the content list is
       +// validated in depth with validitems.
       +static int
       +validtablecell(Tablecell* c)
       +{
       +        if(c->next != nil && !validptr(c->next))
       +                return 0;
       +        if(c->nextinrow != nil && !validptr(c->nextinrow))
       +                return 0;
       +        if(c->content != nil && !validptr(c->content))
       +                return 0;
       +        if(c->lay != nil && !validptr(c->lay))
       +                return 0;
       +        if(c->rowspan < 0 || c->colspan < 0)
       +                return 0;
       +        if(!validalign(c->align))
       +                return 0;
       +        if(!validdimen(c->wspec))
       +                return 0;
       +        if(c->row < 0 || c->col < 0)
       +                return 0;
       +        if(c->content == nil)
       +                return 1;
       +        return validitems(c->content);
       +}
       +
       +static int        // probes p by reading one byte through it; p must not be nil
       +validptr(void* p)
       +{
       +        // TODO: a better job of this.
       +        // For now, just dereference, which cause a bomb
       +        // if not valid
       +        static char c;        // sink for the byte read -- presumably static so the read is not discarded as dead code; TODO confirm
       +
       +        c = *((char*)p);
       +        return 1;        // always 1: an invalid pointer is expected to fault on the read above rather than return
       +}
       +
       +// A string is valid when it is non-nil and passes validptr.
       +static int
       +validStr(Rune* s)
       +{
       +        if(s == nil)
       +                return 0;
       +        return validptr(s);
       +}
 (DIR) diff --git a/src/libhtml/impl.h b/src/libhtml/impl.h
       t@@ -0,0 +1,163 @@
       +
       +// UTILS
       +typedef struct List List;
       +typedef struct Strlist Strlist;
       +
       +// List of integers (and also generic list with next pointer at beginning)
       +struct List
       +{
       +        List*        next;        // link to the following element; kept as the first member (see above)
       +        int        val;        // the integer carried by this element
       +};
       +
       +struct Strlist        // list of Rune strings; like List, the next pointer is the first member
       +{
       +        Strlist*        next;        // link to the following element
       +        Rune*        val;        // the string carried by this element
       +};
       +
       +extern int                _inclass(Rune c, Rune* cl);
       +extern int                _listlen(List* l);
       +extern Rune*        _ltoStr(int n);
       +extern List*        _newlist(int val, List* rest);
       +extern Rune*        _newstr(int n);
       +extern int                _prefix(Rune* pre, Rune* s);
       +extern List*        _revlist(List* l);
       +extern void        _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
       +extern void        _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
       +extern int                _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen);
       +extern Rune*        _Stradd(Rune*s1, Rune* s2, int n);
       +extern Rune*        _Strclass(Rune* s, Rune* cl);
       +extern int                _Strcmp(Rune* s1, Rune* s2);
       +extern Rune*        _Strdup(Rune* s);
       +extern Rune*        _Strdup2(Rune* s, Rune* t);
       +extern int                _Streqn(Rune* s1, int n1, Rune* s2);
       +extern int                _Strlen(Rune* s);
       +extern Rune*        _Strnclass(Rune* s, Rune* cl, int n);
       +extern int                _Strncmpci(Rune* s1, int n1, Rune* s2);
       +extern Rune*        _Strndup(Rune* s, int n);
       +extern Rune*        _Strnrclass(Rune* s, Rune* cl, int n);
       +extern Rune*        _Strrclass(Rune* s, Rune* cl);
       +extern Rune*        _Strsubstr(Rune* s, int start, int stop);
       +extern long        _Strtol(Rune* s, Rune** eptr, int base);
       +extern void        _trimwhite(Rune* s, int n, Rune** pans, int* panslen);
       +
       +extern Rune        notwhitespace[];
       +extern Rune        whitespace[];
       +
       +// STRINTTAB
       +typedef struct StringInt StringInt;
       +
       +// Element of String-Int table (used for keyword lookup)
       +struct StringInt
       +{
       +        Rune*        key;        // the string side of the pair
       +        int        val;        // the integer paired with key
       +};
       +
       +extern int                        _lookup(StringInt* t, int n, Rune* key, int keylen, int* pans);
       +extern StringInt*        _makestrinttab(Rune** a, int n);
       +extern Rune*                _revlookup(StringInt* t, int n, int val);
       +
       +// Colors, in html format, not Plan 9 format.  (RGB values in bottom 3 bytes)
       +enum {
       +        White = 0xFFFFFF,        // all three bytes at FF
       +        Black = 0x000000,        // all three bytes at 00
       +        Blue = 0x0000CC,        // only the lowest byte set (CC, not FF)
       +};
       +
       +// LEX
       +
       +// HTML 4.0 tags (plus blink, nobr)
       +// sorted in lexical order; used as array indices
       +enum {
       +        Notfound,        // value 0
       +        Comment,        // value 1
       +        Ta, Tabbr, Tacronym, Taddress, Tapplet, Tarea,
       +        Tb, Tbase, Tbasefont, Tbdo, Tbig, Tblink,
       +        Tblockquote, Tbody, Tbq, Tbr, Tbutton,
       +        Tcaption, Tcenter, Tcite, Tcode, Tcol, Tcolgroup,
       +        Tdd, Tdel, Tdfn, Tdir, Tdiv, Tdl, Tdt,
       +        Tem,
       +        Tfieldset, Tfont, Tform, Tframe, Tframeset,
       +        Th1, Th2, Th3, Th4, Th5, Th6,
       +        Thead, Thr, Thtml,
       +        Ti, Tiframe, Timg, Tinput, Tins, Tisindex,
       +        Tkbd,
       +        Tlabel, Tlegend, Tli, Tlink,
       +        Tmap, Tmenu, Tmeta,
       +        Tnobr, Tnoframes, Tnoscript,
       +        Tobject, Tol, Toptgroup, Toption,
       +        Tp, Tparam, Tpre,
       +        Tq,
       +        Ts, Tsamp, Tscript, Tselect, Tsmall,
       +        Tspan, Tstrike, Tstrong, Tstyle, Tsub, Tsup,
       +        Ttable, Ttbody, Ttd, Ttextarea, Ttfoot,
       +        Tth, Tthead, Ttitle, Ttr, Ttt,
       +        Tu, Tul,
       +        Tvar,
       +        Numtags,        // number of enumerators above; not itself a tag
       +        RBRA = Numtags,        // NOTE(review): presumably added to a tag value to mark the matching end tag -- confirm in lex.c
       +        Data = Numtags+RBRA        // equals 2*Numtags; for this kind Token.text holds the text (see struct Token)
       +};
       +
       +// HTML 4.0 tag attributes
       +// Keep sorted in lexical order
       +enum {
       +        Aabbr, Aaccept_charset, Aaccess_key, Aaction,        // Aabbr is value 0
       +        Aalign, Aalink, Aalt, Aarchive, Aaxis,
       +        Abackground, Abgcolor, Aborder,
       +        Acellpadding, Acellspacing, Achar, Acharoff,
       +        Acharset, Achecked, Acite, Aclass, Aclassid,
       +        Aclear, Acode, Acodebase, Acodetype, Acolor,
       +        Acols, Acolspan, Acompact, Acontent, Acoords,
       +        Adata, Adatetime, Adeclare, Adefer, Adir, Adisabled,
       +        Aenctype,
       +        Aface, Afor, Aframe, Aframeborder,
       +        Aheaders, Aheight, Ahref, Ahreflang, Ahspace, Ahttp_equiv,
       +        Aid, Aismap,
       +        Alabel, Alang, Alink, Alongdesc,
       +        Amarginheight, Amarginwidth, Amaxlength,
       +        Amedia, Amethod, Amultiple,
       +        Aname, Anohref, Anoresize, Anoshade, Anowrap,
       +        Aobject, Aonblur, Aonchange, Aonclick, Aondblclick,
       +        Aonfocus, Aonkeypress, Aonkeyup, Aonload,
       +        Aonmousedown, Aonmousemove, Aonmouseout,
       +        Aonmouseover, Aonmouseup, Aonreset, Aonselect,
       +        Aonsubmit, Aonunload,
       +        Aprofile, Aprompt,
       +        Areadonly, Arel, Arev, Arows, Arowspan, Arules,
       +        Ascheme, Ascope, Ascrolling, Aselected, Ashape,
       +        Asize, Aspan, Asrc, Astandby, Astart, Astyle, Asummary,
       +        Atabindex, Atarget, Atext, Atitle, Atype,
       +        Ausemap,
       +        Avalign, Avalue, Avaluetype, Aversion, Avlink, Avspace,
       +        Awidth,
       +        Numattrs        // number of attribute ids above; not itself an attribute
       +};
       +
       +struct Attr        // one attribute of a tag token, kept on a singly linked list
       +{
       +        Attr*                next;                // in list of attrs for a token
       +        int                attid;                // Aabbr, etc.
       +        Rune*        value;                // the attribute's value string
       +};
       +
       +struct Token        // one token as produced by _gettoks
       +{
       +        int                tag;                // Ta, etc
       +        Rune*        text;                // text in Data, attribute text in tag
       +        Attr*                attr;                // list of Attrs
       +        int                starti;        // index into source buffer of token start
       +};
       +
       +// Name tables, defined in lex.c and filled in by its one-time
       +// initialization; until then they are nil.
       +extern Rune**        tagnames;
       +extern Rune**        attrnames;
       +
       +// Lexer entry points.  _gettoks tokenizes data[0:datalen] and stores
       +// the token count in *plen (it returns nil when there are no tokens).
       +extern void        _freetokens(Token* tarray, int n);
       +extern Token*        _gettoks(uchar* data, int datalen, int chset, int mtype, int* plen);
       +extern int                _tokaval(Token* t, int attid, Rune** pans, int xfer);
       +
       +// Tell the Plan 9 compiler that the %T print verb takes a Token*.
       +#pragma varargck        type "T"        Token*
       +
       +#include "runetab.h"
 (DIR) diff --git a/src/libhtml/lex.c b/src/libhtml/lex.c
       t@@ -0,0 +1,1384 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <draw.h>
       +#include <ctype.h>
       +#include <html.h>
       +#include "impl.h"
       +
       +// Read cursor over the in-memory document being tokenized.
       +// The data buffer is supplied by the caller of _gettoks and is only
       +// referenced here, not copied.
       +typedef struct TokenSource TokenSource;
       +struct TokenSource
       +{
       +        int                        i;                // index of next byte to use
       +        uchar*                data;                // all the data
       +        int                        edata;        // data[0:edata] is valid
       +        int                        chset;        // one of US_Ascii, etc.
       +        int                        mtype;        // TextHtml or TextPlain
       +};
       +
       +// Negative sentinels, distinct from any character value.
       +// NOTE(review): presumably what getchar(ts) reports at end of input (EOF)
       +// and end of the current buffer (EOB) -- confirm against getchar.
       +enum {
       +        EOF = -2,
       +        EOB = -1
       +};
       +
       +// True when c may appear inside a tag or attribute name: a letter, a
       +// digit, '-' or '.'.  The (c)<256 test keeps wide character values away
       +// from the <ctype.h> classifiers.  c is evaluated several times, so pass
       +// a plain variable; callers in this file reject c < 0 before using it.
       +#define ISNAMCHAR(c)        ((c)<256 && (isalpha(c) || isdigit(c) || (c) == '-' || (c) == '.'))
       +
       +// Scratch buffer lengths, counted in elements (the lexing routines
       +// declare Rune buf[BIGBUFSIZE] on the stack).
       +#define SMALLBUFSIZE 240
       +#define BIGBUFSIZE 2000
       +
       +// HTML 4.0 tag names.
       +// Keep sorted, and in correspondence with the tag enum (Ta, etc.).
       +// NOTE(review): this comment used to name "iparse.h"; this file only
       +// includes html.h and impl.h, so that name looks stale -- confirm.
       +// lexinit converts _tagnames into the Rune-string table tagnames and
       +// builds the lookup table tagtable from it (Numtags entries).
       +// NOTE(review): the leading " " and "!" entries presumably stand for the
       +// Notfound and Comment pseudo-tags -- confirm against the enum.
       +Rune **tagnames;
       +char *_tagnames[] = {
       +        " ",
       +        "!",
       +        "a", 
       +        "abbr",
       +        "acronym",
       +        "address",
       +        "applet", 
       +        "area",
       +        "b",
       +        "base",
       +        "basefont",
       +        "bdo",
       +        "big",
       +        "blink",
       +        "blockquote",
       +        "body",
       +        "bq",
       +        "br",
       +        "button",
       +        "caption",
       +        "center",
       +        "cite",
       +        "code",
       +        "col",
       +        "colgroup",
       +        "dd",
       +        "del",
       +        "dfn",
       +        "dir",
       +        "div",
       +        "dl",
       +        "dt",
       +        "em",
       +        "fieldset",
       +        "font",
       +        "form",
       +        "frame",
       +        "frameset",
       +        "h1",
       +        "h2",
       +        "h3",
       +        "h4",
       +        "h5",
       +        "h6",
       +        "head",
       +        "hr",
       +        "html",
       +        "i",
       +        "iframe",
       +        "img",
       +        "input",
       +        "ins",
       +        "isindex",
       +        "kbd",
       +        "label",
       +        "legend",
       +        "li",
       +        "link",
       +        "map",
       +        "menu",
       +        "meta",
       +        "nobr",
       +        "noframes",
       +        "noscript",
       +        "object",
       +        "ol",
       +        "optgroup",
       +        "option",
       +        "p",
       +        "param",
       +        "pre",
       +        "q",
       +        "s",
       +        "samp",
       +        "script",
       +        "select",
       +        "small",
       +        "span",
       +        "strike",
       +        "strong",
       +        "style",
       +        "sub",
       +        "sup",
       +        "table",
       +        "tbody",
       +        "td",
       +        "textarea",
       +        "tfoot",
       +        "th",
       +        "thead",
       +        "title",
       +        "tr",
       +        "tt",
       +        "u",
       +        "ul",
       +        "var"
       +};
       +
       +// HTML 4.0 attribute names.
       +// Keep sorted, and in correspondence with the attribute enum
       +// (Aabbr ... Awidth, Numattrs) in impl.h.
       +// lexinit converts _attrnames into the Rune-string table attrnames and
       +// builds the lookup table attrtable from it (Numattrs entries).
       +// The HTML 4.0 attribute is spelled "accesskey" (no hyphen); the
       +// hyphenated spelling never matched real documents.
       +Rune **attrnames;
       +char* _attrnames[] = {
       +        "abbr",
       +        "accept-charset",
       +        "accesskey",
       +        "action",
       +        "align",
       +        "alink",
       +        "alt",
       +        "archive",
       +        "axis",
       +        "background",
       +        "bgcolor",
       +        "border",
       +        "cellpadding",
       +        "cellspacing",
       +        "char",
       +        "charoff",
       +        "charset",
       +        "checked",
       +        "cite",
       +        "class",
       +        "classid",
       +        "clear",
       +        "code",
       +        "codebase",
       +        "codetype",
       +        "color",
       +        "cols",
       +        "colspan",
       +        "compact",
       +        "content",
       +        "coords",
       +        "data",
       +        "datetime",
       +        "declare",
       +        "defer",
       +        "dir",
       +        "disabled",
       +        "enctype",
       +        "face",
       +        "for",
       +        "frame",
       +        "frameborder",
       +        "headers",
       +        "height",
       +        "href",
       +        "hreflang",
       +        "hspace",
       +        "http-equiv",
       +        "id",
       +        "ismap",
       +        "label",
       +        "lang",
       +        "link",
       +        "longdesc",
       +        "marginheight",
       +        "marginwidth",
       +        "maxlength",
       +        "media",
       +        "method",
       +        "multiple",
       +        "name",
       +        "nohref",
       +        "noresize",
       +        "noshade",
       +        "nowrap",
       +        "object",
       +        "onblur",
       +        "onchange",
       +        "onclick",
       +        "ondblclick",
       +        "onfocus",
       +        "onkeypress",
       +        "onkeyup",
       +        "onload",
       +        "onmousedown",
       +        "onmousemove",
       +        "onmouseout",
       +        "onmouseover",
       +        "onmouseup",
       +        "onreset",
       +        "onselect",
       +        "onsubmit",
       +        "onunload",
       +        "profile",
       +        "prompt",
       +        "readonly",
       +        "rel",
       +        "rev",
       +        "rows",
       +        "rowspan",
       +        "rules",
       +        "scheme",
       +        "scope",
       +        "scrolling",
       +        "selected",
       +        "shape",
       +        "size",
       +        "span",
       +        "src",
       +        "standby",
       +        "start",
       +        "style",
       +        "summary",
       +        "tabindex",
       +        "target",
       +        "text",
       +        "title",
       +        "type",
       +        "usemap",
       +        "valign",
       +        "value",
       +        "valuetype",
       +        "version",
       +        "vlink",
       +        "vspace",
       +        "width"
       +};
       +
       +
       +// Character entity to unicode character number map.
       +// Keep sorted by name.
       +// lexinit converts the ASCII table _chartab into the Rune-keyed table
       +// chartab, so chartab itself is only a pointer.
       +StringInt *chartab;
       +AsciiInt _chartab[142] = {
       +        {"AElig", 198},
       +        {"Aacute", 193},
       +        {"Acirc", 194},
       +        {"Agrave", 192},
       +        {"Aring", 197},
       +        {"Atilde", 195},
       +        {"Auml", 196},
       +        {"Ccedil", 199},
       +        {"ETH", 208},
       +        {"Eacute", 201},
       +        {"Ecirc", 202},
       +        {"Egrave", 200},
       +        {"Euml", 203},
       +        {"Iacute", 205},
       +        {"Icirc", 206},
       +        {"Igrave", 204},
       +        {"Iuml", 207},
       +        {"Ntilde", 209},
       +        {"Oacute", 211},
       +        {"Ocirc", 212},
       +        {"Ograve", 210},
       +        {"Oslash", 216},
       +        {"Otilde", 213},
       +        {"Ouml", 214},
       +        {"THORN", 222},
       +        {"Uacute", 218},
       +        {"Ucirc", 219},
       +        {"Ugrave", 217},
       +        {"Uuml", 220},
       +        {"Yacute", 221},
       +        {"aacute", 225},
       +        {"acirc", 226},
       +        {"acute", 180},
       +        {"aelig", 230},
       +        {"agrave", 224},
       +        {"alpha", 945},
       +        {"amp", 38},
       +        {"aring", 229},
       +        {"atilde", 227},
       +        {"auml", 228},
       +        {"beta", 946},
       +        {"brvbar", 166},
       +        {"ccedil", 231},
       +        {"cdots", 8943},
       +        {"cedil", 184},
       +        {"cent", 162},
       +        {"chi", 967},
       +        {"copy", 169},
       +        {"curren", 164},
       +        {"ddots", 8945},
       +        {"deg", 176},
       +        {"delta", 948},
       +        {"divide", 247},
       +        {"eacute", 233},
       +        {"ecirc", 234},
       +        {"egrave", 232},
       +        {"emdash", 8212},
       +        {"emsp", 8195},
       +        {"endash", 8211},
       +        {"ensp", 8194},
       +        {"epsilon", 949},
       +        {"eta", 951},
       +        {"eth", 240},
       +        {"euml", 235},
       +        {"frac12", 189},
       +        {"frac14", 188},
       +        {"frac34", 190},
       +        {"gamma", 947},
       +        {"gt", 62},
       +        {"iacute", 237},
       +        {"icirc", 238},
       +        {"iexcl", 161},
       +        {"igrave", 236},
       +        {"iota", 953},
       +        {"iquest", 191},
       +        {"iuml", 239},
       +        {"kappa", 954},
       +        {"lambda", 955},
       +        {"laquo", 171},
       +        {"ldots", 8230},
       +        {"lt", 60},
       +        {"macr", 175},
       +        {"micro", 181},
       +        {"middot", 183},
       +        {"mu", 956},
       +        {"nbsp", 160},
       +        {"not", 172},
       +        {"ntilde", 241},
       +        {"nu", 957},
       +        {"oacute", 243},
       +        {"ocirc", 244},
       +        {"ograve", 242},
       +        {"omega", 969},
       +        {"omicron", 959},
       +        {"ordf", 170},
       +        {"ordm", 186},
       +        {"oslash", 248},
       +        {"otilde", 245},
       +        {"ouml", 246},
       +        {"para", 182},
       +        {"phi", 966},
       +        {"pi", 960},
       +        {"plusmn", 177},
       +        {"pound", 163},
       +        {"psi", 968},
       +        {"quad", 8193},
       +        {"quot", 34},
       +        {"raquo", 187},
       +        {"reg", 174},
       +        {"rho", 961},
       +        {"sect", 167},
       +        {"shy", 173},
       +        {"sigma", 963},
       +        {"sp", 8194},
       +        {"sup1", 185},
       +        {"sup2", 178},
       +        {"sup3", 179},
       +        {"szlig", 223},
       +        {"tau", 964},
       +        {"theta", 952},
       +        {"thinsp", 8201},
       +        {"thorn", 254},
       +        {"times", 215},
       +        {"trade", 8482},
       +        {"uacute", 250},
       +        {"ucirc", 251},
       +        {"ugrave", 249},
       +        {"uml", 168},
       +        {"upsilon", 965},
       +        {"uuml", 252},
       +        {"varepsilon", 8712},
       +        {"varphi", 981},
       +        {"varpi", 982},
       +        {"varrho", 1009},
       +        {"vdots", 8942},
       +        {"vsigma", 962},
       +        {"vtheta", 977},
       +        {"xi", 958},
       +        {"yacute", 253},
       +        {"yen", 165},
       +        {"yuml", 255},
       +        {"zeta", 950}
       +};
       +// Entry count must come from the array _chartab: chartab is a pointer,
       +// so sizeof(chartab)/sizeof(chartab[0]) would not be the table length.
       +#define NCHARTAB (sizeof(_chartab)/sizeof(_chartab[0]))
       +
       +// Characters Winstart..Winend are those that Windows
       +// uses interpolated into the Latin1 set.
       +// They aren't supposed to appear in HTML, but they do....
       +enum {
       +        Winstart = 127,
       +        Winend = 159
       +};
       +
       +// Replacement code points, one per value in Winstart..Winend
       +// (33 entries); 8226 (bullet) fills the positions with no mapping.
       +// NOTE(review): presumably indexed by (c - Winstart) -- confirm at the use site.
       +static int        winchars[]= { 8226,        // 8226 is a bullet
       +        8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,
       +        710, 8240, 352, 8249, 338, 8226, 8226, 8226,
       +        8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
       +        732, 8482, 353, 8250, 339, 8226, 8226, 376};
       +
       +// Name -> id lookup tables, built once by lexinit.
       +static StringInt*        tagtable;                // initialized from tagnames
       +static StringInt*        attrtable;                // initialized from attrnames
       +
       +// Forward declarations of the file-local helpers.
       +// lexinit is declared with (void) so this is a real prototype that
       +// matches its definition.
       +static void                lexinit(void);
       +static int                getplaindata(TokenSource* ts, Token* a, int* pai);
       +static int                getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
       +static int                getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
       +static int                gettag(TokenSource* ts, int starti, Token* a, int* pai);
       +static Rune*                buftostr(Rune* s, Rune* buf, int j);
       +static int                comment(TokenSource* ts);
       +static int                findstr(TokenSource* ts, Rune* s);
       +static int                ampersand(TokenSource* ts);
       +//static int                lowerc(int c);
       +static int                getchar(TokenSource* ts);
       +static void                ungetchar(TokenSource* ts, int c);
       +static void                backup(TokenSource* ts, int savei);
       +//static void                freeinsidetoken(Token* t);
       +static void                freeattrs(Attr* ahead);
       +static Attr*                newattr(int attid, Rune* value, Attr* link);
       +static int                Tconv(Fmt* f);
       +
       +// dbglex: 0 is quiet, nonzero prints progress, >1 also prints each token.
       +int        dbglex = 0;
       +// Set to 1 once lexinit has run.
       +static int lexinited = 0;
       +
       +// One-time lexer setup: install the %T print verb and build the
       +// Rune-string name tables and their lookup tables from the ASCII
       +// source tables.  _gettoks calls this on first use.
       +static void
       +lexinit(void)
       +{
       +        fmtinstall('T', Tconv);
       +
       +        // attribute names and their lookup table
       +        attrnames = cvtstringtab(_attrnames, nelem(_attrnames));
       +        attrtable = _makestrinttab(attrnames, Numattrs);
       +
       +        // tag names and their lookup table
       +        tagnames = cvtstringtab(_tagnames, nelem(_tagnames));
       +        tagtable = _makestrinttab(tagnames, Numtags);
       +
       +        // character entity table
       +        chartab = cvtstringinttab(_chartab, nelem(_chartab));
       +
       +        lexinited = 1;
       +}
       +
       +// Allocate a TokenSource positioned at the start of data[0:edata].
       +// chset must be one of the four supported character sets (asserted).
       +// The data buffer is referenced, not copied.
       +static TokenSource*
       +newtokensource(uchar* data, int edata, int chset, int mtype)
       +{
       +        TokenSource*        src;
       +
       +        assert(chset == US_Ascii || chset == ISO_8859_1 ||
       +                        chset == UTF_8 || chset == Unicode);
       +        src = (TokenSource*)emalloc(sizeof(*src));
       +        src->data = data;
       +        src->edata = edata;
       +        src->i = 0;                // start reading at the first byte
       +        src->mtype = mtype;
       +        src->chset = chset;
       +        return src;
       +}
       +
       +// Number of Token slots _gettoks allocates initially and adds each
       +// time its token array needs to grow.
       +enum {
       +        ToksChunk = 500
       +};
       +
       +// Call this to get the tokens.
       +//  The number of returned tokens is returned in *plen.
       +// data[0:datalen] is lexed as HTML when mtype is TextHtml and as plain
       +// text (one Data token per line) otherwise.  Returns an allocated array
       +// of *plen tokens, or nil when no tokens were found.
       +Token*
       +_gettoks(uchar* data, int datalen, int chset, int mtype, int* plen)
       +{
       +        TokenSource*        ts;
       +        Token*                a;
       +        int        alen;
       +        int        ai;
       +        int        starti;
       +        int        c;
       +        int        tag;
       +
       +        if(!lexinited)
       +                lexinit();
       +        ts = newtokensource(data, datalen, chset, mtype);
       +        alen = ToksChunk;
       +        a = (Token*)emalloc(alen * sizeof(Token));
       +        ai = 0;
       +        if(dbglex)
       +                fprint(2, "_gettoks starts, ts.i=%d, ts.edata=%d\n", ts->i, ts->edata);
       +        if(ts->mtype == TextHtml) {
       +                for(;;) {
       +                        // One pass can touch two slots: a <script> tag token
       +                        // followed by getscriptdata, which writes a[ai] again
       +                        // (scratch tag and/or the script's Data token).
       +                        // Keep two slots free so neither write runs off the array.
       +                        if(alen - ai < 2) {
       +                                alen += ToksChunk;
       +                                a = (Token*)erealloc(a, alen*sizeof(Token));
       +                        }
       +                        starti = ts->i;
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                break;
       +                        if(c == '<') {
       +                                tag = gettag(ts, starti, a, &ai);
       +                                if(tag == Tscript) {
       +                                        // special rules for getting Data after....
       +                                        starti = ts->i;
       +                                        c = getchar(ts);
       +                                        tag = getscriptdata(ts, c, starti, a, &ai);
       +                                }
       +                        }
       +                        else
       +                                tag = getdata(ts, c, starti, a, &ai);
       +                        if(tag == -1)
       +                                break;
       +                        else if(dbglex > 1 && tag != Comment)
       +                                fprint(2, "lex: got token %T\n", &a[ai-1]);
       +                }
       +        }
       +        else {
       +                // plain text (non-html) tokens
       +                for(;;) {
       +                        if(ai == alen) {
       +                                alen += ToksChunk;
       +                                a = (Token*)erealloc(a, alen*sizeof(Token));
       +                        }
       +                        tag = getplaindata(ts, a, &ai);
       +                        if(tag == -1)
       +                                break;
       +                        // getplaindata already bumped ai; the new token is a[ai-1]
       +                        if(dbglex > 1)
       +                                fprint(2, "lex: got token %T\n", &a[ai-1]);
       +                }
       +        }
       +        if(dbglex)
       +                fprint(2, "lex: returning %d tokens\n", ai);
       +        *plen = ai;
       +        // the token source is private to this call; tokens do not point into it
       +        free(ts);
       +        if(ai == 0) {
       +                // nothing to hand back, so do not leak the empty array
       +                free(a);
       +                return nil;
       +        }
       +        return a;
       +}
       +
       +// For case where source isn't HTML.
       +// Just make data tokens, one per line (or partial line,
       +// at end of buffer), ignoring non-whitespace control
       +// characters and dumping \r's.
       +// If find non-empty token, fill in a[*pai], bump *pai, and return Data.
       +// Otherwise return -1;
       +static int
       +getplaindata(TokenSource* ts, Token* a, int* pai)
       +{
       +        Rune*        s;
       +        int        j;
       +        int        starti;
       +        int        c;
       +        Token*        tok;
       +        Rune        buf[BIGBUFSIZE];
       +
       +        s = nil;
       +        j = 0;
       +        starti = ts->i;
       +        for(c = getchar(ts); c >= 0; c = getchar(ts)) {
       +                if(c < ' ') {
       +                        if(isspace(c)) {
       +                                if(c == '\r') {
       +                                        // ignore it unless no following '\n',
       +                                        // in which case treat it like '\n'
       +                                        c = getchar(ts);
       +                                        if(c != '\n') {
       +                                                if(c >= 0)
       +                                                        ungetchar(ts, c);
       +                                                c = '\n';
       +                                        }
       +                                }
       +                        }
       +                        else
       +                                c = 0;
       +                }
       +                if(c != 0) {
       +                        buf[j++] = c;
       +                        // Flush when only the terminator slot is left.  The limit
       +                        // is in elements, not bytes: sizeof(buf) would count bytes
       +                        // and let j run past the end of the Rune array.
       +                        if(j == BIGBUFSIZE-1) {
       +                                s = buftostr(s, buf, j);
       +                                j = 0;
       +                        }
       +                }
       +                if(c == '\n')
       +                        break;
       +        }
       +        s = buftostr(s, buf, j);
       +        if(s == nil)
       +                return -1;
       +        tok = &a[(*pai)++];
       +        tok->tag = Data;
       +        tok->text = s;
       +        tok->attr = nil;
       +        tok->starti = starti;
       +        return Data;
       +}
       +
       +// Return concatenation of s and buf[0:j]
       +// buf must have room for a terminator at buf[j].  s may be nil, in
       +// which case the result is just a copy of buf[0:j].  When s is not nil
       +// it is consumed: callers always replace s with the return value, so
       +// the old partial string is released here instead of being leaked.
       +// NOTE(review): relies on _Strdup2 (utils.c) returning a freshly
       +// allocated string and not freeing its arguments -- confirm.
       +static Rune*
       +buftostr(Rune* s, Rune* buf, int j)
       +{
       +        Rune*        joined;
       +
       +        buf[j] = 0;
       +        if(s == nil)
       +                return _Strndup(buf, j);
       +        joined = _Strdup2(s, buf);
       +        free(s);
       +        return joined;
       +}
       +
       +// Collect character data starting with firstc and running up to (but
       +// not including) the next '<' or the end of the buffer.
       +// Entity references (&amp;) are translated by ampersand(); control
       +// characters that are not whitespace are dropped, and "\r" or "\r\n"
       +// becomes a single '\n'.
       +// If the resulting text is non-empty it is stored as a Data token in
       +// a[*pai], *pai is bumped and Data is returned; otherwise returns -1.
       +static int
       +getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
       +{
       +        Rune        buf[BIGBUFSIZE];
       +        Rune*        text;
       +        Token*        tok;
       +        int        n;
       +        int        c;
       +        int        next;
       +
       +        text = nil;
       +        n = 0;
       +        for(c = firstc; c >= 0; c = getchar(ts)) {
       +                if(c == '&') {
       +                        c = ampersand(ts);
       +                        if(c < 0)
       +                                break;
       +                }
       +                else if(c == '<') {
       +                        // start of a tag: leave it for the caller
       +                        ungetchar(ts, c);
       +                        break;
       +                }
       +                else if(c < ' ') {
       +                        if(!isspace(c)) {
       +                                if(warn)
       +                                        fprint(2, "warning: non-whitespace control character %d ignored\n", c);
       +                                c = 0;
       +                        }
       +                        else if(c == '\r') {
       +                                // swallow a following '\n'; a lone '\r' counts as '\n'
       +                                next = getchar(ts);
       +                                if(next != '\n' && next >= 0)
       +                                        ungetchar(ts, next);
       +                                c = '\n';
       +                        }
       +                }
       +                if(c == 0)
       +                        continue;
       +                buf[n++] = c;
       +                if(n == BIGBUFSIZE-1) {
       +                        // buffer full (one slot kept for the terminator): flush it
       +                        text = buftostr(text, buf, n);
       +                        n = 0;
       +                }
       +        }
       +        text = buftostr(text, buf, n);
       +        if(text == nil)
       +                return -1;
       +        tok = &a[*pai];
       +        *pai += 1;
       +        tok->starti = starti;
       +        tok->attr = nil;
       +        tok->text = text;
       +        tok->tag = Data;
       +        return Data;
       +}
       +
       +// The rules for lexing scripts are different (ugh).
       +// Gather up everything until see a </SCRIPT>.
       +// firstc is the first character after the <SCRIPT> tag and starti its
       +// index in the source.  If a </SCRIPT> is seen, or all of the data has
       +// been consumed, the gathered text becomes a Data token in a[*pai],
       +// *pai is bumped and Data is returned.  Otherwise the source is backed
       +// up to starti, the partial text is discarded, and -1 is returned.
       +static int
       +getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
       +{
       +        Rune*        s;
       +        int        j;
       +        int        tstarti;
       +        int        savei;
       +        int        c;
       +        int        tag;
       +        int        done;
       +        Token*        tok;
       +        Rune        buf[BIGBUFSIZE];
       +
       +        s = nil;
       +        j = 0;
       +        tstarti = starti;
       +        c = firstc;
       +        done = 0;
       +        while(c >= 0) {
       +                if(c == '<') {
       +                        // other browsers ignore stuff to end of line after <!
       +                        savei = ts->i;
       +                        c = getchar(ts);
       +                        if(c == '!') {
       +                                while(c >= 0 && c != '\n' && c != '\r')
       +                                        c = getchar(ts);
       +                                if(c == '\r')
       +                                        c = getchar(ts);
       +                                if(c == '\n')
       +                                        c = getchar(ts);
       +                        }
       +                        else if(c >= 0) {
       +                                // trial-lex the tag using a[*pai] as scratch space
       +                                backup(ts, savei);
       +                                tag = gettag(ts, tstarti, a, pai);
       +                                if(tag == -1)
       +                                        break;
       +                                // gettag bumps *pai except for comments; undo that
       +                                if(tag != Comment)
       +                                        (*pai)--;
       +                                // rewind to the '<' so the tag is lexed again later
       +                                backup(ts, tstarti);
       +                                if(tag == Tscript + RBRA) {
       +                                        done = 1;
       +                                        break;
       +                                }
       +                                // here tag was not </SCRIPT>, so take as regular data
       +                                c = getchar(ts);
       +                        }
       +                }
       +                if(c < 0)
       +                        break;
       +                if(c != 0) {
       +                        buf[j++] = c;
       +                        if(j == BIGBUFSIZE-1) {
       +                                s = buftostr(s, buf, j);
       +                                j = 0;
       +                        }
       +                }
       +                tstarti = ts->i;
       +                c = getchar(ts);
       +        }
       +        if(done || ts->i == ts->edata) {
       +                s = buftostr(s, buf, j);
       +                tok = &a[(*pai)++];
       +                tok->tag = Data;
       +                tok->text = s;
       +                tok->attr = nil;
       +                tok->starti = starti;
       +                return Data;
       +        }
       +        // giving up: the text gathered so far is not handed to anyone
       +        free(s);
       +        backup(ts, starti);
       +        return -1;
       +}
       +
       +// We've just seen a '<'.  Gather up stuff to closing '>' (if buffer
       +// ends before then, return -1).
       +// If it's a tag, look up the name, gather the attributes, and return
       +// the appropriate token.
       +// Else it's either just plain data or some kind of ignorable stuff:
       +// return Data or Comment as appropriate.
       +// If it's not a Comment, put it in a[*pai] and bump *pai.
       +static int
       +gettag(TokenSource* ts, int starti, Token* a, int* pai)
       +{
       +        int        rbra;
       +        int        ans;
       +        Attr*        al;
       +        int        nexti;
       +        int        c;
       +        int        ti;
       +        int        afnd;
       +        int        attid;
       +        int        quote;
       +        Rune*        val;
       +        int        nv;
       +        int        i;
       +        int        tag;
       +        Token*        tok;
       +        Rune        buf[BIGBUFSIZE];
       +
       +        rbra = 0;
       +        nexti = ts->i;
       +        tok = &a[*pai];
       +        tok->tag = Notfound;
       +        tok->text = nil;
       +        tok->attr = nil;
       +        tok->starti = starti;
       +        c = getchar(ts);
       +        if(c == '/') {
       +                rbra = RBRA;
       +                c = getchar(ts);
       +        }
       +        if(c < 0)
       +                goto eob_done;
       +        if(c >= 256 || !isalpha(c)) {
       +                // not a tag
       +                if(c == '!') {
       +                        ans = comment(ts);
       +                        if(ans != -1)
       +                                return ans;
       +                        goto eob_done;
       +                }
       +                else {
       +                        backup(ts, nexti);
       +                        tok->tag = Data;
       +                        tok->text = _Strdup(L(Llt));
       +                        (*pai)++;
       +                        return Data;
       +                }
       +        }
       +        // c starts a tagname
       +        buf[0] = c;
       +        i = 1;
       +        while(1) {
       +                c = getchar(ts);
       +                if(c < 0)
       +                        goto eob_done;
       +                if(!ISNAMCHAR(c))
       +                        break;
       +                // if name is bigger than buf it won't be found anyway...
       +                if(i < BIGBUFSIZE)
       +                        buf[i++] = c;
       +        }
       +        if(_lookup(tagtable, Numtags, buf, i, &tag))
       +                tok->tag = tag + rbra;
       +        else
       +                tok->text = _Strndup(buf, i);        // for warning print, in build
       +
       +        // attribute gathering loop
       +        al = nil;
       +        while(1) {
       +                // look for "ws name" or "ws name ws = ws val"  (ws=whitespace)
       +                // skip whitespace
       +attrloop_continue:
       +                while(c < 256 && isspace(c)) {
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                goto eob_done;
       +                }
       +                if(c == '>')
       +                        goto attrloop_done;
       +                if(c == '<') {
       +                        if(warn)
       +                                fprint(2, "warning: unclosed tag\n");
       +                        ungetchar(ts, c);
       +                        goto attrloop_done;
       +                }
       +                if(c >= 256 || !isalpha(c)) {
       +                        if(warn)
       +                                fprint(2, "warning: expected attribute name\n");
       +                        // skipt to next attribute name
       +                        while(1) {
       +                                c = getchar(ts);
       +                                if(c < 0)
       +                                        goto eob_done;
       +                                if(c < 256 && isalpha(c))
       +                                        goto attrloop_continue;
       +                                if(c == '<') {
       +                                        if(warn)
       +                                                fprint(2, "warning: unclosed tag\n");
       +                                        ungetchar(ts, 60);
       +                                        goto attrloop_done;
       +                                }
       +                                if(c == '>')
       +                                        goto attrloop_done;
       +                        }
       +                }
       +                // gather attribute name
       +                buf[0] = c;
       +                i = 1;
       +                while(1) {
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                goto eob_done;
       +                        if(!ISNAMCHAR(c))
       +                                break;
       +                        if(i < BIGBUFSIZE-1)
       +                                buf[i++] = c;
       +                }
       +                afnd = _lookup(attrtable, Numattrs, buf, i, &attid);
       +                if(warn && !afnd) {
       +                        buf[i] = 0;
       +                        fprint(2, "warning: unknown attribute name %S\n", buf);
       +                }
       +                // skip whitespace
       +                while(c < 256 && isspace(c)) {
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                goto eob_done;
       +                }
       +                if(c != '=') {
       +                        if(afnd)
       +                                al = newattr(attid, nil, al);
       +                        goto attrloop_continue;
       +                }
       +                //# c is '=' here;  skip whitespace
       +                while(1) {
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                goto eob_done;
       +                        if(c >= 256 || !isspace(c))
       +                                break;
       +                }
       +                quote = 0;
       +                if(c == '\'' || c == '"') {
       +                        quote = c;
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                goto eob_done;
       +                }
       +                val = nil;
       +                nv = 0;
       +                while(1) {
       +valloop_continue:
       +                        if(c < 0)
       +                                goto eob_done;
       +                        if(c == '>') {
       +                                if(quote) {
       +                                        // c might be part of string (though not good style)
       +                                        // but if line ends before close quote, assume
       +                                        // there was an unmatched quote
       +                                        ti = ts->i;
       +                                        while(1) {
       +                                                c = getchar(ts);
       +                                                if(c < 0)
       +                                                        goto eob_done;
       +                                                if(c == quote) {
       +                                                        backup(ts, ti);
       +                                                        buf[nv++] = '>';
       +                                                        if(nv == BIGBUFSIZE-1) {
       +                                                                val = buftostr(val, buf, nv);
       +                                                                nv = 0;
       +                                                        }
       +                                                        c = getchar(ts);
       +                                                        goto valloop_continue;
       +                                                }
       +                                                if(c == '\n') {
       +                                                        if(warn)
       +                                                                fprint(2, "warning: apparent unmatched quote\n");
       +                                                        backup(ts, ti);
       +                                                        c = '>';
       +                                                        goto valloop_done;
       +                                                }
       +                                        }
       +                                }
       +                                else
       +                                        goto valloop_done;
       +                        }
       +                        if(quote) {
       +                                if(c == quote) {
       +                                        c = getchar(ts);
       +                                        if(c < 0)
       +                                                goto eob_done;
       +                                        goto valloop_done;
       +                                }
       +                                if(c == '\r') {
       +                                        c = getchar(ts);
       +                                        goto valloop_continue;
       +                                }
       +                                if(c == '\t' || c == '\n')
       +                                        c = ' ';
       +                        }
       +                        else {
       +                                if(c < 256 && isspace(c))
       +                                        goto valloop_done;
       +                        }
       +                        if(c == '&') {
       +                                c = ampersand(ts);
       +                                if(c == -1)
       +                                        goto eob_done;
       +                        }
       +                        buf[nv++] = c;
       +                        if(nv == BIGBUFSIZE-1) {
       +                                val = buftostr(val, buf, nv);
       +                                nv = 0;
       +                        }
       +                        c = getchar(ts);
       +                }
       +valloop_done:
       +                if(afnd) {
       +                        val = buftostr(val, buf, nv);
       +                        al = newattr(attid, val, al);
       +                }
       +        }
       +
       +attrloop_done:
       +        tok->attr = al;
       +        (*pai)++;
       +        return tok->tag;
       +
       +eob_done:
       +        if(warn)
       +                fprint(2, "warning: incomplete tag at end of page\n");
       +        backup(ts, nexti);
       +        tok->tag = Data;
       +        tok->text = _Strdup(L(Llt));
       +        return Data;
       +}
       +
       +// Called just after '<!' has been consumed.
       +// Decide whether what follows is an ignorable section:
       +//   "<!--" is closed by "-->", or failing that by the next '>';
       +//   "<!" followed by anything else is closed by the next '>'.
       +// (This follows common browser practice, not the SGML spec.)
       +//
       +// Returns Comment, with ts positioned just past the closer, when a
       +// closer is found.  Returns -1 when the data runs out first; ts is
       +// not restored in that case, so the caller must back up itself
       +// (and retry when more data arrives, or at end of file turn the
       +// '<' into a data token).
       +static int
       +comment(TokenSource* ts)
       +{
       +        int        start;
       +        int        c;
       +
       +        start = ts->i;
       +        c = getchar(ts);
       +        if(c == '-') {
       +                c = getchar(ts);
       +                if(c == '-') {
       +                        if(findstr(ts, L(Larrow)))
       +                                return Comment;
       +                        // no "-->" available: rescan from the start for a plain '>'
       +                        backup(ts, start);
       +                }
       +        }
       +        if(c == '>')
       +                return Comment;
       +        if(c >= 0 && findstr(ts, L(Lgt)))
       +                return Comment;
       +        return -1;
       +}
       +
       +// Scan forward in ts for the rune string s.
       +// Returns 1 with ts positioned just past the match.
       +// Returns 0 if the data runs out first; ts is left wherever the
       +// scan stopped, so the caller is expected to back up.
       +static int
       +findstr(TokenSource* ts, Rune* s)
       +{
       +        int        first;
       +        int        len;
       +        int        resume;
       +        int        k;
       +        int        c;
       +
       +        first = s[0];
       +        len = runestrlen(s);
       +        for(;;) {
       +                c = getchar(ts);
       +                if(c < 0)
       +                        return 0;
       +                if(c != first)
       +                        continue;
       +                if(len == 1)
       +                        return 1;
       +                // first rune matched; remember where to resume if the rest fails
       +                resume = ts->i;
       +                k = 1;
       +                while(k < len) {
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                return 0;
       +                        if(c != s[k])
       +                                break;
       +                        k++;
       +                }
       +                if(k == len)
       +                        return 1;
       +                backup(ts, resume);
       +        }
       +}
       +
       +// We've just read an '&'; look for an entity reference and, if one
       +// is found, return the character it denotes.
       +//
       +// Two forms are recognized:
       +//   &#NNN   decimal character number (at least one digit required);
       +//           values in [Winstart, Winend] are remapped through winchars.
       +//   &name   looked up in chartab; if the full name is unknown,
       +//           successively shorter prefixes are tried (gets around
       +//           some buggy HTML out there), and the unused tail of the
       +//           name is pushed back onto the input.
       +// A terminating ';', '\n' or '\r' is consumed with the reference;
       +// any other terminator is pushed back.
       +//
       +// If nothing is recognized -- including when the data runs out
       +// before the reference is terminated -- back up to just past the
       +// '&' and return '&'.
       +// NOTE(review): the previous header promised a -1 return for an
       +// entity that cannot be completed in the current buffer, but no
       +// path here produces -1; callers' -1 checks never fire.
       +static int
       +ampersand(TokenSource* ts)
       +{
       +        int        savei;
       +        int        c;
       +        int        fnd;
       +        int        ans;
       +        int        v;
       +        int        ndigits;
       +        int        i;
       +        int        k;
       +        Rune        buf[SMALLBUFSIZE];
       +
       +        savei = ts->i;
       +        c = getchar(ts);
       +        fnd = 0;
       +        ans = -1;
       +        if(c == '#') {
       +                c = getchar(ts);
       +                v = 0;
       +                ndigits = 0;
       +                while(c >= 0) {
       +                        if(!(c < 256 && isdigit(c)))
       +                                break;
       +                        // Stop accumulating once v is beyond any valid code
       +                        // point so that a long digit run cannot overflow int;
       +                        // the remaining digits are still consumed.
       +                        if(v <= 0x10FFFF)
       +                                v = v*10 + (c - '0');
       +                        ndigits++;
       +                        c = getchar(ts);
       +                }
       +                // "&#" with no digits is not a reference: fall through to
       +                // the not-found path rather than producing a NUL rune.
       +                if(c >= 0 && ndigits > 0) {
       +                        if(!(c == ';' || c == '\n' || c == '\r'))
       +                                ungetchar(ts, c);
       +                        c = v;
       +                        if(c >= Winstart && c <= Winend) {
       +                                c = winchars[c - Winstart];
       +                        }
       +                        ans = c;
       +                        fnd = 1;
       +                }
       +        }
       +        else if(c < 256 && isalpha(c)) {
       +                // gather the entity name (silently truncated to fit buf)
       +                buf[0] = c;
       +                k = 1;
       +                while(1) {
       +                        c = getchar(ts);
       +                        if(c < 0)
       +                                break;
       +                        if(ISNAMCHAR(c)) {
       +                                if(k < SMALLBUFSIZE-1)
       +                                        buf[k++] = c;
       +                        }
       +                        else {
       +                                if(!(c == ';' || c == '\n' || c == '\r'))
       +                                        ungetchar(ts, c);
       +                                break;
       +                        }
       +                }
       +                if(c >= 0) {
       +                        fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
       +                        if(!fnd) {
       +                                // Try prefixes of the name.  The terminator was
       +                                // consumed above; put it back first so it follows
       +                                // whatever tail of the name gets pushed back.
       +                                if(c == ';' || c == '\n' || c == '\r')
       +                                        ungetchar(ts, c);
       +                                i = k;
       +                                while(--k > 0) {
       +                                        fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
       +                                        if(fnd) {
       +                                                // push back the part of the name not used
       +                                                while(i > k) {
       +                                                        i--;
       +                                                        ungetchar(ts, buf[i]);
       +                                                }
       +                                                break;
       +                                        }
       +                                }
       +                        }
       +                }
       +        }
       +        if(!fnd) {
       +                backup(ts, savei);
       +                ans = '&';
       +        }
       +        return ans;
       +}
       +
       +// Return the next character from ts, decoded according to ts->chset,
       +// and advance ts->i past it.
       +// Returns -1 if no complete character is left before the current end
       +// of data (ts->edata).  For the multi-byte encodings (UTF_8, Unicode)
       +// a trailing partial character also marks the buffer as fully used
       +// (ts->i = ts->edata).
       +static int
       +getchar(TokenSource* ts)
       +{
       +        uchar*        buf;
       +        int        c;
       +        int        n;
       +        Rune        r;
       +
       +        if(ts->i >= ts->edata)
       +                return -1;
       +        buf = ts->data;
       +        c = buf[ts->i];
       +        switch(ts->chset) {
       +        case ISO_8859_1:
       +                // bytes in [Winstart, Winend] are remapped through winchars
       +                if(c >= Winstart && c <= Winend)
       +                        c = winchars[c - Winstart];
       +                ts->i++;
       +                break;
       +        case US_Ascii:
       +                // non-ASCII bytes are reported (if warn) but passed through
       +                if(c > 127) {
       +                        if(warn)
       +                                fprint(2, "non-ascii char (%x) when US-ASCII specified\n", c);
       +                }
       +                ts->i++;
       +                break;
       +        case UTF_8:
       +                // Decode only once fullrune has confirmed that the whole
       +                // sequence is present, so that chartorune is never asked
       +                // to read bytes beyond edata.
       +                if(fullrune((char*)(buf+ts->i), ts->edata-ts->i)) {
       +                        n = chartorune(&r, (char*)(buf+ts->i));
       +                        // NOTE(review): this tests the first byte, not the
       +                        // decoded rune; possibly meant to detect a decoding
       +                        // error -- confirm intent.
       +                        if(warn && c == 0x80)
       +                                fprint(2, "warning: invalid utf-8 sequence (starts with %x)\n", ts->data[ts->i]);
       +                        ts->i += n;
       +                        c = r;
       +                }
       +                else {
       +                        // not enough bytes in buf to complete utf-8 char
       +                        ts->i = ts->edata;        // mark "all used"
       +                        c = -1;
       +                }
       +                break;
       +        case Unicode:
       +                if(ts->i < ts->edata - 1) {
       +                        //standards say most-significant byte first
       +                        c = (c << 8)|(buf[ts->i + 1]);
       +                        ts->i += 2;
       +                }
       +                else {
       +                        // only one byte of a two-byte character is available
       +                        ts->i = ts->edata;        // mark "all used"
       +                        c = -1;
       +                }
       +                break;
       +        }
       +        return c;
       +}
       +
       +// Undo the most recent getchar, given that it returned c: move ts->i
       +// back by the number of bytes c occupies in the source encoding, so
       +// that the next getchar yields c again.
       +//   Unicode:          always 2 bytes
       +//   UTF_8, c >= 128:  the length runetochar reports for c
       +//   everything else:  1 byte
       +static void
       +ungetchar(TokenSource* ts, int c)
       +{
       +        int        width;
       +        Rune        r;
       +        char        enc[UTFmax];
       +
       +        width = 1;
       +        if(ts->chset == Unicode)
       +                width = 2;
       +        else if(ts->chset == UTF_8 && c >= 128) {
       +                r = c;
       +                width = runetochar(enc, &r);
       +        }
       +        ts->i -= width;
       +}
       +
       +// Rewind (or reposition) ts to the previously saved index savei.
       +// When dbglex is set, the old and new positions are traced on fd 2.
       +static void
       +backup(TokenSource* ts, int savei)
       +{
       +        if(dbglex) {
       +                fprint(2, "lex: backup; i=%d, savei=%d\n", ts->i, savei);
       +        }
       +        ts->i = savei;
       +}
       +
       +
       +// Find the first attribute with id attid in token t.
       +// Returns 1 if present, 0 if not.
       +// When pans is non-nil, *pans receives the attribute's value
       +// (or nil when the attribute is absent); pans may be nil if the
       +// caller only wants to test for presence.
       +// If xfer is true, ownership of the value string passes to the
       +// caller and the Attr's pointer is nil'd out; otherwise the caller
       +// must duplicate the value if it needs to keep it.
       +int
       +_tokaval(Token* t, int attid, Rune** pans, int xfer)
       +{
       +        Attr*        a;
       +
       +        for(a = t->attr; a != nil; a = a->next) {
       +                if(a->attid != attid)
       +                        continue;
       +                if(pans != nil)
       +                        *pans = a->value;
       +                if(xfer)
       +                        a->value = nil;
       +                return 1;
       +        }
       +        if(pans != nil)
       +                *pans = nil;
       +        return 0;
       +}
       +
       +// Fmt conversion routine for a Token*: renders the token as text
       +// in a local buffer and hands the result to fmtstrcpy.
       +//   nil token        -> "<null>"
       +//   Data token       -> 'text'
       +//   any other token  -> <name attr attr=value ...>, with a '/'
       +//                       before the name when the tag is >= RBRA
       +// When dbglex > 1 the output is prefixed with "[starti]".
       +// NOTE(review): presumably installed with the fmt library elsewhere
       +// in this file -- the registration is not visible in this section.
       +static int
       +Tconv(Fmt *f)
       +{
       +        Token*        t;
       +        int        i;
       +        int        tag;
       +        char*        srbra;
       +        Rune*        aname;
       +        Rune*        tname;
       +        Attr*        a;
       +        char        buf[BIGBUFSIZE];
       +
       +        t = va_arg(f->args, Token*);
       +        if(t == nil)
       +                sprint(buf, "<null>");
       +        else {
       +                // i tracks how much of buf has been filled so far
       +                i = 0;
       +                if(dbglex > 1)
       +                        i = snprint(buf, sizeof(buf), "[%d]", t->starti);
       +                tag = t->tag;
       +                if(tag == Data) {
       +                        i += snprint(buf+i, sizeof(buf)-i-1, "'%S'", t->text);
       +                }
       +                else {
       +                        srbra = "";
       +                        if(tag >= RBRA) {
       +                                // strip the RBRA offset so tag indexes tagnames
       +                                tag -= RBRA;
       +                                srbra = "/";
       +                        }
       +                        tname = tagnames[tag];
       +                        // unrecognized tags are shown as "?"
       +                        if(tag == Notfound)
       +                                tname = L(Lquestion);
       +                        i += snprint(buf+i, sizeof(buf)-i-1, "<%s%S", srbra, tname);
       +                        for(a = t->attr; a != nil; a = a->next) {
       +                                aname = attrnames[a->attid];
       +                                i += snprint(buf+i, sizeof(buf)-i-1, " %S", aname);
       +                                if(a->value != nil)
       +                                        i += snprint(buf+i, sizeof(buf)-i-1, "=%S", a->value);
       +                        }
       +                        i += snprint(buf+i, sizeof(buf)-i-1, ">");
       +                }
       +                // NOTE(review): relies on snprint's return values keeping i
       +                // within buf -- confirm against the fmt library's semantics
       +                // when the output is truncated.
       +                buf[i] = 0;
       +        }
       +        return fmtstrcpy(f, buf);
       +}
       +
       +// Allocate an Attr for (attid, value) and push it on the front of
       +// the list link; returns the new head.
       +// The Attr takes ownership of value, though build may later move
       +// some values into its items and nil them out in the Attr.
       +static Attr*
       +newattr(int attid, Rune* value, Attr* link)
       +{
       +        Attr* a;
       +
       +        a = emalloc(sizeof(*a));
       +        a->next = link;
       +        a->value = value;
       +        a->attid = attid;
       +        return a;
       +}
       +
       +// Free every Attr on the next-linked list starting at ahead,
       +// together with the value string each one still holds.
       +static void
       +freeattrs(Attr* ahead)
       +{
       +        Attr* cur;
       +        Attr* following;
       +
       +        for(cur = ahead; cur != nil; cur = following) {
       +                // grab the link before the node is released
       +                following = cur->next;
       +                free(cur->value);
       +                free(cur);
       +        }
       +}
       +
       +// Free an array of Tokens along with the text and attribute list
       +// of each of its first n entries.
       +// The allocation may have room for more than n tokens, but only
       +// the first n are initialized.
       +// A caller that has taken ownership of a token's text or
       +// attributes must have nil'd out the corresponding pointer first.
       +// A nil tarray is ignored.
       +void
       +_freetokens(Token* tarray, int n)
       +{
       +        Token* t;
       +
       +        if(tarray == nil)
       +                return;
       +        for(t = tarray; n > 0; n--, t++) {
       +                free(t->text);
       +                freeattrs(t->attr);
       +        }
       +        free(tarray);
       +}
 (DIR) diff --git a/src/libhtml/mkfile b/src/libhtml/mkfile
       t@@ -0,0 +1,22 @@
       +# mkfile for libhtml.
       +# Include the mkfile selected by $systype and $objtype.
       +<$SYS9/$systype/$objtype/mkfile
       +
       +# Library archive this directory produces.
       +LIB=$LIB9/libhtml.a
       +
       +# Object files that make up the library.
       +OFILES=\
       +        build.$O\
       +        lex.$O\
       +        strinttab.$O\
       +        utils.$O\
       +        runetab.$O\
       +
       +# Headers listed for the build: the public html.h and the private impl.h.
       +HFILES=\
       +        $SYS9/sys/include/html.h\
       +        impl.h\
       +
       +# File list consumed by the included rules (presumably an update
       +# target -- confirm in mksyslib): this mkfile, the headers, the .c
       +# source for each object, and the 386 copy of the library.
       +UPDATE=\
       +        mkfile\
       +        $HFILES\
       +        ${OFILES:%.$O=%.c}\
       +        ${LIB:$SYS9/$systype/$objtype/%=$SYS9/$systype/386/%}\
       +
       +# Include the shared mksyslib rules.
       +<$SYS9/sys/src/cmd/mksyslib
 (DIR) diff --git a/src/libhtml/runetab.c b/src/libhtml/runetab.c
       t@@ -0,0 +1,83 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <draw.h>
       +#include <html.h>
       +#include "impl.h"
       +
       +// Rune-string constants used throughout the library.
       +// runeconsttab is the Rune version of the table, built at run time
       +// by runetabinit from the char* strings in _runeconsttab.
       +// The entries are indexed by the L* enum in runetab.h (via the L()
       +// macro), so the order here must match that enum exactly.
       +Rune **runeconsttab;
       +char *_runeconsttab[] = {
       +        "        ",
       +        " ",
       +        "",
       +        "#",
       +        "+",
       +        ", ",
       +        "-",
       +        "-->",
       +        "1",
       +        "<",
       +        ">",
       +        "?",
       +        "Index search terms:",
       +        "Reset",
       +        "Submit",
       +        "^0-9",
       +        "_ISINDEX_",
       +        "_blank",
       +        "_fr",
       +        "_no_name_submit_",
       +        "_parent",
       +        "_self",
       +        "_top",
       +        "application/x-www-form-urlencoded",
       +        "circle",
       +        "cm",
       +        "content-script-type",
       +        "disc",
       +        "em",
       +        "in",
       +        "javascript",
       +        "jscript",
       +        "jscript1.1",
       +        "mm",
       +        "none",
       +        "pi",
       +        "pt",
       +        "refresh",
       +        "select",
       +        "square",
       +        "textarea",
       +};
       +
       +// Build a newly allocated table of n Rune strings, one per entry
       +// of tab, converting each with toStr as US_Ascii.
       +Rune**
       +cvtstringtab(char **tab, int n)
       +{
       +        Rune **out;
       +        int k;
       +
       +        out = emalloc(n*sizeof(Rune*));
       +        k = 0;
       +        while(k < n) {
       +                out[k] = toStr(tab[k], strlen(tab[k]), US_Ascii);
       +                k++;
       +        }
       +        return out;
       +}
       +
       +// Build a newly allocated StringInt table from the n AsciiInt
       +// entries of tab: each key is converted with toStr as US_Ascii,
       +// each val is copied unchanged.
       +StringInt*
       +cvtstringinttab(AsciiInt *tab, int n)
       +{
       +        StringInt *out;
       +        AsciiInt *src;
       +        int k;
       +
       +        out = emalloc(n*sizeof(StringInt));
       +        for(k = 0; k < n; k++) {
       +                src = &tab[k];
       +                out[k].key = toStr(src->key, strlen(src->key), US_Ascii);
       +                out[k].val = src->val;
       +        }
       +        return out;
       +}
       +
       +// Populate runeconsttab with Rune versions of the _runeconsttab strings.
       +void
       +runetabinit(void)
       +{
       +        int count;
       +
       +        count = nelem(_runeconsttab);
       +        runeconsttab = cvtstringtab(_runeconsttab, count);
       +}
 (DIR) diff --git a/src/libhtml/runetab.h b/src/libhtml/runetab.h
       t@@ -0,0 +1,59 @@
       +/*
       + * A (char* key, int val) pair: the plain-char counterpart of
       + * StringInt, converted to it by cvtstringinttab.
       + */
       +typedef struct AsciiInt AsciiInt;
       +
       +struct AsciiInt {
       +        char*        key;        /* key as an ordinary C string */
       +        int        val;        /* value associated with key */
       +};
       +
       +/*
       + * Indices into runeconsttab, for use with the L() macro.
       + * The order must match the strings of _runeconsttab in runetab.c
       + * entry for entry.
       + */
       +enum {
       +        Ltab2space,
       +        Lspace,
       +        Lempty,
       +        Lhash,
       +        Lplus,
       +        Lcommaspace,
       +        Lminus,
       +        Larrow,
       +        Lone,
       +        Llt,
       +        Lgt,
       +        Lquestion,
       +        Lindex,
       +        Lreset,
       +        Lsubmit,
       +        Lnot0to9,
       +        Lisindex,
       +        L_blank,
       +        Lfr,
       +        Lnoname,
       +        L_parent,
       +        L_self,
       +        L_top,
       +        Lappl_form,
       +        Lcircle,
       +        Lcm,
       +        Lcontent,
       +        Ldisc,
       +        Lem,
       +        Lin,
       +        Ljavascript,
       +        Ljscript,
       +        Ljscript1,
       +        Lmm,
       +        Lnone,
       +        Lpi,
       +        Lpt,
       +        Lrefresh,
       +        Lselect,
       +        Lsquare,
       +        Ltextarea,
       +};
       +
       +/* L(x) yields the Rune string constant for index x (one of the L* enum values). */
       +#define L(x)        runeconsttab[(x)]
       +
       +/* Table of Rune string constants; filled in by runetabinit. */
       +extern        Rune        **runeconsttab;
       +
       +/* XXX: for unix port only */
       +/* Helpers that convert char* tables to their Rune counterparts. */
       +Rune                **cvtstringtab(char**, int);
       +StringInt        *cvtstringinttab(AsciiInt*, int);
       +void                runetabinit(void);
 (DIR) diff --git a/src/libhtml/strinttab.c b/src/libhtml/strinttab.c
       t@@ -0,0 +1,64 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <draw.h>
       +#include <html.h>
       +#include "impl.h"
       +
       +// Do case-insensitive lookup of key[0:keylen] in t[0:n] (key part),
       +// returning 1 if found, 0 if not.
       +// Array t must be sorted in increasing lexicographic order of key.
       +// If found, return corresponding val in *pans; *pans is left
       +// untouched otherwise.  pans must not be nil.
       +int
       +_lookup(StringInt* t, int n, Rune* key, int keylen, int* pans)
       +{
       +        int        min;
       +        int        max;
       +        int        try;
       +        int        cmpresult;
       +
       +        // binary search over the closed interval [min, max]
       +        min = 0;
       +        max = n - 1;
       +        while(min <= max) {
       +                // midpoint computed without forming min+max, which
       +                // could overflow int for very large tables
       +                try = min + (max - min)/2;
       +                cmpresult = _Strncmpci(key, keylen, t[try].key);
       +                if(cmpresult > 0)
       +                        min = try + 1;
       +                else if(cmpresult < 0)
       +                        max = try - 1;
       +                else {
       +                        *pans = t[try].val;
       +                        return 1;
       +                }
       +        }
       +        return 0;
       +}
       +
       +// Reverse lookup: linear scan of t[0:n] for the first entry whose
       +// val equals val.  Returns that entry's key, or nil if there is none.
       +Rune*
       +_revlookup(StringInt* t, int n, int val)
       +{
       +        StringInt*        e;
       +        StringInt*        end;
       +
       +        end = t + n;
       +        for(e = t; e < end; e++) {
       +                if(e->val == val)
       +                        return e->key;
       +        }
       +        return nil;
       +}
       +
       +// Build a newly allocated StringInt table from a[0:n] in which
       +// each string maps to its own index.  The keys point at the
       +// strings in a (they are not copied).
       +// Asserts that the strings are in non-decreasing runestrcmp order.
       +StringInt*
       +_makestrinttab(Rune** a, int n)
       +{
       +        StringInt*        tab;
       +        StringInt*        e;
       +        int        i;
       +
       +        tab = emalloc(n * sizeof(StringInt));
       +        i = 0;
       +        while(i < n) {
       +                e = &tab[i];
       +                e->key = a[i];
       +                e->val = i;
       +                assert(i == 0 || runestrcmp(a[i], a[i - 1]) >= 0);
       +                i++;
       +        }
       +        return tab;
       +}
 (DIR) diff --git a/src/libhtml/utils.c b/src/libhtml/utils.c
       t@@ -0,0 +1,591 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <draw.h>
       +#include <html.h>
       +#include "impl.h"
       +
       +// Character-class strings (in the notation described further down
       +// in this file): whitespace matches space, tab, newline and
       +// carriage return; notwhitespace is the same set negated with a
       +// leading '^'.
       +Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
       +Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
       +
       +// All lists start out like List structure.
       +// List itself can be used as list of int.
       +//
       +// Count the nodes on the next-linked list l (0 for a nil list).
       +int
       +_listlen(List* l)
       +{
       +        int count;
       +
       +        for(count = 0; l != nil; l = l->next)
       +                count++;
       +        return count;
       +}
       +
       +// Cons: allocate a node holding val and link it in front of rest.
       +// Returns the new head of the list.
       +List*
       +_newlist(int val, List* rest)
       +{
       +        List* head;
       +
       +        head = emalloc(sizeof(*head));
       +        head->next = rest;
       +        head->val = val;
       +        return head;
       +}
       +
       +// Reverse a list in place by relinking its nodes; no allocation.
       +// Returns the new head (the old last node), or nil for an empty list.
       +List*
       +_revlist(List* l)
       +{
       +        List* done;
       +        List* rest;
       +
       +        for(done = nil; l != nil; l = rest) {
       +                rest = l->next;
       +                // move l onto the front of the already-reversed part
       +                l->next = done;
       +                done = l;
       +        }
       +        return done;
       +}
       +
       +// The next few routines take a "character class" as argument.
       +//    e.g., "a-zA-Z", or "^ \t\n"
       +// (ranges indicated by - except in first position;
       +//  ^ in first position means "not in" the following class)
       +
       +// Splitl: split s[0:n] just before the first character of class cl.
       +// The left part is returned in (*p1, *n1), the right part -- which
       +// starts with the class character -- in (*p2, *n2).
       +// If no character of the class occurs, the whole string is the left
       +// part and the right part is (nil, 0).
       +// Note: the answers point into the original string.
       +void
       +_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
       +{
       +        Rune* hit;
       +
       +        hit = _Strnclass(s, cl, n);
       +        *p1 = s;
       +        if(hit != nil) {
       +                *p2 = hit;
       +                *n1 = hit-s;
       +                *n2 = n-*n1;
       +                return;
       +        }
       +        *n1 = n;
       +        *p2 = nil;
       +        *n2 = 0;
       +}
       +
       +// Splitr: split s[0:n] just after the last character of class cl.
       +// The left part -- which ends with the class character -- is
       +// returned in (*p1, *n1), the right part in (*p2, *n2).
       +// If no character of the class occurs, the left part is (nil, 0)
       +// and the whole string is the right part.
       +// Note: the answers point into the original string.
       +void
       +_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
       +{
       +        Rune* hit;
       +
       +        hit = _Strnrclass(s, cl, n);
       +        if(hit != nil) {
       +                *p1 = s;
       +                *p2 = hit+1;
       +                *n1 = *p2-s;
       +                *n2 = n-*n1;
       +                return;
       +        }
       +        *p1 = nil;
       +        *n1 = 0;
       +        *p2 = s;
       +        *n2 = n;
       +}
       +
       +// _splitall breaks s[0:n] into the pieces that lie between runes of
       +// class cl (the separators).  Every piece has nonzero length.
       +// The start of piece k is stored in strarr[k] and its length in
       +// lenarr[k]; at most alen pieces are recorded.
       +// Returns the number of pieces stored (0 if s is nil or n is 0).
       +// The pieces point into s; nothing is copied.
       +int
       +_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
       +{
       +        int nparts;
       +        Rune* cur;
       +        Rune* stop;
       +        Rune* end;
       +
       +        if(s == nil || n == 0)
       +                return 0;
       +        end = s+n;
       +        cur = s;
       +        for(nparts = 0; cur < end && nparts < alen; nparts++) {
       +                // step over the separators in front of the next piece
       +                while(cur < end && _inclass(*cur, cl))
       +                        cur++;
       +                if(cur == end)
       +                        break;
       +                // the piece runs up to the next separator, or to the end
       +                stop = _Strnclass(cur, cl, end-cur);
       +                if(stop == nil)
       +                        stop = end;
       +                assert(stop > cur && stop <= end);
       +                strarr[nparts] = cur;
       +                lenarr[nparts] = stop-cur;
       +                cur = stop;
       +        }
       +        return nparts;
       +}
       +
       +// _trimwhite finds the part of s[0:n] that remains after leading and
       +// trailing whitespace is removed.  A pointer to that part (inside s,
       +// nothing is copied) is stored in *pans and its length in *panslen.
       +// If n <= 0, or s[0:n] holds no rune of class notwhitespace, the
       +// result is the empty string: *pans is nil and *panslen is 0.
       +void
       +_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
       +{
       +        Rune* first;
       +        Rune* last;
       +
       +        // start from the empty answer so that a nil pointer is never
       +        // paired with a nonzero length
       +        *pans = nil;
       +        *panslen = 0;
       +        if(n <= 0)
       +                return;
       +        first = _Strnclass(s, notwhitespace, n);
       +        if(first == nil)
       +                return;
       +        // a first non-white rune exists, so a last one must too
       +        last = _Strnrclass(s, notwhitespace, n);
       +        assert(last != nil);
       +        *pans = first;
       +        *panslen = last+1-first;
       +}
       +
       +// _Strclass scans the 0-terminated string s from the front and
       +// returns a pointer to the first rune that is a member of class cl,
       +// or nil if the terminator is reached first.
       +// s itself must not be nil.
       +Rune*
       +_Strclass(Rune* s, Rune* cl)
       +{
       +        while(*s != 0) {
       +                if(_inclass(*s, cl))
       +                        return s;
       +                s++;
       +        }
       +        return nil;
       +}
       +
       +// _Strnclass scans s[0:n] from the front and returns a pointer to the
       +// first rune that is a member of class cl, or nil if there is none.
       +// The scan also stops (returning nil) at a 0 rune inside s[0:n].
       +Rune*
       +_Strnclass(Rune* s, Rune* cl, int n)
       +{
       +        int i;
       +
       +        for(i = 0; i != n && s[i] != 0; i++) {
       +                if(_inclass(s[i], cl))
       +                        return s+i;
       +        }
       +        return nil;
       +}
       +
       +// _Strrclass scans the 0-terminated string s from the back and
       +// returns a pointer to the last rune that is a member of class cl.
       +// Returns nil if there is none, or if s is nil or empty.
       +Rune*
       +_Strrclass(Rune* s, Rune* cl)
       +{
       +        int i;
       +
       +        if(s == nil || *s == 0)
       +                return nil;
       +        for(i = runestrlen(s)-1; i >= 0; i--) {
       +                if(_inclass(s[i], cl))
       +                        return s+i;
       +        }
       +        return nil;
       +}
       +
       +// _Strnrclass scans s[0:n] from the back and returns a pointer to the
       +// last rune that is a member of class cl.
       +// Returns nil if there is none, or if s is nil, n is 0, or s[0] is 0.
       +// Unlike _Strnclass, a 0 rune after s[0] does not stop the scan.
       +Rune*
       +_Strnrclass(Rune* s, Rune* cl, int n)
       +{
       +        int i;
       +
       +        if(s == nil || *s == 0 || n == 0)
       +                return nil;
       +        for(i = n-1; i >= 0; i--) {
       +                if(_inclass(s[i], cl))
       +                        return s+i;
       +        }
       +        return nil;
       +}
       +
       +// _inclass reports (1 or 0) whether rune c is a member of the
       +// character class cl.
       +// A leading '^' inverts the answer for the rest of the class.
       +// A '-' that is neither the first nor the last rune of the class
       +// (after any '^') denotes the inclusive range between its neighbours;
       +// anywhere else '-' is an ordinary member.
       +// A nil or empty class has no members, so the result is 0.
       +int
       +_inclass(Rune c, Rune* cl)
       +{
       +        int len;
       +        int pos;
       +        int found;
       +        int invert;
       +        Rune* set;
       +
       +        len = _Strlen(cl);
       +        if(len == 0)
       +                return 0;
       +        set = cl;
       +        invert = (set[0] == '^');
       +        if(invert) {
       +                set++;
       +                len--;
       +        }
       +        found = 0;
       +        pos = 0;
       +        while(pos < len && !found) {
       +                if(set[pos] == '-' && pos > 0 && pos < len-1) {
       +                        // "x-y": inclusive test against both endpoints
       +                        if(set[pos-1] <= c && c <= set[pos+1])
       +                                found = 1;
       +                        // the upper endpoint is consumed with the range
       +                        pos += 2;
       +                }
       +                else {
       +                        if(set[pos] == c)
       +                                found = 1;
       +                        pos++;
       +                }
       +        }
       +        return invert ? !found : found;
       +}
       +
       +// _prefix reports (1 or 0) whether the string pre is a prefix of the
       +// string s.  Lengths are taken with _Strlen, so a nil string counts
       +// as empty; an empty pre is a prefix of anything.
       +int
       +_prefix(Rune* pre, Rune* s)
       +{
       +        int left;
       +        Rune* a;
       +        Rune* b;
       +
       +        left = _Strlen(pre);
       +        if(_Strlen(s) < left)
       +                return 0;
       +        a = pre;
       +        b = s;
       +        while(left > 0) {
       +                if(*a++ != *b++)
       +                        return 0;
       +                left--;
       +        }
       +        return 1;
       +}
       +
       +// _Strlen returns the number of runes in the 0-terminated string s,
       +// not counting the terminator.  A nil s has length 0.
       +int
       +_Strlen(Rune* s)
       +{
       +        return (s != nil) ? runestrlen(s) : 0;
       +}
       +
       +// _Strcmp compares s1 and s2 lexicographically, treating a nil
       +// pointer like the empty string.
       +// If either argument is nil the result is exactly -1, 0 or 1;
       +// otherwise it is whatever runestrcmp(s1, s2) returns.
       +int
       +_Strcmp(Rune *s1, Rune *s2)
       +{
       +        if(s1 != nil && s2 != nil)
       +                return runestrcmp(s1, s2);
       +        if(s1 == nil) {
       +                // "" sorts before any nonempty s2
       +                if(s2 != nil && *s2 != 0)
       +                        return -1;
       +                return 0;
       +        }
       +        // here s2 is nil and s1 is not
       +        if(*s1 != 0)
       +                return 1;
       +        return 0;
       +}
       +
       +// _Strncmpci compares exactly the n1 runes s1[0:n1] (the caller
       +// guarantees s1 is at least that long) with the 0-terminated string s2
       +// and returns -1, 0 or 1.
       +// Runes of s1 in [A-Z] are folded to lower case before comparing;
       +// s2 is used as is, so it is expected to hold no runes in [A-Z].
       +// (Used for in-place keyword lookup: s2 comes from a keyword list,
       +// s1 is a possibly mixed-case slice of a buffer.)
       +int
       +_Strncmpci(Rune *s1, int n1, Rune *s2)
       +{
       +        Rune a, b;
       +        int i;
       +
       +        for(i = 0; i != n1; i++) {
       +                a = s1[i];
       +                b = s2[i];
       +                if('A' <= a && a <= 'Z')
       +                        a = a - 'A' + 'a';
       +                if(a > b)
       +                        return 1;
       +                if(a < b)
       +                        return -1;
       +        }
       +        // s1 is used up: equal only if s2 ends here as well
       +        return (s2[i] == 0) ? 0 : -1;
       +}
       +
       +// _Strdup returns a freshly allocated copy of the 0-terminated
       +// string s, made with _Strndup.
       +// Returns nil if s is nil (and, via _Strndup, if s is empty).
       +Rune*
       +_Strdup(Rune* s)
       +{
       +        return (s == nil) ? nil : _Strndup(s, runestrlen(s));
       +}
       +
       +// _Strndup returns a freshly allocated, 0-terminated copy of the
       +// first n runes of s (the caller guarantees s is at least that long).
       +// Returns nil, without allocating, if n <= 0.
       +Rune*
       +_Strndup(Rune* s, int n)
       +{
       +        Rune* copy;
       +
       +        if(n < 1)
       +                return nil;
       +        copy = _newstr(n);
       +        copy[n] = 0;
       +        memmove(copy, s, n*sizeof(Rune));
       +        return copy;
       +}
       +// _newstr allocates, with emalloc, space for n runes plus one more
       +// for a 0 terminator.  The contents are not set by this routine;
       +// the caller must store the runes and the terminator.
       +Rune*
       +_newstr(int n)
       +{
       +        int nrunes;
       +
       +        nrunes = n+1;        // one extra slot for the terminator
       +        return (Rune*)emalloc(nrunes*sizeof(Rune));
       +}
       +
       +// _Strdup2 returns a freshly allocated, 0-terminated string holding
       +// s followed by t.  A nil argument counts as the empty string.
       +// Returns nil, without allocating, if both are empty.
       +Rune*
       +_Strdup2(Rune* s, Rune* t)
       +{
       +        int ns, nt;
       +        Rune* cat;
       +
       +        ns = _Strlen(s);
       +        nt = _Strlen(t);
       +        if(ns == 0 && nt == 0)
       +                return nil;
       +        cat = _newstr(ns+nt);
       +        // append s, then t, then terminate where the second copy ended
       +        *_Stradd(_Stradd(cat, s, ns), t, nt) = 0;
       +        return cat;
       +}
       +
       +// _Strsubstr returns a freshly allocated, 0-terminated copy of
       +// s[start:stop], made with _Strndup.
       +// Returns nil if start == stop (and, via _Strndup, if stop < start).
       +Rune*
       +_Strsubstr(Rune* s, int start, int stop)
       +{
       +        int len;
       +
       +        len = stop-start;
       +        if(len == 0)
       +                return nil;
       +        return _Strndup(s+start, len);
       +}
       +
       +// _Stradd copies n runes from s2 to s1 and returns s1+n, the place
       +// where a following append (or the terminator) should go.
       +// No terminator is written.  With n == 0 nothing is touched and
       +// s1 is returned unchanged.
       +Rune*
       +_Stradd(Rune* s1, Rune* s2, int n)
       +{
       +        if(n != 0) {
       +                memmove(s1, s2, n*sizeof(Rune));
       +                s1 += n;
       +        }
       +        return s1;
       +}
       +
       +// _Strtol is like strtol, but converts from a 0-terminated Rune string.
       +//
       +// Leading white space is skipped, then an optional '+' or '-' is
       +// accepted, then digits in the given base are consumed.
       +//   base 0:   "0x"/"0X" prefix selects 16, a leading '0' selects 8,
       +//             anything else selects 10.
       +//   base 16:  an optional "0x"/"0X" prefix is skipped.
       +//   base < 0 or base > 36: nothing is converted.
       +// Letters a-z / A-Z stand for the digit values 10..35.
       +//
       +// If endptr is not nil, *endptr is set to the first rune after the
       +// digits, or to nptr if no digit was consumed.
       +// Returns the converted value.  If the magnitude does not fit in a
       +// long, the digits are still consumed and the result is LONG_MIN
       +// (with a '-' sign) or LONG_MAX.
       +
       +//#define LONG_MAX        2147483647L
       +//#define LONG_MIN        -2147483648L
       +
       +long
       +_Strtol(Rune* nptr, Rune** endptr, int base)
       +{
       +        Rune* p;
       +        long n;
       +        int c, ovfl, v, neg, ndig;
       +
       +        p = nptr;
       +        neg = 0;
       +        n = 0;
       +        ndig = 0;
       +        ovfl = 0;
       +
       +        /*
       +         * White space
       +         */
       +        for(;;p++){
       +                switch(*p){
       +                case ' ':
       +                case '\t':
       +                case '\n':
       +                case '\f':
       +                case '\r':
       +                case '\v':
       +                        continue;
       +                }
       +                break;
       +        }
       +
       +        /*
       +         * Sign
       +         */
       +        if(*p=='-' || *p=='+')
       +                if(*p++ == '-')
       +                        neg = 1;
       +
       +        /*
       +         * Base
       +         */
       +        if(base==0){
       +                if(*p != '0')
       +                        base = 10;
       +                else{
       +                        base = 8;
       +                        if(p[1]=='x' || p[1]=='X'){
       +                                p += 2;
       +                                base = 16;
       +                        }
       +                }
       +        }else if(base==16 && *p=='0'){
       +                if(p[1]=='x' || p[1]=='X')
       +                        p += 2;
       +        }else if(base<0 || 36<base)
       +                goto Return;
       +
       +        /*
       +         * Non-empty sequence of digits
       +         */
       +        for(;; p++,ndig++){
       +                c = *p;
       +                v = base;        /* not a digit unless proven otherwise */
       +                if('0'<=c && c<='9')
       +                        v = c - '0';
       +                else if('a'<=c && c<='z')
       +                        v = c - 'a' + 10;
       +                else if('A'<=c && c<='Z')
       +                        v = c - 'A' + 10;
       +                if(v >= base)
       +                        break;
       +                /* after an overflow keep consuming digits, but stop accumulating */
       +                if(ovfl)
       +                        continue;
       +                /*
       +                 * Check before multiplying: n*base + v > LONG_MAX
       +                 * exactly when n > (LONG_MAX - v)/base.  Testing the
       +                 * result afterwards would rely on signed overflow,
       +                 * which is undefined and can go unnoticed.
       +                 */
       +                if(n > (LONG_MAX - v)/base)
       +                        ovfl = 1;
       +                else
       +                        n = n*base + v;
       +        }
       +
       +    Return:
       +        if(ndig == 0)
       +                p = nptr;
       +        if(endptr)
       +                *endptr = p;
       +        if(ovfl){
       +                if(neg)
       +                        return LONG_MIN;
       +                return LONG_MAX;
       +        }
       +        if(neg)
       +                return -n;
       +        return n;
       +}
       +
       +// utf8step decodes the UTF-8 sequence that starts at buf[i] (i < n)
       +// into *r and returns the number of bytes it occupies.
       +// If the sequence is cut short by the end of buf[0:n], *r is set to
       +// Runeerror and 1 is returned, so that chartorune is never asked to
       +// look at bytes beyond buf[n-1].
       +static int
       +utf8step(Rune* r, uchar* buf, int i, int n)
       +{
       +        if(!fullrune((char*)(buf+i), n-i)) {
       +                *r = Runeerror;
       +                return 1;
       +        }
       +        return chartorune(r, (char*)(buf+i));
       +}
       +
       +// toStr converts buf[0:n], bytes whose character set is chset,
       +// into an emalloc'd, 0-terminated Unicode string.
       +//   US_Ascii, ISO_8859_1: each byte becomes the rune of the same value.
       +//   UTF_8: the bytes are decoded with chartorune; a sequence truncated
       +//          at the end of the buffer decodes as Runeerror.
       +// Any other chset trips an assertion (and yields nil).
       +Rune*
       +toStr(uchar* buf, int n, int chset)
       +{
       +        int i;
       +        int m;
       +        Rune ch;
       +        Rune* ans;
       +
       +        switch(chset) {
       +        case US_Ascii:
       +        case ISO_8859_1:
       +                ans = (Rune*)emalloc((n+1)*sizeof(Rune));
       +                for(i = 0; i < n; i++)
       +                        ans[i] = buf[i];
       +                ans[n] = 0;
       +                break;
       +
       +        case UTF_8:
       +                // first pass: count the runes so the allocation is exact
       +                m = 0;
       +                for(i = 0; i < n; ) {
       +                        i += utf8step(&ch, buf, i, n);
       +                        m++;
       +                }
       +                ans = (Rune*)emalloc((m+1)*sizeof(Rune));
       +                // second pass: decode again, this time storing the runes
       +                m = 0;
       +                for(i = 0; i < n; ) {
       +                        i += utf8step(&ch, buf, i, n);
       +                        ans[m++] = ch;
       +                }
       +                ans[m] = 0;
       +                break;
       +
       +        default:
       +                ans = nil;
       +                assert(0);
       +        }
       +        return ans;
       +}
       +
       +// fromStr converts the n runes buf[0:n] into an emalloc'd,
       +// 0-terminated byte string in character set chset.
       +//   US_Ascii:   runes above 127 are replaced by the byte 0x80.
       +//   ISO_8859_1: runes above 255 are replaced by the byte 0x80.
       +//   UTF_8:      every rune is encoded with runetochar.
       +// Any other chset trips an assertion (and yields nil).
       +uchar*
       +fromStr(Rune* buf, int n, int chset)
       +{
       +        int i;
       +        int lim;
       +        int nbytes;
       +        uchar* out;
       +        uchar* w;
       +        uchar scratch[UTFmax];
       +
       +        out = nil;
       +        if(chset == US_Ascii || chset == ISO_8859_1) {
       +                // one byte per rune; lim is the largest representable rune
       +                lim = 255;
       +                if(chset == US_Ascii)
       +                        lim = 127;
       +                out = (uchar*)emalloc(n+1);
       +                for(i = 0; i < n; i++)
       +                        out[i] = (buf[i] > lim) ? 0x80 : buf[i];
       +                out[n] = 0;
       +        }
       +        else if(chset == UTF_8) {
       +                // first pass: encode into a scratch buffer just to measure
       +                nbytes = 0;
       +                for(i = 0; i < n; i++)
       +                        nbytes += runetochar((char*)scratch, buf+i);
       +                out = (uchar*)emalloc(nbytes+1);
       +                // second pass: encode straight into the result
       +                w = out;
       +                for(i = 0; i < n; i++)
       +                        w += runetochar((char*)w, buf+i);
       +                *w = 0;
       +        }
       +        else
       +                assert(0);
       +        return out;
       +}
       +
       +// _ltoStr formats n in decimal ("%d") and returns the text as an
       +// emalloc'd, 0-terminated Rune string, built with toStr.
       +Rune*
       +_ltoStr(int n)
       +{
       +        uchar digits[20];
       +        int len;
       +
       +        // snprint is bounded by the size of the local buffer
       +        len = snprint((char*)digits, sizeof digits, "%d", n);
       +        return toStr(digits, len, US_Ascii);
       +}