initial repo - webdump - HTML to plain-text converter for webpages
(HTM) git clone git://git.codemadness.org/webdump
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit ce2a730d81823f9fc5f1d607296bb4529e9aeef0
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 7 Sep 2023 18:25:16 +0200
initial repo
Reset development/chaotic hacking history.
Diffstat:
A LICENSE | 15 +++++++++++++++
A Makefile | 100 +++++++++++++++++++++++++++++++
A README | 98 +++++++++++++++++++++++++++++++
A arg.h | 42 +++++++++++++++++++++++++++++++
A namedentities.all.h | 2031 +++++++++++++++++++++++++++++++
A namedentities.h | 62 +++++++++++++++++++++++++++++++
A strlcat.c | 55 +++++++++++++++++++++++++++++++
A strlcpy.c | 50 +++++++++++++++++++++++++++++++
A webdump.1 | 66 +++++++++++++++++++++++++++++++
A webdump.c | 2072 +++++++++++++++++++++++++++++++
A xml.c | 489 +++++++++++++++++++++++++++++++
A xml.h | 49 +++++++++++++++++++++++++++++++
12 files changed, 5129 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2017-2023 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
(DIR) diff --git a/Makefile b/Makefile
@@ -0,0 +1,100 @@
+.POSIX:
+
+NAME = webdump
+VERSION = 0.1
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/man
+DOCPREFIX = ${PREFIX}/share/doc/${NAME}
+
+RANLIB = ranlib
+
+# use system flags.
+WEBDUMP_CFLAGS = ${CFLAGS}
+WEBDUMP_LDFLAGS = ${LDFLAGS}
+WEBDUMP_CPPFLAGS = -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 -D_BSD_SOURCE
+
+BIN = ${NAME}
+SCRIPTS =
+
+SRC = ${BIN:=.c}
+HDR = arg.h namedentities.h namedentities.all.h xml.h
+
+LIBXML = libxml.a
+LIBXMLSRC = \
+ xml.c
+LIBXMLOBJ = ${LIBXMLSRC:.c=.o}
+
+COMPATSRC = \
+ strlcat.c\
+ strlcpy.c
+COMPATOBJ =\
+ strlcat.o\
+ strlcpy.o
+
+LIB = ${LIBXML} ${COMPATOBJ}
+
+MAN1 = ${BIN:=.1}\
+ ${SCRIPTS:=.1}
+
+DOC = \
+ LICENSE\
+ README
+
+all: ${BIN}
+
+${BIN}: ${LIB} ${@:=.o}
+
+OBJ = ${SRC:.c=.o} ${LIBXMLOBJ} ${COMPATOBJ}
+
+${OBJ}: ${HDR}
+
+.o:
+ ${CC} ${WEBDUMP_LDFLAGS} -o $@ $< ${LIB}
+
+.c.o:
+ ${CC} ${WEBDUMP_CFLAGS} ${WEBDUMP_CPPFLAGS} -o $@ -c $<
+
+${LIBXML}: ${LIBXMLOBJ}
+ ${AR} rc $@ $?
+ ${RANLIB} $@
+
+dist:
+ rm -rf "${NAME}-${VERSION}"
+ mkdir -p "${NAME}-${VERSION}"
+ cp -f ${MAN1} ${DOC} ${HDR} \
+ ${SRC} ${LIBXMLSRC} ${COMPATSRC} ${SCRIPTS} \
+ Makefile \
+ "${NAME}-${VERSION}"
+ # make tarball
+ tar -cf - "${NAME}-${VERSION}" | \
+ gzip -c > "${NAME}-${VERSION}.tar.gz"
+ rm -rf "${NAME}-${VERSION}"
+
+clean:
+ rm -f ${BIN} ${OBJ} ${LIB}
+
+install: all
+ # installing executable files and scripts.
+ mkdir -p "${DESTDIR}${PREFIX}/bin"
+ cp -f ${BIN} ${SCRIPTS} "${DESTDIR}${PREFIX}/bin"
+ for f in ${BIN} ${SCRIPTS}; do chmod 755 "${DESTDIR}${PREFIX}/bin/$$f"; done
+ # installing example files.
+ mkdir -p "${DESTDIR}${DOCPREFIX}"
+ cp -f README "${DESTDIR}${DOCPREFIX}"
+ # installing manual pages for general commands: section 1.
+ mkdir -p "${DESTDIR}${MANPREFIX}/man1"
+ cp -f ${MAN1} "${DESTDIR}${MANPREFIX}/man1"
+ for m in ${MAN1}; do chmod 644 "${DESTDIR}${MANPREFIX}/man1/$$m"; done
+
+uninstall:
+ # removing executable files and scripts.
+ for f in ${BIN} ${SCRIPTS}; do rm -f "${DESTDIR}${PREFIX}/bin/$$f"; done
+ # removing example files.
+ rm -f "${DESTDIR}${DOCPREFIX}/README"
+ -rmdir "${DESTDIR}${DOCPREFIX}"
+ # removing manual pages.
+ for m in ${MAN1}; do rm -f "${DESTDIR}${MANPREFIX}/man1/$$m"; done
+
+.PHONY: all clean dist install uninstall
(DIR) diff --git a/README b/README
@@ -0,0 +1,98 @@
+!!!
+
+NOTE! work-in-progress (very slowly) and experimental.
+
+This code has many dirty hacks and ugliness. Intended for my personal use only.
+Knowing this: of course feel free to use it in any way you like, see the
+LICENSE.
+
+!!!
+
+
+webdump
+-------
+
+HTML to plain-text converter tool.
+
+It reads HTML in UTF-8 from stdin and writes plain-text to stdout.
+
+
+Build and install
+-----------------
+
+$ make
+# make install
+
+
+Dependencies
+------------
+
+- C compiler.
+- libc + some BSDisms.
+
+
+Usage
+-----
+
+webdump < file.html | less -R
+
+hurl 'https://codemadness.org/' | webdump | less -R
+
+webdump -a -i -l -r -w $(tput cols) < file.html | less -R
+
+
+Goals / scope
+-------------
+
+The tool will only render HTML to stdout, similarly to links -dump or
+lynx -dump but simpler and more secure.
+
+- HTML and XHTML will be supported.
+- There will be some workarounds and quirks for broken and legacy HTML code.
+- It will be usable and secure for reading HTML from mails and RSS/Atom feeds.
+- No remote resources which are part of the HTML will be downloaded:
+ images, video, audio, etc. But these may be visible as a link reference.
+- Data will be written to stdout. Intended for plain-text or a text terminal.
+- No support for Javascript, CSS, frame rendering or forms.
+- No HTTP or network protocol handling: HTML data is read from stdin.
+
+
+Features
+--------
+
+- Support for word-wrapping.
+- A mode to enable basic markup: bold, underline, italic and blink ;)
+- Indentation of headers, paragraphs, pre and list items.
+- Basic support to query an element or hide them.
+- Show link references.
+- Show link references and resources such as img, video, audio, subtitles.
+- Export link references and resources to a TAB-separated format.
+
+
+Examples
+--------
+
+To use webdump as a HTML filter for example in mutt, change in ~/.mailcap:
+
+ text/html; /home/user/.config/scripts/mutt/viewhtml.sh %s; copiousoutput; nametemplate=%s.html
+
+The viewhtml.sh could be something like:
+
+ #!/bin/sh
+ webdump -r -l < "$1"
+
+In mutt you should then add:
+
+ auto_view text/html
+
+
+License
+-------
+
+ISC, see LICENSE file.
+
+
+Author
+------
+
+Hiltjo Posthuma <hiltjo@codemadness.org>
(DIR) diff --git a/arg.h b/arg.h
@@ -0,0 +1,42 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+#ifndef ARG_H__
+#define ARG_H__
+
+extern char *argv0;
+
+/* use main(int argc, char *argv[]) */
+#define ARGBEGIN for (argv0 = *argv, argv++, argc--;\
+ argv[0] && argv[0][0] == '-'\
+ && argv[0][1];\
+ argc--, argv++) {\
+ char argc_;\
+ char **argv_;\
+ int brk_;\
+ if (argv[0][1] == '-' && argv[0][2] == '\0') {\
+ argv++;\
+ argc--;\
+ break;\
+ }\
+ int i_;\
+ for (i_ = 1, brk_ = 0, argv_ = argv;\
+ argv[0][i_] && !brk_;\
+ i_++) {\
+ if (argv_ != argv)\
+ break;\
+ argc_ = argv[0][i_];\
+ switch (argc_)
+
+#define ARGEND }\
+ }
+
+#define EARGF(x) ((argv[0][i_+1] == '\0' && argv[1] == NULL)?\
+ ((x), abort(), (char *)0) :\
+ (brk_ = 1, (argv[0][i_+1] != '\0')?\
+ (&argv[0][i_+1]) :\
+ (argc--, argv++, argv[0])))
+
+#endif
(DIR) diff --git a/namedentities.all.h b/namedentities.all.h
@@ -0,0 +1,2031 @@
+{ "AElig;", 0x000C6 }, /* LATIN CAPITAL LETTER AE */
+{ "AMP;", 0x00026 }, /* AMPERSAND */
+{ "Aacute;", 0x000C1 }, /* LATIN CAPITAL LETTER A WITH ACUTE */
+{ "Abreve;", 0x00102 }, /* LATIN CAPITAL LETTER A WITH BREVE */
+{ "Acirc;", 0x000C2 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
+{ "Acy;", 0x00410 }, /* CYRILLIC CAPITAL LETTER A */
+{ "Afr;", 0x1D504 }, /* MATHEMATICAL FRAKTUR CAPITAL A */
+{ "Agrave;", 0x000C0 }, /* LATIN CAPITAL LETTER A WITH GRAVE */
+{ "Alpha;", 0x00391 }, /* GREEK CAPITAL LETTER ALPHA */
+{ "Amacr;", 0x00100 }, /* LATIN CAPITAL LETTER A WITH MACRON */
+{ "And;", 0x02A53 }, /* DOUBLE LOGICAL AND */
+{ "Aogon;", 0x00104 }, /* LATIN CAPITAL LETTER A WITH OGONEK */
+{ "Aopf;", 0x1D538 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL A */
+{ "ApplyFunction;", 0x02061 }, /* FUNCTION APPLICATION */
+{ "Aring;", 0x000C5 }, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
+{ "Ascr;", 0x1D49C }, /* MATHEMATICAL SCRIPT CAPITAL A */
+{ "Assign;", 0x02254 }, /* COLON EQUALS */
+{ "Atilde;", 0x000C3 }, /* LATIN CAPITAL LETTER A WITH TILDE */
+{ "Auml;", 0x000C4 }, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+{ "Backslash;", 0x02216 }, /* SET MINUS */
+{ "Barv;", 0x02AE7 }, /* SHORT DOWN TACK WITH OVERBAR */
+{ "Barwed;", 0x02306 }, /* PERSPECTIVE */
+{ "Bcy;", 0x00411 }, /* CYRILLIC CAPITAL LETTER BE */
+{ "Because;", 0x02235 }, /* BECAUSE */
+{ "Bernoullis;", 0x0212C }, /* SCRIPT CAPITAL B */
+{ "Beta;", 0x00392 }, /* GREEK CAPITAL LETTER BETA */
+{ "Bfr;", 0x1D505 }, /* MATHEMATICAL FRAKTUR CAPITAL B */
+{ "Bopf;", 0x1D539 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL B */
+{ "Breve;", 0x002D8 }, /* BREVE */
+{ "Bscr;", 0x0212C }, /* SCRIPT CAPITAL B */
+{ "Bumpeq;", 0x0224E }, /* GEOMETRICALLY EQUIVALENT TO */
+{ "CHcy;", 0x00427 }, /* CYRILLIC CAPITAL LETTER CHE */
+{ "COPY;", 0x000A9 }, /* COPYRIGHT SIGN */
+{ "Cacute;", 0x00106 }, /* LATIN CAPITAL LETTER C WITH ACUTE */
+{ "Cap;", 0x022D2 }, /* DOUBLE INTERSECTION */
+{ "CapitalDifferentialD;", 0x02145 }, /* DOUBLE-STRUCK ITALIC CAPITAL D */
+{ "Cayleys;", 0x0212D }, /* BLACK-LETTER CAPITAL C */
+{ "Ccaron;", 0x0010C }, /* LATIN CAPITAL LETTER C WITH CARON */
+{ "Ccedil;", 0x000C7 }, /* LATIN CAPITAL LETTER C WITH CEDILLA */
+{ "Ccirc;", 0x00108 }, /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */
+{ "Cconint;", 0x02230 }, /* VOLUME INTEGRAL */
+{ "Cdot;", 0x0010A }, /* LATIN CAPITAL LETTER C WITH DOT ABOVE */
+{ "Cedilla;", 0x000B8 }, /* CEDILLA */
+{ "CenterDot;", 0x000B7 }, /* MIDDLE DOT */
+{ "Cfr;", 0x0212D }, /* BLACK-LETTER CAPITAL C */
+{ "Chi;", 0x003A7 }, /* GREEK CAPITAL LETTER CHI */
+{ "CircleDot;", 0x02299 }, /* CIRCLED DOT OPERATOR */
+{ "CircleMinus;", 0x02296 }, /* CIRCLED MINUS */
+{ "CirclePlus;", 0x02295 }, /* CIRCLED PLUS */
+{ "CircleTimes;", 0x02297 }, /* CIRCLED TIMES */
+{ "ClockwiseContourIntegral;", 0x02232 }, /* CLOCKWISE CONTOUR INTEGRAL */
+{ "CloseCurlyDoubleQuote;", 0x0201D }, /* RIGHT DOUBLE QUOTATION MARK */
+{ "CloseCurlyQuote;", 0x02019 }, /* RIGHT SINGLE QUOTATION MARK */
+{ "Colon;", 0x02237 }, /* PROPORTION */
+{ "Colone;", 0x02A74 }, /* DOUBLE COLON EQUAL */
+{ "Congruent;", 0x02261 }, /* IDENTICAL TO */
+{ "Conint;", 0x0222F }, /* SURFACE INTEGRAL */
+{ "ContourIntegral;", 0x0222E }, /* CONTOUR INTEGRAL */
+{ "Copf;", 0x02102 }, /* DOUBLE-STRUCK CAPITAL C */
+{ "Coproduct;", 0x02210 }, /* N-ARY COPRODUCT */
+{ "CounterClockwiseContourIntegral;", 0x02233 }, /* ANTICLOCKWISE CONTOUR INTEGRAL */
+{ "Cross;", 0x02A2F }, /* VECTOR OR CROSS PRODUCT */
+{ "Cscr;", 0x1D49E }, /* MATHEMATICAL SCRIPT CAPITAL C */
+{ "Cup;", 0x022D3 }, /* DOUBLE UNION */
+{ "CupCap;", 0x0224D }, /* EQUIVALENT TO */
+{ "DD;", 0x02145 }, /* DOUBLE-STRUCK ITALIC CAPITAL D */
+{ "DDotrahd;", 0x02911 }, /* RIGHTWARDS ARROW WITH DOTTED STEM */
+{ "DJcy;", 0x00402 }, /* CYRILLIC CAPITAL LETTER DJE */
+{ "DScy;", 0x00405 }, /* CYRILLIC CAPITAL LETTER DZE */
+{ "DZcy;", 0x0040F }, /* CYRILLIC CAPITAL LETTER DZHE */
+{ "Dagger;", 0x02021 }, /* DOUBLE DAGGER */
+{ "Darr;", 0x021A1 }, /* DOWNWARDS TWO HEADED ARROW */
+{ "Dashv;", 0x02AE4 }, /* VERTICAL BAR DOUBLE LEFT TURNSTILE */
+{ "Dcaron;", 0x0010E }, /* LATIN CAPITAL LETTER D WITH CARON */
+{ "Dcy;", 0x00414 }, /* CYRILLIC CAPITAL LETTER DE */
+{ "Del;", 0x02207 }, /* NABLA */
+{ "Delta;", 0x00394 }, /* GREEK CAPITAL LETTER DELTA */
+{ "Dfr;", 0x1D507 }, /* MATHEMATICAL FRAKTUR CAPITAL D */
+{ "DiacriticalAcute;", 0x000B4 }, /* ACUTE ACCENT */
+{ "DiacriticalDot;", 0x002D9 }, /* DOT ABOVE */
+{ "DiacriticalDoubleAcute;", 0x002DD }, /* DOUBLE ACUTE ACCENT */
+{ "DiacriticalGrave;", 0x00060 }, /* GRAVE ACCENT */
+{ "DiacriticalTilde;", 0x002DC }, /* SMALL TILDE */
+{ "Diamond;", 0x022C4 }, /* DIAMOND OPERATOR */
+{ "DifferentialD;", 0x02146 }, /* DOUBLE-STRUCK ITALIC SMALL D */
+{ "Dopf;", 0x1D53B }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL D */
+{ "Dot;", 0x000A8 }, /* DIAERESIS */
+{ "DotDot;", 0x020DC }, /* COMBINING FOUR DOTS ABOVE */
+{ "DotEqual;", 0x02250 }, /* APPROACHES THE LIMIT */
+{ "DoubleContourIntegral;", 0x0222F }, /* SURFACE INTEGRAL */
+{ "DoubleDot;", 0x000A8 }, /* DIAERESIS */
+{ "DoubleDownArrow;", 0x021D3 }, /* DOWNWARDS DOUBLE ARROW */
+{ "DoubleLeftArrow;", 0x021D0 }, /* LEFTWARDS DOUBLE ARROW */
+{ "DoubleLeftRightArrow;", 0x021D4 }, /* LEFT RIGHT DOUBLE ARROW */
+{ "DoubleLeftTee;", 0x02AE4 }, /* VERTICAL BAR DOUBLE LEFT TURNSTILE */
+{ "DoubleLongLeftArrow;", 0x027F8 }, /* LONG LEFTWARDS DOUBLE ARROW */
+{ "DoubleLongLeftRightArrow;", 0x027FA }, /* LONG LEFT RIGHT DOUBLE ARROW */
+{ "DoubleLongRightArrow;", 0x027F9 }, /* LONG RIGHTWARDS DOUBLE ARROW */
+{ "DoubleRightArrow;", 0x021D2 }, /* RIGHTWARDS DOUBLE ARROW */
+{ "DoubleRightTee;", 0x022A8 }, /* TRUE */
+{ "DoubleUpArrow;", 0x021D1 }, /* UPWARDS DOUBLE ARROW */
+{ "DoubleUpDownArrow;", 0x021D5 }, /* UP DOWN DOUBLE ARROW */
+{ "DoubleVerticalBar;", 0x02225 }, /* PARALLEL TO */
+{ "DownArrow;", 0x02193 }, /* DOWNWARDS ARROW */
+{ "DownArrowBar;", 0x02913 }, /* DOWNWARDS ARROW TO BAR */
+{ "DownArrowUpArrow;", 0x021F5 }, /* DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW */
+{ "DownBreve;", 0x00311 }, /* COMBINING INVERTED BREVE */
+{ "DownLeftRightVector;", 0x02950 }, /* LEFT BARB DOWN RIGHT BARB DOWN HARPOON */
+{ "DownLeftTeeVector;", 0x0295E }, /* LEFTWARDS HARPOON WITH BARB DOWN FROM BAR */
+{ "DownLeftVector;", 0x021BD }, /* LEFTWARDS HARPOON WITH BARB DOWNWARDS */
+{ "DownLeftVectorBar;", 0x02956 }, /* LEFTWARDS HARPOON WITH BARB DOWN TO BAR */
+{ "DownRightTeeVector;", 0x0295F }, /* RIGHTWARDS HARPOON WITH BARB DOWN FROM BAR */
+{ "DownRightVector;", 0x021C1 }, /* RIGHTWARDS HARPOON WITH BARB DOWNWARDS */
+{ "DownRightVectorBar;", 0x02957 }, /* RIGHTWARDS HARPOON WITH BARB DOWN TO BAR */
+{ "DownTee;", 0x022A4 }, /* DOWN TACK */
+{ "DownTeeArrow;", 0x021A7 }, /* DOWNWARDS ARROW FROM BAR */
+{ "Downarrow;", 0x021D3 }, /* DOWNWARDS DOUBLE ARROW */
+{ "Dscr;", 0x1D49F }, /* MATHEMATICAL SCRIPT CAPITAL D */
+{ "Dstrok;", 0x00110 }, /* LATIN CAPITAL LETTER D WITH STROKE */
+{ "ENG;", 0x0014A }, /* LATIN CAPITAL LETTER ENG */
+{ "ETH;", 0x000D0 }, /* LATIN CAPITAL LETTER ETH */
+{ "Eacute;", 0x000C9 }, /* LATIN CAPITAL LETTER E WITH ACUTE */
+{ "Ecaron;", 0x0011A }, /* LATIN CAPITAL LETTER E WITH CARON */
+{ "Ecirc;", 0x000CA }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
+{ "Ecy;", 0x0042D }, /* CYRILLIC CAPITAL LETTER E */
+{ "Edot;", 0x00116 }, /* LATIN CAPITAL LETTER E WITH DOT ABOVE */
+{ "Efr;", 0x1D508 }, /* MATHEMATICAL FRAKTUR CAPITAL E */
+{ "Egrave;", 0x000C8 }, /* LATIN CAPITAL LETTER E WITH GRAVE */
+{ "Element;", 0x02208 }, /* ELEMENT OF */
+{ "Emacr;", 0x00112 }, /* LATIN CAPITAL LETTER E WITH MACRON */
+{ "EmptySmallSquare;", 0x025FB }, /* WHITE MEDIUM SQUARE */
+{ "EmptyVerySmallSquare;", 0x025AB }, /* WHITE SMALL SQUARE */
+{ "Eogon;", 0x00118 }, /* LATIN CAPITAL LETTER E WITH OGONEK */
+{ "Eopf;", 0x1D53C }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL E */
+{ "Epsilon;", 0x00395 }, /* GREEK CAPITAL LETTER EPSILON */
+{ "Equal;", 0x02A75 }, /* TWO CONSECUTIVE EQUALS SIGNS */
+{ "EqualTilde;", 0x02242 }, /* MINUS TILDE */
+{ "Equilibrium;", 0x021CC }, /* RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON */
+{ "Escr;", 0x02130 }, /* SCRIPT CAPITAL E */
+{ "Esim;", 0x02A73 }, /* EQUALS SIGN ABOVE TILDE OPERATOR */
+{ "Eta;", 0x00397 }, /* GREEK CAPITAL LETTER ETA */
+{ "Euml;", 0x000CB }, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
+{ "Exists;", 0x02203 }, /* THERE EXISTS */
+{ "ExponentialE;", 0x02147 }, /* DOUBLE-STRUCK ITALIC SMALL E */
+{ "Fcy;", 0x00424 }, /* CYRILLIC CAPITAL LETTER EF */
+{ "Ffr;", 0x1D509 }, /* MATHEMATICAL FRAKTUR CAPITAL F */
+{ "FilledSmallSquare;", 0x025FC }, /* BLACK MEDIUM SQUARE */
+{ "FilledVerySmallSquare;", 0x025AA }, /* BLACK SMALL SQUARE */
+{ "Fopf;", 0x1D53D }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL F */
+{ "ForAll;", 0x02200 }, /* FOR ALL */
+{ "Fouriertrf;", 0x02131 }, /* SCRIPT CAPITAL F */
+{ "Fscr;", 0x02131 }, /* SCRIPT CAPITAL F */
+{ "GJcy;", 0x00403 }, /* CYRILLIC CAPITAL LETTER GJE */
+{ "GT;", 0x0003E }, /* GREATER-THAN SIGN */
+{ "Gamma;", 0x00393 }, /* GREEK CAPITAL LETTER GAMMA */
+{ "Gammad;", 0x003DC }, /* GREEK LETTER DIGAMMA */
+{ "Gbreve;", 0x0011E }, /* LATIN CAPITAL LETTER G WITH BREVE */
+{ "Gcedil;", 0x00122 }, /* LATIN CAPITAL LETTER G WITH CEDILLA */
+{ "Gcirc;", 0x0011C }, /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */
+{ "Gcy;", 0x00413 }, /* CYRILLIC CAPITAL LETTER GHE */
+{ "Gdot;", 0x00120 }, /* LATIN CAPITAL LETTER G WITH DOT ABOVE */
+{ "Gfr;", 0x1D50A }, /* MATHEMATICAL FRAKTUR CAPITAL G */
+{ "Gg;", 0x022D9 }, /* VERY MUCH GREATER-THAN */
+{ "Gopf;", 0x1D53E }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL G */
+{ "GreaterEqual;", 0x02265 }, /* GREATER-THAN OR EQUAL TO */
+{ "GreaterEqualLess;", 0x022DB }, /* GREATER-THAN EQUAL TO OR LESS-THAN */
+{ "GreaterFullEqual;", 0x02267 }, /* GREATER-THAN OVER EQUAL TO */
+{ "GreaterGreater;", 0x02AA2 }, /* DOUBLE NESTED GREATER-THAN */
+{ "GreaterLess;", 0x02277 }, /* GREATER-THAN OR LESS-THAN */
+{ "GreaterSlantEqual;", 0x02A7E }, /* GREATER-THAN OR SLANTED EQUAL TO */
+{ "GreaterTilde;", 0x02273 }, /* GREATER-THAN OR EQUIVALENT TO */
+{ "Gscr;", 0x1D4A2 }, /* MATHEMATICAL SCRIPT CAPITAL G */
+{ "Gt;", 0x0226B }, /* MUCH GREATER-THAN */
+{ "HARDcy;", 0x0042A }, /* CYRILLIC CAPITAL LETTER HARD SIGN */
+{ "Hacek;", 0x002C7 }, /* CARON */
+{ "Hat;", 0x0005E }, /* CIRCUMFLEX ACCENT */
+{ "Hcirc;", 0x00124 }, /* LATIN CAPITAL LETTER H WITH CIRCUMFLEX */
+{ "Hfr;", 0x0210C }, /* BLACK-LETTER CAPITAL H */
+{ "HilbertSpace;", 0x0210B }, /* SCRIPT CAPITAL H */
+{ "Hopf;", 0x0210D }, /* DOUBLE-STRUCK CAPITAL H */
+{ "HorizontalLine;", 0x02500 }, /* BOX DRAWINGS LIGHT HORIZONTAL */
+{ "Hscr;", 0x0210B }, /* SCRIPT CAPITAL H */
+{ "Hstrok;", 0x00126 }, /* LATIN CAPITAL LETTER H WITH STROKE */
+{ "HumpDownHump;", 0x0224E }, /* GEOMETRICALLY EQUIVALENT TO */
+{ "HumpEqual;", 0x0224F }, /* DIFFERENCE BETWEEN */
+{ "IEcy;", 0x00415 }, /* CYRILLIC CAPITAL LETTER IE */
+{ "IJlig;", 0x00132 }, /* LATIN CAPITAL LIGATURE IJ */
+{ "IOcy;", 0x00401 }, /* CYRILLIC CAPITAL LETTER IO */
+{ "Iacute;", 0x000CD }, /* LATIN CAPITAL LETTER I WITH ACUTE */
+{ "Icirc;", 0x000CE }, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
+{ "Icy;", 0x00418 }, /* CYRILLIC CAPITAL LETTER I */
+{ "Idot;", 0x00130 }, /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
+{ "Ifr;", 0x02111 }, /* BLACK-LETTER CAPITAL I */
+{ "Igrave;", 0x000CC }, /* LATIN CAPITAL LETTER I WITH GRAVE */
+{ "Im;", 0x02111 }, /* BLACK-LETTER CAPITAL I */
+{ "Imacr;", 0x0012A }, /* LATIN CAPITAL LETTER I WITH MACRON */
+{ "ImaginaryI;", 0x02148 }, /* DOUBLE-STRUCK ITALIC SMALL I */
+{ "Implies;", 0x021D2 }, /* RIGHTWARDS DOUBLE ARROW */
+{ "Int;", 0x0222C }, /* DOUBLE INTEGRAL */
+{ "Integral;", 0x0222B }, /* INTEGRAL */
+{ "Intersection;", 0x022C2 }, /* N-ARY INTERSECTION */
+{ "InvisibleComma;", 0x02063 }, /* INVISIBLE SEPARATOR */
+{ "InvisibleTimes;", 0x02062 }, /* INVISIBLE TIMES */
+{ "Iogon;", 0x0012E }, /* LATIN CAPITAL LETTER I WITH OGONEK */
+{ "Iopf;", 0x1D540 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL I */
+{ "Iota;", 0x00399 }, /* GREEK CAPITAL LETTER IOTA */
+{ "Iscr;", 0x02110 }, /* SCRIPT CAPITAL I */
+{ "Itilde;", 0x00128 }, /* LATIN CAPITAL LETTER I WITH TILDE */
+{ "Iukcy;", 0x00406 }, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */
+{ "Iuml;", 0x000CF }, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
+{ "Jcirc;", 0x00134 }, /* LATIN CAPITAL LETTER J WITH CIRCUMFLEX */
+{ "Jcy;", 0x00419 }, /* CYRILLIC CAPITAL LETTER SHORT I */
+{ "Jfr;", 0x1D50D }, /* MATHEMATICAL FRAKTUR CAPITAL J */
+{ "Jopf;", 0x1D541 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL J */
+{ "Jscr;", 0x1D4A5 }, /* MATHEMATICAL SCRIPT CAPITAL J */
+{ "Jsercy;", 0x00408 }, /* CYRILLIC CAPITAL LETTER JE */
+{ "Jukcy;", 0x00404 }, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */
+{ "KHcy;", 0x00425 }, /* CYRILLIC CAPITAL LETTER HA */
+{ "KJcy;", 0x0040C }, /* CYRILLIC CAPITAL LETTER KJE */
+{ "Kappa;", 0x0039A }, /* GREEK CAPITAL LETTER KAPPA */
+{ "Kcedil;", 0x00136 }, /* LATIN CAPITAL LETTER K WITH CEDILLA */
+{ "Kcy;", 0x0041A }, /* CYRILLIC CAPITAL LETTER KA */
+{ "Kfr;", 0x1D50E }, /* MATHEMATICAL FRAKTUR CAPITAL K */
+{ "Kopf;", 0x1D542 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL K */
+{ "Kscr;", 0x1D4A6 }, /* MATHEMATICAL SCRIPT CAPITAL K */
+{ "LJcy;", 0x00409 }, /* CYRILLIC CAPITAL LETTER LJE */
+{ "LT;", 0x0003C }, /* LESS-THAN SIGN */
+{ "Lacute;", 0x00139 }, /* LATIN CAPITAL LETTER L WITH ACUTE */
+{ "Lambda;", 0x0039B }, /* GREEK CAPITAL LETTER LAMDA */
+{ "Lang;", 0x027EA }, /* MATHEMATICAL LEFT DOUBLE ANGLE BRACKET */
+{ "Laplacetrf;", 0x02112 }, /* SCRIPT CAPITAL L */
+{ "Larr;", 0x0219E }, /* LEFTWARDS TWO HEADED ARROW */
+{ "Lcaron;", 0x0013D }, /* LATIN CAPITAL LETTER L WITH CARON */
+{ "Lcedil;", 0x0013B }, /* LATIN CAPITAL LETTER L WITH CEDILLA */
+{ "Lcy;", 0x0041B }, /* CYRILLIC CAPITAL LETTER EL */
+{ "LeftAngleBracket;", 0x027E8 }, /* MATHEMATICAL LEFT ANGLE BRACKET */
+{ "LeftArrow;", 0x02190 }, /* LEFTWARDS ARROW */
+{ "LeftArrowBar;", 0x021E4 }, /* LEFTWARDS ARROW TO BAR */
+{ "LeftArrowRightArrow;", 0x021C6 }, /* LEFTWARDS ARROW OVER RIGHTWARDS ARROW */
+{ "LeftCeiling;", 0x02308 }, /* LEFT CEILING */
+{ "LeftDoubleBracket;", 0x027E6 }, /* MATHEMATICAL LEFT WHITE SQUARE BRACKET */
+{ "LeftDownTeeVector;", 0x02961 }, /* DOWNWARDS HARPOON WITH BARB LEFT FROM BAR */
+{ "LeftDownVector;", 0x021C3 }, /* DOWNWARDS HARPOON WITH BARB LEFTWARDS */
+{ "LeftDownVectorBar;", 0x02959 }, /* DOWNWARDS HARPOON WITH BARB LEFT TO BAR */
+{ "LeftFloor;", 0x0230A }, /* LEFT FLOOR */
+{ "LeftRightArrow;", 0x02194 }, /* LEFT RIGHT ARROW */
+{ "LeftRightVector;", 0x0294E }, /* LEFT BARB UP RIGHT BARB UP HARPOON */
+{ "LeftTee;", 0x022A3 }, /* LEFT TACK */
+{ "LeftTeeArrow;", 0x021A4 }, /* LEFTWARDS ARROW FROM BAR */
+{ "LeftTeeVector;", 0x0295A }, /* LEFTWARDS HARPOON WITH BARB UP FROM BAR */
+{ "LeftTriangle;", 0x022B2 }, /* NORMAL SUBGROUP OF */
+{ "LeftTriangleBar;", 0x029CF }, /* LEFT TRIANGLE BESIDE VERTICAL BAR */
+{ "LeftTriangleEqual;", 0x022B4 }, /* NORMAL SUBGROUP OF OR EQUAL TO */
+{ "LeftUpDownVector;", 0x02951 }, /* UP BARB LEFT DOWN BARB LEFT HARPOON */
+{ "LeftUpTeeVector;", 0x02960 }, /* UPWARDS HARPOON WITH BARB LEFT FROM BAR */
+{ "LeftUpVector;", 0x021BF }, /* UPWARDS HARPOON WITH BARB LEFTWARDS */
+{ "LeftUpVectorBar;", 0x02958 }, /* UPWARDS HARPOON WITH BARB LEFT TO BAR */
+{ "LeftVector;", 0x021BC }, /* LEFTWARDS HARPOON WITH BARB UPWARDS */
+{ "LeftVectorBar;", 0x02952 }, /* LEFTWARDS HARPOON WITH BARB UP TO BAR */
+{ "Leftarrow;", 0x021D0 }, /* LEFTWARDS DOUBLE ARROW */
+{ "Leftrightarrow;", 0x021D4 }, /* LEFT RIGHT DOUBLE ARROW */
+{ "LessEqualGreater;", 0x022DA }, /* LESS-THAN EQUAL TO OR GREATER-THAN */
+{ "LessFullEqual;", 0x02266 }, /* LESS-THAN OVER EQUAL TO */
+{ "LessGreater;", 0x02276 }, /* LESS-THAN OR GREATER-THAN */
+{ "LessLess;", 0x02AA1 }, /* DOUBLE NESTED LESS-THAN */
+{ "LessSlantEqual;", 0x02A7D }, /* LESS-THAN OR SLANTED EQUAL TO */
+{ "LessTilde;", 0x02272 }, /* LESS-THAN OR EQUIVALENT TO */
+{ "Lfr;", 0x1D50F }, /* MATHEMATICAL FRAKTUR CAPITAL L */
+{ "Ll;", 0x022D8 }, /* VERY MUCH LESS-THAN */
+{ "Lleftarrow;", 0x021DA }, /* LEFTWARDS TRIPLE ARROW */
+{ "Lmidot;", 0x0013F }, /* LATIN CAPITAL LETTER L WITH MIDDLE DOT */
+{ "LongLeftArrow;", 0x027F5 }, /* LONG LEFTWARDS ARROW */
+{ "LongLeftRightArrow;", 0x027F7 }, /* LONG LEFT RIGHT ARROW */
+{ "LongRightArrow;", 0x027F6 }, /* LONG RIGHTWARDS ARROW */
+{ "Longleftarrow;", 0x027F8 }, /* LONG LEFTWARDS DOUBLE ARROW */
+{ "Longleftrightarrow;", 0x027FA }, /* LONG LEFT RIGHT DOUBLE ARROW */
+{ "Longrightarrow;", 0x027F9 }, /* LONG RIGHTWARDS DOUBLE ARROW */
+{ "Lopf;", 0x1D543 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL L */
+{ "LowerLeftArrow;", 0x02199 }, /* SOUTH WEST ARROW */
+{ "LowerRightArrow;", 0x02198 }, /* SOUTH EAST ARROW */
+{ "Lscr;", 0x02112 }, /* SCRIPT CAPITAL L */
+{ "Lsh;", 0x021B0 }, /* UPWARDS ARROW WITH TIP LEFTWARDS */
+{ "Lstrok;", 0x00141 }, /* LATIN CAPITAL LETTER L WITH STROKE */
+{ "Lt;", 0x0226A }, /* MUCH LESS-THAN */
+{ "Map;", 0x02905 }, /* RIGHTWARDS TWO-HEADED ARROW FROM BAR */
+{ "Mcy;", 0x0041C }, /* CYRILLIC CAPITAL LETTER EM */
+{ "MediumSpace;", 0x0205F }, /* MEDIUM MATHEMATICAL SPACE */
+{ "Mellintrf;", 0x02133 }, /* SCRIPT CAPITAL M */
+{ "Mfr;", 0x1D510 }, /* MATHEMATICAL FRAKTUR CAPITAL M */
+{ "MinusPlus;", 0x02213 }, /* MINUS-OR-PLUS SIGN */
+{ "Mopf;", 0x1D544 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL M */
+{ "Mscr;", 0x02133 }, /* SCRIPT CAPITAL M */
+{ "Mu;", 0x0039C }, /* GREEK CAPITAL LETTER MU */
+{ "NJcy;", 0x0040A }, /* CYRILLIC CAPITAL LETTER NJE */
+{ "Nacute;", 0x00143 }, /* LATIN CAPITAL LETTER N WITH ACUTE */
+{ "Ncaron;", 0x00147 }, /* LATIN CAPITAL LETTER N WITH CARON */
+{ "Ncedil;", 0x00145 }, /* LATIN CAPITAL LETTER N WITH CEDILLA */
+{ "Ncy;", 0x0041D }, /* CYRILLIC CAPITAL LETTER EN */
+{ "NegativeMediumSpace;", 0x0200B }, /* ZERO WIDTH SPACE */
+{ "NegativeThickSpace;", 0x0200B }, /* ZERO WIDTH SPACE */
+{ "NegativeThinSpace;", 0x0200B }, /* ZERO WIDTH SPACE */
+{ "NegativeVeryThinSpace;", 0x0200B }, /* ZERO WIDTH SPACE */
+{ "NestedGreaterGreater;", 0x0226B }, /* MUCH GREATER-THAN */
+{ "NestedLessLess;", 0x0226A }, /* MUCH LESS-THAN */
+{ "NewLine;", 0x0000A }, /* LINE FEED (LF) */
+{ "Nfr;", 0x1D511 }, /* MATHEMATICAL FRAKTUR CAPITAL N */
+{ "NoBreak;", 0x02060 }, /* WORD JOINER */
+{ "NonBreakingSpace;", 0x000A0 }, /* NO-BREAK SPACE */
+{ "Nopf;", 0x02115 }, /* DOUBLE-STRUCK CAPITAL N */
+{ "Not;", 0x02AEC }, /* DOUBLE STROKE NOT SIGN */
+{ "NotCongruent;", 0x02262 }, /* NOT IDENTICAL TO */
+{ "NotCupCap;", 0x0226D }, /* NOT EQUIVALENT TO */
+{ "NotDoubleVerticalBar;", 0x02226 }, /* NOT PARALLEL TO */
+{ "NotElement;", 0x02209 }, /* NOT AN ELEMENT OF */
+{ "NotEqual;", 0x02260 }, /* NOT EQUAL TO */
+{ "NotExists;", 0x02204 }, /* THERE DOES NOT EXIST */
+{ "NotGreater;", 0x0226F }, /* NOT GREATER-THAN */
+{ "NotGreaterEqual;", 0x02271 }, /* NEITHER GREATER-THAN NOR EQUAL TO */
+{ "NotGreaterLess;", 0x02279 }, /* NEITHER GREATER-THAN NOR LESS-THAN */
+{ "NotGreaterTilde;", 0x02275 }, /* NEITHER GREATER-THAN NOR EQUIVALENT TO */
+{ "NotLeftTriangle;", 0x022EA }, /* NOT NORMAL SUBGROUP OF */
+{ "NotLeftTriangleEqual;", 0x022EC }, /* NOT NORMAL SUBGROUP OF OR EQUAL TO */
+{ "NotLess;", 0x0226E }, /* NOT LESS-THAN */
+{ "NotLessEqual;", 0x02270 }, /* NEITHER LESS-THAN NOR EQUAL TO */
+{ "NotLessGreater;", 0x02278 }, /* NEITHER LESS-THAN NOR GREATER-THAN */
+{ "NotLessTilde;", 0x02274 }, /* NEITHER LESS-THAN NOR EQUIVALENT TO */
+{ "NotPrecedes;", 0x02280 }, /* DOES NOT PRECEDE */
+{ "NotPrecedesSlantEqual;", 0x022E0 }, /* DOES NOT PRECEDE OR EQUAL */
+{ "NotReverseElement;", 0x0220C }, /* DOES NOT CONTAIN AS MEMBER */
+{ "NotRightTriangle;", 0x022EB }, /* DOES NOT CONTAIN AS NORMAL SUBGROUP */
+{ "NotRightTriangleEqual;", 0x022ED }, /* DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL */
+{ "NotSquareSubsetEqual;", 0x022E2 }, /* NOT SQUARE IMAGE OF OR EQUAL TO */
+{ "NotSquareSupersetEqual;", 0x022E3 }, /* NOT SQUARE ORIGINAL OF OR EQUAL TO */
+{ "NotSubsetEqual;", 0x02288 }, /* NEITHER A SUBSET OF NOR EQUAL TO */
+{ "NotSucceeds;", 0x02281 }, /* DOES NOT SUCCEED */
+{ "NotSucceedsSlantEqual;", 0x022E1 }, /* DOES NOT SUCCEED OR EQUAL */
+{ "NotSupersetEqual;", 0x02289 }, /* NEITHER A SUPERSET OF NOR EQUAL TO */
+{ "NotTilde;", 0x02241 }, /* NOT TILDE */
+{ "NotTildeEqual;", 0x02244 }, /* NOT ASYMPTOTICALLY EQUAL TO */
+{ "NotTildeFullEqual;", 0x02247 }, /* NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO */
+{ "NotTildeTilde;", 0x02249 }, /* NOT ALMOST EQUAL TO */
+{ "NotVerticalBar;", 0x02224 }, /* DOES NOT DIVIDE */
+{ "Nscr;", 0x1D4A9 }, /* MATHEMATICAL SCRIPT CAPITAL N */
+{ "Ntilde;", 0x000D1 }, /* LATIN CAPITAL LETTER N WITH TILDE */
+{ "Nu;", 0x0039D }, /* GREEK CAPITAL LETTER NU */
+{ "OElig;", 0x00152 }, /* LATIN CAPITAL LIGATURE OE */
+{ "Oacute;", 0x000D3 }, /* LATIN CAPITAL LETTER O WITH ACUTE */
+{ "Ocirc;", 0x000D4 }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
+{ "Ocy;", 0x0041E }, /* CYRILLIC CAPITAL LETTER O */
+{ "Odblac;", 0x00150 }, /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */
+{ "Ofr;", 0x1D512 }, /* MATHEMATICAL FRAKTUR CAPITAL O */
+{ "Ograve;", 0x000D2 }, /* LATIN CAPITAL LETTER O WITH GRAVE */
+{ "Omacr;", 0x0014C }, /* LATIN CAPITAL LETTER O WITH MACRON */
+{ "Omega;", 0x003A9 }, /* GREEK CAPITAL LETTER OMEGA */
+{ "Omicron;", 0x0039F }, /* GREEK CAPITAL LETTER OMICRON */
+{ "Oopf;", 0x1D546 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL O */
+{ "OpenCurlyDoubleQuote;", 0x0201C }, /* LEFT DOUBLE QUOTATION MARK */
+{ "OpenCurlyQuote;", 0x02018 }, /* LEFT SINGLE QUOTATION MARK */
+{ "Or;", 0x02A54 }, /* DOUBLE LOGICAL OR */
+{ "Oscr;", 0x1D4AA }, /* MATHEMATICAL SCRIPT CAPITAL O */
+{ "Oslash;", 0x000D8 }, /* LATIN CAPITAL LETTER O WITH STROKE */
+{ "Otilde;", 0x000D5 }, /* LATIN CAPITAL LETTER O WITH TILDE */
+{ "Otimes;", 0x02A37 }, /* MULTIPLICATION SIGN IN DOUBLE CIRCLE */
+{ "Ouml;", 0x000D6 }, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
+{ "OverBar;", 0x000AF }, /* MACRON */
+{ "OverBrace;", 0x023DE }, /* TOP CURLY BRACKET */
+{ "OverBracket;", 0x023B4 }, /* TOP SQUARE BRACKET */
+{ "OverParenthesis;", 0x023DC }, /* TOP PARENTHESIS */
+{ "PartialD;", 0x02202 }, /* PARTIAL DIFFERENTIAL */
+{ "Pcy;", 0x0041F }, /* CYRILLIC CAPITAL LETTER PE */
+{ "Pfr;", 0x1D513 }, /* MATHEMATICAL FRAKTUR CAPITAL P */
+{ "Phi;", 0x003A6 }, /* GREEK CAPITAL LETTER PHI */
+{ "Pi;", 0x003A0 }, /* GREEK CAPITAL LETTER PI */
+{ "PlusMinus;", 0x000B1 }, /* PLUS-MINUS SIGN */
+{ "Poincareplane;", 0x0210C }, /* BLACK-LETTER CAPITAL H */
+{ "Popf;", 0x02119 }, /* DOUBLE-STRUCK CAPITAL P */
+{ "Pr;", 0x02ABB }, /* DOUBLE PRECEDES */
+{ "Precedes;", 0x0227A }, /* PRECEDES */
+{ "PrecedesEqual;", 0x02AAF }, /* PRECEDES ABOVE SINGLE-LINE EQUALS SIGN */
+{ "PrecedesSlantEqual;", 0x0227C }, /* PRECEDES OR EQUAL TO */
+{ "PrecedesTilde;", 0x0227E }, /* PRECEDES OR EQUIVALENT TO */
+{ "Prime;", 0x02033 }, /* DOUBLE PRIME */
+{ "Product;", 0x0220F }, /* N-ARY PRODUCT */
+{ "Proportion;", 0x02237 }, /* PROPORTION */
+{ "Proportional;", 0x0221D }, /* PROPORTIONAL TO */
+{ "Pscr;", 0x1D4AB }, /* MATHEMATICAL SCRIPT CAPITAL P */
+{ "Psi;", 0x003A8 }, /* GREEK CAPITAL LETTER PSI */
+{ "QUOT;", 0x00022 }, /* QUOTATION MARK */
+{ "Qfr;", 0x1D514 }, /* MATHEMATICAL FRAKTUR CAPITAL Q */
+{ "Qopf;", 0x0211A }, /* DOUBLE-STRUCK CAPITAL Q */
+{ "Qscr;", 0x1D4AC }, /* MATHEMATICAL SCRIPT CAPITAL Q */
+{ "RBarr;", 0x02910 }, /* RIGHTWARDS TWO-HEADED TRIPLE DASH ARROW */
+{ "REG;", 0x000AE }, /* REGISTERED SIGN */
+{ "Racute;", 0x00154 }, /* LATIN CAPITAL LETTER R WITH ACUTE */
+{ "Rang;", 0x027EB }, /* MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET */
+{ "Rarr;", 0x021A0 }, /* RIGHTWARDS TWO HEADED ARROW */
+{ "Rarrtl;", 0x02916 }, /* RIGHTWARDS TWO-HEADED ARROW WITH TAIL */
+{ "Rcaron;", 0x00158 }, /* LATIN CAPITAL LETTER R WITH CARON */
+{ "Rcedil;", 0x00156 }, /* LATIN CAPITAL LETTER R WITH CEDILLA */
+{ "Rcy;", 0x00420 }, /* CYRILLIC CAPITAL LETTER ER */
+{ "Re;", 0x0211C }, /* BLACK-LETTER CAPITAL R */
+{ "ReverseElement;", 0x0220B }, /* CONTAINS AS MEMBER */
+{ "ReverseEquilibrium;", 0x021CB }, /* LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON */
+{ "ReverseUpEquilibrium;", 0x0296F }, /* DOWNWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT */
+{ "Rfr;", 0x0211C }, /* BLACK-LETTER CAPITAL R */
+{ "Rho;", 0x003A1 }, /* GREEK CAPITAL LETTER RHO */
+{ "RightAngleBracket;", 0x027E9 }, /* MATHEMATICAL RIGHT ANGLE BRACKET */
+{ "RightArrow;", 0x02192 }, /* RIGHTWARDS ARROW */
+{ "RightArrowBar;", 0x021E5 }, /* RIGHTWARDS ARROW TO BAR */
+{ "RightArrowLeftArrow;", 0x021C4 }, /* RIGHTWARDS ARROW OVER LEFTWARDS ARROW */
+{ "RightCeiling;", 0x02309 }, /* RIGHT CEILING */
+{ "RightDoubleBracket;", 0x027E7 }, /* MATHEMATICAL RIGHT WHITE SQUARE BRACKET */
+{ "RightDownTeeVector;", 0x0295D }, /* DOWNWARDS HARPOON WITH BARB RIGHT FROM BAR */
+{ "RightDownVector;", 0x021C2 }, /* DOWNWARDS HARPOON WITH BARB RIGHTWARDS */
+{ "RightDownVectorBar;", 0x02955 }, /* DOWNWARDS HARPOON WITH BARB RIGHT TO BAR */
+{ "RightFloor;", 0x0230B }, /* RIGHT FLOOR */
+{ "RightTee;", 0x022A2 }, /* RIGHT TACK */
+{ "RightTeeArrow;", 0x021A6 }, /* RIGHTWARDS ARROW FROM BAR */
+{ "RightTeeVector;", 0x0295B }, /* RIGHTWARDS HARPOON WITH BARB UP FROM BAR */
+{ "RightTriangle;", 0x022B3 }, /* CONTAINS AS NORMAL SUBGROUP */
+{ "RightTriangleBar;", 0x029D0 }, /* VERTICAL BAR BESIDE RIGHT TRIANGLE */
+{ "RightTriangleEqual;", 0x022B5 }, /* CONTAINS AS NORMAL SUBGROUP OR EQUAL TO */
+{ "RightUpDownVector;", 0x0294F }, /* UP BARB RIGHT DOWN BARB RIGHT HARPOON */
+{ "RightUpTeeVector;", 0x0295C }, /* UPWARDS HARPOON WITH BARB RIGHT FROM BAR */
+{ "RightUpVector;", 0x021BE }, /* UPWARDS HARPOON WITH BARB RIGHTWARDS */
+{ "RightUpVectorBar;", 0x02954 }, /* UPWARDS HARPOON WITH BARB RIGHT TO BAR */
+{ "RightVector;", 0x021C0 }, /* RIGHTWARDS HARPOON WITH BARB UPWARDS */
+{ "RightVectorBar;", 0x02953 }, /* RIGHTWARDS HARPOON WITH BARB UP TO BAR */
+{ "Rightarrow;", 0x021D2 }, /* RIGHTWARDS DOUBLE ARROW */
+{ "Ropf;", 0x0211D }, /* DOUBLE-STRUCK CAPITAL R */
+{ "RoundImplies;", 0x02970 }, /* RIGHT DOUBLE ARROW WITH ROUNDED HEAD */
+{ "Rrightarrow;", 0x021DB }, /* RIGHTWARDS TRIPLE ARROW */
+{ "Rscr;", 0x0211B }, /* SCRIPT CAPITAL R */
+{ "Rsh;", 0x021B1 }, /* UPWARDS ARROW WITH TIP RIGHTWARDS */
+{ "RuleDelayed;", 0x029F4 }, /* RULE-DELAYED */
+{ "SHCHcy;", 0x00429 }, /* CYRILLIC CAPITAL LETTER SHCHA */
+{ "SHcy;", 0x00428 }, /* CYRILLIC CAPITAL LETTER SHA */
+{ "SOFTcy;", 0x0042C }, /* CYRILLIC CAPITAL LETTER SOFT SIGN */
+{ "Sacute;", 0x0015A }, /* LATIN CAPITAL LETTER S WITH ACUTE */
+{ "Sc;", 0x02ABC }, /* DOUBLE SUCCEEDS */
+{ "Scaron;", 0x00160 }, /* LATIN CAPITAL LETTER S WITH CARON */
+{ "Scedil;", 0x0015E }, /* LATIN CAPITAL LETTER S WITH CEDILLA */
+{ "Scirc;", 0x0015C }, /* LATIN CAPITAL LETTER S WITH CIRCUMFLEX */
+{ "Scy;", 0x00421 }, /* CYRILLIC CAPITAL LETTER ES */
+{ "Sfr;", 0x1D516 }, /* MATHEMATICAL FRAKTUR CAPITAL S */
+{ "ShortDownArrow;", 0x02193 }, /* DOWNWARDS ARROW */
+{ "ShortLeftArrow;", 0x02190 }, /* LEFTWARDS ARROW */
+{ "ShortRightArrow;", 0x02192 }, /* RIGHTWARDS ARROW */
+{ "ShortUpArrow;", 0x02191 }, /* UPWARDS ARROW */
+{ "Sigma;", 0x003A3 }, /* GREEK CAPITAL LETTER SIGMA */
+{ "SmallCircle;", 0x02218 }, /* RING OPERATOR */
+{ "Sopf;", 0x1D54A }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL S */
+{ "Sqrt;", 0x0221A }, /* SQUARE ROOT */
+{ "Square;", 0x025A1 }, /* WHITE SQUARE */
+{ "SquareIntersection;", 0x02293 }, /* SQUARE CAP */
+{ "SquareSubset;", 0x0228F }, /* SQUARE IMAGE OF */
+{ "SquareSubsetEqual;", 0x02291 }, /* SQUARE IMAGE OF OR EQUAL TO */
+{ "SquareSuperset;", 0x02290 }, /* SQUARE ORIGINAL OF */
+{ "SquareSupersetEqual;", 0x02292 }, /* SQUARE ORIGINAL OF OR EQUAL TO */
+{ "SquareUnion;", 0x02294 }, /* SQUARE CUP */
+{ "Sscr;", 0x1D4AE }, /* MATHEMATICAL SCRIPT CAPITAL S */
+{ "Star;", 0x022C6 }, /* STAR OPERATOR */
+{ "Sub;", 0x022D0 }, /* DOUBLE SUBSET */
+{ "Subset;", 0x022D0 }, /* DOUBLE SUBSET */
+{ "SubsetEqual;", 0x02286 }, /* SUBSET OF OR EQUAL TO */
+{ "Succeeds;", 0x0227B }, /* SUCCEEDS */
+{ "SucceedsEqual;", 0x02AB0 }, /* SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN */
+{ "SucceedsSlantEqual;", 0x0227D }, /* SUCCEEDS OR EQUAL TO */
+{ "SucceedsTilde;", 0x0227F }, /* SUCCEEDS OR EQUIVALENT TO */
+{ "SuchThat;", 0x0220B }, /* CONTAINS AS MEMBER */
+{ "Sum;", 0x02211 }, /* N-ARY SUMMATION */
+{ "Sup;", 0x022D1 }, /* DOUBLE SUPERSET */
+{ "Superset;", 0x02283 }, /* SUPERSET OF */
+{ "SupersetEqual;", 0x02287 }, /* SUPERSET OF OR EQUAL TO */
+{ "Supset;", 0x022D1 }, /* DOUBLE SUPERSET */
+{ "THORN;", 0x000DE }, /* LATIN CAPITAL LETTER THORN */
+{ "TRADE;", 0x02122 }, /* TRADE MARK SIGN */
+{ "TSHcy;", 0x0040B }, /* CYRILLIC CAPITAL LETTER TSHE */
+{ "TScy;", 0x00426 }, /* CYRILLIC CAPITAL LETTER TSE */
+{ "Tab;", 0x00009 }, /* CHARACTER TABULATION */
+{ "Tau;", 0x003A4 }, /* GREEK CAPITAL LETTER TAU */
+{ "Tcaron;", 0x00164 }, /* LATIN CAPITAL LETTER T WITH CARON */
+{ "Tcedil;", 0x00162 }, /* LATIN CAPITAL LETTER T WITH CEDILLA */
+{ "Tcy;", 0x00422 }, /* CYRILLIC CAPITAL LETTER TE */
+{ "Tfr;", 0x1D517 }, /* MATHEMATICAL FRAKTUR CAPITAL T */
+{ "Therefore;", 0x02234 }, /* THEREFORE */
+{ "Theta;", 0x00398 }, /* GREEK CAPITAL LETTER THETA */
+{ "ThinSpace;", 0x02009 }, /* THIN SPACE */
+{ "Tilde;", 0x0223C }, /* TILDE OPERATOR */
+{ "TildeEqual;", 0x02243 }, /* ASYMPTOTICALLY EQUAL TO */
+{ "TildeFullEqual;", 0x02245 }, /* APPROXIMATELY EQUAL TO */
+{ "TildeTilde;", 0x02248 }, /* ALMOST EQUAL TO */
+{ "Topf;", 0x1D54B }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL T */
+{ "TripleDot;", 0x020DB }, /* COMBINING THREE DOTS ABOVE */
+{ "Tscr;", 0x1D4AF }, /* MATHEMATICAL SCRIPT CAPITAL T */
+{ "Tstrok;", 0x00166 }, /* LATIN CAPITAL LETTER T WITH STROKE */
+{ "Uacute;", 0x000DA }, /* LATIN CAPITAL LETTER U WITH ACUTE */
+{ "Uarr;", 0x0219F }, /* UPWARDS TWO HEADED ARROW */
+{ "Uarrocir;", 0x02949 }, /* UPWARDS TWO-HEADED ARROW FROM SMALL CIRCLE */
+{ "Ubrcy;", 0x0040E }, /* CYRILLIC CAPITAL LETTER SHORT U */
+{ "Ubreve;", 0x0016C }, /* LATIN CAPITAL LETTER U WITH BREVE */
+{ "Ucirc;", 0x000DB }, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
+{ "Ucy;", 0x00423 }, /* CYRILLIC CAPITAL LETTER U */
+{ "Udblac;", 0x00170 }, /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */
+{ "Ufr;", 0x1D518 }, /* MATHEMATICAL FRAKTUR CAPITAL U */
+{ "Ugrave;", 0x000D9 }, /* LATIN CAPITAL LETTER U WITH GRAVE */
+{ "Umacr;", 0x0016A }, /* LATIN CAPITAL LETTER U WITH MACRON */
+{ "UnderBar;", 0x00332 }, /* COMBINING LOW LINE */
+{ "UnderBrace;", 0x023DF }, /* BOTTOM CURLY BRACKET */
+{ "UnderBracket;", 0x023B5 }, /* BOTTOM SQUARE BRACKET */
+{ "UnderParenthesis;", 0x023DD }, /* BOTTOM PARENTHESIS */
+{ "Union;", 0x022C3 }, /* N-ARY UNION */
+{ "UnionPlus;", 0x0228E }, /* MULTISET UNION */
+{ "Uogon;", 0x00172 }, /* LATIN CAPITAL LETTER U WITH OGONEK */
+{ "Uopf;", 0x1D54C }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL U */
+{ "UpArrow;", 0x02191 }, /* UPWARDS ARROW */
+{ "UpArrowBar;", 0x02912 }, /* UPWARDS ARROW TO BAR */
+{ "UpArrowDownArrow;", 0x021C5 }, /* UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW */
+{ "UpDownArrow;", 0x02195 }, /* UP DOWN ARROW */
+{ "UpEquilibrium;", 0x0296E }, /* UPWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT */
+{ "UpTee;", 0x022A5 }, /* UP TACK */
+{ "UpTeeArrow;", 0x021A5 }, /* UPWARDS ARROW FROM BAR */
+{ "Uparrow;", 0x021D1 }, /* UPWARDS DOUBLE ARROW */
+{ "Updownarrow;", 0x021D5 }, /* UP DOWN DOUBLE ARROW */
+{ "UpperLeftArrow;", 0x02196 }, /* NORTH WEST ARROW */
+{ "UpperRightArrow;", 0x02197 }, /* NORTH EAST ARROW */
+{ "Upsi;", 0x003D2 }, /* GREEK UPSILON WITH HOOK SYMBOL */
+{ "Upsilon;", 0x003A5 }, /* GREEK CAPITAL LETTER UPSILON */
+{ "Uring;", 0x0016E }, /* LATIN CAPITAL LETTER U WITH RING ABOVE */
+{ "Uscr;", 0x1D4B0 }, /* MATHEMATICAL SCRIPT CAPITAL U */
+{ "Utilde;", 0x00168 }, /* LATIN CAPITAL LETTER U WITH TILDE */
+{ "Uuml;", 0x000DC }, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
+{ "VDash;", 0x022AB }, /* DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE */
+{ "Vbar;", 0x02AEB }, /* DOUBLE UP TACK */
+{ "Vcy;", 0x00412 }, /* CYRILLIC CAPITAL LETTER VE */
+{ "Vdash;", 0x022A9 }, /* FORCES */
+{ "Vdashl;", 0x02AE6 }, /* LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL */
+{ "Vee;", 0x022C1 }, /* N-ARY LOGICAL OR */
+{ "Verbar;", 0x02016 }, /* DOUBLE VERTICAL LINE */
+{ "Vert;", 0x02016 }, /* DOUBLE VERTICAL LINE */
+{ "VerticalBar;", 0x02223 }, /* DIVIDES */
+{ "VerticalLine;", 0x0007C }, /* VERTICAL LINE */
+{ "VerticalSeparator;", 0x02758 }, /* LIGHT VERTICAL BAR */
+{ "VerticalTilde;", 0x02240 }, /* WREATH PRODUCT */
+{ "VeryThinSpace;", 0x0200A }, /* HAIR SPACE */
+{ "Vfr;", 0x1D519 }, /* MATHEMATICAL FRAKTUR CAPITAL V */
+{ "Vopf;", 0x1D54D }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL V */
+{ "Vscr;", 0x1D4B1 }, /* MATHEMATICAL SCRIPT CAPITAL V */
+{ "Vvdash;", 0x022AA }, /* TRIPLE VERTICAL BAR RIGHT TURNSTILE */
+{ "Wcirc;", 0x00174 }, /* LATIN CAPITAL LETTER W WITH CIRCUMFLEX */
+{ "Wedge;", 0x022C0 }, /* N-ARY LOGICAL AND */
+{ "Wfr;", 0x1D51A }, /* MATHEMATICAL FRAKTUR CAPITAL W */
+{ "Wopf;", 0x1D54E }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL W */
+{ "Wscr;", 0x1D4B2 }, /* MATHEMATICAL SCRIPT CAPITAL W */
+{ "Xfr;", 0x1D51B }, /* MATHEMATICAL FRAKTUR CAPITAL X */
+{ "Xi;", 0x0039E }, /* GREEK CAPITAL LETTER XI */
+{ "Xopf;", 0x1D54F }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL X */
+{ "Xscr;", 0x1D4B3 }, /* MATHEMATICAL SCRIPT CAPITAL X */
+{ "YAcy;", 0x0042F }, /* CYRILLIC CAPITAL LETTER YA */
+{ "YIcy;", 0x00407 }, /* CYRILLIC CAPITAL LETTER YI */
+{ "YUcy;", 0x0042E }, /* CYRILLIC CAPITAL LETTER YU */
+{ "Yacute;", 0x000DD }, /* LATIN CAPITAL LETTER Y WITH ACUTE */
+{ "Ycirc;", 0x00176 }, /* LATIN CAPITAL LETTER Y WITH CIRCUMFLEX */
+{ "Ycy;", 0x0042B }, /* CYRILLIC CAPITAL LETTER YERU */
+{ "Yfr;", 0x1D51C }, /* MATHEMATICAL FRAKTUR CAPITAL Y */
+{ "Yopf;", 0x1D550 }, /* MATHEMATICAL DOUBLE-STRUCK CAPITAL Y */
+{ "Yscr;", 0x1D4B4 }, /* MATHEMATICAL SCRIPT CAPITAL Y */
+{ "Yuml;", 0x00178 }, /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
+{ "ZHcy;", 0x00416 }, /* CYRILLIC CAPITAL LETTER ZHE */
+{ "Zacute;", 0x00179 }, /* LATIN CAPITAL LETTER Z WITH ACUTE */
+{ "Zcaron;", 0x0017D }, /* LATIN CAPITAL LETTER Z WITH CARON */
+{ "Zcy;", 0x00417 }, /* CYRILLIC CAPITAL LETTER ZE */
+{ "Zdot;", 0x0017B }, /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */
+{ "ZeroWidthSpace;", 0x0200B }, /* ZERO WIDTH SPACE */
+{ "Zeta;", 0x00396 }, /* GREEK CAPITAL LETTER ZETA */
+{ "Zfr;", 0x02128 }, /* BLACK-LETTER CAPITAL Z */
+{ "Zopf;", 0x02124 }, /* DOUBLE-STRUCK CAPITAL Z */
+{ "Zscr;", 0x1D4B5 }, /* MATHEMATICAL SCRIPT CAPITAL Z */
+{ "aacute;", 0x000E1 }, /* LATIN SMALL LETTER A WITH ACUTE */
+{ "abreve;", 0x00103 }, /* LATIN SMALL LETTER A WITH BREVE */
+{ "ac;", 0x0223E }, /* INVERTED LAZY S */
+{ "acd;", 0x0223F }, /* SINE WAVE */
+{ "acirc;", 0x000E2 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
+{ "acute;", 0x000B4 }, /* ACUTE ACCENT */
+{ "acy;", 0x00430 }, /* CYRILLIC SMALL LETTER A */
+{ "aelig;", 0x000E6 }, /* LATIN SMALL LETTER AE */
+{ "af;", 0x02061 }, /* FUNCTION APPLICATION */
+{ "afr;", 0x1D51E }, /* MATHEMATICAL FRAKTUR SMALL A */
+{ "agrave;", 0x000E0 }, /* LATIN SMALL LETTER A WITH GRAVE */
+{ "alefsym;", 0x02135 }, /* ALEF SYMBOL */
+{ "aleph;", 0x02135 }, /* ALEF SYMBOL */
+{ "alpha;", 0x003B1 }, /* GREEK SMALL LETTER ALPHA */
+{ "amacr;", 0x00101 }, /* LATIN SMALL LETTER A WITH MACRON */
+{ "amalg;", 0x02A3F }, /* AMALGAMATION OR COPRODUCT */
+{ "amp;", 0x00026 }, /* AMPERSAND */
+{ "and;", 0x02227 }, /* LOGICAL AND */
+{ "andand;", 0x02A55 }, /* TWO INTERSECTING LOGICAL AND */
+{ "andd;", 0x02A5C }, /* LOGICAL AND WITH HORIZONTAL DASH */
+{ "andslope;", 0x02A58 }, /* SLOPING LARGE AND */
+{ "andv;", 0x02A5A }, /* LOGICAL AND WITH MIDDLE STEM */
+{ "ang;", 0x02220 }, /* ANGLE */
+{ "ange;", 0x029A4 }, /* ANGLE WITH UNDERBAR */
+{ "angle;", 0x02220 }, /* ANGLE */
+{ "angmsd;", 0x02221 }, /* MEASURED ANGLE */
+{ "angmsdaa;", 0x029A8 }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT */
+{ "angmsdab;", 0x029A9 }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT */
+{ "angmsdac;", 0x029AA }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT */
+{ "angmsdad;", 0x029AB }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT */
+{ "angmsdae;", 0x029AC }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP */
+{ "angmsdaf;", 0x029AD }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP */
+{ "angmsdag;", 0x029AE }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN */
+{ "angmsdah;", 0x029AF }, /* MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN */
+{ "angrt;", 0x0221F }, /* RIGHT ANGLE */
+{ "angrtvb;", 0x022BE }, /* RIGHT ANGLE WITH ARC */
+{ "angrtvbd;", 0x0299D }, /* MEASURED RIGHT ANGLE WITH DOT */
+{ "angsph;", 0x02222 }, /* SPHERICAL ANGLE */
+{ "angst;", 0x0212B }, /* ANGSTROM SIGN */
+{ "angzarr;", 0x0237C }, /* RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW */
+{ "aogon;", 0x00105 }, /* LATIN SMALL LETTER A WITH OGONEK */
+{ "aopf;", 0x1D552 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL A */
+{ "ap;", 0x02248 }, /* ALMOST EQUAL TO */
+{ "apE;", 0x02A70 }, /* APPROXIMATELY EQUAL OR EQUAL TO */
+{ "apacir;", 0x02A6F }, /* ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT */
+{ "ape;", 0x0224A }, /* ALMOST EQUAL OR EQUAL TO */
+{ "apid;", 0x0224B }, /* TRIPLE TILDE */
+{ "apos;", 0x00027 }, /* APOSTROPHE */
+{ "approx;", 0x02248 }, /* ALMOST EQUAL TO */
+{ "approxeq;", 0x0224A }, /* ALMOST EQUAL OR EQUAL TO */
+{ "aring;", 0x000E5 }, /* LATIN SMALL LETTER A WITH RING ABOVE */
+{ "ascr;", 0x1D4B6 }, /* MATHEMATICAL SCRIPT SMALL A */
+{ "ast;", 0x0002A }, /* ASTERISK */
+{ "asymp;", 0x02248 }, /* ALMOST EQUAL TO */
+{ "asympeq;", 0x0224D }, /* EQUIVALENT TO */
+{ "atilde;", 0x000E3 }, /* LATIN SMALL LETTER A WITH TILDE */
+{ "auml;", 0x000E4 }, /* LATIN SMALL LETTER A WITH DIAERESIS */
+{ "awconint;", 0x02233 }, /* ANTICLOCKWISE CONTOUR INTEGRAL */
+{ "awint;", 0x02A11 }, /* ANTICLOCKWISE INTEGRATION */
+{ "bNot;", 0x02AED }, /* REVERSED DOUBLE STROKE NOT SIGN */
+{ "backcong;", 0x0224C }, /* ALL EQUAL TO */
+{ "backepsilon;", 0x003F6 }, /* GREEK REVERSED LUNATE EPSILON SYMBOL */
+{ "backprime;", 0x02035 }, /* REVERSED PRIME */
+{ "backsim;", 0x0223D }, /* REVERSED TILDE */
+{ "backsimeq;", 0x022CD }, /* REVERSED TILDE EQUALS */
+{ "barvee;", 0x022BD }, /* NOR */
+{ "barwed;", 0x02305 }, /* PROJECTIVE */
+{ "barwedge;", 0x02305 }, /* PROJECTIVE */
+{ "bbrk;", 0x023B5 }, /* BOTTOM SQUARE BRACKET */
+{ "bbrktbrk;", 0x023B6 }, /* BOTTOM SQUARE BRACKET OVER TOP SQUARE BRACKET */
+{ "bcong;", 0x0224C }, /* ALL EQUAL TO */
+{ "bcy;", 0x00431 }, /* CYRILLIC SMALL LETTER BE */
+{ "bdquo;", 0x0201E }, /* DOUBLE LOW-9 QUOTATION MARK */
+{ "becaus;", 0x02235 }, /* BECAUSE */
+{ "because;", 0x02235 }, /* BECAUSE */
+{ "bemptyv;", 0x029B0 }, /* REVERSED EMPTY SET */
+{ "bepsi;", 0x003F6 }, /* GREEK REVERSED LUNATE EPSILON SYMBOL */
+{ "bernou;", 0x0212C }, /* SCRIPT CAPITAL B */
+{ "beta;", 0x003B2 }, /* GREEK SMALL LETTER BETA */
+{ "beth;", 0x02136 }, /* BET SYMBOL */
+{ "between;", 0x0226C }, /* BETWEEN */
+{ "bfr;", 0x1D51F }, /* MATHEMATICAL FRAKTUR SMALL B */
+{ "bigcap;", 0x022C2 }, /* N-ARY INTERSECTION */
+{ "bigcirc;", 0x025EF }, /* LARGE CIRCLE */
+{ "bigcup;", 0x022C3 }, /* N-ARY UNION */
+{ "bigodot;", 0x02A00 }, /* N-ARY CIRCLED DOT OPERATOR */
+{ "bigoplus;", 0x02A01 }, /* N-ARY CIRCLED PLUS OPERATOR */
+{ "bigotimes;", 0x02A02 }, /* N-ARY CIRCLED TIMES OPERATOR */
+{ "bigsqcup;", 0x02A06 }, /* N-ARY SQUARE UNION OPERATOR */
+{ "bigstar;", 0x02605 }, /* BLACK STAR */
+{ "bigtriangledown;", 0x025BD }, /* WHITE DOWN-POINTING TRIANGLE */
+{ "bigtriangleup;", 0x025B3 }, /* WHITE UP-POINTING TRIANGLE */
+{ "biguplus;", 0x02A04 }, /* N-ARY UNION OPERATOR WITH PLUS */
+{ "bigvee;", 0x022C1 }, /* N-ARY LOGICAL OR */
+{ "bigwedge;", 0x022C0 }, /* N-ARY LOGICAL AND */
+{ "bkarow;", 0x0290D }, /* RIGHTWARDS DOUBLE DASH ARROW */
+{ "blacklozenge;", 0x029EB }, /* BLACK LOZENGE */
+{ "blacksquare;", 0x025AA }, /* BLACK SMALL SQUARE */
+{ "blacktriangle;", 0x025B4 }, /* BLACK UP-POINTING SMALL TRIANGLE */
+{ "blacktriangledown;", 0x025BE }, /* BLACK DOWN-POINTING SMALL TRIANGLE */
+{ "blacktriangleleft;", 0x025C2 }, /* BLACK LEFT-POINTING SMALL TRIANGLE */
+{ "blacktriangleright;", 0x025B8 }, /* BLACK RIGHT-POINTING SMALL TRIANGLE */
+{ "blank;", 0x02423 }, /* OPEN BOX */
+{ "blk12;", 0x02592 }, /* MEDIUM SHADE */
+{ "blk14;", 0x02591 }, /* LIGHT SHADE */
+{ "blk34;", 0x02593 }, /* DARK SHADE */
+{ "block;", 0x02588 }, /* FULL BLOCK */
+{ "bnot;", 0x02310 }, /* REVERSED NOT SIGN */
+{ "bopf;", 0x1D553 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL B */
+{ "bot;", 0x022A5 }, /* UP TACK */
+{ "bottom;", 0x022A5 }, /* UP TACK */
+{ "bowtie;", 0x022C8 }, /* BOWTIE */
+{ "boxDL;", 0x02557 }, /* BOX DRAWINGS DOUBLE DOWN AND LEFT */
+{ "boxDR;", 0x02554 }, /* BOX DRAWINGS DOUBLE DOWN AND RIGHT */
+{ "boxDl;", 0x02556 }, /* BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE */
+{ "boxDr;", 0x02553 }, /* BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE */
+{ "boxH;", 0x02550 }, /* BOX DRAWINGS DOUBLE HORIZONTAL */
+{ "boxHD;", 0x02566 }, /* BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL */
+{ "boxHU;", 0x02569 }, /* BOX DRAWINGS DOUBLE UP AND HORIZONTAL */
+{ "boxHd;", 0x02564 }, /* BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE */
+{ "boxHu;", 0x02567 }, /* BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE */
+{ "boxUL;", 0x0255D }, /* BOX DRAWINGS DOUBLE UP AND LEFT */
+{ "boxUR;", 0x0255A }, /* BOX DRAWINGS DOUBLE UP AND RIGHT */
+{ "boxUl;", 0x0255C }, /* BOX DRAWINGS UP DOUBLE AND LEFT SINGLE */
+{ "boxUr;", 0x02559 }, /* BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE */
+{ "boxV;", 0x02551 }, /* BOX DRAWINGS DOUBLE VERTICAL */
+{ "boxVH;", 0x0256C }, /* BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL */
+{ "boxVL;", 0x02563 }, /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */
+{ "boxVR;", 0x02560 }, /* BOX DRAWINGS DOUBLE VERTICAL AND RIGHT */
+{ "boxVh;", 0x0256B }, /* BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE */
+{ "boxVl;", 0x02562 }, /* BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE */
+{ "boxVr;", 0x0255F }, /* BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE */
+{ "boxbox;", 0x029C9 }, /* TWO JOINED SQUARES */
+{ "boxdL;", 0x02555 }, /* BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE */
+{ "boxdR;", 0x02552 }, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE */
+{ "boxdl;", 0x02510 }, /* BOX DRAWINGS LIGHT DOWN AND LEFT */
+{ "boxdr;", 0x0250C }, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
+{ "boxh;", 0x02500 }, /* BOX DRAWINGS LIGHT HORIZONTAL */
+{ "boxhD;", 0x02565 }, /* BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE */
+{ "boxhU;", 0x02568 }, /* BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE */
+{ "boxhd;", 0x0252C }, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */
+{ "boxhu;", 0x02534 }, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */
+{ "boxminus;", 0x0229F }, /* SQUARED MINUS */
+{ "boxplus;", 0x0229E }, /* SQUARED PLUS */
+{ "boxtimes;", 0x022A0 }, /* SQUARED TIMES */
+{ "boxuL;", 0x0255B }, /* BOX DRAWINGS UP SINGLE AND LEFT DOUBLE */
+{ "boxuR;", 0x02558 }, /* BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE */
+{ "boxul;", 0x02518 }, /* BOX DRAWINGS LIGHT UP AND LEFT */
+{ "boxur;", 0x02514 }, /* BOX DRAWINGS LIGHT UP AND RIGHT */
+{ "boxv;", 0x02502 }, /* BOX DRAWINGS LIGHT VERTICAL */
+{ "boxvH;", 0x0256A }, /* BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE */
+{ "boxvL;", 0x02561 }, /* BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE */
+{ "boxvR;", 0x0255E }, /* BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE */
+{ "boxvh;", 0x0253C }, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */
+{ "boxvl;", 0x02524 }, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */
+{ "boxvr;", 0x0251C }, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
+{ "bprime;", 0x02035 }, /* REVERSED PRIME */
+{ "breve;", 0x002D8 }, /* BREVE */
+{ "brvbar;", 0x000A6 }, /* BROKEN BAR */
+{ "bscr;", 0x1D4B7 }, /* MATHEMATICAL SCRIPT SMALL B */
+{ "bsemi;", 0x0204F }, /* REVERSED SEMICOLON */
+{ "bsim;", 0x0223D }, /* REVERSED TILDE */
+{ "bsime;", 0x022CD }, /* REVERSED TILDE EQUALS */
+{ "bsol;", 0x0005C }, /* REVERSE SOLIDUS */
+{ "bsolb;", 0x029C5 }, /* SQUARED FALLING DIAGONAL SLASH */
+{ "bull;", 0x02022 }, /* BULLET */
+{ "bullet;", 0x02022 }, /* BULLET */
+{ "bump;", 0x0224E }, /* GEOMETRICALLY EQUIVALENT TO */
+{ "bumpE;", 0x02AAE }, /* EQUALS SIGN WITH BUMPY ABOVE */
+{ "bumpe;", 0x0224F }, /* DIFFERENCE BETWEEN */
+{ "bumpeq;", 0x0224F }, /* DIFFERENCE BETWEEN */
+{ "cacute;", 0x00107 }, /* LATIN SMALL LETTER C WITH ACUTE */
+{ "cap;", 0x02229 }, /* INTERSECTION */
+{ "capand;", 0x02A44 }, /* INTERSECTION WITH LOGICAL AND */
+{ "capbrcup;", 0x02A49 }, /* INTERSECTION ABOVE BAR ABOVE UNION */
+{ "capcap;", 0x02A4B }, /* INTERSECTION BESIDE AND JOINED WITH INTERSECTION */
+{ "capcup;", 0x02A47 }, /* INTERSECTION ABOVE UNION */
+{ "capdot;", 0x02A40 }, /* INTERSECTION WITH DOT */
+{ "caret;", 0x02041 }, /* CARET INSERTION POINT */
+{ "caron;", 0x002C7 }, /* CARON */
+{ "ccaps;", 0x02A4D }, /* CLOSED INTERSECTION WITH SERIFS */
+{ "ccaron;", 0x0010D }, /* LATIN SMALL LETTER C WITH CARON */
+{ "ccedil;", 0x000E7 }, /* LATIN SMALL LETTER C WITH CEDILLA */
+{ "ccirc;", 0x00109 }, /* LATIN SMALL LETTER C WITH CIRCUMFLEX */
+{ "ccups;", 0x02A4C }, /* CLOSED UNION WITH SERIFS */
+{ "ccupssm;", 0x02A50 }, /* CLOSED UNION WITH SERIFS AND SMASH PRODUCT */
+{ "cdot;", 0x0010B }, /* LATIN SMALL LETTER C WITH DOT ABOVE */
+{ "cedil;", 0x000B8 }, /* CEDILLA */
+{ "cemptyv;", 0x029B2 }, /* EMPTY SET WITH SMALL CIRCLE ABOVE */
+{ "cent;", 0x000A2 }, /* CENT SIGN */
+{ "centerdot;", 0x000B7 }, /* MIDDLE DOT */
+{ "cfr;", 0x1D520 }, /* MATHEMATICAL FRAKTUR SMALL C */
+{ "chcy;", 0x00447 }, /* CYRILLIC SMALL LETTER CHE */
+{ "check;", 0x02713 }, /* CHECK MARK */
+{ "checkmark;", 0x02713 }, /* CHECK MARK */
+{ "chi;", 0x003C7 }, /* GREEK SMALL LETTER CHI */
+{ "cir;", 0x025CB }, /* WHITE CIRCLE */
+{ "cirE;", 0x029C3 }, /* CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT */
+{ "circ;", 0x002C6 }, /* MODIFIER LETTER CIRCUMFLEX ACCENT */
+{ "circeq;", 0x02257 }, /* RING EQUAL TO */
+{ "circlearrowleft;", 0x021BA }, /* ANTICLOCKWISE OPEN CIRCLE ARROW */
+{ "circlearrowright;", 0x021BB }, /* CLOCKWISE OPEN CIRCLE ARROW */
+{ "circledR;", 0x000AE }, /* REGISTERED SIGN */
+{ "circledS;", 0x024C8 }, /* CIRCLED LATIN CAPITAL LETTER S */
+{ "circledast;", 0x0229B }, /* CIRCLED ASTERISK OPERATOR */
+{ "circledcirc;", 0x0229A }, /* CIRCLED RING OPERATOR */
+{ "circleddash;", 0x0229D }, /* CIRCLED DASH */
+{ "cire;", 0x02257 }, /* RING EQUAL TO */
+{ "cirfnint;", 0x02A10 }, /* CIRCULATION FUNCTION */
+{ "cirmid;", 0x02AEF }, /* VERTICAL LINE WITH CIRCLE ABOVE */
+{ "cirscir;", 0x029C2 }, /* CIRCLE WITH SMALL CIRCLE TO THE RIGHT */
+{ "clubs;", 0x02663 }, /* BLACK CLUB SUIT */
+{ "clubsuit;", 0x02663 }, /* BLACK CLUB SUIT */
+{ "colon;", 0x0003A }, /* COLON */
+{ "colone;", 0x02254 }, /* COLON EQUALS */
+{ "coloneq;", 0x02254 }, /* COLON EQUALS */
+{ "comma;", 0x0002C }, /* COMMA */
+{ "commat;", 0x00040 }, /* COMMERCIAL AT */
+{ "comp;", 0x02201 }, /* COMPLEMENT */
+{ "compfn;", 0x02218 }, /* RING OPERATOR */
+{ "complement;", 0x02201 }, /* COMPLEMENT */
+{ "complexes;", 0x02102 }, /* DOUBLE-STRUCK CAPITAL C */
+{ "cong;", 0x02245 }, /* APPROXIMATELY EQUAL TO */
+{ "congdot;", 0x02A6D }, /* CONGRUENT WITH DOT ABOVE */
+{ "conint;", 0x0222E }, /* CONTOUR INTEGRAL */
+{ "copf;", 0x1D554 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL C */
+{ "coprod;", 0x02210 }, /* N-ARY COPRODUCT */
+{ "copy;", 0x000A9 }, /* COPYRIGHT SIGN */
+{ "copysr;", 0x02117 }, /* SOUND RECORDING COPYRIGHT */
+{ "crarr;", 0x021B5 }, /* DOWNWARDS ARROW WITH CORNER LEFTWARDS */
+{ "cross;", 0x02717 }, /* BALLOT X */
+{ "cscr;", 0x1D4B8 }, /* MATHEMATICAL SCRIPT SMALL C */
+{ "csub;", 0x02ACF }, /* CLOSED SUBSET */
+{ "csube;", 0x02AD1 }, /* CLOSED SUBSET OR EQUAL TO */
+{ "csup;", 0x02AD0 }, /* CLOSED SUPERSET */
+{ "csupe;", 0x02AD2 }, /* CLOSED SUPERSET OR EQUAL TO */
+{ "ctdot;", 0x022EF }, /* MIDLINE HORIZONTAL ELLIPSIS */
+{ "cudarrl;", 0x02938 }, /* RIGHT-SIDE ARC CLOCKWISE ARROW */
+{ "cudarrr;", 0x02935 }, /* ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS */
+{ "cuepr;", 0x022DE }, /* EQUAL TO OR PRECEDES */
+{ "cuesc;", 0x022DF }, /* EQUAL TO OR SUCCEEDS */
+{ "cularr;", 0x021B6 }, /* ANTICLOCKWISE TOP SEMICIRCLE ARROW */
+{ "cularrp;", 0x0293D }, /* TOP ARC ANTICLOCKWISE ARROW WITH PLUS */
+{ "cup;", 0x0222A }, /* UNION */
+{ "cupbrcap;", 0x02A48 }, /* UNION ABOVE BAR ABOVE INTERSECTION */
+{ "cupcap;", 0x02A46 }, /* UNION ABOVE INTERSECTION */
+{ "cupcup;", 0x02A4A }, /* UNION BESIDE AND JOINED WITH UNION */
+{ "cupdot;", 0x0228D }, /* MULTISET MULTIPLICATION */
+{ "cupor;", 0x02A45 }, /* UNION WITH LOGICAL OR */
+{ "curarr;", 0x021B7 }, /* CLOCKWISE TOP SEMICIRCLE ARROW */
+{ "curarrm;", 0x0293C }, /* TOP ARC CLOCKWISE ARROW WITH MINUS */
+{ "curlyeqprec;", 0x022DE }, /* EQUAL TO OR PRECEDES */
+{ "curlyeqsucc;", 0x022DF }, /* EQUAL TO OR SUCCEEDS */
+{ "curlyvee;", 0x022CE }, /* CURLY LOGICAL OR */
+{ "curlywedge;", 0x022CF }, /* CURLY LOGICAL AND */
+{ "curren;", 0x000A4 }, /* CURRENCY SIGN */
+{ "curvearrowleft;", 0x021B6 }, /* ANTICLOCKWISE TOP SEMICIRCLE ARROW */
+{ "curvearrowright;", 0x021B7 }, /* CLOCKWISE TOP SEMICIRCLE ARROW */
+{ "cuvee;", 0x022CE }, /* CURLY LOGICAL OR */
+{ "cuwed;", 0x022CF }, /* CURLY LOGICAL AND */
+{ "cwconint;", 0x02232 }, /* CLOCKWISE CONTOUR INTEGRAL */
+{ "cwint;", 0x02231 }, /* CLOCKWISE INTEGRAL */
+{ "cylcty;", 0x0232D }, /* CYLINDRICITY */
+{ "dArr;", 0x021D3 }, /* DOWNWARDS DOUBLE ARROW */
+{ "dHar;", 0x02965 }, /* DOWNWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT */
+{ "dagger;", 0x02020 }, /* DAGGER */
+{ "daleth;", 0x02138 }, /* DALET SYMBOL */
+{ "darr;", 0x02193 }, /* DOWNWARDS ARROW */
+{ "dash;", 0x02010 }, /* HYPHEN */
+{ "dashv;", 0x022A3 }, /* LEFT TACK */
+{ "dbkarow;", 0x0290F }, /* RIGHTWARDS TRIPLE DASH ARROW */
+{ "dblac;", 0x002DD }, /* DOUBLE ACUTE ACCENT */
+{ "dcaron;", 0x0010F }, /* LATIN SMALL LETTER D WITH CARON */
+{ "dcy;", 0x00434 }, /* CYRILLIC SMALL LETTER DE */
+{ "dd;", 0x02146 }, /* DOUBLE-STRUCK ITALIC SMALL D */
+{ "ddagger;", 0x02021 }, /* DOUBLE DAGGER */
+{ "ddarr;", 0x021CA }, /* DOWNWARDS PAIRED ARROWS */
+{ "ddotseq;", 0x02A77 }, /* EQUALS SIGN WITH TWO DOTS ABOVE AND TWO DOTS BELOW */
+{ "deg;", 0x000B0 }, /* DEGREE SIGN */
+{ "delta;", 0x003B4 }, /* GREEK SMALL LETTER DELTA */
+{ "demptyv;", 0x029B1 }, /* EMPTY SET WITH OVERBAR */
+{ "dfisht;", 0x0297F }, /* DOWN FISH TAIL */
+{ "dfr;", 0x1D521 }, /* MATHEMATICAL FRAKTUR SMALL D */
+{ "dharl;", 0x021C3 }, /* DOWNWARDS HARPOON WITH BARB LEFTWARDS */
+{ "dharr;", 0x021C2 }, /* DOWNWARDS HARPOON WITH BARB RIGHTWARDS */
+{ "diam;", 0x022C4 }, /* DIAMOND OPERATOR */
+{ "diamond;", 0x022C4 }, /* DIAMOND OPERATOR */
+{ "diamondsuit;", 0x02666 }, /* BLACK DIAMOND SUIT */
+{ "diams;", 0x02666 }, /* BLACK DIAMOND SUIT */
+{ "die;", 0x000A8 }, /* DIAERESIS */
+{ "digamma;", 0x003DD }, /* GREEK SMALL LETTER DIGAMMA */
+{ "disin;", 0x022F2 }, /* ELEMENT OF WITH LONG HORIZONTAL STROKE */
+{ "div;", 0x000F7 }, /* DIVISION SIGN */
+{ "divide;", 0x000F7 }, /* DIVISION SIGN */
+{ "divideontimes;", 0x022C7 }, /* DIVISION TIMES */
+{ "divonx;", 0x022C7 }, /* DIVISION TIMES */
+{ "djcy;", 0x00452 }, /* CYRILLIC SMALL LETTER DJE */
+{ "dlcorn;", 0x0231E }, /* BOTTOM LEFT CORNER */
+{ "dlcrop;", 0x0230D }, /* BOTTOM LEFT CROP */
+{ "dollar;", 0x00024 }, /* DOLLAR SIGN */
+{ "dopf;", 0x1D555 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL D */
+{ "dot;", 0x002D9 }, /* DOT ABOVE */
+{ "doteq;", 0x02250 }, /* APPROACHES THE LIMIT */
+{ "doteqdot;", 0x02251 }, /* GEOMETRICALLY EQUAL TO */
+{ "dotminus;", 0x02238 }, /* DOT MINUS */
+{ "dotplus;", 0x02214 }, /* DOT PLUS */
+{ "dotsquare;", 0x022A1 }, /* SQUARED DOT OPERATOR */
+{ "doublebarwedge;", 0x02306 }, /* PERSPECTIVE */
+{ "downarrow;", 0x02193 }, /* DOWNWARDS ARROW */
+{ "downdownarrows;", 0x021CA }, /* DOWNWARDS PAIRED ARROWS */
+{ "downharpoonleft;", 0x021C3 }, /* DOWNWARDS HARPOON WITH BARB LEFTWARDS */
+{ "downharpoonright;", 0x021C2 }, /* DOWNWARDS HARPOON WITH BARB RIGHTWARDS */
+{ "drbkarow;", 0x02910 }, /* RIGHTWARDS TWO-HEADED TRIPLE DASH ARROW */
+{ "drcorn;", 0x0231F }, /* BOTTOM RIGHT CORNER */
+{ "drcrop;", 0x0230C }, /* BOTTOM RIGHT CROP */
+{ "dscr;", 0x1D4B9 }, /* MATHEMATICAL SCRIPT SMALL D */
+{ "dscy;", 0x00455 }, /* CYRILLIC SMALL LETTER DZE */
+{ "dsol;", 0x029F6 }, /* SOLIDUS WITH OVERBAR */
+{ "dstrok;", 0x00111 }, /* LATIN SMALL LETTER D WITH STROKE */
+{ "dtdot;", 0x022F1 }, /* DOWN RIGHT DIAGONAL ELLIPSIS */
+{ "dtri;", 0x025BF }, /* WHITE DOWN-POINTING SMALL TRIANGLE */
+{ "dtrif;", 0x025BE }, /* BLACK DOWN-POINTING SMALL TRIANGLE */
+{ "duarr;", 0x021F5 }, /* DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW */
+{ "duhar;", 0x0296F }, /* DOWNWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT */
+{ "dwangle;", 0x029A6 }, /* OBLIQUE ANGLE OPENING UP */
+{ "dzcy;", 0x0045F }, /* CYRILLIC SMALL LETTER DZHE */
+{ "dzigrarr;", 0x027FF }, /* LONG RIGHTWARDS SQUIGGLE ARROW */
+{ "eDDot;", 0x02A77 }, /* EQUALS SIGN WITH TWO DOTS ABOVE AND TWO DOTS BELOW */
+{ "eDot;", 0x02251 }, /* GEOMETRICALLY EQUAL TO */
+{ "eacute;", 0x000E9 }, /* LATIN SMALL LETTER E WITH ACUTE */
+{ "easter;", 0x02A6E }, /* EQUALS WITH ASTERISK */
+{ "ecaron;", 0x0011B }, /* LATIN SMALL LETTER E WITH CARON */
+{ "ecir;", 0x02256 }, /* RING IN EQUAL TO */
+{ "ecirc;", 0x000EA }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
+{ "ecolon;", 0x02255 }, /* EQUALS COLON */
+{ "ecy;", 0x0044D }, /* CYRILLIC SMALL LETTER E */
+{ "edot;", 0x00117 }, /* LATIN SMALL LETTER E WITH DOT ABOVE */
+{ "ee;", 0x02147 }, /* DOUBLE-STRUCK ITALIC SMALL E */
+{ "efDot;", 0x02252 }, /* APPROXIMATELY EQUAL TO OR THE IMAGE OF */
+{ "efr;", 0x1D522 }, /* MATHEMATICAL FRAKTUR SMALL E */
+{ "eg;", 0x02A9A }, /* DOUBLE-LINE EQUAL TO OR GREATER-THAN */
+{ "egrave;", 0x000E8 }, /* LATIN SMALL LETTER E WITH GRAVE */
+{ "egs;", 0x02A96 }, /* SLANTED EQUAL TO OR GREATER-THAN */
+{ "egsdot;", 0x02A98 }, /* SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE */
+{ "el;", 0x02A99 }, /* DOUBLE-LINE EQUAL TO OR LESS-THAN */
+{ "elinters;", 0x023E7 }, /* ELECTRICAL INTERSECTION */
+{ "ell;", 0x02113 }, /* SCRIPT SMALL L */
+{ "els;", 0x02A95 }, /* SLANTED EQUAL TO OR LESS-THAN */
+{ "elsdot;", 0x02A97 }, /* SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE */
+{ "emacr;", 0x00113 }, /* LATIN SMALL LETTER E WITH MACRON */
+{ "empty;", 0x02205 }, /* EMPTY SET */
+{ "emptyset;", 0x02205 }, /* EMPTY SET */
+{ "emptyv;", 0x02205 }, /* EMPTY SET */
+{ "emsp13;", 0x02004 }, /* THREE-PER-EM SPACE */
+{ "emsp14;", 0x02005 }, /* FOUR-PER-EM SPACE */
+{ "emsp;", 0x02003 }, /* EM SPACE */
+{ "eng;", 0x0014B }, /* LATIN SMALL LETTER ENG */
+{ "ensp;", 0x02002 }, /* EN SPACE */
+{ "eogon;", 0x00119 }, /* LATIN SMALL LETTER E WITH OGONEK */
+{ "eopf;", 0x1D556 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL E */
+{ "epar;", 0x022D5 }, /* EQUAL AND PARALLEL TO */
+{ "eparsl;", 0x029E3 }, /* EQUALS SIGN AND SLANTED PARALLEL */
+{ "eplus;", 0x02A71 }, /* EQUALS SIGN ABOVE PLUS SIGN */
+{ "epsi;", 0x003F5 }, /* GREEK LUNATE EPSILON SYMBOL */
+{ "epsilon;", 0x003B5 }, /* GREEK SMALL LETTER EPSILON */
+{ "epsiv;", 0x003B5 }, /* GREEK SMALL LETTER EPSILON */
+{ "eqcirc;", 0x02256 }, /* RING IN EQUAL TO */
+{ "eqcolon;", 0x02255 }, /* EQUALS COLON */
+{ "eqsim;", 0x02242 }, /* MINUS TILDE */
+{ "eqslantgtr;", 0x02A96 }, /* SLANTED EQUAL TO OR GREATER-THAN */
+{ "eqslantless;", 0x02A95 }, /* SLANTED EQUAL TO OR LESS-THAN */
+{ "equals;", 0x0003D }, /* EQUALS SIGN */
+{ "equest;", 0x0225F }, /* QUESTIONED EQUAL TO */
+{ "equiv;", 0x02261 }, /* IDENTICAL TO */
+{ "equivDD;", 0x02A78 }, /* EQUIVALENT WITH FOUR DOTS ABOVE */
+{ "eqvparsl;", 0x029E5 }, /* IDENTICAL TO AND SLANTED PARALLEL */
+{ "erDot;", 0x02253 }, /* IMAGE OF OR APPROXIMATELY EQUAL TO */
+{ "erarr;", 0x02971 }, /* EQUALS SIGN ABOVE RIGHTWARDS ARROW */
+{ "escr;", 0x0212F }, /* SCRIPT SMALL E */
+{ "esdot;", 0x02250 }, /* APPROACHES THE LIMIT */
+{ "esim;", 0x02242 }, /* MINUS TILDE */
+{ "eta;", 0x003B7 }, /* GREEK SMALL LETTER ETA */
+{ "eth;", 0x000F0 }, /* LATIN SMALL LETTER ETH */
+{ "euml;", 0x000EB }, /* LATIN SMALL LETTER E WITH DIAERESIS */
+{ "euro;", 0x020AC }, /* EURO SIGN */
+{ "excl;", 0x00021 }, /* EXCLAMATION MARK */
+{ "exist;", 0x02203 }, /* THERE EXISTS */
+{ "expectation;", 0x02130 }, /* SCRIPT CAPITAL E */
+{ "exponentiale;", 0x02147 }, /* DOUBLE-STRUCK ITALIC SMALL E */
+{ "fallingdotseq;", 0x02252 }, /* APPROXIMATELY EQUAL TO OR THE IMAGE OF */
+{ "fcy;", 0x00444 }, /* CYRILLIC SMALL LETTER EF */
+{ "female;", 0x02640 }, /* FEMALE SIGN */
+{ "ffilig;", 0x0FB03 }, /* LATIN SMALL LIGATURE FFI */
+{ "fflig;", 0x0FB00 }, /* LATIN SMALL LIGATURE FF */
+{ "ffllig;", 0x0FB04 }, /* LATIN SMALL LIGATURE FFL */
+{ "ffr;", 0x1D523 }, /* MATHEMATICAL FRAKTUR SMALL F */
+{ "filig;", 0x0FB01 }, /* LATIN SMALL LIGATURE FI */
+{ "flat;", 0x0266D }, /* MUSIC FLAT SIGN */
+{ "fllig;", 0x0FB02 }, /* LATIN SMALL LIGATURE FL */
+{ "fltns;", 0x025B1 }, /* WHITE PARALLELOGRAM */
+{ "fnof;", 0x00192 }, /* LATIN SMALL LETTER F WITH HOOK */
+{ "fopf;", 0x1D557 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL F */
+{ "forall;", 0x02200 }, /* FOR ALL */
+{ "fork;", 0x022D4 }, /* PITCHFORK */
+{ "forkv;", 0x02AD9 }, /* ELEMENT OF OPENING DOWNWARDS */
+{ "fpartint;", 0x02A0D }, /* FINITE PART INTEGRAL */
+{ "frac12;", 0x000BD }, /* VULGAR FRACTION ONE HALF */
+{ "frac13;", 0x02153 }, /* VULGAR FRACTION ONE THIRD */
+{ "frac14;", 0x000BC }, /* VULGAR FRACTION ONE QUARTER */
+{ "frac15;", 0x02155 }, /* VULGAR FRACTION ONE FIFTH */
+{ "frac16;", 0x02159 }, /* VULGAR FRACTION ONE SIXTH */
+{ "frac18;", 0x0215B }, /* VULGAR FRACTION ONE EIGHTH */
+{ "frac23;", 0x02154 }, /* VULGAR FRACTION TWO THIRDS */
+{ "frac25;", 0x02156 }, /* VULGAR FRACTION TWO FIFTHS */
+{ "frac34;", 0x000BE }, /* VULGAR FRACTION THREE QUARTERS */
+{ "frac35;", 0x02157 }, /* VULGAR FRACTION THREE FIFTHS */
+{ "frac38;", 0x0215C }, /* VULGAR FRACTION THREE EIGHTHS */
+{ "frac45;", 0x02158 }, /* VULGAR FRACTION FOUR FIFTHS */
+{ "frac56;", 0x0215A }, /* VULGAR FRACTION FIVE SIXTHS */
+{ "frac58;", 0x0215D }, /* VULGAR FRACTION FIVE EIGHTHS */
+{ "frac78;", 0x0215E }, /* VULGAR FRACTION SEVEN EIGHTHS */
+{ "frasl;", 0x02044 }, /* FRACTION SLASH */
+{ "frown;", 0x02322 }, /* FROWN */
+{ "fscr;", 0x1D4BB }, /* MATHEMATICAL SCRIPT SMALL F */
+{ "gE;", 0x02267 }, /* GREATER-THAN OVER EQUAL TO */
+{ "gEl;", 0x02A8C }, /* GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN */
+{ "gacute;", 0x001F5 }, /* LATIN SMALL LETTER G WITH ACUTE */
+{ "gamma;", 0x003B3 }, /* GREEK SMALL LETTER GAMMA */
+{ "gammad;", 0x003DD }, /* GREEK SMALL LETTER DIGAMMA */
+{ "gap;", 0x02A86 }, /* GREATER-THAN OR APPROXIMATE */
+{ "gbreve;", 0x0011F }, /* LATIN SMALL LETTER G WITH BREVE */
+{ "gcirc;", 0x0011D }, /* LATIN SMALL LETTER G WITH CIRCUMFLEX */
+{ "gcy;", 0x00433 }, /* CYRILLIC SMALL LETTER GHE */
+{ "gdot;", 0x00121 }, /* LATIN SMALL LETTER G WITH DOT ABOVE */
+{ "ge;", 0x02265 }, /* GREATER-THAN OR EQUAL TO */
+{ "gel;", 0x022DB }, /* GREATER-THAN EQUAL TO OR LESS-THAN */
+{ "geq;", 0x02265 }, /* GREATER-THAN OR EQUAL TO */
+{ "geqq;", 0x02267 }, /* GREATER-THAN OVER EQUAL TO */
+{ "geqslant;", 0x02A7E }, /* GREATER-THAN OR SLANTED EQUAL TO */
+{ "ges;", 0x02A7E }, /* GREATER-THAN OR SLANTED EQUAL TO */
+{ "gescc;", 0x02AA9 }, /* GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL */
+{ "gesdot;", 0x02A80 }, /* GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE */
+{ "gesdoto;", 0x02A82 }, /* GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE */
+{ "gesdotol;", 0x02A84 }, /* GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT */
+{ "gesles;", 0x02A94 }, /* GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL */
+{ "gfr;", 0x1D524 }, /* MATHEMATICAL FRAKTUR SMALL G */
+{ "gg;", 0x0226B }, /* MUCH GREATER-THAN */
+{ "ggg;", 0x022D9 }, /* VERY MUCH GREATER-THAN */
+{ "gimel;", 0x02137 }, /* GIMEL SYMBOL */
+{ "gjcy;", 0x00453 }, /* CYRILLIC SMALL LETTER GJE */
+{ "gl;", 0x02277 }, /* GREATER-THAN OR LESS-THAN */
+{ "glE;", 0x02A92 }, /* GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL */
+{ "gla;", 0x02AA5 }, /* GREATER-THAN BESIDE LESS-THAN */
+{ "glj;", 0x02AA4 }, /* GREATER-THAN OVERLAPPING LESS-THAN */
+{ "gnE;", 0x02269 }, /* GREATER-THAN BUT NOT EQUAL TO */
+{ "gnap;", 0x02A8A }, /* GREATER-THAN AND NOT APPROXIMATE */
+{ "gnapprox;", 0x02A8A }, /* GREATER-THAN AND NOT APPROXIMATE */
+{ "gne;", 0x02A88 }, /* GREATER-THAN AND SINGLE-LINE NOT EQUAL TO */
+{ "gneq;", 0x02A88 }, /* GREATER-THAN AND SINGLE-LINE NOT EQUAL TO */
+{ "gneqq;", 0x02269 }, /* GREATER-THAN BUT NOT EQUAL TO */
+{ "gnsim;", 0x022E7 }, /* GREATER-THAN BUT NOT EQUIVALENT TO */
+{ "gopf;", 0x1D558 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL G */
+{ "grave;", 0x00060 }, /* GRAVE ACCENT */
+{ "gscr;", 0x0210A }, /* SCRIPT SMALL G */
+{ "gsim;", 0x02273 }, /* GREATER-THAN OR EQUIVALENT TO */
+{ "gsime;", 0x02A8E }, /* GREATER-THAN ABOVE SIMILAR OR EQUAL */
+{ "gsiml;", 0x02A90 }, /* GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN */
+{ "gt;", 0x0003E }, /* GREATER-THAN SIGN */
+{ "gtcc;", 0x02AA7 }, /* GREATER-THAN CLOSED BY CURVE */
+{ "gtcir;", 0x02A7A }, /* GREATER-THAN WITH CIRCLE INSIDE */
+{ "gtdot;", 0x022D7 }, /* GREATER-THAN WITH DOT */
+{ "gtlPar;", 0x02995 }, /* DOUBLE LEFT ARC GREATER-THAN BRACKET */
+{ "gtquest;", 0x02A7C }, /* GREATER-THAN WITH QUESTION MARK ABOVE */
+{ "gtrapprox;", 0x02A86 }, /* GREATER-THAN OR APPROXIMATE */
+{ "gtrarr;", 0x02978 }, /* GREATER-THAN ABOVE RIGHTWARDS ARROW */
+{ "gtrdot;", 0x022D7 }, /* GREATER-THAN WITH DOT */
+{ "gtreqless;", 0x022DB }, /* GREATER-THAN EQUAL TO OR LESS-THAN */
+{ "gtreqqless;", 0x02A8C }, /* GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN */
+{ "gtrless;", 0x02277 }, /* GREATER-THAN OR LESS-THAN */
+{ "gtrsim;", 0x02273 }, /* GREATER-THAN OR EQUIVALENT TO */
+{ "hArr;", 0x021D4 }, /* LEFT RIGHT DOUBLE ARROW */
+{ "hairsp;", 0x0200A }, /* HAIR SPACE */
+{ "half;", 0x000BD }, /* VULGAR FRACTION ONE HALF */
+{ "hamilt;", 0x0210B }, /* SCRIPT CAPITAL H */
+{ "hardcy;", 0x0044A }, /* CYRILLIC SMALL LETTER HARD SIGN */
+{ "harr;", 0x02194 }, /* LEFT RIGHT ARROW */
+{ "harrcir;", 0x02948 }, /* LEFT RIGHT ARROW THROUGH SMALL CIRCLE */
+{ "harrw;", 0x021AD }, /* LEFT RIGHT WAVE ARROW */
+{ "hbar;", 0x0210F }, /* PLANCK CONSTANT OVER TWO PI */
+{ "hcirc;", 0x00125 }, /* LATIN SMALL LETTER H WITH CIRCUMFLEX */
+{ "hearts;", 0x02665 }, /* BLACK HEART SUIT */
+{ "heartsuit;", 0x02665 }, /* BLACK HEART SUIT */
+{ "hellip;", 0x02026 }, /* HORIZONTAL ELLIPSIS */
+{ "hercon;", 0x022B9 }, /* HERMITIAN CONJUGATE MATRIX */
+{ "hfr;", 0x1D525 }, /* MATHEMATICAL FRAKTUR SMALL H */
+{ "hksearow;", 0x02925 }, /* SOUTH EAST ARROW WITH HOOK */
+{ "hkswarow;", 0x02926 }, /* SOUTH WEST ARROW WITH HOOK */
+{ "hoarr;", 0x021FF }, /* LEFT RIGHT OPEN-HEADED ARROW */
+{ "homtht;", 0x0223B }, /* HOMOTHETIC */
+{ "hookleftarrow;", 0x021A9 }, /* LEFTWARDS ARROW WITH HOOK */
+{ "hookrightarrow;", 0x021AA }, /* RIGHTWARDS ARROW WITH HOOK */
+{ "hopf;", 0x1D559 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL H */
+{ "horbar;", 0x02015 }, /* HORIZONTAL BAR */
+{ "hscr;", 0x1D4BD }, /* MATHEMATICAL SCRIPT SMALL H */
+{ "hslash;", 0x0210F }, /* PLANCK CONSTANT OVER TWO PI */
+{ "hstrok;", 0x00127 }, /* LATIN SMALL LETTER H WITH STROKE */
+{ "hybull;", 0x02043 }, /* HYPHEN BULLET */
+{ "hyphen;", 0x02010 }, /* HYPHEN */
+{ "iacute;", 0x000ED }, /* LATIN SMALL LETTER I WITH ACUTE */
+{ "ic;", 0x02063 }, /* INVISIBLE SEPARATOR */
+{ "icirc;", 0x000EE }, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
+{ "icy;", 0x00438 }, /* CYRILLIC SMALL LETTER I */
+{ "iecy;", 0x00435 }, /* CYRILLIC SMALL LETTER IE */
+{ "iexcl;", 0x000A1 }, /* INVERTED EXCLAMATION MARK */
+{ "iff;", 0x021D4 }, /* LEFT RIGHT DOUBLE ARROW */
+{ "ifr;", 0x1D526 }, /* MATHEMATICAL FRAKTUR SMALL I */
+{ "igrave;", 0x000EC }, /* LATIN SMALL LETTER I WITH GRAVE */
+{ "ii;", 0x02148 }, /* DOUBLE-STRUCK ITALIC SMALL I */
+{ "iiiint;", 0x02A0C }, /* QUADRUPLE INTEGRAL OPERATOR */
+{ "iiint;", 0x0222D }, /* TRIPLE INTEGRAL */
+{ "iinfin;", 0x029DC }, /* INCOMPLETE INFINITY */
+{ "iiota;", 0x02129 }, /* TURNED GREEK SMALL LETTER IOTA */
+{ "ijlig;", 0x00133 }, /* LATIN SMALL LIGATURE IJ */
+{ "imacr;", 0x0012B }, /* LATIN SMALL LETTER I WITH MACRON */
+{ "image;", 0x02111 }, /* BLACK-LETTER CAPITAL I */
+{ "imagline;", 0x02110 }, /* SCRIPT CAPITAL I */
+{ "imagpart;", 0x02111 }, /* BLACK-LETTER CAPITAL I */
+{ "imath;", 0x00131 }, /* LATIN SMALL LETTER DOTLESS I */
+{ "imof;", 0x022B7 }, /* IMAGE OF */
+{ "imped;", 0x001B5 }, /* LATIN CAPITAL LETTER Z WITH STROKE */
+{ "in;", 0x02208 }, /* ELEMENT OF */
+{ "incare;", 0x02105 }, /* CARE OF */
+{ "infin;", 0x0221E }, /* INFINITY */
+{ "infintie;", 0x029DD }, /* TIE OVER INFINITY */
+{ "inodot;", 0x00131 }, /* LATIN SMALL LETTER DOTLESS I */
+{ "int;", 0x0222B }, /* INTEGRAL */
+{ "intcal;", 0x022BA }, /* INTERCALATE */
+{ "integers;", 0x02124 }, /* DOUBLE-STRUCK CAPITAL Z */
+{ "intercal;", 0x022BA }, /* INTERCALATE */
+{ "intlarhk;", 0x02A17 }, /* INTEGRAL WITH LEFTWARDS ARROW WITH HOOK */
+{ "intprod;", 0x02A3C }, /* INTERIOR PRODUCT */
+{ "iocy;", 0x00451 }, /* CYRILLIC SMALL LETTER IO */
+{ "iogon;", 0x0012F }, /* LATIN SMALL LETTER I WITH OGONEK */
+{ "iopf;", 0x1D55A }, /* MATHEMATICAL DOUBLE-STRUCK SMALL I */
+{ "iota;", 0x003B9 }, /* GREEK SMALL LETTER IOTA */
+{ "iprod;", 0x02A3C }, /* INTERIOR PRODUCT */
+{ "iquest;", 0x000BF }, /* INVERTED QUESTION MARK */
+{ "iscr;", 0x1D4BE }, /* MATHEMATICAL SCRIPT SMALL I */
+{ "isin;", 0x02208 }, /* ELEMENT OF */
+{ "isinE;", 0x022F9 }, /* ELEMENT OF WITH TWO HORIZONTAL STROKES */
+{ "isindot;", 0x022F5 }, /* ELEMENT OF WITH DOT ABOVE */
+{ "isins;", 0x022F4 }, /* SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE */
+{ "isinsv;", 0x022F3 }, /* ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE */
+{ "isinv;", 0x02208 }, /* ELEMENT OF */
+{ "it;", 0x02062 }, /* INVISIBLE TIMES */
+{ "itilde;", 0x00129 }, /* LATIN SMALL LETTER I WITH TILDE */
+{ "iukcy;", 0x00456 }, /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */
+{ "iuml;", 0x000EF }, /* LATIN SMALL LETTER I WITH DIAERESIS */
+{ "jcirc;", 0x00135 }, /* LATIN SMALL LETTER J WITH CIRCUMFLEX */
+{ "jcy;", 0x00439 }, /* CYRILLIC SMALL LETTER SHORT I */
+{ "jfr;", 0x1D527 }, /* MATHEMATICAL FRAKTUR SMALL J */
+{ "jmath;", 0x00237 }, /* LATIN SMALL LETTER DOTLESS J */
+{ "jopf;", 0x1D55B }, /* MATHEMATICAL DOUBLE-STRUCK SMALL J */
+{ "jscr;", 0x1D4BF }, /* MATHEMATICAL SCRIPT SMALL J */
+{ "jsercy;", 0x00458 }, /* CYRILLIC SMALL LETTER JE */
+{ "jukcy;", 0x00454 }, /* CYRILLIC SMALL LETTER UKRAINIAN IE */
+{ "kappa;", 0x003BA }, /* GREEK SMALL LETTER KAPPA */
+{ "kappav;", 0x003F0 }, /* GREEK KAPPA SYMBOL */
+{ "kcedil;", 0x00137 }, /* LATIN SMALL LETTER K WITH CEDILLA */
+{ "kcy;", 0x0043A }, /* CYRILLIC SMALL LETTER KA */
+{ "kfr;", 0x1D528 }, /* MATHEMATICAL FRAKTUR SMALL K */
+{ "kgreen;", 0x00138 }, /* LATIN SMALL LETTER KRA */
+{ "khcy;", 0x00445 }, /* CYRILLIC SMALL LETTER HA */
+{ "kjcy;", 0x0045C }, /* CYRILLIC SMALL LETTER KJE */
+{ "kopf;", 0x1D55C }, /* MATHEMATICAL DOUBLE-STRUCK SMALL K */
+{ "kscr;", 0x1D4C0 }, /* MATHEMATICAL SCRIPT SMALL K */
+{ "lAarr;", 0x021DA }, /* LEFTWARDS TRIPLE ARROW */
+{ "lArr;", 0x021D0 }, /* LEFTWARDS DOUBLE ARROW */
+{ "lAtail;", 0x0291B }, /* LEFTWARDS DOUBLE ARROW-TAIL */
+{ "lBarr;", 0x0290E }, /* LEFTWARDS TRIPLE DASH ARROW */
+{ "lE;", 0x02266 }, /* LESS-THAN OVER EQUAL TO */
+{ "lEg;", 0x02A8B }, /* LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN */
+{ "lHar;", 0x02962 }, /* LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN */
+{ "lacute;", 0x0013A }, /* LATIN SMALL LETTER L WITH ACUTE */
+{ "laemptyv;", 0x029B4 }, /* EMPTY SET WITH LEFT ARROW ABOVE */
+{ "lagran;", 0x02112 }, /* SCRIPT CAPITAL L */
+{ "lambda;", 0x003BB }, /* GREEK SMALL LETTER LAMDA */
+{ "lang;", 0x027E8 }, /* MATHEMATICAL LEFT ANGLE BRACKET */
+{ "langd;", 0x02991 }, /* LEFT ANGLE BRACKET WITH DOT */
+{ "langle;", 0x027E8 }, /* MATHEMATICAL LEFT ANGLE BRACKET */
+{ "lap;", 0x02A85 }, /* LESS-THAN OR APPROXIMATE */
+{ "laquo;", 0x000AB }, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
+{ "larr;", 0x02190 }, /* LEFTWARDS ARROW */
+{ "larrb;", 0x021E4 }, /* LEFTWARDS ARROW TO BAR */
+{ "larrbfs;", 0x0291F }, /* LEFTWARDS ARROW FROM BAR TO BLACK DIAMOND */
+{ "larrfs;", 0x0291D }, /* LEFTWARDS ARROW TO BLACK DIAMOND */
+{ "larrhk;", 0x021A9 }, /* LEFTWARDS ARROW WITH HOOK */
+{ "larrlp;", 0x021AB }, /* LEFTWARDS ARROW WITH LOOP */
+{ "larrpl;", 0x02939 }, /* LEFT-SIDE ARC ANTICLOCKWISE ARROW */
+{ "larrsim;", 0x02973 }, /* LEFTWARDS ARROW ABOVE TILDE OPERATOR */
+{ "larrtl;", 0x021A2 }, /* LEFTWARDS ARROW WITH TAIL */
+{ "lat;", 0x02AAB }, /* LARGER THAN */
+{ "latail;", 0x02919 }, /* LEFTWARDS ARROW-TAIL */
+{ "late;", 0x02AAD }, /* LARGER THAN OR EQUAL TO */
+{ "lbarr;", 0x0290C }, /* LEFTWARDS DOUBLE DASH ARROW */
+{ "lbbrk;", 0x02772 }, /* LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT */
+{ "lbrace;", 0x0007B }, /* LEFT CURLY BRACKET */
+{ "lbrack;", 0x0005B }, /* LEFT SQUARE BRACKET */
+{ "lbrke;", 0x0298B }, /* LEFT SQUARE BRACKET WITH UNDERBAR */
+{ "lbrksld;", 0x0298F }, /* LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER */
+{ "lbrkslu;", 0x0298D }, /* LEFT SQUARE BRACKET WITH TICK IN TOP CORNER */
+{ "lcaron;", 0x0013E }, /* LATIN SMALL LETTER L WITH CARON */
+{ "lcedil;", 0x0013C }, /* LATIN SMALL LETTER L WITH CEDILLA */
+{ "lceil;", 0x02308 }, /* LEFT CEILING */
+{ "lcub;", 0x0007B }, /* LEFT CURLY BRACKET */
+{ "lcy;", 0x0043B }, /* CYRILLIC SMALL LETTER EL */
+{ "ldca;", 0x02936 }, /* ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS */
+{ "ldquo;", 0x0201C }, /* LEFT DOUBLE QUOTATION MARK */
+{ "ldquor;", 0x0201E }, /* DOUBLE LOW-9 QUOTATION MARK */
+{ "ldrdhar;", 0x02967 }, /* LEFTWARDS HARPOON WITH BARB DOWN ABOVE RIGHTWARDS HARPOON WITH BARB DOWN */
+{ "ldrushar;", 0x0294B }, /* LEFT BARB DOWN RIGHT BARB UP HARPOON */
+{ "ldsh;", 0x021B2 }, /* DOWNWARDS ARROW WITH TIP LEFTWARDS */
+{ "le;", 0x02264 }, /* LESS-THAN OR EQUAL TO */
+{ "leftarrow;", 0x02190 }, /* LEFTWARDS ARROW */
+{ "leftarrowtail;", 0x021A2 }, /* LEFTWARDS ARROW WITH TAIL */
+{ "leftharpoondown;", 0x021BD }, /* LEFTWARDS HARPOON WITH BARB DOWNWARDS */
+{ "leftharpoonup;", 0x021BC }, /* LEFTWARDS HARPOON WITH BARB UPWARDS */
+{ "leftleftarrows;", 0x021C7 }, /* LEFTWARDS PAIRED ARROWS */
+{ "leftrightarrow;", 0x02194 }, /* LEFT RIGHT ARROW */
+{ "leftrightarrows;", 0x021C6 }, /* LEFTWARDS ARROW OVER RIGHTWARDS ARROW */
+{ "leftrightharpoons;", 0x021CB }, /* LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON */
+{ "leftrightsquigarrow;", 0x021AD }, /* LEFT RIGHT WAVE ARROW */
+{ "leftthreetimes;", 0x022CB }, /* LEFT SEMIDIRECT PRODUCT */
+{ "leg;", 0x022DA }, /* LESS-THAN EQUAL TO OR GREATER-THAN */
+{ "leq;", 0x02264 }, /* LESS-THAN OR EQUAL TO */
+{ "leqq;", 0x02266 }, /* LESS-THAN OVER EQUAL TO */
+{ "leqslant;", 0x02A7D }, /* LESS-THAN OR SLANTED EQUAL TO */
+{ "les;", 0x02A7D }, /* LESS-THAN OR SLANTED EQUAL TO */
+{ "lescc;", 0x02AA8 }, /* LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL */
+{ "lesdot;", 0x02A7F }, /* LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE */
+{ "lesdoto;", 0x02A81 }, /* LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE */
+{ "lesdotor;", 0x02A83 }, /* LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT */
+{ "lesges;", 0x02A93 }, /* LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL */
+{ "lessapprox;", 0x02A85 }, /* LESS-THAN OR APPROXIMATE */
+{ "lessdot;", 0x022D6 }, /* LESS-THAN WITH DOT */
+{ "lesseqgtr;", 0x022DA }, /* LESS-THAN EQUAL TO OR GREATER-THAN */
+{ "lesseqqgtr;", 0x02A8B }, /* LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN */
+{ "lessgtr;", 0x02276 }, /* LESS-THAN OR GREATER-THAN */
+{ "lesssim;", 0x02272 }, /* LESS-THAN OR EQUIVALENT TO */
+{ "lfisht;", 0x0297C }, /* LEFT FISH TAIL */
+{ "lfloor;", 0x0230A }, /* LEFT FLOOR */
+{ "lfr;", 0x1D529 }, /* MATHEMATICAL FRAKTUR SMALL L */
+{ "lg;", 0x02276 }, /* LESS-THAN OR GREATER-THAN */
+{ "lgE;", 0x02A91 }, /* LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL */
+{ "lhard;", 0x021BD }, /* LEFTWARDS HARPOON WITH BARB DOWNWARDS */
+{ "lharu;", 0x021BC }, /* LEFTWARDS HARPOON WITH BARB UPWARDS */
+{ "lharul;", 0x0296A }, /* LEFTWARDS HARPOON WITH BARB UP ABOVE LONG DASH */
+{ "lhblk;", 0x02584 }, /* LOWER HALF BLOCK */
+{ "ljcy;", 0x00459 }, /* CYRILLIC SMALL LETTER LJE */
+{ "ll;", 0x0226A }, /* MUCH LESS-THAN */
+{ "llarr;", 0x021C7 }, /* LEFTWARDS PAIRED ARROWS */
+{ "llcorner;", 0x0231E }, /* BOTTOM LEFT CORNER */
+{ "llhard;", 0x0296B }, /* LEFTWARDS HARPOON WITH BARB DOWN BELOW LONG DASH */
+{ "lltri;", 0x025FA }, /* LOWER LEFT TRIANGLE */
+{ "lmidot;", 0x00140 }, /* LATIN SMALL LETTER L WITH MIDDLE DOT */
+{ "lmoust;", 0x023B0 }, /* UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION */
+{ "lmoustache;", 0x023B0 }, /* UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION */
+{ "lnE;", 0x02268 }, /* LESS-THAN BUT NOT EQUAL TO */
+{ "lnap;", 0x02A89 }, /* LESS-THAN AND NOT APPROXIMATE */
+{ "lnapprox;", 0x02A89 }, /* LESS-THAN AND NOT APPROXIMATE */
+{ "lne;", 0x02A87 }, /* LESS-THAN AND SINGLE-LINE NOT EQUAL TO */
+{ "lneq;", 0x02A87 }, /* LESS-THAN AND SINGLE-LINE NOT EQUAL TO */
+{ "lneqq;", 0x02268 }, /* LESS-THAN BUT NOT EQUAL TO */
+{ "lnsim;", 0x022E6 }, /* LESS-THAN BUT NOT EQUIVALENT TO */
+{ "loang;", 0x027EC }, /* MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET */
+{ "loarr;", 0x021FD }, /* LEFTWARDS OPEN-HEADED ARROW */
+{ "lobrk;", 0x027E6 }, /* MATHEMATICAL LEFT WHITE SQUARE BRACKET */
+{ "longleftarrow;", 0x027F5 }, /* LONG LEFTWARDS ARROW */
+{ "longleftrightarrow;", 0x027F7 }, /* LONG LEFT RIGHT ARROW */
+{ "longmapsto;", 0x027FC }, /* LONG RIGHTWARDS ARROW FROM BAR */
+{ "longrightarrow;", 0x027F6 }, /* LONG RIGHTWARDS ARROW */
+{ "looparrowleft;", 0x021AB }, /* LEFTWARDS ARROW WITH LOOP */
+{ "looparrowright;", 0x021AC }, /* RIGHTWARDS ARROW WITH LOOP */
+{ "lopar;", 0x02985 }, /* LEFT WHITE PARENTHESIS */
+{ "lopf;", 0x1D55D }, /* MATHEMATICAL DOUBLE-STRUCK SMALL L */
+{ "loplus;", 0x02A2D }, /* PLUS SIGN IN LEFT HALF CIRCLE */
+{ "lotimes;", 0x02A34 }, /* MULTIPLICATION SIGN IN LEFT HALF CIRCLE */
+{ "lowast;", 0x02217 }, /* ASTERISK OPERATOR */
+{ "lowbar;", 0x0005F }, /* LOW LINE */
+{ "loz;", 0x025CA }, /* LOZENGE */
+{ "lozenge;", 0x025CA }, /* LOZENGE */
+{ "lozf;", 0x029EB }, /* BLACK LOZENGE */
+{ "lpar;", 0x00028 }, /* LEFT PARENTHESIS */
+{ "lparlt;", 0x02993 }, /* LEFT ARC LESS-THAN BRACKET */
+{ "lrarr;", 0x021C6 }, /* LEFTWARDS ARROW OVER RIGHTWARDS ARROW */
+{ "lrcorner;", 0x0231F }, /* BOTTOM RIGHT CORNER */
+{ "lrhar;", 0x021CB }, /* LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON */
+{ "lrhard;", 0x0296D }, /* RIGHTWARDS HARPOON WITH BARB DOWN BELOW LONG DASH */
+{ "lrm;", 0x0200E }, /* LEFT-TO-RIGHT MARK */
+{ "lrtri;", 0x022BF }, /* RIGHT TRIANGLE */
+{ "lsaquo;", 0x02039 }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
+{ "lscr;", 0x1D4C1 }, /* MATHEMATICAL SCRIPT SMALL L */
+{ "lsh;", 0x021B0 }, /* UPWARDS ARROW WITH TIP LEFTWARDS */
+{ "lsim;", 0x02272 }, /* LESS-THAN OR EQUIVALENT TO */
+{ "lsime;", 0x02A8D }, /* LESS-THAN ABOVE SIMILAR OR EQUAL */
+{ "lsimg;", 0x02A8F }, /* LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN */
+{ "lsqb;", 0x0005B }, /* LEFT SQUARE BRACKET */
+{ "lsquo;", 0x02018 }, /* LEFT SINGLE QUOTATION MARK */
+{ "lsquor;", 0x0201A }, /* SINGLE LOW-9 QUOTATION MARK */
+{ "lstrok;", 0x00142 }, /* LATIN SMALL LETTER L WITH STROKE */
+{ "lt;", 0x0003C }, /* LESS-THAN SIGN */
+{ "ltcc;", 0x02AA6 }, /* LESS-THAN CLOSED BY CURVE */
+{ "ltcir;", 0x02A79 }, /* LESS-THAN WITH CIRCLE INSIDE */
+{ "ltdot;", 0x022D6 }, /* LESS-THAN WITH DOT */
+{ "lthree;", 0x022CB }, /* LEFT SEMIDIRECT PRODUCT */
+{ "ltimes;", 0x022C9 }, /* LEFT NORMAL FACTOR SEMIDIRECT PRODUCT */
+{ "ltlarr;", 0x02976 }, /* LESS-THAN ABOVE LEFTWARDS ARROW */
+{ "ltquest;", 0x02A7B }, /* LESS-THAN WITH QUESTION MARK ABOVE */
+{ "ltrPar;", 0x02996 }, /* DOUBLE RIGHT ARC LESS-THAN BRACKET */
+{ "ltri;", 0x025C3 }, /* WHITE LEFT-POINTING SMALL TRIANGLE */
+{ "ltrie;", 0x022B4 }, /* NORMAL SUBGROUP OF OR EQUAL TO */
+{ "ltrif;", 0x025C2 }, /* BLACK LEFT-POINTING SMALL TRIANGLE */
+{ "lurdshar;", 0x0294A }, /* LEFT BARB UP RIGHT BARB DOWN HARPOON */
+{ "luruhar;", 0x02966 }, /* LEFTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB UP */
+{ "mDDot;", 0x0223A }, /* GEOMETRIC PROPORTION */
+{ "macr;", 0x000AF }, /* MACRON */
+{ "male;", 0x02642 }, /* MALE SIGN */
+{ "malt;", 0x02720 }, /* MALTESE CROSS */
+{ "maltese;", 0x02720 }, /* MALTESE CROSS */
+{ "map;", 0x021A6 }, /* RIGHTWARDS ARROW FROM BAR */
+{ "mapsto;", 0x021A6 }, /* RIGHTWARDS ARROW FROM BAR */
+{ "mapstodown;", 0x021A7 }, /* DOWNWARDS ARROW FROM BAR */
+{ "mapstoleft;", 0x021A4 }, /* LEFTWARDS ARROW FROM BAR */
+{ "mapstoup;", 0x021A5 }, /* UPWARDS ARROW FROM BAR */
+{ "marker;", 0x025AE }, /* BLACK VERTICAL RECTANGLE */
+{ "mcomma;", 0x02A29 }, /* MINUS SIGN WITH COMMA ABOVE */
+{ "mcy;", 0x0043C }, /* CYRILLIC SMALL LETTER EM */
+{ "mdash;", 0x02014 }, /* EM DASH */
+{ "measuredangle;", 0x02221 }, /* MEASURED ANGLE */
+{ "mfr;", 0x1D52A }, /* MATHEMATICAL FRAKTUR SMALL M */
+{ "mho;", 0x02127 }, /* INVERTED OHM SIGN */
+{ "micro;", 0x000B5 }, /* MICRO SIGN */
+{ "mid;", 0x02223 }, /* DIVIDES */
+{ "midast;", 0x0002A }, /* ASTERISK */
+{ "midcir;", 0x02AF0 }, /* VERTICAL LINE WITH CIRCLE BELOW */
+{ "middot;", 0x000B7 }, /* MIDDLE DOT */
+{ "minus;", 0x02212 }, /* MINUS SIGN */
+{ "minusb;", 0x0229F }, /* SQUARED MINUS */
+{ "minusd;", 0x02238 }, /* DOT MINUS */
+{ "minusdu;", 0x02A2A }, /* MINUS SIGN WITH DOT BELOW */
+{ "mlcp;", 0x02ADB }, /* TRANSVERSAL INTERSECTION */
+{ "mldr;", 0x02026 }, /* HORIZONTAL ELLIPSIS */
+{ "mnplus;", 0x02213 }, /* MINUS-OR-PLUS SIGN */
+{ "models;", 0x022A7 }, /* MODELS */
+{ "mopf;", 0x1D55E }, /* MATHEMATICAL DOUBLE-STRUCK SMALL M */
+{ "mp;", 0x02213 }, /* MINUS-OR-PLUS SIGN */
+{ "mscr;", 0x1D4C2 }, /* MATHEMATICAL SCRIPT SMALL M */
+{ "mstpos;", 0x0223E }, /* INVERTED LAZY S */
+{ "mu;", 0x003BC }, /* GREEK SMALL LETTER MU */
+{ "multimap;", 0x022B8 }, /* MULTIMAP */
+{ "mumap;", 0x022B8 }, /* MULTIMAP */
+{ "nLeftarrow;", 0x021CD }, /* LEFTWARDS DOUBLE ARROW WITH STROKE */
+{ "nLeftrightarrow;", 0x021CE }, /* LEFT RIGHT DOUBLE ARROW WITH STROKE */
+{ "nRightarrow;", 0x021CF }, /* RIGHTWARDS DOUBLE ARROW WITH STROKE */
+{ "nVDash;", 0x022AF }, /* NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE */
+{ "nVdash;", 0x022AE }, /* DOES NOT FORCE */
+{ "nabla;", 0x02207 }, /* NABLA */
+{ "nacute;", 0x00144 }, /* LATIN SMALL LETTER N WITH ACUTE */
+{ "nap;", 0x02249 }, /* NOT ALMOST EQUAL TO */
+{ "napos;", 0x00149 }, /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
+{ "napprox;", 0x02249 }, /* NOT ALMOST EQUAL TO */
+{ "natur;", 0x0266E }, /* MUSIC NATURAL SIGN */
+{ "natural;", 0x0266E }, /* MUSIC NATURAL SIGN */
+{ "naturals;", 0x02115 }, /* DOUBLE-STRUCK CAPITAL N */
+{ "nbsp;", 0x000A0 }, /* NO-BREAK SPACE */
+{ "ncap;", 0x02A43 }, /* INTERSECTION WITH OVERBAR */
+{ "ncaron;", 0x00148 }, /* LATIN SMALL LETTER N WITH CARON */
+{ "ncedil;", 0x00146 }, /* LATIN SMALL LETTER N WITH CEDILLA */
+{ "ncong;", 0x02247 }, /* NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO */
+{ "ncup;", 0x02A42 }, /* UNION WITH OVERBAR */
+{ "ncy;", 0x0043D }, /* CYRILLIC SMALL LETTER EN */
+{ "ndash;", 0x02013 }, /* EN DASH */
+{ "ne;", 0x02260 }, /* NOT EQUAL TO */
+{ "neArr;", 0x021D7 }, /* NORTH EAST DOUBLE ARROW */
+{ "nearhk;", 0x02924 }, /* NORTH EAST ARROW WITH HOOK */
+{ "nearr;", 0x02197 }, /* NORTH EAST ARROW */
+{ "nearrow;", 0x02197 }, /* NORTH EAST ARROW */
+{ "nequiv;", 0x02262 }, /* NOT IDENTICAL TO */
+{ "nesear;", 0x02928 }, /* NORTH EAST ARROW AND SOUTH EAST ARROW */
+{ "nexist;", 0x02204 }, /* THERE DOES NOT EXIST */
+{ "nexists;", 0x02204 }, /* THERE DOES NOT EXIST */
+{ "nfr;", 0x1D52B }, /* MATHEMATICAL FRAKTUR SMALL N */
+{ "nge;", 0x02271 }, /* NEITHER GREATER-THAN NOR EQUAL TO */
+{ "ngeq;", 0x02271 }, /* NEITHER GREATER-THAN NOR EQUAL TO */
+{ "ngsim;", 0x02275 }, /* NEITHER GREATER-THAN NOR EQUIVALENT TO */
+{ "ngt;", 0x0226F }, /* NOT GREATER-THAN */
+{ "ngtr;", 0x0226F }, /* NOT GREATER-THAN */
+{ "nhArr;", 0x021CE }, /* LEFT RIGHT DOUBLE ARROW WITH STROKE */
+{ "nharr;", 0x021AE }, /* LEFT RIGHT ARROW WITH STROKE */
+{ "nhpar;", 0x02AF2 }, /* PARALLEL WITH HORIZONTAL STROKE */
+{ "ni;", 0x0220B }, /* CONTAINS AS MEMBER */
+{ "nis;", 0x022FC }, /* SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE */
+{ "nisd;", 0x022FA }, /* CONTAINS WITH LONG HORIZONTAL STROKE */
+{ "niv;", 0x0220B }, /* CONTAINS AS MEMBER */
+{ "njcy;", 0x0045A }, /* CYRILLIC SMALL LETTER NJE */
+{ "nlArr;", 0x021CD }, /* LEFTWARDS DOUBLE ARROW WITH STROKE */
+{ "nlarr;", 0x0219A }, /* LEFTWARDS ARROW WITH STROKE */
+{ "nldr;", 0x02025 }, /* TWO DOT LEADER */
+{ "nle;", 0x02270 }, /* NEITHER LESS-THAN NOR EQUAL TO */
+{ "nleftarrow;", 0x0219A }, /* LEFTWARDS ARROW WITH STROKE */
+{ "nleftrightarrow;", 0x021AE }, /* LEFT RIGHT ARROW WITH STROKE */
+{ "nleq;", 0x02270 }, /* NEITHER LESS-THAN NOR EQUAL TO */
+{ "nless;", 0x0226E }, /* NOT LESS-THAN */
+{ "nlsim;", 0x02274 }, /* NEITHER LESS-THAN NOR EQUIVALENT TO */
+{ "nlt;", 0x0226E }, /* NOT LESS-THAN */
+{ "nltri;", 0x022EA }, /* NOT NORMAL SUBGROUP OF */
+{ "nltrie;", 0x022EC }, /* NOT NORMAL SUBGROUP OF OR EQUAL TO */
+{ "nmid;", 0x02224 }, /* DOES NOT DIVIDE */
+{ "nopf;", 0x1D55F }, /* MATHEMATICAL DOUBLE-STRUCK SMALL N */
+{ "not;", 0x000AC }, /* NOT SIGN */
+{ "notin;", 0x02209 }, /* NOT AN ELEMENT OF */
+{ "notinva;", 0x02209 }, /* NOT AN ELEMENT OF */
+{ "notinvb;", 0x022F7 }, /* SMALL ELEMENT OF WITH OVERBAR */
+{ "notinvc;", 0x022F6 }, /* ELEMENT OF WITH OVERBAR */
+{ "notni;", 0x0220C }, /* DOES NOT CONTAIN AS MEMBER */
+{ "notniva;", 0x0220C }, /* DOES NOT CONTAIN AS MEMBER */
+{ "notnivb;", 0x022FE }, /* SMALL CONTAINS WITH OVERBAR */
+{ "notnivc;", 0x022FD }, /* CONTAINS WITH OVERBAR */
+{ "npar;", 0x02226 }, /* NOT PARALLEL TO */
+{ "nparallel;", 0x02226 }, /* NOT PARALLEL TO */
+{ "npolint;", 0x02A14 }, /* LINE INTEGRATION NOT INCLUDING THE POLE */
+{ "npr;", 0x02280 }, /* DOES NOT PRECEDE */
+{ "nprcue;", 0x022E0 }, /* DOES NOT PRECEDE OR EQUAL */
+{ "nprec;", 0x02280 }, /* DOES NOT PRECEDE */
+{ "nrArr;", 0x021CF }, /* RIGHTWARDS DOUBLE ARROW WITH STROKE */
+{ "nrarr;", 0x0219B }, /* RIGHTWARDS ARROW WITH STROKE */
+{ "nrightarrow;", 0x0219B }, /* RIGHTWARDS ARROW WITH STROKE */
+{ "nrtri;", 0x022EB }, /* DOES NOT CONTAIN AS NORMAL SUBGROUP */
+{ "nrtrie;", 0x022ED }, /* DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL */
+{ "nsc;", 0x02281 }, /* DOES NOT SUCCEED */
+{ "nsccue;", 0x022E1 }, /* DOES NOT SUCCEED OR EQUAL */
+{ "nscr;", 0x1D4C3 }, /* MATHEMATICAL SCRIPT SMALL N */
+{ "nshortmid;", 0x02224 }, /* DOES NOT DIVIDE */
+{ "nshortparallel;", 0x02226 }, /* NOT PARALLEL TO */
+{ "nsim;", 0x02241 }, /* NOT TILDE */
+{ "nsime;", 0x02244 }, /* NOT ASYMPTOTICALLY EQUAL TO */
+{ "nsimeq;", 0x02244 }, /* NOT ASYMPTOTICALLY EQUAL TO */
+{ "nsmid;", 0x02224 }, /* DOES NOT DIVIDE */
+{ "nspar;", 0x02226 }, /* NOT PARALLEL TO */
+{ "nsqsube;", 0x022E2 }, /* NOT SQUARE IMAGE OF OR EQUAL TO */
+{ "nsqsupe;", 0x022E3 }, /* NOT SQUARE ORIGINAL OF OR EQUAL TO */
+{ "nsub;", 0x02284 }, /* NOT A SUBSET OF */
+{ "nsube;", 0x02288 }, /* NEITHER A SUBSET OF NOR EQUAL TO */
+{ "nsubseteq;", 0x02288 }, /* NEITHER A SUBSET OF NOR EQUAL TO */
+{ "nsucc;", 0x02281 }, /* DOES NOT SUCCEED */
+{ "nsup;", 0x02285 }, /* NOT A SUPERSET OF */
+{ "nsupe;", 0x02289 }, /* NEITHER A SUPERSET OF NOR EQUAL TO */
+{ "nsupseteq;", 0x02289 }, /* NEITHER A SUPERSET OF NOR EQUAL TO */
+{ "ntgl;", 0x02279 }, /* NEITHER GREATER-THAN NOR LESS-THAN */
+{ "ntilde;", 0x000F1 }, /* LATIN SMALL LETTER N WITH TILDE */
+{ "ntlg;", 0x02278 }, /* NEITHER LESS-THAN NOR GREATER-THAN */
+{ "ntriangleleft;", 0x022EA }, /* NOT NORMAL SUBGROUP OF */
+{ "ntrianglelefteq;", 0x022EC }, /* NOT NORMAL SUBGROUP OF OR EQUAL TO */
+{ "ntriangleright;", 0x022EB }, /* DOES NOT CONTAIN AS NORMAL SUBGROUP */
+{ "ntrianglerighteq;", 0x022ED }, /* DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL */
+{ "nu;", 0x003BD }, /* GREEK SMALL LETTER NU */
+{ "num;", 0x00023 }, /* NUMBER SIGN */
+{ "numero;", 0x02116 }, /* NUMERO SIGN */
+{ "numsp;", 0x02007 }, /* FIGURE SPACE */
+{ "nvDash;", 0x022AD }, /* NOT TRUE */
+{ "nvHarr;", 0x02904 }, /* LEFT RIGHT DOUBLE ARROW WITH VERTICAL STROKE */
+{ "nvdash;", 0x022AC }, /* DOES NOT PROVE */
+{ "nvinfin;", 0x029DE }, /* INFINITY NEGATED WITH VERTICAL BAR */
+{ "nvlArr;", 0x02902 }, /* LEFTWARDS DOUBLE ARROW WITH VERTICAL STROKE */
+{ "nvrArr;", 0x02903 }, /* RIGHTWARDS DOUBLE ARROW WITH VERTICAL STROKE */
+{ "nwArr;", 0x021D6 }, /* NORTH WEST DOUBLE ARROW */
+{ "nwarhk;", 0x02923 }, /* NORTH WEST ARROW WITH HOOK */
+{ "nwarr;", 0x02196 }, /* NORTH WEST ARROW */
+{ "nwarrow;", 0x02196 }, /* NORTH WEST ARROW */
+{ "nwnear;", 0x02927 }, /* NORTH WEST ARROW AND NORTH EAST ARROW */
+{ "oS;", 0x024C8 }, /* CIRCLED LATIN CAPITAL LETTER S */
+{ "oacute;", 0x000F3 }, /* LATIN SMALL LETTER O WITH ACUTE */
+{ "oast;", 0x0229B }, /* CIRCLED ASTERISK OPERATOR */
+{ "ocir;", 0x0229A }, /* CIRCLED RING OPERATOR */
+{ "ocirc;", 0x000F4 }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
+{ "ocy;", 0x0043E }, /* CYRILLIC SMALL LETTER O */
+{ "odash;", 0x0229D }, /* CIRCLED DASH */
+{ "odblac;", 0x00151 }, /* LATIN SMALL LETTER O WITH DOUBLE ACUTE */
+{ "odiv;", 0x02A38 }, /* CIRCLED DIVISION SIGN */
+{ "odot;", 0x02299 }, /* CIRCLED DOT OPERATOR */
+{ "odsold;", 0x029BC }, /* CIRCLED ANTICLOCKWISE-ROTATED DIVISION SIGN */
+{ "oelig;", 0x00153 }, /* LATIN SMALL LIGATURE OE */
+{ "ofcir;", 0x029BF }, /* CIRCLED BULLET */
+{ "ofr;", 0x1D52C }, /* MATHEMATICAL FRAKTUR SMALL O */
+{ "ogon;", 0x002DB }, /* OGONEK */
+{ "ograve;", 0x000F2 }, /* LATIN SMALL LETTER O WITH GRAVE */
+{ "ogt;", 0x029C1 }, /* CIRCLED GREATER-THAN */
+{ "ohbar;", 0x029B5 }, /* CIRCLE WITH HORIZONTAL BAR */
+{ "ohm;", 0x02126 }, /* OHM SIGN */
+{ "oint;", 0x0222E }, /* CONTOUR INTEGRAL */
+{ "olarr;", 0x021BA }, /* ANTICLOCKWISE OPEN CIRCLE ARROW */
+{ "olcir;", 0x029BE }, /* CIRCLED WHITE BULLET */
+{ "olcross;", 0x029BB }, /* CIRCLE WITH SUPERIMPOSED X */
+{ "oline;", 0x0203E }, /* OVERLINE */
+{ "olt;", 0x029C0 }, /* CIRCLED LESS-THAN */
+{ "omacr;", 0x0014D }, /* LATIN SMALL LETTER O WITH MACRON */
+{ "omega;", 0x003C9 }, /* GREEK SMALL LETTER OMEGA */
+{ "omicron;", 0x003BF }, /* GREEK SMALL LETTER OMICRON */
+{ "omid;", 0x029B6 }, /* CIRCLED VERTICAL BAR */
+{ "ominus;", 0x02296 }, /* CIRCLED MINUS */
+{ "oopf;", 0x1D560 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL O */
+{ "opar;", 0x029B7 }, /* CIRCLED PARALLEL */
+{ "operp;", 0x029B9 }, /* CIRCLED PERPENDICULAR */
+{ "oplus;", 0x02295 }, /* CIRCLED PLUS */
+{ "or;", 0x02228 }, /* LOGICAL OR */
+{ "orarr;", 0x021BB }, /* CLOCKWISE OPEN CIRCLE ARROW */
+{ "ord;", 0x02A5D }, /* LOGICAL OR WITH HORIZONTAL DASH */
+{ "order;", 0x02134 }, /* SCRIPT SMALL O */
+{ "orderof;", 0x02134 }, /* SCRIPT SMALL O */
+{ "ordf;", 0x000AA }, /* FEMININE ORDINAL INDICATOR */
+{ "ordm;", 0x000BA }, /* MASCULINE ORDINAL INDICATOR */
+{ "origof;", 0x022B6 }, /* ORIGINAL OF */
+{ "oror;", 0x02A56 }, /* TWO INTERSECTING LOGICAL OR */
+{ "orslope;", 0x02A57 }, /* SLOPING LARGE OR */
+{ "orv;", 0x02A5B }, /* LOGICAL OR WITH MIDDLE STEM */
+{ "oscr;", 0x02134 }, /* SCRIPT SMALL O */
+{ "oslash;", 0x000F8 }, /* LATIN SMALL LETTER O WITH STROKE */
+{ "osol;", 0x02298 }, /* CIRCLED DIVISION SLASH */
+{ "otilde;", 0x000F5 }, /* LATIN SMALL LETTER O WITH TILDE */
+{ "otimes;", 0x02297 }, /* CIRCLED TIMES */
+{ "otimesas;", 0x02A36 }, /* CIRCLED MULTIPLICATION SIGN WITH CIRCUMFLEX ACCENT */
+{ "ouml;", 0x000F6 }, /* LATIN SMALL LETTER O WITH DIAERESIS */
+{ "ovbar;", 0x0233D }, /* APL FUNCTIONAL SYMBOL CIRCLE STILE */
+{ "par;", 0x02225 }, /* PARALLEL TO */
+{ "para;", 0x000B6 }, /* PILCROW SIGN */
+{ "parallel;", 0x02225 }, /* PARALLEL TO */
+{ "parsim;", 0x02AF3 }, /* PARALLEL WITH TILDE OPERATOR */
+{ "parsl;", 0x02AFD }, /* DOUBLE SOLIDUS OPERATOR */
+{ "part;", 0x02202 }, /* PARTIAL DIFFERENTIAL */
+{ "pcy;", 0x0043F }, /* CYRILLIC SMALL LETTER PE */
+{ "percnt;", 0x00025 }, /* PERCENT SIGN */
+{ "period;", 0x0002E }, /* FULL STOP */
+{ "permil;", 0x02030 }, /* PER MILLE SIGN */
+{ "perp;", 0x022A5 }, /* UP TACK */
+{ "pertenk;", 0x02031 }, /* PER TEN THOUSAND SIGN */
+{ "pfr;", 0x1D52D }, /* MATHEMATICAL FRAKTUR SMALL P */
+{ "phi;", 0x003C6 }, /* GREEK SMALL LETTER PHI */
+{ "phiv;", 0x003C6 }, /* GREEK SMALL LETTER PHI */
+{ "phmmat;", 0x02133 }, /* SCRIPT CAPITAL M */
+{ "phone;", 0x0260E }, /* BLACK TELEPHONE */
+{ "pi;", 0x003C0 }, /* GREEK SMALL LETTER PI */
+{ "pitchfork;", 0x022D4 }, /* PITCHFORK */
+{ "piv;", 0x003D6 }, /* GREEK PI SYMBOL */
+{ "planck;", 0x0210F }, /* PLANCK CONSTANT OVER TWO PI */
+{ "planckh;", 0x0210E }, /* PLANCK CONSTANT */
+{ "plankv;", 0x0210F }, /* PLANCK CONSTANT OVER TWO PI */
+{ "plus;", 0x0002B }, /* PLUS SIGN */
+{ "plusacir;", 0x02A23 }, /* PLUS SIGN WITH CIRCUMFLEX ACCENT ABOVE */
+{ "plusb;", 0x0229E }, /* SQUARED PLUS */
+{ "pluscir;", 0x02A22 }, /* PLUS SIGN WITH SMALL CIRCLE ABOVE */
+{ "plusdo;", 0x02214 }, /* DOT PLUS */
+{ "plusdu;", 0x02A25 }, /* PLUS SIGN WITH DOT BELOW */
+{ "pluse;", 0x02A72 }, /* PLUS SIGN ABOVE EQUALS SIGN */
+{ "plusmn;", 0x000B1 }, /* PLUS-MINUS SIGN */
+{ "plussim;", 0x02A26 }, /* PLUS SIGN WITH TILDE BELOW */
+{ "plustwo;", 0x02A27 }, /* PLUS SIGN WITH SUBSCRIPT TWO */
+{ "pm;", 0x000B1 }, /* PLUS-MINUS SIGN */
+{ "pointint;", 0x02A15 }, /* INTEGRAL AROUND A POINT OPERATOR */
+{ "popf;", 0x1D561 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL P */
+{ "pound;", 0x000A3 }, /* POUND SIGN */
+{ "pr;", 0x0227A }, /* PRECEDES */
+{ "prE;", 0x02AB3 }, /* PRECEDES ABOVE EQUALS SIGN */
+{ "prap;", 0x02AB7 }, /* PRECEDES ABOVE ALMOST EQUAL TO */
+{ "prcue;", 0x0227C }, /* PRECEDES OR EQUAL TO */
+{ "pre;", 0x02AAF }, /* PRECEDES ABOVE SINGLE-LINE EQUALS SIGN */
+{ "prec;", 0x0227A }, /* PRECEDES */
+{ "precapprox;", 0x02AB7 }, /* PRECEDES ABOVE ALMOST EQUAL TO */
+{ "preccurlyeq;", 0x0227C }, /* PRECEDES OR EQUAL TO */
+{ "preceq;", 0x02AAF }, /* PRECEDES ABOVE SINGLE-LINE EQUALS SIGN */
+{ "precnapprox;", 0x02AB9 }, /* PRECEDES ABOVE NOT ALMOST EQUAL TO */
+{ "precneqq;", 0x02AB5 }, /* PRECEDES ABOVE NOT EQUAL TO */
+{ "precnsim;", 0x022E8 }, /* PRECEDES BUT NOT EQUIVALENT TO */
+{ "precsim;", 0x0227E }, /* PRECEDES OR EQUIVALENT TO */
+{ "prime;", 0x02032 }, /* PRIME */
+{ "primes;", 0x02119 }, /* DOUBLE-STRUCK CAPITAL P */
+{ "prnE;", 0x02AB5 }, /* PRECEDES ABOVE NOT EQUAL TO */
+{ "prnap;", 0x02AB9 }, /* PRECEDES ABOVE NOT ALMOST EQUAL TO */
+{ "prnsim;", 0x022E8 }, /* PRECEDES BUT NOT EQUIVALENT TO */
+{ "prod;", 0x0220F }, /* N-ARY PRODUCT */
+{ "profalar;", 0x0232E }, /* ALL AROUND-PROFILE */
+{ "profline;", 0x02312 }, /* ARC */
+{ "profsurf;", 0x02313 }, /* SEGMENT */
+{ "prop;", 0x0221D }, /* PROPORTIONAL TO */
+{ "propto;", 0x0221D }, /* PROPORTIONAL TO */
+{ "prsim;", 0x0227E }, /* PRECEDES OR EQUIVALENT TO */
+{ "prurel;", 0x022B0 }, /* PRECEDES UNDER RELATION */
+{ "pscr;", 0x1D4C5 }, /* MATHEMATICAL SCRIPT SMALL P */
+{ "psi;", 0x003C8 }, /* GREEK SMALL LETTER PSI */
+{ "puncsp;", 0x02008 }, /* PUNCTUATION SPACE */
+{ "qfr;", 0x1D52E }, /* MATHEMATICAL FRAKTUR SMALL Q */
+{ "qint;", 0x02A0C }, /* QUADRUPLE INTEGRAL OPERATOR */
+{ "qopf;", 0x1D562 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL Q */
+{ "qprime;", 0x02057 }, /* QUADRUPLE PRIME */
+{ "qscr;", 0x1D4C6 }, /* MATHEMATICAL SCRIPT SMALL Q */
+{ "quaternions;", 0x0210D }, /* DOUBLE-STRUCK CAPITAL H */
+{ "quatint;", 0x02A16 }, /* QUATERNION INTEGRAL OPERATOR */
+{ "quest;", 0x0003F }, /* QUESTION MARK */
+{ "questeq;", 0x0225F }, /* QUESTIONED EQUAL TO */
+{ "quot;", 0x00022 }, /* QUOTATION MARK */
+{ "rAarr;", 0x021DB }, /* RIGHTWARDS TRIPLE ARROW */
+{ "rArr;", 0x021D2 }, /* RIGHTWARDS DOUBLE ARROW */
+{ "rAtail;", 0x0291C }, /* RIGHTWARDS DOUBLE ARROW-TAIL */
+{ "rBarr;", 0x0290F }, /* RIGHTWARDS TRIPLE DASH ARROW */
+{ "rHar;", 0x02964 }, /* RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN */
+{ "race;", 0x029DA }, /* LEFT DOUBLE WIGGLY FENCE */
+{ "racute;", 0x00155 }, /* LATIN SMALL LETTER R WITH ACUTE */
+{ "radic;", 0x0221A }, /* SQUARE ROOT */
+{ "raemptyv;", 0x029B3 }, /* EMPTY SET WITH RIGHT ARROW ABOVE */
+{ "rang;", 0x027E9 }, /* MATHEMATICAL RIGHT ANGLE BRACKET */
+{ "rangd;", 0x02992 }, /* RIGHT ANGLE BRACKET WITH DOT */
+{ "range;", 0x029A5 }, /* REVERSED ANGLE WITH UNDERBAR */
+{ "rangle;", 0x027E9 }, /* MATHEMATICAL RIGHT ANGLE BRACKET */
+{ "raquo;", 0x000BB }, /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
+{ "rarr;", 0x02192 }, /* RIGHTWARDS ARROW */
+{ "rarrap;", 0x02975 }, /* RIGHTWARDS ARROW ABOVE ALMOST EQUAL TO */
+{ "rarrb;", 0x021E5 }, /* RIGHTWARDS ARROW TO BAR */
+{ "rarrbfs;", 0x02920 }, /* RIGHTWARDS ARROW FROM BAR TO BLACK DIAMOND */
+{ "rarrc;", 0x02933 }, /* WAVE ARROW POINTING DIRECTLY RIGHT */
+{ "rarrfs;", 0x0291E }, /* RIGHTWARDS ARROW TO BLACK DIAMOND */
+{ "rarrhk;", 0x021AA }, /* RIGHTWARDS ARROW WITH HOOK */
+{ "rarrlp;", 0x021AC }, /* RIGHTWARDS ARROW WITH LOOP */
+{ "rarrpl;", 0x02945 }, /* RIGHTWARDS ARROW WITH PLUS BELOW */
+{ "rarrsim;", 0x02974 }, /* RIGHTWARDS ARROW ABOVE TILDE OPERATOR */
+{ "rarrtl;", 0x021A3 }, /* RIGHTWARDS ARROW WITH TAIL */
+{ "rarrw;", 0x0219D }, /* RIGHTWARDS WAVE ARROW */
+{ "ratail;", 0x0291A }, /* RIGHTWARDS ARROW-TAIL */
+{ "ratio;", 0x02236 }, /* RATIO */
+{ "rationals;", 0x0211A }, /* DOUBLE-STRUCK CAPITAL Q */
+{ "rbarr;", 0x0290D }, /* RIGHTWARDS DOUBLE DASH ARROW */
+{ "rbbrk;", 0x02773 }, /* LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT */
+{ "rbrace;", 0x0007D }, /* RIGHT CURLY BRACKET */
+{ "rbrack;", 0x0005D }, /* RIGHT SQUARE BRACKET */
+{ "rbrke;", 0x0298C }, /* RIGHT SQUARE BRACKET WITH UNDERBAR */
+{ "rbrksld;", 0x0298E }, /* RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER */
+{ "rbrkslu;", 0x02990 }, /* RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER */
+{ "rcaron;", 0x00159 }, /* LATIN SMALL LETTER R WITH CARON */
+{ "rcedil;", 0x00157 }, /* LATIN SMALL LETTER R WITH CEDILLA */
+{ "rceil;", 0x02309 }, /* RIGHT CEILING */
+{ "rcub;", 0x0007D }, /* RIGHT CURLY BRACKET */
+{ "rcy;", 0x00440 }, /* CYRILLIC SMALL LETTER ER */
+{ "rdca;", 0x02937 }, /* ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS */
+{ "rdldhar;", 0x02969 }, /* RIGHTWARDS HARPOON WITH BARB DOWN ABOVE LEFTWARDS HARPOON WITH BARB DOWN */
+{ "rdquo;", 0x0201D }, /* RIGHT DOUBLE QUOTATION MARK */
+{ "rdquor;", 0x0201D }, /* RIGHT DOUBLE QUOTATION MARK */
+{ "rdsh;", 0x021B3 }, /* DOWNWARDS ARROW WITH TIP RIGHTWARDS */
+{ "real;", 0x0211C }, /* BLACK-LETTER CAPITAL R */
+{ "realine;", 0x0211B }, /* SCRIPT CAPITAL R */
+{ "realpart;", 0x0211C }, /* BLACK-LETTER CAPITAL R */
+{ "reals;", 0x0211D }, /* DOUBLE-STRUCK CAPITAL R */
+{ "rect;", 0x025AD }, /* WHITE RECTANGLE */
+{ "reg;", 0x000AE }, /* REGISTERED SIGN */
+{ "rfisht;", 0x0297D }, /* RIGHT FISH TAIL */
+{ "rfloor;", 0x0230B }, /* RIGHT FLOOR */
+{ "rfr;", 0x1D52F }, /* MATHEMATICAL FRAKTUR SMALL R */
+{ "rhard;", 0x021C1 }, /* RIGHTWARDS HARPOON WITH BARB DOWNWARDS */
+{ "rharu;", 0x021C0 }, /* RIGHTWARDS HARPOON WITH BARB UPWARDS */
+{ "rharul;", 0x0296C }, /* RIGHTWARDS HARPOON WITH BARB UP ABOVE LONG DASH */
+{ "rho;", 0x003C1 }, /* GREEK SMALL LETTER RHO */
+{ "rhov;", 0x003F1 }, /* GREEK RHO SYMBOL */
+{ "rightarrow;", 0x02192 }, /* RIGHTWARDS ARROW */
+{ "rightarrowtail;", 0x021A3 }, /* RIGHTWARDS ARROW WITH TAIL */
+{ "rightharpoondown;", 0x021C1 }, /* RIGHTWARDS HARPOON WITH BARB DOWNWARDS */
+{ "rightharpoonup;", 0x021C0 }, /* RIGHTWARDS HARPOON WITH BARB UPWARDS */
+{ "rightleftarrows;", 0x021C4 }, /* RIGHTWARDS ARROW OVER LEFTWARDS ARROW */
+{ "rightleftharpoons;", 0x021CC }, /* RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON */
+{ "rightrightarrows;", 0x021C9 }, /* RIGHTWARDS PAIRED ARROWS */
+{ "rightsquigarrow;", 0x0219D }, /* RIGHTWARDS WAVE ARROW */
+{ "rightthreetimes;", 0x022CC }, /* RIGHT SEMIDIRECT PRODUCT */
+{ "ring;", 0x002DA }, /* RING ABOVE */
+{ "risingdotseq;", 0x02253 }, /* IMAGE OF OR APPROXIMATELY EQUAL TO */
+{ "rlarr;", 0x021C4 }, /* RIGHTWARDS ARROW OVER LEFTWARDS ARROW */
+{ "rlhar;", 0x021CC }, /* RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON */
+{ "rlm;", 0x0200F }, /* RIGHT-TO-LEFT MARK */
+{ "rmoust;", 0x023B1 }, /* UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION */
+{ "rmoustache;", 0x023B1 }, /* UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION */
+{ "rnmid;", 0x02AEE }, /* DOES NOT DIVIDE WITH REVERSED NEGATION SLASH */
+{ "roang;", 0x027ED }, /* MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET */
+{ "roarr;", 0x021FE }, /* RIGHTWARDS OPEN-HEADED ARROW */
+{ "robrk;", 0x027E7 }, /* MATHEMATICAL RIGHT WHITE SQUARE BRACKET */
+{ "ropar;", 0x02986 }, /* RIGHT WHITE PARENTHESIS */
+{ "ropf;", 0x1D563 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL R */
+{ "roplus;", 0x02A2E }, /* PLUS SIGN IN RIGHT HALF CIRCLE */
+{ "rotimes;", 0x02A35 }, /* MULTIPLICATION SIGN IN RIGHT HALF CIRCLE */
+{ "rpar;", 0x00029 }, /* RIGHT PARENTHESIS */
+{ "rpargt;", 0x02994 }, /* RIGHT ARC GREATER-THAN BRACKET */
+{ "rppolint;", 0x02A12 }, /* LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE */
+{ "rrarr;", 0x021C9 }, /* RIGHTWARDS PAIRED ARROWS */
+{ "rsaquo;", 0x0203A }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
+{ "rscr;", 0x1D4C7 }, /* MATHEMATICAL SCRIPT SMALL R */
+{ "rsh;", 0x021B1 }, /* UPWARDS ARROW WITH TIP RIGHTWARDS */
+{ "rsqb;", 0x0005D }, /* RIGHT SQUARE BRACKET */
+{ "rsquo;", 0x02019 }, /* RIGHT SINGLE QUOTATION MARK */
+{ "rsquor;", 0x02019 }, /* RIGHT SINGLE QUOTATION MARK */
+{ "rthree;", 0x022CC }, /* RIGHT SEMIDIRECT PRODUCT */
+{ "rtimes;", 0x022CA }, /* RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT */
+{ "rtri;", 0x025B9 }, /* WHITE RIGHT-POINTING SMALL TRIANGLE */
+{ "rtrie;", 0x022B5 }, /* CONTAINS AS NORMAL SUBGROUP OR EQUAL TO */
+{ "rtrif;", 0x025B8 }, /* BLACK RIGHT-POINTING SMALL TRIANGLE */
+{ "rtriltri;", 0x029CE }, /* RIGHT TRIANGLE ABOVE LEFT TRIANGLE */
+{ "ruluhar;", 0x02968 }, /* RIGHTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB UP */
+{ "rx;", 0x0211E }, /* PRESCRIPTION TAKE */
+{ "sacute;", 0x0015B }, /* LATIN SMALL LETTER S WITH ACUTE */
+{ "sbquo;", 0x0201A }, /* SINGLE LOW-9 QUOTATION MARK */
+{ "sc;", 0x0227B }, /* SUCCEEDS */
+{ "scE;", 0x02AB4 }, /* SUCCEEDS ABOVE EQUALS SIGN */
+{ "scap;", 0x02AB8 }, /* SUCCEEDS ABOVE ALMOST EQUAL TO */
+{ "scaron;", 0x00161 }, /* LATIN SMALL LETTER S WITH CARON */
+{ "sccue;", 0x0227D }, /* SUCCEEDS OR EQUAL TO */
+{ "sce;", 0x02AB0 }, /* SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN */
+{ "scedil;", 0x0015F }, /* LATIN SMALL LETTER S WITH CEDILLA */
+{ "scirc;", 0x0015D }, /* LATIN SMALL LETTER S WITH CIRCUMFLEX */
+{ "scnE;", 0x02AB6 }, /* SUCCEEDS ABOVE NOT EQUAL TO */
+{ "scnap;", 0x02ABA }, /* SUCCEEDS ABOVE NOT ALMOST EQUAL TO */
+{ "scnsim;", 0x022E9 }, /* SUCCEEDS BUT NOT EQUIVALENT TO */
+{ "scpolint;", 0x02A13 }, /* LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE */
+{ "scsim;", 0x0227F }, /* SUCCEEDS OR EQUIVALENT TO */
+{ "scy;", 0x00441 }, /* CYRILLIC SMALL LETTER ES */
+{ "sdot;", 0x022C5 }, /* DOT OPERATOR */
+{ "sdotb;", 0x022A1 }, /* SQUARED DOT OPERATOR */
+{ "sdote;", 0x02A66 }, /* EQUALS SIGN WITH DOT BELOW */
+{ "seArr;", 0x021D8 }, /* SOUTH EAST DOUBLE ARROW */
+{ "searhk;", 0x02925 }, /* SOUTH EAST ARROW WITH HOOK */
+{ "searr;", 0x02198 }, /* SOUTH EAST ARROW */
+{ "searrow;", 0x02198 }, /* SOUTH EAST ARROW */
+{ "sect;", 0x000A7 }, /* SECTION SIGN */
+{ "semi;", 0x0003B }, /* SEMICOLON */
+{ "seswar;", 0x02929 }, /* SOUTH EAST ARROW AND SOUTH WEST ARROW */
+{ "setminus;", 0x02216 }, /* SET MINUS */
+{ "setmn;", 0x02216 }, /* SET MINUS */
+{ "sext;", 0x02736 }, /* SIX POINTED BLACK STAR */
+{ "sfr;", 0x1D530 }, /* MATHEMATICAL FRAKTUR SMALL S */
+{ "sfrown;", 0x02322 }, /* FROWN */
+{ "sharp;", 0x0266F }, /* MUSIC SHARP SIGN */
+{ "shchcy;", 0x00449 }, /* CYRILLIC SMALL LETTER SHCHA */
+{ "shcy;", 0x00448 }, /* CYRILLIC SMALL LETTER SHA */
+{ "shortmid;", 0x02223 }, /* DIVIDES */
+{ "shortparallel;", 0x02225 }, /* PARALLEL TO */
+{ "shy;", 0x000AD }, /* SOFT HYPHEN */
+{ "sigma;", 0x003C3 }, /* GREEK SMALL LETTER SIGMA */
+{ "sigmaf;", 0x003C2 }, /* GREEK SMALL LETTER FINAL SIGMA */
+{ "sigmav;", 0x003C2 }, /* GREEK SMALL LETTER FINAL SIGMA */
+{ "sim;", 0x0223C }, /* TILDE OPERATOR */
+{ "simdot;", 0x02A6A }, /* TILDE OPERATOR WITH DOT ABOVE */
+{ "sime;", 0x02243 }, /* ASYMPTOTICALLY EQUAL TO */
+{ "simeq;", 0x02243 }, /* ASYMPTOTICALLY EQUAL TO */
+{ "simg;", 0x02A9E }, /* SIMILAR OR GREATER-THAN */
+{ "simgE;", 0x02AA0 }, /* SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN */
+{ "siml;", 0x02A9D }, /* SIMILAR OR LESS-THAN */
+{ "simlE;", 0x02A9F }, /* SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN */
+{ "simne;", 0x02246 }, /* APPROXIMATELY BUT NOT ACTUALLY EQUAL TO */
+{ "simplus;", 0x02A24 }, /* PLUS SIGN WITH TILDE ABOVE */
+{ "simrarr;", 0x02972 }, /* TILDE OPERATOR ABOVE RIGHTWARDS ARROW */
+{ "slarr;", 0x02190 }, /* LEFTWARDS ARROW */
+{ "smallsetminus;", 0x02216 }, /* SET MINUS */
+{ "smashp;", 0x02A33 }, /* SMASH PRODUCT */
+{ "smeparsl;", 0x029E4 }, /* EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE */
+{ "smid;", 0x02223 }, /* DIVIDES */
+{ "smile;", 0x02323 }, /* SMILE */
+{ "smt;", 0x02AAA }, /* SMALLER THAN */
+{ "smte;", 0x02AAC }, /* SMALLER THAN OR EQUAL TO */
+{ "softcy;", 0x0044C }, /* CYRILLIC SMALL LETTER SOFT SIGN */
+{ "sol;", 0x0002F }, /* SOLIDUS */
+{ "solb;", 0x029C4 }, /* SQUARED RISING DIAGONAL SLASH */
+{ "solbar;", 0x0233F }, /* APL FUNCTIONAL SYMBOL SLASH BAR */
+{ "sopf;", 0x1D564 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL S */
+{ "spades;", 0x02660 }, /* BLACK SPADE SUIT */
+{ "spadesuit;", 0x02660 }, /* BLACK SPADE SUIT */
+{ "spar;", 0x02225 }, /* PARALLEL TO */
+{ "sqcap;", 0x02293 }, /* SQUARE CAP */
+{ "sqcup;", 0x02294 }, /* SQUARE CUP */
+{ "sqsub;", 0x0228F }, /* SQUARE IMAGE OF */
+{ "sqsube;", 0x02291 }, /* SQUARE IMAGE OF OR EQUAL TO */
+{ "sqsubset;", 0x0228F }, /* SQUARE IMAGE OF */
+{ "sqsubseteq;", 0x02291 }, /* SQUARE IMAGE OF OR EQUAL TO */
+{ "sqsup;", 0x02290 }, /* SQUARE ORIGINAL OF */
+{ "sqsupe;", 0x02292 }, /* SQUARE ORIGINAL OF OR EQUAL TO */
+{ "sqsupset;", 0x02290 }, /* SQUARE ORIGINAL OF */
+{ "sqsupseteq;", 0x02292 }, /* SQUARE ORIGINAL OF OR EQUAL TO */
+{ "squ;", 0x025A1 }, /* WHITE SQUARE */
+{ "square;", 0x025A1 }, /* WHITE SQUARE */
+{ "squarf;", 0x025AA }, /* BLACK SMALL SQUARE */
+{ "squf;", 0x025AA }, /* BLACK SMALL SQUARE */
+{ "srarr;", 0x02192 }, /* RIGHTWARDS ARROW */
+{ "sscr;", 0x1D4C8 }, /* MATHEMATICAL SCRIPT SMALL S */
+{ "ssetmn;", 0x02216 }, /* SET MINUS */
+{ "ssmile;", 0x02323 }, /* SMILE */
+{ "sstarf;", 0x022C6 }, /* STAR OPERATOR */
+{ "star;", 0x02606 }, /* WHITE STAR */
+{ "starf;", 0x02605 }, /* BLACK STAR */
+{ "straightepsilon;", 0x003F5 }, /* GREEK LUNATE EPSILON SYMBOL */
+{ "straightphi;", 0x003D5 }, /* GREEK PHI SYMBOL */
+{ "strns;", 0x000AF }, /* MACRON */
+{ "sub;", 0x02282 }, /* SUBSET OF */
+{ "subE;", 0x02AC5 }, /* SUBSET OF ABOVE EQUALS SIGN */
+{ "subdot;", 0x02ABD }, /* SUBSET WITH DOT */
+{ "sube;", 0x02286 }, /* SUBSET OF OR EQUAL TO */
+{ "subedot;", 0x02AC3 }, /* SUBSET OF OR EQUAL TO WITH DOT ABOVE */
+{ "submult;", 0x02AC1 }, /* SUBSET WITH MULTIPLICATION SIGN BELOW */
+{ "subnE;", 0x02ACB }, /* SUBSET OF ABOVE NOT EQUAL TO */
+{ "subne;", 0x0228A }, /* SUBSET OF WITH NOT EQUAL TO */
+{ "subplus;", 0x02ABF }, /* SUBSET WITH PLUS SIGN BELOW */
+{ "subrarr;", 0x02979 }, /* SUBSET ABOVE RIGHTWARDS ARROW */
+{ "subset;", 0x02282 }, /* SUBSET OF */
+{ "subseteq;", 0x02286 }, /* SUBSET OF OR EQUAL TO */
+{ "subseteqq;", 0x02AC5 }, /* SUBSET OF ABOVE EQUALS SIGN */
+{ "subsetneq;", 0x0228A }, /* SUBSET OF WITH NOT EQUAL TO */
+{ "subsetneqq;", 0x02ACB }, /* SUBSET OF ABOVE NOT EQUAL TO */
+{ "subsim;", 0x02AC7 }, /* SUBSET OF ABOVE TILDE OPERATOR */
+{ "subsub;", 0x02AD5 }, /* SUBSET ABOVE SUBSET */
+{ "subsup;", 0x02AD3 }, /* SUBSET ABOVE SUPERSET */
+{ "succ;", 0x0227B }, /* SUCCEEDS */
+{ "succapprox;", 0x02AB8 }, /* SUCCEEDS ABOVE ALMOST EQUAL TO */
+{ "succcurlyeq;", 0x0227D }, /* SUCCEEDS OR EQUAL TO */
+{ "succeq;", 0x02AB0 }, /* SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN */
+{ "succnapprox;", 0x02ABA }, /* SUCCEEDS ABOVE NOT ALMOST EQUAL TO */
+{ "succneqq;", 0x02AB6 }, /* SUCCEEDS ABOVE NOT EQUAL TO */
+{ "succnsim;", 0x022E9 }, /* SUCCEEDS BUT NOT EQUIVALENT TO */
+{ "succsim;", 0x0227F }, /* SUCCEEDS OR EQUIVALENT TO */
+{ "sum;", 0x02211 }, /* N-ARY SUMMATION */
+{ "sung;", 0x0266A }, /* EIGHTH NOTE */
+{ "sup1;", 0x000B9 }, /* SUPERSCRIPT ONE */
+{ "sup2;", 0x000B2 }, /* SUPERSCRIPT TWO */
+{ "sup3;", 0x000B3 }, /* SUPERSCRIPT THREE */
+{ "sup;", 0x02283 }, /* SUPERSET OF */
+{ "supE;", 0x02AC6 }, /* SUPERSET OF ABOVE EQUALS SIGN */
+{ "supdot;", 0x02ABE }, /* SUPERSET WITH DOT */
+{ "supdsub;", 0x02AD8 }, /* SUPERSET BESIDE AND JOINED BY DASH WITH SUBSET */
+{ "supe;", 0x02287 }, /* SUPERSET OF OR EQUAL TO */
+{ "supedot;", 0x02AC4 }, /* SUPERSET OF OR EQUAL TO WITH DOT ABOVE */
+{ "suphsub;", 0x02AD7 }, /* SUPERSET BESIDE SUBSET */
+{ "suplarr;", 0x0297B }, /* SUPERSET ABOVE LEFTWARDS ARROW */
+{ "supmult;", 0x02AC2 }, /* SUPERSET WITH MULTIPLICATION SIGN BELOW */
+{ "supnE;", 0x02ACC }, /* SUPERSET OF ABOVE NOT EQUAL TO */
+{ "supne;", 0x0228B }, /* SUPERSET OF WITH NOT EQUAL TO */
+{ "supplus;", 0x02AC0 }, /* SUPERSET WITH PLUS SIGN BELOW */
+{ "supset;", 0x02283 }, /* SUPERSET OF */
+{ "supseteq;", 0x02287 }, /* SUPERSET OF OR EQUAL TO */
+{ "supseteqq;", 0x02AC6 }, /* SUPERSET OF ABOVE EQUALS SIGN */
+{ "supsetneq;", 0x0228B }, /* SUPERSET OF WITH NOT EQUAL TO */
+{ "supsetneqq;", 0x02ACC }, /* SUPERSET OF ABOVE NOT EQUAL TO */
+{ "supsim;", 0x02AC8 }, /* SUPERSET OF ABOVE TILDE OPERATOR */
+{ "supsub;", 0x02AD4 }, /* SUPERSET ABOVE SUBSET */
+{ "supsup;", 0x02AD6 }, /* SUPERSET ABOVE SUPERSET */
+{ "swArr;", 0x021D9 }, /* SOUTH WEST DOUBLE ARROW */
+{ "swarhk;", 0x02926 }, /* SOUTH WEST ARROW WITH HOOK */
+{ "swarr;", 0x02199 }, /* SOUTH WEST ARROW */
+{ "swarrow;", 0x02199 }, /* SOUTH WEST ARROW */
+{ "swnwar;", 0x0292A }, /* SOUTH WEST ARROW AND NORTH WEST ARROW */
+{ "szlig;", 0x000DF }, /* LATIN SMALL LETTER SHARP S */
+{ "target;", 0x02316 }, /* POSITION INDICATOR */
+{ "tau;", 0x003C4 }, /* GREEK SMALL LETTER TAU */
+{ "tbrk;", 0x023B4 }, /* TOP SQUARE BRACKET */
+{ "tcaron;", 0x00165 }, /* LATIN SMALL LETTER T WITH CARON */
+{ "tcedil;", 0x00163 }, /* LATIN SMALL LETTER T WITH CEDILLA */
+{ "tcy;", 0x00442 }, /* CYRILLIC SMALL LETTER TE */
+{ "tdot;", 0x020DB }, /* COMBINING THREE DOTS ABOVE */
+{ "telrec;", 0x02315 }, /* TELEPHONE RECORDER */
+{ "tfr;", 0x1D531 }, /* MATHEMATICAL FRAKTUR SMALL T */
+{ "there4;", 0x02234 }, /* THEREFORE */
+{ "therefore;", 0x02234 }, /* THEREFORE */
+{ "theta;", 0x003B8 }, /* GREEK SMALL LETTER THETA */
+{ "thetasym;", 0x003D1 }, /* GREEK THETA SYMBOL */
+{ "thetav;", 0x003D1 }, /* GREEK THETA SYMBOL */
+{ "thickapprox;", 0x02248 }, /* ALMOST EQUAL TO */
+{ "thicksim;", 0x0223C }, /* TILDE OPERATOR */
+{ "thinsp;", 0x02009 }, /* THIN SPACE */
+{ "thkap;", 0x02248 }, /* ALMOST EQUAL TO */
+{ "thksim;", 0x0223C }, /* TILDE OPERATOR */
+{ "thorn;", 0x000FE }, /* LATIN SMALL LETTER THORN */
+{ "tilde;", 0x002DC }, /* SMALL TILDE */
+{ "times;", 0x000D7 }, /* MULTIPLICATION SIGN */
+{ "timesb;", 0x022A0 }, /* SQUARED TIMES */
+{ "timesbar;", 0x02A31 }, /* MULTIPLICATION SIGN WITH UNDERBAR */
+{ "timesd;", 0x02A30 }, /* MULTIPLICATION SIGN WITH DOT ABOVE */
+{ "tint;", 0x0222D }, /* TRIPLE INTEGRAL */
+{ "toea;", 0x02928 }, /* NORTH EAST ARROW AND SOUTH EAST ARROW */
+{ "top;", 0x022A4 }, /* DOWN TACK */
+{ "topbot;", 0x02336 }, /* APL FUNCTIONAL SYMBOL I-BEAM */
+{ "topcir;", 0x02AF1 }, /* DOWN TACK WITH CIRCLE BELOW */
+{ "topf;", 0x1D565 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL T */
+{ "topfork;", 0x02ADA }, /* PITCHFORK WITH TEE TOP */
+{ "tosa;", 0x02929 }, /* SOUTH EAST ARROW AND SOUTH WEST ARROW */
+{ "tprime;", 0x02034 }, /* TRIPLE PRIME */
+{ "trade;", 0x02122 }, /* TRADE MARK SIGN */
+{ "triangle;", 0x025B5 }, /* WHITE UP-POINTING SMALL TRIANGLE */
+{ "triangledown;", 0x025BF }, /* WHITE DOWN-POINTING SMALL TRIANGLE */
+{ "triangleleft;", 0x025C3 }, /* WHITE LEFT-POINTING SMALL TRIANGLE */
+{ "trianglelefteq;", 0x022B4 }, /* NORMAL SUBGROUP OF OR EQUAL TO */
+{ "triangleq;", 0x0225C }, /* DELTA EQUAL TO */
+{ "triangleright;", 0x025B9 }, /* WHITE RIGHT-POINTING SMALL TRIANGLE */
+{ "trianglerighteq;", 0x022B5 }, /* CONTAINS AS NORMAL SUBGROUP OR EQUAL TO */
+{ "tridot;", 0x025EC }, /* WHITE UP-POINTING TRIANGLE WITH DOT */
+{ "trie;", 0x0225C }, /* DELTA EQUAL TO */
+{ "triminus;", 0x02A3A }, /* MINUS SIGN IN TRIANGLE */
+{ "triplus;", 0x02A39 }, /* PLUS SIGN IN TRIANGLE */
+{ "trisb;", 0x029CD }, /* TRIANGLE WITH SERIFS AT BOTTOM */
+{ "tritime;", 0x02A3B }, /* MULTIPLICATION SIGN IN TRIANGLE */
+{ "trpezium;", 0x023E2 }, /* WHITE TRAPEZIUM */
+{ "tscr;", 0x1D4C9 }, /* MATHEMATICAL SCRIPT SMALL T */
+{ "tscy;", 0x00446 }, /* CYRILLIC SMALL LETTER TSE */
+{ "tshcy;", 0x0045B }, /* CYRILLIC SMALL LETTER TSHE */
+{ "tstrok;", 0x00167 }, /* LATIN SMALL LETTER T WITH STROKE */
+{ "twixt;", 0x0226C }, /* BETWEEN */
+{ "twoheadleftarrow;", 0x0219E }, /* LEFTWARDS TWO HEADED ARROW */
+{ "twoheadrightarrow;", 0x021A0 }, /* RIGHTWARDS TWO HEADED ARROW */
+{ "uArr;", 0x021D1 }, /* UPWARDS DOUBLE ARROW */
+{ "uHar;", 0x02963 }, /* UPWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT */
+{ "uacute;", 0x000FA }, /* LATIN SMALL LETTER U WITH ACUTE */
+{ "uarr;", 0x02191 }, /* UPWARDS ARROW */
+{ "ubrcy;", 0x0045E }, /* CYRILLIC SMALL LETTER SHORT U */
+{ "ubreve;", 0x0016D }, /* LATIN SMALL LETTER U WITH BREVE */
+{ "ucirc;", 0x000FB }, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
+{ "ucy;", 0x00443 }, /* CYRILLIC SMALL LETTER U */
+{ "udarr;", 0x021C5 }, /* UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW */
+{ "udblac;", 0x00171 }, /* LATIN SMALL LETTER U WITH DOUBLE ACUTE */
+{ "udhar;", 0x0296E }, /* UPWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT */
+{ "ufisht;", 0x0297E }, /* UP FISH TAIL */
+{ "ufr;", 0x1D532 }, /* MATHEMATICAL FRAKTUR SMALL U */
+{ "ugrave;", 0x000F9 }, /* LATIN SMALL LETTER U WITH GRAVE */
+{ "uharl;", 0x021BF }, /* UPWARDS HARPOON WITH BARB LEFTWARDS */
+{ "uharr;", 0x021BE }, /* UPWARDS HARPOON WITH BARB RIGHTWARDS */
+{ "uhblk;", 0x02580 }, /* UPPER HALF BLOCK */
+{ "ulcorn;", 0x0231C }, /* TOP LEFT CORNER */
+{ "ulcorner;", 0x0231C }, /* TOP LEFT CORNER */
+{ "ulcrop;", 0x0230F }, /* TOP LEFT CROP */
+{ "ultri;", 0x025F8 }, /* UPPER LEFT TRIANGLE */
+{ "umacr;", 0x0016B }, /* LATIN SMALL LETTER U WITH MACRON */
+{ "uml;", 0x000A8 }, /* DIAERESIS */
+{ "uogon;", 0x00173 }, /* LATIN SMALL LETTER U WITH OGONEK */
+{ "uopf;", 0x1D566 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL U */
+{ "uparrow;", 0x02191 }, /* UPWARDS ARROW */
+{ "updownarrow;", 0x02195 }, /* UP DOWN ARROW */
+{ "upharpoonleft;", 0x021BF }, /* UPWARDS HARPOON WITH BARB LEFTWARDS */
+{ "upharpoonright;", 0x021BE }, /* UPWARDS HARPOON WITH BARB RIGHTWARDS */
+{ "uplus;", 0x0228E }, /* MULTISET UNION */
+{ "upsi;", 0x003C5 }, /* GREEK SMALL LETTER UPSILON */
+{ "upsih;", 0x003D2 }, /* GREEK UPSILON WITH HOOK SYMBOL */
+{ "upsilon;", 0x003C5 }, /* GREEK SMALL LETTER UPSILON */
+{ "upuparrows;", 0x021C8 }, /* UPWARDS PAIRED ARROWS */
+{ "urcorn;", 0x0231D }, /* TOP RIGHT CORNER */
+{ "urcorner;", 0x0231D }, /* TOP RIGHT CORNER */
+{ "urcrop;", 0x0230E }, /* TOP RIGHT CROP */
+{ "uring;", 0x0016F }, /* LATIN SMALL LETTER U WITH RING ABOVE */
+{ "urtri;", 0x025F9 }, /* UPPER RIGHT TRIANGLE */
+{ "uscr;", 0x1D4CA }, /* MATHEMATICAL SCRIPT SMALL U */
+{ "utdot;", 0x022F0 }, /* UP RIGHT DIAGONAL ELLIPSIS */
+{ "utilde;", 0x00169 }, /* LATIN SMALL LETTER U WITH TILDE */
+{ "utri;", 0x025B5 }, /* WHITE UP-POINTING SMALL TRIANGLE */
+{ "utrif;", 0x025B4 }, /* BLACK UP-POINTING SMALL TRIANGLE */
+{ "uuarr;", 0x021C8 }, /* UPWARDS PAIRED ARROWS */
+{ "uuml;", 0x000FC }, /* LATIN SMALL LETTER U WITH DIAERESIS */
+{ "uwangle;", 0x029A7 }, /* OBLIQUE ANGLE OPENING DOWN */
+{ "vArr;", 0x021D5 }, /* UP DOWN DOUBLE ARROW */
+{ "vBar;", 0x02AE8 }, /* SHORT UP TACK WITH UNDERBAR */
+{ "vBarv;", 0x02AE9 }, /* SHORT UP TACK ABOVE SHORT DOWN TACK */
+{ "vDash;", 0x022A8 }, /* TRUE */
+{ "vangrt;", 0x0299C }, /* RIGHT ANGLE VARIANT WITH SQUARE */
+{ "varepsilon;", 0x003B5 }, /* GREEK SMALL LETTER EPSILON */
+{ "varkappa;", 0x003F0 }, /* GREEK KAPPA SYMBOL */
+{ "varnothing;", 0x02205 }, /* EMPTY SET */
+{ "varphi;", 0x003C6 }, /* GREEK SMALL LETTER PHI */
+{ "varpi;", 0x003D6 }, /* GREEK PI SYMBOL */
+{ "varpropto;", 0x0221D }, /* PROPORTIONAL TO */
+{ "varr;", 0x02195 }, /* UP DOWN ARROW */
+{ "varrho;", 0x003F1 }, /* GREEK RHO SYMBOL */
+{ "varsigma;", 0x003C2 }, /* GREEK SMALL LETTER FINAL SIGMA */
+{ "vartheta;", 0x003D1 }, /* GREEK THETA SYMBOL */
+{ "vartriangleleft;", 0x022B2 }, /* NORMAL SUBGROUP OF */
+{ "vartriangleright;", 0x022B3 }, /* CONTAINS AS NORMAL SUBGROUP */
+{ "vcy;", 0x00432 }, /* CYRILLIC SMALL LETTER VE */
+{ "vdash;", 0x022A2 }, /* RIGHT TACK */
+{ "vee;", 0x02228 }, /* LOGICAL OR */
+{ "veebar;", 0x022BB }, /* XOR */
+{ "veeeq;", 0x0225A }, /* EQUIANGULAR TO */
+{ "vellip;", 0x022EE }, /* VERTICAL ELLIPSIS */
+{ "verbar;", 0x0007C }, /* VERTICAL LINE */
+{ "vert;", 0x0007C }, /* VERTICAL LINE */
+{ "vfr;", 0x1D533 }, /* MATHEMATICAL FRAKTUR SMALL V */
+{ "vltri;", 0x022B2 }, /* NORMAL SUBGROUP OF */
+{ "vopf;", 0x1D567 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL V */
+{ "vprop;", 0x0221D }, /* PROPORTIONAL TO */
+{ "vrtri;", 0x022B3 }, /* CONTAINS AS NORMAL SUBGROUP */
+{ "vscr;", 0x1D4CB }, /* MATHEMATICAL SCRIPT SMALL V */
+{ "vzigzag;", 0x0299A }, /* VERTICAL ZIGZAG LINE */
+{ "wcirc;", 0x00175 }, /* LATIN SMALL LETTER W WITH CIRCUMFLEX */
+{ "wedbar;", 0x02A5F }, /* LOGICAL AND WITH UNDERBAR */
+{ "wedge;", 0x02227 }, /* LOGICAL AND */
+{ "wedgeq;", 0x02259 }, /* ESTIMATES */
+{ "weierp;", 0x02118 }, /* SCRIPT CAPITAL P */
+{ "wfr;", 0x1D534 }, /* MATHEMATICAL FRAKTUR SMALL W */
+{ "wopf;", 0x1D568 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL W */
+{ "wp;", 0x02118 }, /* SCRIPT CAPITAL P */
+{ "wr;", 0x02240 }, /* WREATH PRODUCT */
+{ "wreath;", 0x02240 }, /* WREATH PRODUCT */
+{ "wscr;", 0x1D4CC }, /* MATHEMATICAL SCRIPT SMALL W */
+{ "xcap;", 0x022C2 }, /* N-ARY INTERSECTION */
+{ "xcirc;", 0x025EF }, /* LARGE CIRCLE */
+{ "xcup;", 0x022C3 }, /* N-ARY UNION */
+{ "xdtri;", 0x025BD }, /* WHITE DOWN-POINTING TRIANGLE */
+{ "xfr;", 0x1D535 }, /* MATHEMATICAL FRAKTUR SMALL X */
+{ "xhArr;", 0x027FA }, /* LONG LEFT RIGHT DOUBLE ARROW */
+{ "xharr;", 0x027F7 }, /* LONG LEFT RIGHT ARROW */
+{ "xi;", 0x003BE }, /* GREEK SMALL LETTER XI */
+{ "xlArr;", 0x027F8 }, /* LONG LEFTWARDS DOUBLE ARROW */
+{ "xlarr;", 0x027F5 }, /* LONG LEFTWARDS ARROW */
+{ "xmap;", 0x027FC }, /* LONG RIGHTWARDS ARROW FROM BAR */
+{ "xnis;", 0x022FB }, /* CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE */
+{ "xodot;", 0x02A00 }, /* N-ARY CIRCLED DOT OPERATOR */
+{ "xopf;", 0x1D569 }, /* MATHEMATICAL DOUBLE-STRUCK SMALL X */
+{ "xoplus;", 0x02A01 }, /* N-ARY CIRCLED PLUS OPERATOR */
+{ "xotime;", 0x02A02 }, /* N-ARY CIRCLED TIMES OPERATOR */
+{ "xrArr;", 0x027F9 }, /* LONG RIGHTWARDS DOUBLE ARROW */
+{ "xrarr;", 0x027F6 }, /* LONG RIGHTWARDS ARROW */
+{ "xscr;", 0x1D4CD }, /* MATHEMATICAL SCRIPT SMALL X */
+{ "xsqcup;", 0x02A06 }, /* N-ARY SQUARE UNION OPERATOR */
+{ "xuplus;", 0x02A04 }, /* N-ARY UNION OPERATOR WITH PLUS */
+{ "xutri;", 0x025B3 }, /* WHITE UP-POINTING TRIANGLE */
+{ "xvee;", 0x022C1 }, /* N-ARY LOGICAL OR */
+{ "xwedge;", 0x022C0 }, /* N-ARY LOGICAL AND */
+{ "yacute;", 0x000FD }, /* LATIN SMALL LETTER Y WITH ACUTE */
+{ "yacy;", 0x0044F }, /* CYRILLIC SMALL LETTER YA */
+{ "ycirc;", 0x00177 }, /* LATIN SMALL LETTER Y WITH CIRCUMFLEX */
+{ "ycy;", 0x0044B }, /* CYRILLIC SMALL LETTER YERU */
+{ "yen;", 0x000A5 }, /* YEN SIGN */
+{ "yfr;", 0x1D536 }, /* MATHEMATICAL FRAKTUR SMALL Y */
+{ "yicy;", 0x00457 }, /* CYRILLIC SMALL LETTER YI */
+{ "yopf;", 0x1D56A }, /* MATHEMATICAL DOUBLE-STRUCK SMALL Y */
+{ "yscr;", 0x1D4CE }, /* MATHEMATICAL SCRIPT SMALL Y */
+{ "yucy;", 0x0044E }, /* CYRILLIC SMALL LETTER YU */
+{ "yuml;", 0x000FF }, /* LATIN SMALL LETTER Y WITH DIAERESIS */
+{ "zacute;", 0x0017A }, /* LATIN SMALL LETTER Z WITH ACUTE */
+{ "zcaron;", 0x0017E }, /* LATIN SMALL LETTER Z WITH CARON */
+{ "zcy;", 0x00437 }, /* CYRILLIC SMALL LETTER ZE */
+{ "zdot;", 0x0017C }, /* LATIN SMALL LETTER Z WITH DOT ABOVE */
+{ "zeetrf;", 0x02128 }, /* BLACK-LETTER CAPITAL Z */
+{ "zeta;", 0x003B6 }, /* GREEK SMALL LETTER ZETA */
+{ "zfr;", 0x1D537 }, /* MATHEMATICAL FRAKTUR SMALL Z */
+{ "zhcy;", 0x00436 }, /* CYRILLIC SMALL LETTER ZHE */
+{ "zigrarr;", 0x021DD }, /* RIGHTWARDS SQUIGGLE ARROW */
+{ "zopf;", 0x1D56B }, /* MATHEMATICAL DOUBLE-STRUCK SMALL Z */
+{ "zscr;", 0x1D4CF }, /* MATHEMATICAL SCRIPT SMALL Z */
+{ "zwj;", 0x0200D }, /* ZERO WIDTH JOINER */
+{ "zwnj;", 0x0200C }, /* ZERO WIDTH NON-JOINER */
(DIR) diff --git a/namedentities.h b/namedentities.h
@@ -0,0 +1,62 @@
+/* from https://dev.w3.org/html5/html-author/charref and
+ https://www.w3.org/TR/html4/sgml/entities.html */
+
+{ "AMP;", 0x00026 }, /* AMPERSAND */
+{ "COPY;", 0x000A9 }, /* COPYRIGHT SIGN */
+{ "GT;", 0x0003E }, /* GREATER-THAN SIGN */
+{ "LT;", 0x0003C }, /* LESS-THAN SIGN */
+{ "QUOT;", 0x00022 }, /* QUOTATION MARK */
+{ "REG;", 0x000AE }, /* REGISTERED SIGN */
+{ "TRADE;", 0x02122 }, /* TRADE MARK SIGN */
+{ "aacute;", 0x000E1 }, /* LATIN SMALL LETTER A WITH ACUTE */
+{ "acute;", 0x000B4 }, /* ACUTE ACCENT */
+{ "agrave;", 0x000E0 }, /* LATIN SMALL LETTER A WITH GRAVE */
+{ "amp;", 0x00026 }, /* AMPERSAND */
+{ "apos;", 0x00027 }, /* APOSTROPHE */
+{ "bull;", 0x02022 }, /* BULLET */
+{ "bullet;", 0x02022 }, /* BULLET */
+{ "cent;", 0x000A2 }, /* CENT SIGN */
+{ "copy;", 0x000A9 }, /* COPYRIGHT SIGN */
+{ "dagger;", 0x02020 }, /* DAGGER */
+{ "dash;", 0x02010 }, /* HYPHEN */
+{ "deg;", 0x000B0 }, /* DEGREE SIGN */
+{ "delta;", 0x003B4 }, /* GREEK SMALL LETTER DELTA */
+{ "dollar;", 0x00024 }, /* DOLLAR SIGN */
+{ "eacute;", 0x000E9 }, /* LATIN SMALL LETTER E WITH ACUTE */
+{ "egrave;", 0x000E8 }, /* LATIN SMALL LETTER E WITH GRAVE */
+{ "emsp;", 0x02003 }, /* EM SPACE */
+{ "ensp;", 0x02002 }, /* EN SPACE */
+{ "equals;", 0x0003D }, /* EQUALS SIGN */
+{ "euml;", 0x000EB }, /* LATIN SMALL LETTER E WITH DIAERESIS */
+{ "euro;", 0x020AC }, /* EURO SIGN */
+{ "gbreve;", 0x0011F }, /* LATIN SMALL LETTER G WITH BREVE */
+{ "grave;", 0x00060 }, /* GRAVE ACCENT */
+{ "gt;", 0x0003E }, /* GREATER-THAN SIGN */
+{ "hellip;", 0x02026 }, /* HORIZONTAL ELLIPSIS */
+{ "hyphen;", 0x02010 }, /* HYPHEN */
+{ "laquo;", 0x000AB }, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK */
+{ "ldquo;", 0x0201C }, /* LEFT DOUBLE QUOTATION MARK */
+{ "lpar;", 0x00028 }, /* LEFT PARENTHESIS */
+{ "lrm;", 0x0200E }, /* LEFT-TO-RIGHT MARK */
+{ "lsquo;", 0x02018 }, /* LEFT SINGLE QUOTATION MARK */
+{ "lt;", 0x0003C }, /* LESS-THAN SIGN */
+{ "mdash;", 0x02014 }, /* EM DASH */
+{ "micro;", 0x000B5 }, /* MICRO SIGN */
+{ "middot;", 0x000B7 }, /* MIDDLE DOT */
+{ "nbsp;", 0x000A0 }, /* NO-BREAK SPACE */
+{ "ndash;", 0x02013 }, /* EN DASH */
+{ "percnt;", 0x00025 }, /* PERCENT SIGN */
+{ "pi;", 0x003C0 }, /* GREEK SMALL LETTER PI */
+{ "pound;", 0x000A3 }, /* POUND SIGN */
+{ "quot;", 0x00022 }, /* QUOTATION MARK */
+{ "raquo;", 0x000BB }, /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK */
+{ "rdquo;", 0x0201D }, /* RIGHT DOUBLE QUOTATION MARK */
+{ "reg;", 0x000AE }, /* REGISTERED SIGN */
+{ "rpar;", 0x00029 }, /* RIGHT PARENTHESIS */
+{ "rsquo;", 0x02019 }, /* RIGHT SINGLE QUOTATION MARK */
+{ "shy;", 0x000AD }, /* SOFT HYPHEN */
+{ "sigma;", 0x003C3 }, /* GREEK SMALL LETTER SIGMA */
+{ "thinsp;", 0x02009 }, /* THIN SPACE */
+{ "times;", 0x000D7 }, /* MULTIPLICATION SIGN */
+{ "trade;", 0x02122 }, /* TRADE MARK SIGN */
+{ "yen;", 0x000A5 }, /* YEN SIGN */
(DIR) diff --git a/strlcat.c b/strlcat.c
@@ -0,0 +1,55 @@
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Appends src to string dst of size dsize (unlike strncat, dsize is the
+ * full size of dst, not space left). At most dsize-1 characters
+ * will be copied. Always NUL terminates (unless dsize <= strlen(dst)).
+ * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
+ * If retval >= dsize, truncation occurred.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t dsize)
+{
+ const char *odst = dst;
+ const char *osrc = src;
+ size_t n = dsize;
+ size_t dlen;
+
+ /* Find the end of dst and adjust bytes left but don't go past end. */
+ while (n-- != 0 && *dst != '\0')
+ dst++;
+ dlen = dst - odst;
+ n = dsize - dlen;
+
+ if (n-- == 0)
+ return(dlen + strlen(src));
+ while (*src != '\0') {
+ if (n != 0) {
+ *dst++ = *src;
+ n--;
+ }
+ src++;
+ }
+ *dst = '\0';
+
+ return(dlen + (src - osrc)); /* count does not include NUL */
+}
(DIR) diff --git a/strlcpy.c b/strlcpy.c
@@ -0,0 +1,50 @@
+/* $OpenBSD: strlcpy.c,v 1.12 2015/01/15 03:54:12 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+/*
+ * Copy string src to buffer dst of size dsize. At most dsize-1
+ * chars will be copied. Always NUL terminates (unless dsize == 0).
+ * Returns strlen(src); if retval >= dsize, truncation occurred.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t dsize)
+{
+ const char *osrc = src;
+ size_t nleft = dsize;
+
+ /* Copy as many bytes as will fit. */
+ if (nleft != 0) {
+ while (--nleft != 0) {
+ if ((*dst++ = *src++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src. */
+ if (nleft == 0) {
+ if (dsize != 0)
+ *dst = '\0'; /* NUL-terminate dst */
+ while (*src++)
+ ;
+ }
+
+ return(src - osrc - 1); /* count does not include NUL */
+}
(DIR) diff --git a/webdump.1 b/webdump.1
@@ -0,0 +1,66 @@
+.Dd September 7, 2023
+.Dt WEBDUMP 1
+.Os
+.Sh NAME
+.Nm webdump
+.Nd convert HTML to plain-text
+.Sh SYNOPSIS
+.Nm
+.Op Fl 8ailrx
+.Op Fl b Ar baseurl
+.Op Fl s Ar selector
+.Op Fl u Ar selector
+.Op Fl w Ar termwidth
+.Sh DESCRIPTION
+.Nm
+reads UTF-8 HTML data from stdin.
+It converts and writes the output as plain-text to stdout.
+A
+.Ar baseurl
+can be specified if the links in the feed are relative URLs.
+.Bl -tag -width Ds
+.It Fl 8
+Use UTF-8 symbols for certain items like bullet items and rulers to make the
+output fancier.
+.It Fl a
+Toggle ANSI escape codes usage, by default it is not enabled.
+.It Fl b Ar baseurl
+Base URL of links.
+This is used to make links absolute.
+.It Fl i
+Toggle if link reference numbers are displayed inline or not, by default it is
+not enabled.
+.It Fl l
+Toggle if link references are displayed at the bottom or not, by default it is
+not enabled.
+.It Fl r
+Toggle if line-wrapping mode is enabled, by default it is not enabled.
+.It Fl s
+CSS-like selectors, this sets a reader mode to hide content
+matching the selector, for example: "main" or "main#id" or "main.class".
+Multiple selectors can be specified by separating them with a comma.
+.It Fl u
+CSS-like selectors, this sets a reader mode to hide content
+matching the selector, for example: "main" or "main#id" or "main.class".
+Multiple selectors can be specified by separating them with a comma.
+.It Fl w Ar termwidth
+The terminal width.
+The default is 77 characters.
+.It Fl x
+Write resources as TAB-separated lines to file descriptor 3.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+.Bd -literal
+curl -s 'https://codemadness.org/' | \\
+ webdump -b 'https://codemadness.org' -l -r | \\
+ less
+.Ed
+.Sh SEE ALSO
+.Xr curl 1 ,
+.Xr xmllint 1 ,
+.Xr xmlstarlet 1 ,
+.Xr ftp 1
+.Sh AUTHORS
+.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
(DIR) diff --git a/webdump.c b/webdump.c
@@ -0,0 +1,2072 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "arg.h"
+char *argv0;
+
+#include "xml.h"
+
+static XMLParser parser;
+
+#ifndef __OpenBSD__
+#define pledge(p1,p2) 0
+#endif
+
+#undef strlcat
+size_t strlcat(char *, const char *, size_t);
+#undef strlcpy
+size_t strlcpy(char *, const char *, size_t);
+
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c))
+
+#define LEN(x) (sizeof(x) / sizeof(x[0]))
+
+/* URI */
+struct uri {
+ char proto[48]; /* scheme including ":" or "://" */
+ char userinfo[256]; /* username [:password] */
+ char host[256];
+ char port[6]; /* numeric port */
+ char path[1024];
+ char query[1024];
+ char fragment[1024];
+};
+
+/* options */
+static int allowansi = 0; /* allow ANSI escape codes */
+static int showrefbottom = 0; /* show link references at the bottom */
+static int showrefinline = 0; /* show link reference number inline */
+static int linewrap = 0; /* line-wrapping */
+static int termwidth = 77; /* terminal width */
+static int resources = 0; /* write resources line-by-line to fd 3? */
+
+/* linked-list of link references */
+struct linkref {
+ char *type;
+ char *url;
+ int ishidden;
+ struct linkref *next;
+};
+
+static struct linkref *links_head;
+static struct linkref *links_cur;
+static int linkcount; /* visible link count */
+
+enum DisplayType {
+ DisplayUnknown = 0,
+ DisplayInline = 1 << 0,
+ DisplayInlineBlock = 1 << 1, /* unused for now */
+ DisplayBlock = 1 << 2,
+ DisplayNone = 1 << 3,
+ DisplayPre = 1 << 4,
+ DisplayList = 1 << 5,
+ DisplayListOrdered = 1 << 6,
+ DisplayListItem = 1 << 7,
+ DisplayTable = 1 << 8,
+ DisplayTableRow = 1 << 9,
+ DisplayTableCell = 1 << 10,
+ DisplayHeader = 1 << 11
+};
+
+/* ANSI markup */
+enum MarkupType {
+ MarkupNone = 0,
+ MarkupBold = 1 << 0,
+ MarkupItalic = 1 << 1,
+ MarkupUnderline = 1 << 2,
+ MarkupBlink = 1 << 3, /* lol */
+ MarkupReverse = 1 << 4,
+ MarkupStrike = 1 << 5
+};
+
+/* String data / memory pool */
+typedef struct string {
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+struct tag {
+ const char *name;
+ enum DisplayType displaytype;
+ enum MarkupType markuptype; /* ANSI markup */
+ enum DisplayType parenttype; /* display type belonging to element */
+ int isvoid; /* "void" element */
+ int isoptional; /* optional to close tag */
+ int margintop; /* newlines when the tag starts */
+ int marginbottom; /* newlines after the tag ends */
+ int indent; /* indent in cells */
+};
+
+struct node {
+ char tagname[256];
+ struct tag tag;
+ size_t nchildren; /* child node count */
+ size_t visnchildren; /* child node count which are visible */
+ /* attributes */
+ char id[256];
+ char classnames[1024];
+ int indent; /* indent per node, for formatting */
+ int hasdata; /* tag contains some data, for formatting */
+};
+
+struct selectornode {
+ char tagname[256];
+ /* attributes */
+ char id[256];
+ char classnames[1024];
+};
+
+struct selector {
+ const char *text;
+ struct selectornode nodes[32];
+ int depth;
+};
+
+/* list of selectors */
+struct selectors {
+ struct selector **selectors;
+ size_t count;
+};
+
+static const char *str_bullet_item = "* ";
+static const char *str_ruler = "-";
+
+/* base href, to make URLs absolute */
+static char *basehref = "";
+static char basehrefdoc[4096]; /* base href in document, if any */
+
+/* buffers for some attributes of the current tag */
+String attr_alt; /* alt attribute */
+String attr_class; /* class attribute */
+String attr_href; /* href attribute */
+String attr_id; /* id attribute */
+String attr_src; /* src attribute */
+String attr_type; /* type attribute */
+String attr_value; /* value attribute */
+
+static String htmldata;
+
+/* for white-space output handling:
+ 1 = whitespace emitted (suppress repeated), 2 = other characters on this line
+ Behaviour:
+ * White-space data before non-whitespace data in tags are ignored on a line.
+ * Repeated white-space are ignored: a single space (' ') is emitted.
+*/
+static int whitespace_mode = 0;
+static int nbytesline = 0;
+static int ncells = 0; /* current cell count */
+static int hadnewline = 0; /* count for repeated newlines */
+/* flag for skipping initial white-space in tag: for HTML white-space handling */
+static int skipinitialws = 1;
+static const int defaultindent = 2;
+static int indent;
+/* previous output sequential newlines, used for calculating margins between
+ elements and reducing excessive newlines */
+static int currentnewlines;
+
+/* buffers for line-wrapping (buffer per word boundary) */
+static char rbuf[1024];
+static int rbuflen;
+static int rnbufcells = 0; /* pending cell count to add */
+
+#define MAX_DEPTH 256
+static struct node nodes[MAX_DEPTH];
+static String nodes_links[MAX_DEPTH]; /* keep track of links per node */
+static int curnode;
+
+/* reader / selector mode */
+static int reader_mode = 0;
+static int reader_ignore = 0;
+
+static enum MarkupType curmarkup;
+
+/* selector to match */
+static struct selectors *sel_hide, *sel_show;
+
+/* tag displaytype markup parent v o b a i */
+static struct tag tags[] = {
+{ "a", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
+{ "area", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "article", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "audio", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
+{ "b", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 },
+{ "base", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "blink", DisplayInline, MarkupBlink, 0, 0, 0, 0, 0, 0 },
+{ "blockquote", DisplayBlock, 0, 0, 0, 0, 0, 0, 2 },
+{ "body", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "br", 0, 0, 0, 1, 0, 0, 0, 0 },
+{ "code", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
+{ "col", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "colgroup", DisplayInline, 0, 0, 0, 1, 0, 0, 0 },
+{ "dd", DisplayBlock, 0, 0, 0, 1, 0, 0, 4 },
+{ "del", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 },
+{ "div", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "dl", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
+{ "dt", DisplayBlock, MarkupBold, 0, 0, 1, 0, 0, 0 },
+{ "em", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
+{ "embed", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "footer", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "h1", DisplayHeader, MarkupBold, 0, 0, 0, 1, 1, -defaultindent },
+{ "h2", DisplayHeader, MarkupBold, 0, 0, 0, 1, 1, -defaultindent },
+{ "h3", DisplayHeader, MarkupBold, 0, 0, 0, 1, 1, -defaultindent },
+{ "h4", DisplayHeader, MarkupBold, 0, 0, 0, 1, 1, -defaultindent },
+{ "h5", DisplayHeader, MarkupBold, 0, 0, 0, 1, 1, -defaultindent },
+{ "h6", DisplayHeader, MarkupBold, 0, 0, 0, 1, 1, -defaultindent },
+{ "head", DisplayBlock, 0, 0, 0, 1, 0, 0, 0 },
+{ "header", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "hr", DisplayBlock, 0, 0, 1, 0, 0, 0, 0 },
+{ "html", DisplayBlock, 0, 0, 0, 1, 0, 0, 0 },
+{ "i", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
+{ "img", DisplayInline, MarkupUnderline, 0, 1, 0, 0, 0, 0 },
+{ "input", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "label", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 },
+{ "li", DisplayListItem, 0, DisplayList, 0, 1, 0, 0, 0 },
+{ "link", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "main", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "meta", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "nav", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "ol", DisplayList | DisplayListOrdered, 0, 0, 0, 0, 1, 1, 0 },
+{ "option", DisplayNone, 0, 0, 0, 1, 0, 0, 0 }, /* prevent clutter and hide all options for now */
+{ "p", DisplayBlock, 0, 0, 0, 1, 1, 1, 0 },
+{ "param", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "pre", DisplayPre, 0, 0, 0, 0, 1, 1, 4 },
+{ "s", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 },
+{ "script", DisplayNone, 0, 0, 0, 0, 0, 0, 0 },
+{ "source", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "strike", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 },
+{ "strong", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 },
+{ "style", DisplayNone, 0, 0, 0, 0, 0, 0, 0 },
+{ "table", DisplayTable, 0, 0, 0, 0, 0, 0, 0 },
+{ "tbody", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 },
+{ "td", DisplayTableCell, 0, DisplayTableRow, 0, 1, 0, 0, 0 },
+{ "template", DisplayNone, 0, 0, 0, 0, 0, 0, 0 },
+{ "textarea", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
+{ "tfoot", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 },
+{ "th", DisplayTableCell, MarkupBold, DisplayTableRow, 0, 1, 0, 0, 0 },
+{ "thead", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 },
+{ "time", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
+{ "title", DisplayBlock, 0, 0, 0, 0, 0, 1, -defaultindent },
+{ "tr", DisplayTableRow, 0, DisplayTable, 0, 1, 0, 0, 0 },
+{ "track", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "u", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
+{ "ul", DisplayList, 0, 0, 0, 0, 1, 1, 2 },
+{ "video", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
+{ "wbr", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }
+};
+
+/* hint for compilers and static analyzers that a function exits */
+#ifndef __dead
+#define __dead
+#endif
+
+/* print to stderr, print error message of errno and exit(). */
+__dead void
+err(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno;
+
+ saved_errno = errno;
+
+ fputs("webdump: ", stderr);
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputs(": ", stderr);
+ }
+ fprintf(stderr, "%s\n", strerror(saved_errno));
+
+ exit(exitstatus);
+}
+
+/* print to stderr and exit(). */
+__dead void
+errx(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+
+ fputs("webdump: ", stderr);
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+ fputs("\n", stderr);
+
+ exit(exitstatus);
+}
+
+static const char *ignorestate, *endtag;
+static int (*getnext)(void);
+
+/* return a space for all data until some case-insensitive string occurs. This
+ is used to parse incorrect HTML/XML that contains unescaped HTML in script
+ or style tags. If you see some </script> tag in a CDATA or comment
+ section then e-mail W3C and tell them the web is too complex. */
+static inline int
+getnext_ignore(void)
+{
+ int c;
+
+ if ((c = getnext()) == EOF)
+ return EOF;
+
+ if (TOLOWER((unsigned char)c) == TOLOWER((unsigned char)*ignorestate)) {
+ ignorestate++;
+ if (*ignorestate == '\0') {
+ parser.getnext = getnext; /* restore */
+ return ' ';
+ }
+ } else {
+ ignorestate = endtag; /* no full match: reset to beginning */
+ }
+
+ return ' '; /* pretend there is just SPACEs */
+}
+
+/* Clear string only; don't free, prevents unnecessary reallocation. */
+static void
+string_clear(String *s)
+{
+ if (s->data)
+ s->data[0] = '\0';
+ s->len = 0;
+}
+
+static void
+string_buffer_realloc(String *s, size_t newlen)
+{
+ size_t alloclen;
+
+ for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
+ ;
+ if (!(s->data = realloc(s->data, alloclen)))
+ err(1, "realloc");
+ s->bufsiz = alloclen;
+}
+
+static void
+string_append(String *s, const char *data, size_t len)
+{
+ if (!len)
+ return;
+ /* check if allocation is necesary, don't shrink buffer,
+ * should be more than bufsiz ofcourse. */
+ if (s->len + len >= s->bufsiz)
+ string_buffer_realloc(s, s->len + len + 1);
+ memcpy(s->data + s->len, data, len);
+ s->len += len;
+ s->data[s->len] = '\0';
+}
+
+char *
+estrdup(const char *s)
+{
+ char *p;
+
+ if (!(p = strdup(s)))
+ err(1, "strdup");
+ return p;
+}
+
+char *
+estrndup(const char *s, size_t n)
+{
+ char *p;
+
+ if (!(p = strndup(s, n)))
+ err(1, "strndup");
+ return p;
+}
+
+void *
+erealloc(void *p, size_t siz)
+{
+ if (!(p = realloc(p, siz)))
+ err(1, "realloc");
+
+ return p;
+}
+
+void *
+ecalloc(size_t nmemb, size_t size)
+{
+ void *p;
+
+ if (!(p = calloc(nmemb, size)))
+ err(1, "calloc");
+ return p;
+}
+
+/* check if string has a non-empty scheme / protocol part */
+int
+uri_hasscheme(const char *s)
+{
+ const char *p = s;
+
+ for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
+ *p == '+' || *p == '-' || *p == '.'; p++)
+ ;
+ /* scheme, except if empty and starts with ":" then it is a path */
+ return (*p == ':' && p != s);
+}
+
+int
+uri_parse(const char *s, struct uri *u)
+{
+ const char *p = s;
+ char *endptr;
+ size_t i;
+ long l;
+
+ u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
+ u->path[0] = u->query[0] = u->fragment[0] = '\0';
+
+ /* protocol-relative */
+ if (*p == '/' && *(p + 1) == '/') {
+ p += 2; /* skip "//" */
+ goto parseauth;
+ }
+
+ /* scheme / protocol part */
+ for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
+ *p == '+' || *p == '-' || *p == '.'; p++)
+ ;
+ /* scheme, except if empty and starts with ":" then it is a path */
+ if (*p == ':' && p != s) {
+ if (*(p + 1) == '/' && *(p + 2) == '/')
+ p += 3; /* skip "://" */
+ else
+ p++; /* skip ":" */
+
+ if ((size_t)(p - s) >= sizeof(u->proto))
+ return -1; /* protocol too long */
+ memcpy(u->proto, s, p - s);
+ u->proto[p - s] = '\0';
+
+ if (*(p - 1) != '/')
+ goto parsepath;
+ } else {
+ p = s; /* no scheme format, reset to start */
+ goto parsepath;
+ }
+
+parseauth:
+ /* userinfo (username:password) */
+ i = strcspn(p, "@/?#");
+ if (p[i] == '@') {
+ if (i >= sizeof(u->userinfo))
+ return -1; /* userinfo too long */
+ memcpy(u->userinfo, p, i);
+ u->userinfo[i] = '\0';
+ p += i + 1;
+ }
+
+ /* IPv6 address */
+ if (*p == '[') {
+ /* bracket not found, host too short or too long */
+ i = strcspn(p, "]");
+ if (p[i] != ']' || i < 3)
+ return -1;
+ i++; /* including "]" */
+ } else {
+ /* domain / host part, skip until port, path or end. */
+ i = strcspn(p, ":/?#");
+ }
+ if (i >= sizeof(u->host))
+ return -1; /* host too long */
+ memcpy(u->host, p, i);
+ u->host[i] = '\0';
+ p += i;
+
+ /* port */
+ if (*p == ':') {
+ p++;
+ if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
+ return -1; /* port too long */
+ memcpy(u->port, p, i);
+ u->port[i] = '\0';
+ /* check for valid port: range 1 - 65535, may be empty */
+ errno = 0;
+ l = strtol(u->port, &endptr, 10);
+ if (i && (errno || *endptr || l <= 0 || l > 65535))
+ return -1;
+ p += i;
+ }
+
+parsepath:
+ /* path */
+ if ((i = strcspn(p, "?#")) >= sizeof(u->path))
+ return -1; /* path too long */
+ memcpy(u->path, p, i);
+ u->path[i] = '\0';
+ p += i;
+
+ /* query */
+ if (*p == '?') {
+ p++;
+ if ((i = strcspn(p, "#")) >= sizeof(u->query))
+ return -1; /* query too long */
+ memcpy(u->query, p, i);
+ u->query[i] = '\0';
+ p += i;
+ }
+
+ /* fragment */
+ if (*p == '#') {
+ p++;
+ if ((i = strlen(p)) >= sizeof(u->fragment))
+ return -1; /* fragment too long */
+ memcpy(u->fragment, p, i);
+ u->fragment[i] = '\0';
+ }
+
+ return 0;
+}
+
+/* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
+ Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
+ Returns 0 on success, -1 on error or truncation. */
+int
+uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
+{
+ char *p;
+ int c;
+
+ strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
+
+ if (u->proto[0] || u->host[0]) {
+ strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
+ strlcpy(a->host, u->host, sizeof(a->host));
+ strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
+ strlcpy(a->host, u->host, sizeof(a->host));
+ strlcpy(a->port, u->port, sizeof(a->port));
+ strlcpy(a->path, u->path, sizeof(a->path));
+ strlcpy(a->query, u->query, sizeof(a->query));
+ return 0;
+ }
+
+ strlcpy(a->proto, b->proto, sizeof(a->proto));
+ strlcpy(a->host, b->host, sizeof(a->host));
+ strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
+ strlcpy(a->host, b->host, sizeof(a->host));
+ strlcpy(a->port, b->port, sizeof(a->port));
+
+ if (!u->path[0]) {
+ strlcpy(a->path, b->path, sizeof(a->path));
+ } else if (u->path[0] == '/') {
+ strlcpy(a->path, u->path, sizeof(a->path));
+ } else {
+ a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
+ a->path[1] = '\0';
+
+ if ((p = strrchr(b->path, '/'))) {
+ c = *(++p);
+ *p = '\0'; /* temporary NUL-terminate */
+ if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
+ return -1;
+ *p = c; /* restore */
+ }
+ if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
+ return -1;
+ }
+
+ if (u->path[0] || u->query[0])
+ strlcpy(a->query, u->query, sizeof(a->query));
+ else
+ strlcpy(a->query, b->query, sizeof(a->query));
+
+ return 0;
+}
+
+int
+uri_format(char *buf, size_t bufsiz, struct uri *u)
+{
+ return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
+ u->proto,
+ u->userinfo[0] ? u->userinfo : "",
+ u->userinfo[0] ? "@" : "",
+ u->host,
+ u->port[0] ? ":" : "",
+ u->port,
+ u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
+ u->path,
+ u->query[0] ? "?" : "",
+ u->query,
+ u->fragment[0] ? "#" : "",
+ u->fragment);
+}
+
+static void
+rindent(void)
+{
+ int i;
+
+ for (i = 0; i < indent + defaultindent; i++) {
+ putchar(' ');
+ nbytesline++;
+ ncells++;
+ }
+}
+
+static void
+emitmarkup(int markuptype)
+{
+ if (!allowansi)
+ return;
+
+ if (!markuptype)
+ fputs("\033[0m", stdout); /* reset all attributes */
+
+ /* set */
+ if (markuptype & MarkupBold)
+ fputs("\033[1m", stdout);
+ if (markuptype & MarkupItalic)
+ fputs("\033[3m", stdout);
+ if (markuptype & MarkupUnderline)
+ fputs("\033[4m", stdout);
+ if (markuptype & MarkupBlink)
+ fputs("\033[5m", stdout);
+ if (markuptype & MarkupReverse)
+ fputs("\033[7m", stdout);
+ if (markuptype & MarkupStrike)
+ fputs("\033[9m", stdout);
+}
+
+/* flush remaining buffer (containing a word): used for word-wrap handling */
+static void
+hflush(void)
+{
+ int i;
+
+ if (!rbuflen)
+ return;
+
+ if (!nbytesline) {
+ emitmarkup(0);
+ rindent();
+ /* emit code again per line, needed for GNU/less -R */
+ emitmarkup(curmarkup);
+ }
+
+ for (i = 0; i < rbuflen; i++)
+ putchar(rbuf[i]);
+
+ nbytesline += rbuflen;
+ ncells += rnbufcells;
+ rbuflen = 0;
+ rnbufcells = 0;
+}
+
+static void
+printansi(const char *s)
+{
+ size_t len;
+
+ if (!allowansi)
+ return;
+
+ if (linewrap) {
+ len = strlen(s);
+ if (rbuflen + len + 1 >= sizeof(rbuf))
+ hflush();
+ if (rbuflen + len + 1 < sizeof(rbuf)) {
+ memcpy(rbuf + rbuflen, s, len);
+ rbuflen += len;
+ }
+ } else {
+ fputs(s, stdout);
+ }
+}
+
+static void
+setmarkup(int markuptype)
+{
+ if (!allowansi)
+ return;
+
+ /* need change? */
+ if (curmarkup == markuptype)
+ return;
+
+ if (!markuptype) {
+ printansi("\033[0m"); /* reset all attributes */
+ curmarkup = markuptype;
+ return;
+ }
+
+ /* set */
+ if (!(curmarkup & MarkupBold) && (markuptype & MarkupBold))
+ printansi("\033[1m");
+ if (!(curmarkup & MarkupItalic) && (markuptype & MarkupItalic))
+ printansi("\033[3m");
+ if (!(curmarkup & MarkupUnderline) && (markuptype & MarkupUnderline))
+ printansi("\033[4m");
+ if (!(curmarkup & MarkupBlink) && (markuptype & MarkupBlink))
+ printansi("\033[5m");
+ if (!(curmarkup & MarkupReverse) && (markuptype & MarkupReverse))
+ printansi("\033[7m");
+ if (!(curmarkup & MarkupStrike) && (markuptype & MarkupStrike))
+ printansi("\033[9m");
+
+ /* unset */
+ if ((curmarkup & MarkupBold) && !(markuptype & MarkupBold))
+ printansi("\033[22m"); /* reset bold or faint */
+ if ((curmarkup & MarkupItalic) && !(markuptype & MarkupItalic))
+ printansi("\033[23m"); /* reset italic */
+ if ((curmarkup & MarkupUnderline) && !(markuptype & MarkupUnderline))
+ printansi("\033[24m"); /* reset underline */
+ if ((curmarkup & MarkupBlink) && !(markuptype & MarkupBlink))
+ printansi("\033[25m"); /* reset blink */
+ if ((curmarkup & MarkupReverse) && !(markuptype & MarkupReverse))
+ printansi("\033[27m"); /* reset reverse */
+ if ((curmarkup & MarkupStrike) && !(markuptype & MarkupStrike))
+ printansi("\033[29m"); /* reset strike */
+
+ curmarkup = markuptype;
+}
+
+static void
+startmarkup(int markuptype)
+{
+ setmarkup(curmarkup | markuptype);
+}
+
+static void
+endmarkup(int markuptype)
+{
+ setmarkup(curmarkup & ~markuptype);
+}
+
+/* rough cell width of a unicode codepoint by counting a unicode codepoint as 1
+ cell in general.
+ NOTE: this is of course incorrect since characters can be 2 width aswell,
+ in the future maybe replace this with wcwidth() or similar */
+int
+utfwidth(int c)
+{
+ /* not the start of a codepoint */
+ if ((c & 0xc0) == 0x80)
+ return 0;
+ /* count TAB as 8 */
+ if (c == '\t')
+ return 8;
+ return 1;
+}
+
+/* write a character, handling state of repeated newlines, some HTML
+ white-space rules, indentation and word-wrapping */
+static void
+hputchar(int c)
+{
+ struct node *cur = &nodes[curnode];
+ cur->hasdata = 1;
+
+ if (c == '\n') {
+ if (nbytesline <= 0)
+ hadnewline = 0;
+
+ /* start a new line, no chars on this line yet */
+ whitespace_mode &= ~2; /* no chars on this line yet */
+ nbytesline = 0;
+ ncells = 0;
+
+ if (hadnewline)
+ currentnewlines++; /* repeating newlines */
+ hadnewline = 1;
+ } else {
+ hadnewline = 0;
+ currentnewlines = 0;
+ }
+
+ /* skip initial/leading white-space */
+ if (ISSPACE((unsigned char)c)) {
+ if (skipinitialws)
+ return;
+ } else {
+ skipinitialws = 0;
+ }
+
+ if (!(c == '\n' || c == '\t' || !ISCNTRL((unsigned char)c)))
+ return;
+
+ if (!linewrap) {
+ if (c == '\n') {
+ putchar('\n');
+ nbytesline = 0;
+ ncells = 0;
+ } else {
+ if (!nbytesline) {
+ emitmarkup(0);
+ rindent();
+ /* emit code again per line, needed for GNU/less -R */
+ emitmarkup(curmarkup);
+ }
+ putchar(c);
+ nbytesline++;
+ ncells += utfwidth(c);
+ }
+ return;
+ }
+
+ /* really too long: the whole word doesn't even fit, flush it */
+ if (ncells + rnbufcells >= termwidth || rbuflen >= sizeof(rbuf) - 1) {
+ putchar('\n');
+ nbytesline = 0;
+ ncells = 0;
+ hflush();
+ }
+
+ if (c == '\n') {
+ putchar('\n');
+ hflush();
+ return;
+ } else if (ISSPACE((unsigned char)c) || c == '-') {
+ if (ncells + rnbufcells >= termwidth) {
+ putchar('\n');
+ nbytesline = 0;
+ ncells = 0;
+ }
+ rbuf[rbuflen++] = c;
+ rnbufcells += utfwidth(c);
+ hflush();
+ return;
+ }
+
+ rbuf[rbuflen++] = c;
+ rnbufcells += utfwidth(c);
+}
+
+/* calculate indentation of current node depth, using the sum of each
+ indentation per node */
+static int
+calcindent(void)
+{
+ int i, n = 0;
+
+ for (i = curnode; i >= 0; i--)
+ n += nodes[i].indent;
+
+ return n;
+}
+
+static void
+hprint(const char *s)
+{
+ for (; *s; ++s)
+ hputchar(*s);
+}
+
+/* printf(), max 256 bytes for now */
+static void
+hprintf(const char *fmt, ...)
+{
+ va_list ap;
+ char buf[256];
+
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ /* use hprint() formatting logic. */
+ hprint(buf);
+}
+
+static void
+newline(void)
+{
+ if (skipinitialws)
+ return;
+ hputchar('\n');
+}
+
+static int
+parentcontainerhasdata(int curtype, int n)
+{
+ int i;
+
+ for (i = n; i >= 0; i--) {
+ if (nodes[i].tag.displaytype & (DisplayList|DisplayTable))
+ break;
+ if (nodes[i].hasdata)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+parenthasdata(int n)
+{
+ int i;
+
+ for (i = n; i >= 0; i--)
+ return nodes[i].hasdata;
+
+ return 0;
+}
+
+/* start on a newline for the start of a block element or not */
+static void
+startblock(void)
+{
+ hflush();
+ whitespace_mode &= ~2; /* no characters on this line yet */
+ if (nbytesline <= 0)
+ return;
+ if (!hadnewline && parenthasdata(curnode - 1))
+ hputchar('\n');
+}
+
+/* start on a newline for the end of a block element or not */
+static void
+endblock(void)
+{
+ hflush();
+ whitespace_mode &= ~2; /* no characters on this line yet */
+ if (nbytesline <= 0)
+ return;
+ if (!hadnewline)
+ hputchar('\n');
+}
+
+/* print one character safely: no control characters,
+ handle HTML white-space rules */
+static void
+printc(int c)
+{
+ if (ISSPACE((unsigned char)c)) {
+ if (whitespace_mode == 2)
+ hputchar(' ');
+ whitespace_mode |= 1;
+ } else {
+ whitespace_mode = 2;
+ if (!ISCNTRL((unsigned char)c))
+ hputchar(c);
+ }
+}
+
+static void
+printpre(const char *s, size_t len)
+{
+ struct node *cur;
+ size_t i;
+
+ /* reset state of newlines because this data is printed literally */
+ hadnewline = 0;
+ currentnewlines = 0;
+
+ /* skip leading white-space */
+ i = 0;
+ if (skipinitialws) {
+ for (; *s && i < len; s++, i++) {
+ if (!ISSPACE((unsigned char)*s))
+ break;
+ }
+ }
+
+ hflush();
+
+ skipinitialws = 0;
+
+ if (*s) {
+ cur = &nodes[curnode];
+ cur->hasdata = 1;
+ }
+
+ for (; *s && i < len; s++, i++) {
+ switch (*s) {
+ case '\n':
+ putchar('\n');
+ nbytesline = 0;
+ ncells = 0;
+ break;
+ case '\t':
+ hadnewline = 0;
+ if (!nbytesline) {
+ emitmarkup(0);
+ rindent();
+ /* emit code again per line, needed for GNU/less -R */
+ emitmarkup(curmarkup);
+ }
+
+ /* TAB to 8 spaces */
+ fputs(" ", stdout);
+ nbytesline += 8;
+ ncells += 8;
+ break;
+ default:
+ if (ISCNTRL((unsigned char)*s))
+ continue;
+
+ if (!nbytesline) {
+ emitmarkup(0);
+ rindent();
+ /* emit code again per line, needed for GNU/less -R */
+ emitmarkup(curmarkup);
+ }
+
+ putchar(*s);
+ nbytesline++;
+ /* start of rune: incorrectly assume 1 rune is 1 cell for now */
+ ncells += utfwidth((unsigned char)*s);
+ }
+ }
+}
+
+static struct node *
+findparenttype(int cur, int findtype)
+{
+ int i;
+
+ for (i = cur; i >= 0; i--) {
+ if ((nodes[i].tag.displaytype & findtype))
+ return &nodes[i];
+ }
+ return NULL;
+}
+
+int
+isclassmatch(const char *haystack, const char *needle)
+{
+ const char *p;
+ size_t needlelen;
+ size_t matched = 0;
+
+ needlelen = strlen(needle);
+ for (p = haystack; *p; p++) {
+ if (ISSPACE((unsigned char)*p)) {
+ matched = 0;
+ continue;
+ }
+ if (needle[matched] == *p)
+ matched++;
+ else
+ matched = 0;
+ if (matched == needlelen) {
+ if (*(p + 1) == '\0' || ISSPACE((unsigned char)*(p + 1)))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* very limited CSS-like selector, supports: main, main#id, main.class,
+ ".class", "#id", "ul li a" */
+int
+compileselector(const char *sel, struct selectornode *nodes, size_t maxnodes)
+{
+ int depth = 0, len;
+ const char *s, *start;
+ char tmp[256];
+ int nameset = 0;
+
+ memset(&nodes[0], 0, sizeof(nodes[0]));
+
+ s = sel;
+ for (; *s && ISSPACE((unsigned char)*s); s++)
+ ;
+
+ start = s;
+ for (; ; s++) {
+ /* end of tag */
+ if (!nameset &&
+ (*s == '#' || *s == '.' || *s == '[' ||
+ *s == '\0' || ISSPACE((unsigned char)*s))) {
+ nameset = 1;
+ len = s - start; /* tag name */
+ if (len >= sizeof(tmp))
+ return 0;
+ if (len)
+ memcpy(tmp, start, len);
+ tmp[len] = '\0';
+
+ memcpy(nodes[depth].tagname, tmp, len + 1);
+ }
+
+ /* end */
+ if (*s == '\0' || ISSPACE((unsigned char)*s)) {
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+ start = s; /* start of a new tag */
+ depth++;
+ if (depth >= maxnodes)
+ return 0;
+
+ nameset = 0;
+ memset(&nodes[depth], 0, sizeof(nodes[depth]));
+
+ /* end of selector */
+ if (*s == '\0')
+ break;
+ }
+
+ /* id */
+ if (*s == '#') {
+ len = strcspn(s + 1, ".#[ \t\n");
+ if (len >= sizeof(tmp))
+ return 0;
+ memcpy(tmp, s + 1, len);
+ tmp[len] = '\0';
+ memcpy(nodes[depth].id, tmp, len + 1);
+ s += len;
+ start = s + 1;
+ continue;
+ }
+
+ /* class */
+ if (*s == '.') {
+ len = strcspn(s + 1, ".#[ \t\n");
+ if (len >= sizeof(tmp))
+ return 0;
+ memcpy(tmp, s + 1, len);
+ tmp[len] = '\0';
+ /* allow only one classname for now */
+ memcpy(nodes[depth].classnames, tmp, len + 1);
+ s += len;
+ start = s + 1;
+ continue;
+ }
+ }
+
+ return depth;
+}
+
+struct selector *
+newselector(const char *q)
+{
+ struct selector *sel;
+ int r;
+
+ sel = ecalloc(1, sizeof(*sel));
+ sel->text = estrdup(q);
+
+ r = compileselector(sel->text, sel->nodes, LEN(sel->nodes));
+ if (r <= 0) {
+ free(sel);
+ return NULL;
+ }
+ sel->depth = r;
+
+ return sel;
+}
+
+struct selectors *
+compileselectors(const char *q)
+{
+ struct selectors *sels = NULL;
+ struct selector *sel;
+ const char *start;
+ char *qe;
+ int count = 0;
+ size_t siz;
+
+ sels = ecalloc(1, sizeof(*sels));
+
+ start = q;
+ for (; ; q++) {
+ if (*q == ',' || *q == '\0') {
+ qe = estrndup(start, q - start);
+ sel = newselector(qe);
+ free(qe);
+
+ /* add new selector */
+ siz = (count + 1) * sizeof(struct selector *);
+ sels->selectors = erealloc(sels->selectors, siz);
+ sels->selectors[count] = sel;
+ count++;
+
+ if (*q == '\0')
+ break;
+ start = q + 1;
+ }
+ }
+ sels->count = count;
+
+ return sels;
+}
+
+/* very limited CSS-like matcher, supports: main, main#id, main.class,
+ ".class", "#id", "ul li a" */
+int
+iscssmatch(struct selector *sel, struct node *root, int maxdepth)
+{
+ int d, md = 0;
+
+ for (d = 0; d <= maxdepth; d++) {
+ /* tag matched? */
+ if (sel->nodes[md].tagname[0] &&
+ strcasecmp(sel->nodes[md].tagname, root[d].tagname))
+ continue; /* no */
+
+ /* id matched? */
+ if (sel->nodes[md].id[0] && strcmp(sel->nodes[md].id, root[d].id))
+ continue; /* no */
+
+ /* class matched, for now allow only one classname in the selector,
+ matching multiple classnames */
+ if (sel->nodes[md].classnames[0] &&
+ !isclassmatch(root[d].classnames, sel->nodes[md].classnames))
+ continue; /* no */
+
+ md++;
+ /* all matched of one selector */
+ if (md == sel->depth)
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+iscssmatchany(struct selectors *sels, struct node *root, int maxdepth)
+{
+ struct selector *sel;
+ int i;
+
+ for (i = 0; i < sels->count; i++) {
+ sel = sels->selectors[i];
+ if (iscssmatch(sel, root, maxdepth))
+ return 1;
+ }
+ return 0;
+}
+
+static void
+handleinlinealt(void)
+{
+ struct node *cur;
+ char *start, *s, *e;
+
+ /* do not show the alt text if the element is hidden */
+ cur = &nodes[curnode];
+ if (cur->tag.displaytype & DisplayNone)
+ return;
+
+ /* show img alt attribute as text. */
+ if (attr_alt.len) {
+ start = attr_alt.data;
+ e = attr_alt.data + attr_alt.len;
+
+ for (s = start; s < e; s++)
+ printc((unsigned char)*s);
+ hflush(); /* TODO: this flush should not be needed */
+ }
+}
+
+static void
+addlinkref(const char *url, const char *_type, int ishidden)
+{
+ /* add to linked list */
+ if (!links_head)
+ links_cur = links_head = ecalloc(1, sizeof(*links_head));
+ else
+ links_cur = links_cur->next = ecalloc(1, sizeof(*links_head));
+ links_cur->url = estrdup(url);
+ links_cur->type = estrdup(_type);
+ links_cur->ishidden = ishidden;
+}
+
+/* TODO: make parsed base URL global and overwrite it once. */
+static void
+handleinlinelink(void)
+{
+ struct uri base, newuri, olduri;
+ struct node *cur;
+ char buf[4096], *url;
+ int b, r;
+
+ /* show links as reference at the bottom */
+ if ((showrefbottom || resources) && (attr_src.len || attr_href.len)) {
+ /* by default use the original URL */
+ if (attr_src.len)
+ url = attr_src.data;
+ else
+ url = attr_href.data;
+
+ b = -1;
+ if (uri_hasscheme(url))
+ ; /* already absolute: nothing to do */
+ else if (basehref[0]) /* prefer -b option over <base> */
+ b = uri_parse(basehref, &base);
+ else if (basehrefdoc[0])
+ b = uri_parse(basehrefdoc, &base);
+
+ if (b != -1 &&
+ uri_parse(url, &olduri) != -1 &&
+ uri_makeabs(&newuri, &olduri, &base) != -1 &&
+ newuri.proto[0]) {
+ r = uri_format(buf, sizeof(buf), &newuri);
+ if (r >= 0 && (size_t)r < sizeof(buf))
+ url = buf;
+ }
+
+ if (!url[0])
+ return;
+
+ cur = &nodes[curnode];
+
+ if (showrefinline && !(cur->tag.displaytype & DisplayNone)) {
+ string_clear(&nodes_links[curnode]);
+ string_append(&nodes_links[curnode], url, strlen(url));
+ }
+
+ /* add hidden links directly to the reference,
+ the order doesn't matter */
+ if (cur->tag.displaytype & DisplayNone)
+ addlinkref(url, cur->tag.name, 1);
+ }
+}
+
+void
+printlinkrefs(void)
+{
+ size_t i;
+ int hashiddenrefs = 0;
+
+ if (!links_head)
+ return;
+
+ if (resources) {
+ for (i = 1, links_cur = links_head; links_cur; links_cur = links_cur->next, i++)
+ dprintf(3, "%s\t%s\n", links_cur->type, links_cur->url);
+ }
+
+ printf("\n\nReferences\n\n");
+
+ i = 1;
+ for (links_cur = links_head; links_cur; links_cur = links_cur->next) {
+ if (links_cur->ishidden) {
+ hashiddenrefs = 1;
+ continue;
+ }
+ printf(" %zu. %s (%s)\n", i, links_cur->url, links_cur->type);
+ i++;
+ }
+
+ if (hashiddenrefs)
+ printf("\n\nHidden references\n\n");
+ for (links_cur = links_head; links_cur; links_cur = links_cur->next) {
+ if (!links_cur->ishidden)
+ continue;
+ printf(" %zu. %s (%s)\n", i, links_cur->url, links_cur->type);
+ i++;
+ }
+}
+
+static void
+xmldatastart(XMLParser *p)
+{
+}
+
+static void
+xmldataend(XMLParser *p)
+{
+ struct node *cur;
+ char *start, *s, *e;
+
+ if (!htmldata.data || !htmldata.len)
+ return;
+
+ cur = &nodes[curnode];
+
+ if (reader_ignore || (cur->tag.displaytype & DisplayNone)) {
+ /* print nothing */
+ } else if ((cur->tag.displaytype & DisplayPre) ||
+ findparenttype(curnode - 1, DisplayPre)) {
+ printpre(htmldata.data, htmldata.len);
+ } else {
+ start = htmldata.data;
+ e = htmldata.data + htmldata.len;
+
+ for (s = start; s < e; s++)
+ printc((unsigned char)*s);
+ }
+
+ string_clear(&htmldata);
+}
+
+static void
+xmldata(XMLParser *p, const char *data, size_t datalen)
+{
+ struct node *cur;
+
+ if (reader_ignore)
+ return;
+
+ cur = &nodes[curnode];
+ if (cur->tag.displaytype & DisplayNone)
+ return;
+
+ string_append(&htmldata, data, datalen);
+}
+
+static void
+xmldataentity(XMLParser *p, const char *data, size_t datalen)
+{
+ struct node *cur;
+ char buf[16];
+ int n;
+
+ if (reader_ignore)
+ return;
+
+ cur = &nodes[curnode];
+ if (cur->tag.displaytype & DisplayNone)
+ return;
+
+ n = xml_entitytostr(data, buf, sizeof(buf));
+ if (n > 0)
+ xmldata(p, buf, (size_t)n);
+ else
+ xmldata(p, data, datalen);
+}
+
+static void
+xmlcdatastart(XMLParser *p)
+{
+ xmldatastart(p);
+}
+
+static void
+xmlcdataend(XMLParser *p)
+{
+ xmldataend(p); /* treat CDATA as data */
+}
+
+static void
+xmlcdata(XMLParser *p, const char *data, size_t datalen)
+{
+ xmldata(p, data, datalen); /* treat CDATA as data */
+}
+
+/* compare tag name (case-insensitive) */
+int
+tagcmp(const char *s1, const char *s2)
+{
+ return strcasecmp(s1, s2);
+}
+
+/* compare attribute name (case-insensitive) */
+int
+attrcmp(const char *s1, const char *s2)
+{
+ return strcasecmp(s1, s2);
+}
+
+/* lookup function to compare tag name (case-insensitive) for sort functions */
+int
+findtagcmp(const void *v1, const void *v2)
+{
+ struct tag *t1 = (struct tag *)v1;
+ struct tag *t2 = (struct tag *)v2;
+
+ return strcasecmp(t1->name, t2->name);
+}
+
+/* binary search tag by tag name */
+struct tag *
+findtag(const char *t)
+{
+ struct tag find = { 0 };
+
+ find.name = t;
+
+ return bsearch(&find, tags, LEN(tags), sizeof(*tags), findtagcmp);
+}
+
+static void
+handleendtag(struct tag *tag)
+{
+ int i, marginbottom;
+
+ if (tag->displaytype & DisplayNone)
+ return;
+ if (reader_ignore)
+ return;
+
+ if (tag->displaytype & (DisplayBlock | DisplayHeader | DisplayTable | DisplayTableRow |
+ DisplayList | DisplayListItem | DisplayPre)) {
+ endblock(); /* break line if needed */
+ }
+
+ /* when a list ends and its not inside a list add an extra bottom margin */
+ marginbottom = tag->marginbottom;
+
+ if (marginbottom > 0) {
+ if (tag->displaytype & DisplayList) {
+ if (findparenttype(curnode - 1, DisplayList))
+ marginbottom--;
+ }
+ }
+
+ if (marginbottom > 0) {
+ hflush();
+ for (i = currentnewlines; i < marginbottom; i++) {
+ putchar('\n');
+ currentnewlines++;
+ }
+ nbytesline = 0;
+ ncells = 0;
+ hadnewline = 1;
+ }
+}
+
+static void
+endnode(struct node *cur)
+{
+ int i, ishidden;
+
+ /* set a flag indicating the element and its parent containers have data.
+ This is used for some formatting */
+ if (cur->hasdata) {
+ for (i = curnode; i >= 0; i--)
+ nodes[i].hasdata = 1;
+ }
+
+ endmarkup(cur->tag.markuptype);
+
+ ishidden = reader_ignore || (cur->tag.displaytype & DisplayNone);
+
+ /* add link and show the link number in the visible order */
+ if (!ishidden && nodes_links[curnode].len > 0) {
+ addlinkref(nodes_links[curnode].data, cur->tag.name, ishidden);
+#if 1
+ hprintf("[%zu]", ++linkcount);
+#else
+ hprintf("[%s: %s]", cur->tag.name, nodes_links[curnode].data);
+#endif
+ }
+
+ handleendtag(&(cur->tag));
+}
+
+static void
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+ struct tag *found, *tag;
+ const char *child;
+ int i, j, parenttype;
+
+ /* ignore closing of void elements, like </br>, which is not allowed */
+ if ((found = findtag(t))) {
+ if (!isshort && found->isvoid)
+ return;
+ }
+
+ /* TODO: implement more complete optional tag handling.
+ in reality the optional tag rules are more complex, see:
+ https://html.spec.whatwg.org/multipage/syntax.html#optional-tags */
+
+ child = NULL;
+ parenttype = 0;
+
+ if (found && found->displaytype & DisplayPre) {
+ skipinitialws = 0; /* do not skip white-space, for margins */
+ } else if (found && found->displaytype & DisplayList) {
+ child = "li";
+ parenttype = DisplayList;
+ } else if (found && found->displaytype & DisplayTableRow) {
+ child = "td";
+ parenttype = DisplayTableRow;
+ } else if (found && found->displaytype & DisplayTable) {
+ child = "td";
+ parenttype = DisplayTable;
+ }
+
+ if (child && parenttype) {
+ for (i = curnode; i >= 0; i--) {
+ if ((nodes[i].tag.displaytype & parenttype))
+ break;
+ if (!tagcmp(nodes[i].tag.name, child)) {
+ /* fake closing the previous tags */
+ for (j = curnode; j >= i; j--)
+ endnode(&nodes[j]);
+ curnode = j;
+ break;
+ }
+ }
+ }
+
+ /* if the current closing tag matches the current open tag */
+ if (nodes[curnode].tag.name &&
+ !tagcmp(nodes[curnode].tag.name, t)) {
+ endnode(&nodes[curnode]);
+ if (curnode)
+ curnode--;
+ } else {
+ /* ... else lookup the first matching start tag. This is also
+ for handling optional closing tags */
+ tag = NULL;
+ for (i = curnode; i >= 0; i--) {
+ if (nodes[curnode].tag.name &&
+ !tagcmp(nodes[i].tag.name, t)) {
+ endnode(&nodes[i]);
+ curnode = i > 0 ? i - 1 : 0;
+ tag = &nodes[i].tag;
+ break;
+ }
+ }
+ /* unmatched closing tag found */
+ if (!tag && found)
+ handleendtag(found);
+ }
+ indent = calcindent();
+
+ /* restore markup of the tag we are in now */
+ startmarkup(nodes[curnode].tag.markuptype);
+
+ /* check if the current node still matches the visible selector */
+ if (reader_mode && sel_show && !reader_ignore) {
+ if (!iscssmatchany(sel_show, nodes, curnode)) {
+ reader_ignore = 1;
+ newline();
+ }
+ }
+}
+
+static void
+xmltagstart(XMLParser *p, const char *t, size_t tl)
+{
+ struct tag *found;
+ struct node *cur;
+ const char *child;
+ char *s;
+ int i, j, parenttype;
+
+ if (curnode >= MAX_DEPTH - 2)
+ errx(1, "max tag depth reached: %d\n", curnode);
+
+ cur = &nodes[curnode];
+
+ string_clear(&attr_alt);
+ string_clear(&attr_class);
+ string_clear(&attr_href);
+ string_clear(&attr_id);
+ string_clear(&attr_src);
+ string_clear(&attr_type);
+ string_clear(&attr_value);
+
+ /* match tag */
+ found = findtag(t);
+
+ /* TODO: implement more complete optional tag handling.
+ in reality the optional tag rules are more complex, see:
+ https://html.spec.whatwg.org/multipage/syntax.html#optional-tags */
+
+ child = NULL;
+ parenttype = 0;
+
+ /* if optional tag <p> is open and a block element is found, close </p>. */
+ if (found && found->displaytype & DisplayList) {
+ /* not inside a list */
+ child = "p";
+ parenttype = DisplayList;
+ } else if (found && found->isoptional) {
+ if (!tagcmp(t, "li")) {
+ child = "li";
+ parenttype = DisplayList;
+ } else if (!tagcmp(t, "td")) {
+ child = "td";
+ parenttype = DisplayTableRow;
+ } else if (!tagcmp(t, "tr")) {
+ child = "tr";
+ parenttype = DisplayTable;
+ } else if (!tagcmp(t, cur->tag.name)) {
+ /* fake closing the previous tag if it is the same and repeated */
+ xmltagend(p, t, tl, 0);
+ }
+ }
+
+ if (child && parenttype) {
+ for (i = curnode; i >= 0; i--) {
+ if ((nodes[i].tag.displaytype & parenttype))
+ break;
+ if (!tagcmp(nodes[i].tag.name, child)) {
+ /* fake closing the previous tags */
+ for (j = curnode; j >= i; j--)
+ xmltagend(p, nodes[j].tag.name, strlen(nodes[j].tag.name), 0);
+ break;
+ }
+ }
+ }
+
+ curnode++;
+ string_clear(&nodes_links[curnode]); /* clear possible link reference for this node */
+ cur = &nodes[curnode];
+ memset(cur, 0, sizeof(*cur)); /* clear / reset node */
+ /* tag defaults */
+ cur->tag.displaytype = DisplayInline;
+ cur->tag.name = cur->tagname;
+ strlcpy(cur->tagname, t, sizeof(cur->tagname));
+ /* force to lowercase */
+ for (s = cur->tagname; *s; s++)
+ *s = TOLOWER((unsigned char)*s);
+
+ /* matched tag: copy tag information to current node */
+ if (found)
+ memcpy(&(cur->tag), found, sizeof(*found));
+
+ /* parent tag is hidden, so hide ourself too */
+ if (curnode > 0 && (nodes[curnode - 1].tag.displaytype & DisplayNone))
+ cur->tag.displaytype |= DisplayNone;
+}
+
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+ struct node *cur, *parent;
+ int i, margintop;
+
+ /* temporary replace the callback except the reader and end of tag
+ restore the context once we receive the same ignored tag in the
+ end tag handler */
+ if (!tagcmp(t, "script")) {
+ ignorestate = endtag = "</script>";
+ getnext = p->getnext; /* for restore */
+ p->getnext = getnext_ignore;
+ xmltagend(p, t, tl, 0); /* fake the call the tag was ended */
+ return;
+ } else if (!tagcmp(t, "style")) {
+ ignorestate = endtag = "</style>";
+ getnext = p->getnext; /* for restore */
+ p->getnext = getnext_ignore;
+ xmltagend(p, t, tl, 0); /* fake the call the tag was ended */
+ return;
+ }
+
+ cur = &nodes[curnode];
+
+ /* copy attributes if set */
+ if (attr_id.len)
+ strlcpy(cur->id, attr_id.data, sizeof(cur->id));
+ else
+ cur->id[0] = '\0';
+ if (attr_class.len)
+ strlcpy(cur->classnames, attr_class.data, sizeof(cur->classnames));
+ else
+ cur->classnames[0] = '\0';
+
+ /* parent node */
+ if (curnode > 0) {
+ parent = &nodes[curnode - 1];
+ parent->nchildren++; /* increase child node count */
+ /* count visible childnodes */
+ if (!(cur->tag.displaytype & DisplayNone))
+ parent->visnchildren++;
+ } else {
+ parent = NULL;
+ }
+
+ if (reader_mode && sel_show && reader_ignore &&
+ iscssmatchany(sel_show, nodes, curnode))
+ reader_ignore = 0;
+
+ /* hide element */
+ if (reader_mode && sel_hide &&
+ iscssmatchany(sel_hide, nodes, curnode))
+ cur->tag.displaytype |= DisplayNone;
+
+ /* indent for this tag */
+ cur->indent = cur->tag.indent;
+
+ if (!reader_ignore) {
+ /* add link reference, print links and alt text */
+ handleinlinelink();
+ handleinlinealt();
+ }
+
+ if (cur->tag.displaytype & DisplayNone)
+ return;
+
+ if (reader_ignore)
+ return;
+
+ indent = calcindent();
+
+ if ((cur->tag.displaytype & (DisplayBlock | DisplayHeader | DisplayPre |
+ DisplayTable | DisplayTableRow |
+ DisplayList | DisplayListItem))) {
+ startblock(); /* break line if needed */
+ }
+
+ margintop = cur->tag.margintop;
+ if (cur->tag.displaytype & (DisplayList)) {
+ for (i = curnode - 1; i >= 0; i--) {
+ if (nodes[i].tag.displaytype & DisplayList)
+ break;
+ if (!(nodes[i].tag.displaytype & DisplayListItem))
+ continue;
+ if (nodes[i].hasdata && margintop > 0) {
+ margintop--;
+ break;
+ }
+ }
+ } else if (cur->tag.displaytype & (DisplayBlock|DisplayTable)) {
+ if (!parentcontainerhasdata(cur->tag.displaytype, curnode - 1)) {
+ if (margintop > 0)
+ margintop--;
+ }
+ }
+
+ if (margintop > 0) {
+ hflush();
+ for (i = currentnewlines; i < margintop; i++) {
+ putchar('\n');
+ currentnewlines++;
+ }
+ nbytesline = 0;
+ ncells = 0;
+ hadnewline = 1;
+ }
+
+ if (cur->tag.displaytype & DisplayPre) {
+ skipinitialws = 1;
+ } else if (cur->tag.displaytype & DisplayTableCell) {
+ if (parent && parent->visnchildren > 1)
+ hputchar('\t');
+ } else if (cur->tag.displaytype & DisplayListItem) {
+ /* find first parent node and ordered numbers or unordered */
+ if (parent) {
+ skipinitialws = 0;
+
+ /* print bullet, add columns to indentation level */
+ if (parent->tag.displaytype & DisplayListOrdered) {
+ hprintf("%4zu. ", parent->nchildren);
+ cur->indent = 6;
+ indent += cur->indent; /* align to number */
+ } else if (parent->tag.displaytype & DisplayList) {
+ hprint(str_bullet_item);
+ cur->indent = 2;
+ indent += 2; /* align to bullet */
+ }
+ }
+ skipinitialws = 0;
+ }
+
+ if (!tagcmp(cur->tag.name, "input")) {
+ if (!attr_type.len) {
+ hprintf("[%-15s]", attr_value.len ? attr_value.data : ""); /* default: text */
+ } else if (!strcasecmp(attr_type.data, "text")) {
+ hprintf("[%-15s]", attr_value.len ? attr_value.data : ""); /* text */
+ } else if (!strcasecmp(attr_type.data, "search")) {
+ hprintf("[%-15s]", attr_value.len ? attr_value.data : "");
+ } else if (!strcasecmp(attr_type.data, "button")) {
+ hprintf("[%s]", attr_value.len ? attr_value.data : "");
+ } else if (!strcasecmp(attr_type.data, "submit")) {
+ hprintf("[%s]", attr_value.len ? attr_value.data : "");
+ } else if (!strcasecmp(attr_type.data, "checkbox")) {
+ hprint("[ ]"); /* TODO: show x or unicode checkmark when selected? */
+ } else if (!strcasecmp(attr_type.data, "radio")) {
+ hprint("( )"); /* TODO: show x or unicode checkmark when selected? */
+ }
+ }
+
+ startmarkup(cur->tag.markuptype);
+
+ /* do not count data such as an item bullet as part of the data for
+ the node */
+ cur->hasdata = 0;
+
+ if (!tagcmp(t, "hr")) { /* ruler */
+ i = termwidth - indent - defaultindent;
+ for (; i > 0; i--)
+ hprint(str_ruler);
+ cur->hasdata = 1; /* treat <hr/> as data */
+ } else if (!tagcmp(t, "br")) {
+ hflush();
+ hadnewline = 0; /* forced newline */
+ hputchar('\n');
+ cur->hasdata = 1; /* treat <br/> as data */
+ }
+
+ /* autoclose tags, such as <br>, pretend we are <br/> */
+ if (!isshort && cur->tag.isvoid)
+ xmltagend(p, t, tl, 1); /* pretend close of short tag */
+}
+
+static void
+xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
+ size_t namelen, const char *value, size_t valuelen)
+{
+ struct node *cur;
+
+ cur = &nodes[curnode];
+
+ if (!attrcmp(name, "class"))
+ string_append(&attr_class, value, valuelen);
+ else if (!attrcmp(name, "id"))
+ string_append(&attr_id, value, valuelen);
+
+ /* <base href="..." /> */
+ if (!attrcmp(name, "href") && !tagcmp(tag, "base"))
+ strlcat(basehrefdoc, value, sizeof(basehrefdoc));
+
+ /* hide tags with attribute aria-hidden or hidden */
+ if (!attrcmp(name, "aria-hidden") || !attrcmp(name, "hidden"))
+ cur->tag.displaytype |= DisplayNone;
+
+ if (!tagcmp(tag, "a") && !attrcmp(name, "href"))
+ string_append(&attr_src, value, valuelen);
+
+ if ((!tagcmp(tag, "img") || !tagcmp(tag, "video") ||
+ !tagcmp(tag, "source") || !tagcmp(tag, "track") ||
+ !tagcmp(tag, "audio")) &&
+ !attrcmp(name, "src") && valuelen)
+ string_append(&attr_href, value, valuelen);
+
+ /* show img alt attribute as text. */
+ if (!tagcmp(tag, "img") && !attrcmp(name, "alt"))
+ string_append(&attr_alt, value, valuelen);
+
+ if (!attrcmp(name, "type"))
+ string_append(&attr_type, value, valuelen);
+ if (!attrcmp(name, "value"))
+ string_append(&attr_value, value, valuelen);
+}
+
+static void
+xmlattrentity(XMLParser *p, const char *tag, size_t taglen, const char *name,
+ size_t namelen, const char *value, size_t valuelen)
+{
+ char buf[16];
+ int n;
+
+ n = xml_entitytostr(value, buf, sizeof(buf));
+ if (n > 0)
+ xmlattr(p, tag, taglen, name, namelen, buf, (size_t)n);
+ else
+ xmlattr(p, tag, taglen, name, namelen, value, valuelen);
+}
+
+static void
+xmlattrend(XMLParser *p, const char *t, size_t tl, const char *n,
+ size_t nl)
+{
+}
+
+static void
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n,
+ size_t nl)
+{
+ if (!attrcmp(n, "alt"))
+ string_clear(&attr_alt);
+ else if (!attrcmp(n, "class"))
+ string_clear(&attr_class);
+ else if (!attrcmp(n, "href"))
+ string_clear(&attr_href);
+ else if (!attrcmp(n, "id"))
+ string_clear(&attr_id);
+ else if (!attrcmp(n, "src"))
+ string_clear(&attr_src);
+ else if (!attrcmp(n, "type"))
+ string_clear(&attr_type);
+ else if (!attrcmp(n, "value"))
+ string_clear(&attr_value);
+
+ if (!attrcmp(n, "href") && !tagcmp(t, "base"))
+ basehrefdoc[0] = '\0';
+}
+
+void
+usage(void)
+{
+ fprintf(stderr, "%s [-8ailrx] [-b basehref] [-s selector] [-u selector] [-w termwidth]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+ if (pledge("stdio", NULL) < 0)
+ err(1, "pledge");
+
+ ARGBEGIN {
+ case '8':
+ str_bullet_item = "\xe2\x80\xa2 ";
+ str_ruler = "\xe2\x94\x80"; /* symbol: "light horizontal" */
+ break;
+ case 'a':
+ allowansi = !allowansi;
+ break;
+ case 'b':
+ basehref = EARGF(usage());
+ break;
+ case 'i':
+ showrefinline = !showrefinline;
+ break;
+ case 'l':
+ showrefbottom = !showrefbottom;
+ break;
+ case 'r':
+ linewrap = !linewrap;
+ break;
+ case 's':
+ sel_show = compileselectors(EARGF(usage()));
+ /* switch to reader/selector mode, ignore all data except when matched */
+ reader_mode = 1;
+ reader_ignore = 1;
+ break;
+ case 'u':
+ sel_hide = compileselectors(EARGF(usage()));
+ /* switch to reader/selector mode */
+ reader_mode = 1;
+ break;
+ case 'w':
+ if ((termwidth = strtol(EARGF(usage()), NULL, 10)) < 1)
+ usage();
+ break;
+ case 'x':
+ resources = !resources;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ /* top-most document root needs initialization */
+ nodes[0].tag.name = "";
+
+ parser.xmlattrstart = xmlattrstart;
+ parser.xmlattr = xmlattr;
+ parser.xmlattrentity = xmlattrentity;
+ parser.xmlattrend = xmlattrend;
+ parser.xmlcdatastart = xmlcdatastart;
+ parser.xmlcdata = xmlcdata;
+ parser.xmlcdataend = xmlcdataend;
+ parser.xmldatastart = xmldatastart;
+ parser.xmldata = xmldata;
+ parser.xmldataentity = xmldataentity;
+ parser.xmldataend = xmldataend;
+ parser.xmltagstart = xmltagstart;
+ parser.xmltagstartparsed = xmltagstartparsed;
+ parser.xmltagend = xmltagend;
+
+ parser.getnext = getchar;
+ xml_parse(&parser);
+
+ if (showrefbottom || resources)
+ printlinkrefs();
+
+ hflush();
+ if (ncells > 0)
+ newline();
+
+ hflush();
+ setmarkup(0);
+
+ return 0;
+}
(DIR) diff --git a/xml.c b/xml.c
@@ -0,0 +1,489 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xml.h"
+
+/* ifdef for HTML mode. To differentiate xml.c and webdump HTML changes */
+#define HTML_MODE
+
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+
+static void
+xml_parseattrs(XMLParser *x)
+{
+ size_t namelen = 0, valuelen;
+ int c, endsep, endname = 0, valuestart = 0;
+
+ while ((c = GETNEXT()) != EOF) {
+ if (ISSPACE(c)) {
+ if (namelen)
+ endname = 1;
+ continue;
+ } else if (c == '?')
+ ; /* ignore */
+ else if (c == '=') {
+ x->name[namelen] = '\0';
+ valuestart = 1;
+ endname = 1;
+ } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
+ /* attribute without value */
+ x->name[namelen] = '\0';
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ endname = 0;
+ x->name[0] = c;
+ namelen = 1;
+ } else if (namelen && valuestart) {
+ /* attribute with value */
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+
+ valuelen = 0;
+ if (c == '\'' || c == '"') {
+ endsep = c;
+ } else {
+ endsep = ' '; /* ISSPACE() */
+ goto startvalue;
+ }
+
+ while ((c = GETNEXT()) != EOF) {
+startvalue:
+ if (c == '&') { /* entities */
+ x->data[valuelen] = '\0';
+ /* call data function with data before entity if there is data */
+ if (valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
+ break;
+ if (valuelen < sizeof(x->data) - 1)
+ x->data[valuelen++] = c;
+ else {
+ /* entity too long for buffer, handle as normal data */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ break;
+ }
+ if (c == ';') {
+ x->data[valuelen] = '\0';
+ if (x->xmlattrentity)
+ x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
+ if (valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else {
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ break;
+ }
+ }
+ namelen = endname = valuestart = 0;
+ } else if (namelen < sizeof(x->name) - 1) {
+ x->name[namelen++] = c;
+ }
+ if (c == '>') {
+ break;
+ } else if (c == '/') {
+ x->isshorttag = 1;
+ x->name[0] = '\0';
+ namelen = 0;
+ }
+ }
+}
+
+static void
+xml_parsecomment(XMLParser *x)
+{
+ size_t datalen = 0, i = 0;
+ int c;
+
+ if (x->xmlcommentstart)
+ x->xmlcommentstart(x);
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '-' || c == '>') {
+ if (x->xmlcomment && datalen) {
+ x->data[datalen] = '\0';
+ x->xmlcomment(x, x->data, datalen);
+ datalen = 0;
+ }
+ }
+
+ if (c == '-') {
+ if (++i > 2) {
+ if (x->xmlcomment)
+ for (; i > 2; i--)
+ x->xmlcomment(x, "-", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ if (x->xmlcommentend)
+ x->xmlcommentend(x);
+ return;
+ } else if (i) {
+ if (x->xmlcomment) {
+ for (; i > 0; i--)
+ x->xmlcomment(x, "-", 1);
+ }
+ i = 0;
+ }
+
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ x->data[datalen] = '\0';
+ if (x->xmlcomment)
+ x->xmlcomment(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+static void
+xml_parsecdata(XMLParser *x)
+{
+ size_t datalen = 0, i = 0;
+ int c;
+
+ if (x->xmlcdatastart)
+ x->xmlcdatastart(x);
+ while ((c = GETNEXT()) != EOF) {
+ if (c == ']' || c == '>') {
+ if (x->xmlcdata && datalen) {
+ x->data[datalen] = '\0';
+ x->xmlcdata(x, x->data, datalen);
+ datalen = 0;
+ }
+ }
+
+ if (c == ']') {
+ if (++i > 2) {
+ if (x->xmlcdata)
+ for (; i > 2; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ if (x->xmlcdataend)
+ x->xmlcdataend(x);
+ return;
+ } else if (i) {
+ if (x->xmlcdata)
+ for (; i > 0; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 0;
+ }
+
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ x->data[datalen] = '\0';
+ if (x->xmlcdata)
+ x->xmlcdata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+static int
+codepointtoutf8(long r, char *s)
+{
+ if (r == 0) {
+ return 0; /* NUL byte */
+ } else if (r <= 0x7F) {
+ /* 1 byte: 0aaaaaaa */
+ s[0] = r;
+ return 1;
+ } else if (r <= 0x07FF) {
+ /* 2 bytes: 00000aaa aabbbbbb */
+ s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */
+ s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */
+ return 2;
+ } else if (r <= 0xFFFF) {
+ /* 3 bytes: aaaabbbb bbcccccc */
+ s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
+ s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */
+ s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */
+ return 3;
+ } else {
+ /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
+ s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
+ s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
+ s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */
+ s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */
+ return 4;
+ }
+}
+
+struct namedentity {
+ const char *entity;
+ long cp;
+};
+
+int
+namedentitycmp(const void *v1, const void *v2)
+{
+ struct namedentity *n1 = (struct namedentity *)v1;
+ struct namedentity *n2 = (struct namedentity *)v2;
+
+ return strcmp(n1->entity, n2->entity);
+}
+
+static int
+namedentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+ static const struct namedentity entities[] = {
+#include "namedentities.h"
+ };
+ struct namedentity find, *found;
+ size_t i;
+
+ /* buffer is too small */
+ if (bufsiz < 5)
+ return -1;
+
+ find.entity = e;
+ found = bsearch(&find, entities, sizeof(entities) / sizeof(*entities),
+ sizeof(*entities), namedentitycmp);
+ if (found) {
+ i = codepointtoutf8(found->cp, buf);
+ buf[i] = '\0';
+ return i;
+ }
+ return -1;
+}
+
+static int
+numericentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+ long l;
+ int len;
+ char *end;
+
+ /* buffer is too small */
+ if (bufsiz < 5)
+ return -1;
+
+ errno = 0;
+ /* hex (16) or decimal (10) */
+ if (*e == 'x')
+ l = strtol(++e, &end, 16);
+ else
+ l = strtol(e, &end, 10);
+ /* invalid value or not a well-formed entity or invalid code point */
+ if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
+ (l >= 0xd800 && l <= 0xdfff))
+ return -1;
+ len = codepointtoutf8(l, buf);
+ buf[len] = '\0';
+
+ return len;
+}
+
+/* convert named- or numeric entity string to buffer string
+ * returns byte-length of string or -1 on failure. */
+int
+xml_entitytostr(const char *e, char *buf, size_t bufsiz)
+{
+ /* doesn't start with & */
+ if (e[0] != '&')
+ return -1;
+ /* numeric entity */
+ if (e[1] == '#')
+ return numericentitytostr(e + 2, buf, bufsiz);
+ else /* named entity */
+ return namedentitytostr(e + 1, buf, bufsiz);
+}
+
+void
+xml_parse(XMLParser *x)
+{
+ size_t datalen, tagdatalen;
+ int c, isend;
+
+#ifdef HTML_MODE
+ goto read_data;
+#else
+ /* HTML: process data before a tag occured aswell */
+ while ((c = GETNEXT()) != EOF && c != '<')
+ ; /* skip until < */
+#endif
+
+ while (c != EOF) {
+ if (c == '<') { /* parse tag */
+ if ((c = GETNEXT()) == EOF)
+ return;
+
+ if (c == '!') { /* CDATA and comments */
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
+ /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
+ if (tagdatalen <= sizeof("[CDATA[") - 1)
+ x->data[tagdatalen++] = c;
+ if (c == '>')
+ break;
+ else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
+ (x->data[0] == '-')) {
+ xml_parsecomment(x);
+ break;
+ } else if (c == '[') {
+ if (tagdatalen == sizeof("[CDATA[") - 1 &&
+ !strncmp(x->data, "[CDATA[", tagdatalen)) {
+ xml_parsecdata(x);
+ break;
+ }
+ }
+ }
+ } else {
+ /* normal tag (open, short open, close), processing instruction. */
+ x->tag[0] = c;
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, don't strip "?" prefix. */
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ if ((c = GETNEXT()) == EOF)
+ return;
+ x->tag[0] = c;
+ isend = 1;
+ }
+
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '/')
+ x->isshorttag = 1; /* short tag */
+ else if (c == '>' || ISSPACE(c)) {
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, starts with </ */
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ } else {
+ /* start tag */
+ if (x->xmltagstart)
+ x->xmltagstart(x, x->tag, x->taglen);
+ if (ISSPACE(c))
+ xml_parseattrs(x);
+ if (x->xmltagstartparsed)
+ x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
+ }
+ /* call tagend for shortform or processing instruction */
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
+ break;
+ } else if (x->taglen < sizeof(x->tag) - 1)
+ x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
+ }
+ }
+ } else {
+#ifdef HTML_MODE
+read_data:
+#endif
+ /* parse tag data */
+ datalen = 0;
+ if (x->xmldatastart)
+ x->xmldatastart(x);
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '&') {
+ if (datalen) {
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ }
+ x->data[0] = c;
+ datalen = 1;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '<')
+ break;
+ if (datalen < sizeof(x->data) - 1)
+ x->data[datalen++] = c;
+ else {
+ /* entity too long for buffer, handle as normal data */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ break;
+ }
+ if (c == ';') {
+ x->data[datalen] = '\0';
+ if (x->xmldataentity)
+ x->xmldataentity(x, x->data, datalen);
+ datalen = 0;
+ break;
+ }
+ }
+ } else if (c != '<') {
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else {
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ if (c == '<') {
+ x->data[datalen] = '\0';
+ if (x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen);
+ if (x->xmldataend)
+ x->xmldataend(x);
+#ifdef HTML_MODE
+ datalen = 0;
+#endif
+ break;
+ }
+ }
+
+#ifdef HTML_MODE
+ /* pending data, even if a tag didn't close (EOF, etc). */
+ if (datalen) {
+ x->data[datalen] = '\0';
+ if (x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen);
+ if (x->xmldataend)
+ x->xmldataend(x);
+ datalen = 0;
+ }
+#endif
+ }
+ }
+}
(DIR) diff --git a/xml.h b/xml.h
@@ -0,0 +1,49 @@
+#ifndef _XML_H_
+#define _XML_H_
+
+#include <stdio.h>
+
+typedef struct xmlparser {
+ /* handlers */
+ void (*xmlattr)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t);
+ void (*xmlattrend)(struct xmlparser *, const char *, size_t,
+ const char *, size_t);
+ void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
+ const char *, size_t);
+ void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t);
+ void (*xmlcdatastart)(struct xmlparser *);
+ void (*xmlcdata)(struct xmlparser *, const char *, size_t);
+ void (*xmlcdataend)(struct xmlparser *);
+ void (*xmlcommentstart)(struct xmlparser *);
+ void (*xmlcomment)(struct xmlparser *, const char *, size_t);
+ void (*xmlcommentend)(struct xmlparser *);
+ void (*xmldata)(struct xmlparser *, const char *, size_t);
+ void (*xmldataend)(struct xmlparser *);
+ void (*xmldataentity)(struct xmlparser *, const char *, size_t);
+ void (*xmldatastart)(struct xmlparser *);
+ void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
+ void (*xmltagstart)(struct xmlparser *, const char *, size_t);
+ void (*xmltagstartparsed)(struct xmlparser *, const char *,
+ size_t, int);
+
+#ifndef GETNEXT
+ #define GETNEXT (x)->getnext
+ int (*getnext)(void);
+#endif
+
+ /* current tag */
+ char tag[1024];
+ size_t taglen;
+ /* current tag is in shortform ? <tag /> */
+ int isshorttag;
+ /* current attribute name */
+ char name[1024];
+ /* data buffer used for tag data, CDATA and attribute data */
+ char data[BUFSIZ];
+} XMLParser;
+
+int xml_entitytostr(const char *, char *, size_t);
+void xml_parse(XMLParser *);
+#endif