
/*
 * entity.c - Converts to/from HTML entities from/to ISO Latin 1 charset.
 *
 * History:
 *
 * 1996/09/18 ver. 0.50
 *   - First version, unreleased.
 *
 * 1997/01/27 ver. 1.00
 *   - Initial release.
 *
 * 1997/02/07 ver. 1.10
 *   - Added getopt.[ch] and tailor.h to make porting to more platforms
 *     easier.
 *   - Fixed careless bug when using -H option, printed <html> instead
 *     of <body>.
 *   - Rearranged the entity.h file.
 *   - Syntax screen is now printed on stdmsg instead of stderr. Same goes
 *     for debugging stuff.
 *   - Added the -V (--version) option.
 *
 * Copyright (C) 1997 Oyvind A. Solheim <sunny256@bigblue.no>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* #include <unistd.h> */

#include "getopt.h"
#include "entity.h"

#ifdef DOS
#  include <io.h> /* Used by setmode(stdin) */
#  include <fcntl.h>
#endif

/*
 * Local function prototypes
 */

int   process_file(const char *); /* Convert one file; "-" means stdin */
void  printheader(FILE *);        /* Write the HTML prologue used by -H */
void  print_version(void);        /* Print the version line on stdout */
void  usage(int);                 /* Print help or a hint, then exit() */

/*
 * Global variables
 */

char  *progname,        /* argv[0], used as prefix in messages */
      *doctitle = NULL, /* Argument of -T, printed inside <title> by printheader() */
      *docowner = NULL, /* Argument of -O, printed as mailto: link by printheader() */
      debug = 0;        /* Note: a plain char, not a pointer. Set to 1 by --debug */
bool  use_all = FALSE,      /* -a: also convert ", &, < and > */
      all_errors = FALSE,   /* -e: repeat warnings instead of printing only the first of each type */
      quiet = FALSE,        /* -q: suppress the warnings in process_file() */
      allnum = FALSE,       /* -N: numeric entities also for unconvertable characters */
      numeric = FALSE,      /* -n: write/accept numeric entities (also set by -N) */
      replace_file = FALSE, /* -c: replace the input file instead of writing to stdout */
      verbose = FALSE,      /* -v: print file names while processing */
      mkhtml = FALSE,       /* -H: wrap the output in an HTML document */
      to_iso = FALSE;       /* -t: convert from entities to ISO Latin 1 */
/*
 * Stream used for debugging output; stdout unless C_STDDEBUG is defined.
 * NOTE(review): stdout is not guaranteed to be a constant expression, so
 * this file-scope initializer may be rejected by some compilers/C
 * libraries - confirm on the target platforms.
 */
#ifndef C_STDDEBUG
FILE  *stddebug = stdout;
#else
FILE *stddebug = C_STDDEBUG;
#endif

/*
 * ent_iso[] - Names of the HTML entities for the ISO Latin 1 character
 * codes 160-255, without the leading '&' and the trailing ';'.
 *
 * The name for character code c is ent_iso[c-160]; process_file() uses this
 * mapping in both directions, so the entries must stay in code order and
 * none may be inserted or removed.
 */
char *ent_iso[] =
{
  "nbsp",    /* 160 - no-break space */
  "iexcl",   /* 161 - inverted exclamation mark */
  "cent",    /* 162 - cent sign */
  "pound",   /* 163 - pound sterling sign */
  "curren",  /* 164 - general currency sign */
  "yen",     /* 165 - yen sign */
  "brvbar",  /* 166 - broken (vertical) bar */
  "sect",    /* 167 - section sign */
  "uml",     /* 168 - umlaut (dieresis) */
  "copy",    /* 169 - copyright sign */
  "ordf",    /* 170 - ordinal indicator, feminine */
  "laquo",   /* 171 - angle quotation mark, left */
  "not",     /* 172 - not sign */
  "shy",     /* 173 - soft hyphen */
  "reg",     /* 174 - registered sign */
  "macr",    /* 175 - macron */
  "deg",     /* 176 - degree sign */
  "plusmn",  /* 177 - plus-or-minus sign */
  "sup2",    /* 178 - superscript two */
  "sup3",    /* 179 - superscript three */
  "acute",   /* 180 - acute accent */
  "micro",   /* 181 - micro sign */
  "para",    /* 182 - pilcrow (paragraph sign) */
  "middot",  /* 183 - middle dot */
  "cedil",   /* 184 - cedilla */
  "sup1",    /* 185 - superscript one */
  "ordm",    /* 186 - ordinal indicator, masculine */
  "raquo",   /* 187 - angle quotation mark, right */
  "frac14",  /* 188 - fraction one-quarter */
  "frac12",  /* 189 - fraction one-half */
  "frac34",  /* 190 - fraction three-quarters */
  "iquest",  /* 191 - inverted question mark */
  "Agrave",  /* 192 - capital A, grave accent */
  "Aacute",  /* 193 - capital A, acute accent */
  "Acirc",   /* 194 - capital A, circumflex accent */
  "Atilde",  /* 195 - capital A, tilde */
  "Auml",    /* 196 - capital A, dieresis or umlaut mark */
  "Aring",   /* 197 - capital A, ring */
  "AElig",   /* 198 - capital AE diphthong (ligature) */
  "Ccedil",  /* 199 - capital C, cedilla */
  "Egrave",  /* 200 - capital E, grave accent */
  "Eacute",  /* 201 - capital E, acute accent */
  "Ecirc",   /* 202 - capital E, circumflex accent */
  "Euml",    /* 203 - capital E, dieresis or umlaut mark */
  "Igrave",  /* 204 - capital I, grave accent */
  "Iacute",  /* 205 - capital I, acute accent */
  "Icirc",   /* 206 - capital I, circumflex accent */
  "Iuml",    /* 207 - capital I, dieresis or umlaut mark */
  "ETH",     /* 208 - capital Eth, Icelandic */
  "Ntilde",  /* 209 - capital N, tilde */
  "Ograve",  /* 210 - capital O, grave accent */
  "Oacute",  /* 211 - capital O, acute accent */
  "Ocirc",   /* 212 - capital O, circumflex accent */
  "Otilde",  /* 213 - capital O, tilde */
  "Ouml",    /* 214 - capital O, dieresis or umlaut mark */
  "times",   /* 215 - multiply sign */
  "Oslash",  /* 216 - capital O, slash */
  "Ugrave",  /* 217 - capital U, grave accent */
  "Uacute",  /* 218 - capital U, acute accent */
  "Ucirc",   /* 219 - capital U, circumflex accent */
  "Uuml",    /* 220 - capital U, dieresis or umlaut mark */
  "Yacute",  /* 221 - capital Y, acute accent */
  "THORN",   /* 222 - capital THORN, Icelandic */
  "szlig",   /* 223 - small sharp s, German (sz ligature) */
  "agrave",  /* 224 - small a, grave accent */
  "aacute",  /* 225 - small a, acute accent */
  "acirc",   /* 226 - small a, circumflex accent */
  "atilde",  /* 227 - small a, tilde */
  "auml",    /* 228 - small a, dieresis or umlaut mark */
  "aring",   /* 229 - small a, ring */
  "aelig",   /* 230 - small ae diphthong (ligature) */
  "ccedil",  /* 231 - small c, cedilla */
  "egrave",  /* 232 - small e, grave accent */
  "eacute",  /* 233 - small e, acute accent */
  "ecirc",   /* 234 - small e, circumflex accent */
  "euml",    /* 235 - small e, dieresis or umlaut mark */
  "igrave",  /* 236 - small i, grave accent */
  "iacute",  /* 237 - small i, acute accent */
  "icirc",   /* 238 - small i, circumflex accent */
  "iuml",    /* 239 - small i, dieresis or umlaut mark */
  "eth",     /* 240 - small eth, Icelandic */
  "ntilde",  /* 241 - small n, tilde */
  "ograve",  /* 242 - small o, grave accent */
  "oacute",  /* 243 - small o, acute accent */
  "ocirc",   /* 244 - small o, circumflex accent */
  "otilde",  /* 245 - small o, tilde */
  "ouml",    /* 246 - small o, dieresis or umlaut mark */
  "divide",  /* 247 - divide sign */
  "oslash",  /* 248 - small o, slash */
  "ugrave",  /* 249 - small u, grave accent */
  "uacute",  /* 250 - small u, acute accent */
  "ucirc",   /* 251 - small u, circumflex accent */
  "uuml",    /* 252 - small u, dieresis or umlaut mark */
  "yacute",  /* 253 - small y, acute accent */
  "thorn",   /* 254 - small thorn, Icelandic */
  "yuml",    /* 255 - small y, dieresis or umlaut mark */
};

/*
 * main()
 */

int  main (int argc, char *argv[])
{
  int c,
      retval = EXIT_OK;

  progname = argv[0];

  while (1)
  {
    int option_index = 0;
    static struct option long_options[] =
    {
      {    "all", 0, 0, 'a'},
      {"convert", 0, 0, 'c'},
      {  "debug", 0, 0,   0},
      { "errors", 0, 0, 'e'},
      {   "help", 0, 0, 'h'},
      {   "html", 0, 0, 'H'},
#ifdef C_LICENSE
      {"license", 0, 0,   0},
#endif
      {"numeric", 0, 0, 'n'},
      {"num-all", 0, 0, 'N'},
      {  "owner", 1, 0, 'O'},
#ifdef C_PGPKEY
      { "pgpkey", 0, 0,   0},
#endif
      {  "quiet", 0, 0, 'q'},
      { "to-iso", 0, 0, 't'},
      {  "title", 1, 0, 'T'},
      {"verbose", 0, 0, 'v'},
      {"version", 0, 0, 'V'},
      {        0, 0, 0,   0}
    };

    /*
     * long_options:
     *
     * 1. const char  *name;
     * 2. int         has_arg;
     * 3. int         *flag;
     * 4. int         val;
     *
     */

    c = getopt_long (argc, argv, "acehHnNO:qtT:vV",
                     long_options, &option_index);

    if (c == -1)
      break;

    switch (c)
    {
      case 0 :
        if (!strcmp(long_options[option_index].name, "debug"))
          debug = 1;
#ifdef C_PGPKEY
        else
        if (!strcmp(long_options[option_index].name, "pgpkey"))
        {
          fprintf(stdout,
           "-----BEGIN PGP PUBLIC KEY BLOCK-----\n"
           "Version: 2.6.3i\n"
           "Comment: Public key for " AUTHOR
           "\n"
           "mQCNAi3/mGcAAAEEAKUYYYgy/SXZ+Q+TQAmfaLPaRxB+MCBIY/MmACblmCLO1QQV\n"
           "61k/ANWdqHtJyOd/QSqLip6pk/s8mgUn2j35q8XeDhfih8FluGgLw11lhZcCHlls\n"
           "3BMuoP1Msrm7kf0tNLIoXYu2uyunFhmoG+vSEcXfhDNquzsqFQOm97MzWXiNAAUR\n"
           "tCdPeXZpbmQgQS4gU29saGVpbSA8c3VubnkyNTZAYmlnYmx1ZS5ubz6JARUCBRAy\n"
           "7Hi7rYRmx56SSB0BAYBiCACJS56vnOnOWcp7BM9Io0nGZdhOU3UiKPKtjADvRvau\n"
           "SlQ4QoUcbh+Dv+KGkR2rhu9mJS5tP1dUmmdUh44EBdLoB5IrnM2//MfNh5uh1s76\n"
           "r0vb2VdSNkCCokkedzL6s0244/2viH63tlQxwC77cK5dwZJcol+cNtkmQwmSFRrE\n"
           "u+QdU5E1/8RtRKVMrI54ZjJPqDVgXuZk+OE5TO8PAkksdDm5RVLbuBvXB11qhxVU\n"
           "cFAl8DeRV/XgZ394M8Jt1ylc/AHI28xKzvwEjRQIcPpItQ4Ff/BDpBAsK3zttGwY\n"
           "xQYzFw9OrQ5CdoMkj8PPm9SCbsw9QQxckhq8JOVtqLVgiQCVAgUQMuu/RwOm97Mz\n"
           "WXiNAQGUTAP+O7b8ONozN2J+SPncnS66HJUlmWezF60ja9GF8MibXCb397qcPpQ7\n"
           "qFmZsPslDIL3mRNBNVvMvEKYBBKha6pPHZo5yRkiFaRFt7HZEZK3FM1WCgfnSbsf\n"
           "jbXOhy1LdWMWZGQDinisXheIF0lvqeWWM3e5RDxroJcSvdhPPcOomjQ=\n"
           "=HLF4\n"
           "-----END PGP PUBLIC KEY BLOCK-----\n"
          );
          retval = EXIT_OK;
          goto endfunc;
        }
#endif /* ifdef C_PGPKEY */

#ifdef C_LICENSE
        else
        if (!strcmp(long_options[option_index].name, "license"))
        {
          fprintf(stdout,
           "(C)opyright " RELEASE_DATE " " AUTHOR "\n"
           "This program is free software; you can redistribute it and/or modify\n"
           "it under the terms of the GNU General Public License as published by\n"
           "the Free Software Foundation; either version 2 of the License, or\n"
           "(at your option) any later version.\n"
           "\n"
           "This program is distributed in the hope that it will be useful,\n"
           "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
           "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
           "GNU General Public License for more details.\n"
           "\n"
           "You should have received a copy of the GNU General Public License\n"
           "along with this program; if not, write to the Free Software\n"
           "Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
          );
          return(EXIT_OK);
        }
#endif /* ifdef C_LICENSE */

#if 0
        printf("option %s", long_options[option_index].name);
        if (optarg)
          printf (" with arg %s", optarg);
         printf ("\n");
#endif /* if 0 */
        break;

      case 'a' :
        use_all = TRUE;
        break;

      case 'c' :
        replace_file = TRUE;
        break;

      case 'e' :
        all_errors = TRUE;
        break;

      case 'h' :
        usage(EXIT_OK);
        break;

      case 'H' :
        mkhtml = TRUE;
        break;

      case 'n' :
        numeric = TRUE;
        break;

      case 'N' :
        allnum = numeric = TRUE;
        break;

      case 'O' :
        docowner = optarg;
        break;

      case 'q' :
        quiet = TRUE;
        break;

      case 't' :
        to_iso = TRUE;
        break;

      case 'T' :
        doctitle = optarg;
        break;

      case 'v' :
        verbose = TRUE;
        break;

      case 'V' :
        print_version();
        return(EXIT_OK);

      case '?' :
        usage(EXIT_ERROR);
        break;

      default :
          debpr1("main(): getopt_long() returned character code %d\n", c);
        break;
    }
  }

  if (debug)
  {
    debpr1("debugging is set to level %d\n", debug);

    if (replace_file)
      debpr0("Convert mode on (--convert)\n");
  }

  if (debug && optind < argc)
  {
    int t;

    debpr0("non-option args: ");
    for (t = optind; t < argc; t++)
      fprintf(stddebug, "%s ", argv[t]);

    fprintf(stddebug, "\n");
  }

  if (optind < argc)
  {
    int  t;

    for (t = optind; t < argc; t++)
      retval |= process_file(argv[t]);
  }
  else
    retval |= process_file("-");

#if 0
  if (docowner)
    free(docowner);
#endif

endfunc:
;
  debpr1("Returning from main() with value %d\n", retval);
  return(retval);
} /* main() */


/*
 * process_file() - Convert one file between ISO Latin 1 and HTML entities.
 *
 * fname is the name of the file to read; "-" means stdin. The direction is
 * selected by the global to_iso flag. The result is written to stdout, or,
 * when replace_file is set, to a temporary file which replaces the input
 * file if at least one conversion was made.
 *
 * Returns NOT_FOUND with WAS_FOUND and/or FILE_ERROR OR-ed in, or plain
 * FILE_ERROR when the file could not be opened, read or replaced.
 *
 * Both streams are closed on every exit path. When replacing, the original
 * file is only removed after the temporary file has been written and closed
 * without error, and the temporary file is kept if the final rename fails,
 * because at that point it holds the only copy of the data.
 */

int  process_file(const char *fname)
{
  FILE          *fp = NULL,
                *tofp = NULL;
  int           c,
                retval = NOT_FOUND;
  unsigned int  linecount = 1;
  char          *tmpfilename = NULL;
  bool          done_mkhtml = mkhtml ? FALSE : TRUE,
                wrn_amp_outside = quiet ? TRUE : FALSE, /* Amp outside entity */
                wrn_inv_num = quiet ? TRUE : FALSE,     /* Invalid numeric entity */
                wrn_unkn_ent = quiet ? TRUE : FALSE,    /* Unknown entity */
                wrn_unconv = quiet ? TRUE : FALSE,      /* Unconvertable character found */
                wrn_nprtnum = quiet ? TRUE : FALSE,     /* Non-printable value in numeric entity */
                wrn_no_num = quiet ? TRUE : FALSE;      /* Numeric not allowed */

  const char *filename = !strcmp(fname, "-") ? "(stdin)" : fname;

  debpr1("Entering process_file(), filename = \"%s\"\n", filename);

  if (!strcmp(fname, "-"))
  {
    fp = stdin;

#ifdef DOS
    if (setmode(fileno(stdin), O_BINARY) == -1)
      fprintf(stderr, "%s: warning: Unable to set stdin to binary mode. Output may be corrupted.\n", progname);
#endif

    if (replace_file)
    {
      fprintf(stderr, "%s: warning: Using stdin, ignoring --convert option\n", progname);
      replace_file = FALSE;
    }
  }
  else
  {
    if ((fp = fopen(filename, FOPEN_READ)) == NULL)
    {
      myerror(filename);
      retval = FILE_ERROR;
      goto endfunc;
    }
  }

  if (verbose)
  {
    fprintf(
     replace_file ? stdout : stderr, /* stdout if converting, otherwise stderr */
     "%s%s\n",
     replace_file ? "Converting " : "",
     filename
    );
  }

  if (replace_file)
  {
    /*
     * NOTE(review): tmpnam() only returns a name, so another process can
     * create the file first, and the name may be on another file system
     * than the input file, in which case rename() below can fail.
     */

    tmpfilename = tmpnam(NULL);

    if (tmpfilename == NULL)
    {
      fprintf(stderr, "%s: %s: Unable to generate a temporary file name\n",
       progname, filename);
      retval = FILE_ERROR;
      goto endfunc;
    }

    debpr1("Creating tmpfile \"%s\"...", tmpfilename);

    if ((tofp = fopen(tmpfilename, FOPEN_WRITE)) == NULL)
    {
      if (debug)
        fprintf(stddebug, "ERROR\n");

      myerror(tmpfilename);
      retval = FILE_ERROR;
      goto endfunc;
    }

    if (debug)
      fprintf(stddebug, "OK\n");
  }
  else
    tofp = stdout;

  if (!to_iso)
  {
    /*
     * Convert from ISO Latin 1 to HTML entities. This is the default unless
     * -t (--to-iso) is specified.
     */

    if (!done_mkhtml)
    {
      printheader(tofp);
      done_mkhtml = TRUE;
    }

    while (c = fgetc(fp), !feof(fp))
    {
      if (ferror(fp))
      {
        myerror(filename);
        retval |= FILE_ERROR;
        goto endfunc;
      }

      if (in_range(c, 160, 255))
      {
        if (!numeric)
          fprintf(tofp, "&%s;", ent_iso[c-160]);
        else
          fprintf(tofp, "&#%u;", c);
        retval |= WAS_FOUND;
      }
      else
      {
        if (!is_htmlchar(c))
        {
          if (!wrn_unconv)
          {
            fprintf(stderr,
             "%s: %s: line %u: Unconvertable character (%d) found%s\n",
             progname, filename, linecount, c, allnum ? ", but -N is specified" : ""
            );

            if (!all_errors)
              wrn_unconv = TRUE;
          }

#ifdef C_CONVCTRL /* Allow -N to convert control characters into entities */
          if (allnum)
#else
          if (allnum && c >= ' ')
#endif /* ifdef C_CONVCTRL else */
          {
            fprintf(tofp, "&#%u;", c);
            retval |= WAS_FOUND;
          }
          else
            fputc(c, tofp);
        }
        else
        {
          if (!use_all)
            fputc(c, tofp);
          else
          {
            switch (c)
            {
              case '\"' :
                fputs(numeric ? "&#34;" : "&quot;", tofp);
                retval |= WAS_FOUND;
                break;

              case '&' :
                fputs(numeric ? "&#38;" : "&amp;", tofp);
                retval |= WAS_FOUND;
                break;

              case '<' :
                fputs(numeric ? "&#60;" : "&lt;", tofp);
                retval |= WAS_FOUND;
                break;

              case '>' :
                fputs(numeric ? "&#62;" : "&gt;", tofp);
                retval |= WAS_FOUND;
                break;

              default :
                fputc(c, tofp);
                break;
            } /* switch (c) */

          } /* if (!use_all) else */

        } /* if (!is_htmlchar(c)) else */

        if (c == '\n')
          linecount++;

      } /* if (in_range(c, 160, 255)) else */

    } /* while (c = fgetc(fp), !feof(fp)) */

  } /* if (!to_iso) */
  else
  {
    /*
     * Convert from HTML entities to ISO Latin 1.
     *
     * buf collects the text of a possible entity, starting with the '&'.
     * count is 0 outside an entity, otherwise the number of characters
     * collected so far.
     */

    int count = 0;
    char buf[MAXLEN+2];

    if (!done_mkhtml)
    {
      printheader(tofp); /* Don't see the point of it, but if the user wants to... */
      done_mkhtml = TRUE;
    }

    while (c = fgetc(fp), !feof(fp))
    {
      if (ferror(fp))
      {
        myerror(filename);
        retval = FILE_ERROR;
        goto endfunc;
      }

      if (!wrn_unconv && !is_htmlchar(c))
      {
        fprintf(stderr,
          "%s: %s: line %u: Invalid character (%d) found\n",
          progname, filename, linecount, c
        );

        if (!all_errors)
          wrn_unconv = TRUE;
      }

      if (count)
      {
        /*
         * We're inside an entity now.
         */

        count++;

        if (count-1 < MAXLEN)
        {
          /*
           * Length of entity is still legal.
           */

          if (c == ';')
          {
            /*
             * We found a terminating semicolon.
             */

            buf[count-1] = '\0';

            if (buf[1] == '#')
            {
              if (numeric)
              {
                /*
                 * This seems to be a numeric entity. Now check that the rest
                 * of it consists of digits only.
                 */

                int   numval;
                bool  illegal = FALSE;

                if (in_range(strlen(buf), 3, 5))
                {
                  int  t;

                  for (t = 2; buf[t]; t++)
                  {
                    /* Cast: a negative char must not be passed to isdigit() */
                    if (!isdigit((unsigned char)buf[t]))
                    {
                      illegal = TRUE;
                      break;
                    }
                  }
                }
                else
                  illegal = TRUE; /* Entity too long */

                numval = atoi(buf+2);

                if (!in_range(numval, 0, 255))
                  illegal = TRUE;

                if (illegal)
                {
                  if (!wrn_inv_num)
                  {
                    fprintf(stderr, "%s: %s: line %u: %s;: Invalid numeric entity\n",
                     progname, filename, linecount, buf);

                    if (!all_errors)
                      wrn_inv_num = TRUE;
                  }

                  fwrite(buf, 1, count-1, tofp);
                  fputc(c, tofp);
                }
                else
                {
                  if (!is_htmlchar(numval))
                  {
                    if (!wrn_nprtnum)
                    {
                      /* The format supplies the newline, the suffix must not */
                      fprintf(stderr, "%s: %s: line %u: %s;: Entity has non-printable value%s\n",
                       progname, filename, linecount, buf, allnum ? ", but -N is specified" : ""
                      );

                      if (!all_errors)
                        wrn_nprtnum = TRUE;
                    }

                    if (allnum)
                    {
                      fputc(numval, tofp);
                      retval |= WAS_FOUND;
                    }
                    else
                    {
                      fwrite(buf, 1, count-1, tofp);
                      fputc(c, tofp);
                    }
                  }
                  else
                  {
                    fputc(numval, tofp);
                    retval |= WAS_FOUND;
                  }
                }
              } /* if (numeric) */
              else
              {
                if (!wrn_no_num)
                {
                  fprintf(stderr, "%s: %s: line %u: Numeric not allowed, must specify -n option.\n",
                   progname, filename, linecount);

                  if (!all_errors)
                    wrn_no_num = TRUE;
                }

                fwrite(buf, 1, count-1, tofp);
                fputc(c, tofp);
              } /* if (numeric) else */

              count = 0;
            } /* if (buf[1] == '#') */
            else
            {
              register int  t;

              for (t = 0; t < (int)(sizeof(ent_iso) / sizeof(ent_iso[0])); t++)
              {
                /*
                 * Scan through the array of entities for the one which we just
                 * found.
                 */

                if (!strcmp(buf+1, ent_iso[t])) /* If the entity was found... */
                {
                  /*
                   * Found, print out the corresponding character. The names
                   * are unique, so there is no need to look any further.
                   */

                  fputc(t+160, tofp);
                  retval |= WAS_FOUND;
                  count = 0;
                  break;
                }
              }

              if (count)
              {
                /*
                 * Everything looked fine, MAXLEN was not reached, there was a
                 * terminating semicolon, but the entity was not found in the
                 * array. Now check if it's a quot, amp, lt, gt or a non-standard
                 * thing.
                 */

                if (use_all && !strcmp(buf+1, "quot"))
                {
                  fputc('\"', tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (use_all && !strcmp(buf+1, "amp"))
                {
                  fputc('&', tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (use_all && !strcmp(buf+1, "lt"))
                {
                  fputc('<', tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (use_all && !strcmp(buf+1, "gt"))
                {
                  fputc('>', tofp);
                  retval |= WAS_FOUND;
                }
                else /* And now for the non-standard names... */
                if (!strcmp(buf+1, "brkbar")) /* brvbar */
                {
                  fputc(166 & 255, tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (!strcmp(buf+1, "die")) /* uml */
                {
                  fputc(168 & 255, tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (!strcmp(buf+1, "hibar")) /* macr */
                {
                  fputc(175 & 255, tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (!strcmp(buf+1, "half")) /* frac12 */
                {
                  fputc(189 & 255, tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (!strcmp(buf+1, "angst")) /* Aring */
                {
                  fputc(197 & 255, tofp);
                  retval |= WAS_FOUND;
                }
                else
                if (!strcmp(buf+1, "Dstrok")) /* ETH */
                {
                  fputc(208 & 255, tofp);
                  retval |= WAS_FOUND;
                }
                else
                {
                  /*
                   * Now it's definitely unknown.
                   */

                  if (
                       !wrn_unkn_ent &&
                       !(
                         !strcmp(buf+1, "quot") || /* Necessary because of use_all */
                         !strcmp(buf+1, "amp")  ||
                         !strcmp(buf+1, "lt")   ||
                         !strcmp(buf+1, "gt")
                       )
                     )
                  {
                    fprintf(stderr, "%s: %s: line %u: %s;: Unknown entity\n",
                     progname, filename, linecount, buf);

                    if (!all_errors)
                      wrn_unkn_ent = TRUE;
                  }

                  fwrite(buf, 1, count-1, tofp);
                  fputc(c, tofp);
                }
                count = 0;
              } /* if (count) */

            } /* if (buf[1] == '#') else */

          } /* if (c == ';') */
          else
          if (isalpha(c) || isdigit(c) || c == '#')
          {
            /*
             * The character found was not a semicolon, but a legal entity
             * character.
             */

            buf[count-1] = c;
          }
          else
          {
            /*
             * The character was not a semicolon, and it wasn't legal.
             * There was an invalid character in the entity, or it lacks a
             * terminating semicolon.
             */

            if (!wrn_amp_outside)
            {
              fprintf(stderr, "%s: %s: line %u: Ampersand (\'&\') found outside entity\n",
               progname, filename, linecount);

              if (!all_errors)
                wrn_amp_outside = TRUE;
            }
            fwrite(buf, 1, count-1, tofp); /* Illegal char in entity */

            if (c == '&')
            {
              /*
               * Special case if '&', might be a new entity.
               */

              count = 1;
              buf[0] = c;
            }
            else
            {
              /*
               * We have printed the contents of buf, also print out the last
               * character read.
               */

              fputc(c, tofp);
              count = 0;
            }
          }
        } /* if (count-1 < MAXLEN) */
        else
        {
          /*
           * The entity was too long, print out buffer.
           */

          if (!wrn_amp_outside)
          {
            fprintf(stderr, "%s: %s: line %u: Ampersand (\'&\') found outside entity\n",
             progname, filename, linecount);

            if (!all_errors)
              wrn_amp_outside = TRUE;
          }

          fwrite(buf, 1, count-1, tofp);
          fputc(c, tofp);
          count = 0;
        }
      } /* if (count) */
      else
      {
        /*
         * Haven't found any entities yet.
         */

        if (c == '&')
        {
          /*
           * Found an ampersand, seems as the beginning of an entity.
           */

          count = 1;
          buf[0] = c;
        }
        else
          fputc(c, tofp); /* Just plain text found */
      } /* if (count) else */

      if (c == '\n')
        linecount++;
    } /* while (c = fgetc(fp), !feof(fp)) */

    if (count)
    {
      /*
       * There was an unterminated entity at EOF, we'll have to print it out.
       */

      fwrite(buf, 1, count, tofp);
      count = 0;
    }
  } /* if (!to_iso) else */

  if (mkhtml)
  {
    fprintf(tofp,
     "</pre>\n"
     "\n"
     "</body>\n"
     "\n"
     "</html>\n"
    );
  }

  if (fp != stdin)
    fclose(fp);

  fp = NULL; /* Tells the cleanup at endfunc that the input is done with */

  if (tofp != stdout)
  {
    /*
     * The temporary file is about to replace the original, so make sure
     * that everything really was written before the original is touched.
     */

    int  write_failed = ferror(tofp);

    if (fclose(tofp))
      write_failed = 1;

    tofp = NULL;

    if (write_failed)
    {
      myerror(tmpfilename);

      if (remove(tmpfilename)) /* Clean up */
        myerror(tmpfilename);

      retval = FILE_ERROR;
      goto endfunc;
    }
  }
  else
    tofp = NULL;

  if (replace_file)
  {
    if (retval & WAS_FOUND)
    {
      debpr1("Removing \"%s\"...", filename);

      if (remove(filename))
      {
        if (debug)
          fprintf(stddebug, "ERROR\n");

        myerror(filename);

        if (remove(tmpfilename)) /* Clean up */
          myerror(tmpfilename);

        retval = FILE_ERROR;
        goto endfunc;
      }

      if (debug)
        fprintf(stddebug, "OK\n");

      debpr2("Renaming \"%s\" to \"%s\"...", tmpfilename, filename);

      if (rename(tmpfilename, filename))
      {
        if (debug)
          fprintf(stddebug, "ERROR\n");

        myerror(tmpfilename);

        /*
         * The original is already gone, so the temporary file is the only
         * copy of the data. Leave it in place and tell the user where it is.
         */

        fprintf(stderr, "%s: %s: The converted data was left in %s\n",
         progname, filename, tmpfilename);

        retval = FILE_ERROR;
        goto endfunc;
      }

      if (debug)
        fprintf(stddebug, "OK\n");
    }
    else
    {
      /*
       * Nothing was converted, so the original stays as it is and the
       * identical copy is thrown away.
       */

      if (remove(tmpfilename))
      {
        myerror(tmpfilename);
        retval |= FILE_ERROR;
      }
    }
  } /* if (replace_file) */

endfunc:
;
  /*
   * Streams that are still open here were left behind by one of the error
   * exits above. A half-written temporary file is of no use, remove it.
   */

  if (fp != NULL && fp != stdin)
    fclose(fp);

  if (tofp != NULL && tofp != stdout)
  {
    fclose(tofp);

    if (remove(tmpfilename))
      myerror(tmpfilename);
  }

  debpr1("Returning from process_file() with value %d.\n", retval);
  return(retval);
} /* process_file() */


void  printheader(FILE *tofp)
{
  fprintf(tofp,
   "<html>\n"
   "\n"
   "<head>\n"
   "\n"
   "  <title>%s</title>\n"
   "\n",
   doctitle ? doctitle : ""
  );

  if (docowner)
    fprintf(tofp, "  <link rev=\"made\" href=\"mailto:%s\">\n\n", docowner);

  fprintf(tofp,
   "</head>\n"
   "\n"
   "<body>\n"
   "\n"
   "<pre>\n"
  );
} /* printheader() */


/*
 * print_version() - Write one line on stdout with the program name, the
 * version, the message language and the date and time of compilation.
 */

void  print_version(void)
{
  printf("%s ver. %s (%s) - Compiled %s %s\n",
         progname,
         VERSION,
         MSG_LANGUAGE,
         __DATE__,
         __TIME__);
} /* print_version() */


/*
 * usage() - Prints a help screen and terminates the program
 *
 * If retval is EXIT_OK, the version line, the copyright line and the
 * complete option summary are printed on stdout. For any other value only
 * a short hint is printed on stderr.
 *
 * This function does not return; it calls exit() with retval.
 */

void  usage(int retval)
{
  if (retval != EXIT_OK)
    fprintf(stderr, "\nType \"%s --help\" for help screen. Returning with value %d.\n",
     progname, retval);
  else
  {
    int  t;

    fputc('\n', stdout);
    print_version();
    t = fprintf(stdout, "(C)opyright %s %s", RELEASE_DATE, AUTHOR);
    fputc('\n', stdout);

    /*
     * Underline the copyright line. fprintf() returns a negative value if
     * the output failed, so the test has to be "t > 0"; counting a negative
     * number down towards zero would never stop.
     */

    for (; t > 0; t--)
      fputc('-', stdout);

    fprintf(stdout,
     "\n"
     "Usage: %s [options] [file ...]\n"
     "\n"
     "Converts between HTML entities and ISO Latin 1 charset. Default action is\n"
     "from ISO Latin 1 to HTML entities.\n"
     "\n"
     "Options:\n"
     "\n"
     "-a,   --all      Also use the &amp; (\'&\'), &lt; (\'<\'), &gt; (\'>\') and\n"
     "                 &quot; (\'\"\') entities. Should only be used on plain text,\n"
     "                 not HTML documents.\n"
     "-c,   --convert  Convert files, i.e. replace the files instead of\n"
     "                 sending to stdout.\n"
     "-e,   --errors   Print all errors, not only the first of each type.\n"
     "-h,   --help     Show this help screen.\n"
     "-H,   --html     Print output as an HTML document\n"
#ifdef C_LICENSE
     "      --license  Print the software license\n"
#endif
     "-n,   --numeric  From HTML to text, allow numeric entities (i.e. \"&#160;\")\n"
     "                 From text to HTML, print numeric entities instead of\n"
     "                 symbolic.\n"
     "-N,   --num-all  Use numeric entities for all characters, also\n"
     "                 non-convertable.\n"
     "-O x, --owner x  Set owner of HTML document to x. Needs -H option.\n"
     "-q,   --quiet    No errors, please.\n"
     "-t,   --to-iso   Convert from HTML entities to ISO Latin 1.\n"
     "-T x, --title x  Set title of HTML document to x. Needs -H option.\n"
     "-v,   --verbose  Verbose operation, print file names while processing.\n"
     "-V,   --version  Print version information on stdout.\n"
     "\n"
     "Undocumented options (May disappear in future versions):\n"
     "\n"
     "      --debug    Print lots of annoying information on %s.\n"
#ifdef C_PGPKEY
     "      --pgpkey   Print the PGP public key of the author on stdout.\n"
#endif
     "\n"
     "Return values:\n"
     "\n"
     "Bit 0 set = Any changes made\n"
     "Bit 1 set = Some file error occured\n"
     "\n",
     progname, stddebug == stdout ? "stdout" : "stderr"
    );
  } /* if (retval != EXIT_OK) else */

  exit(retval);
} /* usage() */

/***** End of file entity.c *****/
