/*
 * Stichwort-Suche für Webseiten
 * Copyright © 2015-2017,2019,2023 Andreas K. Förster <akf@akfoerster.de>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <stdnoreturn.h>

// If you modify this program, please also adjust this link to the
// source code; it is shown in the footer of every page (see HTML_ENDE).
#define QUELLTEXT "http://akfoerster.de/p/Stichwort/"
// Path of the keyword list file that Suche() and Listenanzeige() open.
#define LISTE "cgi-data/Stichwortliste"
// Page title, used both for <title> and for the <h1> heading.
#define TITEL "Stichwortverzeichnis"
// Uncomment to show the keywords of the list below the search form.
//#define LISTENANZEIGE

// Name of the external search engine and the URL prefix to which the
// URL-encoded search word is appended on the "not found" page.
// The link is only emitted when SUCHMASKE is defined.
#define SUCHMASCHINE "Fireball"
#define SUCHMASKE "https://fireball.com/search?q="


// Content-Type header line sent with every response.
#define TYP "Content-Type: application/xhtml+xml; charset=UTF-8"

// Document start: doctype, <head> with style sheet, and the <h1> heading.
#define HTML_ANFANG                                             \
  "<!DOCTYPE html>\n"                                           \
  "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='de'>\n" \
  "<head>\n<title>" TITEL "</title>\n"                          \
  "<meta name='robots' content='noindex, nofollow'/>\n"         \
  "<meta name='viewport' content='width=device-width'/>\n"      \
  "<style>\n"                                                   \
  "@media screen and (prefers-color-scheme:light) "             \
  "{ html{background-color:#F4ECD8; color:#000} }\n"            \
  "@media screen and (prefers-color-scheme:dark) "              \
  "{ html{background-color:#000; color:#F4ECD8} }\n"            \
  "body{max-width:50em;margin:2em auto}\n"                      \
  "h1,p{text-align:center}\n"                                   \
  "a{color:inherit}\n"                                          \
  "</style>\n</head>\n\n<body>\n<h1>" TITEL "</h1>\n\n"

// Document end: footer with the link to the source code.
#define HTML_ENDE "\n<hr/><p><a href='" QUELLTEXT               \
  "'>Quelltext</a></p>\n</body></html>\n"

// Search form with the text field "s" and two submit buttons "a"
// ("aufrufen" = go to the page, "anzeigen" = only show the link).
#define FORMULAR \
  "<form accept-charset='UTF-8'><p>\n"                              \
  "<input type='search' name='s' placeholder='Stichwort'"           \
  " required='required'/>\n"                                        \
  "<button name='a' value='aufrufen'>&#x1F517; aufrufen</button>\n" \
  "<button name='a' value='anzeigen'>&#x1F50D; anzeigen</button>\n" \
  "</p></form>\n"

// Size of the buffer used for reading one line of the list file.
#define PUFFER 1024

// Writes a string verbatim (without any escaping) to standard output.
#define Code(x)  fputs ((x), stdout)

// Any later use of these identifiers in this file is a compile error.
#pragma GCC poison  printf fprintf sprintf snprintf scanf fscanf

// Page generators; each of them ends the process.
static noreturn void Abfrage (void);
static noreturn void Suche (const char *);
static noreturn void gefunden (const char *, const char *);
static noreturn void nicht_gefunden (const char *);
static noreturn void Serverfehler (void);
// Request handling and output helpers.
static void Suchanfrage (void);
static void Kopfende (void);
static void Listenanzeige (void);
// String helpers.
static char *Adressanfang (char *, size_t, const char *);
static char *endbereinigt (char *);
static char *Anfang (const char *);
static void normalisieren (char *, size_t, const char *);
static char *url_dekodieren (char *);
static void urlkodiert (const char *);
static void Text (const char *);


// Values of REQUEST_METHOD and QUERY_STRING, set once in main().
static const char *Methode, *Anfrage;

/*
 * Entry point of the CGI program.
 *
 * Dispatches in this order: a non-empty query string is handed to
 * Suchanfrage(), a non-empty PATH_INFO below "/" is looked up as a
 * short URL, and otherwise the plain search form is shown.
 */
int
main (void)
{
  Methode = getenv ("REQUEST_METHOD");
  if (Methode == NULL)
    {
      // not started by a web server
      puts ("Bitte über einen Webserver per CGI aufrufen!");
      return EXIT_FAILURE;
    }

  Anfrage = getenv ("QUERY_STRING");
  if (Anfrage != NULL && Anfrage[0] != '\0')
    Suchanfrage ();		// only returns if there is no usable "s="

  // short URL: the keyword is everything after the leading slash
  const char *Pfad = getenv ("PATH_INFO");
  if (Pfad != NULL && Pfad[0] == '/' && Pfad[1] != '\0')
    Suche (Pfad + 1);

  Abfrage ();
  return EXIT_SUCCESS;
}


/*
 * Sends the "404 Not Found" page and ends the process.
 *
 * Suchwort is the word as entered by the user; it may be NULL or empty,
 * in which case the apology paragraph and the web search link are left
 * out.  The page always contains the server name and the search form.
 */
static noreturn void
nicht_gefunden (const char *Suchwort)
{
  const char *Servername = getenv ("SERVER_NAME");

  Code ("Status: 404 Not Found\n");
  Code (TYP);
  Code ("\n");
  Kopfende ();			// exits here for HEAD requests

  Code (HTML_ANFANG);

  if (Suchwort != NULL && Suchwort[0] != '\0')
    {
      Code ("<p>Tut mir leid, aber zu „");
      Text (Suchwort);
      Code ("“ hab ich nichts gefunden.</p>\n");

#ifdef SUCHMASKE
      // offer to search for the word with an external search engine
      Code ("<p><a href='" SUCHMASKE);
      urlkodiert (Suchwort);
      Code ("'>Mit " SUCHMASCHINE " im Web suchen</a></p>\n");
#endif
    }

  Code ("<p>");
  Text (Servername);
  Code ("</p>\n\n");

  Code (FORMULAR);
#ifdef LISTENANZEIGE
  Listenanzeige ();
#endif
  Code (HTML_ENDE);

  exit (EXIT_SUCCESS);
}


/*
 * Sends the response for a keyword that was found and ends the process.
 *
 * Suchwort is the word as entered by the user, url the address from the
 * list file.  If the query string contains "a=anzeigen" the link is only
 * displayed (status 200); otherwise the client is redirected with
 * status 303.  The page body is the same in both cases.
 *
 * Incomplete addresses (starting with "/" or "//") are completed with
 * the prefix from Adressanfang(); if that fails, Serverfehler() is
 * called.
 */
static noreturn void
gefunden (const char *Suchwort, const char *url)
{
  char Praefix[256];

  if (!Adressanfang (Praefix, sizeof (Praefix), url))
    Serverfehler ();

  if (Anfrage && strstr (Anfrage, "a=anzeigen"))
    puts ("Status: 200 Okay");
  else				// a=aufrufen
    {
      Code ("Status: 303 See Other\nLocation: ");
      Code (Praefix);
      puts (url);
    }

  puts (TYP);
  Kopfende ();

  // The prefix is built from SERVER_NAME and SERVER_PORT.  It is
  // escaped with Text() like every other environment value that ends
  // up in the markup, so that it can neither break out of the quoted
  // attribute nor make the XML ill-formed.
  Code (HTML_ANFANG);
  Code ("<dl>\n<dt>");
  Text (Suchwort);
  Code ("</dt>\n<dd>\n<a rel='noreferrer' href=\n'");
  Text (Praefix);
  Text (url);
  Code ("'\n>");
  Text (Praefix);
  Text (url);
  Code ("</a>\n</dd>\n</dl>\n");
  Code (HTML_ENDE);

  exit (EXIT_SUCCESS);
}


/*
 * Sends the start page (status 200) with the server name and the
 * search form, then ends the process.
 */
static noreturn void
Abfrage (void)
{
  const char *Servername = getenv ("SERVER_NAME");

  // response header
  Code ("Status: 200 Okay\n");
  Code (TYP);
  Code ("\n");
  Kopfende ();			// exits here for HEAD requests

  // response body
  Code (HTML_ANFANG);

  Code ("<p>");
  Text (Servername);
  Code ("</p>\n\n");

  Code (FORMULAR);

#ifdef LISTENANZEIGE
  Listenanzeige ();
#endif

  Code (HTML_ENDE);

  exit (EXIT_SUCCESS);
}


static noreturn void
Serverfehler (void)
{
  puts ("Status: 500 Internal Server Error");
  puts (TYP);
  Kopfende ();

  Code (HTML_ANFANG);
  Code ("<p>Tut mir leid, aber "
	"es liegt ein Fehler auf dem Server vor.</p>\n");
  Code (HTML_ENDE);

  exit (EXIT_SUCCESS);
}


/*
 * Ends the response header by writing the empty line.
 * For HEAD requests no body may follow, so the process ends right here.
 */
static void
Kopfende (void)
{
  putchar_unlocked ('\n');

  const int nur_Kopf = (Methode != NULL) && (strcmp (Methode, "HEAD") == 0);

  if (nur_Kopf)
    exit (EXIT_SUCCESS);
}


/*
 * Writes the keywords of all entries in the list file as a section of
 * the page, the entries separated by ";".
 *
 * Each relevant line of the file consists of an address, whitespace,
 * and the keywords.  Empty lines, lines starting with '#', and lines
 * that contain an address but no keywords are skipped.
 *
 * If the file cannot be opened, nothing is written.
 */
static void
Listenanzeige (void)
{
  FILE *d = fopen (LISTE, "r");
  if (!d)
    return;
  // do not call Serverfehler() here: output has already been written.

  Code ("<h2>Stichworte</h2>\n\n<div>\n");

  char Zeile[PUFFER];
  size_t n = 0;

  while (fgets (Zeile, sizeof (Zeile), d))
    {
      char *z = Anfang (Zeile);
      if (!*z || *z == '#')
        continue;

      endbereinigt (z);

      // The keywords start after the first run of blanks.  After the
      // trimming above, a blank is always followed by further text.
      char *Trenner = strpbrk (z, " \t");
      if (!Trenner)
        continue;		// address without keywords: no empty entry

      if (n++)
        puts (";");

      Text (Anfang (Trenner));
    }

  fclose (d);
  Code ("\n</div>\n");
}



/*
 * Looks up Suchwort in the list file and ends the process with the
 * matching response: gefunden() for the first entry with a matching
 * keyword, nicht_gefunden() if there is none, Serverfehler() if the
 * list file cannot be opened.
 *
 * The search word is normalised and trimmed before the comparison; the
 * comparison ignores case.  Each relevant line of the list consists of
 * an address followed by comma-separated keywords.  Empty lines and
 * lines starting with '#' are ignored.
 */
static noreturn void
Suche (const char *Suchwort)
{
  char Normalwort[256], Zeile[PUFFER];

  normalisieren (Normalwort, sizeof (Normalwort), Suchwort);

  // Keywords from the list are trimmed before the comparison, so the
  // search word must be trimmed as well.  Otherwise a trailing blank
  // or underscore (as in the short URL "/word_") could never match.
  endbereinigt (Normalwort);

  // An empty word must not match an accidentally blank keyword field.
  if (!*Normalwort)
    nicht_gefunden (Suchwort);

  FILE *d = fopen (LISTE, "r");
  if (!d)
    Serverfehler ();

  while (fgets (Zeile, sizeof (Zeile), d))
    {
      char *sz = NULL;
      char *z = Anfang (Zeile);

      if (!*z || *z == '#')
        continue;

      char *url = strtok_r (z, " \t\r\n", &sz);
      if (!url)
        continue;

      // the rest of the line is the comma-separated keyword list
      char *Stichwort;
      while ((Stichwort = strtok_r (NULL, ",", &sz)))
        if (!strcasecmp (endbereinigt (Anfang (Stichwort)), Normalwort))
          {
            fclose (d);
            gefunden (Suchwort, url);
          }
    }

  fclose (d);
  nicht_gefunden (Suchwort);
}


/*
 * Handles a form request: extracts the value of the parameter "s" from
 * the query string, decodes it and starts the search.
 *
 * Returns only if the query string has no parameter "s" or if the
 * first such parameter is empty; otherwise Suche() ends the process.
 */
static void
Suchanfrage (void)
{
  const char *s = Anfrage;

  // Find a parameter whose name is exactly "s".  A plain substring
  // search for "s=" would also hit names that merely end in "s".
  while (s && *s && !(s[0] == 's' && s[1] == '='))
    {
      s += strcspn (s, "&;");	// skip this parameter
      if (*s)
        ++s;			// and its separator
    }

  if (!s || !*s)
    return;

  s += 2;
  size_t l = strcspn (s, "&;");
  if (l == 0)
    return;

  // NOTE(review): the array is sized by request data; presumably the
  // web server limits the length of the query string - confirm.
  char Wort[l + 1];
  memcpy (Wort, s, l);
  Wort[l] = '\0';

  Suche (endbereinigt (url_dekodieren (Wort)));
}


/*
 * Stores in s the part that is missing at the start of an incomplete
 * address, e.g. "http:" or "http://host:81".
 *
 *  - url not starting with "/": complete already, s becomes "".
 *  - url starting with "//" (protocol-relative): s becomes the scheme,
 *    "http:" or "https:".
 *  - url starting with a single "/": s becomes scheme, "//", the value
 *    of SERVER_NAME and, unless it is the default port of the scheme,
 *    ":" and the value of SERVER_PORT.
 *
 * The scheme is "https:" if the environment variable HTTPS is set to
 * a non-empty value other than "off", else "http:".
 *
 * Groesse is the size of the buffer s and must be at least 10.
 * Returns s, or NULL if an argument is invalid or the buffer is too
 * small.
 */
static char *
Adressanfang (char *s, size_t Groesse, const char *url)
{
  if (!s || !url || Groesse < 10)
    return NULL;

  *s = '\0';

  // an incomplete URL must start with / or //
  if (url[0] != '/')
    return s;

  // some servers set HTTPS=off for unencrypted requests
  const char *https = getenv ("HTTPS");
  const int sicher = https && *https && strcmp (https, "off") != 0;

  const char *Schema = sicher ? "https:" : "http:";
  const char *Standardport = sicher ? "443" : "80";

  // at most 7 bytes; always fits as Groesse >= 10
  size_t l = strlen (Schema);
  memcpy (s, Schema, l + 1);

  // protocol-relative URL?
  if (url[1] == '/')
    return s;

  // at most 6 + 3 bytes so far; still within the guaranteed 10
  memcpy (s + l, "//", 3);
  l += 2;

  const char *Server = getenv ("SERVER_NAME");
  if (Server)
    {
      size_t sl = strlen (Server);
      if (Groesse <= l + sl)
        return NULL;

      memcpy (s + l, Server, sl + 1);
      l += sl;
    }

  // Only the default port of the scheme actually in use may be left
  // out: 443 is not the default for http, nor 80 for https.
  const char *Port = getenv ("SERVER_PORT");
  if (Port && *Port && strcmp (Port, Standardport) != 0)
    {
      size_t pl = strlen (Port);
      if (Groesse <= l + pl + 1)
        return NULL;

      s[l++] = ':';
      memcpy (s + l, Port, pl + 1);
    }

  return s;
}


/*
 * Removes trailing whitespace from the string z in place.
 * Returns z; a NULL pointer is passed through unchanged.
 */
static char *
endbereinigt (char *z)
{
  if (z)
    {
      // Work with a length instead of a pointer, so that nothing ever
      // points before the start of the string.
      size_t l = strlen (z);

      // <ctype.h> functions need the value of an unsigned char;
      // bytes of UTF-8 sequences are negative where char is signed.
      while (l > 0 && isspace ((unsigned char) z[l - 1]))
        z[--l] = '\0';
    }

  return z;
}


/*
 * Returns a pointer to the first character of z that is not
 * whitespace; this is the terminator if there is no such character.
 * A NULL pointer is passed through unchanged.
 *
 * The result points into z.  It is not const-qualified so that it can
 * be used on modifiable buffers; callers must not write through it if
 * z itself is constant.
 */
static char *
Anfang (const char *z)
{
  if (z)
    // <ctype.h> functions need the value of an unsigned char
    while (*z && isspace ((unsigned char) *z))
      ++z;

  return (char *) z;
}


/*
 * Normalises a search word for the comparison with the keyword list.
 *
 *  - leading whitespace is skipped
 *  - runs of blanks, tabs and underscores become one blank
 *  - other control characters are dropped
 *  - German umlauts and sharp s (UTF-8) are transcribed: ae, oe, ue, ss
 *  - Esperanto letters (UTF-8) are transcribed to the x-system: cx, gx, ...
 *  - everything else is copied unchanged
 *
 * Puffer receives the result, which is always terminated.  Groesse is
 * the size of Puffer.  Three bytes of it are kept in reserve, so the
 * result is cut off early if it gets too long.  If Eingabe is NULL,
 * the result is the empty string.
 */
static void
normalisieren (char *Puffer, size_t Groesse, const char *Eingabe)
{
  // UTF-8 sequence and its two-letter replacement
  static const struct
  {
    const char *utf8;
    char Ersatz[2];
  } Tabelle[] = {
    {"\xC3\xA4", {'a', 'e'}},		// a umlaut
    {"\xC3\x84", {'a', 'e'}},		// A umlaut
    {"\xC3\xB6", {'o', 'e'}},		// o umlaut
    {"\xC3\x96", {'o', 'e'}},		// O umlaut
    {"\xC3\xBC", {'u', 'e'}},		// u umlaut
    {"\xC3\x9C", {'u', 'e'}},		// U umlaut
    {"\xC3\x9F", {'s', 's'}},		// sharp s
    {"\xE1\xBA\x9E", {'S', 'S'}},	// capital sharp s
    {"\xC4\x88", {'c', 'x'}},		// C circumflex
    {"\xC4\x89", {'c', 'x'}},		// c circumflex
    {"\xC4\x9C", {'g', 'x'}},		// G circumflex
    {"\xC4\x9D", {'g', 'x'}},		// g circumflex
    {"\xC4\xA4", {'h', 'x'}},		// H circumflex
    {"\xC4\xA5", {'h', 'x'}},		// h circumflex
    {"\xC4\xB4", {'j', 'x'}},		// J circumflex
    {"\xC4\xB5", {'j', 'x'}},		// j circumflex
    {"\xC5\x9C", {'s', 'x'}},		// S circumflex
    {"\xC5\x9D", {'s', 'x'}},		// s circumflex
    {"\xC5\xAC", {'u', 'x'}},		// U breve
    {"\xC5\xAD", {'u', 'x'}}		// u breve
  };
  const size_t Anzahl = sizeof (Tabelle) / sizeof (Tabelle[0]);

  if (!Puffer || Groesse == 0)
    return;

  *Puffer = '\0';

  if (!Eingabe)
    return;

  // Keep three bytes in reserve: up to two characters are written per
  // step, plus the terminator.  Must not wrap around for tiny buffers.
  const size_t Grenze = (Groesse > 3) ? Groesse - 3 : 0;

  const char *e = Eingabe;
  while (*e && isspace ((unsigned char) *e))
    ++e;

  char *a = Puffer;
  char v = 0;			// previously written character
  size_t l = 0;			// number of characters written

  while (*e && l < Grenze)
    {
      const unsigned char c = (unsigned char) *e;

      if (c == ' ' || c == '_' || c == '\t')
        {
          if (v != ' ')		// collapse runs into one blank
            {
              *a++ = v = ' ';
              ++l;
            }
          ++e;
        }
      else if (c < ' ')		// other control characters
        ++e;
      else if (c < 0x80)	// printable ASCII
        {
          *a++ = v = (char) c;
          ++l;
          ++e;
        }
      else			// non-ASCII
        {
          size_t i, n = 0;

          // strncmp stops at the terminator, so a sequence that is cut
          // off at the end of the input is never read beyond the string.
          for (i = 0; i < Anzahl; ++i)
            {
              n = strlen (Tabelle[i].utf8);
              if (!strncmp (e, Tabelle[i].utf8, n))
                break;
            }

          if (i < Anzahl)
            {
              *a++ = Tabelle[i].Ersatz[0];
              *a++ = v = Tabelle[i].Ersatz[1];
              l += 2;
              e += n;		// exactly the length of the sequence
            }
          else			// unknown: copy the byte unchanged
            {
              *a++ = v = (char) c;
              ++l;
              ++e;
            }
        }
    }

  *a = '\0';
}


/*
 * Folgende Funktionen wurden für akfnetz geschrieben.
 */

// Returns the value of a hexadecimal digit.
// The argument is not checked; callers must pass a valid digit.
static inline unsigned char
Hexwert (char c)
{
  // "& 0xDF" clears the bit that distinguishes lower from upper case
  return (c >= 'A') ? (c & 0xDF) - ('A' - 10) : c - '0';
}


/*
 * Decodes a URL-encoded (form-encoded) string in place:
 * '+' becomes a blank and "%XX" becomes the byte with the hexadecimal
 * value XX.  A '%' that is not followed by two hexadecimal digits is
 * kept as it is.
 *
 * Returns s; a NULL pointer is passed through unchanged.  The result
 * is never longer than the input.
 */
static char *
url_dekodieren (char *s)
{
  if (!s)
    return NULL;

  const char *q = s;		// source
  char *z = s;			// destination

  while (*q)
    {
      if ('+' == *q)
        {
          *z++ = ' ';
          ++q;
        }
      // Checking the digits also keeps us inside the string: the
      // terminator is not a hexadecimal digit, and q[2] is only
      // looked at if q[1] was one.
      else if ('%' == *q
               && isxdigit ((unsigned char) q[1])
               && isxdigit ((unsigned char) q[2]))
        {
          *z++ = (char) ((Hexwert (q[1]) << 4) | Hexwert (q[2]));
          q += 3;
        }
      else
        *z++ = *q++;
    }

  *z = '\0';

  return s;
}


// Returns the character for a hexadecimal digit value (0 to 15),
// using capital letters.  The argument is not checked.
static inline int
Hexziffer (unsigned char c)
{
  if (c >= 10)
    return ('A' - 10) + c;

  return '0' + c;
}


/*
 * Writes the string s URL-encoded to standard output.
 *
 * Letters, digits and the characters - _ . ~ ( ) are written as they
 * are; every other byte is written as '%' followed by two hexadecimal
 * digits.  A NULL pointer writes nothing.
 */
static void
urlkodiert (const char *s)
{
  if (!s)
    return;

  flockfile (stdout);

  for (; *s; ++s)
    {
      // <ctype.h> functions need the value of an unsigned char;
      // bytes of UTF-8 sequences are negative where char is signed.
      const unsigned char c = (unsigned char) *s;

      if (isalnum (c) || strchr ("-_.~()", c))
        putchar_unlocked (c);
      else
        {
          putchar_unlocked ('%');
          putchar_unlocked (Hexziffer (c >> 4));
          putchar_unlocked (Hexziffer (c & 0x0F));
        }
    }

  funlockfile (stdout);
}


/*
 * Writes the string s to standard output as text for the (X)HTML page.
 *
 * The characters < > & ' " are written as numeric character
 * references.  Tab, line feed and carriage return are kept; all other
 * control characters, including DEL, are dropped.  Everything else,
 * including non-ASCII bytes, is written unchanged.
 * A NULL pointer writes nothing.
 */
static void
Text (const char *s)
{
  if (s == NULL)
    return;

  flockfile (stdout);

  for (const char *p = s; *p != '\0'; ++p)
    {
      const unsigned char c = (unsigned char) *p;
      const char *Referenz = NULL;

      if (c == '<')
        Referenz = "&#60;";
      else if (c == '>')
        Referenz = "&#62;";
      else if (c == '&')
        Referenz = "&#38;";
      else if (c == '\'')
        Referenz = "&#39;";
      else if (c == '"')
        Referenz = "&#34;";

      if (Referenz != NULL)
        fputs (Referenz, stdout);
      else if (c == '\t' || c == '\n' || c == '\r')
        putchar_unlocked (c);
      else if (c >= ' ' && c != 0x7F)
        putchar_unlocked (c);
      // remaining control characters are dropped
    }

  funlockfile (stdout);
}
