Check-in by ben on 2025-04-12 20:46:41

  Add server-side cache using SQLite, disabled by default in
  config.m4. Cache accounts, item details, lists, and searches. 
  Add option for non-standard features from gawk and mawk,
  disabled by default in config.m4. Bump to version 12.

  INSERTED    DELETED
         5          1 config.m4
         3          0 readme.txt
        35         22 src/account/index.dcgi.m4
        59          0 src/cache.awk
         4          0 src/config.awk
        47         35 src/details/index.dcgi.m4
        24         14 src/list/index.dcgi.m4
        19          7 src/lists/index.dcgi.m4
        30         18 src/search/index.dcgi.m4
        22          0 src/sqlite.awk
        20         12 src/util.awk
         8          6 src/web.awk
       276        115 TOTAL over 12 changed files

Index: config.m4
==================================================================
--- config.m4
+++ config.m4
@@ -1,19 +1,23 @@
 dnl Set configuration variables
 dnl
-define(__PHAROS_VERSION__, 11)dnl
+define(__PHAROS_VERSION__, 12)dnl
 dnl
 define(__API_ENDPOINT__, http://archive.org)dnl
 define(__API_SSL_ENDPOINT__, https://archive.org)dnl
+define(__AWK_EXT__, 0)dnl
 define(__AGENT__, Lynx/2.9.0dev.10 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.1.1w)dnl
+define(__CACHE_DB__, /home/user/pharos/db/cache.dat)dnl
+define(__CACHE_ENABLED__, 0)dnl
 define(__CGIPATH__, /~user/pharos)dnl
 define(__CMD_AWK__, /usr/bin/awk)dnl
 define(__CMD_CURL__, /usr/bin/curl)dnl
 define(__CMD_ENV__, /usr/bin/env)dnl
 define(__CMD_JSON2TSV__, /usr/local/bin/json2tsv)dnl
 define(__CMD_MKTEMP__, /bin/mktemp)dnl
 define(__CMD_RM__, rm)dnl
+define(__CMD_SQLITE__, /usr/bin/sqlite3)dnl
 define(__CMD_STRINGS__, /bin/busybox strings)dnl
 define(__CMD_WEBDUMP__, /usr/local/bin/webdump)dnl
 define(__CMD_XARGS__, /usr/bin/xargs)dnl
 define(__GEOMYIDAE_VERSION__, 0.96)dnl
 define(__MAX_BIN_SIZE__, 10)dnl

Index: readme.txt
==================================================================
--- readme.txt
+++ readme.txt
@@ -31,10 +31,13 @@
   * webdump <gopher://codemadness.org/1/phlog/webdump/>
 
 Configuration
 =============
 To set configuration variables, edit config.m4
+
+AWK_EXT allows use of non-standard features in gawk and mawk
+CACHE_ENABLED caches content in sqlite to reduce API calls
 
 Installation
 ============
 Installation depends on m4.
 

Index: src/account/index.dcgi.m4
==================================================================
--- src/account/index.dcgi.m4
+++ src/account/index.dcgi.m4
@@ -5,20 +5,31 @@
 #
 # Show details for an account
 
 include(src/config.awk)
 incl(src/api.awk)
+incl(src/cache.awk)
 incl(src/cgi.awk)
+incl(src/sqlite.awk)
 incl(src/util.awk)
 incl(src/web.awk)
 
 function main(     acct, cmd, col, cols, descr, dir, email, iaout, id,
-    item_server, item_size, thumb, title, type, url)
+    item_server, item_size, output, signature, str, thumb, title,
+    type, url)
 {
     acct = parts[3]
     email = search
 
+    signature = sprintf("account/%s/%s", acct, email)
+    str = cache_init(signature)
+    if (length(str) > 0) {
+        print str
+        return
+    }
+
+    output = cache_begin()
     iaout = gettemp()
 
     url = api_endpoint "/metadata/" acct
     api_request(url, "GET", iaout)
     
@@ -63,44 +74,46 @@
         }
     }
     close(cmd)
 
     if (length(id) == 0) {
-        print_not_found(url)
+        print_not_found(output, url)
+        cache_end()
         unlink(iaout)
         return
     }
 
-    print "Account: " acct
+    print "Account: " acct >>output
     if (length(thumb) > 0) {
         url = sprintf("http://%s%s/%s", item_server, dir, thumb)
         printf "[I|Thumbnail|%s/raw/%%09%s|%s|%s]\n", cgipath, url,
-            server, port
+            server, port >>output
     }
-    print_html(descr)
+    print_html(output, descr)
 
     if (length(email) > 0) {
         printf "[1|Uploads|%s/search/%%09uploader:%s|%s|%s]\n",
-            cgipath, email, server, port
+            cgipath, email, server, port >>output
     }
     printf "[1|Items|%s/search/%%09anyfield:%s|%s|%s]\n", cgipath,
-        acct, server, port
-    printf "[1|Lists|%s/lists/%%09%s|%s|%s]\n", cgipath, acct,
-        server, port
-
-    print ""
-    printf "%-20s %s\n", "Identifier:", id
-    if (item_size > 0) {
-        printf "%-20s %d\n", "Item Size:", item_size
-    }
-    printf "%-20s %s\n", "Media Type:", type
-    
-    print ""
-    printf "[h|Account web page|URL:%s/details/%s|%s|%s]\n",
-        api_ssl_endpoint, uri_encode(id), server, port
-    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
-
+        acct, server, port >>output
+    printf "[1|Lists|%s/lists/%%09%s|%s|%s]\n", cgipath, acct,
+        server, port >>output
+
+    print "" >>output
+    printf "%-20s %s\n", "Identifier:", id >>output
+    if (item_size > 0) {
+        printf "%-20s %d\n", "Item Size:", item_size >>output
+    }
+    printf "%-20s %s\n", "Media Type:", type >>output
+    
+    print "" >>output
+    printf "[h|Account web page|URL:%s/details/%s|%s|%s]\n",
+        api_ssl_endpoint, uri_encode(id), server, port >>output
+    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+
+    cache_end()
     unlink(iaout)
     exit 0
 }
 
 BEGIN {

ADDED   src/cache.awk
Index: src/cache.awk
==================================================================
--- /dev/null
+++ src/cache.awk
@@ -0,0 +1,59 @@
+# cache or memo-ize to reduce Internet Archive API calls
+
+function cache_begin() {
+    if (cache_enabled || !awk_ext) {
+        cache_output = gettemp()
+    } else {
+        # gawk/mawk only, cache off: skip the temp file, print to stdout
+        cache_output = "/dev/stdout"
+    }
+    # hand back the chosen path; callers redirect their output to it
+    return cache_output
+}
+
+function cache_end(    value) {
+    if (cache_enabled || !awk_ext) {
+        close(cache_output)
+        value = read_file(cache_output)
+        # read_file() output already ends in "\n"; print would add another
+        printf "%s", value
+        cache_set_value(value)
+        unlink(cache_output)
+    }
+}
+
+# remember the request signature, ensure the table exists, try a lookup
+function cache_init(signature,    ddl) {
+    if (!cache_enabled) {
+        return
+    }
+    cache_signature = signature
+    ddl = "CREATE TABLE IF NOT EXISTS cache ( \
+        id TEXT PRIMARY KEY,                  \
+        epoch INTEGER,                        \
+        result TEXT)"
+    sqlite_exec(cache_db, ddl)
+    return cache_get_value()
+}
+
+# return sqlite's output for the row keyed by cache_signature
+function cache_get_value(    found, lookup) {
+    if (cache_enabled) {
+        lookup = sprintf("SELECT result FROM cache WHERE id = '%s'",
+            sqlite_escape(cache_signature))
+        found = sqlite_exec(cache_db, lookup)
+    }
+    return found
+}
+
+# store value under cache_signature, stamped with the current epoch
+function cache_set_value(value,    id, body, stmt) {
+    if (cache_enabled) {
+        id = sqlite_escape(cache_signature)
+        body = sqlite_escape(value)
+        stmt = sprintf("REPLACE INTO cache(id, epoch, result) \
+        VALUES('%s', unixepoch(), '%s')", id, body)
+        sqlite_exec(cache_db, stmt)
+    }
+    return
+}

Index: src/config.awk
==================================================================
--- src/config.awk
+++ src/config.awk
@@ -1,15 +1,19 @@
 function config_init() {
     agent = "__AGENT__"
     api_endpoint = "__API_ENDPOINT__"
     api_ssl_endpoint = "__API_SSL_ENDPOINT__"
+    awk_ext = __AWK_EXT__
+    cache_db = "__CACHE_DB__"
+    cache_enabled = __CACHE_ENABLED__
     cgipath = "__CGIPATH__"
     cmd_curl = "__CMD_CURL__"
     cmd_enc = "__CMD_ENV__"
     cmd_json2tsv = "__CMD_JSON2TSV__"
     cmd_mktemp = "__CMD_MKTEMP__"
     cmd_rm = "__CMD_RM__"
+    cmd_sqlite = "__CMD_SQLITE__"
     cmd_strings = "__CMD_STRINGS__"
     cmd_webdump = "__CMD_WEBDUMP__"
     cmd_xargs = "__CMD_XARGS__"
     geomyidae_version = __GEOMYIDAE_VERSION__
     max_bin_size = __MAX_BIN_SIZE__

Index: src/details/index.dcgi.m4
==================================================================
--- src/details/index.dcgi.m4
+++ src/details/index.dcgi.m4
@@ -5,22 +5,32 @@
 #
 # Show details for an item
 
 include(src/config.awk)
 incl(src/api.awk)
+incl(src/cache.awk)
 incl(src/cgi.awk)
+incl(src/sqlite.awk)
 incl(src/util.awk)
 incl(src/web.awk)
 
-function main(     add_date, col, cols, cmd, creator, descr, dir, i, \
-    iaout, id, item_id, item_server, item_size, label, language,     \
-    license, pub_date, scanner, thumb, title, topic, topics, type,   \
-    uploader_account, uploader_email, url)
+function main(     add_date, col, cols, cmd, creator, descr, dir, i,
+    iaout, id, item_id, item_server, item_size, label, language,
+    license, output, pub_date, scanner, signature, str, thumb, title,
+    topic, topics, type, uploader_account, uploader_email, url)
 {
     item_id = parts[3]
+
+    signature = "details/" item_id
+    str = cache_init(signature)
+    if (length(str) > 0) {
+        print str
+        return
+    }
 
     iaout = gettemp()
+    output = cache_begin()
 
     url = api_endpoint "/metadata/" item_id
     api_request(url, "GET", iaout)
     
     # format search results as a gopher directory (menu)
@@ -99,67 +109,68 @@
         }
     }
     close(cmd)
 
     if (length(id) == 0) {
-        print_not_found(url)
+        print_not_found(output, url)
+        cache_end()
         unlink(iaout)
         return
     }
 
-    print(shorten(title, 70))
+    print shorten(title, 70) >>output
     if (creators == 1) {
         label = "by " shorten(creator[1], 70)
         printf "[1|%s|%s/search/%%09creator:(%s)|%s|%s]\n", label,
-            cgipath, creator[1], server, port
+            cgipath, creator[1], server, port >>output
     } else if (creators > 1) {
-        printf "\nby:\n"
+        printf "\nby:\n" >>output
         for (i = 1; i <= creators; i++) {
             label = shorten(creator[i], 70)
             printf "[1|%s|%s/search/%%09creator:(%s)|%s|%s]\n", label,
-                cgipath, creator[i], server, port
+                cgipath, creator[i], server, port >>output
         }
-        printf "\n"
+        printf "\n" >>output
     }
     if (length(thumb) > 0) {
         url = sprintf("http://%s%s/%s", item_server, dir, thumb)
         printf "[I|Thumbnail|%s/raw/%%09%s|%s|%s]\n",
-            cgipath, url, server, port
+            cgipath, url, server, port >>output
     }
 
     printf "[1|Download|%s/download/%s|%s|%s]\n", cgipath,
-        item_id, server, port
-    print ""
+        item_id, server, port >>output
+    print "" >>output
 
-    print_html(descr)
+    print_html(output, descr)
 
-    print ""
+    print "" >>output
     if (length(add_date) > 0) {
-        printf "%-20s %s\n", "Date Added:", add_date
+        printf "%-20s %s\n", "Date Added:", add_date >>output
     }
     if (pub_date != add_date) {
-        printf "%-20s %s\n", "Date Published:", pub_date
+        printf "%-20s %s\n", "Date Published:", pub_date >>output
     }
-    printf "%-20s %s\n", "Identifier:", id
+    printf "%-20s %s\n", "Identifier:", id >>output
     if (item_size > 0) {
-        printf "%-20s %d\n", "Item Size:", item_size
+        printf "%-20s %d\n", "Item Size:", item_size >>output
     }
     if (length(language) > 0) {
-        printf "%-20s %s\n", "Language:", language
+        printf "%-20s %s\n", "Language:", language >>output
     }
     if (length(license) > 0) {
-        printf "%-20s %s\n", "License:", license
+        printf "%-20s %s\n", "License:", license >>output
     }
-    printf "%-20s %s\n", "Media Type:", type
+    printf "%-20s %s\n", "Media Type:", type >>output
     
     if (topics > 0) {
-        print ""
-        print "# Topics"
+        print "" >>output
+        print "# Topics" >>output
         for (i = 1; i <= topics; i++) {
             label = shorten(topic[i], 40)
             printf "[1|%s|%s/search/%%09subject:(%s)|%s|%s]\n", label,
-                cgipath, topic[i], server, port
+                cgipath, topic[i], server, port >>output
         }
     }
 
     # scrape uploader name from item web page HTML
     url = api_ssl_endpoint "/details/" item_id
@@ -172,36 +183,37 @@
         }
     }
     close(iaout)
 
     if (cols > 0) {
-        print ""
-        print "# Collections"
+        print "" >>output
+        print "# Collections" >>output
         for (i = 1; i <= cols; i++) {
             label = shorten(col[i], 40)
             printf "[1|%s|%s/search/%%09collection:(%s)|%s|%s]\n",
-                label, cgipath, col[i], server, port
+                label, cgipath, col[i], server, port >>output
         }
     }
 
-    print ""
-    print "# Uploaded by"
+    print "" >>output
+    print "# Uploaded by" >>output
     if (length(uploader_account) > 0) {
         label = shorten(uploader_account, 70)
         printf "[1|%s|%s/account/%s%%09%s|%s|%s]\n", label, cgipath,
-            uploader_account, uploader_email, server, port
+            uploader_account, uploader_email, server, port >>output
     } else {
         label = shorten(uploader_email, 70)
         printf "[1|%s|%s/search/%%09uploader:%s|%s|%s]\n", label,
-            cgipath, uploader_email, server, port
+            cgipath, uploader_email, server, port >>output
     }
 
-    print ""
+    print "" >>output
     printf "[h|Item web page|URL:%s/details/%s|%s|%s]\n",
-        api_ssl_endpoint, uri_encode(id), server, port
-    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+        api_ssl_endpoint, uri_encode(id), server, port >>output
+    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
 
+    cache_end()
     unlink(iaout)
     return
 }
 
 BEGIN {

Index: src/list/index.dcgi.m4
==================================================================
--- src/list/index.dcgi.m4
+++ src/list/index.dcgi.m4
@@ -5,17 +5,19 @@
 #
 # Show a specific list
 
 include(src/config.awk)
 incl(src/api.awk)
+incl(src/cache.awk)
 incl(src/cgi.awk)
+incl(src/sqlite.awk)
 incl(src/util.awk)
 
 function main(     acct, client_url, cmd, count, creator, iaout, id,
     is_private, items, label, list_id, name, name_slug, numfound,
-    order, order_name, order_names, order_param, page, pages, rows,
-    query, sort_param, title, type, url)
+    order, order_name, order_names, order_param, output, page, pages,
+    rows, query, sort_param, signature, str, title, type, url)
 {
     order_names["creator"] = "creatorSorter"
     order_names["date"] = "date"
     order_names["title"] = "titleSorter"
     order_names["week"] = "week"
@@ -41,13 +43,18 @@
 
     split(search, parts, "/")
     acct = parts[1]
     list_id = parts[2]
 
-    print acct "'s Lists"
-    print ""
+    signature = sprintf("list/%s", input)
+    str = cache_init(signature)
+    if (length(str) > 0) {
+        print str
+        return
+    }
 
+    output = cache_begin()
     iaout = gettemp()
 
     url = api_ssl_endpoint "/services/users/" acct "/lists/" list_id
     api_request(url, "GET", iaout)
 
@@ -117,12 +124,14 @@
         pages++
     }
 
     # format as a gopher directory (menu)
 
-    printf "# List: %s, page %d of %d\n", name, page, pages
-    print ""
+    print acct "'s Lists" >>output
+    print "" >>output
+    printf "# List: %s, page %d of %d\n", name, page, pages >>output
+    print "" >>output
 
     cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout)
     FS = "\t"
     count = 0
     creator = ""
@@ -155,11 +164,11 @@
                 } else {
                     label = sprintf("[%s] %s", mediatype[type], \
                         gph_encode(shorten(title, 58)))
                 }
                 printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath, id,
-                    server, port
+                    server, port >>output
                 count++
             }
             creator = ""
             descr = ""
             id = ""
@@ -166,38 +175,39 @@
             type = ""
         }
     }
     close(cmd)
 
-    print ""
+    print "" >>output
 
     # only show "page back" if the user is past page 1
     if (page > 1) {
         printf "[1|[<<] Page %d|%s/list/page%d/rows%d/%s%%09%s/%d|%s|%s]\n",
             page - 1, cgipath, page - 1, rows, sort_param,
-            acct, list_id, server, port
+            acct, list_id, server, port >>output
     }
 
     # only show "next page" if the current page is completely full
     if (count == rows) {
         printf "[1|[>>] Page %d|%s/list/page%d/rows%d/%s%%09%s/%d|%s|%s]\n",
             page + 1, cgipath, page + 1, rows, sort_param,
-            acct, list_id, server, port
+            acct, list_id, server, port >>output
     }
 
     # only show "sort" if there's more than one item to sort
     if (numfound > 1) {
         printf "[1|[^v] Sort|%s/listsort/%%09%s/%d|%s|%s]\n", cgipath,
-            acct, list_id, server, port
+            acct, list_id, server, port >>output
     }
 
     printf "[1|Account %s|%s/account/%s|%s|%s]\n", acct, cgipath,
-        acct, server, port
+        acct, server, port >>output
  
-    print ""
-    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+    print "" >>output
+    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
 
+    cache_end()
     unlink(iaout)
     exit 0
 }
 
 BEGIN {

Index: src/lists/index.dcgi.m4
==================================================================
--- src/lists/index.dcgi.m4
+++ src/lists/index.dcgi.m4
@@ -5,16 +5,27 @@
 #
 # Show a list of a user's lists
 
 include(src/config.awk)
 incl(src/api.awk)
+incl(src/cache.awk)
 incl(src/cgi.awk)
+incl(src/sqlite.awk)
 incl(src/util.awk)
 
-function main(     cmd, count, fields, iaout, i, id, is_private, item, \
-    item_count, item_id, label, name, record, records, url)
+function main(     cmd, count, fields, iaout, i, id, is_private, item,
+    item_count, item_id, label, name, output, record, records,
+    signature, str, url)
 {
+    signature = sprintf("%s/lists", search)
+    str = cache_init(signature)
+    if (length(str) > 0) {
+        print str
+        return
+    }
+
+    output = cache_begin()
     iaout = gettemp()
 
     url = api_ssl_endpoint "/services/users/" search "/lists"
     api_request(url, "GET", iaout)
 
@@ -71,23 +82,24 @@
     }
 
     # sort lists by label and id
     hsort(records, count)
 
-    print search "'s Lists"
-    print ""
+    print search "'s Lists" >>output
+    print "" >>output
 
     for (i = 1; i <= count; i++) {
         record = records[i]
         split(record, fields, /\t/)
         item = fields[3]
-        print item
+        print item >>output
     }
 
-    print ""
-    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+    print "" >>output
+    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
 
+    cache_end()
     unlink(iaout)
     return
 }
 
 BEGIN {

Index: src/search/index.dcgi.m4
==================================================================
--- src/search/index.dcgi.m4
+++ src/search/index.dcgi.m4
@@ -5,16 +5,18 @@
 #
 # Show search results
 
 include(src/config.awk)
 incl(src/api.awk)
+incl(src/cache.awk)
 incl(src/cgi.awk)
+incl(src/sqlite.awk)
 incl(src/util.awk)
 
-function main(search,     cmd, count, creator, descr, field, fields, i, \
-    iaout, id, jsout, label, numfound, order, order_names, page, rows,  \
-    searchstr, sort_param, title, type, url)
+function main(search,     cmd, count, creator, descr, field, fields, i,
+    iaout, id, jsout, label, numfound, order, order_names, output,
+    page, rows, searchstr, signature, sort_param, str, title, type, url)
 {
     order_names["addeddate"] = "addeddate"
     order_names["collection_size"] = "collection_size"
     order_names["createddate"] = "createddate"
     order_names["creator"] = "creatorSorter"
@@ -63,10 +65,18 @@
             order = "nav_order desc"
             sort_param = "sort" order
         }
     }
 
+    signature = sprintf("search/%s", input)
+    str = cache_init(signature)
+    if (length(str) > 0) {
+        print str
+        return
+    }
+
+    output = cache_begin()
     iaout = gettemp()
     jsout = gettemp()
 
     # get search results
 
@@ -107,25 +117,26 @@
 
     if (search ~ /^@/) {
         numfound++
     }
     if (numfound == 0) {
-        print "Your search did not match any items in the Archive."
-        print "Try different keywords or a more general search."
-        print ""
-        printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+        print "Your search did not match any items in the Archive." >>output
+        print "Try different keywords or a more general search." >>output
+        print "" >>output
+        printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+        cache_end()
         unlink(jsout)
         unlink(iaout)
         return
     } else {
         pages = int(numfound / rows)
         if (numfound % rows != 0) {
             pages++
         }
         printf "# %s search results, page %d of %d\n", numfound, \
-            page, pages
-        print ""
+            page, pages >>output
+        print "" >>output
     }
 
     # format search results as a gopher directory (menu)
     FS = "\t"
     creator = ""
@@ -135,11 +146,11 @@
     type = ""
     count = 0
 
     if (search ~ /^@/) {
         printf "[1|Account %s|%s/account/%s|%s|%s]\n", search, cgipath,
-           search, server, port
+           search, server, port >>output
     }
 
     while ((getline <jsout) > 0) {
         if ($1 == ".response.docs[].creator" && $2 == "s") {
             creator = $3
@@ -160,53 +171,54 @@
                 label = sprintf("[%s] %s", mediatype[type], \
                     gph_encode(shorten(title, 58)))
             }
             if (type == "collection") {
                 printf "[1|%s|%s/search/%%09collection:(%s)|%s|%s]\n",
-                    label, cgipath, id, server, port
+                    label, cgipath, id, server, port >>output
             } else {
                 printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath,
-                   id, server, port
+                   id, server, port >>output
             }
             creator = ""
             descr = ""
             id = ""
             type = ""
         }
     }
     close(jsout)
 
-    print ""
+    print "" >>output
 
     # only show "page back" if the user is past page 1
     if (page > 1) {
         printf "[1|[<<] Page %d|%s/search/page%d/rows%d/%s%%09%s|%s|%s]\n",
             page - 1, cgipath, page - 1, rows, sort_param, search,
-            server, port
+            server, port >>output
     }
 
     # only show "next page" if the current page is completely full
     if (count == rows) {
         printf "[1|[>>] Page %d|%s/search/page%d/rows%d/%s%%09%s|%s|%s]\n",
             page + 1, cgipath, page + 1, rows, sort_param, search,
-            server, port
+            server, port >>output
     }
 
     # only show "sort" if there's more than one item to sort
     if (count > 1) {
         printf "[1|[^v] Sort|%s/sort/%%09%s|%s|%s]\n", cgipath, search,
-            server, port
+            server, port >>output
     }
 
     # only show "search within list" if there's multiple pages of results
     if (numfound > rows) {
         printf "[1|[\\/] Filter results|%s/wizard/step1/%s|%s|%s]\n",
-            cgipath, search, server, port
+            cgipath, search, server, port >>output
     }
 
-    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
 
+    cache_end()
     unlink(jsout)
     unlink(iaout)
     exit 0
 }
 

ADDED   src/sqlite.awk
Index: src/sqlite.awk
==================================================================
--- /dev/null
+++ src/sqlite.awk
@@ -0,0 +1,22 @@
+function sqlite_exec(db, query,    cmd, retval, sqlcfg) {
+    retval = ""
+    sqlcfg = gettemp()
+    # the query is fed through an init script, not the command line
+    printf ".timeout 1000\n" >sqlcfg
+    printf "PRAGMA encoding=\"UTF-8\";\n" >>sqlcfg
+    printf "%s;\n", query >>sqlcfg
+    close(sqlcfg)
+    cmd = sprintf("%s -batch -line -safe -init \"%s\" \"%s\" .quit 2>&1",
+        cmd_sqlite, sqlcfg, db)
+    while ((cmd | getline) > 0) {
+        retval = retval $0 "\n"
+    }
+    close(cmd)
+    unlink(sqlcfg)
+    return retval
+}
+
+function sqlite_escape(str) {
+    # SQL string literals escape a single quote by doubling it
+    gsub(/'/, "''", str); return str
+}

Index: src/util.awk
==================================================================
--- src/util.awk
+++ src/util.awk
@@ -110,26 +110,34 @@
         retval = sprintf("%dB", bytes)
     }
     return retval
 }
 
-function print_not_found(url) {
-    print "Item cannot be found"
-    print ""
-    print "Items may be taken down for various reasons,"
-    print "including by decision of the uploader or"
-    print "due to a violation of the Terms of Use."
-    print ""
-    printf "[h|Metadata|URL:%s|%s|%s]\n", url, server, port
-    print ""
+function print_not_found(output, url) {
+    print "Item cannot be found" >>output
+    print "" >>output
+    print "Items may be taken down for various reasons," >>output
+    print "including by decision of the uploader or" >>output
+    print "due to a violation of the Terms of Use." >>output
+    print "" >>output
+    printf "[h|Metadata|URL:%s|%s|%s]\n", url, server, port >>output
+    print "" >>output
     url = api_ssl_endpoint "/about/terms.php"
     printf "[0|Terms of Use|%s/text/%%09%s|%s|%s]\n", cgipath,
-        url, server, port
-    print ""
-    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+        url, server, port >>output
+    print "" >>output
+    printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
     return
 }
+
+# slurp a file into one string, each line terminated by "\n"
+function read_file(name,    text) {
+    while ((getline <name) > 0)
+        text = text $0 "\n"
+    close(name)
+    return text
+}
 
 function shorten_left(str, len) {
     if (length(str) > len) {
         retval = "..." substr(str, 3 + length(str) - len)
     } else {

Index: src/web.awk
==================================================================
--- src/web.awk
+++ src/web.awk
@@ -117,11 +117,11 @@
     unlink(curlcfg)
 
     return
 }
 
-function print_html(html,    cmd, marker, work) {
+function print_html(output, html,    cmd, marker, work) {
     work = gettemp()
     gsub(/\\n/, "<br>", html)
     print html >work
     close(work)
     cmd = sprintf("%s -a -n 3 <%s | %s -ilr -w 60", cmd_strings, work, \
@@ -132,13 +132,13 @@
         gsub(/\\t/, "        ")
         if (NR < marker) {
             if ($0 ~ /^References$/) {
                 marker = NR
             }
-            print
+            print >>output
         } else {
-            print_ref_pharos($0)
+            print_ref_pharos(output, $0)
         }
     }
     close(cmd)
     unlink(work)
     return
@@ -172,11 +172,13 @@
 
 
 # Print the webdump references section, translating archive.org URLs to
 # pharos URLs
 
-function print_ref_pharos(str,     id, label, link, prefix, relative, token) {
+function print_ref_pharos(output, str,     id, label, link, prefix, \
+    relative, token)
+{
     if (match(str, /^ [0-9]+\. /)) {
         prefix = substr(str, 0, RLENGTH)
         link = substr(str, RLENGTH+1)
 
         id = ""
@@ -190,14 +192,14 @@
         if (length(id) > 0) {
             label = prefix id
             printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath,
                 id, server, port
         } else {
-             print str
+             print str >>output
         }
     } else {
-        print str
+        print str >>output
     }
     return
 }
 
 function web_init() {