Check-in by ben on 2025-04-12 20:46:41
Add server-side cache using SQLite, disabled by default in
config.m4. Cache accounts, item details, lists, and searches.
Add option for non-standard features from gawk and mawk,
disabled by default in config.m4. Bump to version 12.
INSERTED DELETED
5 1 config.m4
3 0 readme.txt
35 22 src/account/index.dcgi.m4
59 0 src/cache.awk
4 0 src/config.awk
47 35 src/details/index.dcgi.m4
24 14 src/list/index.dcgi.m4
19 7 src/lists/index.dcgi.m4
30 18 src/search/index.dcgi.m4
22 0 src/sqlite.awk
20 12 src/util.awk
8 6 src/web.awk
276 115 TOTAL over 12 changed files
Index: config.m4
==================================================================
--- config.m4
+++ config.m4
@@ -1,19 +1,23 @@
dnl Set configuration variables
dnl
-define(__PHAROS_VERSION__, 11)dnl
+define(__PHAROS_VERSION__, 12)dnl
dnl
define(__API_ENDPOINT__, http://archive.org)dnl
define(__API_SSL_ENDPOINT__, https://archive.org)dnl
+define(__AWK_EXT__, 0)dnl
define(__AGENT__, Lynx/2.9.0dev.10 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.1.1w)dnl
+define(__CACHE_DB__, /home/user/pharos/db/cache.dat)dnl
+define(__CACHE_ENABLED__, 0)dnl
define(__CGIPATH__, /~user/pharos)dnl
define(__CMD_AWK__, /usr/bin/awk)dnl
define(__CMD_CURL__, /usr/bin/curl)dnl
define(__CMD_ENV__, /usr/bin/env)dnl
define(__CMD_JSON2TSV__, /usr/local/bin/json2tsv)dnl
define(__CMD_MKTEMP__, /bin/mktemp)dnl
define(__CMD_RM__, rm)dnl
+define(__CMD_SQLITE__, /usr/bin/sqlite3)dnl
define(__CMD_STRINGS__, /bin/busybox strings)dnl
define(__CMD_WEBDUMP__, /usr/local/bin/webdump)dnl
define(__CMD_XARGS__, /usr/bin/xargs)dnl
define(__GEOMYIDAE_VERSION__, 0.96)dnl
define(__MAX_BIN_SIZE__, 10)dnl
Index: readme.txt
==================================================================
--- readme.txt
+++ readme.txt
@@ -31,10 +31,13 @@
* webdump
Configuration
=============
To set configuration variables, edit config.m4
+
+AWK_EXT allows use of non-standard features in gawk and mawk
+CACHE_ENABLED caches content in sqlite to reduce API calls
Installation
============
Installation depends on m4.
Index: src/account/index.dcgi.m4
==================================================================
--- src/account/index.dcgi.m4
+++ src/account/index.dcgi.m4
@@ -5,20 +5,31 @@
#
# Show details for an account
include(src/config.awk)
incl(src/api.awk)
+incl(src/cache.awk)
incl(src/cgi.awk)
+incl(src/sqlite.awk)
incl(src/util.awk)
incl(src/web.awk)
function main( acct, cmd, col, cols, descr, dir, email, iaout, id,
- item_server, item_size, thumb, title, type, url)
+ item_server, item_size, output, signature, str, thumb, title,
+ type, url)
{
acct = parts[3]
email = search
+ signature = sprintf("account/%s/%s", acct, email)
+ str = cache_init(signature)
+ if (length(str) > 0) {
+ print str
+ return
+ }
+
+ output = cache_begin()
iaout = gettemp()
url = api_endpoint "/metadata/" acct
api_request(url, "GET", iaout)
@@ -63,44 +74,46 @@
}
}
close(cmd)
if (length(id) == 0) {
- print_not_found(url)
+ print_not_found(output, url)
+ cache_end()
unlink(iaout)
return
}
- print "Account: " acct
+ print "Account: " acct >>output
if (length(thumb) > 0) {
url = sprintf("http://%s%s/%s", item_server, dir, thumb)
printf "[I|Thumbnail|%s/raw/%%09%s|%s|%s]\n", cgipath, url,
- server, port
+ server, port >>output
}
- print_html(descr)
+ print_html(output, descr)
if (length(email) > 0) {
printf "[1|Uploads|%s/search/%%09uploader:%s|%s|%s]\n",
- cgipath, email, server, port
+ cgipath, email, server, port >>output
}
printf "[1|Items|%s/search/%%09anyfield:%s|%s|%s]\n", cgipath,
- acct, server, port
- printf "[1|Lists|%s/lists/%%09%s|%s|%s]\n", cgipath, acct,
- server, port
-
- print ""
- printf "%-20s %s\n", "Identifier:", id
- if (item_size > 0) {
- printf "%-20s %d\n", "Item Size:", item_size
- }
- printf "%-20s %s\n", "Media Type:", type
-
- print ""
- printf "[h|Account web page|URL:%s/details/%s|%s|%s]\n",
- api_ssl_endpoint, uri_encode(id), server, port
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
-
+ acct, server, port >>output
+ printf "[1|Lists|%s/lists/%%09%s|%s|%s]\n", cgipath, acct,
+ server, port >>output
+
+ print "" >>output
+ printf "%-20s %s\n", "Identifier:", id >>output
+ if (item_size > 0) {
+ printf "%-20s %d\n", "Item Size:", item_size >>output
+ }
+ printf "%-20s %s\n", "Media Type:", type >>output
+
+ print "" >>output
+ printf "[h|Account web page|URL:%s/details/%s|%s|%s]\n",
+ api_ssl_endpoint, uri_encode(id), server, port >>output
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+
+ cache_end()
unlink(iaout)
exit 0
}
BEGIN {
ADDED src/cache.awk
Index: src/cache.awk
==================================================================
--- /dev/null
+++ src/cache.awk
@@ -0,0 +1,59 @@
+# cache or memo-ize to reduce Internet Archive API calls
+
+function cache_begin() {
+    # pick the file that page output is appended to and return its name
+    if (cache_enabled || !awk_ext) {
+        cache_output = gettemp()    # buffered; cache_end() prints and stores it
+    } else {
+        # awk_ext (gawk/mawk) with cache disabled: print directly to stdout
+        cache_output = "/dev/stdout"
+    }
+    return cache_output    # was "return retval", a variable that is never set
+}
+
+function cache_end(    page) {
+    # direct-to-stdout mode (cache off, awk_ext on): nothing was buffered
+    if (!cache_enabled && awk_ext)
+        return
+    close(cache_output)
+    page = read_file(cache_output)
+    print page
+    cache_set_value(page)
+    unlink(cache_output)
+}
+
+function cache_init(signature, retval, sql) {
+    # remember the page signature, make sure the table exists, then return
+    # what cache_get_value() yields; returns nothing when caching is off
+    if (!cache_enabled)
+        return
+    cache_signature = signature
+    sql = "CREATE TABLE IF NOT EXISTS cache ( \
+ id TEXT PRIMARY KEY, \
+ epoch INTEGER, \
+ result TEXT)"
+    sqlite_exec(cache_db, sql)
+    return cache_get_value()
+}
+
+function cache_get_value(    retval, sql) {
+    # query the row stored under cache_signature; empty when caching is off
+    if (cache_enabled) {
+        sql = sqlite_escape(cache_signature)
+        sql = sprintf("SELECT result FROM cache WHERE id = '%s'", sql)
+        retval = sqlite_exec(cache_db, sql)
+    }
+    return retval
+}
+
+function cache_set_value(value, sql) {
+    # insert or replace the row for cache_signature with value and the
+    # current epoch; does nothing when caching is off
+    if (cache_enabled) {
+        sql = sprintf("REPLACE INTO cache(id, epoch, result) \
+ VALUES('%s', unixepoch(), '%s')",
+            sqlite_escape(cache_signature),
+            sqlite_escape(value))
+        sqlite_exec(cache_db, sql)
+    }
+}
Index: src/config.awk
==================================================================
--- src/config.awk
+++ src/config.awk
@@ -1,15 +1,19 @@
function config_init() {
agent = "__AGENT__"
api_endpoint = "__API_ENDPOINT__"
api_ssl_endpoint = "__API_SSL_ENDPOINT__"
+ awk_ext = __AWK_EXT__
+ cache_db = "__CACHE_DB__"
+ cache_enabled = __CACHE_ENABLED__
cgipath = "__CGIPATH__"
cmd_curl = "__CMD_CURL__"
cmd_enc = "__CMD_ENV__"
cmd_json2tsv = "__CMD_JSON2TSV__"
cmd_mktemp = "__CMD_MKTEMP__"
cmd_rm = "__CMD_RM__"
+ cmd_sqlite = "__CMD_SQLITE__"
cmd_strings = "__CMD_STRINGS__"
cmd_webdump = "__CMD_WEBDUMP__"
cmd_xargs = "__CMD_XARGS__"
geomyidae_version = __GEOMYIDAE_VERSION__
max_bin_size = __MAX_BIN_SIZE__
Index: src/details/index.dcgi.m4
==================================================================
--- src/details/index.dcgi.m4
+++ src/details/index.dcgi.m4
@@ -5,22 +5,32 @@
#
# Show details for an item
include(src/config.awk)
incl(src/api.awk)
+incl(src/cache.awk)
incl(src/cgi.awk)
+incl(src/sqlite.awk)
incl(src/util.awk)
incl(src/web.awk)
-function main( add_date, col, cols, cmd, creator, descr, dir, i, \
- iaout, id, item_id, item_server, item_size, label, language, \
- license, pub_date, scanner, thumb, title, topic, topics, type, \
- uploader_account, uploader_email, url)
+function main( add_date, col, cols, cmd, creator, descr, dir, i,
+ iaout, id, item_id, item_server, item_size, label, language,
+ license, output, pub_date, scanner, signature, str, thumb, title,
+ topic, topics, type, uploader_account, uploader_email, url)
{
item_id = parts[3]
+
+ signature = "details/" item_id
+ str = cache_init(signature)
+ if (length(str) > 0) {
+ print str
+ return
+ }
iaout = gettemp()
+ output = cache_begin()
url = api_endpoint "/metadata/" item_id
api_request(url, "GET", iaout)
# format search results as a gopher directory (menu)
@@ -99,67 +109,68 @@
}
}
close(cmd)
if (length(id) == 0) {
- print_not_found(url)
+ print_not_found(output, url)
+ cache_end()
unlink(iaout)
return
}
- print(shorten(title, 70))
+ print shorten(title, 70) >>output
if (creators == 1) {
label = "by " shorten(creator[1], 70)
printf "[1|%s|%s/search/%%09creator:(%s)|%s|%s]\n", label,
- cgipath, creator[1], server, port
+ cgipath, creator[1], server, port >>output
} else if (creators > 1) {
- printf "\nby:\n"
+ printf "\nby:\n" >>output
for (i = 1; i <= creators; i++) {
label = shorten(creator[i], 70)
printf "[1|%s|%s/search/%%09creator:(%s)|%s|%s]\n", label,
- cgipath, creator[i], server, port
+ cgipath, creator[i], server, port >>output
}
- printf "\n"
+ printf "\n" >>output
}
if (length(thumb) > 0) {
url = sprintf("http://%s%s/%s", item_server, dir, thumb)
printf "[I|Thumbnail|%s/raw/%%09%s|%s|%s]\n",
- cgipath, url, server, port
+ cgipath, url, server, port >>output
}
printf "[1|Download|%s/download/%s|%s|%s]\n", cgipath,
- item_id, server, port
- print ""
+ item_id, server, port >>output
+ print "" >>output
- print_html(descr)
+ print_html(output, descr)
- print ""
+ print "" >>output
if (length(add_date) > 0) {
- printf "%-20s %s\n", "Date Added:", add_date
+ printf "%-20s %s\n", "Date Added:", add_date >>output
}
if (pub_date != add_date) {
- printf "%-20s %s\n", "Date Published:", pub_date
+ printf "%-20s %s\n", "Date Published:", pub_date >>output
}
- printf "%-20s %s\n", "Identifier:", id
+ printf "%-20s %s\n", "Identifier:", id >>output
if (item_size > 0) {
- printf "%-20s %d\n", "Item Size:", item_size
+ printf "%-20s %d\n", "Item Size:", item_size >>output
}
if (length(language) > 0) {
- printf "%-20s %s\n", "Language:", language
+ printf "%-20s %s\n", "Language:", language >>output
}
if (length(license) > 0) {
- printf "%-20s %s\n", "License:", license
+ printf "%-20s %s\n", "License:", license >>output
}
- printf "%-20s %s\n", "Media Type:", type
+ printf "%-20s %s\n", "Media Type:", type >>output
if (topics > 0) {
- print ""
- print "# Topics"
+ print "" >>output
+ print "# Topics" >>output
for (i = 1; i <= topics; i++) {
label = shorten(topic[i], 40)
printf "[1|%s|%s/search/%%09subject:(%s)|%s|%s]\n", label,
- cgipath, topic[i], server, port
+ cgipath, topic[i], server, port >>output
}
}
# scrape uploader name from item web page HTML
url = api_ssl_endpoint "/details/" item_id
@@ -172,36 +183,37 @@
}
}
close(iaout)
if (cols > 0) {
- print ""
- print "# Collections"
+ print "" >>output
+ print "# Collections" >>output
for (i = 1; i <= cols; i++) {
label = shorten(col[i], 40)
printf "[1|%s|%s/search/%%09collection:(%s)|%s|%s]\n",
- label, cgipath, col[i], server, port
+ label, cgipath, col[i], server, port >>output
}
}
- print ""
- print "# Uploaded by"
+ print "" >>output
+ print "# Uploaded by" >>output
if (length(uploader_account) > 0) {
label = shorten(uploader_account, 70)
printf "[1|%s|%s/account/%s%%09%s|%s|%s]\n", label, cgipath,
- uploader_account, uploader_email, server, port
+ uploader_account, uploader_email, server, port >>output
} else {
label = shorten(uploader_email, 70)
printf "[1|%s|%s/search/%%09uploader:%s|%s|%s]\n", label,
- cgipath, uploader_email, server, port
+ cgipath, uploader_email, server, port >>output
}
- print ""
+ print "" >>output
printf "[h|Item web page|URL:%s/details/%s|%s|%s]\n",
- api_ssl_endpoint, uri_encode(id), server, port
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+ api_ssl_endpoint, uri_encode(id), server, port >>output
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+ cache_end()
unlink(iaout)
return
}
BEGIN {
Index: src/list/index.dcgi.m4
==================================================================
--- src/list/index.dcgi.m4
+++ src/list/index.dcgi.m4
@@ -5,17 +5,19 @@
#
# Show a specific list
include(src/config.awk)
incl(src/api.awk)
+incl(src/cache.awk)
incl(src/cgi.awk)
+incl(src/sqlite.awk)
incl(src/util.awk)
function main( acct, client_url, cmd, count, creator, iaout, id,
is_private, items, label, list_id, name, name_slug, numfound,
- order, order_name, order_names, order_param, page, pages, rows,
- query, sort_param, title, type, url)
+ order, order_name, order_names, order_param, output, page, pages,
+ rows, query, sort_param, signature, str, title, type, url)
{
order_names["creator"] = "creatorSorter"
order_names["date"] = "date"
order_names["title"] = "titleSorter"
order_names["week"] = "week"
@@ -41,13 +43,18 @@
split(search, parts, "/")
acct = parts[1]
list_id = parts[2]
- print acct "'s Lists"
- print ""
+ signature = sprintf("list/%s", input)
+ str = cache_init(signature)
+ if (length(str) > 0) {
+ print str
+ return
+ }
+ output = cache_begin()
iaout = gettemp()
url = api_ssl_endpoint "/services/users/" acct "/lists/" list_id
api_request(url, "GET", iaout)
@@ -117,12 +124,14 @@
pages++
}
# format as a gopher directory (menu)
- printf "# List: %s, page %d of %d\n", name, page, pages
- print ""
+ print acct "'s Lists" >>output
+ print "" >>output
+ printf "# List: %s, page %d of %d\n", name, page, pages >>output
+ print "" >>output
cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout)
FS = "\t"
count = 0
creator = ""
@@ -155,11 +164,11 @@
} else {
label = sprintf("[%s] %s", mediatype[type], \
gph_encode(shorten(title, 58)))
}
printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath, id,
- server, port
+ server, port >>output
count++
}
creator = ""
descr = ""
id = ""
@@ -166,38 +175,39 @@
type = ""
}
}
close(cmd)
- print ""
+ print "" >>output
# only show "page back" if the user is past page 1
if (page > 1) {
printf "[1|[<<] Page %d|%s/list/page%d/rows%d/%s%%09%s/%d|%s|%s]\n",
page - 1, cgipath, page - 1, rows, sort_param,
- acct, list_id, server, port
+ acct, list_id, server, port >>output
}
# only show "next page" if the current page is completely full
if (count == rows) {
printf "[1|[>>] Page %d|%s/list/page%d/rows%d/%s%%09%s/%d|%s|%s]\n",
page + 1, cgipath, page + 1, rows, sort_param,
- acct, list_id, server, port
+ acct, list_id, server, port >>output
}
# only show "sort" if there's more than one item to sort
if (numfound > 1) {
printf "[1|[^v] Sort|%s/listsort/%%09%s/%d|%s|%s]\n", cgipath,
- acct, list_id, server, port
+ acct, list_id, server, port >>output
}
printf "[1|Account %s|%s/account/%s|%s|%s]\n", acct, cgipath,
- acct, server, port
+ acct, server, port >>output
- print ""
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+ print "" >>output
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+ cache_end()
unlink(iaout)
exit 0
}
BEGIN {
Index: src/lists/index.dcgi.m4
==================================================================
--- src/lists/index.dcgi.m4
+++ src/lists/index.dcgi.m4
@@ -5,16 +5,27 @@
#
# Show a list of a user's lists
include(src/config.awk)
incl(src/api.awk)
+incl(src/cache.awk)
incl(src/cgi.awk)
+incl(src/sqlite.awk)
incl(src/util.awk)
-function main( cmd, count, fields, iaout, i, id, is_private, item, \
- item_count, item_id, label, name, record, records, url)
+function main( cmd, count, fields, iaout, i, id, is_private, item,
+ item_count, item_id, label, name, output, record, records,
+ signature, str, url)
{
+ signature = sprintf("%s/lists", search)
+ str = cache_init(signature)
+ if (length(str) > 0) {
+ print str
+ return
+ }
+
+ output = cache_begin()
iaout = gettemp()
url = api_ssl_endpoint "/services/users/" search "/lists"
api_request(url, "GET", iaout)
@@ -71,23 +82,24 @@
}
# sort lists by label and id
hsort(records, count)
- print search "'s Lists"
- print ""
+ print search "'s Lists" >>output
+ print "" >>output
for (i = 1; i <= count; i++) {
record = records[i]
split(record, fields, /\t/)
item = fields[3]
- print item
+ print item >>output
}
- print ""
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+ print "" >>output
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+ cache_end()
unlink(iaout)
return
}
BEGIN {
Index: src/search/index.dcgi.m4
==================================================================
--- src/search/index.dcgi.m4
+++ src/search/index.dcgi.m4
@@ -5,16 +5,18 @@
#
# Show search results
include(src/config.awk)
incl(src/api.awk)
+incl(src/cache.awk)
incl(src/cgi.awk)
+incl(src/sqlite.awk)
incl(src/util.awk)
-function main(search, cmd, count, creator, descr, field, fields, i, \
- iaout, id, jsout, label, numfound, order, order_names, page, rows, \
- searchstr, sort_param, title, type, url)
+function main(search, cmd, count, creator, descr, field, fields, i,
+ iaout, id, jsout, label, numfound, order, order_names, output,
+ page, rows, searchstr, signature, sort_param, str, title, type, url)
{
order_names["addeddate"] = "addeddate"
order_names["collection_size"] = "collection_size"
order_names["createddate"] = "createddate"
order_names["creator"] = "creatorSorter"
@@ -63,10 +65,18 @@
order = "nav_order desc"
sort_param = "sort" order
}
}
+ signature = sprintf("search/%s", input)
+ str = cache_init(signature)
+ if (length(str) > 0) {
+ print str
+ return
+ }
+
+ output = cache_begin()
iaout = gettemp()
jsout = gettemp()
# get search results
@@ -107,25 +117,26 @@
if (search ~ /^@/) {
numfound++
}
if (numfound == 0) {
- print "Your search did not match any items in the Archive."
- print "Try different keywords or a more general search."
- print ""
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+ print "Your search did not match any items in the Archive." >>output
+ print "Try different keywords or a more general search." >>output
+ print "" >>output
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+ cache_end()
unlink(jsout)
unlink(iaout)
return
} else {
pages = int(numfound / rows)
if (numfound % rows != 0) {
pages++
}
printf "# %s search results, page %d of %d\n", numfound, \
- page, pages
- print ""
+ page, pages >>output
+ print "" >>output
}
# format search results as a gopher directory (menu)
FS = "\t"
creator = ""
@@ -135,11 +146,11 @@
type = ""
count = 0
if (search ~ /^@/) {
printf "[1|Account %s|%s/account/%s|%s|%s]\n", search, cgipath,
- search, server, port
+ search, server, port >>output
}
while ((getline 0) {
if ($1 == ".response.docs[].creator" && $2 == "s") {
creator = $3
@@ -160,53 +171,54 @@
label = sprintf("[%s] %s", mediatype[type], \
gph_encode(shorten(title, 58)))
}
if (type == "collection") {
printf "[1|%s|%s/search/%%09collection:(%s)|%s|%s]\n",
- label, cgipath, id, server, port
+ label, cgipath, id, server, port >>output
} else {
printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath,
- id, server, port
+ id, server, port >>output
}
creator = ""
descr = ""
id = ""
type = ""
}
}
close(jsout)
- print ""
+ print "" >>output
# only show "page back" if the user is past page 1
if (page > 1) {
printf "[1|[<<] Page %d|%s/search/page%d/rows%d/%s%%09%s|%s|%s]\n",
page - 1, cgipath, page - 1, rows, sort_param, search,
- server, port
+ server, port >>output
}
# only show "next page" if the current page is completely full
if (count == rows) {
printf "[1|[>>] Page %d|%s/search/page%d/rows%d/%s%%09%s|%s|%s]\n",
page + 1, cgipath, page + 1, rows, sort_param, search,
- server, port
+ server, port >>output
}
# only show "sort" if there's more than one item to sort
if (count > 1) {
printf "[1|[^v] Sort|%s/sort/%%09%s|%s|%s]\n", cgipath, search,
- server, port
+ server, port >>output
}
# only show "search within list" if there's multiple pages of results
if (numfound > rows) {
printf "[1|[\\/] Filter results|%s/wizard/step1/%s|%s|%s]\n",
- cgipath, search, server, port
+ cgipath, search, server, port >>output
}
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
+ cache_end()
unlink(jsout)
unlink(iaout)
exit 0
}
ADDED src/sqlite.awk
Index: src/sqlite.awk
==================================================================
--- /dev/null
+++ src/sqlite.awk
@@ -0,0 +1,22 @@
+function sqlite_exec(db, query, retval, sqlcfg, cmd, line) {
+    # Run one SQL statement against db with the sqlite3 CLI; returns all CLI output.
+    # NOTE(review): stderr is merged in (2>&1), so error text is returned as data.
+    sqlcfg = gettemp()
+    printf ".timeout 1000\n" >sqlcfg
+    printf "PRAGMA encoding=\"UTF-8\";\n" >>sqlcfg
+    printf "%s;\n", query >>sqlcfg
+    close(sqlcfg)
+    cmd = sprintf("%s -batch -line -safe -init \"%s\" \"%s\" .quit 2>&1",
+        cmd_sqlite, sqlcfg, db)    # cmd/line are locals: no global leak, $0 untouched
+    retval = ""
+    while ((cmd | getline line) > 0)
+        retval = retval line "\n"
+    close(cmd)
+    unlink(sqlcfg)
+    return retval
+}
+
+function sqlite_escape(str) {  # returns str with each single quote doubled
+ gsub(/'/, "''", str)  # callers place the result between '...' in SQL text
+ return str
+}
Index: src/util.awk
==================================================================
--- src/util.awk
+++ src/util.awk
@@ -110,26 +110,34 @@
retval = sprintf("%dB", bytes)
}
return retval
}
-function print_not_found(url) {
- print "Item cannot be found"
- print ""
- print "Items may be taken down for various reasons,"
- print "including by decision of the uploader or"
- print "due to a violation of the Terms of Use."
- print ""
- printf "[h|Metadata|URL:%s|%s|%s]\n", url, server, port
- print ""
+function print_not_found(output, url) {  # append the "item cannot be found" page to file output
+ print "Item cannot be found" >>output
+ print "" >>output
+ print "Items may be taken down for various reasons," >>output
+ print "including by decision of the uploader or" >>output
+ print "due to a violation of the Terms of Use." >>output
+ print "" >>output
+ printf "[h|Metadata|URL:%s|%s|%s]\n", url, server, port >>output  # url as passed by the caller
+ print "" >>output
url = api_ssl_endpoint "/about/terms.php"
printf "[0|Terms of Use|%s/text/%%09%s|%s|%s]\n", cgipath,
- url, server, port
- print ""
- printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
+ url, server, port >>output  # url now holds the terms.php address assigned above
+ print "" >>output
+ printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port >>output
return
}
+
+function read_file(name, retval) {
+    # return the contents of file name as one string, "\n" after every line
+    while ((getline <name) > 0) {    # restored: the "<name) >" part was lost in extraction
+        retval = retval $0 "\n"
+    }
+    close(name)
+    return retval
+}
function shorten_left(str, len) {
if (length(str) > len) {
retval = "..." substr(str, 3 + length(str) - len)
} else {
Index: src/web.awk
==================================================================
--- src/web.awk
+++ src/web.awk
@@ -117,11 +117,11 @@
unlink(curlcfg)
return
}
-function print_html(html, cmd, marker, work) {
+function print_html(output, html, cmd, marker, work) {
work = gettemp()
gsub(/\\n/, "
", html)
print html >work
close(work)
cmd = sprintf("%s -a -n 3 <%s | %s -ilr -w 60", cmd_strings, work, \
@@ -132,13 +132,13 @@
gsub(/\\t/, " ")
if (NR < marker) {
if ($0 ~ /^References$/) {
marker = NR
}
- print
+ print >>output
} else {
- print_ref_pharos($0)
+ print_ref_pharos(output, $0)
}
}
close(cmd)
unlink(work)
return
@@ -172,11 +172,13 @@
# Print the webdump references section, translating archive.org URLs to
# pharos URLs
-function print_ref_pharos(str, id, label, link, prefix, relative, token) {
+function print_ref_pharos(output, str, id, label, link, prefix, \
+ relative, token)
+{
if (match(str, /^ [0-9]+\. /)) {
prefix = substr(str, 0, RLENGTH)
link = substr(str, RLENGTH+1)
id = ""
@@ -190,14 +192,14 @@
if (length(id) > 0) {
label = prefix id
printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath,
id, server, port
} else {
- print str
+ print str >>output
}
} else {
- print str
+ print str >>output
}
return
}
function web_init() {