Check-in by ben on 2024-08-04 22:03:15 Initial commit of pharos version 11 INSERTED DELETED 58 0 Makefile 36 0 pharos/about.txt 10 0 pharos/wizard/index.gph 44 0 readme.txt 112 0 src/account/index.dcgi.m4 24 0 src/api.awk 21 0 src/audio/index.gph.m4 22 0 src/books/index.gph.m4 74 0 src/cgi.awk 20 0 src/config.awk 28 0 src/config.m4 200 0 src/details/index.dcgi.m4 105 0 src/download/index.dcgi.m4 14 0 src/images/index.gph.m4 26 0 src/index.gph.m4 74 0 src/list/index.dcgi.m4 75 0 src/lists/index.dcgi.m4 46 0 src/raw/index.cgi.m4 212 0 src/search/index.dcgi.m4 27 0 src/software/index.gph.m4 61 0 src/sort/index.dcgi.m4 131 0 src/util.awk 23 0 src/video/index.gph.m4 159 0 src/web.awk 67 0 src/wizard/step1/index.dcgi.m4 51 0 src/wizard/step2/index.dcgi.m4 44 0 src/wizard/step3/index.dcgi.m4 1764 0 TOTAL over 27 changed files ADDED Makefile Index: Makefile ================================================================== --- /dev/null +++ Makefile @@ -0,0 +1,58 @@ +DESTDIR = pharos +SRC = src + +CGIS = $(DESTDIR)/debug/index.cgi \ + $(DESTDIR)/raw/index.cgi \ + $(DESTDIR)/text/index.cgi + +DCGIS = $(DESTDIR)/account/index.dcgi \ + $(DESTDIR)/details/index.dcgi \ + $(DESTDIR)/direct/index.dcgi \ + $(DESTDIR)/download/index.dcgi \ + $(DESTDIR)/links/index.dcgi \ + $(DESTDIR)/list/index.dcgi \ + $(DESTDIR)/lists/index.dcgi \ + $(DESTDIR)/search/index.dcgi \ + $(DESTDIR)/sort/index.dcgi \ + $(DESTDIR)/wizard/step1/index.dcgi \ + $(DESTDIR)/wizard/step2/index.dcgi \ + $(DESTDIR)/wizard/step3/index.dcgi + +GOPHERMAPS = $(DESTDIR)/index.gph \ + $(DESTDIR)/audio/index.gph \ + $(DESTDIR)/books/index.gph \ + $(DESTDIR)/images/index.gph \ + $(DESTDIR)/software/index.gph \ + $(DESTDIR)/video/index.gph + +all: $(DESTDIR) $(CGIS) $(DCGIS) $(GOPHERMAPS) + +$(DESTDIR)/debug/index.cgi: $(SRC)/raw/index.cgi.m4 + m4 -I $(SRC) $< >$@ + chmod a+rx $@ + +$(DESTDIR)/text/index.cgi: $(SRC)/raw/index.cgi.m4 + m4 -I $(SRC) $< >$@ + chmod a+rx $@ + +$(DESTDIR)/direct/index.dcgi: 
$(SRC)/download/index.dcgi.m4 + m4 -I $(SRC) $< >$@ + chmod a+rx $@ + +$(DESTDIR)/links/index.dcgi: $(SRC)/raw/index.cgi.m4 + m4 -I $(SRC) $< >$@ + chmod a+rx $@ + +$(DESTDIR)/%.cgi: $(SRC)/%.cgi.m4 + m4 -I $(SRC) $< >$@ + chmod a+rx $@ + +$(DESTDIR)/%.dcgi: $(SRC)/%.dcgi.m4 + m4 -I $(SRC) $< >$@ + chmod a+rx $@ + +$(DESTDIR)/%.gph: $(SRC)/%.gph.m4 + m4 -I $(SRC) $< >$@ + +clean: + rm -f $(CGIS) $(DCGIS) $(GOPHERMAPS) ADDED pharos/about.txt Index: pharos/about.txt ================================================================== --- /dev/null +++ pharos/about.txt @@ -0,0 +1,36 @@ +# About PHAROS + +Proxy Internet Archive to gopher. + +# Search + +Example: To find Indian music, excluding commercial samples. + + description:(indian) AND -collection:samples_only AND + mediatype:audio AND subject:music + +# Fields + +* date: + YYYY-MM-DD or [YYYY-MM-DD TO YYYY-MM-DD] +* mediatype: + audio, collection, data, image, movies, software, texts, or web + +# Media type key for search results + +* [aud] = audio or etree +* [col] = collection +* [dat] = data +* [img] = image +* [mov] = movies +* [bin] = software +* [txt] = texts +* [web] = web + +See also: + +# Limits + +This service does not work in the lagrange browser. +Lagrange URI encodes the search string and also +mangles items that are not URI encoded. ADDED pharos/wizard/index.gph Index: pharos/wizard/index.gph ================================================================== --- /dev/null +++ pharos/wizard/index.gph @@ -0,0 +1,10 @@ + , _ + /| | | + _/_\_ >_< + .-\-/. | + / | | \_ | + \ \| |\__(/ + /(`---') | + / / \ | + _.' 
\'-' / | + `----'`=-=' ' hjw ADDED readme.txt Index: readme.txt ================================================================== --- /dev/null +++ readme.txt @@ -0,0 +1,44 @@ +PHAROS by Ben Collver +============================================= +* Description +* Requirements +* Configuration +* Installation + +Description +=========== +Pharos is a gopher front-end to the Internet Archive written in AWK. +It is named after the light-house island near Alexandria. + +For a description and screenshots from an earlier version, see my +post linked below. + + + + + +Requirements +============ +* Runs under the Geomyidae gopher server. + +* Written in AWK. Tested using busybox awk. +* Commands used: + * env, mktemp, rm, strings, xargs + * curl + * json2tsv + * webdump + +Configuration +============= +To set configuration variables, edit src/config.m4 + +Installation +============ +Installation depends on make and m4. + +$ make clean +$ make + +Copy pharos/ into place ADDED src/account/index.dcgi.m4 Index: src/account/index.dcgi.m4 ================================================================== --- /dev/null +++ src/account/index.dcgi.m4 @@ -0,0 +1,112 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# account/index.dcgi +# +# Show details for an account + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) +undivert(web.awk) + +function main( acct, cmd, col, cols, descr, dir, email, iaout, id, + item_server, item_size, thumb, title, type, url) +{ + acct = parts[3] + email = search + + iaout = gettemp() + + url = api_endpoint "/metadata/" acct + api_request(url, "GET", iaout) + + # format search results as a gopher directory (menu) + cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout) + FS = "\t" + cols = 0 + delete col + descr = "" + dir = "" + id = "" + item_server = "" + item_size = 0 + title = "" + thumb = "" + type = "" + while ((cmd | getline) > 0) { + if ($1 == ".dir" && $2 == "s") { + dir = $3 + } else if ($1 == ".files[].name" && $2 == 
"s") { + if ($3 == "__ia_thumb.jpg") { + thumb = $3 + } + } else if ($1 == ".item_size" && $2 == "n") { + item_size = $3 + } else if ($1 == ".metadata.collection" && $2 == "s") { + cols++ + col[cols] = $3 + } else if ($1 == ".metadata.collection[]" && $2 == "s") { + cols++ + col[cols] = $3 + } else if ($1 == ".metadata.description" && $2 == "s") { + descr = $3 + } else if ($1 == ".metadata.identifier" && $2 == "s") { + id = $3 + } else if ($1 == ".metadata.mediatype" && $2 == "s") { + type = $3 + } else if ($1 == ".metadata.title" && $2 == "s") { + title = $3 + } else if ($1 == ".server" && $2 == "s") { + item_server = $3 + } + } + close(cmd) + + if (length(id) == 0) { + print_not_found(url) + unlink(iaout) + return + } + + print "Account: " acct + if (length(thumb) > 0) { + url = sprintf("http://%s%s/%s", item_server, dir, thumb) + printf "[I|Thumbnail|%s/raw/%%09%s|%s|%s]\n", cgipath, url, + server, port + } + print_html(descr) + + if (length(email) > 0) { + printf "[1|Uploads|%s/search/%%09uploader:%s|%s|%s]\n", + cgipath, email, server, port + } + printf "[1|Items|%s/search/%%09anyfield:%s|%s|%s]\n", cgipath, + acct, server, port + printf "[1|Lists|%s/lists/%%09%s|%s|%s]\n", cgipath, acct, + server, port + + print "" + printf "%-20s %s\n", "Identifier:", id + if (item_size > 0) { + printf "%-20s %d\n", "Item Size:", item_size + } + printf "%-20s %s", "Media Type:", type + + print "" + printf "[h|Account web page|URL:%s/details/%s|%s|%s]\n", + api_ssl_endpoint, uri_encode(id), server, port + printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port + + unlink(iaout) + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + main() +} ADDED src/api.awk Index: src/api.awk ================================================================== --- /dev/null +++ src/api.awk @@ -0,0 +1,24 @@ +function api_request(url, method, output, data) { + retval = "" + curlcfg = gettemp() + printf "--connect-timeout 10\n" > curlcfg + printf "--max-redirs 0\n" >> curlcfg 
+ if (url ~ /^https:/) { + printf "--proto =https\n" >> curlcfg + } else { + printf "--proto =http\n" >> curlcfg + } + printf "--show-error\n" >> curlcfg + printf "--silent\n" >> curlcfg + printf "--url %s\n", uri_encode(url) >> curlcfg + printf "--user-agent %s\n", agent >> curlcfg + close(curlcfg) + cmd = sprintf("%s -K %s 2>&1", cmd_curl, curlcfg) + while ((cmd | getline) > 0) { + print $0 >>output + } + close(output) + close(cmd) + unlink(curlcfg) + return retval +} ADDED src/audio/index.gph.m4 Index: src/audio/index.gph.m4 ================================================================== --- /dev/null +++ src/audio/index.gph.m4 @@ -0,0 +1,21 @@ +include(config.m4)dnl +# Audio + +[1|All Audio|__CGIPATH__/search/sortweek desc/%09mediatype:audio|__SERVER__|__PORT__] +[1|This Just In|__CGIPATH__/search/sortpublicdate desc/%09mediatype:audio|__SERVER__|__PORT__] +[1|Live Music Archive|__CGIPATH__/search/sortweek desc/%09collection:etree AND mediatype:collection|__SERVER__|__PORT__] +[1|Librivox Free Audiobooks|__CGIPATH__/search/sortweek desc/%09collection:librivoxaudio|__SERVER__|__PORT__] +[1|Grateful Dead|__CGIPATH__/search/sortweek desc/%09collection:GratefulDead|__SERVER__|__PORT__] +[1|Netlabels|__CGIPATH__/search/sortweek desc/%09collection:netlabels AND mediatype:collection|__SERVER__|__PORT__] +[1|Old Time Radio|__CGIPATH__/search/sortweek desc/%09collection:oldtimeradio|__SERVER__|__PORT__] +[1|78 RPMs & Cylinder Recordings|__CGIPATH__/search/sortweek desc/%09collection:78rpm AND mediatype:collection|__SERVER__|__PORT__] +[1|Audio Books & Poetry|__CGIPATH__/search/sortweek desc/%09collection:audio_bookspoetry|__SERVER__|__PORT__] +[1|Computers, Technology & Science|__CGIPATH__/search/sortweek desc/%09collection:audio_tech|__SERVER__|__PORT__] +[1|Music, Arts & Culture|__CGIPATH__/search/sortweek desc/%09collection:audio_music AND mediatype:collection|__SERVER__|__PORT__] +[1|News & Public Affairs|__CGIPATH__/search/sortweek 
desc/%09collection:audio_news|__SERVER__|__PORT__] +[1|Spirituality & Religion|__CGIPATH__/search/sortweek desc/%09collection:audio_religion|__SERVER__|__PORT__] +[1|Podcasts|__CGIPATH__/search/sortweek desc/%09collection:podcasts|__SERVER__|__PORT__] +[1|Radio News Archive|__CGIPATH__/search/sortweek desc/%09collection:radio|__SERVER__|__PORT__] +[1|Long Playing Records|__CGIPATH__/search/sortweek desc/%09collection:album_recordings|__SERVER__|__PORT__] +[1|Various Cassette Tapes|__CGIPATH__/search/sortweek desc/%09collection:cassettetapes|__SERVER__|__PORT__] +[1|Audiophile CD Collection|__CGIPATH__/search/sortweek desc/%09collection:acdc AND -collection:samples_only|__SERVER__|__PORT__] ADDED src/books/index.gph.m4 Index: src/books/index.gph.m4 ================================================================== --- /dev/null +++ src/books/index.gph.m4 @@ -0,0 +1,22 @@ +include(config.m4)dnl +# Books + +[1|All Books|__CGIPATH__/search/sortweek desc/%09collection:books AND -access-restricted-item:true|__SERVER__|__PORT__] +[1|All Texts|__CGIPATH__/search/sortweek desc/%09mediatype:texts AND -access-restricted-item:true|__SERVER__|__PORT__] +[1|This Just In|__CGIPATH__/search/sortpublicdate desc/%09mediatype:texts AND -access-restricted-item:true|__SERVER__|__PORT__] +[1|Smithsonian Libraries|__CGIPATH__/search/sortweek desc/%09collection:smithsonian|__SERVER__|__PORT__] +[1|FEDLINK|__CGIPATH__/search/sortweek desc/%09collection:fedlink AND mediatype:collection|__SERVER__|__PORT__] +[1|Genealogy|__CGIPATH__/search/sortweek desc/%09collection:genealogy AND mediatype:collection|__SERVER__|__PORT__] +[1|Lincoln Collection|__CGIPATH__/search/sortweek desc/%09collection:lincolncollection|__SERVER__|__PORT__] +[1|American Libraries|__CGIPATH__/search/sortweek desc/%09collection:americana AND mediatype:collection AND -access-restricted-item:true|__SERVER__|__PORT__] +[1|Canadian Libraries|__CGIPATH__/search/sortweek desc/%09collection:toronto AND mediatype:collection AND 
-access-restricted-item:true|__SERVER__|__PORT__] +[1|Universal Library|__CGIPATH__/search/sortweek desc/%09collection:universallibrary|__SERVER__|__PORT__] +[1|Project Gutenberg|__CGIPATH__/search/sortweek desc/%09collection:gutenberg|__SERVER__|__PORT__] +[1|Children's Library|__CGIPATH__/search/sortweek desc/%09collection:iacl AND -access-restricted-item:true|__SERVER__|__PORT__] +[1|Biodiversity Heritage Library|__CGIPATH__/search/sortweek desc/%09collection:biodiversity AND mediatype:collection|__SERVER__|__PORT__] +[1|Books By Language|__CGIPATH__/search/sortweek desc/%09collection:booksbylanguage AND mediatype:collection|__SERVER__|__PORT__] +[1|Additional Collections|__CGIPATH__/search/sortweek desc/%09collection:additional_collections AND mediatype:collection|__SERVER__|__PORT__] +[1|The Magazine Rack|__CGIPATH__/search/sortweek desc/%09collection:magazine_rack AND mediatype:collection|__SERVER__|__PORT__] +[1|The Pulp Magazine Archive|__CGIPATH__/search/sortweek desc/%09collection:pulpmagazinearchive|__SERVER__|__PORT__] +[1|Newspapers|__CGIPATH__/search/sortweek desc/%09collection:newspapers AND mediatype:collection|__SERVER__|__PORT__] +[1|Comic Books & Graphic Novels|__CGIPATH__/search/sortweek desc/%09collection:comics AND -access-restricted-item:true|__SERVER__|__PORT__] ADDED src/cgi.awk Index: src/cgi.awk ================================================================== --- /dev/null +++ src/cgi.awk @@ -0,0 +1,74 @@ +function cgi_init() { + search = ARGV[1] + arguments = ARGV[2] + traversal = ARGV[5] + selector = ARGV[6] + + if (geomyidae_version < 0.96) { + input = arguments + } else { + input = traversal + } + + if (length(search) == 0 && match(input, /%09.*/)) { + # This is a hack to include a search in the URL. 
+ # everything before %09 is considered arguments + # everything after %09 is considered the search + search = substr(input, RSTART+3, RLENGTH-3) + args = substr(input, 0, RSTART-1) + } else { + args = input + } + + # query is everything after ? in the gopher selector + # this is NOT the same as the gopher search string + query = args + + # parse the path out of the selector + path = selector + if (substr(path, 1, length(cgipath)) == cgipath) { + path = substr(path, length(cgipath) + 1) + } + if (match(path, /%09/)) { + path = substr(path, 1, RSTART-1) + } + + split(path, parts, "/") + topdir = parts[2] + + return +} + +function uri_encode_init( i, c) { + for (i = 0; i <= 255; i++) { + c = sprintf("%c", i) + uri_encode_ord[c] = i + uri_encode_tab[i] = c + } + + # Percent encode only control characters so that the higher unicode + # block characters are left plainly visible. + for (i = 0; i < 33; i++) { + uri_encode_tab[i] = sprintf("%%%02X", i) + } + + # Percent encode higher unicode block characters too + for (i = 128; i <= 255; i++) { + uri_encode_tab[i] = sprintf("%%%02X", i) + } + + # DEL + uri_encode_tab[127] = sprintf("%%%02X", 127) + + return +} + +function uri_encode(str, i, c, len) { + len = length(str) + retval = "" + for (i = 1; i <= len; i++) { + c = substr(str, i, 1) + retval = retval uri_encode_tab[uri_encode_ord[c]] + } + return retval +} ADDED src/config.awk Index: src/config.awk ================================================================== --- /dev/null +++ src/config.awk @@ -0,0 +1,20 @@ +function config_init() { + agent = "__AGENT__" + api_endpoint = "__API_ENDPOINT__" + api_ssl_endpoint = "__API_SSL_ENDPOINT__" + cgipath = "__CGIPATH__" + cmd_curl = "__CMD_CURL__" + cmd_enc = "__CMD_ENV__" + cmd_json2tsv = "__CMD_JSON2TSV__" + cmd_mktemp = "__CMD_MKTEMP__" + cmd_rm = "__CMD_RM__" + cmd_strings = "__CMD_STRINGS__" + cmd_webdump = "__CMD_WEBDUMP__" + cmd_xargs = "__CMD_XARGS__" + geomyidae_version = __GEOMYIDAE_VERSION__ + max_bin_size = 
__MAX_BIN_SIZE__ + max_txt_size = __MAX_TXT_SIZE__ + server = "__SERVER__" + port = "__PORT__" + return +} ADDED src/config.m4 Index: src/config.m4 ================================================================== --- /dev/null +++ src/config.m4 @@ -0,0 +1,28 @@ +dnl Set configuration variables +dnl +define(__PHAROS_VERSION__, 11)dnl +dnl +define(__API_ENDPOINT__, http://archive.org)dnl +define(__API_SSL_ENDPOINT__, https://archive.org)dnl +define(__AGENT__, Lynx/2.9.0dev.10 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.1.1w)dnl +define(__CGIPATH__, /~user/pharos)dnl +define(__CMD_AWK__, /usr/bin/awk)dnl +define(__CMD_CURL__, /usr/bin/curl)dnl +define(__CMD_ENV__, /usr/bin/env)dnl +define(__CMD_JSON2TSV__, /usr/local/bin/json2tsv)dnl +define(__CMD_MKTEMP__, /bin/mktemp)dnl +define(__CMD_RM__, rm)dnl +define(__CMD_STRINGS__, /bin/busybox strings)dnl +define(__CMD_WEBDUMP__, /usr/local/bin/webdump)dnl +define(__CMD_XARGS__, /usr/bin/xargs)dnl +define(__GEOMYIDAE_VERSION__, 0.96)dnl +define(__MAX_BIN_SIZE__, 10)dnl +define(__MAX_TXT_SIZE__, 1)dnl +define(__SERVER__, server)dnl +define(__PORT__, port)dnl +dnl +dnl Set up M4 to work with AWK code +dnl +define(m4_substr, defn(substr))dnl +undefine(substr)dnl +changecom(`/*', `*/')dnl ADDED src/details/index.dcgi.m4 Index: src/details/index.dcgi.m4 ================================================================== --- /dev/null +++ src/details/index.dcgi.m4 @@ -0,0 +1,200 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# details/index.dcgi +# +# Show details for an item + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) +undivert(web.awk) + +function main( add_date, col, cols, cmd, creator, descr, dir, i, \ + iaout, id, item_id, item_server, item_size, label, language, \ + license, pub_date, scanner, thumb, title, topic, topics, type, \ + uploader_account, uploader_email, url) +{ + item_id = parts[3] + + iaout = gettemp() + + url = api_endpoint "/metadata/" item_id + api_request(url, "GET", iaout) + 
+ # format search results as a gopher directory (menu) + cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout) + FS = "\t" + cols = 0 + delete col + add_date = "" + creator = "" + descr = "" + dir = "" + id = "" + item_server = "" + item_size = 0 + language = "" + license = "" + pub_date = "" + scanner = "" + thumb = "" + title = "" + topics = 0 + delete topic + type = "" + uploader_account = "" + uploader_email = "" + while ((cmd | getline) > 0) { + if ($1 == ".dir" && $2 == "s") { + dir = $3 + } else if ($1 == ".files[].name" && $2 == "s") { + if ($3 == "__ia_thumb.jpg") { + thumb = $3 + } + } else if ($1 == ".item_size" && $2 == "n") { + item_size = $3 + } else if ($1 == ".metadata.addeddate" && $2 == "s") { + added_date = $3 + } else if ($1 == ".metadata.collection" && $2 == "s") { + cols++ + col[cols] = $3 + } else if ($1 == ".metadata.collection[]" && $2 == "s") { + cols++ + col[cols] = $3 + } else if ($1 == ".metadata.creator" && $2 == "s") { + creator = $3 + } else if ($1 == ".metadata.description" && $2 == "s") { + descr = $3 + } else if ($1 == ".metadata.identifier" && $2 == "s") { + id = $3 + } else if ($1 == ".metadata.language" && $2 == "s") { + language = $3 + } else if ($1 == ".metadata.license" && $2 == "s") { + license = licenseurl[$3] + } else if ($1 == ".metadata.mediatype" && $2 == "s") { + type = $3 + } else if ($1 == ".metadata.publicdate" && $2 == "s") { + pub_date = $3 + } else if ($1 == ".metadata.scanner" && $2 == "s") { + scanner = $3 + } else if ($1 == ".metadata.subject" && $2 == "s") { + topics++ + topic[topics] = $3 + } else if ($1 == ".metadata.subject[]" && $2 == "s") { + topics++ + topic[topics] = $3 + } else if ($1 == ".metadata.title" && $2 == "s") { + title = $3 + } else if ($1 == ".metadata.uploader" && $2 == "s") { + uploader_email = $3 + } else if ($1 == ".server" && $2 == "s") { + item_server = $3 + } + } + close(cmd) + + if (length(id) == 0) { + print_not_found(url) + unlink(iaout) + return + } + + print(shorten(title, 70)) + 
if (length(creator) > 0) { + label = "by " shorten(creator, 70) + printf "[1%s|%s/search/%%09creator:(%s)|%s|%d]\n", label, + cgipath, creator, server, port + } + if (length(thumb) > 0) { + url = sprintf("http://%s%s/%s", item_server, dir, thumb) + printf "[I|Thumbnail|%s/raw/%%09%s|%s|%d]\n", + cgipath, url, server, port + } + + printf "[1|Download|%s/download/%s|%s|%d]\n", cgipath, + item_id, server, port + print "" + + print_html(descr) + + print "" + if (length(add_date) > 0) { + printf "%-20s %s\n", "Date Added:", add_date + } + if (pub_date != add_date) { + printf "%-20s %s\n", "Date Published:", pub_date + } + printf "%-20s %s", "Identifier:", id + if (item_size > 0) { + printf "%-20s %d", "Item Size:", item_size + } + if (length(language) > 0) { + printf "%-20s %s", "Language:", language + } + if (length(license) > 0) { + printf "%-20s %s", "License:", license + } + printf "%-20s %s", "Media Type:", type + + if (topics > 0) { + print "" + print "# Topics" + for (i = 1; i <= topics; i++) { + label = shorten(topic[i], 40) + printf "[1|%s|%s/search/%%09subject:(%s)|%s|%s]\n", label, + cgipath, topic[i], server, port + } + } + + # scrape uploader name from item web page HTML + url = api_ssl_endpoint "/details/" item_id + api_request(url, "GET", iaout) + while ((getline 0) { + if (/item-upload-info__uploader-name/ && + match($0, /\/details\/[^"]*"/)) + { + uploader_account = substr($0, RSTART+9, RLENGTH-10) + } + } + close(iaout) + + if (cols > 0) { + print "" + print "# Collections" + for (i = 1; i <= cols; i++) { + label = shorten(col[i], 40) + printf "[1|%s|%s/search/%%09collection:(%s)|%s|%s]\n", + label, cgipath, col[i], server, port + } + } + + print "" + print "# Uploaded by" + if (length(uploader_account) > 0) { + label = shorten(uploader_account, 70) + printf "[1|%s|%s/account/%s%%09%s|%s|%s]\n", label, cgipath, + uploader_account, uploader_email, server, port + } else { + label = shorten(uploader_email, 70) + printf 
"[1|%s|%s/search/%%09uploader:%s|%s|%s]\n", label, + cgipath, uploader_email, server, port + } + + print "" + printf "[h|Item web page|URL:%s/details/%s|%s|%s]\n", + api_ssl_endpoint, uri_encode(id), server, port + printf "[1|PHAROS|%s|%s|%d]\n", cgipath, server, port + + unlink(iaout) + return +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + main() +} ADDED src/download/index.dcgi.m4 Index: src/download/index.dcgi.m4 ================================================================== --- /dev/null +++ src/download/index.dcgi.m4 @@ -0,0 +1,105 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# download/index.dcgi +# +# Show file downloads using either direct http or gopher proxy links + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) + +function main(cmd, dir, files, file_size, format, iaout, is_archive, \ + is_proxy, item_server, label, mtime, name, source, url) +{ + dir = parts[2] + item_id = parts[3] + if (dir == "download") { + is_proxy = 1 + } else { + # dir == "direct" + is_proxy = 0 + } + + iaout = gettemp() + + url = api_endpoint "/metadata/" item_id + api_request(url, "GET", iaout) + + # format search results as a gopher directory (menu) + cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout) + FS = "\t" + dir = "" + files = 0 + delete format + item_server = "" + delete mtime + delete name + delete file_size + delete source + + while ((cmd | getline) > 0) { + if ($1 == ".dir" && $2 == "s") { + dir = $3 + } else if ($1 == ".files[]" && $2 == "o") { + files++ + } else if ($1 == ".files[].format" && $2 == "s") { + format[files] = $3 + } else if ($1 == ".files[].mtime" && $2 == "s") { + mtime[files] = $3 + } else if ($1 == ".files[].name" && $2 == "s") { + name[files] = $3 + } else if ($1 == ".files[].size" && $2 == "s") { + file_size[files] = $3 + } else if ($1 == ".files[].source" && $2 == "s") { + source[files] = $3 + } else if ($1 == ".server" && $2 == "s") { + item_server = $3 + } + } + close(cmd) + + for (i = 1; 
i <= files; i++) { + label = sprintf("%s %s %s", \ + shorten_left(name[i], 40), \ + strftime("%Y-%m-%d %H:%M", mtime[i]), \ + human_size(file_size[i])) + url = sprintf("http://%s%s/%s", item_server, dir, name[i]) + if (is_proxy) { + if (file_size[i] > max_bin_size * size_mb) { + printf "[h|%s|URL:%s|%s|%s]\n", label, uri_encode(url), + server, port + } else { + printf "[1|%s|%s/links/%%09%s|%s|%s]\n", label, cgipath, + url, server, port + } + } else { + printf "[h|%s|URL:%s|%s|%s]\n", label, uri_encode(url), + server, port + } + is_archive = detect_archive(url) + if (is_archive) { + url = sprintf("http://%s/view_archive.php?archive=%s/%s", \ + item_server, dir, name[i]) + printf "[h|%s (View Contents)|URL:%s|%s|%s]\n", + shorten_left(name[i], 40), uri_encode(url), + server, port + } + } + + printf "[1|Downloads via http|%s/direct/%s|%s|%s]\n", cgipath, + item_id, server, port + + unlink(iaout) + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + util_init() + main() +} ADDED src/images/index.gph.m4 Index: src/images/index.gph.m4 ================================================================== --- /dev/null +++ src/images/index.gph.m4 @@ -0,0 +1,14 @@ +include(config.m4)dnl +# Images + +[1|All Images|__CGIPATH__/search/sortweek desc/%09mediatype:image|__SERVER__|__PORT__] +[1|This Just In|__CGIPATH__/search/sortpublicdate desc/%09mediatype:image|__SERVER__|__PORT__] +[1|Metropolitan Museum of Art|__CGIPATH__/search/sortweek desc/%09collection:metropolitanmuseumofart-gallery|__SERVER__|__PORT__] +[1|Cleveland Museum of Art|__CGIPATH__/search/sortweek desc/%09collection:clevelandart|__SERVER__|__PORT__] +[1|Flickr Commons|__CGIPATH__/search/sortweek desc/%09collection:flickrcommons|__SERVER__|__PORT__] +[1|Occupy Wallstreet Flickr|__CGIPATH__/search/sortweek desc/%09collection:flickr-ows|__SERVER__|__PORT__] +[1|Cover Art|__CGIPATH__/search/sortweek desc/%09collection:coverartarchive|__SERVER__|__PORT__] +[1|USGS 
Maps|__CGIPATH__/search/sortweek desc/%09collection:maps_usgs|__SERVER__|__PORT__] +[1|NASA Images|__CGIPATH__/search/sortweek desc/%09collection:nasa|__SERVER__|__PORT__] +[1|Solar System Collection|__CGIPATH__/search/sortweek desc/%09collection:solarsystemcollection|__SERVER__|__PORT__] +[1|Ames Research Center|__CGIPATH__/search/sortweek desc/%09collection:amesresearchcenterimagelibrary|__SERVER__|__PORT__] ADDED src/index.gph.m4 Index: src/index.gph.m4 ================================================================== --- /dev/null +++ src/index.gph.m4 @@ -0,0 +1,26 @@ +include(config.m4)dnl + .n. | + /___\ _.---. \ _ / + [|||] (_._ ) )--;_) =- + [___] '---'.__,' \ + }-=-{ | + |-" | + |.-"| p +~^=~^~-|_.-|~^-~^~ ~^~ -^~^~|\ ~^-~^~- +^ .=.| _.|__ ^ ~ /| \ + ~ /:. \" _|_/\ ~ /_|__\ ^ +.-/::. | |""|-._ ^ ~~~~ + `===-'-----'""` '-. ~ + PHAROS __.-' ^ + +Gopher proxy to Internet Archive. + +[7|Search|__CGIPATH__/search/|__SERVER__|__PORT__] +[1|Advanced Search|__CGIPATH__/wizard/step1|__SERVER__|__PORT__] +[1|Books|__CGIPATH__/books/|__SERVER__|__PORT__] +[1|Video|__CGIPATH__/video/|__SERVER__|__PORT__] +[1|Audio|__CGIPATH__/audio/|__SERVER__|__PORT__] +[1|Software|__CGIPATH__/software/|__SERVER__|__PORT__] +[1|Images|__CGIPATH__/images/|__SERVER__|__PORT__] +[0|About PHAROS|__CGIPATH__/about.txt|__SERVER__|__PORT__] +[h|Source Code|URL:https://chiselapp.com/user/bencollver/repository/pharos/logout|__SERVER__|__PORT__] ADDED src/list/index.dcgi.m4 Index: src/list/index.dcgi.m4 ================================================================== --- /dev/null +++ src/list/index.dcgi.m4 @@ -0,0 +1,74 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# lists/index.dcgi +# +# Show a specific list + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) + +function main( acct, cmd, count, iaout, id, is_private, item_id, + name, list_id, name, parts, url) +{ + count = split(search, parts, "/") + acct = parts[1] + list_id = parts[2] + + print acct "'s 
Lists" + print "" + + iaout = gettemp() + + url = api_ssl_endpoint "/services/users/" acct "/lists/" list_id + api_request(url, "GET", iaout) + + # format list as a gopher directory (menu) + cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout) + FS = "\t" + name = "" + id = 0 + is_private = 0 + item_id = "" + + while ((cmd | getline) > 0) { + if ($1 == ".value[]" && $2 == "o") { + if (!is_private && length(name) > 0 && item_count > 0) { + printf "[1%4d Items: %-40s|%s/lists/%%09%s/%d|%s|%s]\n", + item_count, name, cgipath, search, id, server, port + } + } else if ($1 == ".value.list_name" && $2 == "s") { + name = $3 + print "# List: " name + id = 0 + is_private = 0 + } else if ($1 == ".value.is_private" && $2 == "b") { + if ($3 == "true") { + is_private = 1 + } + } else if ($1 == ".value.members[].identifier" && $2 == "s") { + if (!is_private) { + item_id = $3 + printf "[1|%s|%s/details/%s|%s|%s]\n", item_id, + cgipath, item_id, server, port + } + } + } + close(cmd) + + print "" + printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port + + unlink(iaout) + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + main() +} ADDED src/lists/index.dcgi.m4 Index: src/lists/index.dcgi.m4 ================================================================== --- /dev/null +++ src/lists/index.dcgi.m4 @@ -0,0 +1,75 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# lists/index.dcgi +# +# Show a list of a user's lists + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) + +function main( cmd, iaout, id, is_private, item_count, item_id, name, url) +{ + print search "'s Lists" + print "" + + iaout = gettemp() + + url = api_ssl_endpoint "/services/users/" search "/lists" + api_request(url, "GET", iaout) + + # format list as a gopher directory (menu) + cmd = sprintf("%s <%s 2>&1", cmd_json2tsv, iaout) + FS = "\t" + name = "" + id = 0 + is_private = 0 + item_count = 0 + item_id = "" + + while ((cmd | getline) > 0) { + if ($1 == ".value[]" && $2 == 
"o") { + # print information for previous list + if (!is_private && length(name) > 0 && item_count > 0) { + printf "[1|%4d Items: %-40s|%s/list/%%09%s/%d|%s|%s]\n", + item_count, name, cgipath, search, id, server, port + } + } else if ($1 == ".value[].list_name" && $2 == "s") { + name = $3 + id = 0 + is_private = 0 + item_count = 0 + } else if ($1 == ".value[].is_private" && $2 == "b") { + if ($3 == "true") { + is_private = 1 + } + } else if ($1 == ".value[].id" && $2 == "n") { + id = $3 + } else if ($1 == ".value[].members[].identifier" && $2 == "s") { + item_count++ + } + } + close(cmd) + + # print information for previous list + if (!is_private && length(name) > 0 && item_count > 0) { + printf "[1|%4d Items: %-40s|%s/list/%%09%s/%d|%s|%s]\n", + item_count, name, cgipath, search, id, server, port + } + + print "" + printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port + + unlink(iaout) + return +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + main() +} ADDED src/raw/index.cgi.m4 Index: src/raw/index.cgi.m4 ================================================================== --- /dev/null +++ src/raw/index.cgi.m4 @@ -0,0 +1,46 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# debug/index.cgi +# +# Show HTTP headers from curl fetching a URL +# +# links/index.dcgi +# +# Show download links, plus links scraped from HTML document +# +# raw/index.cgi +# +# Show raw bytes from binary document +# +# text/index.cgi +# +# Show text content scraped from HTML document + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) +undivert(web.awk) + +function main() { + if (path == "/debug/") { + dump(search, TYPE_HEADERS) + } else if (path == "/raw/") { + dump(search, TYPE_RAW) + } else if (path == "/text/") { + dump(search, TYPE_TEXT) + } else if (path == "/links/") { + dump(search, TYPE_LINKS) + } + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + web_init() + main() +} ADDED src/search/index.dcgi.m4 Index: 
src/search/index.dcgi.m4 ================================================================== --- /dev/null +++ src/search/index.dcgi.m4 @@ -0,0 +1,212 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# search/index.dcgi +# +# Show search results + +include(config.awk) +undivert(api.awk) +undivert(cgi.awk) +undivert(util.awk) + +function main(search, cmd, count, creator, descr, field, fields, i, \ + iaout, id, jsout, label, numfound, order, orders, page, rows, \ + searchstr, title, type, url) +{ + mediatype["audio"] = "aud" + mediatype["collection"] = "col" + mediatype["data"] = "dat" + mediatype["etree"] = "aud" + mediatype["image"] = "img" + mediatype["movies"] = "mov" + mediatype["software"] = "bin" + mediatype["texts"] = "txt" + mediatype["web"] = "web" + + rows = 15 + page = 1 + delete order + orders = 0 + + # parse out page number and sort orders + for (i in parts) { + if (parts[i] ~ /^rows[0-9][0-9]*$/) { + rows = substr(parts[i], 5) + } else if (parts[i] ~ /^page[0-9][0-9]*$/) { + page = substr(parts[i], 5) + } else if (parts[i] ~ /^sort/) { + orders++ + str = substr(parts[i], 5) + order[orders] = str + } + } + + # special case for when the search term is an archive.org details URL + if (match(search, /^https:\/\/(www\.)?archive\.org\/details\//)) { + id = substr(search, RLENGTH+1) + search = "identifier:" id + } + + # remove quotes from search string, since it gets quoted later + gsub(/"/, "", search) + + # default sort orders if none were specified + if (orders == 0) { + orders = 1 + if (search == "mediatype:collection" || + search == "mediatype:(collection)") + { + order[1] = "collection_size desc" + } else { + order[1] = "nav_order desc" + } + } + + iaout = gettemp() + jsout = gettemp() + + # get search results + + field[1] = "creator" + field[2] = "description" + field[3] = "identifier" + field[4] = "mediatype" + field[5] = "title" + fields = 5 + + # remove anyfield, a hobgoblin of consistency + searchstr = search + gsub(/anyfield:/, "", searchstr) + + url 
= sprintf("%s/advancedsearch.php?q=%s&output=json&rows=%d&page=%d", \ + api_endpoint, searchstr, rows, page) + for (i = 1; i <= orders; i++) { + url = url sprintf("&sort%%5B%d%%5D=%s", i-1, order[i]) + } + for (i = 1; i <= fields; i++) { + url = url sprintf("&fl%%5B%d%%5D=%s", i-1, field[i]) + } + api_request(url, "GET", iaout) + + cmd = sprintf("%s <%s >%s 2>&1", cmd_json2tsv, iaout, jsout) + system(cmd) + + numfound = 0 + FS = "\t" + while ((getline 0) { + if ($1 == ".response.numFound" && $2 == "n") { + numfound = $3 + } + } + close(jsout) + + if (search ~ /^@/) { + numfound++ + } + if (numfound == 0) { + print "Your search did not match any items in the Archive." + print "Try different keywords or a more general search." + print "" + printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port + unlink(jsout) + unlink(iaout) + return + } else { + pages = int(numfound / rows) + if (numfound % rows != 0) { + pages++ + } + printf "# %s search results, page %d of %d\n", numfound, \ + page, pages + print "" + } + + # format search results as a gopher directory (menu) + FS = "\t" + creator = "" + descr = "" + id = "" + title = "" + type = "" + count = 0 + + if (search ~ /^@/) { + printf "[1|Account %s|%s/account/%s|%s|%s]\n", search, cgipath, + search, server, port + } + + while ((getline 0) { + if ($1 == ".response.docs[].creator" && $2 == "s") { + creator = $3 + } else if ($1 == ".response.docs[].description" && $2 == "s") { + descr = $3 + } else if ($1 == ".response.docs[].identifier" && $2 == "s") { + id = $3 + } else if ($1 == ".response.docs[].mediatype" && $2 == "s") { + type = $3 + } else if ($1 == ".response.docs[].title" && $2 == "s") { + title = $3 + count++ + if (length(creator) > 0) { + label = sprintf("[%s] %s by %s", mediatype[type], \ + shorten(title, 40), shorten(creator, 18)) + } else { + label = sprintf("[%s] %s", mediatype[type], shorten(title, 58)) + } + if (type == "collection") { + printf "[1|%s|%s/search/%%09collection:(%s)|%s|%s]\n", + label, 
cgipath, id, server, port + } else { + printf "[1|%s|%s/details/%s|%s|%s]\n", label, cgipath, + id, server, port + } + creator = "" + descr = "" + id = "" + type = "" + } + } + close(jsout) + + print "" + + # only show "page back" if the user is past page 1 + if (page > 1) { + printf "[1|[<<] Page %d|%s/search/page%d/rows%d/%%09%s|%s|%s]\n", + page - 1, cgipath, page - 1, rows, search, server, port + } + + # only show "next page" if the current page is completely full + if (count == rows) { + printf "[1|[>>] Page %d|%s/search/page%d/rows%d/%%09%s|%s|%s]\n", + page + 1, cgipath, page + 1, rows, search, server, port + } + + # only show "sort" if there's more than one item to sort + if (count > 1) { + printf "[1|[^v] Sort|%s/sort/%%09%s|%s|%s]\n", cgipath, search, + server, port + } + + # only show "search within list" if there's multiple pages of results + if (numfound > rows) { + printf "[1|[\\/] Filter results|%s/wizard/step1/%s|%s|%s]\n", + cgipath, search, server, port + } + + printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port + + unlink(jsout) + unlink(iaout) + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + uri_encode_init() + util_init() + main(search) +} ADDED src/software/index.gph.m4 Index: src/software/index.gph.m4 ================================================================== --- /dev/null +++ src/software/index.gph.m4 @@ -0,0 +1,27 @@ +include(config.m4)dnl + +[1|All Software|__CGIPATH__/search/sortweek desc/%09mediatype:software|__SERVER__|__PORT__] +[1|This Just In|__CGIPATH__/search/sortpublicdate desc/%09mediatype:software|__SERVER__|__PORT__] +[1|Internet Arcade|__CGIPATH__/search/sortweek desc/%09collection:internetarcade|__SERVER__|__PORT__] +[1|Console Living Room|__CGIPATH__/search/sortweek desc/%09collection:consolelivingroom|__SERVER__|__PORT__] +[1|Old School Emulation|__CGIPATH__/search/sortweek desc/%09collection:tosec|__SERVER__|__PORT__] +[1|MS-DOS Games|__CGIPATH__/search/sortweek 
desc/%09collection:softwarelibrary_msdos_games|__SERVER__|__PORT__] +[1|Historical Software|__CGIPATH__/search/sortweek desc/%09collection:historicalsoftware|__SERVER__|__PORT__] +[1|Classic PC Games|__CGIPATH__/search/sortweek desc/%09collection:classicpcgames|__SERVER__|__PORT__] +[1|Software Library|__CGIPATH__/search/sortweek desc/%09collection:softwarelibrary AND mediatype:collection|__SERVER__|__PORT__] +[1|Kodi Archive & Support Files|__CGIPATH__/search/sortweek desc/%09collection:kodi_archive|__SERVER__|__PORT__] +[1|Vintage Software|__CGIPATH__/search/sortweek desc/%09collection:vintagesoftware|__SERVER__|__PORT__] +[1|APK|__CGIPATH__/search/sortweek desc/%09collection:apkarchive|__SERVER__|__PORT__] +[1|MS-DOS|__CGIPATH__/search/sortweek desc/%09collection:softwarelibrary_msdos|__SERVER__|__PORT__] +[1|CD-ROM Software|__CGIPATH__/search/sortweek desc/%09collection:cd-roms|__SERVER__|__PORT__] +[1|CD-ROM Software Library|__CGIPATH__/search/sortweek desc/%09collection:cdromsoftware|__SERVER__|__PORT__] +[1|Software Sites|__CGIPATH__/search/sortweek desc/%09collection:softwaresites|__SERVER__|__PORT__] +[1|Tucows Software Library|__CGIPATH__/search/sortweek desc/%09collection:tucows|__SERVER__|__PORT__] +[1|Shareware CD-ROMs|__CGIPATH__/search/sortweek desc/%09collection:cdbbsarchive|__SERVER__|__PORT__] +[1|Software Capsules Compilation|__CGIPATH__/search/sortweek desc/%09collection:softwarecapsules|__SERVER__|__PORT__] +[1|CD-ROM Images|__CGIPATH__/search/sortweek desc/%09collection:cdromimages|__SERVER__|__PORT__] +[1|ZX Spectrum|__CGIPATH__/search/sortweek desc/%09collection:softwarelibrary_zx_spectrum|__SERVER__|__PORT__] +[1|DOOM Level CD|__CGIPATH__/search/sortweek desc/%09collection:doom-cds|__SERVER__|__PORT__] +[1|Floppy Disks of Software|__CGIPATH__/search/sortweek desc/%09collection:floppysoftware|__SERVER__|__PORT__] +[1|The Good Old Days IBM PC Floppy Collection|__CGIPATH__/search/sortweek 
desc/%09collection:TGODFloppyCollection|__SERVER__|__PORT__] +[1|MS-DOS: The Frostbyte Shareware Collection|__CGIPATH__/search/sortweek desc/%09collection:softwarelibrary_msdos_frostbyte|__SERVER__|__PORT__] ADDED src/sort/index.dcgi.m4 Index: src/sort/index.dcgi.m4 ================================================================== --- /dev/null +++ src/sort/index.dcgi.m4 @@ -0,0 +1,61 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# sort/index.dcgi +# +# Change search sort order + +include(config.awk) +undivert(cgi.awk) + +function main( i, lbl, opt) { + lbl[1] = "Default [^]" + opt[1] = "nav_order asc" + lbl[2] = "Default [v]" + opt[2] = "nav_order desc" + lbl[3] = "Weekly views [^]" + opt[3] = "week asc" + lbl[4] = "Weekly views [v]" + opt[4] = "week desc" + lbl[5] = "All-time views [^]" + opt[5] = "downloads asc" + lbl[6] = "All-time views [v]" + opt[6] = "downloads desc" + lbl[7] = "Title [^]" + opt[7] = "title asc" + lbl[8] = "Title [v]" + opt[8] = "title desc" + lbl[9] = "Date added [^]" + opt[9] = "addeddate asc" + lbl[10] = "Date added [v]" + opt[10] = "addeddate desc" + lbl[11] = "Date created [^]" + opt[11] = "createddate asc" + lbl[12] = "Date created [v]" + opt[12] = "createddate desc" + lbl[13] = "Date published [^]" + opt[13] = "publicdate asc" + lbl[14] = "Date published [v]" + opt[14] = "publicdate desc" + lbl[15] = "Creator [^]" + opt[15] = "creator asc" + lbl[16] = "Creator [v]" + opt[16] = "creator desc" + lbl[17] = "Random" + opt[17] = "random asc" + + print "# Sort by" + print "" + for (i = 1; i < 18; i++) { + printf "[1|%s|%s/search/sort%s%%09%s|%s|%s]\n", + lbl[i], cgipath, opt[i], search, server, port + } + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + main() +} ADDED src/util.awk Index: src/util.awk ================================================================== --- /dev/null +++ src/util.awk @@ -0,0 +1,131 @@ +function detect_archive(url, str) { + str = tolower(url) + if (str ~ /\.iso$/ || + str ~ /\.zip$/) + { + retval = 1 + } 
else {
		retval = 0
	}
	return retval
}

# detect_html(url): return 1 if url's path looks like an HTML/PHP page
# (.htm, .html, .php), else 0. Case-insensitive via tolower().
function detect_html(url, str) {
	str = tolower(url)
	if (str ~ /\.html?$/ ||
	    str ~ /\.php$/)
	{
		retval = 1
	} else {
		retval = 0
	}
	return retval
}

# detect_image(url): return 1 if url's path looks like a raster image
# (.bmp .gif .jpg .jpeg .png .webp), else 0.
function detect_image(url, str) {
	str = tolower(url)
	if (str ~ /\.bmp$/ ||
	    str ~ /\.gif$/ ||
	    str ~ /\.jpg$/ ||
	    str ~ /\.jpeg$/ ||
	    str ~ /\.png$/ ||
	    str ~ /\.webp$/)
	{
		retval = 1
	} else {
		retval = 0
	}
	return retval
}

# gettemp(): run cmd_mktemp and return the temporary file name it prints.
# Exits the script with a diagnostic if mktemp fails or prints nothing.
function gettemp( result) {
	while ((cmd_mktemp | getline) > 0) {
		retval = $0
	}
	result = close(cmd_mktemp)
	if (result != 0) {
		print "Error: mktemp failed exit status: " result
		exit
	}
	if (length(retval) == 0) {
		print "Error: mktemp failed, no tmpfile"
		exit
	}
	return retval
}

# human_size(bytes): format a byte count as a short human-readable string
# (e.g. "1.2G", "3.4M", "5.6K", "789B") using the size_* constants from
# util_init().
function human_size(bytes) {
	if (bytes > size_gb) {
		retval = sprintf("%.1fG", bytes / size_gb)
	} else if (bytes > size_mb) {
		retval = sprintf("%.1fM", bytes / size_mb)
	} else if (bytes > size_kb) {
		retval = sprintf("%.1fK", bytes / size_kb)
	} else {
		retval = sprintf("%dB", bytes)
	}
	return retval
}

# print_not_found(url): emit a gopher menu explaining that an Archive item
# is unavailable, with links to the raw metadata URL and the Terms of Use.
function print_not_found(url) {
	print "Item cannot be found"
	print ""
	print "Items may be taken down for various reasons,"
	print "including by decision of the uploader or"
	print "due to a violation of the Terms of Use."
	print ""
	printf "[h|Metadata|URL:%s|%s|%s]\n", url, server, port
	print ""
	url = api_ssl_endpoint "/about/terms.php"
	printf "[0|Terms of Use|%s/text/%%09%s|%s|%s]\n", cgipath,
	    url, server, port
	print ""
	printf "[1|PHAROS|%s|%s|%s]\n", cgipath, server, port
	return
}

# shorten_left(str, len): truncate str to at most len characters, keeping
# the tail and prefixing "..." when truncation occurs.
function shorten_left(str, len) {
	if (length(str) > len) {
		# keep the last len-3 characters; awk substr() is 1-indexed,
		# so the suffix starts at length(str) - (len - 3) + 1
		retval = "..." substr(str, length(str) - len + 4)
	} else {
		retval = str
	}
	return retval
}

# shorten(str, len): truncate str to at most len characters, keeping the
# head and appending "..." when truncation occurs.
function shorten(str, len) {
	if (length(str) > len) {
		# substr() is 1-indexed; a start of 0 silently drops a character
		retval = substr(str, 1, len - 3) "..."
+ } else { + retval = str + } + return retval +} + +function unlink(name) { + system(cmd_rm " " name) + return +} + +function util_init() { + licenseurl[""] = "None" + licenseurl["https://creativecommons.org/publicdomain/zero/1.0/"] = \ + "Public Domain" + licenseurl["https://creativecommons.org/licenses/by/4.0/"] = \ + "Attribution 4.0 International" + licenseurl["https://creativecommons.org/licenses/by-sa/4.0/"] = \ + "Attribution-ShareAlike 4.0 International" + licenseurl["https://creativecommons.org/licenses/by-nd/4.0/"] = \ + "Attribution-NoDerivs 4.0 International" + licenseurl["https://creativecommons.org/licenses/by-nc/4.0/"] = \ + "Attribution-NonCommercial 4.0 International" + licenseurl["https://creativecommons.org/licenses/by-nc-sa/4.0/"] = \ + "Attribution-NonCommercial-ShareAlike 4.0 International" + licenseurl["https://creativecommons.org/licenses/by-nc-nd/4.0/"] = \ + "Attribution-NonCommercial-NoDerivs 4.0 International" + + size_kb = 1024 + size_mb = 1024 * 1024 + size_gb = 1024 * 1024 * 1024 + return +} ADDED src/video/index.gph.m4 Index: src/video/index.gph.m4 ================================================================== --- /dev/null +++ src/video/index.gph.m4 @@ -0,0 +1,23 @@ +include(config.m4)dnl +# Video + +[1|All Video|__CGIPATH__/search/sortweek desc/%09mediatype:movies|__SERVER__|__PORT__] +[1|This Just In|__CGIPATH__/search/sortpublicdate desc/%09mediatype:movies|__SERVER__|__PORT__] +[1|Prelinger Archives|__CGIPATH__/search/sortweek desc/%09collection:prelinger|__SERVER__|__PORT__] +[1|Democracy Now!|__CGIPATH__/search/sortweek desc/%09collection:democracy_now_vid|__SERVER__|__PORT__] +[1|Occupy Wall Street|__CGIPATH__/search/sortweek desc/%09collection:occupywallstreet|__SERVER__|__PORT__] +[1|TV NSA Clip Library|__CGIPATH__/search/sortweek desc/%09collection:nsa|__SERVER__|__PORT__] +[1|Animation And Cartoons|__CGIPATH__/search/sortweek desc/%09collection:animationandcartoons|__SERVER__|__PORT__] +[1|Arts & 
Music|__CGIPATH__/search/sortweek desc/%09collection:artsandmusicvideos|__SERVER__|__PORT__] +[1|Computers & Technology|__CGIPATH__/search/sortweek desc/%09collection:computersandtechvideos|__SERVER__|__PORT__] +[1|Cultural & Academic Films|__CGIPATH__/search/sortweek desc/%09collection:culturalandacademicfilms|__SERVER__|__PORT__] +[1|Ephemeral Films|__CGIPATH__/search/sortweek desc/%09collection:ephemera|__SERVER__|__PORT__] +[1|Movies|__CGIPATH__/search/sortweek desc/%09collection:moviesandfilms|__SERVER__|__PORT__] +[1|News & Public Affairs|__CGIPATH__/search/sortweek desc/%09collection:newsandpublicaffairs|__SERVER__|__PORT__] +[1|Spirituality & Religion|__CGIPATH__/search/sortweek desc/%09collection:spiritualityandreligion|__SERVER__|__PORT__] +[1|Sports|__CGIPATH__/search/sortweek desc/%09collection:sports|__SERVER__|__PORT__] +[1|Television|__CGIPATH__/search/sortweek desc/%09collection:television AND mediatype:collection|__SERVER__|__PORT__] +[1|VHS Vault|__CGIPATH__/search/sortweek desc/%09collection:vhsvault|__SERVER__|__PORT__] +[1|Video Games|__CGIPATH__/search/sortweek desc/%09collection:gamevideos AND mediatype:collection|__SERVER__|__PORT__] +[1|Vlogs|__CGIPATH__/search/sortweek desc/%09collection:vlogs|__SERVER__|__PORT__] +[1|Youth Media|__CGIPATH__/search/sortweek desc/%09collection:youth_media|__SERVER__|__PORT__] ADDED src/web.awk Index: src/web.awk ================================================================== --- /dev/null +++ src/web.awk @@ -0,0 +1,159 @@ +function dump(search, type, base, cmd, curlcfg, is_html, is_image, \ + label, limit, link, marker, parts, prefix, proto, relative, root, \ + url) +{ + url = search + gsub(/%3F/, "?", url) + + if (url !~ /^(http|https):\/\/[[:alnum:].-]+(:[0-9]+)*(\/[[:alnum:].,?@~=%%:\/+&_() -]*)*$/) { + printf "Error: Unacceptable URL \"%s\"\r\n", url + return + } + + if (type == TYPE_HEADERS || type == TYPE_RAW) { + limit = max_bin_size + } else { + limit = max_txt_size + } + + # Use temporary file 
for curl configuration. + # This keeps user input separate from shell execution. + + curlcfg = gettemp() + printf "--connect-timeout 10\n" > curlcfg + printf "--max-filesize %dM\n", limit >> curlcfg + printf "--max-redirs 0\n" >> curlcfg + printf "--proto =http,https\n" >> curlcfg + printf "--show-error\n" >> curlcfg + printf "--silent\n" >> curlcfg + printf "--url %s\n", uri_encode(url) >> curlcfg + printf "--user-agent %s\n", agent >> curlcfg + if (type == TYPE_HEADERS) { + printf "--output /dev/null\n" >> curlcfg + printf "--dump-header -\n" >> curlcfg + } + close(curlcfg) + + if (type == TYPE_HEADERS || type == TYPE_RAW) { + cmd = sprintf("%s -K %s 2>&1", cmd_curl, curlcfg) + system(cmd) + unlink(curlcfg) + return + } + + # Use strings command to guard webdump from binary input. + # Use "strings -a" to avoid security pitfalls. + + cmd = sprintf("%s -K %s 2>&1 | %s -a -n 3 | %s -ilr -w 60", \ + cmd_curl, curlcfg, cmd_strings, cmd_webdump) + + # Parse base out of original URL. + # Use this to convert relative links to full URLs. + # webdump has the -b option for this. + # Do it manually instead to avoid passing user input through the shell. 
+ + split(url, parts, "?") + base = parts[1] + if (match(base, /^(http|https):\/\/[[:alnum:].-]+(:[0-9]+)*/)) { + root = substr(base, 0, RLENGTH) + } else { + root = "" + } + if (match(base, /^(http|https):/)) { + proto = substr(base, 0, RLENGTH) + } else { + proto = "" + } + sub(/\/$/, "", base) + + # marker determines where the bottom references section begins + # line numbers smaller than the marker are content + # line numbers larger than the marker are referenced links + + marker = 999999 + + if (type == TYPE_LINKS) { + is_html = detect_html(url) + is_image = detect_image(url) + printf "[9|Binary download|%s/raw/%%09%s|%s|%s]\n", cgipath, + search, server, port + if (is_image) { + printf "[I|Image view|%s/raw/%%09%s|%s|%s]\n", cgipath, + search, server, port + } + if (is_html) { + label = "Source" + } else { + label = "Text view" + } + printf "[0|%s|%s/raw/%%09%s|%s|%s]\n", label, cgipath, search, + server, port + if (is_html) { + label = "HTML view" + } else { + label = "Strings" + } + printf "[0|%s|%s/text/%%09%s|%s|%s]\n", label, cgipath, search, + server, port + printf "[0|Headers|%s/debug/%%09%s|%s|%s]\n", cgipath, search, + server, port + print "" + } + + while ((cmd | getline) > 0) { + if (NR < marker) { + if ($0 ~ /^References$/) { + marker = NR + } + if (type == TYPE_TEXT) { + print $0 + } + } else { + if (match($0, /^ [0-9]+\. /)) { + prefix = substr($0, 0, RLENGTH) + link = substr($0, RLENGTH+1) + if (link !~ /^[a-z]+:/) { + # convert relative link to full URL + relative = link + if (relative ~ /^\/\//) { + link = proto relative + } else if (relative ~ /^\//) { + link = root relative + } else { + link = base "/" relative + } + } + print prefix link + } else { + print $0 + } + } + } + close(cmd) + + unlink(curlcfg) + + return +} + +function print_html(html, cmd, work) { + work = gettemp() + gsub(/\\n/, "
", html) + print html >work + close(work) + cmd = sprintf("%s -a -n 3 <%s | %s -ilr -w 60", cmd_strings, work, \ + cmd_webdump) + while ((cmd | getline) > 0) { + print + } + close(cmd) + unlink(work) + return +} + +function web_init() { + TYPE_HEADERS = 2 + TYPE_LINKS = 1 + TYPE_RAW = 9 + TYPE_TEXT = 0 +} ADDED src/wizard/step1/index.dcgi.m4 Index: src/wizard/step1/index.dcgi.m4 ================================================================== --- /dev/null +++ src/wizard/step1/index.dcgi.m4 @@ -0,0 +1,67 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# wizard/step1/index.dcgi +# +# Select field to filter/search by + +include(config.awk) +undivert(cgi.awk) + +function main( i, lbl, opt, searchstr) { + searchstr = parts[4] + + lbl[1] = "Any field contains" + opt[1] = "anyfield" + lbl[2] = "Any field does not contain" + opt[2] = "-anyfield" + lbl[3] = "Title contains" + opt[3] = "title" + lbl[4] = "Title does not contain" + opt[4] = "-title" + lbl[5] = "Creator contains" + opt[5] = "creator" + lbl[6] = "Creator does not contain" + opt[6] = "-creator" + lbl[7] = "Description contains" + opt[7] = "description" + lbl[8] = "Description does not contain" + opt[8] = "-description" + lbl[9] = "Collection is" + opt[9] = "collection" + lbl[10] = "Collection does not contain" + opt[10] = "-collection" + lbl[11] = "Mediatype is" + opt[11] = "mediatype" + lbl[12] = "Mediatype does not contain" + opt[12] = "-mediatype" + lbl[13] = "Date or date range is" + opt[13] = "date" + lbl[14] = "Language contains" + opt[14] = "language" + lbl[15] = "Always available" + opt[15] = "-access-restricted-item" + + print "# Search wizard: Select field" + print "" + for (i = 1; i < 16; i++) { + if (opt[i] ~ /mediatype$/) { + printf "[1|%s|%s/wizard/step2/%s/%s|%s|%s]\n", lbl[i], + cgipath, opt[i], searchstr, server, port + } else if (lbl[i] == "Always available") { + printf "[1|%s|%s/wizard/step3/%s/%s%%09true|%s|%s]\n", + lbl[i], cgipath, opt[i], searchstr, server, port + } else { + printf 
"[7|%s|%s/wizard/step3/%s/%s|%s|%s]\n", lbl[i], + cgipath, opt[i], searchstr, server, port + } + } + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + main() +} ADDED src/wizard/step2/index.dcgi.m4 Index: src/wizard/step2/index.dcgi.m4 ================================================================== --- /dev/null +++ src/wizard/step2/index.dcgi.m4 @@ -0,0 +1,51 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# wizard/step2/index.dcgi +# +# Select mediatype to filter/search by + +include(config.awk) +undivert(cgi.awk) + +function main( field, newsearch, searchstr) { + field = parts[4] + searchstr = parts[5] + + if (field == "mediatype") { + print "# Mediatype is:" + } else { + print "# Mediatype does not contain:" + } + print "" + + lbl[1] = "audio" + lbl[2] = "collection" + lbl[3] = "data" + lbl[4] = "etree" + lbl[5] = "image" + lbl[6] = "movies" + lbl[7] = "software" + lbl[8] = "texts" + lbl[9] = "web" + for (i = 1; i < 10; i++) { + if (length(searchstr) == 0) { + newsearch = sprintf("%s:(%s)", field, lbl[i]) + } else { + newsearch = sprintf("%s AND %s:(%s)", searchstr, field, lbl[i]) + } + printf "[1|%s|%s/search/%%09%s|%s|%s]\n", lbl[i], cgipath, + newsearch, server, port + } + print "# Progress:" + print "" + print "* Field: Mediatype" + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + main() +} ADDED src/wizard/step3/index.dcgi.m4 Index: src/wizard/step3/index.dcgi.m4 ================================================================== --- /dev/null +++ src/wizard/step3/index.dcgi.m4 @@ -0,0 +1,44 @@ +include(config.m4)dnl +#!__CMD_AWK__ -f + +# wizard/step3/index.dcgi +# +# Apply new search terms + +include(config.awk) +undivert(cgi.awk) + +function main( field, label, newsearch, op, searchstr, value) { + field = parts[4] + searchstr = parts[5] + value = search + + if (field ~ /^-/) { + label = toupper(substr(field, 2, 1)) substr(field, 3) + op = "does not contain" + } else { + label = toupper(substr(field, 1, 1)) substr(field, 2) + op = "contains" + 
} + if (length(searchstr) == 0) { + newsearch = sprintf("%s:(%s)", field, value) + } else { + newsearch = sprintf("%s AND %s:(%s)", searchstr, field, value) + } + print "" + printf "[1|Apply search criteria|%s/search/%%09%s|%s|%s]\n", + cgipath, newsearch, server, port + print "" + print "# Progress:" + print "" + printf "* Field %s %s %s\n", label, op, value + printf "* New search: %s\n", newsearch + exit 0 +} + +BEGIN { + config_init() + + cgi_init() + main() +}