Check-in by ben on 2025-06-19 03:33:26 Set global variable blocked=1 in cgi.awk function cgi_init() when client IP address is in block.txt. Change how blocked clients are treated. OLD: Send empty document. NEW: Send item URL to redirect back to archive.org. My intention is to block Gopher-to-web proxies without leaving end users stuck on an empty document. Instead, bots and users on blocked IP addresses will be redirected to item web pages. INSERTED DELETED 6 0 src/account/index.dcgi.m4 3 1 src/cgi.awk 6 0 src/details/index.dcgi.m4 7 0 src/download/index.dcgi.m4 6 0 src/list/index.dcgi.m4 6 0 src/lists/index.dcgi.m4 6 0 src/listsort/index.dcgi.m4 6 0 src/raw/index.cgi.m4 6 0 src/search/index.dcgi.m4 6 0 src/sort/index.dcgi.m4 6 0 src/wizard/step1/index.dcgi.m4 6 0 src/wizard/step2/index.dcgi.m4 6 0 src/wizard/step3/index.dcgi.m4 76 1 TOTAL over 13 changed files Index: src/account/index.dcgi.m4 ================================================================== --- src/account/index.dcgi.m4 +++ src/account/index.dcgi.m4 @@ -17,10 +17,16 @@ item_server, item_size, output, signature, str, thumb, title, type, url) { acct = parts[3] email = search + + if (blocked) { + printf "[h|Account|URL:%s/details/%s|%s|%s]\n", + api_ssl_endpoint, acct, server, port + exit 0 + } signature = sprintf("account/%s/%s", acct, email) str = cache_init(signature) if (length(str) > 0) { print str Index: src/cgi.awk ================================================================== --- src/cgi.awk +++ src/cgi.awk @@ -1,11 +1,13 @@ function cgi_init( ip) { + blocked = 0 ip = ENVIRON["REMOTE_ADDR"] while ((getline < blocklist) > 0) { # bad bot ignoring robots.txt, block by IP address if (match(ip, $0)) { - exit 0 + blocked = 1 + break } } close(blocklist) search = ARGV[1] Index: src/details/index.dcgi.m4 ================================================================== --- src/details/index.dcgi.m4 +++ src/details/index.dcgi.m4 @@ -17,10 +17,16 @@ iaout, id, item_id, item_server, item_size, 
label, language, license, output, pub_date, scanner, signature, str, thumb, title, topic, topics, type, uploader_account, uploader_email, url) { item_id = parts[3] + + if (blocked) { + printf "[h|Details|URL:%s/details/%s|%s|%s]\n", + api_ssl_endpoint, item_id, server, port + exit 0 + } signature = "details/" item_id str = cache_init(signature) if (length(str) > 0) { print str Index: src/download/index.dcgi.m4 ================================================================== --- src/download/index.dcgi.m4 +++ src/download/index.dcgi.m4 @@ -13,10 +13,17 @@ function main(cmd, dir, files, file_size, format, iaout, is_archive, is_proxy, item_server, label, mtime, name, source, url) { dir = parts[2] item_id = parts[3] + + if (blocked) { + printf "[h|Download|URL:%s/download/%s|%s|%s]\n", + api_ssl_endpoint, item_id, server, port + exit 0 + } + if (dir == "download") { is_proxy = 1 } else { # dir == "direct" is_proxy = 0 Index: src/list/index.dcgi.m4 ================================================================== --- src/list/index.dcgi.m4 +++ src/list/index.dcgi.m4 @@ -42,10 +42,16 @@ } split(search, parts, "/") acct = parts[1] list_id = parts[2] + + if (blocked) { + printf "[h|List|URL:%s/details/%s/lists/%d|%s|%s]\n", + api_ssl_endpoint, acct, list_id, server, port + exit 0 + } signature = sprintf("list/%s", input) str = cache_init(signature) if (length(str) > 0) { print str Index: src/lists/index.dcgi.m4 ================================================================== --- src/lists/index.dcgi.m4 +++ src/lists/index.dcgi.m4 @@ -14,10 +14,16 @@ function main( cmd, count, fields, iaout, i, id, is_private, item, item_count, item_id, label, name, output, record, records, signature, str, url) { + if (blocked) { + printf "[h|Lists|URL:%s/details/%s/lists|%s|%s]\n", + api_ssl_endpoint, search, server, port + exit 0 + } + signature = sprintf("%s/lists", search) str = cache_init(signature) if (length(str) > 0) { print str return Index: src/listsort/index.dcgi.m4 
================================================================== --- src/listsort/index.dcgi.m4 +++ src/listsort/index.dcgi.m4 @@ -7,10 +7,16 @@ include(src/config.awk) incl(src/cgi.awk) function main( acct, i, lbl, list_id, opt) { + if (blocked) { + printf "[h|Internet Archive|URL:%s/|%s|%s]\n", + api_ssl_endpoint, server, port + exit 0 + } + lbl[1] = "Relevance" opt[1] = "" lbl[2] = "Weekly views [^]" opt[2] = "week asc" lbl[3] = "Weekly views [v]" Index: src/raw/index.cgi.m4 ================================================================== --- src/raw/index.cgi.m4 +++ src/raw/index.cgi.m4 @@ -22,10 +22,16 @@ incl(src/cgi.awk) incl(src/util.awk) incl(src/web.awk) function main() { + if (blocked) { + printf "[h|Internet Archive|URL:%s/|%s|%s]\n", + api_ssl_endpoint, server, port + exit 0 + } + if (path == "/debug/") { dump(search, TYPE_HEADERS) } else if (path == "/raw/") { dump(search, TYPE_RAW) } else if (path == "/text/") { Index: src/search/index.dcgi.m4 ================================================================== --- src/search/index.dcgi.m4 +++ src/search/index.dcgi.m4 @@ -13,10 +13,16 @@ function main(search, cmd, count, creator, descr, field, fields, i, iaout, id, item, items, jsout, label, numfound, order, order_names, page, rows, searchstr, sort_param, str, title, type, url) { + if (blocked) { + printf "[h|Search|URL:%s/|%s|%s]\n", api_ssl_endpoint, + server, port + exit 0 + } + order_names["addeddate"] = "addeddate" order_names["collection_size"] = "collection_size" order_names["createddate"] = "createddate" order_names["creator"] = "creatorSorter" order_names["date"] = "date" Index: src/sort/index.dcgi.m4 ================================================================== --- src/sort/index.dcgi.m4 +++ src/sort/index.dcgi.m4 @@ -7,10 +7,16 @@ include(src/config.awk) incl(src/cgi.awk) function main( i, lbl, opt) { + if (blocked) { + printf "[h|Internet Archive|URL:%s/|%s|%s]\n", + api_ssl_endpoint, server, port + exit 0 + } + lbl[1] = "Default 
[^]" opt[1] = "nav_order asc" lbl[2] = "Default [v]" opt[2] = "nav_order desc" lbl[3] = "Weekly views [^]" Index: src/wizard/step1/index.dcgi.m4 ================================================================== --- src/wizard/step1/index.dcgi.m4 +++ src/wizard/step1/index.dcgi.m4 @@ -7,10 +7,16 @@ include(src/config.awk) incl(src/cgi.awk) function main( i, lbl, opt, searchstr) { + if (blocked) { + printf "[h|Internet Archive|URL:%s/|%s|%s]\n", + api_ssl_endpoint, server, port + exit 0 + } + searchstr = parts[4] lbl[1] = "Any field contains" opt[1] = "anyfield" lbl[2] = "Any field does not contain" Index: src/wizard/step2/index.dcgi.m4 ================================================================== --- src/wizard/step2/index.dcgi.m4 +++ src/wizard/step2/index.dcgi.m4 @@ -7,10 +7,16 @@ include(src/config.awk) incl(src/cgi.awk) function main( field, newsearch, searchstr) { + if (blocked) { + printf "[h|Internet Archive|URL:%s/|%s|%s]\n", + api_ssl_endpoint, server, port + exit 0 + } + field = parts[4] searchstr = parts[5] if (field == "mediatype") { print "# Mediatype is:" Index: src/wizard/step3/index.dcgi.m4 ================================================================== --- src/wizard/step3/index.dcgi.m4 +++ src/wizard/step3/index.dcgi.m4 @@ -7,10 +7,16 @@ include(src/config.awk) incl(src/cgi.awk) function main( field, label, newsearch, op, searchstr, value) { + if (blocked) { + printf "[h|Internet Archive|URL:%s/|%s|%s]\n", + api_ssl_endpoint, server, port + exit 0 + } + field = parts[4] searchstr = parts[5] value = search if (field ~ /^-/) {