add simple mail filter script - randomcrap - random crap programs of varying quality
(HTM) git clone git://git.codemadness.org/randomcrap
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 070669888d0464f3e89d5d2a7c7862367d453d8a
(DIR) parent 6a468b63e4b0d34e85596acc7561021921d12f63
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Wed, 8 Oct 2025 19:08:44 +0200
add simple mail filter script
silent but deadly...
Diffstat:
A config/scripts/mail_filter.sh | 165 +++++++++++++++++++++++++++++++
1 file changed, 165 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/config/scripts/mail_filter.sh b/config/scripts/mail_filter.sh
@@ -0,0 +1,165 @@
+#!/bin/sh
+# Filters maildir with some crude logic.
+# Adds anti-spam header for further filtering/display.
+# Completely deletes the most obvious spam.
+#
+# Dependencies: OpenBSD date and touch, awk, find, mktemp and sed (with -i).
+
+# cutoff is: current time - 3 days
+days="3"
+
+export LC_ALL=C
+
+# Marker file whose mtime is set to the cutoff time; listfiles compares
+# mails against it with find -newer.
+cutoff="$(mktemp)" || exit 1
+
+# cleanup temporary file on exit. Installed right after the file exists so
+# the early exits below do not leave it behind.
+trap 'rm -f "$cutoff"' EXIT
+
+now=$(date +'%s')
+# date -r <seconds> formats the given epoch time (OpenBSD date, see the
+# dependencies above). Stop when it fails: with an empty timestamp the
+# marker would keep its creation time and no mail would ever be newer.
+timestamp="$(date -r "$((now - (days * 86400)))" +'%Y%m%d%H%M')" || exit 1
+touch -t "$timestamp" "$cutoff" || exit 1
+
+# processmails: read "ACTION<TAB>file" lines from stdin and apply them:
+#   DELETE  remove the mail file.
+#   SPAM    insert an "X-Spam-Status: Yes" header before the Subject header.
+# Lines with any other action are ignored. Every applied action is logged
+# to stderr.
+processmails() {
+	# the fields are separated by a TAB: split on that only.
+	tab="$(printf '\t')"
+	while IFS="$tab" read -r action file; do
+		case "$action" in
+		DELETE)
+			echo "Deleted spam: ${file}" >&2
+			rm -f -- "$file"
+			;;
+		SPAM)
+			echo "Marking as spam: ${file}" >&2
+			# insert header before Subject header (which probably exists).
+			# The match is limited to the header (line 1 up to the first
+			# empty line) and anchored to the start of the line: an
+			# unanchored /Subject:/ also matches "Thread-Subject:" and
+			# "Subject:" text in the body and would insert the header
+			# at each of those places too.
+			sed -i '1,/^$/{
+/^Subject:/i\
+X-Spam-Status: Yes
+}' "$file"
+			;;
+		esac
+	done
+}
+
+# debugmails: read "action file" lines from stdin and print each pair to
+# stderr. Nothing else is done with them.
+debugmails() {
+	while read -r verb target; do
+		echo "ACTION=$verb, FILE=$target"
+	done >&2
+}
+
+# listfiles: print the path of every regular file below the new/ and cur/
+# directories of the maildir that is newer than the $cutoff marker file,
+# one path per line.
+listfiles() {
+	# Both directories are spelled out: brace expansion ({new,cur}) is not
+	# part of POSIX sh and this script runs under #!/bin/sh.
+	for d in ~/Maildir/codemadness.org/new ~/Maildir/codemadness.org/cur; do
+		# -type f: without it find also prints the directory itself when
+		# it is newer than the cutoff, and that is not a mail.
+		find "$d" -type f -newer "$cutoff"
+	done
+}
+
+# filtermail(filepath)
+# Scan the header of one mail file with awk and print at most one line to
+# stdout: "DELETE<TAB>filepath" or "SPAM<TAB>filepath".
+# Nothing is printed for whitelisted mails, and no SPAM line is printed for
+# mails that already carry an "X-Spam-Status: ...Yes" header.
+# NOTE(review): every rule matches one physical line; folded (continued)
+# header lines are not joined before matching.
+filtermail() {
+ awk '
+BEGIN {
+ FS = OFS = "\t";
+}
+# Only the header is scanned: stop reading at the first empty line.
+# exit still runs the END rule, which makes the decision.
+!length($0) {
+ exit; # end of header
+}
+/^X-Spam-Status: .*Yes/ {
+ alreadyspam = 1; # already flagged;
+}
+
+# DEBUG
+#/^From:/ { fromline = $0; }
+#/^To:/ { toline = $0; }
+#/^Content-[Tt]ype:/ { contenttypeline=$0; }
+
+# tld: the From line contains an address ending in .cn or .cc before ">".
+/^From:/ && /\.(cn|cc)>/ { tld=1; } # china
+
+/^X-[Mm]ailer:/ {
+ line = tolower($0); # case-insensitive matching.
+}
+
+# mailer: the X-Mailer header names one of these clients.
+/^X-[Mm]ailer:/ && line ~ /foxmail/ { mailer=1; } # chinese e-mail client
+/^X-[Mm]ailer:/ && line ~ /outlook/ { mailer=1; }
+
+/^Content-[Tt]ype:.*multipart\// { multipart=1; } # can be HTML attached or HTML alternative
+/^Content-[Tt]ype:.*text\/html/ { html=1; }
+
+# empty subject or in all caps should be a trigger.
+/^Subject:/ {
+ # everything after the 8 characters of "Subject:", leading space included.
+ subject = substr($0, 9);
+ # true when the text has no lowercase letter, which covers an empty subject.
+ if (subject == toupper(subject))
+ rsub = 1;
+ subject = tolower(subject); # for matching
+ # simple masking, like "R0LEX" -> "rolex".
+ gsub("0", "o", subject);
+ gsub("1", "i", subject);
+ gsub("3", "e", subject);
+}
+
+# words that are very commonly used in spam.
+# These rules test the lowercased and unmasked subject prepared above.
+/^Subject:/ && subject ~ / hi$/ { rsub=1; }
+/^Subject:/ && subject ~ /lottery/ { rsub=1; }
+/^Subject:/ && subject ~ /solicit/ { rsub=1; }
+/^Subject:/ && subject ~ /freight/ { rsub=1; }
+/^Subject:/ && subject ~ /china/ { rsub=1; }
+/^Subject:/ && subject ~ /immediately/ { rsub=1; }
+/^Subject:/ && subject ~ /donation/ { rsub=1; }
+/^Subject:/ && subject ~ /funds/ { rsub=1; }
+/^Subject:/ && subject ~ /business/ { rsub=1; }
+/^Subject:/ && subject ~ /proposition/ { rsub=1; }
+/^Subject:/ && subject ~ /account warning/ { rsub=1; }
+/^Subject:/ && subject ~ /beneficiary/ { rsub=1; }
+/^Subject:/ && subject ~ /investment/ { rsub=1; }
+/^Subject:/ && subject ~ /luxury/ { rsub=1; }
+/^Subject:/ && subject ~ /rolex/ { rsub=1; }
+/^Subject:/ && subject ~ /supplier/ { rsub=1; }
+/^Subject:/ && subject ~ /password expired/ { rsub=1; }
+/^Subject:/ && subject ~ /coupon/ { rsub=1; }
+/^Subject:/ && subject ~ /request for quotation/ { rsub=1; }
+/^Subject:/ && subject ~ /email account is due for renewal/ { rsub=1; }
+/^Subject:/ && subject ~ /investment opportunity/ { rsub=1; }
+/^Subject:/ && subject ~ /louis vuitton/ { rsub=1; }
+
+# runs for every line: forget the subject text once its rules have run.
+{ subject=""; }
+
+# to: the To line mentions info@codemadness. The END rule deletes such mails.
+# NOTE(review): presumably this address only receives spam - confirm.
+/^[Tt]o:.*info@codemadness/ { to=1; }
+
+/^([Tt]o|[Cc]c):.*openbsd\.org/ {
+ # mails sent to mailinglists are never spam.
+ whitelist = 1;
+# print "SKIP" "\t" FILENAME; # DEBUG
+}
+
+# Decision, in this order: whitelist wins over everything, then DELETE (d),
+# then an existing spam flag suppresses SPAM, then SPAM (s).
+END {
+# print FILENAME > "/dev/stderr";
+# print " TLD=" tld ", html=" html ",to=" to ",fromline=" fromline > "/dev/stderr";
+# print " toline=" toline > "/dev/stderr";
+# print " contenttype=" contenttypeline > "/dev/stderr";
+
+ if (whitelist)
+ exit;
+ if (to)
+ d = 1;
+ if (rsub || mailer)
+ s = 1;
+ if (tld && multipart)
+ s = 1;
+ if (tld && html)
+ s = 1;
+ if (tld && html && mailer) {
+ # example: russian HTML Outlook mail, chinese HTML Foxmail
+ d=1;
+ }
+ if (tld && multipart && mailer) {
+ # same as above, but with (typically) HTML attached.
+ d=1;
+ }
+
+ if (d) {
+ print "DELETE" "\t" FILENAME;
+ } else if (alreadyspam) {
+ # the header is already there: do not report SPAM again.
+ exit;
+ } else if (s) {
+ print "SPAM" "\t" FILENAME;
+ }
+}
+' "$1"
+}
+
+# Run every recent mail through filtermail and apply the resulting actions.
+# For a dry run replace processmails with debugmails: it only prints the
+# actions to stderr.
+# IFS= keeps leading and trailing whitespace of a path intact.
+listfiles | while IFS= read -r f; do
+	filtermail "$f"
+done | processmails