md2gopher.sh - www.codemadness.org - www.codemadness.org saait content files
(HTM) git clone git://git.codemadness.org/www.codemadness.org
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
md2gopher.sh (6040B)
---
1 #!/bin/sh
2 # Limitations:
3 # - Doesn't handle multiple links per line.
4 # - Parsing is not complete. It doesn't cover complex cases.
5
6 LC_ALL=C awk -F '\t' '
# Setup: tab-separated fields for input and output, plus the default
# host and port that are written into every menu line.
BEGIN {
	OFS = "\t";
	FS = OFS;
	HOST = "codemadness.org";
	PORT = "70";
}
# Report whether s contains at least one ASCII letter.
# Returns 1 when it does, 0 otherwise.
function hasalpha(s) {
	return match(s, /[a-zA-Z]+/) ? 1 : 0;
}
# Surround s with guillemet markers so a link label stands out when it
# appears inside running text. Returns the marked string.
function highlight(s) {
	return ("»" s "«");
}
# Report whether s is an absolute URL, i.e. starts with "scheme:".
# Returns 1 when it does, 0 otherwise.
function isabsurl(s) {
	# "-" is placed last in the bracket expression so it is a literal.
	# Written as "+-." it forms a range that also admits ",".
	return match(s, /^[a-z0-9+.-]+:/) ? 1 : 0;
}
# Report whether s starts with the "gopher:" or "gophers:" scheme.
# Returns 1 when it does, 0 otherwise.
function isgopherurl(s) {
	return match(s, /^gopher[s]?:/) ? 1 : 0;
}
# Parse a URL of the form scheme://host[:port]/<type><selector> and store
# the parts in the global variables host, port, type and selector.
#
# s: the URL to parse.
#
# A missing, non-numeric or out-of-range port falls back to 70. When the
# path is shorter than two characters the type is "1" (directory) and the
# path is used unchanged as selector.
# The names after s are local variables; callers pass only s.
function parseurl(s,    i, h, p, rest, t) {
	p = "";

	# strip the scheme and the optional "//".
	sub("^[a-z0-9+.-]+:(//)?", "", s);

	# split the authority part from the path.
	i = index(s, "/");
	if (i > 0) {
		h = substr(s, 1, i - 1);
		rest = substr(s, i);
	} else {
		h = s;
		rest = "";
	}

	if (substr(h, 1, 1) == "[") {
		# IPv6 literal: a port can only follow the closing bracket.
		i = index(h, "]");
		if (i > 0) {
			if (substr(h, i + 1, 1) == ":")
				p = int(substr(h, i + 2));
			h = substr(h, 1, i);
		}
	} else {
		i = index(h, ":");
		if (i > 0) {
			p = int(substr(h, i + 1));
			# drop ":port" so the host field holds the hostname only.
			h = substr(h, 1, i - 1);
		}
	}
	# 65535 is the highest valid port number.
	if (p == "" || p <= 0 || p > 65535)
		p = 70;

	# the first character after the leading "/" is the item type,
	# the remainder is the selector.
	if (length(rest) >= 2) {
		t = substr(rest, 2, 1);
		rest = substr(rest, 3);
	} else {
		t = "1"; # directory
	}

	host = h;
	port = p;
	type = t;
	selector = rest;
}
# Detect the Gopher item type from an extension, filename, path or URL.
#
# s: the name to inspect; matching is case-insensitive.
#
# Returns "0" for text, "h" for HTML, "I" for images and "9" (binary)
# for everything else.
function detecttype(s) {
	# the input is lowercased, so every pattern below must be lowercase too.
	s = tolower(s);
	if (s ~ /\.(txt|md|sh|mk|c|h|pl|s|css|js|xml|mbox|sha256|patch|diff|conf|vim|json|tsv|csv)$/)
		return "0"; # text
	# well-known extensionless text files, as a whole path component.
	if (s ~ /(^|\/)(makefile|readme|todo|cgitrc)$/)
		return "0"; # text
	if (s ~ /\.(htm|html)$/)
		return "h"; # HTML
	if (s ~ /\.(gif|png|webp|jpg|jpeg|bmp|xpm|avif)$/)
		return "I"; # image
	return "9"; # binary
}
# Turn s into an absolute URL.
#
# s: an absolute URL or a path.
#
# An absolute URL is returned unchanged. Anything else is made
# root-relative and prefixed with "https://" and the global host.
# The name after s is a local variable; callers pass only s.
function makeabsurl(s,    p) {
	if (isabsurl(s))
		return s;
	p = s;
	# make sure a non-empty path starts with "/".
	if (p != "" && index(p, "/") != 1)
		p = "/" p;
	return "https://" host p;
}
# Undo Markdown escaping in s: every backslash-escaped underscore becomes
# a plain underscore. Returns the resulting string.
function unescape(s) {
	gsub("\\\\_", "_", s);
	return s;
}
# Runs first for every input line: reset the per-line state and prepare
# the default display text, used when no link rule below rewrites it.
{
	url = "";
	selector = "";
	type = "";
	port = PORT;
	host = HOST;

	text = $0;
	# lines that start with a space are code and keep their escapes.
	# NOTE(review): confirm the width of this leading-whitespace test and
	# of the tab replacement below against the upstream file.
	if (match(text, /^ /) == 0)
		text = unescape(text);
	# tabs separate the output fields, so none may remain in the text.
	gsub("\t", " ", text);
	gsub("[[:cntrl:]]", " ", text);
}
# autolink, like: <scheme:rest>
# the URL itself is used as the display text.
# the match stops at the first ">" so that a later ">" on the same line
# does not become part of the URL.
type == "" && /<[a-z0-9+.-]+:[^>]*>/ {
	type = "h";
	match($0, /<[a-z0-9+.-]+:[^>]*>/);
	# drop the surrounding "<" and ">".
	url = substr($0, RSTART + 1, RLENGTH - 2);

	alt = url;

	before = substr($0, 1, RSTART - 1);
	after = substr($0, RSTART + RLENGTH);

	# highlight is not necessary if the line has no words and an URL.
	if (hasalpha(before) || hasalpha(after))
		alt = highlight(alt); # highlight inside text.
	text = before unescape(alt) after;
}
# linked thumbnail image, like: [![alt](thumbnail)](image)
# use the image alt text as text, but the full image as selector.
type == "" && /\[!\[[^]]*\]\([^)]*\)\]\([^)]*\)/ {
	type = "I";

	match($0, /\[!\[[^]]*\]\([^)]*\)\]\([^)]*\)/);
	# keep the matched construct so all further parsing stays inside it.
	span = substr($0, RSTART, RLENGTH);
	before = substr($0, 1, RSTART - 1);
	after = substr($0, RSTART + RLENGTH);

	# the alt text sits between the leading "[![" and the first "]".
	alt = "";
	endalt = substr(span, 4);
	idx = index(endalt, "]");
	if (idx != 0) {
		alt = substr(endalt, 1, idx - 1);
	}
	# highlight is not necessary if the line has no words and an URL.
	if (hasalpha(before) || hasalpha(after))
		alt = highlight(alt); # highlight inside text.
	text = before unescape(alt) after;

	# outer link target: ")](" url ")". Only the matched span is searched,
	# so similar text elsewhere on the line cannot be picked up instead.
	# A target containing a space does not match and leaves url empty.
	url = "";
	if (match(span, /\)\]\([^) ]*\)/))
		url = substr(span, RSTART + 3, RLENGTH - 4);
}
# image, like: ![alt](image)
# use the alt text as text and the image as selector.
type == "" && /!\[[^]]*\]\([^)]*\)/ {
	type = "I";

	match($0, /!\[[^]]*\]\([^)]*\)/);
	# keep the matched construct so all further parsing stays inside it.
	span = substr($0, RSTART, RLENGTH);
	before = substr($0, 1, RSTART - 1);
	after = substr($0, RSTART + RLENGTH);

	# the alt text sits between the leading "![" and the first "]".
	alt = "";
	endalt = substr(span, 3);
	idx = index(endalt, "]");
	if (idx != 0) {
		alt = substr(endalt, 1, idx - 1);
	}
	# highlight is not necessary if the line has no words and an URL.
	if (hasalpha(before) || hasalpha(after))
		alt = highlight(alt); # highlight inside text.
	text = before unescape(alt) after;

	# image target: "](" url ")". Only the matched span is searched, so a
	# plain link earlier on the line cannot supply the URL instead.
	# A target containing a space does not match and leaves url empty.
	url = "";
	if (match(span, /\]\([^) ]*\)/))
		url = substr(span, RSTART + 2, RLENGTH - 3);
}
# link, like: [label](url)
# the label becomes the display text. The type stays empty here so the
# final rule can derive it from the URL.
type == "" && /\[[^]]*\]\([^)]*\)/ {
	match($0, /\[[^]]*\]\([^)]*\)/);
	after = substr($0, RSTART + RLENGTH);
	before = substr($0, 1, RSTART - 1);

	# the label runs from just after "[" up to the first "]".
	endalt = substr($0, RSTART + 1);
	idx = index(endalt, "]");
	alt = (idx != 0) ? substr(endalt, 1, idx - 1) : "";

	# a line that is nothing but the link needs no markers around it.
	if (hasalpha(before) || hasalpha(after))
		alt = highlight(alt);
	text = before unescape(alt) after;

	# target: the text between "](" and ")".
	match($0, /\]\([^)]*\)/);
	url = substr($0, RSTART + 2, RLENGTH - 3);
}
# Runs last for every input line: turn the collected url (if any) into a
# selector, host and port, choose the item type and print one menu line
# of the form: type text TAB selector TAB host TAB port CR LF.
{
	if (url != "") {
		if (isabsurl(url)) {
			if (isgopherurl(url)) {
				# parse gopher URL and make it a selector.
				parseurl(url);
			} else if ((i = index(url, "://git.codemadness.org/")) != 0 &&
			    (index(url, "/file/") != 0 || index(url, "/commit/") != 0)) {
				# site-specific: stagit to stagit-gopher (gph) pages.
				page = substr(url, i + length("://git.codemadness.org/"));
				url = "";
				type = "1"; # directory
				# the dot is escaped so only a real ".html" suffix is rewritten.
				sub(/\.html$/, ".gph", page);
				selector = "/git/" page;
			} else {
				type = "h"; # type "h" for "URL:".
				selector = "URL:" url;
			}
		} else {
			# url is known to be relative and non-empty in this branch.
			# if a type is already set then do not detect the type.
			if (type == "")
				type = detecttype(url);

			if (type == "h" && index(url, "/") == 0) {
				# site-specific: relative page link.
				page = url;
				url = "";
				type = "1"; # directory
				sub(/\.html$/, "", page);
				selector = "/phlog/" page;
			} else if (index(url, "/git/") == 1) {
				# site-specific: stagit to stagit-gopher (gph) pages.
				page = url;
				url = "";
				sub(/\.html$/, ".gph", page);
				if (detecttype(page) == "0")
					type = "0"; # text (like for atom.xml).
				else
					type = "1"; # directory
				selector = page;
			} else {
				# make path root-relative.
				if (index(url, "/") != 1)
					url = "/" url;
				selector = url;
			}
		}
	}
	# a line without any link is an informational item.
	if (type == "")
		type = "i";
	# the link rules rebuild text from the raw input line, so tabs and
	# other control characters are removed again here. A tab in the
	# display string would otherwise be read as a field separator.
	gsub("[[:cntrl:]]", " ", text);
	printf("%s\r\n", type text "\t" selector "\t" host "\t" port);
}
# After the last input line, print a final line holding a single period.
END {
	printf(".\r\n");
}'