# ita2.awk version 1 by Ben Collver
#
# Convert between ITA-2 and ASCII.
# Input & output is always padded to 8-bit bytes.
#
# ITA-2
#
# ITA-5 AKA ASCII
#
# Usage:
#   awk -f ita2.awk [file ...]           encode text to ITA-2 (default)
#   awk -f ita2.awk decode [file ...]    decode ITA-2 back to text
#
# Extensions:
# * Handle characters that don't exist in ITA-2.
#   Setting: escape_unknown_characters = [0|1]
#   0 = Error on non-existing characters
#   1 = Escape non-existing characters (default)
#   escape format = FIGURES ALLSPACE_NOTINUSE digit1 digit2 digit3
#   digit = ITA-2 encoded octal digit
#
# Global state shared by the functions below:
#   mode            current shift state (mode_letters or mode_figures)
#   letters[]       text character -> ITA-2 code in letters shift
#   figures[]       text character -> ITA-2 code in figures shift
#   letters_decode[], figures_decode[]   the inverse mappings
#   ord[]           single character -> numeric value (0..255)
#   oct[]           three-digit octal string -> numeric value (0..255)
#
# Error messages are written to /dev/stderr so that they never end up
# inside the encoded or decoded data stream; the exit status is 1.

# decode_line(line)
#   Decode a string of ITA-2 codes and print the resulting text on
#   standard output.  The shift state is kept in the global `mode`.
#   Exits with status 1 on an unexpected ALLSPACE_NOTINUSE code, on a
#   truncated escape sequence, or on a code with no mapping in the
#   current shift.
#   Parameters after `line` are local variables.
function decode_line(line,    c, c2, ddd, i, len) {
    len = length(line)
    for (i = 1; i <= len; i++) {
        c = substr(line, i, 1)
        c2 = substr(line, i + 1, 1)
        if (c == LETTERS) {
            mode = mode_letters
        } else if (c == FIGURES) {
            mode = mode_figures
            if (c2 == ALLSPACE_NOTINUSE) {
                # Escape sequence: FIGURES ALLSPACE_NOTINUSE d1 d2 d3
                ddd = substr(line, i + 2, 3)
                if (length(ddd) < 3) {
                    print "Error: truncated escape sequence" > "/dev/stderr"
                    exit 1
                }
                printf "%s", unescape_char(ddd)
                # Skip ALLSPACE_NOTINUSE and the three digits; the loop
                # increment steps past the last of them.
                i += 4
            }
        } else if (c == CARRIAGERETURN) {
            printf "\r"
        } else if (c == LINEFEED) {
            printf "\n"
        } else if (c == SPACE) {
            printf " "
        } else if (c == ALLSPACE_NOTINUSE) {
            print "Error: unexpected ALLSPACE_NOTINUSE" > "/dev/stderr"
            exit 1
        } else {
            if (mode == mode_letters) {
                if (c in letters_decode) {
                    printf "%s", letters_decode[c]
                } else {
                    printf "Error: unrecognized letter %c(%d)\n", c, ord[c] > "/dev/stderr"
                    exit 1
                }
            } else {
                if (c in figures_decode) {
                    printf "%s", figures_decode[c]
                } else {
                    printf "Error: unrecognized figure %c(%d)\n", c, ord[c] > "/dev/stderr"
                    exit 1
                }
            }
        }
    }
    return
}

# encode_char(c)
#   Return the ITA-2 code sequence for the single character `c`,
#   prefixed with a LETTERS or FIGURES shift code when the global `mode`
#   has to change.  Characters found in neither table are escaped when
#   escape_unknown_characters is set; otherwise the program exits with
#   status 1.
#   `retval` is a local variable.
function encode_char(c,    retval) {
    retval = ""
    if (c == "\r") {
        retval = CARRIAGERETURN
    } else if (c == "\n") {
        retval = LINEFEED
    } else if (c == " ") {
        retval = SPACE
    } else if (c in letters) {
        if (mode == mode_figures) {
            retval = LETTERS letters[c]
            mode = mode_letters
        } else {
            retval = letters[c]
        }
    } else if (c in figures) {
        if (mode == mode_letters) {
            retval = FIGURES figures[c]
            mode = mode_figures
        } else {
            retval = figures[c]
        }
    } else {
        if (escape_unknown_characters) {
            retval = escape_char(c)
        } else {
            printf "Error: Unknown character: %s\n", c > "/dev/stderr"
            exit 1
        }
    }
    return retval
}

# encode_line(line)
#   Encode every character of `line` with encode_char() and print the
#   result on standard output without a trailing newline.
#   Parameters after `line` are local variables.
function encode_line(line,    c, i, len, x) {
    len = length(line)
    for (i = 1; i <= len; i++) {
        c = substr(line, i, 1)
        x = encode_char(c)
        printf "%s", x
    }
    return
}

# escape_char(c)
#   Return the escape sequence for a character that has no ITA-2 code:
#   FIGURES ALLSPACE_NOTINUSE followed by the three octal digits of
#   ord[c], each encoded as an ITA-2 figure.  Leaves `mode` set to
#   mode_figures, because the sequence starts with a FIGURES shift.
#   Parameters after `c` are local variables.
function escape_char(c,    d1, d2, d3, retval, str) {
    str = sprintf("%03o", ord[c])
    d1 = substr(str, 1, 1)
    d2 = substr(str, 2, 1)
    d3 = substr(str, 3, 1)
    retval = FIGURES ALLSPACE_NOTINUSE figures[d1] figures[d2] figures[d3]
    mode = mode_figures
    return retval
}

# unescape_char(ddd)
#   Inverse of escape_char(): `ddd` holds three ITA-2 figure codes that
#   spell an octal number.  Returns the character with that value when
#   escape_unknown_characters is set, otherwise a backslash followed by
#   the three decoded digits.  Exits with status 1 when the digits do
#   not form a valid escape.
#   Parameters after `ddd` are local variables.
function unescape_char(ddd,    d1, d2, d3, o, retval) {
    d1 = substr(ddd, 1, 1)
    d2 = substr(ddd, 2, 1)
    d3 = substr(ddd, 3, 1)
    # Test membership first: merely reading a missing key would create an
    # empty entry in figures_decode and hide later decode errors.
    if (!((d1 in figures_decode) && (d2 in figures_decode) && (d3 in figures_decode))) {
        print "Error: invalid escape sequence" > "/dev/stderr"
        exit 1
    }
    o = figures_decode[d1] figures_decode[d2] figures_decode[d3]
    if (escape_unknown_characters) {
        if (!(o in oct)) {
            print "Error: invalid escape sequence" > "/dev/stderr"
            exit 1
        }
        retval = sprintf("%c", oct[o])
    } else {
        retval = sprintf("\\%s", o)
    }
    return retval
}

BEGIN {
    escape_unknown_characters = 1

    command_decode = 2
    command_encode = 1
    command = command_encode
    if (ARGV[1] == "decode") {
        # Remove the keyword so awk does not treat it as an input file.
        delete ARGV[1]
        command = command_decode
        # hack to read entire input into $0
        RS = "\377"
    }

    mode_figures = 2
    mode_letters = 1
    mode = mode_letters

    # Control codes, written as octal escapes.
    ALLSPACE_NOTINUSE = "\0"
    CARRIAGERETURN = "\10"
    LINEFEED = "\2"
    SPACE = "\4"
    LETTERS = "\37"
    FIGURES = "\33"

    # Letters shift.
    letters["A"] = "\3"
    letters["B"] = "\31"
    letters["C"] = "\16"
    letters["D"] = "\11"
    letters["E"] = "\1"
    letters["F"] = "\15"
    letters["G"] = "\32"
    letters["H"] = "\24"
    letters["I"] = "\6"
    letters["J"] = "\13"
    letters["K"] = "\17"
    letters["L"] = "\22"
    letters["M"] = "\34"
    letters["N"] = "\14"
    letters["O"] = "\30"
    letters["P"] = "\26"
    letters["Q"] = "\27"
    letters["R"] = "\12"
    letters["S"] = "\5"
    letters["T"] = "\20"
    letters["U"] = "\7"
    letters["V"] = "\36"
    letters["W"] = "\23"
    letters["X"] = "\35"
    letters["Y"] = "\25"
    letters["Z"] = "\21"

    # Figures shift: each figure shares its code with one letter.
    figures["-"] = letters["A"]
    figures["?"] = letters["B"]
    figures[":"] = letters["C"]
    figures["\5"] = letters["D"] # ENQ
    figures["3"] = letters["E"]
    figures["%"] = letters["F"]
    figures["@"] = letters["G"]
    figures["£"] = letters["H"]
    figures["8"] = letters["I"]
    figures["\7"] = letters["J"] # BEL
    figures["("] = letters["K"]
    figures[")"] = letters["L"]
    figures["."] = letters["M"]
    figures[","] = letters["N"]
    figures["9"] = letters["O"]
    figures["0"] = letters["P"]
    figures["1"] = letters["Q"]
    figures["4"] = letters["R"]
    figures["'"] = letters["S"]
    figures["5"] = letters["T"]
    figures["7"] = letters["U"]
    figures["="] = letters["V"]
    figures["2"] = letters["W"]
    figures["/"] = letters["X"]
    figures["6"] = letters["Y"]
    figures["+"] = letters["Z"]

    # Lookup tables used by escape_char() and unescape_char().
    for (i = 0; i < 256; i++) {
        c = sprintf("%c", i)
        ord[c] = i
        c = sprintf("%03o", i)
        oct[c] = i
    }

    # Reverse tables for decoding.
    for (i in figures) {
        figures_decode[figures[i]] = i
    }
    for (i in letters) {
        letters_decode[letters[i]] = i
    }
}

{
    if (command == command_encode) {
        encode_line($0)
        # awk strips the newline that terminated this record, so
        # encode_line() never sees it.  Emit LINEFEED explicitly;
        # otherwise consecutive input lines would run together in the
        # encoded output.  (A final line without a trailing newline
        # also gets a LINEFEED.)
        printf "%s", encode_char("\n")
    } else {
        decode_line($0)
    }
}