REM
REM THIS FILE IS PART OF THE "MÓTSOGNIR GOPHER SERVER" PROJECT
REM http://www.viste-family.net/mateusz/motsognir/
REM
REM  Percent encoding/decoding routine with UTF8 support
REM  Author: Mateusz Viste <mateusz@viste-family.net>
REM  Last modified: 02 Jul 2010
REM

FUNCTION TranslatePercentEnc(RawString AS STRING, RevEnc AS BYTE = 0) AS STRING
  REM   Percent-encodes or percent-decodes RawString. UTF-8 multi-byte sequences
  REM   (lead byte 194..244 followed by continuation bytes 128..191) are handled
  REM   as a unit in both directions.
  REM
  REM   RevEnc accepts 3 possible values:
  REM     0 - Decode the provided %encoded string
  REM     1 - %-Encode the provided text string
  REM     9 - Tell me how many %encoded chars in the provided string you're unable to decode
  REM   Any value other than 1 takes the decoding path; only 9 changes what is returned.
  REM
  REM   Returns the encoded/decoded string, or (RevEnc = 9) the count of
  REM   undecodable %-sequences formatted as a trimmed decimal string.
  REM
  REM   When an invalid UTF-8 sequence is found, the event is logged through
  REM   LogLine() and ConnClose() is called (both are defined outside this file).
  REM   Whether ConnClose() returns is not visible from here; if it does, processing
  REM   stops and whatever has been produced so far is returned.
  REM
  REM   Decoding is table driven: besides UTF-8 sequences, only the characters
  REM   listed in the tables below are decoded. Any other %XX is dropped from
  REM   the output and counted as "not decoded".
  DIM EncodePerc(1 TO 22) AS STRING => {"%20","%21","%22","%23","%24","%25","%26","%27","%28","%29","%2A","%2B","%2C","%3A","%3B","%3C","%3D","%3E","%3F","%40","%5B","%5D"}
  DIM EncodeText(1 TO 22) AS STRING => {" ",  "!",CHR(34),"#",  "$",  "%",  "&",  "'",  "(",  ")",  "*",  "+",  ","  ,":",  ";",  "<",  "=",  ">",  "?",  "@",  "[",  "]"}

  ' Used for decoding only:
  DIM DecodeAddPerc(1 TO 5) AS STRING => {"%2E","%5F","%2D","%7E", "%2F"}
  DIM DecodeAddText(1 TO 5) AS STRING => {".",  "_",  "-",  "~",   "/"}

  ' Wynik (Polish for "result") accumulates the output string.
  DIM AS STRING WorkString, Wynik, HexDump, Triplet
  DIM AS INTEGER x, y, JobDone, NotDecoded = 0, InvalidUTF8seq
  DIM AS INTEGER LeadByte, SeqLen, DumpPos
  DIM Utf8SeqBuff(1 TO 4) AS INTEGER
  WorkString = RawString

  IF RevEnc = 1 THEN  ' Encoding
    x = 1   ' 1-based position; WorkString[x - 1] is the byte at that position
    WHILE x <= LEN(WorkString)
      JobDone = 0
      LeadByte = WorkString[x - 1]
      IF LeadByte >= 194 AND LeadByte <= 244 THEN   ' UTF8 lead byte
        ' The lead byte determines the total length of the sequence
        IF LeadByte <= 223 THEN
          SeqLen = 2
        ELSEIF LeadByte <= 239 THEN
          SeqLen = 3
        ELSE
          SeqLen = 4
        END IF

        InvalidUTF8seq = 0
        IF x + SeqLen - 1 > LEN(WorkString) THEN
          ' The string ends before the sequence is complete
          InvalidUTF8seq = 1
        ELSE
          ' Every byte following the lead byte must be a continuation byte
          FOR y = 1 TO SeqLen - 1
            IF WorkString[x - 1 + y] < 128 OR WorkString[x - 1 + y] > 191 THEN InvalidUTF8seq = 1
          NEXT y
        END IF

        IF InvalidUTF8seq = 0 THEN
          ' Emit one "%XX" per byte of the sequence (including the last one)
          FOR y = 0 TO SeqLen - 1
            Wynik += "%" + HEX(WorkString[x - 1 + y], 2)
          NEXT y
          x += SeqLen - 1   ' the final x += 1 below skips the lead byte
          JobDone = 1
        ELSE
          ' Build the hex dump in its own variables so that neither the result
          ' accumulator nor the scan position gets overwritten.
          HexDump = CHR(34)
          FOR DumpPos = 0 TO LEN(RawString) - 1
            IF DumpPos > 0 THEN HexDump += " "
            HexDump += HEX(RawString[DumpPos], 2)
          NEXT DumpPos
          HexDump += CHR(34)
          LogLine("Illegal UTF8 sequence detected! The connection has been killed. Hex dump: " + HexDump, 1)
          ConnClose()
          EXIT WHILE   ' nothing more to encode once the connection has been killed
        END IF
      ELSE   ' Non-UTF8 byte: encode it if it is listed in the table
        y = 1
        WHILE y <= UBOUND(EncodePerc) AND JobDone = 0
          IF MID(WorkString, x, 1) = EncodeText(y) THEN
            Wynik += EncodePerc(y)
            JobDone = 1
          END IF
          y += 1
        WEND
      END IF
      ' Anything that was not encoded above is copied through unchanged
      IF JobDone = 0 THEN Wynik += MID(WorkString, x, 1)
      x += 1
    WEND
  ELSE  ' Decoding

    ' Decode any "+" char as a space (single in-place pass). This runs before
    ' %-decoding, so a "+" produced from "%2B" is preserved.
    FOR x = 0 TO LEN(WorkString) - 1
      IF WorkString[x] = ASC("+") THEN WorkString[x] = ASC(" ")
    NEXT x

    IF INSTR(WorkString, "%") > 0 THEN ' Do not check if no "%" in the string
      x = 1
      DO
        IF MID(WorkString, x, 1) = "%" THEN
          ' Pre-parse up to four consecutive "%XX" triplets starting at x.
          ' A slot is set to -1 when no "%" is found at the expected offset.
          FOR y = 1 TO 4
            IF MID(WorkString, x + 3 * (y - 1), 1) = "%" THEN
              Utf8SeqBuff(y) = VALINT("&h" + MID(WorkString, x + 3 * (y - 1) + 1, 2))
            ELSE
              Utf8SeqBuff(y) = -1
            END IF
          NEXT y

          JobDone = 0
          IF Utf8SeqBuff(1) >= 194 AND Utf8SeqBuff(1) <= 244 THEN   ' UTF8 lead byte
            IF Utf8SeqBuff(1) <= 223 THEN
              SeqLen = 2
            ELSEIF Utf8SeqBuff(1) <= 239 THEN
              SeqLen = 3
            ELSE
              SeqLen = 4
            END IF

            ' All following triplets must hold continuation bytes (-1 fails too)
            InvalidUTF8seq = 0
            FOR y = 2 TO SeqLen
              IF Utf8SeqBuff(y) < 128 OR Utf8SeqBuff(y) > 191 THEN InvalidUTF8seq = 1
            NEXT y

            IF InvalidUTF8seq = 0 THEN
              FOR y = 1 TO SeqLen
                Wynik += CHR(Utf8SeqBuff(y))
              NEXT y
              x += 3 * SeqLen - 1   ' the final x += 1 below completes the skip
              JobDone = 1
            ELSE
              LogLine("Illegal UTF8 sequence detected! The connection has been killed. %-encoded string: " + WorkString, 1)
              ConnClose()
              EXIT DO   ' stop decoding once the connection has been killed
            END IF
          ELSE
            IF x <= LEN(WorkString) - 2 THEN
              ' Hex digits are case-insensitive, the tables are upper case
              Triplet = UCASE(MID(WorkString, x, 3))
              '  Check for general characters
              y = 1
              WHILE y <= UBOUND(EncodePerc) AND JobDone = 0
                IF Triplet = EncodePerc(y) THEN
                  Wynik += EncodeText(y)
                  JobDone = 1
                END IF
                y += 1
              WEND
              '  Check for optional characters (decoding only)
              y = 1
              WHILE y <= UBOUND(DecodeAddPerc) AND JobDone = 0
                IF Triplet = DecodeAddPerc(y) THEN
                  Wynik += DecodeAddText(y)
                  JobDone = 1
                END IF
                y += 1
              WEND
            END IF
            ' Unknown (or truncated) triplet: count it and skip it
            IF JobDone = 0 THEN NotDecoded += 1
            x += 2
          END IF
        ELSE
          Wynik += MID(WorkString, x, 1)
        END IF
        x += 1
      LOOP UNTIL x > LEN(WorkString)
    ELSE
      Wynik = WorkString
    END IF
  END IF

  IF RevEnc = 9 THEN RETURN TRIM(STR(NotDecoded)) ELSE RETURN Wynik
END FUNCTION
