# Pretty-print Meal-Master recipe to HTML format
# Usage: awk -f mmhtml.awk <recipe.mmf >recipe.html
# Only works with single-column Meal-Master format
# https://github.com/wedesoft/anymeal/blob/master/anymeal/html.cc

# Return the number of elements stored in array a (0 for an empty array).
#
# count and key are awk locals (extra parameters the caller never passes),
# so counting no longer overwrites the script-wide variables "retval" and
# "i" that other parts of the script use as loop counters.
function array_size(a,    count, key) {
    count = 0
    for (key in a) {
        count++
    }
    return count
}

# Write the parsed recipe to standard output as a complete HTML document.
#
# Reads the globals filled in by the parsing rules:
#   title, categories, yield         heading fields
#   gsections[name]                  number of ingredients in section "name"
#   amounts/units/ingredients[name, n]
#   tsections[name]                  number of instruction lines in "name"
#   instructions[name, n]
# list_open and par_open stay global because html_close() reads and resets
# them.  The names after the gap in the parameter list are awk locals.
#
# NOTE(review): "for (key in array)" visits keys in an unspecified order, so
# a recipe with several sections may be rendered in a different order than
# the input; fixing that needs the parser to record section order.
function html(    gs, ts, n, count, line, pre, url, post) {
    printf "<html>\n"
    printf "<head>\n"
    printf "<meta charset=\"UTF-8\">\n"
    printf "<title>%s</title>\n", html_encode(title)
    printf "</head>\n"
    printf "<body>\n"
    # The heading is escaped exactly like <title> above.
    printf "<h2>%s</h2>\n", html_encode(title)
    if (length(categories) > 0) {
        printf "<p><b>Categories:</b> %s</p>\n", html_encode(categories)
    }
    if (length(yield) > 0) {
        printf "<p><b>Yield:</b> %s</p>\n", html_encode(yield)
    }

    if (array_size(ingredients) > 0) {
        printf "<h3>Ingredients</h3>\n"
        printf "<table>\n"
        printf "<tr style=\"white-space:nowrap\">\n"
        printf "<th style=\"text-align:left\">Amount</th>\n"
        printf "<th style=\"text-align:left\">Unit</th>\n"
        printf "<th style=\"text-align:left\">Ingredient</th>\n"
        printf "</tr>\n"
        for (gs in gsections) {
            # Unnamed sections get no header row, mirroring how unnamed
            # instruction sections get no <h4> below.
            if (length(gs) > 0) {
                printf "<tr>\n"
                printf "<td colspan=\"3\"><em>%s</em></td>\n", html_encode(gs)
                printf "</tr>\n"
            }
            count = gsections[gs]
            for (n = 1; n <= count; n++) {
                printf "<tr>\n"
                printf "<td style=\"white-space:nowrap\">%s</td>\n",
                    html_encode(amounts[gs, n])
                # Unknown unit codes are passed through by unit_name(), so
                # the cell is escaped like every other cell.
                printf "<td style=\"white-space:nowrap\">%s</td>\n",
                    html_encode(unit_name(units[gs, n]))
                printf "<td>%s</td>\n", html_encode(ingredients[gs, n])
                printf "</tr>\n"
            }
        }
        printf "</table>\n"
    }

    if (array_size(instructions) > 0) {
        printf "<h3>Instructions</h3>\n"
        list_open = 0
        par_open = 0
        for (ts in tsections) {
            html_close()
            if (length(ts) > 0) {
                printf "<h4>%s</h4>\n", html_encode(ts)
            }
            count = tsections[ts]
            for (n = 1; n <= count; n++) {
                line = instructions[ts, n]
                if (line ~ /^[[:space:]]*$/) {
                    # A blank line ends the current list or paragraph;
                    # html_close() does nothing when neither is open.
                    html_close()
                } else if (match(line, /^[[:space:]]*\* /)) {
                    if (par_open) {
                        html_close()
                    }
                    if (!list_open) {
                        list_open = 1
                        printf "<ul>\n"
                    }
                    printf "<li>%s</li>\n", html_encode(substr(line, RLENGTH+1))
                } else {
                    # Ordinary text and URL lines both belong to a
                    # paragraph, never directly inside <ul>.
                    if (list_open) {
                        html_close()
                    }
                    if (!par_open) {
                        printf "<p>\n"
                        par_open = 1
                    }
                    if (match(line, /(gemini|gopher|http|https):\/\/[^[:space:]]*/)) {
                        # substr() positions are 1-based: the text before
                        # the URL is 1..RSTART-1 and the text after it
                        # starts at RSTART+RLENGTH.
                        pre = substr(line, 1, RSTART-1)
                        url = substr(line, RSTART, RLENGTH)
                        post = substr(line, RSTART+RLENGTH)
                        printf "%s<a href=\"%s\">%s</a>%s\n",
                            html_encode(pre),
                            uri_encode(url),
                            html_encode_both(url),
                            html_encode(post)
                    } else {
                        printf "%s\n", html_encode(trim(line))
                    }
                }
            }
        }
        html_close()
    }

    printf "</body>\n"
    printf "</html>\n"
    return
}

# Close whichever instruction container is currently open.
#
# Uses the global flags list_open and par_open: an open list is closed with
# </ul>; otherwise an open paragraph is closed with </p>.  At most one
# closing tag is printed per call, and the matching flag is reset to 0.
function html_close() {
    if (list_open) {
        printf "</ul>\n"
        list_open = 0
        return
    }
    if (!par_open) {
        return
    }
    printf "</p>\n"
    par_open = 0
}

# Encode str for use as HTML element content by delegating to
# html_encode_entities().  The result is also left in the global scratch
# variable retval, which this function has always assigned.
function html_encode(str) {
    return (retval = html_encode_entities(str))
}

# Encode str with html_encode_entities() and then pass the result through
# html_encode_quotes().  The final value is also stored in the global
# scratch variable retval.
function html_encode_both(str) {
    retval = html_encode_quotes(html_encode_entities(str))
    return retval
}

# Return str with the HTML markup characters &, < and > replaced by the
# character references &amp;, &lt; and &gt;.
#
# In a gsub() replacement a bare "&" stands for the matched text, so a
# literal ampersand has to be written as "\\&"; without that, "<" would be
# turned into "<lt;" instead of "&lt;".  The ampersand is replaced first so
# the references produced for < and > are not encoded a second time.
function html_encode_entities(str) {
    gsub(/&/, "\\&amp;", str)
    gsub(/</, "\\&lt;", str)
    gsub(/>/, "\\&gt;", str)
    return str
}

# Return str with double and single quotes replaced by the character
# references &quot; and &#39;.
#
# In a gsub() replacement a bare "&" stands for the matched text, so the
# literal ampersand is written as "\\&".  Both references end with the
# ";" that terminates a character reference.
function html_encode_quotes(str) {
    gsub(/"/, "\\&quot;", str)
    gsub(/'/, "\\&#39;", str)
    return str
}

# Split one fixed-width ingredient line into its columns and publish the
# result through globals.
#
# Columns read: characters 1-7 (amount), 9-10 (unit), 12 onwards (text).
# A line whose amount and unit columns are blank and whose text starts with
# optional whitespace and a dash is a continuation: its text, minus that
# prefix, is appended to the global "ingredient" after a single space and
# is_continuation is set to 1 (amount and unit keep their previous values).
# Any other line replaces the globals amount, unit and ingredient and sets
# is_continuation to 0.
function ingredient_parse(line) {
    new_amount = substr(line, 1, 7)
    new_unit = substr(line, 9, 2)
    new_ingredient = substr(line, 12)

    is_continuation = 0
    if (new_amount == "       " && new_unit == "  ") {
        # match() is only consulted for lines with blank amount and unit;
        # RLENGTH then gives the length of the "-" prefix to strip.
        if (match(new_ingredient, /^[[:space:]]*- */)) {
            is_continuation = 1
        }
    }

    if (is_continuation) {
        ingredient = ingredient " " substr(new_ingredient, RLENGTH+1)
        return
    }
    amount = new_amount
    unit = new_unit
    ingredient = new_ingredient
}

# Return str without leading and trailing whitespace.  The trimmed value is
# also left in the global scratch variable retval.
function trim(str) {
    # str is a by-value copy, so it can be edited in place.  Each pattern is
    # anchored and can match at most once, so sub() is sufficient.
    sub(/^[[:space:]]+/, "", str)
    sub(/[[:space:]]+$/, "", str)
    retval = str
    return retval
}

# Translate a two-character unit code into its long name using the global
# names table; codes that are not in the table are returned unchanged.
# The result is also stored in the global scratch variable retval.
function unit_name(unit) {
    retval = (unit in names) ? names[unit] : unit
    return retval
}

# Fill the global names table, which maps two-character unit codes (padded
# with a trailing space where the code is a single letter) to the long
# names printed in the ingredient table.  Keys are case-sensitive:
# "t " and "T " are different units.
function unit_names_init() {
    # Sizes and counts
    names["x "] = "per serving"
    names["ea"] = "each"
    names["sm"] = "small"
    names["md"] = "medium"
    names["lg"] = "large"

    # Containers and informal measures
    names["bn"] = "bunch"
    names["cn"] = "can"
    names["ct"] = "carton"
    names["dr"] = "drop"
    names["ds"] = "dash"
    names["pk"] = "package"
    names["pn"] = "pinch"
    names["sl"] = "slice"

    # Spoons and cups
    names["t "] = "teaspoon"
    names["ts"] = "teaspoon"
    names["T "] = "tablespoon"
    names["tb"] = "tablespoon"
    names["c "] = "cup"

    # Fluid ounce up to gallon
    names["fl"] = "fluid ounce"
    names["pt"] = "pint"
    names["qt"] = "quart"
    names["ga"] = "gallon"

    # Ounce and pound
    names["oz"] = "ounce"
    names["lb"] = "pound"

    # Liter-based units
    names["ml"] = "milliliter"
    names["cb"] = "cubic cm"
    names["cl"] = "centiliter"
    names["dl"] = "deciliter"
    names["l "] = "liter"

    # Gram-based units
    names["mg"] = "milligram"
    names["cg"] = "centigram"
    names["dg"] = "decigram"
    names["g "] = "gram"
    names["kg"] = "kilogram"
}

# Build the two global lookup tables used by uri_encode():
#   uri_encode_ord[character] -> character code (0..255)
#   uri_encode_tab[code]      -> text emitted for that character
#
# Emitted text per code:
#   0..31, 127   control characters  -> %XX
#   128..255     high codes          -> %XX
#   32           space               -> "+"
#   34, 60, 62   the characters " < > -> %22 %3C %3E
#   everything else                  -> the character itself
#
# The characters " < > are not allowed in a URI and would otherwise let a
# URL end the href="..." attribute it is written into.  "%" stays unchanged
# so URLs that are already percent-encoded are not encoded twice.
# code and ch are awk locals.
function uri_encode_init(    code, ch) {
    for (code = 0; code <= 255; code++) {
        ch = sprintf("%c", code)
        uri_encode_ord[ch] = code
        if (code < 32 || code >= 127) {
            uri_encode_tab[code] = sprintf("%%%02X", code)
        } else {
            uri_encode_tab[code] = ch
        }
    }

    # SPACE
    uri_encode_tab[32] = "+"

    # Double quote and angle brackets
    uri_encode_tab[34] = "%22"
    uri_encode_tab[60] = "%3C"
    uri_encode_tab[62] = "%3E"

    return
}

# Return str with every character replaced by its entry in the global
# uri_encode_tab table (looked up through uri_encode_ord).
#
# A character that has no entry in uri_encode_ord is copied unchanged
# instead of being looked up: indexing the tables with an unknown
# character would yield an empty string, silently dropping the character,
# and would also create empty table entries as a side effect.
# out, pos, total and ch are awk locals.
function uri_encode(str,    out, pos, total, ch) {
    out = ""
    total = length(str)
    for (pos = 1; pos <= total; pos++) {
        ch = substr(str, pos, 1)
        if (ch in uri_encode_ord) {
            out = out uri_encode_tab[uri_encode_ord[ch]]
        } else {
            out = out ch
        }
    }
    return out
}

BEGIN {
    # Parser state flags, all cleared before the first input line is read.
    after_ingredients = at_end = in_gsection = in_heading = 0
    in_ingredients = in_instructions = in_list = 0

    # Lookup tables used when the HTML is written.
    unit_names_init()
    uri_encode_init()
}

# Return the section name carried by a section header line.
# Must be called directly after match() has matched the header prefix,
# because it uses RLENGTH to skip that prefix; trailing dashes are removed.
function mm_section_title(line,    name) {
    name = substr(line, RLENGTH+1)
    gsub(/-+$/, "", name)
    return name
}

# Parse one ingredient line with ingredient_parse() and store it under the
# current global gsection.  A continuation line overwrites the entry of the
# previous ingredient; any other line is stored as a new entry.
function mm_store_ingredient(line,    n) {
    n = gsections[gsection]
    ingredient_parse(line)
    if (is_continuation == 0) {
        n++
        gsections[gsection] = n
    }
    amounts[gsection,n] = amount
    units[gsection,n] = unit
    ingredients[gsection,n] = ingredient
}

# Append one line of instruction text to the current global tsection.
function mm_store_instruction(text) {
    tsections[tsection]++
    instructions[tsection,tsections[tsection]] = text
}

# Per-line state machine.  States, in input order:
#   in_heading -> in_ingredients / in_gsection (with after_* after a blank
#   line) -> in_instructions <-> in_list -> at_end
{
    gsub(/\r/, "")
    if (NR == 1) {
        if (/^(MMMMM|-----)----- Recipe via Meal-Master/) {
            in_heading = 1
        } else {
            # Report on stderr with a failing status.  END still runs after
            # exit, so parse_failed tells it not to print an HTML page.
            print "Error: Not in Meal-Master format\n" > "/dev/stderr"
            parse_failed = 1
            exit 1
        }
    } else if (NR == 2) {
        # ignore second line
    } else if (in_heading) {
        if (match($0, /^[[:space:]]+Title: /)) {
            title = substr($0, RLENGTH+1)
        } else if (match($0, /^[[:space:]]+Categories: /)) {
            categories = substr($0, RLENGTH+1)
        } else if (match($0, /^[[:space:]]+Yield: /)) {
            yield = substr($0, RLENGTH+1)
        } else if (/^[[:space:]]*$/) {
            in_heading = 0
            in_ingredients = 1
        }
    } else if (in_ingredients) {
        if (match($0, /^(MMMMM|-----)-+/)) {
            in_ingredients = 0
            in_gsection = 1
            gsection = mm_section_title($0)
        } else if (/^[[:space:]]*$/) {
            in_ingredients = 0
            after_ingredients = 1
        } else {
            # Ingredients before any section header go to the unnamed section.
            gsection = ""
            mm_store_ingredient($0)
        }
    } else if (after_ingredients) {
        if (match($0, /^(MMMMM|-----)-+/)) {
            after_ingredients = 0
            in_gsection = 1
            gsection = mm_section_title($0)
        } else {
            # Not a header: this line is the first instruction line and is
            # handled by the in_instructions test below in the same pass.
            after_ingredients = 0
            in_instructions = 1
        }
    } else if (in_gsection) {
        if (match($0, /^(MMMMM|-----)-+/)) {
            in_gsection = 1
            gsection = mm_section_title($0)
        } else if (/^[[:space:]]*$/) {
            in_gsection = 0
            after_gsection = 1
        } else {
            mm_store_ingredient($0)
        }
    } else if (after_gsection) {
        if (match($0, /^(MMMMM|-----)-+/)) {
            after_gsection = 0
            in_gsection = 1
            gsection = mm_section_title($0)
        } else {
            after_gsection = 0
            in_instructions = 1
        }
    }

    # Deliberately a separate "if": a line that has just switched the state
    # to in_instructions above is processed here as well.
    if (in_instructions) {
        if (match($0, /^(MMMMM|-----)-+/)) {
            tsection = mm_section_title($0)
        } else if (/^(MMMMM|-----)$/) {
            in_instructions = 0
            at_end = 1
        } else if (/^[[:space:]]*\* /) {
            # First bullet: the in_list test below stores it.
            in_instructions = 0
            in_list = 1
        } else {
            mm_store_instruction($0)
        }
    }
    if (in_list) {
        if (/^[[:space:]]*$/) {
            in_list = 0
            in_instructions = 1
            mm_store_instruction($0)
        } else if (/^[[:space:]]*\* /) {
            mm_store_instruction($0)
        } else if (match($0, /^(MMMMM|-----)-+/)) {
            # A section header directly after a bullet starts a new text
            # section instead of being glued onto the bullet.
            in_list = 0
            in_instructions = 1
            tsection = mm_section_title($0)
        } else if (/^(MMMMM|-----)$/) {
            # The recipe terminator directly after a bullet ends the recipe
            # instead of being glued onto the bullet.
            in_list = 0
            at_end = 1
        } else {
            # Wrapped bullet text: append to the bullet stored last.
            instructions[tsection,tsections[tsection]] = instructions[tsection,tsections[tsection]] " " trim($0)
        }
    }
}

END {
    if (parse_failed) {
        exit 1
    }
    if (NR == 0) {
        # Empty input has no Meal-Master header line either.
        print "Error: Not in Meal-Master format\n" > "/dev/stderr"
        exit 1
    }
    html()
}
