#!/usr/bin/tclsh

# $Id: fickle.tcl,v 1.26 2002/07/12 16:01:36 Administrator Exp $
# fickle 1.00 by Jason Tang (tang@jtang.org).  Please see the README
# for usage instructions.  Also see http://mini.net/tcl/fickle

set IO_ERROR 1
set SYNTAX_ERROR 2
set PARAM_ERROR 3
set LOGIC_ERROR -1

set BUFFER_SIZE 1024
set HEADERS 1
set p "yy"
set P "YY"

;# two types of start states allowed:
set INCLUSIVE 0
set EXCLUSIVE 1

;# copy everything between ^%\{$ to ^%\}$ to the destination file
proc handle_literal_block {} {
    global src dest line_count
    set end_defs 0
    while {$end_defs == 0} {
        if {[gets $src line] < 0} {
            incr line_count
            puts stderr "No terminator to verbatim section found."
            global SYNTAX_ERROR
            exit $SYNTAX_ERROR
        } elseif {[string equal [string trim $line] "%\}"]} {
            set end_defs 1
        } else {
            puts $dest $line
        }
        incr line_count
    }
}

;# processes the definitions section
proc handle_defs {line} {
    global line_count BUFFER_SIZE SYNTAX_ERROR PARAM_ERROR INCLUSIVE EXCLUSIVE
    set line [string trim $line]
    ;# ignore blank lines
    if {[string equal $line ""]} {
        continue
    } elseif {[string equal $line "%\{"]} {
        handle_literal_block
    } else {
        ;# two main types are allowed: %xxxxx rules, and textual
        ;# substitutions
        ;# for both of them, extract the keyword to the left of the first
        ;# space, and the arguments (if any) to the right
        if {[regexp -line {^(\S+)\s+(.*)} $line foo keyword args] == 0} {
            set keyword $line
            set args ""
        }
        global sub_table state_table
        switch -- $keyword {
            "%s" {
                foreach state_name [split $args] {
                    if {! [string equal $state_name ""]} {
                        set state_table($state_name) $INCLUSIVE
                    }
                }
            }
            "%x" {
                foreach state_name [split $args] {
                    if {! [string equal $state_name ""]} {
                        set state_table($state_name) $EXCLUSIVE
                    }
                }
            }
            "%BUFSIZE" {
                if {[string equal $args ""]} {
                    puts stderr "%BUFSIZE must have an integer parameter on line $line_count."
                    exit $PARAM_ERROR
                } elseif {[expr [string is digit $args] && {$args > 0}]} {
                    set BUFFER_SIZE $args
                } else {
                    puts stderr "%BUFSIZE parameter must be positive integer on line $line_count."
                    exit $PARAM_ERROR
                }
            }
            "%GLOBAL" {
                global global_args
                append global_args " $args"
            }
            "%NOCASE" {
                global scan_args
                append scan_args "-nocase"
            }
            "%NOHEADERS" {
                global HEADERS
                set HEADERS 0
            }
            "%PREFIX" {
                global p P
                if {[string equal $args ""]} {
                    puts stderr "%PREFIX must have a parameter on line $line_count."
                    exit $PARAM_ERROR
                } elseif {[llength $args] > 1} {
                    puts stderr "%PREFIX may have only one parameter on line $line_count."
                    exit $PARAM_ERROR
                } else {
                    set p [string tolower $args]
                    set P [string toupper $args]
                }
            }
            "%SUPPRESS" {
                global suppress
                set suppress 1
            }
            default {
                if {[string equal -len 1 $keyword "%"]} {
                    puts stderr "Unknown directive $keyword on line $line_count."
                    exit $SYNTAX_ERROR
                }
                set sub_table($keyword) $args
            }
        }
    }
}


;# process the rules line
proc handle_rule {line} {
    global rule_table src line_count

    ;# check for blank lines
    if {[string length [string trim $line]] == 0} {
        return
    }
    
    ;# first extract the regular expression part from the line
    set pattern ""
    set space_found 0
    set i 0
    set in_quotes 0
    set brace_count 0
    set bracket_count 0
    set paren_count 0
    while {$space_found == 0} {
        if {$i >= [string length $line]} {
            puts stderr "Improperly formed pattern: line $line_count."
            global SYNTAX_ERROR
            exit $SYNTAX_ERROR
        }
        set c [string index $line $i]
        ;# this ugly switch statement is to allow for whitespaces
        ;# within regexs; it also cleans up special characters by
        ;# adding backslashes where needed, and fixes how Tcl handles
        ;# quotation marks
        switch -regexp -- $c {
            {\\} {
                ;# skip over the next character as well
                append pattern [string range $line $i [expr $i + 1]]
                incr i 2
            }
            {\{} {
                if {[expr {$brace_count == 0} && {$bracket_count == 0} &&\
                          {$in_quotes == 0}]} {
                    incr brace_count
                } else {
                    append pattern "\\"
                }
                append pattern $c
                incr i
            }
            {\}} {
                if {[expr {$brace_count == 1} && {$bracket_count == 0} &&\
                          {$in_quotes == 0}]} {
                    incr brace_count -1
                } else {
                    append pattern "\\"
                }
                append pattern $c
                incr i
            }
            {\[} {
                if {[expr {$in_quotes == 0} && {$bracket_count == 0}]} {
                    incr bracket_count
                } else {
                    append pattern "\\"
                }
                append pattern $c
                incr i
            }
            {\]} {
                if {[expr {$in_quotes == 0} && {$bracket_count == 1}]} {
                    incr bracket_count -1
                } else {
                    append pattern "\\"
                }
                append pattern $c
                incr i
            }
            {\(} {
                if {[expr {$in_quotes == 0} && {$bracket_count == 0}]} {
                    incr paren_count
                } else {
                    append pattern "\\"
                }
                append pattern $c
                incr i
            }
            {\)} {
                if {[expr {$in_quotes == 0} && {$bracket_count == 0}]} {
                    incr paren_count -1
                } else {
                    append pattern "\\"
                }
                append pattern $c
                incr i
            }
            {\"}  {
                if {$bracket_count == 0} {
                    set in_quotes [expr ! $in_quotes]
                } else {
                    append pattern "\\$c"
                }
                incr i
            }
            {[ \t]} {
                if {[expr {$brace_count == 0} && {$bracket_count == 0} && \
                        {$paren_count == 0} && {$in_quotes == 0}]} {
                    set space_found 1
                } else {
                    append pattern $c
                    incr i
                }
            }
            default {
                if {$in_quotes == 0} {
                    append pattern $c
                } else {
                    if {[regexp {[.*\[\]^$\{\}+?|/\(\)]} $c foo] > 0} {
                        append pattern "\\"
                    }
                    append pattern $c
                }
                incr i
            }
        }
    }

    set orig_pattern [string range $line 0 $i]
    
    ;# check the pattern to see if it has a start state is indicated
    set state_name ""
    if {[regexp {^<([^>]+)>} $pattern foo state_name] > 0} {
        ;# a state was found; remove the tag from the pattern
        regsub {^<[^>]+>} $pattern "" pattern
    }

    ;# now that a pattern has been found, see if any textual
    ;# substitutions are needed
    global sub_table
    foreach sub_rule [array names sub_table] {
        ;# the quotes around the regexp below is necessary, to allow
        ;# for substitution of the sub_rule
        regsub -all "\{$sub_rule\}" $pattern \
                    "\($sub_table($sub_rule)\)" pattern
    }

    set line [string trimleft [string range $line $i end]]

    ;# now that a pattern has been found, determine the action
    ;# if the action does not start with a curly brace, then scan then
    ;# the rest of the line; otherwise, scan for the matching closing
    ;# brace, which may be several lines later
    set action ""
    if {[string equal -len 1 $line "\{"] == 0} {
        set action $line
    } else {
        set brace_count 1
        set i 1
        while {$brace_count > 0} {
            if {$i >= [string length $line]} {
                if {[gets $src line] < 0} {
                    incr line_count
                    puts stderr "Improperly formed action: line $line_count."
                    global SYNTAX_ERROR
                    exit $SYNTAX_ERROR
                }
                incr line_count
                append action "\n"
                set i 0
            }
            set c [string index $line $i]
            if {[string equal $c "\\"]} {
                append action [string range $line $i [expr $i + 1]]
                incr i 2
            } elseif {[string equal $c "\{"]} {
                incr brace_count
                append action "\{"
                incr i
            } elseif {[string equal $c "\}"]} {
                incr brace_count -1
                if {$brace_count > 0} {
                    append action "\}"
                }
                incr i
            } else {
                append action $c
                incr i
            }
        }
    }
    ;# special condition: if the action is merely a bar, then use the
    ;# next pattern's action
    if {[string equal [string trim $action] "|"]} {
        set action ""
    }
    lappend rule_table [list $orig_pattern $state_name $pattern $action]
;#    puts stdout "DEBUG  state: |$state_name|  pattern: |$pattern|  action: |$action|"
}


;# upon reaching the subroutines section, copy everything thereafter
;# to the destination file
proc handle_subroutines {} {
    global src dest line_count
    while {[gets $src line] >= 0} {
        incr line_count
        puts $dest $line
    }
}


;# writes the utility functions to the destination; the programmer may
;# call these from actions, and choose to override these as necessary
proc write_header {} {
    global dest BUFFER_SIZE p P HEADERS
    puts $dest "
;######
;# autogenerated utility functions used by fickle; override as needed
;######
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# If ${p}wrap() returns false (zero), then it is assumed that the
;# function has gone ahead and set up ${p}in to point to another input
;# file, and scanning continues.  If it returns true (non-zero), then
;# the scanner terminates, returning 0 to its caller.  Note that in
;# either case, the start condition remains unchanged; it does not
;# revert to INITIAL."
    }
    puts $dest "proc ${p}wrap \{\} \{
    return 1
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# ECHO copies ${p}text to the scanner's output \[if no arguments are
;# given\]. . . .The scanner writes its ECHO output to the ${p}out global
;# (default, stdout), which may be redefined by the user simply by
;# assigning it to some other FILE pointer."
    }
    puts $dest "proc ECHO \{\{s \"\"\}\} \{
    if \{\[string equal \$s \"\"\]\} \{
        upvar ${p}text local_${p}text
        set s \$local_${p}text
    \}
    global ${p}out
    puts -nonewline \$${p}out \$s
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# ${P}_FLUSH_BUFFER flushes the scanner's internal buffer so that the
;# next time the scanner attempts to match a token, it will first
;# refill the buffer using ${P}_INPUT."
    }
    puts $dest "proc ${P}_FLUSH_BUFFER \{\} \{
    global ${p}_buffer ${p}_index ${p}_done
    set ${p}_buffer \"\"
    set ${p}_index 0
    set ${p}_done 0
\}
"
    if {$HEADERS} {
    puts $dest ";# \[from the flex(1) man page\]:
;#
;# The nature of how it gets its input can be controlled by defining
;# the ${P}_INPUT macro.  ${P}_INPUT's calling sequence is
;# \"${P}_INPUT(buf,result,max_size)\".  Its action is to place up to
;# max_size characters in the character array buf and return in the
;# integer variable result either the number of characters read or the
;# constant ${P}_NULL (0 on Unix systems) to indicate EOF.  The default
;# ${P}_INPUT reads from the global file-pointer \"${p}in\"."
    }
    puts $dest "set ${P}_NULL 0
proc ${P}_INPUT \{buf result max_size\} \{
    global ${p}in
    upvar \$result ret_val
    upvar \$buf new_data
    set new_data \[read \$${p}in \$max_size\]
    set ret_val \[string length \$new_data\]
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# unput(c) puts the character c back onto the input stream.  It will
;# be the next character scanned.  The following action will take the
;# current token and cause it to be rescanned enclosed in parentheses."
    }
    puts $dest "proc unput \{c\} \{
    global ${p}_buffer ${p}_index
    set ${p}_buffer \[string replace \$${p}_buffer \$${p}_index \$${p}_index \\
                                  \"\$c\[string index \$${p}_buffer \$${p}_index\]\"\]
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# input() reads the next character from the input stream.
;#
;# As with flex, please do NOT override this function."
    }
    puts $dest "proc input \{\} \{
    global ${p}_buffer ${p}_index ${P}_NULL ${p}_done
    if \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] < $BUFFER_SIZE\} \{
       set ${p}_buffer_size \$${P}_NULL
       if \{\$${p}_done == 0\} \{
           ${P}_INPUT new_buffer ${p}_buffer_size $BUFFER_SIZE
           append ${p}_buffer \$new_buffer
           if \{\$${p}_buffer_size == \$${P}_NULL\} \{
               set ${p}_done 1
           \}
       \}
       if \{\$${p}_done == 1\} \{
           if \{\[${p}wrap\] == 0\} \{
               return \[input\]
           \} elseif \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] == 0\} \{
               return \"\"
           \}
        \}
    \}
    set c \[string index \$${p}_buffer \$${p}_index\]
    incr ${p}_index
    return \$c
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# Pushes the current start condition onto the top of the start
;# condition stack and switches to new_state as though you had used
;# BEGIN new_state.
;#
;# Please do NOT override this function."
    }
    puts $dest "proc ${p}_push_state \{new_state\} \{
    global ${p}_state_stack
    lappend ${p}_state_stack \$new_state
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# Pops off the top of the state stack; if the stack is now empty, then
;# pushes the state \"INITIAL\".
;#
;# Please do NOT override this function."
    }
    puts $dest "proc ${p}_pop_state \{\} \{
    global ${p}_state_stack
    set ${p}_state_stack \[lrange \$${p}_state_stack 0 end-1\]
    if \{\[string equal \$${p}_state_stack \"\"\]\} \{
        ${p}_push_state INITIAL
    \}
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# Returns the top of the stack without altering the stack's contents.
;#
;# Please do NOT override this function."
    }
    puts $dest "proc ${p}_top_state \{\} \{
    global ${p}_state_stack
    return \[lindex \$${p}_state_stack end\]
\}
"
    if {$HEADERS} {
        puts $dest ";# \[from the flex(1) man page\]:
;#
;# BEGIN followed by the name of a start condition places the scanner
;# in the corresponding start condition. . . .Until the next BEGIN
;# action is executed, rules with the given start condition will be
;# active and rules with other start conditions will be inactive.  If
;# the start condition is inclusive, then rules with no start
;# conditions at all will also be active.  If it is exclusive, then
;# only rules qualified with the start condi tion will be active.
;# 
;# Please do NOT override this function."
    }
    puts $dest "proc BEGIN \{new_state\ \{prefix \"yy\"\}\} \{
    eval global \${prefix}_state_stack
    eval set \${prefix}_state_stack \[lrange \$\${prefix}_state_stack 0 end-1\]
    eval lappend \${prefix}_state_stack \$new_state
\}

;######
;# end autogenerated utility functions
;######
"
}

;# starts building the yy_lex() function; called when the rules section
;# is about to begin
proc start_rules {} {
    global dest global_args state_table BUFFER_SIZE p P

    puts $dest "
;######
;# autogenerated ${p}lex function by fickle -- modify at your own peril
;######
proc ${p}lex \{\} \{
    global ${p}_first_time ${p}_buffer ${p}_index ${p}_state_stack
    global ${p}_state_table ${p}in ${p}out ${P}_NULL ${p}_done"

    if {! [string equal $global_args ""]} {
        puts $dest "    global $global_args"
    }
    
    puts -nonewline $dest "    if \{\[info exists ${p}_first_time\] == 0\} \{
        set ${p}_first_time \"\"
        set ${p}_buffer \"\"
        set ${p}_buffer_size \$${P}_NULL
        set ${p}_index 0
        set ${p}_state_stack \"\"
        set ${p}_done 0
        BEGIN INITIAL ${p}
        array set ${p}_state_table \[list "

    ;# write the state table to the file
    puts -nonewline $dest [array get state_table]

    puts $dest "\]
        if \{\[info exists ${p}in\] == 0\} \{
            set ${p}in \"stdin\"
        \}
        if \{\[info exists ${p}out\] == 0\} \{
            set ${p}out \"stdout\"
        \}
    \}
    while \{1\} \{
        set ${p}_current_state \[${p}_top_state\]
        if \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] < $BUFFER_SIZE\} \{
            if \{\$${p}_done == 0\} \{
                set new_buffer \"\"
                ${P}_INPUT new_buffer ${p}_buffer_size $BUFFER_SIZE
                append ${p}_buffer \$new_buffer
                if \{\$${p}_buffer_size == \$${P}_NULL\ && \\
                        \[expr \[string length \$${p}_buffer\] - \$${p}_index\] == 0\} \{
                    set ${p}_done 1
                \}
            \}
            if \{\$${p}_done == 1\} \{
                if \{\[${p}wrap\] == 0\} \{
                    set ${p}_done 0
                    continue
                \} elseif \{\[expr \[string length \$${p}_buffer\] - \$${p}_index\] == 0\} \{
                    break
                \}
            \}
            
        \}
        set ${p}text \"\"
        set ${p}_matched_rule -1"
}

;# stop yy_lex() function; called when exiting the rules section
proc end_rules {} {
    global dest rule_table scan_args suppress EXCLUSIVE p P

    ;# build up the if statements to determine which rule to execute;
    ;# lex is greedy, and will use the rule that matches the most
    ;# strings
    if {[llength $rule_table] > 0} {
        set rule_num 0
        foreach rule $rule_table {
            set orig_pattern [lindex $rule 0]
            set state_name [lindex $rule 1]
            set pattern [lindex $rule 2]
            ;# puts stdout "DEBUG:  ($rule_num). $rule"
            ;# if the state is "*", then this state will *always* match
            puts $dest "        ;# rule $rule_num: $orig_pattern"
            if {[string equal $state_name "*"]} {
                puts $dest "        if \{\[expr \\"
            } elseif {[string equal $state_name ""]} {
                puts $dest "        if \{\[expr \\
                \{\$${p}_state_table(\$${p}_current_state) != $EXCLUSIVE\} && \\"
            } else {
                ;# do logic to figure out states
                puts $dest "        if \{\[expr \\
                \{\[string equal \$${p}_current_state $state_name\]\} && \\"
            }
            puts $dest "                \{\[regexp -start \$${p}_index -indices -line $scan_args -- \{$pattern\} \$${p}_buffer ${p}_match\] > 0\} && \\
                \{\[lindex \$${p}_match 0\] == \$${p}_index\}\]\} \{
            if \{\[expr \[lindex \$${p}_match 1\] - \$${p}_index + 1\] > \[string length \$${p}text\]\} \{
                set ${p}text \[string range \$${p}_buffer \$${p}_index \[lindex \$${p}_match 1\]\]
                set ${p}_matched_rule $rule_num
            \}
        \}"
            incr rule_num
        }
        ;# now add the default case
        puts $dest "        if \{\$${p}_matched_rule == -1\} \{
            set ${p}text \[string index \$${p}_buffer \$${p}_index\]
        \}"
    } else {
        ;# no rules were defined at all, so need to slightly adjust output
        puts $dest "    set ${p}text \[string index \$${p}_buffer \$${p}_index\]"
    }
    puts $dest "        set ${p}leng \[string length \$${p}text\]
        incr ${p}_index \$${p}leng
        ;# workaround for stupid circumflex behavior
        if \{\[string equal \[string index \$${p}text end\] \"\\n\"\]\} \{
            set ${p}_buffer \[string range \$${p}_buffer \$${p}_index end\]
            set ${p}_index 0
        \}
        switch -- \$${p}_matched_rule \{"
    set rule_num 0
    foreach rule $rule_table {
        puts $dest "            $rule_num"
        if {[string length [lindex $rule 3]] == 0} {
            ;# action is empty, so use next pattern's action
            puts $dest "                -"
        } else {
            puts $dest "                \{"
            ;# output the action associated with the rule
            foreach action_line [split [lindex $rule 3] "\n"] {
                puts $dest "                $action_line"
            }
            puts $dest "            \}"
        }
        incr rule_num
    }
    puts $dest "            default"
    if {$suppress == 0} {
        puts $dest "                \{ ECHO \}"
    } else {
        puts $dest "                \{ puts stderr \"unmatched token: \$${p}text in state \$${p}_current_state\" ; exit -1 \}"
    }
    puts $dest "        \}
    \}
    return 0
\}
;######
;# end autogenerated data
;######
"
}


;#############################################################
;# start of actual script

if {[llength $argv] >= 1} {
    set force_overwrite 0
    if {[string equal [lindex $argv 0] "--force"]} {
        set force_overwrite 1
    }
    set in_filename [lindex $argv end]
    set out_filename [file rootname $in_filename]
    append out_filename ".tcl"

    if {[expr [file exists $out_filename] && {$force_overwrite != 1}]} {
        puts stderr "Destination already exists.  (Override with --force.)"
        exit $IO_ERROR
    }
    if {[catch {open $in_filename r} src]} {
        puts stderr "Could not open source file."
        exit $IO_ERROR
    }
    
    if {[catch {open $out_filename w} dest]} {
        puts stderr "Could not open destination file."
        exit $IO_ERROR
    }
} else {
    set src "stdin"
    set dest "stdout"
}

;# indicates the state of the file being scanned
;# valid states are 'defs', 'rules', and 'sub'[routines]
set file_state "defs"
set rule_table ""
set line_count 0
set scan_args ""
set suppress 0
set global_args ""

;# set up the INITIAL start state to be a normal inclusionary state
set state_table(INITIAL) $INCLUSIVE

while {[gets $src line] >= 0} {
    global line_count
    incr line_count
    
    set line [string trim $line]
    ;# ignore blank lines
    if {[string equal $line ""]} {
        continue
    } elseif {[string equal $line "%%"]} {
        ;# figure out what state to switch to
        if {[string equal $file_state "rules"]} {
            end_rules
            set file_state "sub"
            handle_subroutines
        } elseif {[string equal $file_state "defs"]} {
            write_header
            ;# puts stdout "DEBUG:"
            ;# foreach {state val} [array get state_table] {
            ;#     puts stdout "  $state: $val"
            ;# }
            start_rules
            set file_state "rules"
        }
    } else {
        ;# no special symbol found, so handle the line
        if {[string equal $file_state "defs"]} {
            handle_defs $line
        } elseif {[string equal $file_state "rules"]} {
            handle_rule $line
        } else {
            puts stderr "logic error:  in state $file_state on line $line"
            exit $LOGIC_ERROR
        }
    }
}

;# EOF reached -- make sure to close off the yy_lex() function, in case
;# there was no explicit subroutines section
if {[string equal $file_state "rules"]} {
    end_rules
}


close $src
close $dest
