(* depends on Py_lex.mli *)
open Py_parse

(** [string_of_token tok] renders a single token as display text.

    Literal tokens are rendered from their payload, punctuation and
    keywords as fixed spellings, and layout or position tokens as
    angle-bracketed tags. Any constructor not listed below is rendered
    by the final catch-all arm. *)
let string_of_token (tok:Py_parse.token):string =
  (* Tag shared by LOC and every LOC_* token; only the counter is shown. *)
  let at_tag count = "<@" ^ string_of_int count ^ ">" in
  (* Tag shared by NEWLINE and COMMENT_NEWLINE. *)
  let newline_tag count = "<NEWLINE " ^ string_of_int count ^ ">" in
  match tok with
  (* literals and identifiers *)
  | NAME s -> s
  | NONE -> "None"
  | INTEGER i -> string_of_int i
  | LONG i -> Big_int.string_of_big_int i ^ "L"
  | FLOAT f -> string_of_float f
  | COMPLEX (re, im) ->
    "(" ^ string_of_float re ^ ", " ^ string_of_float im ^ ")"
  | STRING s -> Py_string.quote_of_string s

  (* single-character punctuation *)
  | LPAR -> "("
  | RPAR -> ")"
  | LSQB -> "["
  | RSQB -> "]"
  | COLON -> ":"
  | COMMA -> ","
  | SEMI -> ";"
  | PLUS -> "+"
  | MINUS -> "-"
  | STAR -> "*"
  | SLASH -> "/"
  | VBAR -> "|"
  | AMPER -> "&"
  | LESS -> "<"
  | GREATER -> ">"
  | EQUAL -> "="
  | DOT -> "."
  | PERCENT -> "%"
  | BACKQUOTE -> "`"
  | LBRACE -> "{"
  | RBRACE -> "}"
  | TILDE -> "~"
  | CIRCUMFLEX -> "^"

  (* two-character operators *)
  | EQEQUAL -> "=="
  | NOTEQUAL -> "!="
  | LESSEQUAL -> "<="
  | GREATEREQUAL -> ">="
  | LEFTSHIFT -> "<<"
  | RIGHTSHIFT -> ">>"
  | DOUBLESTAR -> "**"
  | POWER -> "<POW>**"

  (* Viper extensions *)
  | PLUSPLUS -> "++"
  | MINUSMINUS -> "--"
  | PLUSEQUAL -> "+="
  | MINUSEQUAL -> "-="
  | STAREQUAL -> "*="
  | SLASHEQUAL -> "/="
  | PERCENTEQUAL -> "%="
  | CARETEQUAL -> "^="
  | VBAREQUAL -> "|="
  | AMPEREQUAL -> "&="
  | TILDEEQUAL -> "~="
  | COLONEQUAL -> ":="
  | RIGHTARROW -> "->"
  | LEFTARROW -> "<-"

  (* keywords *)
  | AND -> "and"
  | ASSERT -> "assert"
  | BREAK -> "break"
  | CLASS -> "class"
  | CONTINUE -> "continue"
  | DEF -> "def"
  | DEL -> "del"
  | ELIF -> "elif"
  | ELSE -> "else"
  | EXCEPT -> "except"
  | EXEC -> "exec"
  | FINALLY -> "finally"
  | FOR -> "for"
  | FROM -> "from"
  | GLOBAL -> "global"
  | IF -> "if"
  | IMPORT -> "import"
  | IN -> "in"
  | IS -> "is"
  | LAMBDA -> "<lambda>"
  | NOT -> "not"
  | OR -> "or"
  | PASS -> "pass"
  | PRINT -> "print"
  | RAISE -> "raise"
  | RETURN -> "return"
  | TRY -> "try"
  | WHILE -> "while"

  (* tokens tagged so they can be told apart from plain punctuation *)
  | TRAILING_COMMA -> "<TRAILING>,"
  | SLICESEP -> "<SLICESEP>:"
  | KEYVALSEP -> "<KEYVALSEP>:"
  | SLOSH -> "\\" (* a single backslash character *)
  | CTRL -> "<CTRL>:"

  (* keywords carrying a position; the second component is not shown *)
  | LOC_IF (count, _) -> at_tag count ^ " if"
  | LOC_ELIF (count, _) -> at_tag count ^ " elif"
  | LOC_ELSE (count, _) -> at_tag count ^ " else"
  | LOC_EXCEPT (count, _) -> at_tag count ^ " except"
  | LOC_FINALLY (count, _) -> at_tag count ^ " finally"

  (* comments, whitespace and layout *)
  | COMMENT s -> "#" ^ s
  | COMMENT_NEWLINE ((count, _), s) -> "#" ^ s ^ newline_tag count
  | WHITE width -> String.make width ' '
  | NEWLINE (count, _) -> newline_tag count
  | INDENT -> "<INDENT>"
  | DEDENT -> "<DEDENT>"
  | ENDMARKER -> "<<EOF>>"
  | ERRORTOKEN s -> "<<ERROR '" ^ s ^ "'>>"
  | LOC (count, _) -> at_tag count

  (* anything not listed above *)
  | _ -> "<<??>>"
;;


(** [pre_tokens_of_lexbuf buf] repeatedly calls [Py_lex.pre_pylex buf]
    and returns the tokens in the order they were produced. The list
    stops at, and includes, the first [Py_parse.ENDMARKER].

    The loop is tail-recursive, so stack use does not grow with the
    number of tokens. Exceptions raised by the lexer are not caught
    here. *)
let pre_tokens_of_lexbuf buf =
  (* [acc] holds the tokens read so far, most recent first. *)
  let rec collect acc =
    match Py_lex.pre_pylex buf with
    | Py_parse.ENDMARKER -> List.rev (Py_parse.ENDMARKER :: acc)
    | t -> collect (t :: acc)
  in
  collect []
;;

(** [pre_tokens_of_filename filename] calls [Py_lex.reset filename],
    opens [filename] for reading, and returns the result of
    [pre_tokens_of_lexbuf] on its contents.

    The channel is closed before returning, and also when lexing raises,
    in which case the original exception is re-raised.
    @raise Sys_error if [filename] cannot be opened. *)
let pre_tokens_of_filename filename =
  Py_lex.reset filename;
  let infile = open_in filename in
  let toks =
    try pre_tokens_of_lexbuf (Lexing.from_channel infile)
    with e ->
      (* Do not leak the descriptor on failure; [close_in_noerr] keeps a
         secondary close error from masking the lexer exception. *)
      (close_in_noerr infile; raise e)
  in
  close_in infile;
  toks
;;

(** [pre_tokens_of_string s filename] calls [Py_lex.reset filename] and
    then returns the result of [pre_tokens_of_lexbuf] on the text [s].
    Nothing is read from disk here; [filename] is only passed to
    [Py_lex.reset]. *)
let pre_tokens_of_string s filename =
  Py_lex.reset filename;
  let lexbuf = Lexing.from_string s in
  pre_tokens_of_lexbuf lexbuf
;;


(** [print_pre_token t] writes one pre-token to standard output and
    flushes it.

    - [COMMENT_NEWLINE] prints the comment text, ends the line, and
      starts the next one with a line-number gutter.
    - [NEWLINE] ends the line and starts the next one with a gutter.
    - [ENDMARKER] prints the end-of-file tag followed by a newline.
    - Every other token is printed via [string_of_token].

    The gutter shows the token counter plus one, right-aligned in at
    least four columns and followed by a colon and a space. Numbers
    wider than four digits are printed in full. *)
let print_pre_token t =
  (* Gutter for the line that follows the one numbered [count]. *)
  let start_line count =
    print_string (Printf.sprintf "%4d: " (count + 1))
  in
  begin match t with
  | Py_parse.COMMENT_NEWLINE ((count, _), s) ->
    print_endline ("#" ^ s);
    start_line count
  | Py_parse.NEWLINE (count, _) ->
    print_endline "";
    start_line count
  | Py_parse.ENDMARKER -> print_endline "<<EOF>>"
  | _ -> print_string (string_of_token t)
  end;
  flush stdout
;;

(** [print_pre_tokens ts] prints a pre-token list to standard output.

    An empty list prints only a placeholder message. Otherwise the
    gutter for line 1 is printed, followed by each token in order via
    [print_pre_token]. *)
let print_pre_tokens ts =
  match ts with
  | [] ->
    (* Nothing to number, so no gutter is printed. *)
    print_string "<Empty pretoken list>"
  | _ ->
    print_string "   1: ";
    List.iter print_pre_token ts
;;

