open Py_parse
open Py_exceptions

(* 1: remove comments *)
let filter_comments x =
  let rec filter x' =
    match x' with 
    | COMMENT_NEWLINE (count,_) :: t-> NEWLINE count :: filter t
    | COMMENT _ :: t -> filter t
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

(* 1a: adjust newlines to mark line _beginnings_ not ends *)
exception Breaker
let adjust_newlines x =
  let filename = ref "Unknown-filename" in
  begin try 
    List.iter 
    begin fun t  -> match t with 
    | NEWLINE (c,f) -> filename := f; raise Breaker
    | _ -> ()
    end
    x
  with Breaker ->()
  end;
  
  let rec filter x' =
    match x' with 
    | NEWLINE (count,f) :: t-> NEWLINE (count+1,f) :: filter t
    | h :: t -> h :: filter t
    | [] -> []
  in 
    NEWLINE (1, !filename) :: filter x


(* 2: remove slosh/newline *)
let filter_sloshes x =
  let last_newline = ref (0,"") in
  let rec filter x' =
    match x' with 
    | SLOSH :: t' ->
      begin match t' with 
      | NEWLINE c :: t -> last_newline := c; filter t
      | WHITE _ :: NEWLINE c :: t-> last_newline := c; filter t
      | _ -> raise (TokenError "Illegal \\ character")
      end
    | NEWLINE c :: t -> last_newline := c; NEWLINE c:: filter t
    | COLON :: t -> CTRL :: filter t
    | h :: t -> h ::  filter t
    | [] -> []
  in filter x

(* 3: remove bracketed newlines *)
let filter_newlines x =
  let level = ref 0 in
  let rec filter x' =
    match x' with 
    | LPAR :: t  -> incr level; LPAR :: filter t 
    | LSQB :: t  -> incr level; LSQB :: filter t 
    | LBRACE :: t  -> incr level; LBRACE :: filter t 
    | RPAR :: t  -> decr level; RPAR :: filter t 
    | RSQB :: t  -> decr level; RSQB :: filter t 
    | RBRACE :: t  -> decr level; RBRACE :: filter t 
    | NEWLINE count :: t  -> 
      if !level = 0 then NEWLINE count :: filter t 
      else filter t 
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

(* 4: remove embedded whitespace *)
let filter_whitespace x =
  let rec filter x' =
    match x' with 
    | WHITE _  :: t -> filter t 
    | NEWLINE count :: t' ->
      begin match t' with 
      | WHITE n :: t  -> NEWLINE count :: WHITE n :: filter t 
      | _ -> NEWLINE count :: filter t'
      end
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

(* 5: remove extraneous blank lines *)
let filter_blanklines x =
  let rec filter x' =
    match x' with 
    | NEWLINE _ :: NEWLINE count :: t  -> filter (NEWLINE count ::t)
    | NEWLINE _ :: WHITE _ :: NEWLINE count :: t -> filter (NEWLINE count ::t)
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

(* 5a track line numbers, brackets, and replace colons with
   CTRL for control indicators or KEYVALSEP for key/value separators,
   or SLICESEP for slice separators

  WARNING: lambdas are not handled correctly!
*)

let filter_colons x =
  let stack = ref [] 
  and lineno = ref (1,"")
  in
  let rec outer x' =
    match x' with 
    | NEWLINE count :: t -> 
      lineno := count; 
      NEWLINE count :: LOC count :: outer t

    | SEMI :: NEWLINE count :: t ->  (* strip trailing semi colon *)
      outer (NEWLINE count :: t)

    | SEMI :: t ->
      let loc = LOC !lineno in SEMI :: loc :: outer t

    | CTRL :: NEWLINE count :: t -> (* let the NEWLINE insert the loc *)
      CTRL :: outer (NEWLINE count :: t)

    | CTRL :: t -> (* insert a loc *)
      let loc = LOC !lineno in CTRL :: loc :: outer t

    | LPAR :: _ 
    | LBRACE :: _
    | LSQB :: _ -> inner x'
    | h :: t -> h :: outer t
    | [] -> []
  and inner x' =
    match x' with
    | LPAR :: t -> stack:= LPAR :: !stack; LPAR :: inner t
    | LBRACE :: t -> stack:= LBRACE :: !stack; LBRACE :: inner t
    | LSQB :: t -> stack:= LSQB :: !stack; LSQB :: inner t
    | RPAR :: t -> 
      stack:= List.tl !stack; 
      RPAR :: (if (List.length !stack) > 0 then inner else outer) t
    | RSQB :: t -> 
      stack:= List.tl !stack; 
      RSQB :: (if (List.length !stack) > 0 then inner else outer) t
    | RBRACE :: t -> 
      stack:= List.tl !stack; 
      RBRACE :: (if (List.length !stack) > 0 then inner else outer) t
    | CTRL :: t when (List.hd !stack) = LBRACE -> KEYVALSEP :: inner t
    | CTRL :: t when (List.hd !stack) = LSQB -> SLICESEP :: inner t
    | h :: t -> h :: inner t
    | [] -> raise (TokenError "Unmatched bracket at end of file")
  in outer x

(* 6: make comma/}]) into a single token, 
   replace POWER with DOUBLESTAR if preceded by a ( or ,
*)
(* WARNING: THIS ROUTINE IS NOT COMPLETE!!
   This is a mess.
*)

let filter_commas x =
  let rec filter x' =
    match x' with 
    | COMMA :: t'  ->
      begin match t' with 
      | RPAR :: t  -> TRAILING_COMMA :: RPAR :: filter t 
      | RSQB :: t  -> TRAILING_COMMA :: RSQB :: filter t 
      | RBRACE :: t  -> TRAILING_COMMA :: RBRACE :: filter t
      | EQUAL :: t  -> TRAILING_COMMA :: EQUAL :: filter t
      | SEMI :: t  -> TRAILING_COMMA :: SEMI :: filter t
      | NEWLINE count :: t  -> TRAILING_COMMA :: NEWLINE count :: filter t
      | IN :: t  -> TRAILING_COMMA :: IN :: filter t
      | SLICESEP :: t  -> TRAILING_COMMA :: SLICESEP :: filter t
      | KEYVALSEP :: t  -> TRAILING_COMMA :: KEYVALSEP :: filter t
      | CTRL :: t  -> TRAILING_COMMA :: CTRL :: filter t
      | POWER :: t -> COMMA :: DOUBLESTAR :: filter t
      |  t  -> COMMA :: filter t
      end
    | LPAR :: POWER :: t -> LPAR :: DOUBLESTAR :: filter t
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

(* 7: strip trailing semicolons and
  insert empty whitespace after newlines if necessary,
  make sure the LOC is bang up against the statement [after whitespace]
*)
let filter_tabulation x =
  let rec filter x' =
    match x' with 
    | NEWLINE count ::  LOC count' :: t'  ->
      begin match t' with 
      | WHITE n :: t -> NEWLINE count :: WHITE n :: LOC count' :: filter t 
      | t  -> NEWLINE count :: WHITE 0 :: LOC count' :: filter t 
      end
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

(* 8:  process leading white spaces now*)
let filter_indents x =
  let rec filter x' indents =
    match x' with
    | WHITE n :: t  -> 
      begin match indents with 
      | ih :: it -> 
        if n > ih then INDENT :: filter t (n :: indents) 
        else if n < ih then DEDENT :: filter x' it 
        else filter t indents 
      | [] -> raise (Failure "Program error in py_lex1.ml, phase 8a")
      end
    | h :: t -> h :: filter t indents 
    | [] -> 
      begin match indents with
      | [0] -> []
      | ih :: it -> DEDENT :: filter [] it
      | [] -> raise (Failure "Program error in py_lex1.ml, phase 8b")
      end
  in filter x [0]

let repair_suffices x = 
  let rec filter x = 
    match x with
    | LOC c :: IF :: t -> LOC_IF c :: filter t
    | LOC c :: ELSE :: t -> LOC_ELSE c :: filter t
    | LOC c :: ELIF :: t -> LOC_ELIF c :: filter t
    | LOC c :: EXCEPT :: t -> LOC_EXCEPT c :: filter t
    | LOC c :: FINALLY :: t -> LOC_FINALLY c :: filter t
    | LOC _ :: ENDMARKER :: t -> [ENDMARKER]
    | h :: t -> h :: filter t
    | [] -> []
  in filter x

let repair_for_expr x =
  let rec filter x = 
  match x with 
  | LOC _ :: t -> filter t
  | NEWLINE _ :: t -> filter t
  | h :: t -> h :: filter t
  | [] -> []
  in filter x
;;

let translate ts = 
  let filters = [
    (* 1 *) filter_comments; 
    (* 1a *) adjust_newlines; 
    (* 2 *) filter_sloshes; 
    (* 3 *) filter_newlines;
    (* 4 *) filter_whitespace;
    (* 5 *) filter_blanklines;
    (* 5a *) filter_colons;
    (* 6 *) filter_commas;
    (* 7 *) filter_tabulation; 
    (* 8 *) filter_indents;
    (* 9 *) repair_suffices 
    ] 
  and reverse_apply dat fn = fn dat 
  in List.fold_left reverse_apply ts filters
;;

(* this routine is used for preparing a token list for
parsing with 'eval_input', which doesn't accept any LOC tokens
*)

let translate_for_expr ts =
    let filters = [
    (* 1 *) filter_comments; 
    (* 1a *) adjust_newlines; 
    (* 2 *) filter_sloshes; 
    (* 3 *) filter_newlines;
    (* 4 *) filter_whitespace;
    (* 5 *) filter_blanklines;
    (* 5a *) filter_colons;
    (* 6 *) filter_commas;
    (* 7 *) filter_tabulation; 
    (* 8 *) filter_indents;
    (* 9 *) repair_suffices;
    (* 10 *) repair_for_expr
    ] 
  and reverse_apply dat fn = fn dat 
  in List.fold_left reverse_apply ts filters
;;

