(***********************************************************************)
(*                                                                     *)
(*                           CIME Caml                                 *)
(*                                                                     *)
(*            Pierre Weis, projet Cristal, INRIA Rocquencourt          *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  Distributed only by permission.                   *)
(*                                                                     *)
(***********************************************************************)

(* $Id: substitute.ml,v 1.23 2012-03-25 00:54:04 weis Exp $ *)

(** {3 Variable substitution in buffers} *)

type ident = string;;
type value = string;;

(** Variable substitution in strings:
   Substitute [$ident] (or [$(ident)]) in a string;
   the identifiers are substituted according to an environment
   that maps variables to strings. *)

let closing = function
  | '(' -> ')'
  | '[' -> ']'
  | '{' -> '}'
  | '<' -> '>'
  | _ -> failwith "opening";;

(** Opening and closing: open and close characters, typically ( and )
   [k] balance of opening and closing chars
   [s] the string in which we are searching
   [p] the index where we start the search. *)
let advance_to_closing opening closing k s start =
 let rec advance k i lim =
   if i >= lim then raise Not_found else
   if s.[i] = opening then advance (k + 1) (i + 1) lim else
   if s.[i] = closing then
    if k = 0 then i else advance (k - 1) (i + 1) lim
   else advance k (i + 1) lim in
 advance k start (String.length s)
;;

let is_ident_start_char = function
  | '_'
  | 'a' .. 'z' | 'A' .. 'Z' |
    ''|''|''|''|''|''|''|''|''|''|''|''|''|''|
    ''|''|''|''|''|''|''|''|''|''|''|''|''|'' -> true
  | _ -> false
;;

let advance_to_non_alpha s start =
 let rec advance i lim =
  if i >= lim then lim else
  match s.[i] with
  | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' |
    ''|''|''|''|''|''|''|''|''|''|''|''|''|''|
    ''|''|''|''|''|''|''|''|''|''|''|''|''|'' ->
    advance (i + 1) lim
  | _ -> i in
 advance start (String.length s)
;;

(** We are just after a dollar marker (e.g. a ['$'] character), presumably at
  the beginning of an ident in [s], starting at [start] *)
let find_ident s start lim =
 if start >= lim then raise Not_found else
 match s.[start] with
 (** Parenthesized ident ? *)
 | '(' | '[' | '{' | '<' as c ->
   let new_start = start + 1 in
   let stop = advance_to_closing c (closing c) 0 s new_start in
   String.sub s new_start (stop - start - 1), stop + 1
 (** Regular ident *)
 | c when is_ident_start_char c ->
   let stop = advance_to_non_alpha s (start + 1) in
   String.sub s start (stop - start), stop
 | _ -> raise Not_found
;;

(** Find the value associated to a variable
   in the user's (shell) environment. *)
let get_var ident =
  try Sys.getenv ident with
  | Not_found ->
      failwith
        (Printf.sprintf "htmlc: no system environment variable ``%s''" ident)
;;

let is_quoted_string s =
  let l = String.length s in
  l > 1 && s.[0] = '\'' && s.[l - 1] = '\''
;;

let is_forced_string s =
  let l = String.length s in
  l > 1 && s.[0] = '`' && s.[l - 1] = '`'
;;

let is_system_ident s i next_i =
  (** There is a '<' '>' delimited ident in the string [s] between index [i]
  and index [next_i] excluded. *)
  let start_char = s.[i] and stop_char = s.[next_i - 1] in
  start_char = '<' && stop_char = '>'
;;

(** Add a string [s] to a buffer, while substituting bound variables in [s].

   - Variables of the form [$ident] (or [$(ident)]) are replaced, according to
   the function [f_subst].

   - Variables of the form [$<ident>] are replaced by their value in the
   user's system environment.

   - Character '\\' is treated specially:

     - n successive '\\' characters are replaced by n - 1 '\\' characters,

     - if "\\" is followed by "#" or "$" then it is omitted (hence, "\\#" and
       "\\$" are respectively rewritten to "#" and "$";
       replacing "\\$" by "$" allows to delay the evaluation of expressions
       when needed; conversely, replacing "\\#" by "#" allows to delay the
       evaluation of SSI when needed,

     - if "\\" is followed by any other character then it is left as is. *)

let rec substitute_buffer f_subst ob s =
  let lim = String.length s in
  (*prerr_endline (Printf.sprintf "Substituting %S" s);*)
  let dollar_marker = Htmlc_options.get_marker_char () in
  let rec subst previous i =
    if i < lim then
      let current = s.[i] in
      match previous, current with
      | '\\', '#' ->
        go_on current i
      | '\\', current when current = dollar_marker ->
        go_on current i
      | '\\', '\\' ->
        Buffer.add_char ob current;
        go_on_back_slash (i + 1)
      | '\\', current ->
        go_on_previous previous current i
      | _, current when current = dollar_marker ->
        begin
          try
            let ident, next_i = find_ident s (i + 1) lim in
            begin
              if is_system_ident s (i + 1) next_i then
                substitute_buffer f_subst ob (get_var ident) else
              let v =
                try f_subst ident with
                | Not_found ->
                  failwith
                    (Printf.sprintf "htmlc: unbound variable ``%s''" ident) in
              if is_quoted_string v
              then Buffer.add_string ob v
              else substitute_buffer f_subst ob v
            end;
            subst ' ' next_i
          with
          | Not_found -> go_on current i
        end
      | _, '\\' ->
        subst current (i + 1)
      | _, current ->
        go_on current i
    else
    (** We catch here the case where one lookahead character (['\\'])
       was the last one of the input.
       We must do that only if the input was not empty in the first place
       (otherwise there is no lookahead characters at all). *)
    if lim > 0 then
    match s.[i - 1] with
    | '\\' as previous -> Buffer.add_char ob previous
    | _ -> ()

  and go_on current i =
    Buffer.add_char ob current;
    subst current (i + 1)

  and go_on_back_slash i =
    (** If end of string is reached do nothing, otherwise add all the ['\\']
       characters following the one that trigers this call. *)
    if i < lim then begin
      match s.[i] with
      | '\\' as current ->
        Buffer.add_char ob current;
        go_on_back_slash (i + 1)
      | _ -> subst '\\' i
      end

  and go_on_previous previous current i =
    Buffer.add_char ob previous;
    Buffer.add_char ob current;
    subst current (i + 1) in

  subst ' ' 0
;;

let add_string = substitute_buffer;;
let add_string_substitute t = substitute_buffer (Hashtbl.find t);;
