(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Pierre Weis, projet Cristal, INRIA Rocquencourt          *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id: lib_date.ml,v 1.18 2012-03-25 00:57:01 weis Exp $ *)

(** {3 [Lib_date]: A library to deal with dates in various languages} *)

(** {6 Numberings for dates} *)

type year = int
and month = int
;;
(** The year and month numberings.
    Years are numbered with their rank in the Gregorian calendar.
    Months are numbered from [0] for January to [11] for December.
*)

type month_day = int
and week_day = int
;;
(** The month day and week day numberings.
    Month days are numbered from [1] for the first day in the month to [28],
    [29], [30], or [31] for the last day in the month.
    Week days are numbered from [0] for Sunday to [6] for Saturday.
*)

type hour = int
and minute = int
and second = int
;;
(** The hour, minute and second numberings.
    Hours are numbered from [0] to [23].
    Minutes are numbered from [0] to [59].
    Seconds are numbered from [0] to [60];
    [60] is the number of the exceptional leap second,
    cf. http://en.wikipedia.org/wiki/Leap_second.
*)

(** {6 String representation for dates} *)

type year_string = string
and month_string = string
;;
(** The year represented as a string and a month with its name according to
  the language. *)

type month_day_string = string
and week_day_string = string
;;
(** The month day represented as a string and the week day with its name
  according to the language. *)

open Printf;;
open Lang;;

let month_names lang =
  match lang with
  | Fr -> [| "janvier"; "fvrier"; "mars"; "avril"; "mai"; "juin"; "juillet";
             "aot"; "septembre"; "octobre"; "novembre"; "dcembre"; |]
  | Uk -> [| "January"; "February"; "March"; "April"; "May"; "June"; "July";
             "August"; "September"; "October"; "November"; "December"; |]
  | Es -> [| "enero"; "febrero"; "marzo"; "abril"; "mayo"; "junio"; "julio";
             "agosto"; "septiembre"; "octubre"; "noviembre";
             "diciembre"; |]
  | De -> [| "Januar"; "Februar"; "Mrz"; "April"; "Mai"; "Juni";
             "Juli"; "August"; "September"; "Oktober"; "November";
             "Dezember"; |];
  | It -> invalid_arg "unknown language"
;;

let recognizer_months lang =
  match lang with
  | Fr ->
    [| "Jan\\($\\|v$\\|vier$\\)";
       "F[e||e']v\\($\\|rier$\\)";
       "Mar\\($\\|s$\\)";
       "Avr\\($\\|il$\\)";
       "Mai";
       "Jui\\($\\|n\\)";
       "Juil\\($\\|let$\\)";
       "Ao[u||u^]\\($\\|t$\\)";
       "Sep\\($\\|t$\\|tembre$\\)";
       "Oct\\($\\|obre$\\)";
       "Nov\\($\\|embre$\\)";
       "D[e||e']c\\($\\|embre$\\)"; |]
  | Uk ->
    [| "Jan\\($\\|uary$\\)";
       "Feb\\($\\|ruary$\\)";
       "Mar\\($\\|ch$\\)";
       "Apr\\($\\|il$\\)";
       "May";
       "Jun\\($\\|e$\\)";
       "Jul\\($\\|y$\\)";
       "Aug\\($\\|ust$\\)";
       "Sep\\($\\|tember$\\)";
       "Oct\\($\\|ober$\\)";
       "Nov\\($\\|ember$\\)";
       "Dec\\($\\|ember$\\)"; |]
  | Es ->
    [| "Ene\\($\\|ro$\\)";
       "Feb\\($\\|rero$\\)";
       "Mar\\($\\|zo$\\)";
       "Abr\\($\\|il$\\)";
       "May\\($\\|o$\\)";
       "Jun\\($\\|io$\\)";
       "Jul\\($\\|io$\\)";
       "Ago\\($\\|sto$\\)";
       "Sep\\($\\|tiembre$\\)";
       "Oct\\($\\|ubre$\\)";
       "Nov\\($\\|iembre$\\)";
       "Dic\\($\\|iembre$\\)"; |]
  | De ->
    [| "Jan\\($\\|uar$\\)";
       "Feb\\($\\|ruar$\\)";
       "M[|ae]r\\($\\|z$\\)";
       "Apr\\($\\|il$\\)";
       "Mai";
       "Jun\\($\\|i$\\)";
       "Jul\\($\\|i$\\)";
       "Aug\\($\\|ust$\\)";
       "Sep\\($\\|tember$\\)";
       "Okt\\($\\|ober$\\)";
       "Nov\\($\\|ember$\\)";
       "Dez\\($\\|ember$\\)"; |]

  | It -> invalid_arg "unknown language"
;;

let week_day_names lang =
  match lang with
  | Fr -> [| "dimanche"; "lundi"; "mardi"; "mercredi";
             "jeudi"; "vendredi"; "samedi"; |]
  | Uk -> [| "Sunday"; "Monday"; "Tuesday"; "Wednesday";
             "Thursday"; "Friday"; "Saturday"; |]
  | Es -> [| "Domingo"; "Lunes"; "Martes"; "Mircoles";
             "Jueves"; "Viernes"; "Sbado"; |]
  | De -> [| "Sonntag"; "Montag"; "Dienstag"; "Mittwoch";
           "Donnerstag"; "Freitag"; "Samstag"; |]
  | It -> invalid_arg "unknown language"
;;

let recognizer_week_days lang =
  match lang with
  | Fr -> [| "di\\($\\|m\\($\\|anche$\\)\\)";
             "lu\\($\\|n\\($\\|di$\\)\\)";
             "ma\\($\\|r\\($\\|di$\\)\\)";
             "me\\($\\|r\\($\\|credi$\\)\\)";
             "je\\($\\|u\\($\\|di$\\)\\)";
             "ve\\($\\|n\\($\\|dredi$\\)\\)";
             "sa\\($\\|m\\($\\|edi$\\)\\)"; |]
  | Uk -> [| "Sun\\($\\|day$\\)";
             "Mon\\($\\|day$\\)";
             "Tue\\($\\|s\\($\\|day$\\)\\)";
             "Wed\\($\\|n\\($\\|esday$\\)\\)";
             "Thu\\($\\|r\\($\\|sday$\\)\\)";
             "Fri\\($\\|day$\\)";
             "Sat\\($\\|u\\($\\|rday$\\)\\)"; |]
  | Es -> [| "do\\($\\|m\\($\\|ingo$\\)\\)";
             "lu\\($\\|n\\($\\|es$\\)\\)";
             "ma\\($\\|r\\($\\|tes$\\)\\)";
             "mi\\($\\|[|e]\\($\\|rcoles$\\)\\)";
             "ju\\($\\|e\\($\\|ves$\\)\\)";
             "vi\\($\\|e\\($\\|rnes$\\)\\)";
             "s[|a]\\($\\|b\\($\\|ado$\\)\\)"; |]
  | De -> [| "Son\\($\\|ntag$\\)";
             "Mon\\($\\|tag$\\)";
             "Die\\($\\|nstag$\\)";
             "Mit\\($\\|t\\($|woch$\\)\\)";
             "Don\\($\\|nerstag$\\)";
             "Fr\\($\\|eitag$\\)";
             "Sam\\($\\|stag$\\)"; |]
  | It -> invalid_arg "unknown language"
;;

let reg_exps_week_days =
  let fr_regs = Array.map Str.regexp_case_fold (recognizer_week_days Fr)
  and uk_regs = Array.map Str.regexp_case_fold (recognizer_week_days Uk)
  and es_regs = Array.map Str.regexp_case_fold (recognizer_week_days Es)
  and de_regs = Array.map Str.regexp_case_fold (recognizer_week_days De) in
  function
  | Fr -> fr_regs
  | Uk -> uk_regs
  | Es -> es_regs
  | De -> de_regs
  | It -> invalid_arg "unknown language"
;;

let reg_exps_months =
  let fr_regs = Array.map Str.regexp_case_fold (recognizer_months Fr)
  and uk_regs = Array.map Str.regexp_case_fold (recognizer_months Uk)
  and es_regs = Array.map Str.regexp_case_fold (recognizer_months Es)
  and de_regs = Array.map Str.regexp_case_fold (recognizer_months De) in
  function
  | Fr -> fr_regs
  | Uk -> uk_regs
  | Es -> es_regs
  | De -> de_regs
  | It -> invalid_arg "unknown language"
;;

(** Computing days of week and time intervals *)
let year_is_leap a =
  (a mod 4 = 0 && not (a mod 100 = 0)) || a mod 400 = 0
;;

let last_month_day_of_month m a =
  match m with
  | 1 | 3 | 5 | 7 | 8 | 10 | 12 -> 31
  | 2 -> if year_is_leap a then 29 else 28
  | 4 | 6 | 9 | 11 -> 30
  | _ -> invalid_arg "last_month_day_of_month"
;;

let date_is_valid j m a =
  j >= 1 && m >= 1 && m <= 12 && j <= last_month_day_of_month m a
;;

let compare_date (j1, m1, a1) (j2, m2, a2) =
  match compare a1 a2 with
  | 0 ->
     begin match compare m1 m2 with
     | 0 -> compare j1 j2
     | c -> c end
  | c -> c
;;

let rec days_of_month m a =
  if m <= 0 then invalid_arg "days_of_month" else
  match m with
  | 1 -> 0
  | n -> days_of_month (n - 1) a + last_month_day_of_month (m - 1) a
;;

let days_of_year a = a * 365 + (a / 4) - (a / 100) + a / 400;;

let days_of_date (j1, m1, a1 as _d1) =
  j1 + days_of_month m1 a1 + days_of_year (a1 - 1)
;;

let sub_date (_j1, _m1, _a1 as d1) (_j2, _m2, _a2 as d2) =
  let j_d1 = days_of_date d1 in
  let j_d2 = days_of_date d2 in
  j_d1 - j_d2
;;

(** 1 Janvier 1900 is a monday *)
let week_day_of_date month_day month year =
  let days = sub_date (month_day, month, year) (1, 1, 1900) in
  (1 + (days mod 7)) mod 7
;;

(** Formating dates to strings *)
let string_of_week_day lang week_day =
  if week_day < 0 || week_day > 6 then invalid_arg "string_of_week_day" else
  (week_day_names lang).(week_day)
;;

let string_of_month_day lang month_day =
  if month_day < 0 || month_day > 31 then invalid_arg "string_of_month_day" else
  match lang with
  | Fr -> begin if month_day = 1 then "1er" else string_of_int month_day end
  | Uk ->
    begin match month_day with
    | 1 | 21 | 31 as md -> Printf.sprintf "%dst" md
    | 2 | 22 as md -> Printf.sprintf "%dnd" md
    | 3 | 23 as md -> Printf.sprintf "%drd" md
    | md -> Printf.sprintf "%dth" md end
  | De | Es | It -> string_of_int month_day
;;

let string_of_month lang month =
  if month < 0 || month > 11 then invalid_arg "string_of_month" else
  (month_names lang).(month)
;;

let string_of_full_date lang week_day month_day month year =
  (string_of_week_day lang week_day,
   string_of_month_day lang month_day,
   string_of_month lang month,
   string_of_int year)
;;

let string_of_date lang month_day month year =
  let week_day = week_day_of_date month_day month year in
  string_of_full_date lang week_day month_day month year
;;

let string_of_mois = string_of_month Fr;;
let string_of_jour = string_of_week_day Fr;;

let string_of_mes = string_of_month Es;;
let string_of_dia = string_of_week_day Es;;

let string_of_monat = string_of_month De;;
let string_of_tag = string_of_week_day De;;

let array_find p v =
  let limit = Array.length v in
  let rec index i =
   if i >= limit then raise Not_found else
   if p v.(i) then i else index (succ i) in
  index 0
;;

(*let array_index item v = array_find (function x -> x = item) v;; *)

let week_day_of_string lang day_name =
  array_find (function regexp -> Str.string_match regexp day_name 0)
       (reg_exps_week_days lang)
;;

let month_of_string lang month_name =
  succ
    (array_find (function regexp -> Str.string_match regexp month_name 0)
    (reg_exps_months lang))
;;

let year_of_int i = if i <= 1000 then i + 1900 else i;;
let month_of_int i = i + 1;;

let format_date lang month_day month week_day year =
  let get_string_of_week_day lang week_day =
    match week_day with
    | Some w ->
      begin
       let s = string_of_week_day lang w in
       match lang with
       | Fr | Es -> s ^ " "
       | Uk | De -> s ^ ", "
       | It -> invalid_arg "format_date: get_string_of_week"
      end
    | None -> "" in
  
  match lang with
  | Fr -> sprintf "%s%s %s %d"
            (get_string_of_week_day Fr week_day)
            (string_of_month_day Fr month_day)
            (string_of_month Fr month)
            (year_of_int year)
  | Uk -> sprintf "%s%s %s, %d"
            (get_string_of_week_day Uk week_day)
            (string_of_month Uk month)
            (string_of_month_day Uk month_day)
            (year_of_int year)
  | Es -> sprintf "%s%s de %s de %d"
            (get_string_of_week_day Es week_day)
            (string_of_month_day Es month_day)
            (string_of_month Es month)
            (year_of_int year)
  | De -> sprintf "%s%s. %s %d"
            (get_string_of_week_day De week_day)
            (string_of_month_day De month_day)
            (string_of_month De month)
            (year_of_int year)
  | It -> invalid_arg "format_date"
;;

let string_of_date_Uk = format_date Uk;;
let string_of_date_Fr = format_date Fr;;
let string_of_date_Es = format_date Es;;
let string_of_date_De = format_date De;;
let string_of_date_It = format_date It;;

let hour_of_int i =
  if 0 <= i && i <= 24 then i else
  invalid_arg (Printf.sprintf "hour_of_int: %i" i)
;;
let minute_of_int i =
  if 0 <= i && i <= 59 then i else
  invalid_arg (Printf.sprintf "minute_of_int: %i" i)
;;
let second_of_int i =
  if 0 <= i && i <= 60 then i else
  invalid_arg (Printf.sprintf "second_of_int: %i" i)
;;

let format_iso_time hour minute second =
  sprintf "%02d:%02d:%02d"
    (hour_of_int hour)
    (minute_of_int minute)
    (second_of_int second)
;;

let string_of_iso_time = format_iso_time;;

let format_iso_date month_day month year =
  sprintf "%04d-%02d-%02d"
    (year_of_int year) (month_of_int month) month_day
;;

let string_of_iso_date = format_iso_date;;

let format_full_iso_date month_day month year hour minute second =
  Printf.sprintf "%sT%sZ"
    (format_iso_date month_day month year)
    (format_iso_time hour minute second)
;;

let string_of_full_iso_date = format_full_iso_date;;

let format_digital_date lang month_day month year =
  let month = month_of_int month in
  match lang with
  | Fr -> sprintf "%d/%d/%d" month_day month (year_of_int year)
  | Uk -> sprintf "%d/%d/%d" month month_day (year_of_int year)
  | Es | De
  | It -> invalid_arg "format_digital_date"
;;

let string_of_digital_date_Uk = format_digital_date Uk;;
let string_of_digital_date_Fr = format_digital_date Fr;;
let string_of_digital_date_Es = format_digital_date Es;;
let string_of_digital_date_De = format_digital_date De;;
let string_of_digital_date_It = format_digital_date It;;

(** Parsing dates *)

(*
let rec get_int s limit i =
  let rec get j =
   if j >= limit then invalid_arg s else
   match s.[j] with
   | '0' .. '9' -> get (succ j)
   | _ ->
     if j = i then 0, succ j else begin
       try int_of_string (String.sub s i (j - i)), j with
       | Failure _ -> invalid_arg s end in

  if i >= limit then invalid_arg s else
  match s.[i] with
  | ' ' -> get_int s limit (i + 1)
  | '-' -> let i, j = get (i + 1) in -i, j
  | _ -> get i
;;
*)

let is_separator = function
  | ':' | ' ' | '\t' | '/' | ',' | '-' | '.' -> true
  | _ -> false
;;

let get_separator s limit i =
  let rec get j =
    if j >= limit then invalid_arg s else
    if is_separator s.[j] then get (succ j)
    else j in
  get i
;;

let get_word s limit i =
  if i >= limit then invalid_arg s else
  let rec get j =
    if j >= limit then j else
    if is_separator s.[j] then j else get (succ j) in
  get i
;;

let get_date s =
  let limit = String.length s in
  let rec get accu i =
    let j = get_word s limit i in
    let w = String.sub s i (j - i) in
    let new_accu = w :: accu in
    if j >= limit then new_accu else
      let new_i = get_separator s limit j in
      get new_accu new_i in
  List.rev (get [] 0)
;;

let month_of_string_or_int lang s =
  try int_of_string s
  with Failure _ -> month_of_string lang s
;;

let year_of_string s =
  if String.length s <= 2 then 2000 + int_of_string s
  else int_of_string s
;;

let get_time = get_date;;

let hour_of_string s = hour_of_int (int_of_string s)
and minute_of_string s = minute_of_int (int_of_string s)
and second_of_string s = second_of_int (int_of_string s)
;;

let time_of_tokens toks =
  match toks with
  | [hour] ->
    let hour = hour_of_string hour in
    let minute = 0 in
    let second = 0 in
    (hour, minute, second)
  | [hour; minute] ->
    let hour = hour_of_string hour in
    let minute = minute_of_string minute in
    let second = 0 in
    (hour, minute, second)
  | [hour; minute; second] ->
    let hour = hour_of_string hour in
    let minute = minute_of_string minute in
    let second = second_of_string second in
    (hour, minute, second)
  | _ -> invalid_arg "parse_time"
;;

(** Time can also be written using digits only: hh, hhmm, or hhmmss *)
let time_of_int_string s =
  match String.length s with
  | 6 ->
    time_of_tokens [String.sub s 0 2; String.sub s 2 2; String.sub s 4 2]
  | 4 ->
    time_of_tokens [String.sub s 0 2; String.sub s 2 2]
  | 2 ->
    time_of_tokens [s]
  | _ -> invalid_arg "time_of_int_string"
;;

let parse_time s =
  let toks = get_time s in
  match toks with
  | [ s ] -> time_of_int_string s
  | toks -> time_of_tokens toks
;;

let rec date_of_tokens lang toks =
  match toks with
  | [year] ->
    date_of_int_string lang year
  | [month; year] ->
    let month_day = 1 in
    let month = month_of_string_or_int lang month in
    let year = year_of_string year in
    let week_day = week_day_of_date month_day month year in
    (week_day, month_day, month, year)
  | words ->
    match lang with
    | Fr ->
      begin match words with
      | [month_day; month; year] ->
        let month_day = int_of_string month_day in
        let month = month_of_string_or_int lang month in
        let year = year_of_string year in
        let week_day = week_day_of_date month_day month year in
        (week_day, month_day, month, year)
      | [week_day; month_day; month; year] ->
        let week_day = week_day_of_string lang week_day in
        let month_day = int_of_string month_day in
        let month = month_of_string_or_int lang month in
        let year = year_of_string year in
        (week_day, month_day, month, year)
      | _ -> invalid_arg "parse_date" end
    | Uk | De  ->
      begin match words with
      | [month; month_day; year] ->
        let month = month_of_string_or_int lang month in
        let month_day = int_of_string month_day in
        let year = year_of_string year in
        let week_day = week_day_of_date month_day month year in
        (week_day, month_day, month, year)
      | [week_day; ","; month; month_day; year]
      | [week_day; month; month_day; year] ->
        let week_day = week_day_of_string lang week_day in
        let month = month_of_string_or_int lang month in
        let month_day = int_of_string month_day in
        let year = year_of_string year in
        (week_day, month_day, month, year)
      | _ -> invalid_arg "parse_date" end
    | Es ->
      begin match words with
      | [month_day; "de"; month; ("de" | "del"); year] ->
        let month_day = int_of_string month_day in
        let month = month_of_string_or_int lang month in
        let year = year_of_string year in
        let week_day = week_day_of_date month_day month year in
        (week_day, month_day, month, year)
      | [week_day; ","; month_day; "de"; month; ("de" | "del"); year]
      | [week_day;      month_day; "de"; month; ("de" | "del"); year] ->
        let week_day = week_day_of_string lang week_day in
        let month_day = int_of_string month_day in
        let month = month_of_string_or_int lang month in
        let year = year_of_string year in
        (week_day, month_day, month, year)
      | _ -> invalid_arg "parse_date" end
    | It -> invalid_arg "parse_date"

(** Dates can also be written using digits only: yy, yyyy, ddmmyy, or ddmmyyyy *)
and date_of_int_string lang s =
  match String.length s with
  | 8 ->
    date_of_tokens lang [String.sub s 0 2; String.sub s 2 2; String.sub s 4 4]
  | 6 ->
    date_of_tokens lang [String.sub s 0 2; String.sub s 2 2; String.sub s 4 2]
  | _ ->
   let year = s in
   let month_day = 1 in
   let month = 0 in
   let year = year_of_string year in
   let week_day = week_day_of_date month_day month year in
   (week_day, month_day, month, year)
;;

let parse_date lang s =
  let toks = get_date s in
  date_of_tokens lang toks
;;

let valid_date lang s =
  let (week_day, month_day, month, year as date) = parse_date lang s in
  if week_day = week_day_of_date month_day month year then date
  else invalid_arg "valid_date"
;;

let parse s = parse_date Uk s;;

let format month_day month week_day year =
  format_date Uk month_day month week_day year
;;

let normalize_date lang s =
  let (week_day, month_day, month, year) = parse_date lang s in
  string_of_full_date lang week_day month_day month year
;;
