/* pcre_intf.c

   Copyright (C) 1999  Markus Mottl
   email: mottl@miss.wu-wien.ac.at
   WWW: http://miss.wu-wien.ac.at/~mottl

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/* $Id: pcre_intf.c,v 1.9 1999/09/21 08:59:15 mottl Exp $ */

#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include <caml/memory.h>
#include <caml/fail.h>
#include <caml/callback.h>

#include <pcre.h>

typedef const unsigned char * chartables; /* Type of chartable sets */

/* Cache for exceptions */
value *pcre_exc_Not_found     = NULL; /* Exception value [Not_found] */
value *pcre_exc_BadPattern    = NULL; /* Exception value [BadPattern] */
value *pcre_exc_InternalError = NULL; /* Exception value [InternalError] */

/* Fetches the named OCaml-values of the exceptions and caches them */
void pcre_init_caml_names()
{
  pcre_exc_Not_found     = caml_named_value("Pcre.Not_found");
  pcre_exc_BadPattern    = caml_named_value("Pcre.BadPattern");
  pcre_exc_InternalError = caml_named_value("Pcre.InternalError");
}

/* Finalizing deallocation function for chartable sets */
void pcre_dealloc_tables(value val) { pcre_free((void *) Field(val, 1)); }

/* Finalizing deallocation function for compiled regular expressions */
void pcre_dealloc_pattern(value val)
{
  void *extra = (void *) Field(val,2);
  pcre_free((void *) Field(val, 1));
  if (extra != NULL) pcre_free(extra);
}

/* Raises exceptions which take two arguments */
void raise_with_two_args(value tag, value arg1, value arg2)
{
  value exc;

  /* Protects tag, arg1 and arg2 from being reclaimed by the garbage
     collector when the exception value is allocated */
  Begin_roots3(tag, arg1, arg2);
    exc = alloc_small (3, 0);
    Field(exc, 0) = tag;
    Field(exc, 1) = arg1;
    Field(exc, 2) = arg2;
  End_roots();

  mlraise(exc);
}

/* Makes OCaml-string from PCRE-version */
value pcre_version_wrapper() { return copy_string((char *) pcre_version()); }

/* Makes compiled regular expression from compilation options, an optional
   value of chartables and the pattern string */
value pcre_compile_wrapper(value options, value table_val, value regexp)
{
  value result; /* Final result -> value of type [regexp] */
  const char *error = NULL; /* pointer to possible error message */
  int error_offset = 0; /* offset in the pattern at which error occurred */

  /* If table_val = [None] then pointer to tables is NULL, otherwise
     set it to the appropriate value */
  chartables tables =
    (table_val == Val_int(0)) ? NULL
                              : (chartables) Field(Field(table_val, 0), 1);

  /* Compiles the pattern */
  pcre *pattern = pcre_compile(String_val(regexp),
                               Int_val(options),
                               &error, &error_offset, tables);

  /* Raises appropriate exception [BadPattern] if the pattern could not
     be compiled */
  if (pattern == NULL) raise_with_two_args(*pcre_exc_BadPattern,
                                           copy_string((char *) error),
                                           Val_int(error_offset));

  /* Finalized value: no idea whether this garbage collector setting
     (ratio of 1:10 for used/max) is useful. */
  result = alloc_final(4, pcre_dealloc_pattern, 1, 10);

  /* Field[1]: compiled regular expression (Field[0] is finalizing
     function! See above!) */
  Field(result, 1) = (value) pattern;

  /* Field[2]: extra information about pattern when it has been studied
     successfully */
  Field(result, 2) = (value) NULL;

  /* Field[3]: If 0 -> pattern has not yet been studied
                  1 -> pattern has already been studied */
  Field(result, 3) = 0;
  return result;
}

/* Studies a pattern */
value pcre_study_wrapper(value pat)
{
  /* If it has not yet been studied */
  if (! (int) Field(pat, 3)) {
    const char *error = NULL;
    pcre_extra *extra = pcre_study((pcre *) Field(pat, 1), 0, &error);
    if (error != NULL) invalid_argument((char *) error);
    Field(pat, 2) = (value) extra;
    ++Field(pat, 3);
  }

  return pat;
}

/* Returns information (of type [info]) on patterns */
value pcre_info_wrapper(value pat)
{
  CAMLparam1(pat); /* Protects input parameter from being reclaimed */

  /* Gets compilation options, information on firstchar and number of
     subpatterns */
  int options, firstchar;
  int subpats = pcre_info((pcre *) Field(pat, 1), &options, &firstchar);

  int study_stat = 0; /* Initial status: [Not_studied] */
  value v_firstchar, result = alloc_small(4,0);

  Field(result, 0) = Val_int(subpats);
  Field(result, 1) = Val_int(options);

  /* Generates the appropriate constant constructor ([Studied] or
     [Optimal] if pattern has already been studied */
  if (Field(pat, 3))
    study_stat = ((pcre_extra *) Field(pat, 2) == NULL) ? 1 : 2;

  Field(result, 3) = Val_int(study_stat);

  switch (firstchar) {
    case -1 : Field(result, 2) = Val_int(0); break; /* [StartOnly] */
    case -2 : Field(result, 2) = Val_int(1); break; /* [Anchored] */
    default :
      /* Result block: initialized fields in blocks allocated with
         "alloc_small" make the garbage collector
         happy if further things are to be allocated */
      Field(result, 2) = Val_int(0);

      if (firstchar < 0 ) /* Should not happen */
        raise_with_string (*pcre_exc_InternalError, "pcre_info_wrapper");

      /* Protects the fresh result block from being reclaimed on the
         following allocation */
      Begin_roots1(result);
        /* Allocates the non-constant constructor [Char of char], fills
           in the appropriate value and puts it into the result block */
        v_firstchar = alloc_small (1,0);
        Field(v_firstchar, 0) = Val_int(firstchar);
        Store_field(result, 2, v_firstchar);
      End_roots();
  }

  CAMLreturn result;
}

/* Executes a pattern match with runtime options, a regular expression, a
   string offset, a string length, a subject string, a number of subgroup
   offsets and an offset vector */
value pcre_exec_wrapper(value v_opt, value v_pat, value v_offset,
                        value v_subject, value v_subgroups2, value v_ovec)
{
  int offset = Int_val(v_offset), len = string_length(v_subject);
  if (offset > len || offset < 0)
    invalid_argument("Pcre.pcre_exec_wrapper: illegal offset");
  {
    const pcre *code = (pcre *) Field(v_pat, 1); /* Compiled pattern */
    const pcre_extra *extra = (pcre_extra *) Field(v_pat, 2); /* Extra info */
    const char *subject = String_val(v_subject); /* Subject string */
    int subgroups2 = Int_val(v_subgroups2);
    int subgroups1 = subgroups2 >> 1;
    int *ovector = (int *) &Field(v_ovec, 0);
    int subgroups2_1 = subgroups2 - 1;
    int *ovec_source = ovector + subgroups2_1;
    long int *ovec_dest = (long int *) ovector + subgroups2_1;

    /* Performs the match */
    int ret =
      pcre_exec(code, extra, subject, len, offset, Int_val(v_opt),
                ovector, subgroups1 + subgroups2);

    if (ret == -1) raise_constant(*pcre_exc_Not_found); /* [Not_found] */

    /* Converts offsets from C-integers to OCaml-Integers.
       This is a bit tricky, because there are 32- and 64-bit platforms
       around and OCaml chooses the larger possibility for representing
       integers when available (also in arrays). */
    while (subgroups2--) {
      *ovec_dest = Val_int(*ovec_source);
      --ovec_source; --ovec_dest;
    }

    if (ret >= 0) return Val_unit;
    
    /* Should not happen */
    raise_with_string (*pcre_exc_InternalError, "pcre_exec_wrapper");
  }
}

/* Byte-code hook for pcre_exec_wrapper.
   Needed, because there are more than 5 arguments */
value pcre_exec_wrapper_bc(value *argv, int argn)
{
  return pcre_exec_wrapper(argv[0], argv[1], argv[2], argv[3],
                           argv[4], argv[5]);
}

/* Generates a new set of chartables for the current locale (see man
   page of PCRE */
value pcre_maketables_wrapper()
{
  /* Table sets are of size 864 - a maximum of ten table sets shall
     exist unreclaimed. Is this a good idea? */
  value result = alloc_final(2, pcre_dealloc_tables, 864, 8640);
  Field(result, 1) = (value) pcre_maketables();
  return result;
}
