/* regexp.c - main routine for regular expression
 * Copyright (C) 1995-99 Andrew Pipkin (minitrue@pagesz.net)
 * MiniTrue is free software released with no warranty. See COPYING for details
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#include "minitrue.h"
#include "regexp.h"
#include "fixedstr.h"
#include "charset.h"

/* Forward declarations of the file-local helpers defined further down */
static void clean_up(RegExp *rx);
static void convert_indices(RegExp *rx);
static RxAtom *i_to_ptr(RegExp *rx, int i);
static void atom_kill(RxAtom *rx_ptr);

static void RxData_kill(RxData *rxd);
#ifdef PAREN_DUMP
/* RegExp_init calls this before its definition; without a prototype the
 * call would be an implicit declaration that conflicts with the later
 * static void definition */
static void RxData_dump(RxData *rxd);
#endif

/* Initialize rx and compile the regular expression beginning at src into
 * it. The value returned is whatever Rx_parse returned. When that value is
 * -1 (presumably a parse failure) the analysis and encoding passes are
 * skipped. clean_up is run on both paths. */
int RegExp_init(RegExp *rx, char *src)
{
    int parse_result;

    Rx_init(rx);
    parse_result = Rx_parse(rx, src);

 /* Nothing to analyse or encode: only discard the workspace */
    if(parse_result == -1)
    {   clean_up(rx);
        return parse_result;
    }

    Rx_analyse(rx);
    Rx_encode(rx, 0, rx->len);
    Rx_init_atom(rx, RX_MATCH); /* terminate the program with a success atom */
#ifdef PAREN_DUMP
    RxData_dump(rx->data);
#endif
    clean_up(rx);
    return parse_result;
}

/* Finish a compilation: release the data owned by the parse-stage atoms
 * (the first rxd->nparse entries), slide the encoded atoms down over them,
 * shrink the atom array, convert the stored indices to pointers, and
 * finally destroy the compilation data. rx->data is reset to NULL at the
 * end so the freed workspace cannot be reached through rx afterwards. */
static void clean_up(RegExp *rx)
{
    int rx_i;
    RxData *rxd = rx->data;

 /* Free sets which were not used in the compiled version */
    for(rx_i = 0; rx_i < rxd->nparse; ++rx_i)
        atom_kill(&rx->start[rx_i]);

 /* Delete parsed atoms by shifting encoded atoms over them */
    memmove(rx->start, &rx->start[rxd->nparse],
            sizeof(RxAtom) * (rx->len - rxd->nparse));

 /* Resize atoms array */
    rx->len  -= rxd->nparse;
    rx->start = x_realloc(rx->start, rx->len * sizeof(RxAtom));

 /* Must happen while rxd is still alive: i_to_ptr reads rx->data->nparse */
    convert_indices(rx);

    RxData_kill(rxd);
    free(rxd);
    rx->data = NULL; /* do not leave a dangling pointer to the workspace */
}

/* Shared placeholder atom. convert_indices points an atom's fail.ptr here
 * whenever the atom's fail index is negative (i_to_ptr returns NULL) */
static RxAtom No_match = { 0, RX_FAIL, 0, 0, { '\0' }, {NULL} };

/* Convert the atom indices stored during compilation into RxAtom pointers.
 * This covers the fail field of every atom, the table of paren locations
 * in rx->par_locs, and the opcode-specific data fields handled below.
 * Every conversion goes through i_to_ptr, which reads rx->data, so rx->data
 * must still be valid when this function runs. */
static void convert_indices(RegExp *rx)
{
    RxAtom *rx_ptr = rx->start;
    RxData *rxd    = rx->data;
    int rx_i;

 /* Set up array in which atoms containing paren locations are stored.
  * Entry 0 of rxd->parens is skipped (presumably it describes the whole
  * expression - TODO confirm), so rx->nparen is one less than rxd->nparen
  * and slot 0 of rx->par_locs is allocated but never assigned */
    rx->nparen = rxd->nparen - 1;
    if(rxd->nparen > 1)
    {   int par_i;
        Paren *paren = &rxd->parens[1];
        rx->par_locs = x_malloc(sizeof(RxAtom *) * rxd->nparen);

        for(par_i = 1; par_i < rxd->nparen; ++par_i, ++paren)
            rx->par_locs[par_i] = i_to_ptr(rx, paren->store_i);
    }
    else
        rx->par_locs = NULL;

 /* Now convert fail in all the atoms from an index into a pointer, convert
  * some data fields as well */
    for(rx_ptr = rx->start, rx_i = 0; rx_i < rx->len; ++rx_ptr, ++rx_i)
    {   /* paren_i holds the atom's fail index (fail.i), despite its name */
        int opcode = rx_ptr->opcode, paren_i = rx_ptr->fail.i;

     /* A negative fail index converts to NULL; such atoms are pointed at
      * the shared No_match atom instead */
        if((rx_ptr->fail.ptr = i_to_ptr(rx, paren_i)) == NULL)
           rx_ptr->fail.ptr = &No_match;

     /* Convert the index containing the parentheses corresponding to the
      * backreference to a pointer after making the index positive */
        if(rx_ptr->flags & RX_BREF)
            rx_ptr->data.paren_loc = i_to_ptr(rx, -rx_ptr->data.bref.num);

     /* Relocate the stored paren locations: ptr_shift is the byte distance
      * between the pointer held in the first slot of paren_buf.start and
      * the address i_to_ptr gives for index 0, and each of the rx_ptr->min
      * entries of paren_buf.locs is moved back by that amount.
      * NOTE(review): presumably the locs were recorded against the atom
      * array as it was before the parsed atoms were removed - confirm */
        else if(opcode == GROUP_START || opcode == GROUP_START_REV)
        {   ptrdiff_t ptr_shift = (*rx_ptr->data.paren_buf.start
                                   - (char *)i_to_ptr(rx, 0));
            int loc_i;
            char const* * *loc_ptr = rx_ptr->data.paren_buf.locs;
            for(loc_i = 0; loc_i < rx_ptr->min; ++loc_i, ++loc_ptr)
                *loc_ptr = (char const * *)((char *)*loc_ptr - ptr_shift);
        }

     /* Convert the indices corresponding to the starts of the alternatives
      * in a top-level alternation; data.top_alt.len is the number of
      * SubRX entries walked */
        else if(opcode == TOP_ALT_INIT)
        {   SubRX *srx = rx_ptr->data.top_alt.sub_rxs;
            int sub_rx_i = 0;
            for(; sub_rx_i < rx_ptr->data.top_alt.len; ++sub_rx_i, ++srx)
                srx->code.ptr      = i_to_ptr(rx, srx->code.i);
        }
     /* Convert indices in list of parens to be cleared to pointers; the
      * atom's max field is used as the length of the list */
        else if(opcode == CLEAR_PARENS)
        {   int i;
            AtomLoc *clear_list = rx_ptr->data.clear_list;
            for(i = 0; i < rx_ptr->max; ++i)
                clear_list[i].ptr = i_to_ptr(rx, clear_list[i].i);
        }
     /* Set up list of sub-parens if atoms data contains sub->paren info.
      * The atom's max field is used as the number of sub-parens; that many
      * entries are copied out of rx->par_locs starting at start_i, and an
      * uninitialized buffer of two char pointers per sub-paren is
      * allocated. Both allocations are released by atom_kill.
      * NOTE(review): rx->par_locs is NULL when rxd->nparen <= 1; this
      * relies on nsub_paren being 0 in that case - confirm */
        else if(opcode == TOP_ALT_MATCH)
        {   int nsub_paren = rx_ptr->max, i;
            SubParens *sub_parens = &rx_ptr->data.sub_parens;
            RxAtom * *par_locs = x_malloc(sizeof(RxAtom *) * nsub_paren);

            for(i = 0; i < nsub_paren; ++i)
                par_locs[i] = rx->par_locs[i + sub_parens->start_i];

            sub_parens->par_locs = par_locs;
            sub_parens->buf  = x_malloc(2 * sizeof(char *) * nsub_paren);
        }
    }
}

/* Translate atom index i into a pointer into rx->start. rx->data->nparse
 * is subtracted from the index to account for the parsed atoms that were
 * shifted out of the front of the array. A negative index yields NULL. */
static RxAtom *i_to_ptr(RegExp *rx, int i)
{
    if(i < 0)
        return NULL;

    return rx->start + (i - rx->data->nparse);
}

/* Release the heap data owned by the atom at rx_ptr, selected by its opcode
 * (or by Rx_type for set atoms), then zero its opcode and flags. Atoms
 * flagged RX_ALIAS have nothing freed (presumably their data belongs to
 * another atom - TODO confirm). */
static void atom_kill(RxAtom *rx_ptr)
{
    int op = rx_ptr->opcode;

    if(!(rx_ptr->flags & RX_ALIAS))
    {   /* The order of these tests is significant: the opcode checks for
         * the find atoms come before the generic set check */
        if(op == FIND_STR)
        {   FixedStr_kill(rx_ptr->data.bmoore.table);
            free(rx_ptr->data.bmoore.table);
            free((void *)rx_ptr->data.bmoore.new_str);
        }
        else if(op == FIND_SET || op == FIND_FIRST_SETS)
            farfree(rx_ptr->data.skips);
        else if(Rx_type(rx_ptr) == RX_SET)
            CharSet_kill(&rx_ptr->data.set);
        else if(op == TOP_ALT_INIT)
            free(rx_ptr->data.top_alt.sub_rxs);
        else if(op == CLEAR_PARENS)
            free(rx_ptr->data.clear_list);
        else if(op == GROUP_START || op == GROUP_START_REV)
        {   free(rx_ptr->data.paren_buf.locs);
            free(rx_ptr->data.paren_buf.start);
        }
        else if(op == GROUP_FAIL_REV)
            free(rx_ptr->data.paren.rmost);
        else if(op == TOP_ALT_MATCH)
        {   free(rx_ptr->data.sub_parens.buf);
            free(rx_ptr->data.sub_parens.par_locs);
        }
    }

 /* Mark the atom as dead */
    rx_ptr->opcode = 0;
    rx_ptr->flags  = 0;
}

/* Release the per-atom data of each of the rx->len atoms in rx by calling
 * atom_kill on them in order.
 * NOTE(review): the atom array rx->start and the rx->par_locs table are
 * not freed here - confirm that the owner of rx releases them. */
void RegExp_kill(RegExp *rx)
{
    RxAtom *atom = rx->start;
    int remaining = rx->len;

    while(remaining-- > 0)
        atom_kill(atom++);
}

/* Free the four arrays held by the compilation data rxd: parens, alts,
 * atom_data and paren_list. The RxData structure itself is not freed. */
static void RxData_kill(RxData *rxd)
{
    void *owned[4];
    int i;

    owned[0] = rxd->parens;
    owned[1] = rxd->alts;
    owned[2] = rxd->atom_data;
    owned[3] = rxd->paren_list;

    for(i = 0; i < 4; ++i)
        free(owned[i]);
}

/* If PAREN_DUMP is set, just compile a program which dumps information
 * about the parens entered on the command line */
#if PAREN_DUMP
/* Debugging aid: print to stdout one line per entry of rxd->parens giving
 * its start and end indices, repeat counts and lengths. For a paren whose
 * first_alt_i is not -1, also list the rx_i of each alternation at the
 * paren's own level that lies before the paren's end. Finally print the
 * paren number recorded for every atom up to parens[0].end_i. */
static void RxData_dump(RxData *rxd)
{
    Paren *parens = rxd->parens;
    Altern *alts  = rxd->alts;
    int paren_i, atom_data_i;
    for(paren_i = 0; paren_i < rxd->nparen; ++paren_i)
    {   Paren *par = &parens[paren_i];
        printf("%2d: start: %3d  end: %3d  min : %3d  max : %3d  min len: %3d  max len: %3d\n",
               paren_i, par->start_i, par->end_i, par->min, par->max,
               par->min_len, par->max_len);
        if(par->first_alt_i != -1)
        {   int alt_i = par->first_alt_i;
            printf("Alterns:");
         /* Test the bound first so alts[] is never read at index nalt */
            while(alt_i < rxd->nalt && alts[alt_i].rx_i < par->end_i)
            {   if(alts[alt_i].level == par->level)
                    printf("  %d", alts[alt_i].rx_i);
                ++alt_i;
            }
            putchar('\n');
        }
    }
 /* Print the parentheses number for each atom */
    for(atom_data_i = 0; atom_data_i < parens[0].end_i; ++atom_data_i)
        printf("%d ", rxd->atom_data[atom_data_i].paren_i);
    putchar('\n');
}

/* Driver for the PAREN_DUMP build. Each pattern is compiled with
 * RegExp_init (which prints the dump) and then released with RegExp_kill.
 * Patterns are taken from the command-line arguments when any are given,
 * otherwise one per line from stdin until end of file.
 * NOTE(review): fgets keeps the trailing newline, so a pattern read from
 * stdin includes it - confirm whether that is intended. */
int main(int argc, char *argv[])
{
    int arg_i = 1;
    char line_buf[256], *rx_src;
    RegExp rx;

 /* argv[argc] is NULL, which ends the loop in command-line mode */
    while((rx_src = (argc > 1) ? argv[arg_i++]
                               : fgets(line_buf, (int)sizeof line_buf, stdin))
          != NULL)
    {   RegExp_init(&rx, rx_src); /* takes two arguments, see definition */
        RegExp_kill(&rx);
    }
    return EXIT_SUCCESS;
}
#endif /* PAREN_DUMP */
