/* rxmisc.c - miscellaneous functions for regular expressions
 * Copyright (C) 1995-99 Andrew Pipkin (minitrue@pagesz.net)
 * MiniTrue is free software released with no warranty. See COPYING for details
 */

#include <string.h>
#include <assert.h>

#include "regexp.h"
#include "minitrue.h"

/* Prepare rx for use: attach a freshly allocated, zero-filled RxData and
 * mark the atom array as empty */
void Rx_init(RegExp *rx)
{
    RxData *data = x_malloc(sizeof *data);

 /* Clear every field, then set the one member with a non-zero default */
    memset(data, 0, sizeof *data);
    data->paren_level = -1;

 /* No atoms have been allocated yet */
    rx->data  = data;
    rx->start = NULL;
    rx->len   = 0;
}

/* Length of the text matched by a single occurrence of the atom:
 * the string length for a string atom, 1 for any other data atom
 * (char, set or backreference), 0 if the atom is not data at all */
int Rx_atom_len(RxAtom *rx)
{
    int kind = Rx_is_data(rx);

    if(!kind)
        return 0;
    if(kind == RX_STR)
        return rx->data.str.len;
    return 1;
}

/* Give access to the text held by a char or string atom. The length is
 *  stored through str_len and a pointer to the first character is returned.
 *  A single-character atom is presented as a string of length 1 */
char *Rx_atom_str(RxAtom *str_atom, int *str_len)
{
    int type = Rx_type(str_atom);
    assert(type == RX_CH || type == RX_STR);

 /* String atom: hand back the stored string and its length */
    if(type != RX_CH)
    {   *str_len = str_atom->data.str.len;
        return (char *)str_atom->data.str.start;
    }

 /* Character atom: point straight at the character stored in the atom */
    *str_len = 1;
    return &str_atom->data.ch;
}

/* Minimum length the atom can match: the length of one occurrence
 * combined with the atom's minimum repeat count by mult_truncate */
int Rx_min_len(RegExp *rx, RxAtom *rx_ptr)
{
    int unit_len;

 /* A backreference is as short as the shortest match of its parentheses */
    if(rx_ptr->flags & RX_BREF)
        unit_len = rx->data->parens[rx_ptr->data.bref.num].min_len;
    else
        unit_len = Rx_atom_len(rx_ptr);

    return mult_truncate(rx_ptr->min, unit_len);
}

/* Maximum length the atom can match: the length of one occurrence
 * combined with the atom's maximum repeat count by mult_truncate */
int Rx_max_len(RegExp *rx, RxAtom *rx_ptr)
{
    int unit_len;

 /* A backreference is as long as the longest match of its parentheses */
    if(rx_ptr->flags & RX_BREF)
        unit_len = rx->data->parens[rx_ptr->data.bref.num].max_len;
    else
        unit_len = Rx_atom_len(rx_ptr);

    return mult_truncate(rx_ptr->max, unit_len);
}

/* Return true if the parentheses carry a quantifier, i.e. their repeat
 * range is anything other than exactly once; false otherwise */
int Rx_paren_quant(Paren *paren)
{
    return !(paren->min == 1 && paren->max == 1);
}

/* Append a new atom to the workspace, zero it and give it the desired
 * opcode. Return a pointer to the new atom */
RxAtom *Rx_init_atom(RegExp *rx, int opcode)
{
    RxAtom *slot;

 /* Make room for one more atom; this may move the array at rx->start */
    Rx_set_nfree(rx, 1);

 /* Claim the first unused slot */
    slot = rx->start + rx->len;
    rx->len += 1;

    memset(slot, 0, sizeof *slot);
    slot->opcode = opcode;
    return slot;
}

/* Append a new atom with the given opcode and record the index of the
 * parentheses it belongs to. Return a pointer to the new atom */
RxAtom *Rx_init_atom2(RegExp *rx, int opcode, int paren_i)
{
    RxAtom *fresh;

    fresh = Rx_init_atom(rx, opcode);
    fresh->fail.paren_i = paren_i;

    return fresh;
}

/* Append a new atom with the given opcode, parentheses index and flags.
 * Return a pointer to the new atom */
RxAtom *Rx_init_atom3(RegExp *rx, int opcode, int paren_i, int flags)
{
    RxAtom *fresh = Rx_init_atom(rx, opcode);

    fresh->fail.paren_i = paren_i;
    fresh->flags        = flags;
    return fresh;
}

/* Overwrite the opcode, parentheses index and flags of an existing atom;
 * all other fields of the atom are left untouched */
void Rx_set_atom3(RxAtom *atom, int opcode, int paren_i, int flags)
{
    atom->flags        = flags;
    atom->fail.paren_i = paren_i;
    atom->opcode       = opcode;
}

/* Append a duplicate of the atom at index rx_i to the end of the atom
 * array. Return a pointer to the duplicate */
RxAtom *Rx_copy_atom(RegExp *rx, int rx_i)
{
    RxAtom *copy = Rx_init_atom(rx, 0);

 /* Look up the source only after appending: growing the array may have
  * moved it, so rx->start has to be read afresh */
    *copy = *(rx->start + rx_i);
    return copy;
}

/* Guarantee that at least min_nfree unused atoms follow the ones in use,
 * growing the atom array when there are too few */
void Rx_set_nfree(RegExp *rx, int min_nfree)
{
    RxData *data = rx->data;

    for( ; ; )
    {   if(rx->len + min_nfree <= data->nalloc)
            break;

     /* First allocation gets 16 atoms, later ones double the capacity */
        if(data->nalloc)
            data->nalloc *= 2;
        else
            data->nalloc = 16;

        rx->start = x_realloc(rx->start, data->nalloc * sizeof(RxAtom));
    }
}

/* Discard the final atom by shrinking the count of atoms in use by one;
 * the storage itself is kept */
void Rx_drop(RegExp *rx)
{
    rx->len -= 1;
}

/* Return the simple data type of the atom, which is kept in the two
 * lowest bits of its flags; 0 means the atom is not simple data */
int Rx_type(RxAtom *atom)
{
    int type_bits = atom->flags & 3;
    return type_bits;
}

/* Return non-zero if the atom is simple data or a backreference. The
 * value is the masked flag bits themselves, not a normalized 0/1 */
int Rx_is_data(RxAtom *atom)
{
    return atom->flags & (RX_BREF | 3);
}

/* Return non-zero if the atom is a parenthesis of either kind
 * (left or right) */
int Rx_is_paren(RxAtom *atom)
{
    return (atom->flags & RX_LPAREN) | (atom->flags & RX_RPAREN);
}

/* Return non-zero if the atom marks an alternative */
int Rx_is_alt(RxAtom *atom)
{
    return atom->flags & RX_ALTERN;
}

/* Return non-zero if the atom is a zero-length anchor */
int Rx_is_anchor(RxAtom *atom)
{
    return atom->flags & RX_ANCHOR;
}

/* Return non-zero if the atom is a left parenthesis */
int Rx_is_lparen(RxAtom *atom)
{
    return atom->flags & RX_LPAREN;
}

/* Return non-zero if the atom is a right parenthesis */
int Rx_is_rparen(RxAtom *atom)
{
    return atom->flags & RX_RPAREN;
}

/* Return the index of the atom in which the parsed atom rx_i is encoded,
 * as recorded in the per-atom data table */
int Rx_encoded_i(RegExp *rx, int rx_i)
{
    int encoded = rx->data->atom_data[rx_i].encoded_i;
    return encoded;
}

/* Return the parentheses record that the atom at atom_i belongs to */
Paren* Rx_paren(RegExp *rx, int atom_i)
{
    int paren_i = rx->start[atom_i].fail.paren_i;

    return rx->data->parens + paren_i;
}

/* Return 1 if the atom terminates an alternative of the parentheses at
 * paren_i: it must belong to those parentheses and be a left paren, a
 * right paren or an alternation. Return 0 otherwise */
int Rx_is_alt_end(RxAtom *atom, int paren_i)
{
    if(atom->fail.paren_i != paren_i)
        return 0;

    return (atom->flags & (RX_LPAREN | RX_RPAREN | RX_ALTERN)) != 0;
}

/* Return the parentheses that directly enclose paren, found through
 * its enclose_i index */
Paren *Rx_outer_paren(RegExp *rx, Paren *paren)
{
    return rx->data->parens + paren->enclose_i;
}

/* Return non-zero if the atom at rx_i lies inside the parentheses paren,
 * either directly or through nested parentheses */
int Rx_in_paren(RegExp *rx, int rx_i, Paren *paren)
{
    Paren *table = rx->data->parens;
    Paren *cur   = table + rx->start[rx_i].fail.paren_i;

 /* An atom whose own parentheses are shallower than paren cannot be
  * contained in paren */
    if(cur->level < paren->level)
        return FALSE;

 /* Climb outwards from the atom's parentheses until reaching the
  * nesting level of paren */
    for( ; cur->level > paren->level; cur = table + cur->enclose_i)
        ;

    return (cur == paren);
}

/* Starting at paren and moving outwards, return the first parentheses
 * that carry a quantifier (repeat range other than exactly once).
 * paren itself is returned if it is quantified; NULL is returned when
 * the walk reaches parentheses whose enclose_i is 0 without finding any */
Paren *Rx_closest_quant(RegExp *rx, Paren *paren)
{
 /* Keep climbing while the current parentheses match exactly once */
    while(paren->min == 1 && paren->max == 1)
    {   if(!paren->enclose_i)
            return NULL;
        paren = &rx->data->parens[paren->enclose_i];
    }
    return paren;
}

/* Find the outermost quantified parentheses around the atom at rx_i.
 *  The search climbs outwards and stops at the first parentheses that
 *  begin before start_i or end after end_i. Parentheses count as
 *  quantified when max exceeds 1 or min is 0.
 *  Return NULL if no quantified parentheses were found */
Paren *Rx_quant_paren(RegExp *rx, int rx_i, int start_i, int end_i)
{
    RxData *rxd    = rx->data;
    Paren  *found  = NULL, *paren;
    int paren_i    = rx->start[rx_i].fail.paren_i;

 /* Paren index 0 ends the walk */
    for( ; paren_i != 0; paren_i = paren->enclose_i)
    {   paren = &rxd->parens[paren_i];

     /* Do not look at parentheses reaching outside [start_i, end_i] */
        if(paren->start_i < start_i || paren->end_i > end_i)
            break;

     /* Later hits are further out, so they replace earlier ones */
        if(paren->max > 1 || paren->min == 0)
            found = paren;
    }
    return found;
}

/* Return the number of sub-parentheses contained in parentheses paren_i.
 * Sub-parentheses at every depth are counted, not just direct children.
 *
 * The count is taken by scanning the paren table forwards from paren_i;
 * this relies on the table being ordered so that everything nested in a
 * paren directly follows it (presumably order of the opening
 * parenthesis -- confirm against the parser) */
int Rx_nsub_paren(RegExp *rx, int paren_i)
{
    Paren *parens  = rx->data->parens;
    int level      = parens[paren_i].level;
    int nparen     = rx->data->nparen;
    int nsub_paren = 0;
    int i;

 /* Only entries nested deeper than paren_i belong to it. Stop at the
  * first entry that is not deeper: that is either a sibling (same level)
  * or a paren belonging to an enclosing group (lower level). Testing for
  * an equal level alone would run past the end of the enclosing group
  * whenever paren_i is its last member */
    for(i = paren_i + 1; i < nparen && parens[i].level > level; ++i)
        ++nsub_paren;

    return nsub_paren;
}

/* Return the index at which the paren is exited: end_i when dir is 1,
 * start_i for any other direction (normally -1) */
int Rx_exit_i(Paren *paren, int dir)
{
    if(dir == 1)
        return paren->end_i;

    return paren->start_i;
}

/* Search for the next alternative of the parentheses numbered paren_i,
 * beginning with the atom at start_i and stepping by dir (1 forwards,
 * -1 backwards). Return the index of the alternative, or -1 if the
 * parentheses' own left/right paren or the end of the atom array is
 * reached first */
int Rx_next_alt_i(int start_i, int paren_i, int dir, RegExp *rx)
{
    int i = start_i;

    while(i >= 0 && i < rx->len)
    {   RxAtom *atom = rx->start + i;

     /* Atoms belonging to other parentheses are simply stepped over */
        if(atom->fail.paren_i == paren_i)
        {   if(atom->flags & RX_ALTERN)
                return i;
            if(Rx_is_paren(atom))
                break;
        }
        i += dir;
    }
    return -1;
}
