/* rxsubst.c - generate replacement for regular expression
 * Copyright (C) 1995-99 Andrew Pipkin (minitrue@pagesz.net)
 * MiniTrue is free software released with no warranty. See COPYING for details
 */

#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>

#include "rxsubst.h"
#include "regexp.h"
#include "minitrue.h"

/* Shared buffer in which RxSubst_str assembles each replacement string.
 * It is allocated by RxSubst_init and freed by RxSubst_kill */
char *Subst_buf;
/* Number of bytes currently allocated for Subst_buf. RxSubst_init allocates
 * the buffer when this is 0; RxSubst_str doubles it when more room is
 * needed */
size_t Buf_len;

/* Codes for the case-converting escape sequences \l, \u, \L, \U and \E.
 * RxSubst_init stores them, negated, in the len field of an atom. They start
 * at 1000 so that RxSubst_str can tell them apart from backreference
 * numbers, which it expects to be below 100 */
enum case_conv_chs { LOW_CH = 1000, UP_CH, LOW_CHS, UP_CHS, END_CASE };

/* Parse the regular expression substitution src and return an array of
 * replacement atoms, terminated by an atom whose sub_str is NULL.
 *
 * src      - substitution text; it is decoded in place (escape sequences
 *            are collapsed into the same buffer) and the literal atoms point
 *            into it, so it must stay valid while the result is in use
 * nparens  - number of parenthesized subexpressions in the regular
 *            expression; backreferences above this number are not treated
 *            as backreferences
 * str_len  - written only when the replacement is a single fixed string
 *
 * Each atom is either a literal (len >= 0, sub_str points at the text), a
 * backreference (len == -paren_no) or a case conversion (len is the negated
 * case_conv_chs code).
 *
 * If the replacement is a fixed string, return NULL and set *str_len to the
 * length of the decoded string, which then starts at src. The shared
 * replacement buffer Subst_buf is allocated on the first call */
RxSubst *RxSubst_init(char *src, int nparens, size_t *str_len)
{
    char *dest = src;
    RxSubst *rx_subst = NULL;
    int atom_i = 0, nalloc = 0;

    do
    {   char *sub_str_start = dest, *sub_str_end = dest;
        int paren_no = 0;
     /* Allocate more memory if there are not at least 3 free atoms; one
      * pass adds at most a literal and a special atom, and the terminator
      * is appended after the loop */
        if(atom_i + 3 >= nalloc)
        {   nalloc   = !nalloc ? 4 : nalloc * 2;
            rx_subst = x_realloc(rx_subst, sizeof(RxSubst) * nalloc);
        }
     /* Keep going until an escape sequence found which represents a
      * parenthesized subexpression or other metacharacter */
        for( ; ; )
        {   if(*src == '\\')
            {/* If this escape turns out to be special, the literal text
              * collected so far ends here */
                char *literal_end = dest;

                if(src[1] == 'z')
                {   src += 2;
                    continue;
                }
             /* Test for case-converting escape sequence, record them as
              * paren nos of 1000 or more */
                else if(src[1] == 'l')
                    paren_no = LOW_CH;
                else if(src[1] == 'u')
                    paren_no = UP_CH;
                else if(src[1] == 'L')
                    paren_no = LOW_CHS;
                else if(src[1] == 'U')
                    paren_no = UP_CHS;
                else if(src[1] == 'E')
                    paren_no = END_CASE;
             /* \vNN is a backreference written with exactly two digits */
                else if(src[1] == 'v')
                {/* Start above any accepted paren number so that a missing
                  * digit pair is reported below */
                    paren_no = 1000;
                    if(   isdigit((unsigned char)src[2])
                       && isdigit((unsigned char)src[3]))
                        paren_no = (src[2] - '0') * 10 + (src[3] - '0');
                 /* NOTE(review): the code below assumes input_error does
                  * not return - confirm */
                    if(paren_no > nparens)
                        input_error("\\v must be followed by 2-digit backreference");

                    src += 4;
                }
                else if(   (src[1] == '8' || src[1] == '9')
                        && src[1] - '0' <= nparens)
                {   paren_no = src[1] - '0';
                    src += 2;
                }
                else
                {/* One or two digits may be a backreference; if escape
                  * sequence has three digits, treat as octal num */
                    int ndigits = 0;
                    while(isdigit((unsigned char)src[ndigits + 1]))
                        ++ndigits;

                    src = (char *)esc_to_ch(dest, src);
                    if(ndigits && ndigits < 3 && *dest <= nparens)
                        paren_no = *dest;
                    ++dest;
                }
                if(paren_no)
                {/* Close the literal in front of every kind of special
                  * escape, so that text preceding a case conversion, \vNN,
                  * \8 or \9 is kept and not only text preceding a numeric
                  * backreference */
                    sub_str_end = literal_end;
                 /* Case-conversion escapes have not been skipped yet */
                    if(paren_no > 100)
                        src += 2;
                    break;
                }
            }
            else if(!*src)
            {   sub_str_end = dest;
                break;
            }
         /* If char not escape sequence, just copy it */
            else
                *dest++ = *src++;
        }
     /* The first atom is always a literal, even when empty, so that the
      * fixed-string test below can rely on it */
        if(sub_str_start != sub_str_end || !atom_i)
        {   rx_subst[atom_i].sub_str = sub_str_start;
            rx_subst[atom_i++].len   = sub_str_end - sub_str_start;
        }
     /* sub_str of a special atom only has to be non-NULL */
        if(paren_no)
        {   rx_subst[atom_i].sub_str = sub_str_start;
            rx_subst[atom_i++].len   = -abs(paren_no);
        }
    }while(*src);

 /* End of regexp substitution will be represented by sub_str of NULL */
    rx_subst[atom_i++].sub_str = NULL;

 /* Trim unused atoms */
    rx_subst = x_realloc(rx_subst, atom_i * sizeof(RxSubst));

 /* If substitution consists of a single fixed string, do not need structure
  * so return NULL, setting *str_len to the length of the string */
    if(atom_i == 1 || (atom_i == 2 && rx_subst[0].len >= 0))
    {   int len = rx_subst[0].len;
        free(rx_subst);
        *str_len = len;
        rx_subst = NULL;
    }
 /* Initialize buffer for where replacements are constructed if not already
  * initialized */
    if(!Buf_len)
    {   Subst_buf = x_malloc(256);
        Buf_len   = 256;
    }
    return rx_subst;
}

/* Create the desired replacement string for the Regexp reg_exp by walking
 * the atoms in rx_subst up to the terminating atom (sub_str == NULL).
 *
 * rx_subst - atom array as returned by RxSubst_init
 * reg_exp  - passed to RegExp_paren to fetch the text of a backreference
 * len      - receives the length of the replacement
 *
 * Return a pointer to the start of the replacement. The result lives in the
 * shared buffer Subst_buf, so it is overwritten by the next call, and it is
 * not NUL-terminated */
char *RxSubst_str(RxSubst *rx_subst, RegExp *reg_exp, size_t *len)
{
    const char *sub_str;
    int buf_i = 0, sub_str_len;
 /* Number of upcoming characters still to be converted to lower / upper
  * case; INT_MAX means "until cancelled" */
    int nlow_case = 0, nup_case = 0;

    for( ; rx_subst->sub_str != NULL ; ++rx_subst)
    {   int atom_len = rx_subst->len;
     /* Literal text */
        if(atom_len >= 0)
        {   sub_str     = rx_subst->sub_str;
            sub_str_len = atom_len;
        }
     /* Backreference, stored as the negated paren number */
        else if(atom_len > -100)
            sub_str = RegExp_paren(reg_exp, -atom_len, &sub_str_len);
     /* Case conversion: only changes state, produces no text itself */
        else
        {   switch(abs(atom_len))
            { case LOW_CH:
                if(nlow_case < 1)
                    nlow_case = 1;
                nup_case = 0;
                break;

              case UP_CH:
                if(nup_case < 1)
                    nup_case = 1;
                nlow_case = 0;
                break;

              case LOW_CHS:
                nlow_case = INT_MAX;
                nup_case  = 0;
                break;

              case UP_CHS:
                nup_case  = INT_MAX;
                nlow_case = 0;
                break;

              case END_CASE:
                nup_case = nlow_case = 0;
                break;

              default:
                break;
            }
            continue;
        }

     /* Expand buffer if not enough space to hold substring. Start from a
      * non-zero size so that the doubling always terminates, even if the
      * buffer has not been allocated */
        if((size_t)buf_i + sub_str_len > Buf_len)
        {   size_t new_len = Buf_len ? Buf_len : 256;
            while((size_t)buf_i + sub_str_len > new_len)
                new_len *= 2;
            Subst_buf = x_realloc(Subst_buf, new_len);
            Buf_len   = new_len;
        }
        memcpy(&Subst_buf[buf_i], sub_str, sub_str_len);
     /* Convert case for desired # of characters. The pointer is advanced in
      * a separate statement: modifying it inside the same expression that
      * also uses it for the store is undefined behaviour */
        if(nlow_case)
        {   char *lc_ptr = &Subst_buf[buf_i];
            int nlc = miN(sub_str_len, nlow_case);
            nlow_case -= nlc;
            while(nlc--)
            {   *lc_ptr = low_casE(*lc_ptr);
                ++lc_ptr;
            }
        }
        else if(nup_case)
        {   char *uc_ptr = &Subst_buf[buf_i];
            int nuc = miN(sub_str_len, nup_case);
            nup_case -= nuc;
            while(nuc--)
            {   *uc_ptr = up_casE(*uc_ptr);
                ++uc_ptr;
            }
        }
        buf_i += sub_str_len;
    }
    *len = buf_i;
    return Subst_buf;
}

void RxSubst_kill(void) { free(Subst_buf); }

