/*GPL*START*
 * 
 * Copyright (C) 1998 by Johannes Overmann <overmann@iname.com>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 * *GPL*END*/  

#include "tregex.h"


// Template regex_t that the TRegEx constructor copies into each new
// object's preg member before regcomp() fills it in.
static regex_t null_preg;
// Set to true by the first TRegEx constructor call, after it has memset()
// null_preg (and that object's own preg) to all zero bytes.
static bool null_preg_initialized = false;

// history: see header file

// ctor & dtor

// Compile the pattern `regex` with the regcomp() flags `cflags`.
// A compile failure is not reported here: the regcomp() return code is
// stored in _error and checked by the other member functions.
// nosub records whether REG_NOSUB was requested.
TRegEx::TRegEx(const char *regex, int cflags):
   preg(null_preg), _error(0), nosub(false), regstr(regex)
{
   // Very first construction only: zero the shared template and this
   // object's copy of it explicitly.
   if(null_preg_initialized == false) {
      memset(&preg, 0, sizeof(regex_t));
      memset(&null_preg, 0, sizeof(regex_t));
      null_preg_initialized = true;
   }

   nosub  = (cflags & REG_NOSUB) != 0;
   _error = regcomp(&preg, regex, cflags);
}


// Release the compiled pattern.
// regfree() is only called when regcomp() succeeded (_error == 0): POSIX
// leaves regfree() undefined for a regex_t that is not a successfully
// compiled expression, and some C libraries already release the object
// inside regcomp()'s own error path, so freeing again could double free.
TRegEx::~TRegEx() {
   if(_error == 0)
     regfree(&preg);
}


// error reporting

// Return the regerror() text for the error code stored by the constructor.
// Calling this on an object without a compile error is a programming error
// and is reported through fatalError().
string TRegEx::errorToStr() const {
   if(!_error) {
      fatalError("errorToStr(): no regcomp error detected!\n");
      // Only reached if fatalError() returns; without this the function
      // would run off its end without returning a value.
      return string();
   }
   char buf[1000];
   regerror(_error, &preg, buf, sizeof(buf));
   return string(buf);
}   


// If the pattern failed to compile, report it through userError() together
// with the pattern text and the regerror() message. Does nothing otherwise.
void TRegEx::exitOnError() const {
   if(!_error) return;
   userError("invalid regex '%s': %s\n", *regstr, *errorToStr());
}


// matching

// Return true if the compiled pattern matches somewhere in `str`.
// `flags` is handed to regexec() unchanged; no match positions are
// requested. Calls fatalError() if the pattern did not compile.
bool TRegEx::match(const char *str, int flags) const {
   if(_error)
     fatalError("match(): erroneous regex!\n");
   const int rc = regexec(&preg, str, 0, 0, flags);
   return rc == 0;
}


// Find the first match of the pattern in `str`.
// On success returns true and stores the match offset in `start` and its
// length in `len`. On failure returns false and sets both to -1.
// Calls fatalError() if the pattern did not compile or was compiled with
// REG_NOSUB.
bool TRegEx::firstMatch(const char *str, int& start, int& len, int flags) const {
   if(_error)
     fatalError("firstMatch(): erroneous regex!\n");
   if(nosub)
     fatalError("firstMatch(): regex was compiled with REG_NOSUB!\n");

   regmatch_t whole;
   if(regexec(&preg, str, 1, &whole, flags) != 0) {
      // no match
      len   = -1;
      start = -1;
      return false;
   }

   start = whole.rm_so;
   len   = whole.rm_eo - whole.rm_so;
   return true;
}


// Collect successive matches of the pattern in `str`.
// For every match two ints are appended to `all`: the offset of the match
// relative to the beginning of `str`, and its length. The search resumes
// directly behind each match with REG_NOTBOL added to the flags, and stops
// after the first empty match (which is still recorded).
// Returns true if at least one match was found.
// Calls fatalError() if the pattern did not compile or was compiled with
// REG_NOSUB.
bool TRegEx::allMatches(const char *str, TArray<int>& all, int flags) const {
   if(_error)
     fatalError("allMatches(): erroneous regex!\n");
   if(nosub)
     fatalError("allMatches(): regex was compiled with REG_NOSUB!\n");

   regmatch_t hit;
   const char *cursor = str;   // where the next search starts
   int consumed = 0;           // distance of cursor from str
   bool found = false;

   while(regexec(&preg, cursor, 1, &hit, flags) == 0) {
      found = true;
      const int hitStart = hit.rm_so;
      const int hitLen   = hit.rm_eo - hit.rm_so;
      all += hitStart + consumed;
      all += hitLen;
      // an empty match would never advance the cursor
      if(hitLen == 0) break;
      flags |= REG_NOTBOL;
      const int step = hitStart + hitLen;
      cursor   += step;
      consumed += step;
   }
   return found;
}


bool TRegEx::allMatchesSubstring(const char *str, TArray<TArray<int> >& all, 
				 int flags, int progress, int progmode) const 
{
   if(_error)
     fatalError("allMatchesSubstring(): erroneous regex!\n");
   if(nosub)
     fatalError("allMatchesSubstring(): regex was compiled with REG_NOSUB!\n");
   const char *p = str;
   bool match = false;
   int start, len, off = 0, j=0;
   TArray<int> sub;
   regmatch_t m[MAX_SUBSTRING];
   FILE *pout=stdout;
   if(progress < 0) {
      pout = stderr;
      progress = -progress;
   }
   if(progmode & P_STDERR) pout = stderr;
   bool prognum = (progmode & P_NUMBER) > 0;
   while(1) {
      memset(m, -1, sizeof(regmatch_t)*MAX_SUBSTRING);
      if(regexec(&preg, p, MAX_SUBSTRING, m, flags)) break;
      match = true;
      sub.empty();
      for(int i=0; i < MAX_SUBSTRING; i++) {
	 // workaround for bug in regex
	 // if(m[i].rm_so >= m[i].rm_eo) break; 	 
#if 1
	 if(m[i].rm_so != -1) {
	    sub += m[i].rm_so + off;
	    sub += m[i].rm_eo - m[i].rm_so;
	 }
#else
	 sub += m[i].rm_so;
	 sub += m[i].rm_eo;
#endif
      }
      start = m->rm_so;
      len   = m->rm_eo - m->rm_so;
      all += sub;
      if(len==0) break;
      flags |= REG_NOTBOL;
      p   += start + len;
      off += start + len;
      
      // show progress
      if(progress) {
	 if((j%progress)==0) {
	    if(prognum) fprintf(pout, "%6d   \r", j);
	    else putc('.', pout);	       
	    fflush(pout);
	 }	   
	 j++;
      }
   }
   return match;
}


// Map a parameter character to its index: '0'..'9' give 0..9 and
// 'a'..'z' give 10..35. Every other character yields -1.
static int ValidatePos(char c) {
   if((c >= '0') && (c <= '9')) return c - '0';
   if((c >= 'a') && (c <= 'z')) return c - 'a' + 10;
   return -1;
}


// Expand backslash parameters in the replacement text `sub`.
// org: text the offsets in occ refer to
// sub: replacement template
// occ: (offset, length) pairs; pair number p supplies the text for "\p"
// "\\" in sub becomes a single backslash. "\p", where ValidatePos(p)
// names an existing pair, is replaced by the corresponding piece of org.
// Any other backslash is copied unchanged. If occ holds no complete pair,
// sub is returned as it is.
static string BackslashParamSubstitute(const string& org, const string& sub,
				       const TArray<int>& occ) {
   const int pairs = occ.num()/2;
   if(pairs == 0) return sub;

   string result;
   int copied = 0;   // first position of sub not yet appended to result
   int idx = 0;

   // the last character is never treated as the start of an escape
   while(idx < sub.len()-1) {
      if(sub[idx] != '\\') {
	 idx++;
	 continue;
      }
      const char next = sub[idx+1];
      if(next == '\\') {
	 // protected backslash: keep the first one, drop the second
	 result += sub(copied, idx+1);
	 idx += 2;
	 copied = idx;
	 continue;
      }
      const int which = ValidatePos(next);
      if((which < 0) || (which >= pairs)) {
	 // not a usable parameter: leave the text alone
	 idx++;
	 continue;
      }
      result += sub(copied, idx);
      idx += 2;
      copied = idx;
      result += org(occ[2*which], occ[2*which] + occ[2*which+1]);
   }
   result += sub(copied, string::END);
   return result;
}


void parameterSubstitution(const string& in, string& out, const string& sub,
			   const TArray<TArray<int> >& occ, bool preserve_case,
			   int modify_case, int progress) {
   out.empty();
   int pos=0;
   for(int i=0; i<occ.num(); i++) {
      out += in(pos, occ[i][0]);
      if(!preserve_case) {
	 if(modify_case) 
	   out += modifyCase(BackslashParamSubstitute(in, sub, occ[i]),
			     modify_case);
	 else
	   out += BackslashParamSubstitute(in, sub, occ[i]);
      } else out += preserveCase(in(occ[i][0], occ[i][0] + occ[i][1]), 
			       BackslashParamSubstitute(in, sub, occ[i]));
      pos = occ[i][0] + occ[i][1];
      if(progress>0) if((i%progress)==0) {putchar('.');fflush(stdout);}
   }
   out += in(pos, string::END);
}

