/*GPL*START*
 * 
 * Copyright (C) 1998 by Johannes Overmann <overmann@iname.com>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 * *GPL*END*/  

#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <sys/stat.h>
#include <unistd.h>
#include "string.h"
#include "tarray.h"

// todo: 
// make Split,Unquote,ReadLine,extractFilename,extractPath 0 byte safe


// 1997
// 01:45 11 Jun split(): backslash behavior fixed (601 lines)
// 23:50 11 Jun strings may contain 0 bytes
// 12:00 19 Jun some filename extracting added
// 17:00 19 Jun more sophisticated search: ignore_case and whole_words
// 02:00 08 Jul substring extraction via operator() (start,end)
// 02:00 31 Jul new ContainsNulChar, new ReadFile, fixed \ \\ in ExpUnPrint
// 12:00 08 Aug new Upper Lower Capitalize
// 23:30 19 Aug improved collapseSpace()
// 00:00 27 Aug cropSpace() bug fixed (1 byte out of bound zero write)
// 20:00 30 Aug now cons accept 0 pointer as empty string
// 21:00 30 Aug addDirSlash() added (809 lines)
// 13:00 02 Sep isLower ... added, preserve_case for SearchReplace added (867)
// 23:45 16 Dec normalizePath() added
// 15:00 24 Dec started conversion to Rep reference model
// 18:00 27 Dec finished. debugging starts ... :)
// 00:30 09 Jan scanTools started (cc=817) (h=462)
// 00:05 12 Jan compare operators fixed (0 byte ...)



// global static nul rep
// Out-of-class definitions of the static data members of string::Rep.
// `nul` starts out as a null pointer; `nul_mem` is a static array of 10 ints.
// NOTE(review): presumably nul_mem is the storage behind the shared empty Rep
// that `nul` gets pointed at on first use -- confirm against string.h.
string::Rep* string::Rep::nul = 0;
int string::Rep::nul_mem[10];



// Return a copy of this string shortened to at most `maxchar` characters.
// A string that already fits is returned unchanged.  Otherwise the result
// is "..." followed by the last (maxchar-3) characters; when `maxchar` is
// too small to hold even the ellipsis, an empty string is returned.
string string::shortFilename(int maxchar) const {
   const int length = rep->len;

   // fits as is
   if(length <= maxchar) return *this;

   // no room for the "..." marker
   if(maxchar < 3) return "";

   // keep the tail and mark the cut with an ellipsis
   const int tail_start = length - maxchar + 3;
   return "..." + operator()(tail_start, END);
}


// Extract the next token starting at index `scanner` and advance `scanner`
// past it.
//
//  scanner       in/out: read position; on return the index of the first
//                character that is not part of the token
//  flags         bit set of character classes (ALPHA, DIGIT, ... , ALL)
//                whose members are accepted as token characters
//  allow         optional set of additionally accepted characters (may be 0)
//  forbid        optional set of characters that always end the token,
//                taking precedence over `flags` and `allow` (may be 0)
//  allow_quoted  if true and the token starts with ' or ", everything up to
//                the matching quote (or the end of the string) is returned
//                without the quotes, and `scanner` is moved past the
//                closing quote
//
// Returns the token (possibly empty).
string string::scanToken(int& scanner, int flags,
                         const char *allow, const char *forbid,
                         bool allow_quoted) const
{
   // quoted token: take everything between the quotes verbatim
   if(allow_quoted && (uint(scanner)<uint(rep->len))) {
      char q = (*rep)[scanner];
      if((q=='\'')||(q=='\"')) {
         int st(++scanner);
         while((uint(scanner)<uint(rep->len)) && ((*rep)[scanner]!=q))
           ++scanner;
         string out = operator()(st, scanner);
         if(uint(scanner)<uint(rep->len)) ++scanner; // skip closing quote
         return out;
      }
   }

   // unquoted token: accept characters while they match flags/allow
   int start(scanner);
   for(; (uint(scanner)<uint(rep->len)); ++scanner) {
      char c = (*rep)[scanner];
      // the ctype functions require a value representable as unsigned char
      int uc = (unsigned char)c;
      if(forbid && strchr(forbid, c)) break;
      if((flags&ALL                 )) continue;
      if(allow  && strchr(allow , c )) continue;
      if((flags&ALPHA) && isalpha(uc)) continue;
      if((flags&DIGIT) && isdigit(uc)) continue;
      if((flags&LOWER) && islower(uc)) continue;
      if((flags&UPPER) && isupper(uc)) continue;
      if((flags&PRINT) && isprint(uc)) continue;
      if((flags&GRAPH) && isgraph(uc)) continue;
      if((flags&CNTRL) && iscntrl(uc)) continue;
      if((flags&SPACE) && isspace(uc)) continue;
      if((flags&XDIGIT)&&isxdigit(uc)) continue;
      if((flags&PUNCT) && ispunct(uc)) continue;
      // character matched no accepted class: the token ends here
      // (without this break the class flags and `allow` had no effect)
      break;
   }
   return operator()(start, scanner);
}


void string::normalizePath() {
   TArray<string> a = split(*this, "/", false, false);
   int i;

   // delete nul dirs (/./ and //)
   for(i=0; i < a.num(); ++i) {
      if((a[i].rep->len==0) || (a[i]==".")) {
	 a.slowRemove(i--);
      }
   }
   
   // check for absolute
   if((*rep)[0]=='/') empty();
   else operator=(".");

   // delete '..'
   for(i=0; i < a.num(); ++i) {
      if((a[i]=="..") && (i>=1) && (a[i-1]!="..")) {
	 a.slowRemove(--i);
	 a.slowRemove(i--);
      } 
   }
      
   // assemble string
   if(a.num()>0 || rep->len==0)
   operator += ("/" + join(a, "/"));
}


// Return true if the string is non-empty and contains no upper case letter.
// Non-alphabetic characters are ignored.
bool string::isLower() const {
   if(rep->len==0) return false;
   for(int i=0; i<rep->len; i++) {
      // ctype functions need an unsigned char value; a negative char is undefined
      const int ch = (unsigned char)(*rep)[i];
      if(isalpha(ch) && isupper(ch)) return false;
   }
   return true;
}


// Return true if the string is non-empty and contains no lower case letter.
// Non-alphabetic characters are ignored.
bool string::isUpper() const {
   if(rep->len==0) return false;
   for(int i=0; i<rep->len; i++) {
      // ctype functions need an unsigned char value; a negative char is undefined
      const int ch = (unsigned char)(*rep)[i];
      if(isalpha(ch) && islower(ch)) return false;
   }
   return true;
}


// Return true if the string is non-empty, its first character is not a
// lower case letter and no later character is an upper case letter.
// Non-alphabetic characters are ignored in both checks.
bool string::isCapitalized() const {
   if(rep->len==0) return false;

   // ctype functions need an unsigned char value; a negative char is undefined
   const int first = (unsigned char)(*rep)[0];
   if(isalpha(first) && islower(first)) return false;

   for(int i=1; i<rep->len; i++) {
      const int ch = (unsigned char)(*rep)[i];
      if(isalpha(ch) && isupper(ch)) return false;
   }
   return true;
}


// Convert every character of the string to lower case (in place).
void string::lower() {
   detach();
   for(int i=0; i<rep->len; i++) {
      // tolower() needs an unsigned char value; a negative char is undefined
      (*rep)[i] = tolower((unsigned char)(*rep)[i]);
   }
}


// Convert every character of the string to upper case (in place).
void string::upper() {
   detach();
   for(int i=0; i<rep->len; i++) {
      // toupper() needs an unsigned char value; a negative char is undefined
      (*rep)[i] = toupper((unsigned char)(*rep)[i]);
   }
}


// Lower-case the whole string, then upper-case its first character.
void string::capitalize() {
   lower();
   // toupper() needs an unsigned char value; a negative char is undefined
   if(rep->len) (*rep)[0] = toupper((unsigned char)(*rep)[0]);
}


static const char *bytesearch(const char *mem, int mlen, 
			      const char *pat, int plen,
			      bool ignore_case, bool whole_words) {
   int i,j;
   for(i=0; i <= mlen-plen; i++) {
      if(ignore_case) {
	 for(j=0; j<plen; j++) 
	   if(tolower(mem[i+j]) != tolower(pat[j])) break;
      } else {
	 for(j=0; j<plen; j++) 
	   if(mem[i+j] != pat[j]) break;
      }
      if(j==plen) { // found
	 if(!whole_words) return mem + i;
	 else {
	    bool left_ok = true;
	    bool right_ok = true;
	    if(i > 0) if(isalnum(mem[i-1]) || (mem[i-1]=='_')) 
	      left_ok = false;
	    if(i < mlen-plen) if(isalnum(mem[i+plen]) || (mem[i+plen]=='_')) 
	      right_ok = false;
	    if(left_ok && right_ok) return mem + i;
	 }
      }
   }
   return 0; // not found
}


// Strip the directory part: keep only what follows the last '/'.
// A string without '/' is left unchanged.  Not 0 byte safe (uses strrchr).
void string::extractFilename() {
   char *slash = strrchr(rep->data(), '/');
   if(slash == 0) return;  // no directory part
   operator=(slash + 1);
}


// Strip the file name: keep everything up to and including the last '/'.
// A string without '/' becomes empty.  Not 0 byte safe (uses strrchr).
void string::extractPath() {
   char *slash = strrchr(rep->data(), '/');
   if(slash == 0) {
      empty();  // no directory part at all
      return;
   }
   truncate((slash - rep->data()) + 1);
}


// Remove one trailing '/' if the string ends with one.
void string::removeDirSlash() {
   if(lastChar() != '/') return;
   truncate(rep->len-1);
}


// Append a '/' unless the string already ends with one.
void string::addDirSlash() {
   if(lastChar() == '/') return;
   operator += ("/");
}


// Reduce the string to the extension of its file name: the text after the
// last '.' of the file name part.  The result is empty if the file name has
// no '.', or if its only relevant '.' is the very first character.
void string::extractFilenameExtension() {
   extractFilename();  // work on the file name only

   char *dot = strrchr(rep->data(), '.');
   if(dot == 0 || dot <= rep->data()) {
      empty();  // no period, or the last period is the first char
      return;
   }
   operator=(dot + 1);  // keep what follows the period
}


// Return the percentage (0..100) of characters that are neither printable
// nor whitespace.  An empty string yields 0 instead of dividing by zero.
double string::binaryPercentage() const {
   if(rep->len == 0) return 0.0;

   double bin=0;
   for(int i=0; i<rep->len; i++) {
      // ctype functions need an unsigned char value; a negative char is undefined
      const int ch = (unsigned char)(*rep)[i];
      if((!isprint(ch)) && (!isspace(ch))) bin+=1.0;
   }
   return (bin*100.0)/double(rep->len);
}


int string::searchReplace(const string& tsearch, const string& replace, 
			  bool ignore_case, bool whole_words,
			  bool preserve_case, int progress) {
   // get new length
   if(progress) {putc('S', stderr);fflush(stderr);}
   int num = search(tsearch, ignore_case, whole_words, progress);
   if(progress) {putc('R', stderr);fflush(stderr);}
   if(num==0) return 0;
   int newlen = rep->len + num*(replace.rep->len-tsearch.rep->len);

   // create new string 
   Rep *newrep = Rep::create(newlen);   
   const char *p = rep->data();  // read
   char *q =    newrep->data();  // write
   const char *r;                // found substring
   int mlen = rep->len;          // rest of read mem
   for(int i=0; i < num; i++) {
      if(progress>0) if((i%progress)==0) {putc('.', stderr);fflush(stderr);}
      r = bytesearch(p, mlen, tsearch, tsearch.rep->len, ignore_case, whole_words);
      memcpy(q, p, r-p); // add skipped part
      q += r-p;
      if(!preserve_case) { // add replaced part
	 memcpy(q, replace.rep->data(), replace.rep->len);
      } else {
	 string rep(preserveCase(string(r, tsearch.rep->len), replace.rep->data()));
	 memcpy(q, rep.rep->data(), rep.rep->len);
      }
      q += replace.rep->len;      
      mlen -= r-p;
      mlen -= tsearch.rep->len;
      p = r + tsearch.rep->len;
   }
   memcpy(q, p, mlen); // add rest
   replaceRep(newrep);
   rep->len = newlen;
   rep->terminate();
   return num;
}


// Count the non-overlapping occurrences of `pat` in this string.
//
//  ignore_case  match case-insensitively
//  whole_words  match only whole words (see bytesearch)
//  progress     if > 0, a '.' is written to stderr for every
//               `progress`-th match
//
// An empty pattern is a fatal error: a message is printed and the program
// exits with status 1.
int string::search(const string& pat,
                   bool ignore_case, bool whole_words, int progress) const {
   if(!pat) {
      fprintf(stderr, "string::search() search pattern must not be emtpy!\n");
      exit(1);
   }

   int count = 0;
   int remaining = rep->len;         // bytes left to scan
   const char *cursor = rep->data(); // start of the unscanned part
   while(true) {
      const char *hit = bytesearch(cursor, remaining, pat, pat.rep->len,
                                   ignore_case, whole_words);
      if(hit == 0) break;

      // continue directly behind this match
      remaining -= hit-cursor;
      remaining -= pat.rep->len;
      cursor = hit + pat.rep->len;

      if(progress>0 && (count%progress)==0) {
         putc('.', stderr);
         fflush(stderr);
      }
      ++count;
   }
   return count;
}


// Return true if this string starts with `pref` (bytewise comparison).
bool string::hasPrefix(const string& pref) const {
   const int plen = pref.rep->len;
   if(plen > rep->len) return false;  // prefix longer than the string
   return memcmp(rep->data(), pref.rep->data(), plen) == 0;
}


// Return true if this string ends with `suf` (bytewise comparison).
bool string::hasSuffix(const string& suf) const {
   const int slen = suf.rep->len;
   if(slen > rep->len) return false;  // suffix longer than the string
   const char *tail = rep->data() + (rep->len - slen);
   return memcmp(tail, suf.rep->data(), slen) == 0;
}


// Return true if every character is whitespace (true for the empty string).
bool string::consistsOfSpace() const {
   for(int i=0; i<rep->len; i++) {
      // isspace() needs an unsigned char value; a negative char is undefined
      if(!isspace((unsigned char)(*rep)[i])) return false;
   }
   return true;
}


// Shorten the string to `max` characters.  Nothing happens if the string
// is not longer than `max`; because the comparison is done unsigned, a
// negative `max` is treated as a huge value and also does nothing.
void string::truncate(int max) {
   if((unsigned int)max >= (unsigned int)rep->len) return;

   detach();
   rep->len = max;
   rep->terminate();
}


// Split off and return a leading piece of at most `max` characters, for
// word wrapping.  The piece is removed from this string together with the
// whitespace that follows it.
//
//  - If a '\n' occurs at index <= max, the piece ends before it.
//  - Otherwise, if the whole string fits, it is returned and this string
//    becomes empty.
//  - Otherwise the piece ends before the last ' ' found at index <= max,
//    or after exactly `max` characters if there is no such space.
//
// `max` must be >= 1, otherwise a message is printed and the program exits.
string string::getFitWords(int max) {
   if(max<1) {
      fprintf(stderr, "string::getFitWords(int): max must be >=1 (was %d)\n", max);
      exit(1);
   }

   string r(*this); // return value

   // check for lf
   int lf = firstOccurence('\n');
   if((lf!=-1) && (lf<=max)) {
      r.truncate(lf);
      // isspace() needs an unsigned char value; a negative char is undefined
      while(isspace((unsigned char)(*rep)[lf])) lf++;
      operator=(operator()(lf, END));
      return r;
   }

   // string fits
   if(rep->len <= max) {
      empty();
      return r;
   }

   // find the last space within the first max+1 characters
   int last_space = -1;
   for(int i=0; i <= max; i++) {
      if((*rep)[i] == ' ') last_space = i;
   }
   if(last_space==-1) last_space = max; // no space: hard break

   // cut there and skip the whitespace behind the cut
   r.truncate(last_space);
   while(isspace((unsigned char)(*rep)[last_space])) last_space++;
   operator=(operator()(last_space, END));
   return r;
}


// Remove quoting from the string in place.
//
//  allow_bslash  if true, a backslash directly in front of the currently
//                active quote character is dropped and that quote character
//                is kept as a literal
//  crop_space    if true, leading whitespace is skipped and the result is
//                cut after the last character that was non-space or inside
//                quotes
//
// Single and double quotes that open or close a quoted section are removed.
// The scan stops at the first 0 byte and the new length is taken with
// strlen(), so this function is not 0 byte safe (see the todo at the top of
// the file).
void string::unquote(bool allow_bslash, bool crop_space) {
   detach();
   
   // p reads, q writes; both walk the same buffer and q never overtakes p
   char *p=rep->data();
   char *q=rep->data();
   // currently open quote character, 0 while outside of quotes
   char quote=0;
   // output position of the last char written that was non-space or quoted
   char *nonspace=rep->data();
   
   if(crop_space) while(isspace(*p)) p++;
   for(; *p; p++) {
      if(allow_bslash && *p=='\\') {
	 // `quote` is 0 outside of quotes, so there this only matches a
	 // backslash that is the very last character
	 if(p[1] == quote) {
	    p++;
	    if(*p == 0) break;
	 }
	 // any other backslash falls through and is copied unchanged
      } else {
	 if(quote) {
	    // closing quote: leave quoted mode, do not copy the quote
	    if(*p == quote) {
	       quote = 0;
	       continue;
	    }
	 } else {
	    // opening quote: remember which one, do not copy it
	    if((*p == '\'') || (*p == '\"')) {
	       quote = *p;
	       continue;
	    }
	 }	 
      }
      if(quote || (!isspace(*p))) nonspace = q;
      *(q++) = *p;
   }   
   *q = 0;
   // cut trailing unquoted whitespace (only if something is at *nonspace)
   if(crop_space) if(*nonspace) nonspace[1] = 0;
   rep->len = strlen(rep->data());   
}


// Read one line of arbitrary length from `file` into this string,
// replacing the previous contents.  The trailing newline, if present, is
// kept.  Returns false if nothing could be read (end of file or error),
// true otherwise.
//
// The line is collected in chunks via fgets().  Whether a chunk ended the
// line is detected with a sentinel in the LAST buffer byte: fgets() only
// stores its terminating 0 there when it filled the whole buffer.  (The
// sentinel must not sit in the second to last byte: a line of exactly
// sizeof(buf)-2 bytes would put the terminator there and be mistaken for
// an unfinished line, gluing the next line onto it.)
bool string::readLine(FILE *file) {
   char buf[1024];
   const size_t last = sizeof(buf)-1;

   empty();
   while(1) {
      buf[last] = 1;  // sentinel, overwritten with 0 only by a full read
      if(!fgets(buf, sizeof(buf), file)) break;
      operator+=(buf);
      if(buf[last] != 0) break;       // short read: newline or eof reached
      if(buf[last-1] == '\n') break;  // buffer exactly filled by the line end
   }
   return rep->len != 0;
}


// Write all bytes of the string to `file`.
// Returns the number of bytes written, as reported by fwrite().
int string::write(FILE *file) const {
   const size_t written = fwrite(rep->data(), 1, rep->len, file);
   return written;
}


// Replace the contents of this string by up to `l` bytes read from `file`.
// Returns the number of bytes actually read, which is also the new length.
// A negative `l` is a fatal error: a message is printed and the program
// exits with status 1.
int string::read(FILE *file, int l) {
   if(l<0) {
      fprintf(stderr, "string::read(FILE*,int): len must be >=0 (was %d)!\n", l);
      exit(1);
   }

   // give up the old representation and get room for l bytes
   rep->release();
   rep = Rep::create(l);

   // the length is whatever fread() delivered
   const int got = fread(rep->data(), 1, l, file);
   rep->len = got;
   rep->terminate();
   return got;
}


// Read the whole file `filename` into this string.
// Returns  0 on success,
//         -1 if stat() fails (e.g. the file does not exist),
//         -2 if the file cannot be opened,
//         -3 if fewer bytes than the size reported by stat() were read.
int string::readFile(const char *filename) {
   struct stat info;
   if(stat(filename, &info) != 0) return -1;  // does not exist

   FILE *in = fopen(filename, "rb");
   if(in == 0) return -2;                     // no permission?

   const int got = read(in, info.st_size);
   fclose(in);

   return (got == info.st_size) ? 0 : -3;     // -3: read error
}

   
// Replace the string by a printable C-style escaped version of itself:
//  - printable characters are copied, a backslash is doubled
//  - \b \f \n \r \t \v are written as their symbolic escapes
//  - every other character is written as an octal (\ooo) or hex (\xhh)
//    escape, chosen so that the character following it cannot be read as
//    part of the escape
// The result is built in a new Rep of 4 times the old length: each input
// character produces at most 4 output characters (backslash plus 3).
void string::expandUnprintable(void) {
   Rep *newrep = Rep::create(rep->len*4);
   char *q = newrep->data(); // write
   char *p = rep->data();    // read
   // number of characters written so far (becomes the new length)
   int l=0;
   
   // expand each char
   for(int j=0; j < rep->len; ++j, ++p) {
      if(isprint(*p)) { // printable --> print
	 if(*p=='\\') { // backslashify backslash
	    *(q++) = '\\';	 
	    l++;	    
	 } 
	 *(q++) = *p;
	 l++;
      } else { // unprintable --> expand
	 *(q++) = '\\';	// leading backslash
	 l++;
	 switch(*p) {
#if 0
	  case '\a':
	    *(q++) = 'a';
	    l++;
	    break;
#endif
	  case '\b':
	    *(q++) = 'b';
	    l++;
	    break;
	  case '\f':
	    *(q++) = 'f';
	    l++;
	    break;
	  case '\n':
	    *(q++) = 'n';
	    l++;
	    break;
	  case '\r':
	    *(q++) = 'r';
	    l++;
	    break;
	  case '\t':
	    *(q++) = 't';
	    l++;
	    break;
	  case '\v':
	    *(q++) = 'v';
	    l++;
	    break;
	  default: // no single char control
	    uint i = (unsigned char)*p;
	    // assume 3 escape characters, corrected below for short octal
	    l+=3;
	    // p[1] is the following character; for the last character this
	    // reads one past the contents (presumably the terminator -- confirm
	    // against Rep)
	    if(i<32) {  // print lower control octal
	       if(isdigit(p[1])) {
		  // a digit follows: pad to 3 digits so it is not absorbed
		  q += ::sprintf(q, "%03o", i);
	       } else {
		  // shortest form: 2 digits for 8..31, 1 digit for 0..7
		  q += ::sprintf(q, "%o", i);
		  if(i>=8) --l;
		  else l-=2;
	       }
	    } else {    // print octal or hex
	       if(isxdigit(p[1])) {
		  // a hex digit follows: use 3 digit octal instead of hex
		  q += ::sprintf(q, "%03o", i);
	       } else {
		  q += ::sprintf(q, "x%02x", i);
	       }
	    }
	 }
      }
   }
   
   // end
   replaceRep(newrep);
   rep->len = l;
   rep->terminate();
}


// Put a backslash in front of every backslash, single quote and double
// quote of the string.  All other characters are copied unchanged.
// The result is built in a new Rep of twice the old length, which is the
// worst case (every character escaped).
void string::backslashify(void) {
   Rep *newrep = Rep::create(rep->len*2);
   const char *src = rep->data();
   const char *const src_end = src + rep->len;
   char *dst = newrep->data();

   for(; src != src_end; ++src) {
      const char c = *src;
      // escape the three special characters
      if((c == '\\') || (c == '\'') || (c == '\"')) *(dst++) = '\\';
      *(dst++) = c;
   }

   // the new length is the number of characters written
   const int l = dst - newrep->data();
   replaceRep(newrep);
   rep->len = l;
   rep->terminate();
}


// Translate C-style backslash escapes in the string into the characters
// they stand for, in place:
//  - \b \f \n \r \t \v become the corresponding control characters
//  - \x followed by hex digits becomes the character with that value
//  - \ followed by up to 3 octal digits becomes the character with that value
//  - any other escaped character stands for itself (e.g. \\ becomes \)
//  - a single backslash at the very end of the string is dropped
// The result is never longer than the input, so the write pointer never
// overtakes the read pointer.
void string::compileCString(void) {
   detach();

   char *p = rep->data(); // read
   char *q = rep->data(); // write
   char c;                // tmp char
   int l=0;               // write
   int i=0;               // read
   
   while(i < rep->len) {
      c = *(p++); // read char
      i++;
      if(c == '\\') { // compile char
	 // lone backslash at the end: nothing to compile, drop it
	 if(i>=rep->len) break;
	 c = *(p++);
	 i++;
	 switch(c) {
#if 0
	  case 'a':
	    c = '\a';
	    break;
#endif
	  case 'b':
	    c = '\b';
	    break;
	  case 'f':
	    c = '\f';
	    break;
	  case 'n':
	    c = '\n';
	    break;
	  case 'r':
	    c = '\r';
	    break;
	  case 't':
	    c = '\t';
	    break;
	  case 'v':
	    c = '\v';
	    break;
	  case 'x': // hex
	    // NOTE: this q is local to the switch and hides the write pointer
	    // q inside it; here it only receives strtol()'s end pointer
	    char *q;
	    c = strtol(p, &q, 16);
	    // skip what strtol() consumed
	    i += q-p;
	    p = q;
	    break;	    
	  case '0': // octal
	  case '1':
	  case '2':
	  case '3':
	  case '4':
	  case '5':
	  case '6':
	  case '7':
	    // copy at most 3 digits to a private buffer so that strtol()
	    // cannot read more than that
	    char buf[4];
	    buf[0] = c;
	    buf[1] = *p;
	    buf[2] = (i < rep->len) ? p[1] : 0;
	    buf[3] = 0;
	    char *t;
	    c = strtol(buf, &t, 8);
	    // the first digit was already consumed above, skip the others
	    i += (t-buf)-1;
	    p += (t-buf)-1;
	    break;	    
	 }	 
      } 
      *(q++) = c; // write char
      l++;
   }
   rep->len = l;
   rep->terminate();
}


// Remove leading and trailing whitespace.  A string that consists only of
// whitespace (or is empty) becomes empty.
void string::cropSpace(void) {
   const int n = rep->len;

   // get first nonspace
   // (isspace() needs an unsigned char value; a negative char is undefined)
   int first = 0;
   while((first < n) && isspace((unsigned char)(*rep)[first])) ++first;

   // full of spaces
   if(first == n) {
      empty();
      return;
   }

   // get last nonspace; the char at `first` is one, so this stops there
   int last = n - 1;
   while((last > first) && isspace((unsigned char)(*rep)[last])) --last;
   const int end = last + 1;  // one past the last nonspace

   // nothing to remove in front: truncate
   if(first == 0) {
      truncate(end);
      return;
   }

   // extract substring
   operator=(operator()(first, end));
}


// Collapse whitespace in place: every run of whitespace characters becomes
// a single ' ', and leading and trailing whitespace is removed.
void string::collapseSpace(void) {
   detach();

   char *p = rep->data(); // read
   char *q = rep->data(); // write
   char last_char = ' ';  // pretend a space precedes: drops leading space
   int l=0;               // length

   for(int i=0; i < rep->len; ++i, ++p) {
      // isspace() needs an unsigned char value; a negative char is undefined
      const bool cur_is_space = isspace((unsigned char)*p) != 0;

      // skip all but the first whitespace char of a run
      if(cur_is_space && isspace((unsigned char)last_char)) continue;

      const char c = cur_is_space ? ' ' : *p;
      *(q++) = c;
      last_char = c;
      l++;
   }

   // a whitespace run at the end left one ' ' behind: remove it
   if(isspace((unsigned char)last_char)&&(l>0)) --l;
   rep->len = l;
   rep->terminate();
}


// Replace every occurrence of the character `from` by `to` (in place).
void string::translateChar(char from, char to) {
   detach();
   char *const begin = rep->data();
   char *const end = begin + rep->len;
   for(char *cur = begin; cur != end; ++cur) {
      if(*cur == from) *cur = to;
   }
}


// Return the index of the first occurrence of `c`, or -1 if the string
// does not contain it.
int string::firstOccurence(char c) const {
   for(int pos = 0; pos < rep->len; ++pos) {
      if((*rep)[pos] == c) return pos;
   }
   return -1;
}



// non member  implementation


// Split `s` into fields at any of the characters in `sep`.
//
//  allow_quoting  if true, separators inside '...' or "..." do not split,
//                 and a backslash protects the character following it.
//                 Quotes and backslashes stay in the fields, with one
//                 exception: the backslash in front of an escaped separator
//                 outside of quotes is dropped.
//  crop_space     if true, whitespace around every field is removed
//
// Always returns at least one field (an empty `s` gives one empty field).
// The scan stops at the first 0 byte of `s` (not 0 byte safe).
TArray<string> split(const string &s, const char *sep, bool allow_quoting,
                     bool crop_space) {
   TArray<string> r;
   int i=0;
   TArray<char> buf;
   const char *p = s;

   for(;;) {
      // collect the chars of the current field in buf
      while(*p) {
         if(strchr(sep, *p)) {
            break;
         } else if(!allow_quoting) {
            buf += *(p++);
         } else if(*p=='\\') {
            // backslash outside of quotes: keep it unless it escapes a
            // separator (or is the last character of the string)
            p++;
            if(strchr(sep, *p)==0) buf += '\\';
            if(*p) buf += *(p++);
         } else if((*p=='\'') || (*p=='\"')) {
            // quoted section: copy verbatim up to the matching quote
            const char quote = *p;
            buf += quote;
            for(p++; *p && *p!=quote; p++) {
               if(*p=='\\') {
                  p++;
                  buf += '\\';
                  // backslash at the very end of the string: stop here,
                  // otherwise the loop increment would step past the
                  // terminator and read out of bounds
                  if(*p==0) break;
                  buf += *p;
               } else
                 buf += *p;
            }
            buf += quote;  // the closing quote is added even if missing
            if(*p==quote) p++;
         } else {
            buf += *(p++);
         }
      }

      // put buf to r
      buf+='\0';
      r[i] = buf.data();
      if(crop_space) r[i].cropSpace();
      i++;

      // cleanup
      buf.empty();

      // end of string, or skip the separator and go on with the next field
      if(*p==0) break;
      p++;
   }

   r.fixedSize();
   return r;
}


// Concatenate the elements of `a`, putting `sep` between neighbours.
// An empty array gives an empty string.
string join(const TArray<string>& a, const string& sep) {
   string result;
   if(a.isEmpty()) return result;

   result = a[0];
   for(int idx = 1; idx < a.num(); ++idx) {
      result += sep;
      result += a[idx];
   }
   return result;
}


// Return a copy of `to` whose case imitates that of `from`.
//  - If both have the same length, the case is copied character by
//    character (characters of `from` that are neither lower nor upper case
//    leave the corresponding character of `to` untouched).
//  - Otherwise heuristics apply, in this order: an all lower case `from`
//    lower-cases the result, an all upper case `from` upper-cases it, and a
//    capitalized `from` capitalizes it.
string preserveCase(const string& from, const string& to) {
   string r(to);

   if(from.len() == to.len()) {
      // same len
      for(int i=0; i < r.len(); i++) {
         // ctype functions need an unsigned char value; a negative char is undefined
         const int f = (unsigned char)from[i];
         if(islower(f)) r[i] = tolower((unsigned char)r[i]);
         else if(isupper(f)) r[i] = toupper((unsigned char)r[i]);
      }
   } else {
      // some heuristics
      if(from.isLower()) r.lower();
      if(from.isUpper()) r.upper();
      if(from.isCapitalized()) r.capitalize();
   }

   return r;
}


// Read the text file `fname` and return its lines, one string per line
// (as delivered by string::readLine).  If the file cannot be opened, a
// message is printed and the program exits with status 1.
TArray<string> loadTextFile(const char *fname) {
   FILE *in = fopen(fname, "r");
   if(in == 0) {
      fprintf(stderr, "LoadTextFile: error while opening file '%s' for reading!\n", fname);
      exit(1);
   }

   // read lines until readLine() fails; indexing grows the array as needed
   TArray<string> lines;
   int count = 0;
   while(lines[count].readLine(in)) ++count;
   fclose(in);

   // the element of the failed read is not a line
   lines.killLastElement();
   lines.fixedSize();
   return lines;
}


// Read all remaining lines from the already opened `file` and return them,
// one string per line (as delivered by string::readLine).  The file is not
// closed.
TArray<string> loadTextFile(FILE *file) {
   // read lines until readLine() fails; indexing grows the array as needed
   TArray<string> lines;
   int count = 0;
   while(lines[count].readLine(file)) ++count;

   // the element of the failed read is not a line
   lines.killLastElement();
   lines.fixedSize();
   return lines;
}


// global dump implementation
void dump(const string& s) {
   string a(s);
   a.expandUnprintable();
   printf("\"%s\"", *a);
}

void dump(FILE *f, const string& s) {
   string a(s);
   a.expandUnprintable();
   fprintf(f, "\"%s\"", *a);
}







