/* autostr-func.c -- implementation for additional string functions in libretto
 *
 * Aaron Crane <aaronc@pobox.com>
 * 11 August 1997
 * 26 August 1997: whoops, need to move the inline funcs above their callers
 * 26 February 1998
 *
 * This file is part of Libretto, a library of useful functions.
 * Libretto is Copyright  1996, 1997, 1998 Aaron Crane <aaronc@pobox.com>.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <config.h>
#include <libretto/libretto.h>
#include <libretto/autostr.h>
#include <structs.h>
#include <autostr-priv.h>
#include <fmt-scan-priv.h>

#include <ctype.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>

/* Read one record from FILE into STR, stopping at the delimiter IDELIM
 * (which is consumed but not stored) or at end of file.  NUL characters in
 * the input are dropped.  Returns whatever s_terminate() yields for the
 * stored data, or EOF when end of file is hit before anything was stored.
 * If s_addc() fails, the data read so far is terminated, errno is set to
 * ENOMEM and EOF is returned.
 *
 * NOTE(review): the delimiter is narrowed to plain char before being
 * compared with the int from getc(), so on signed-char platforms a
 * delimiter with the high bit set may never match -- confirm intent. */
inline static ssize_t
s_getdelim (Autostr *str, FILE *file, int idelim)
{
    const char terminator = (char) idelim;
    ssize_t used = 0;
    int ch;

    /* start from an empty buffer */
    s_zero (str);

    while (1)
    {
	ch = getc (file);

	/* end of record */
	if (ch == terminator)
	    break;

	/* NULs are silently discarded */
	if (ch == 0)
	    continue;

	if (ch == EOF)
	{
	    /* Nothing stored on this call (ignoring dropped NULs): report
	     * EOF.  Otherwise hand back the partial record. */
	    if (used == 0)
		return EOF;
	    break;
	}

	/* An ordinary character: append it */
	if (s_addc (str, ch, used) == -1)
	{
	    s_terminate (str, used);
	    errno = ENOMEM;
	    return EOF;
	}
	used++;
    }

    /* NUL-terminate and return the result */
    return s_terminate (str, used);
}

/* Read one newline-delimited record from FILE into STR; the result is
 * whatever s_getdelim() returns for a '\n' delimiter. */
ssize_t
astr_getline (Autostr *str, FILE *file)
{
    ssize_t result;

    assert (str);
    assert (file);

    result = s_getdelim (str, file, '\n');
    return result;
}

/* Read one record from FILE into STR, using DELIM as the record
 * delimiter; the result is whatever s_getdelim() returns. */
ssize_t
astr_getdelim (Autostr *str, FILE *file, int delim)
{
    ssize_t result;

    assert (str);
    assert (file);

    result = s_getdelim (str, file, delim);
    return result;
}

/* Finalise and free every Autostr in the argument list.  The list starts
 * with STR and must be terminated by a null pointer; each non-null
 * argument is passed to s_final() and then to mem_free(). */
void
astr_destroy_all (Autostr *str, ...)
{
    va_list va;
    Autostr *p;

    va_start (va, str);
    for (p = str;  p;  p = va_arg (va, Autostr *))
    {
	/* Operate on the current argument P.  Using STR here would free the
	 * first argument once per list entry and leak all the others. */
	s_final (p);
	mem_free (p);
    }
    va_end (va);
}

/* Replace the contents of STR with the bytes of AB, skipping any NUL bytes
 * so that the result is a proper C string.  Returns 0 on success, or -1 if
 * the buffer could not be grown to hold the result. */
int
astr_copy_from_abuf (Autostr *str, const Autobuf *ab)
{
    const unsigned char *ap, *ab_end;
    char *sp;
    ssize_t n = 0;
    int i;

    /* Check the arguments before touching them. */
    assert (str);
    assert (ab);

    ab_end = ab->data + ab->length;

    /* First pass: count the non-NUL bytes we will keep. */
    for (ap = ab->data;  ap < ab_end;  ap++)
	if (*ap != 0)
	    n++;

    i = s_realloc (str, s_chunkof (n + 1));
    if (n >= str->alloced && i == -1)
	/* the resize failed and the existing buffer is too small */
	return -1;

    /* Only fetch the destination now: the resize may have changed str->s,
     * so a pointer taken earlier could be stale. */
    sp = str->s;

    /* Second pass: copy the kept bytes and terminate. */
    str->length = n;
    for (ap = ab->data;  ap < ab_end;  ap++)
	if (*ap != 0)
	    *sp++ = *ap;
    *sp = 0;

    return 0;
}

/*
 * Case fiddling
 */

/* Convert every character of STR to upper case, in place. */
void
astr_upcase (Autostr *str)
{
    char *p;

    assert (str);

    /* <ctype.h> functions need a value representable as unsigned char;
     * a negative plain char would be undefined behaviour. */
    for (p = str->s;  *p;  p++)
	*p = toupper ((unsigned char) *p);
}

/* Convert every character of STR to lower case, in place. */
void
astr_downcase (Autostr *str)
{
    char *p;

    assert (str);

    /* <ctype.h> functions need a value representable as unsigned char;
     * a negative plain char would be undefined behaviour. */
    for (p = str->s;  *p;  p++)
	*p = tolower ((unsigned char) *p);
}

/* Swap the case of every letter in STR, in place: upper-case letters
 * become lower case and vice versa; other characters are untouched. */
void
astr_flipcase (Autostr *str)
{
    char *p;
    unsigned char c;

    assert (str);

    for (p = str->s;  *p;  p++)
    {
	/* Classify via unsigned char: passing a negative plain char to the
	 * <ctype.h> functions is undefined behaviour. */
	c = (unsigned char) *p;
	if (isupper (c))
	    *p = tolower (c);
	else if (islower (c))
	    *p = toupper (c);
    }
}

/* Capitalise each word of STR in place.  A word is a maximal run of
 * alphabetic characters; its first letter is upper-cased and all of its
 * remaining letters are lower-cased. */
void
astr_capitalise (Autostr *str)
{
    char *p;

    assert (str);

    /* All <ctype.h> calls go through unsigned char: passing a negative
     * plain char to them is undefined behaviour. */
    p = str->s;
    while (1)
    {
	if (*p == 0)
	    break;
	/* skip to the start of the next word */
	while (*p && !isalpha ((unsigned char) *p))
	    p++;
	if (*p == 0)
	    break;
	/* initial letter */
	*p = toupper ((unsigned char) *p);
	p++;
	/* rest of the word */
	while (*p && isalpha ((unsigned char) *p))
	{
	    *p = tolower ((unsigned char) *p);
	    p++;
	}
    }
}

/* Upper-case the first letter of each word of STR in place.  A word is a
 * maximal run of alphabetic characters; letters after the first are left
 * as they are. */
void
astr_upcase_initials (Autostr *str)
{
    char *p;

    assert (str);

    /* All <ctype.h> calls go through unsigned char: passing a negative
     * plain char to them is undefined behaviour. */
    p = str->s;
    while (1)
    {
	if (*p == 0)
	    break;
	/* skip to the start of the next word */
	while (*p && !isalpha ((unsigned char) *p))
	    p++;
	if (*p == 0)
	    break;
	/* initial letter */
	*p = toupper ((unsigned char) *p);
	p++;
	/* step over the rest of the word unchanged */
	while (*p && isalpha ((unsigned char) *p))
	    p++;
    }
}


/* Some more searching functions */

/* Starting one position after OFFSET (negative start positions are clamped
 * to 0), measure the run of characters of ASTR that all occur in SKIPSET.
 * Returns -1 if that run is empty, otherwise the index just past the run. */
ssize_t
astr_find_nmatch (const Autostr *astr, ssize_t offset, const Autostr *skipset)
{
    ssize_t run;

    assert (astr);
    assert (skipset);

    /* step past OFFSET, never starting before the beginning */
    offset = (offset + 1 < 0) ? 0 : offset + 1;
    assert (offset <= astr->length);

    run = strspn (astr->s + offset, skipset->s);
    if (run == 0)
	return -1;
    return offset + run;
}

/* As astr_find_nmatch(), but SKIPSET is a plain C string.  Starting one
 * position after OFFSET (clamped to 0), measure the run of characters of
 * ASTR that all occur in SKIPSET.  Returns -1 if that run is empty,
 * otherwise the index just past the run. */
ssize_t
astr_find_nmatch_s (const Autostr *astr, ssize_t offset, const char *skipset)
{
    ssize_t run;

    assert (astr);
    assert (skipset);

    /* step past OFFSET, never starting before the beginning */
    offset = (offset + 1 < 0) ? 0 : offset + 1;
    assert (offset <= astr->length);

    run = strspn (astr->s + offset, skipset);
    if (run == 0)
	return -1;
    return offset + run;
}

/* Starting one position after OFFSET (clamped to 0), step over the
 * characters of ASTR for which SKIP returns non-zero.  Returns -1 if no
 * character was stepped over, otherwise the index where the scan stopped. */
ssize_t
astr_find_nmatch_f (const Autostr *astr, ssize_t offset, Astr_istype_f skip)
{
    ssize_t pos;

    assert (astr);
    assert (skip);

    /* step past OFFSET, never starting before the beginning */
    offset = (offset + 1 < 0) ? 0 : offset + 1;
    assert (offset <= astr->length);

    for (pos = offset;  astr->s[pos] && skip (astr->s[pos]);  pos++)
	;

    if (pos == offset)
	return -1;
    return pos;
}

/* Starting one position after OFFSET (negative start positions are clamped
 * to 0), measure the run of characters of ASTR that do not occur in
 * STOPSET.  Returns -1 if that run is empty, otherwise the index just past
 * the run. */
ssize_t
astr_find_match (const Autostr *astr, ssize_t offset, const Autostr *stopset)
{
    ssize_t run;

    assert (astr);
    assert (stopset);

    /* step past OFFSET, never starting before the beginning */
    offset = (offset + 1 < 0) ? 0 : offset + 1;
    assert (offset <= astr->length);

    run = strcspn (astr->s + offset, stopset->s);
    if (run == 0)
	return -1;
    return offset + run;
}

/* As astr_find_match(), but STOPSET is a plain C string.  Starting one
 * position after OFFSET (clamped to 0), measure the run of characters of
 * ASTR that do not occur in STOPSET.  Returns -1 if that run is empty,
 * otherwise the index just past the run. */
ssize_t
astr_find_match_s (const Autostr *astr, ssize_t offset, const char *stopset)
{
    ssize_t run;

    assert (astr);
    assert (stopset);

    /* step past OFFSET, never starting before the beginning */
    offset = (offset + 1 < 0) ? 0 : offset + 1;
    assert (offset <= astr->length);

    run = strcspn (astr->s + offset, stopset);
    if (run == 0)
	return -1;
    return offset + run;
}

/* Starting one position after OFFSET (clamped to 0), step over the
 * characters of ASTR for which SKIP returns zero.  Returns -1 if no
 * character was stepped over, otherwise the index where the scan stopped. */
ssize_t
astr_find_match_f (const Autostr *astr, ssize_t offset, Astr_istype_f skip)
{
    ssize_t pos;

    assert (astr);
    assert (skip);

    /* step past OFFSET, never starting before the beginning */
    offset = (offset + 1 < 0) ? 0 : offset + 1;
    assert (offset <= astr->length);

    for (pos = offset;  astr->s[pos] && !skip (astr->s[pos]);  pos++)
	;

    if (pos == offset)
	return -1;
    return pos;
}

/* Replace every occurrence of the character FROM_INT in ASTR with the
 * character TO_INT, in place.  Both must be positive. */
void
astr_translit_c (Autostr *astr, int from_int, int to_int)
{
    char needle, replacement;
    char *cursor;

    assert (astr);
    assert (from_int > 0);
    assert (to_int > 0);

    needle = from_int;
    replacement = to_int;

    cursor = astr->s;
    while (*cursor)
    {
	if (*cursor == needle)
	    *cursor = replacement;
	cursor++;
    }
}

/* Return non-zero if P points at a range specification "x-y" whose upper
 * bound Y is non-NUL and strictly greater than X (compared as unsigned
 * char).  Assumes *P is non-zero. */
inline static int
looking_at_range (const char *p)
{
    return p[1] == '-' && p[2] && (unsigned char) p[2] > (unsigned char) p[0];
}

/* Cursor that walks a transliteration set one expanded character at a
 * time, so that no scratch buffer is needed to hold the expansion. */
typedef struct
{
    const char *rest;	/* unread remainder of the set specification */
    int next;		/* next character of the range being expanded */
    int last;		/* end of that range; NEXT > LAST means none active */
} Set_iter;

/* Return the next expanded character of IT as a value in 1..UCHAR_MAX, or
 * 0 once the set is exhausted.  Range bounds are held in ints, so a range
 * ending at the largest char value terminates properly. */
inline static int
set_next (Set_iter *it)
{
    /* still inside a range */
    if (it->next <= it->last)
	return it->next++;

    if (*it->rest == 0)
	return 0;

    if (looking_at_range (it->rest))
    {
	it->next = (unsigned char) it->rest[0];
	it->last = (unsigned char) it->rest[2];
	it->rest += 3;
	return it->next++;
    }

    /* a single literal character */
    return (unsigned char) *it->rest++;
}

/* Fill TRANS (UCHAR_MAX + 1 entries) with a translation table mapping each
 * character of FROMSET to the character at the same position in TOSET.
 * Both sets may contain ranges such as "a-z".  If TOSET is shorter than
 * FROMSET its last character is repeated; if TOSET is empty the table is
 * left as the identity mapping.  When a character occurs more than once in
 * FROMSET, the last occurrence wins.  Characters not in FROMSET map to
 * themselves. */
inline static void
build_trans (unsigned char *trans, const char *fromset, const char *toset)
{
    size_t i;
    Set_iter from, to;
    int f, t, target;

    for (i = 0;  i <= UCHAR_MAX;  i++)
	trans[i] = i;

    from.rest = fromset;
    from.next = 1;
    from.last = 0;

    to.rest = toset;
    to.next = 1;
    to.last = 0;

    /* Nothing to translate to: keep the identity table rather than mapping
     * characters to NUL. */
    target = set_next (&to);
    if (target == 0)
	return;

    while ((f = set_next (&from)) != 0)
    {
	trans[f] = (unsigned char) target;
	/* advance through TOSET, sticking on its last character */
	t = set_next (&to);
	if (t != 0)
	    target = t;
    }
}

/* Transliterate ASTR in place: each character that appears in FROMSET is
 * replaced according to the table build_trans() derives from FROMSET and
 * TOSET. */
void
astr_translit (Autostr *astr, const Autostr *fromset, const Autostr *toset)
{
    unsigned char table[UCHAR_MAX + 1];
    char *cursor;

    assert (astr);
    assert (fromset);
    assert (toset);

    /* An empty string needs no table: the loop below will not run. */
    if (astr->s[0] != 0)
	build_trans (table, fromset->s, toset->s);

    cursor = astr->s;
    while (*cursor != 0)
    {
	*cursor = table[(unsigned char) *cursor];
	cursor++;
    }
}

/* Transliterate ASTR in place, with the sets given as plain C strings:
 * each character that appears in FROMSET is replaced according to the
 * table build_trans() derives from FROMSET and TOSET. */
void
astr_translit_s (Autostr *astr, const char *fromset, const char *toset)
{
    /* One entry per possible unsigned char value, i.e. UCHAR_MAX + 1:
     * build_trans() fills indices 0..UCHAR_MAX inclusive. */
    unsigned char trans[UCHAR_MAX + 1];
    char *p;

    assert (astr);
    assert (fromset);
    assert (toset);

    if (astr->s[0])
	/* Don't bother building it if we won't be doing anything */
	build_trans (trans, fromset, toset);

    for (p = astr->s;  *p;  p++)
	*p = trans[(unsigned char) *p];
}

/* Transliterate ASTR in place using callbacks: every character for which
 * MATCHES returns non-zero is passed to TRANSLIT, and is replaced by the
 * result unless that result is NUL. */
void
astr_translit_f (Autostr *astr, Astr_istype_f matches, Astr_totype_f translit)
{
    char *cursor;
    char mapped;

    assert (astr);
    assert (matches);
    assert (translit);

    cursor = astr->s;
    while (*cursor)
    {
	if (matches (*cursor))
	{
	    mapped = translit (*cursor);
	    /* never write a NUL into the middle of the string */
	    if (mapped)
		*cursor = mapped;
	}
	cursor++;
    }
}


/* A brief commentary on astr_esc_char():
 *
 * Allowed sequences:
 *
 * \a, \b, \D (DEL), \e or \E (ESC), \f, \n, \r, \t, \v
 *
 * \M-<char>, where <char> is a normal char or an esc_char sequence.  Here,
 * the meta bit (0200) is set in <char> and the resulting char is returned.
 *
 * \C-<char> (or its equivalent, \^<char>), where <char> is a normal char or
 * an esc_char sequence.  Here, the low 5 bits of <char> are ORed with the
 * meta bit of <char> and the resulting char is returned.  (That is, if the
 * char following the \C- is a meta char, then so is the resulting char.)
 * In addition, if the char following the \C- is a question mark, then as a
 * special case, ASCII DEL (0177) is returned.  Note that \C-\M-? will not
 * give M-DEL; for that you need to say \M-\C-? or \M-\D.
 *
 * \<ddd>, where <ddd> is a sequence of no more than three octal digits.
 *
 * \x<hh>, where <hh> is a sequence of no more than two hex digits.
 *
 * Any other char is returned as is.
 *
 * If the character at OFFSET is not the escape character (a backslash by
 * default), then that character is returned as is.
 */

/*
 * inlined helper functions
 */

/* Return the character of STR at *OFFSET and advance *OFFSET by one. */
inline static char
get (const Autostr *str, ssize_t *offset)
{
    const char here = str->s[*offset];

    *offset += 1;
    return here;
}

/* Step *OFFSET back by one position, undoing a previous get(). */
inline static void
unget (ssize_t *offset)
{
    --*offset;
}

/* Return the character of STR that lies N positions beyond *OFFSET,
 * without moving *OFFSET. */
inline static char
peek (const Autostr *str, const ssize_t *offset, ssize_t n)
{
    return str->s[*offset + n];
}

/* Forward declaration of esc_char.  esc_meta and esc_control (defined
 * below) call esc_char, and esc_char in turn calls both of them, so its
 * prototype must be visible before their definitions.  All three are
 * declared inline, but because of this mutual recursion the calls back
 * into esc_char actually have to be real function calls. */
inline static int esc_char (const Autostr *str, ssize_t *offset, int backslash);

/* Handle the operand of a meta escape: decode the next (possibly escaped)
 * character with esc_char and return it with the meta bit (0200) set.
 * A negative result from esc_char is reported as -1. */
inline static int
esc_meta (const Autostr *str, ssize_t *offset, int backslash)
{
    const int inner = esc_char (str, offset, backslash);

    if (inner < 0)
	return -1;		/* error */
    return inner | 0200;	/* set meta bit */
}

/* Handle the operand of a control escape: decode the next (possibly
 * escaped) character with esc_char and return its control equivalent.
 * '?' gives DEL (0177); anything else keeps only its meta bit and low
 * five bits.  A negative result from esc_char is reported as -1. */
inline static int
esc_control (const Autostr *str, ssize_t *offset, int backslash)
{
    const int inner = esc_char (str, offset, backslash);

    if (inner < 0)
	return -1;
    if (inner == '?')		/* "^?" == DEL */
	return 0177;
    return inner & 0237;	/* meta bit and low 5 bits */
}

/* Return 1 if C is an octal digit ('0'..'7'), else 0.  A plain range
 * check is used rather than isdigit(), so the result is well defined for
 * any int, including negative values obtained from a signed char. */
inline static int
octal_p (int c)
{
    return c >= '0' && c <= '7';
}

/* Convert the digit character C to its numeric value (0..15).  C must
 * already be known to be a valid octal, decimal or hex digit; letters may
 * be in either case.  A table lookup is slow but portable and clear. */
inline static int
char_to_int (int c)
{
    /* Static storage, but read-only, so sharing it between threads is
     * harmless. */
    static const char *const digit_table = "0123456789ABCDEF";
    const char *hit;

    hit = strchr (digit_table, toupper (c));
    return hit - digit_table;
}

/* Read up to three octal digits from STR starting at *OFFSET, advancing
 * *OFFSET past the digits consumed, and return their value.  Returns 0 if
 * there is no octal digit at *OFFSET. */
inline static int
esc_octal (const Autostr *str, ssize_t *offset)
{
    int value = 0, digits;

    for (digits = 0;  digits < 3;  digits++)
    {
	/* Classify via unsigned char so that a negative plain char never
	 * reaches a <ctype.h> function. */
	if (!octal_p ((unsigned char) peek (str, offset, 0)))
	    break;
	value = value * 8 + char_to_int (get (str, offset));
    }
    return value;
}

/* Read up to two hex digits from STR starting at *OFFSET, advancing
 * *OFFSET past the digits consumed, and return their value.  Returns 0 if
 * there is no hex digit at *OFFSET. */
inline static int
esc_hex (const Autostr *str, ssize_t *offset)
{
    int value = 0, digits;

    for (digits = 0;  digits < 2;  digits++)
    {
	/* isxdigit() needs a value representable as unsigned char; a
	 * negative plain char would be undefined behaviour. */
	if (!isxdigit ((unsigned char) peek (str, offset, 0)))
	    break;
	value = value * 16 + char_to_int (get (str, offset));
    }
    return value;
}

/* Decode one character of STR at *OFFSET, interpreting an escape sequence
 * introduced by IBACKSLASH if there is one, and advance *OFFSET past
 * whatever was consumed.  A character other than the escape character is
 * returned as is, as is an escape character at the very end of the string.
 * An unrecognised escaped character is returned unchanged. */
inline static int
esc_char (const Autostr *str, ssize_t *offset, int ibackslash)
{
    const char backslash = (char) ibackslash;
    char c = get (str, offset);

    /* Not an escape sequence at all */
    if (c != backslash)
	return c;

    /* A lone escape character at the end of the string stands for itself */
    if (peek (str, offset, 0) == 0)
	return backslash;

    c = get (str, offset);

    switch (c)
    {
    case 'a':
	return '\a';
    case 'b':
	return '\b';
    case 'D':			/* DEL */
	return '\177';
    case 'e':
    case 'E':			/* ESC */
	return '\033';
    case 'f':
	return '\f';
    case 'n':
	return '\n';
    case 'r':
	return '\r';
    case 't':
	return '\t';
    case 'v':
	return '\v';

    case '^':
	/* control char, unless nothing follows */
	if (peek (str, offset, 0) != 0)
	    return esc_control (str, offset, backslash);
	return '^';

    case 'C':
	/* "C-" followed by something is a control char */
	if (peek (str, offset, 0) == '-' && peek (str, offset, 1) != 0)
	{
	    get (str, offset);	/* skip the `-' */
	    return esc_control (str, offset, backslash);
	}
	return 'C';

    case 'M':
	/* "M-" followed by something is a meta char */
	if (peek (str, offset, 0) == '-' && peek (str, offset, 1) != 0)
	{
	    get (str, offset);	/* skip the `-' */
	    return esc_meta (str, offset, backslash);
	}
	return 'M';

    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
	/* octal number: push the leading digit back so that esc_octal
	 * sees the whole number */
	unget (offset);
	return esc_octal (str, offset);

    case 'x':			/* hex number */
	return esc_hex (str, offset);

    default:
	return c;
    }
}


/*
 * Public functions
 */

/* Decode the (possibly escaped) character of STR at *OFFSET, using '\\' as
 * the escape character, and advance *OFFSET past what was consumed.
 * *OFFSET must index a non-NUL character inside STR. */
int
astr_esc_char (const Autostr *str, ssize_t *offset)
{
    int decoded;

    assert (str);
    assert (str->s);
    assert (*offset >= 0);
    assert (*offset < str->length);
    assert (peek (str, offset, 0) != 0);

    decoded = esc_char (str, offset, '\\');
    return decoded;
}

/* As astr_esc_char(), but with a caller-chosen escape character BACKSLASH
 * (which must not be NUL).  Decodes the (possibly escaped) character of
 * STR at *OFFSET and advances *OFFSET past what was consumed. */
int
astr_esc_char_c (const Autostr *str, ssize_t *offset, int backslash)
{
    int decoded;

    assert (str);
    assert (str->s);
    assert (*offset >= 0);
    assert (*offset < str->length);
    assert (peek (str, offset, 0) != 0);
    assert (backslash != 0);

    decoded = esc_char (str, offset, backslash);
    return decoded;
}
