/* $Id: expand.c,v 1.14 2000/05/15 16:45:56 malekith Exp $ */

#include "h.h"

/* Defined elsewhere.  expand_var() passes it a parameter name and treats a
 * null return as an error. */
char *interpret_var(char *name);
/* Defined elsewhere.  expand_var() walks `positional' from index 1 up to a
 * null entry for $@ / $*, and uses ifs[0] as the separator between entries. */
extern char **positional, *ifs;

/*
 * Command substitution.
 *
 * On entry *pp points just past an opening backtick.  The text up to the
 * closing backtick is handed to exec() in a forked child whose stdout is a
 * pipe; the parent appends everything it reads from the pipe at *vv.
 * Non-alphanumeric characters are backslash-quoted on the way; separators
 * (issep) are left bare unless quote_spaces is set.  Trailing newlines of
 * the collected output are overwritten with NULs and dropped.
 *
 * On return *pp points past the closing backtick (or at the terminating
 * NUL when no closing backtick exists) and *vv points past the appended
 * text.  If the pipe or the child cannot be created nothing is appended,
 * but the whole `...` span is still consumed so the caller does not take
 * the closing backtick for a new opening one.
 */
static void do_backtick(const char **pp, char **vv, int quote_spaces)
{
	const char *p, *s;
	char *v;
	int fd[2];
	char buf[4096 + 1];
	int pid, n;

	s = p = *pp;
	v = *vv;

	while (*p && *p != '`')
		p++;

	if (!*p) {
		perr("no closing backtick '%s'",s);
		goto croak;
	}

	if (pipe(fd)) {
		perror("pipe");
		p++;		/* consume the closing backtick */
		goto croak;
	}

	pid = fork();

	if (pid < 0) {
		perror("fork");	/* report before close() can clobber errno */
		close(fd[0]);
		close(fd[1]);
		p++;		/* consume the closing backtick */
		goto croak;
	}

	if (pid == 0) {
		/* child: stdout goes to the pipe, then run the command text.
		 * NOTE(review): a_align() presumably finalises the a_arg
		 * arena before the child reuses it -- confirm. */
		a_align(a_arg);
		close(fd[0]);
		dup2(fd[1],1);
		exec(strndup(s, p-s), 1);
		/* no ret */
	}

	close(fd[1]);
	for (;;) {
		char *q;

		n = read(fd[0], buf, sizeof(buf) - 1);
		/* errno is only meaningful when read() failed; testing it
		 * on EOF could loop forever on a stale EINTR */
		if (n < 0 && errno == EINTR)
			continue;
		if (n <= 0)
			break;

		buf[n] = 0;
		for (q = buf; *q; q++) {
			/* quote non alphanumerics */
			if (!isalnum((unsigned char)*q) &&
			    (quote_spaces || !issep(*q))) {
				*v++ = '\\';
				a_alloc(a_arg, 1);
			}
			*v++ = *q;
			a_alloc(a_arg, 1);
		}
	}
	close(fd[0]);
	p++;			/* step over the closing backtick */

	xwait(pid);

	/* strip trailing newlines from what was just appended */
	while (*vv != v) {
		if (*--v == '\n')
			*v = 0;
		else {
			v++;
			break;
		}
	}
	if (v > *vv)
		*vv = v;
	*pp = p;
	return;

croak:
	*pp = p;
	return;
}

/*
 * Append the NUL-terminated string `sub' at *vv, backslash-quoting every
 * non-alphanumeric character.  Separators (issep) are left unquoted unless
 * quote_spaces is set.  The a_arg arena is grown by one byte per byte
 * written and *vv is advanced past the appended text; no terminator is
 * written.
 */
static void add_string(const char *sub, char **vv, int quote_spaces)
{
	char *v;
	v = *vv;

	for (; *sub; sub++) {
		/* quote non alphanumerics; the cast keeps <ctype.h> defined
		 * for bytes with the high bit set */
		if (!isalnum((unsigned char)*sub) &&
		    (quote_spaces || !issep(*sub))) {
			*v++ = '\\';
			a_alloc(a_arg, 1);
		}
		*v++ = *sub;
		a_alloc(a_arg, 1);
	}

	*vv = v;
}

static int expand_var(const char **pp, char **vv, int quote_spaces)
{
	const char *p;
	char *sub;
	char *name, *nn;
	int ret = 0;
	
	p = *pp;
	
	a_push_state(0);
	
	nn = name = alloc(2);
	
	if (*p == '{') {
		p++;
		while (*p && *p != '}') {
			*name++ = *p++;
			alloc(1);
		}
		if (*p == '}')
			p++;
	} else {
		alloc(1);
		if (strchr("!#$-?0123456789*@", *p))
			*name++ = *p++;
		else
			while (isalnum(*p) || *p == '_') {
				*name++ = *p++;
				alloc(1);
			}
	}
	*name = 0;
	align();
	if (name == nn) {
		ret = 1;
		goto see_ya;
	}
	name = nn;
	
	*pp = p;
	
	/* well, this may look strange, but that's the way i understood
	 * ksh(1) and bash(1) manuals */
	if (*name == '@' || *name == '*') {
		char **p = positional;
		
		p++;
		while (*p) {
			add_string(*p++, vv, quote_spaces);
			if (*p && ifs[0]) {
				if (quote_spaces && *name == '*') {
					a_alloc(a_arg, 2);
					*(*vv)++ = '\\';
				} else
					a_alloc(a_arg, 1);
				*(*vv)++ = ifs[0];
			}
		}
	} else { 
		sub = interpret_var(name);
		if (!sub) {
			ret = -1;
			goto see_ya;
		}
		add_string(sub, vv, quote_spaces);
	}
	*(*vv) = 0;

see_ya:	
	a_pop_state(0);
	return ret;
}

/* Quoting state of the scanners in this file.  switch_mode() returns these
 * values negated to signal that the mode changed. */
#define M_PLAIN			1	/* outside any quotes */
#define M_SINGLE_QUOTE		2	/* inside '...' */
#define M_DOUBLE_QUOTE		3	/* inside "..." */

/*
 * Quote-state tracker for expand().
 *
 * Called with mode == -1 it resets its private state, remembers `p' as the
 * start of the input and returns M_PLAIN (`v' is not used in that case).
 *
 * Otherwise *p is a quote character and `mode' the current state:
 *   - a quote of the other kind inside a quoted region changes nothing and
 *     `mode' is returned as is (positive);
 *   - an opening quote returns the new mode negated;
 *   - a closing quote returns -M_PLAIN.
 *
 * An opening quote directly after a separator records the output position.
 * If the matching closing quote finds that position unchanged and is itself
 * followed by a separator or the end of the string, the two bytes `'' and
 * NUL are appended at *v so that the empty word is not lost.
 */
static int switch_mode(int mode, const char *p, char **v)
{
	static char *empty_mark;	/* output position at the opening quote */
	static const char *start;	/* first character of the input */
	int quote;

	if (mode == -1) {
		start = p;
		empty_mark = 0;
		return M_PLAIN;
	}

	quote = (*p == '"') ? M_DOUBLE_QUOTE : M_SINGLE_QUOTE;

	if (mode == M_PLAIN) {
		/* opening quote */
		if (p != start && issep(p[-1]))
			empty_mark = *v;
		return -quote;
	}

	/* the other kind of quote is an ordinary character in here */
	if (quote != mode)
		return mode;

	/* closing quote: nothing was emitted since the opening one and a
	 * word boundary follows, so tell parse() there was an empty word */
	if (empty_mark == *v && (issep(p[1]) || p[1] == 0)) {
		char *out;

		a_alloc(a_arg, 2);
		out = *v;
		out[0] = '\'';
		out[1] = 0;
		*v = out + 2;
		empty_mark = 0;
	}
	return -M_PLAIN;
}

/* Append one character to the output: store it through the local pointer
 * `v' (which must be in scope at the point of use) and then grow the a_arg
 * arena by one byte.  The argument is evaluated exactly once. */
#define PUTC(a) do { *v++ = a; a_alloc(a_arg, 1); } while (0)

/*
 * Consume one keyword-like word at *pp.
 *
 * The word ends at a separator, newline, `;' or the end of the string and
 * may consist of alphanumerics only; anything else is reported through
 * perr() and -1 is returned with *pp and *vv untouched.  Separators after
 * the word (but not a newline) are skipped as well.
 *
 * The word followed by ifs[0] is appended at *vv when `copy' or `sc' is
 * non-zero; `copy' is ignored when *vv is null.  Returns 0 on success.
 *
 * NOTE(review): with the standard isalnum() the tokens `{' and `}' that
 * x_b()/x_for()/x_case() feed through here would be rejected -- confirm
 * whether h.h overrides isalnum().
 */
static int x_putword(const char **pp, char **vv, int copy, int sc)
{
	const char *cur = *pp;
	char *v = *vv;		/* PUTC() writes through `v' */
	int emit;

	if (v == 0)
		copy = 0;
	emit = (copy || sc);

	for (; *cur && !issep(*cur) && *cur != '\n' && *cur != ';'; cur++) {
		if (!isalnum(*cur)) {
			perr("illegal character `%i' in token near `%s'", 
			     *cur, *pp);
			return -1;
		}
		if (emit)
			PUTC(*cur);
	}

	/* swallow the blanks behind the word, but stay on a newline */
	while (issep(*cur) && *cur != '\n')
		cur++;

	if (emit)
		PUTC(ifs[0]);

	*pp = cur;
	*vv = v;
	return 0;
}

/* Forward declaration: skip_list_till() calls it for nested control
 * structures before its definition appears further down. */
static int do_the_bos_thing(const char **pp, char **vv, int sc);

/*
 * Return 1 when the text at `p' starts with the word `cmd' and the word
 * ends right there, i.e. the next character is a separator, a newline or
 * `;'.  Return 0 otherwise, including when either pointer is null.
 */
static int cmp(const char *p, const char *cmd)
{
	size_t len;

	if (!cmd || !p)
		return 0;

	len = strlen(cmd);
	if (strncmp(p, cmd, len) != 0)
		return 0;

	return issep(p[len]) || p[len] == '\n' || p[len] == ';';
}

/*
 * Copy a command list from *pp to *vv up to (not including) a terminating
 * keyword.
 *
 * Copying stops when one of the words t1, t2, t3 (null entries never match)
 * is found at the beginning of a statement, as decided by cmp().  With
 * t1 == 0 exactly one word is copied instead: copying stops at the first
 * separator met outside quotes and the separators after it are skipped.
 *
 * With sc == 0 the copied text is bracketed for the next-level parser: the
 * two bytes space and `'' go in front of it and NUL, space, NUL behind it
 * (the final NUL is not counted in *vv).  With sc != 0 the text is copied
 * bare and only a NUL, not counted either, is stored behind it.
 *
 * Control structures found at the beginning of a statement are handed to
 * do_the_bos_thing() with sc + 1.  Quotes are tracked only to know whether
 * `;', newline and separators are significant; quote characters themselves
 * are copied unchanged.  When the input runs out before a terminator is
 * seen another line is requested with getline(1).
 *
 * Returns 0 on success with *pp at the terminator and *vv past the copied
 * text; returns -1 (leaving *pp and *vv alone) when a nested structure
 * fails or no more input is available.
 *
 * TODO: ``;'' at the beginning of a list should be removed, and ``;'' as
 * the first char ought to be treated as an error in parse()
 */
static int skip_list_till(const char **pp, char **vv, const char *t1,
 		   	  const char *t2, const char *t3, int sc)
{
	const char *p;
	char *v;
	int mode, bos = 1;	/* bos: at the beginning of a statement */
	
	p = *pp;
	v = *vv;
	
	mode = M_PLAIN;
	
	/* top level only: open the verbatim word */
	if (!sc) {
		a_alloc(a_arg, 2);
		*v++ = ' ';
		*v++ = '\'';
	}
	
WIZARD(4, "slt: '%s' '%s|%s|%s'\n", p, t1, t2, t3);
	
restart:	
	for (; *p; p++) {
		if (bos) {
			/* a terminator only counts at statement start */
			if (cmp(p, t1) || cmp(p, t2) || cmp(p, t3))
				goto the_end;
			/* first letters of the keywords that
			 * do_the_bos_thing() knows about */
			if (strchr("{fiwuc", *p)) {
WIZARD(4, "dtbt calling: <%s> <%s> <%s> %d\n",p,v-5,*vv,v-*vv);
				if (do_the_bos_thing(&p, &v, sc+1))
					return -1;
WIZARD(4, "dtbt called: <%s> <%s> <%s> %d\n",p,v-5,*vv,v-*vv);
			}
		}
		/* an unquoted `;' or newline starts a new statement; leading
		 * separators do not end the beginning-of-statement state */
		if (mode == M_PLAIN && (*p == ';' || *p == '\n'))
			bos = 1;
		else if (mode != M_PLAIN || !issep(*p))
			bos = 0;
		
		/* special case for t1 == 0 -- which means `put one word 
		 * only' */
		if (t1 == 0 && mode == M_PLAIN && issep(*p)) {
			while (issep(*p) && *p != '\n')
				p++;
			goto the_end;
		}
			
		switch (*p) {
		case 0:
			/* do_the_bos_thing() may leave p on the terminating
			 * NUL; step back so the loop's p++ lands on it */
			p--;
			break;
		case '\'':
			if (mode == M_PLAIN)
				mode = M_SINGLE_QUOTE;
			else if (mode == M_SINGLE_QUOTE)
				mode = M_PLAIN;
			goto plain;
		case '"':
			if (mode == M_PLAIN)
				mode = M_DOUBLE_QUOTE;
			else if (mode == M_DOUBLE_QUOTE)
				mode = M_PLAIN;
			goto plain;
		case '\\':
			/* copy the backslash together with the character it
			 * protects, so that character is not interpreted */
			if (p[1])
				PUTC(*p++);
			goto plain;
		case '#':
			/* NOTE(review): `isatty' is used as a value here, not
			 * called; if it names the libc function this test is
			 * always false and comments are never dropped --
			 * confirm what h.h declares. */
			if (!isatty && mode == M_PLAIN) {
				p = strchr(p, '\n');
				if (p && p[1])
					goto plain;
				else
					p = " "; /* cause loop to stop */
			} else
				goto plain;
			break;
		default:
		plain:
			PUTC(*p); 
			break;
		}
	}
	
	/* out of input before a terminator was seen: read on */
	p = getline(1);	/* get a line with '> ' prompt */
	if (!p) {
		perr("unterminated %s.", 
		     mode == M_PLAIN ? "control structure" : "quote");
		return -1;
	}
/*	PUTC('\n'); what was it for ???? */
	bos = 1;
	goto restart;

the_end:

	if (!sc) {
#if 0	/* what was it for ???? */
		while (issep(*--v) && *v != '\n')
			/* nothing */ ;
		if (*v != ';')
			v++;
#endif
		/* close the verbatim word: NUL, a blank, and a NUL that the
		 * next write will overwrite */
		a_alloc(a_arg, 3);
		*v++ = 0;
		*v++ = ' ';
		*v = 0;
WIZARD(4, "slt: finished <%s>\n", v - 5);
	} else {
		*v = 0;
		WIZARD(4, "slt: finished sc=%d <%s>\n", sc, v - 5);
	}
	
	*pp = p;
	*vv = v;
	
	return 0;
}

/*
 * case 'unexp word' 'patterns'
 *
 * Handles `case WORD in ... esac' and `case WORD { ... }'.  The keyword
 * `case' is copied, WORD becomes one list, the `in' / `{' is consumed, the
 * body up to the matching `esac' / `}' becomes a second list, and the
 * closing keyword is consumed.  Returns 0 on success, -1 on any error.
 */
static int x_case(const char **pp, char **vv, int sc)
{
	const char *closer;

	/* "case", then the word being matched ($foo or sth) */
	if (x_putword(pp, vv, 1, sc) ||
	    skip_list_till(pp, vv, 0, 0, 0, sc))
		return -1;

	if (cmp(*pp, "in"))
		closer = "esac";
	else if (cmp(*pp, "{"))
		closer = "}";
	else {
		perr("case: unexpected ``%s''", *pp);
		return -1;
	}

	/* in / {, the patterns, then esac / } */
	if (x_putword(pp, vv, 0, sc) ||
	    skip_list_till(pp, vv, closer, 0, 0, sc) ||
	    x_putword(pp, vv, 0, sc))
		return -1;

	return 0;
}

/*
 * for 'var' 'list of words' 'commands'
 *
 * Handles `for VAR in WORDS; do ... done' and the `{ ... }' body form.
 * `for' and VAR are copied.  With `in' the word list up to `{' / `do'
 * becomes one list; without it the text  '"$@"'  is inserted in its place.
 * The opening `do' / `{' is consumed, the body up to the matching `done' /
 * `}' becomes a list, and the closing keyword is consumed.
 * Returns 0 on success, -1 on any error.
 */
static int x_for(const char **pp, char **vv, int sc)
{
	const char *closer;

	/* "for", then the loop variable */
	if (x_putword(pp, vv, 1, sc) || x_putword(pp, vv, 1, sc))
		return -1;

	if (!cmp(*pp, "in")) {
		/* default: for i; == for i in "$@"; */
		if (*vv) {
			a_alloc(a_arg, 8);
			strcpy(*vv, " '\"$@\"' ");
			*vv += 8;
		}
		if (**pp == ';')
			++*pp;
		while (issep(**pp))
			++*pp;
		if (!cmp(*pp, "do") && !cmp(*pp, "{")) {
			perr("unexpected ``%s''", *pp);
			return -1;
		}
	} else {
		/* TODO: it's not really a list, ``for x in a ; b ; do''
		 * should be a parse error, it's now treated as
		 * ``for x in a ';' b ; do'' */
		if (x_putword(pp, vv, 0, sc) ||		/* in */
		    skip_list_till(pp, vv, "{", "do", 0, sc))
			return -1;
	}

	closer = (**pp == '{') ? "}" : "done";

	/* {/do, the body, then }/done */
	if (x_putword(pp, vv, 0, sc) ||
	    skip_list_till(pp, vv, closer, 0, 0, sc) ||
	    x_putword(pp, vv, 0, sc))
		return -1;

	return 0;
}

/*
 * { 'commands'
 *
 * Brace group: the opening word is copied, everything up to a `}' at the
 * beginning of a statement becomes one list, and the `}' is consumed.
 * Returns 0 on success, -1 on any error.
 */
static int x_b(const char **pp, char **vv, int sc)
{
	int failed;

	failed = x_putword(pp, vv, 1, sc) ||
		 skip_list_till(pp, vv, "}", 0, 0, sc) ||
		 x_putword(pp, vv, 0, sc);

	return failed ? -1 : 0;
}

/*
 * while 'condition' 'commands'
 *
 * Used for both `while' and `until': the keyword is copied, the condition
 * up to `do' becomes one list, `do' is consumed, the body up to `done'
 * becomes a second list and `done' is consumed.
 * Returns 0 on success, -1 on any error.
 */
static int x_while(const char **pp, char **vv, int sc)
{
	/* keyword, then the condition */
	if (x_putword(pp, vv, 1, sc) ||
	    skip_list_till(pp, vv, "do", 0, 0, sc))
		return -1;

	/* do, then the body */
	if (x_putword(pp, vv, 0, sc) ||
	    skip_list_till(pp, vv, "done", 0, 0, sc))
		return -1;

	/* done */
	return x_putword(pp, vv, 0, sc) ? -1 : 0;
}

/* if list1 ; then list2 ; elif list3 ; then ; list4 ; else ; list5 ; fi
 * gets expanded to:
 * if 'list1' 'list2' elif 'list3' 'list4' else 'list5' fi
 *
 * `mode' says what the section being processed is:
 *   2 -- if / elif: a condition up to `then', followed by a body
 *   1 -- else: a body only
 *   0 -- `fi' was reached
 *
 * Each section keyword is copied exactly once, at the top of the loop; the
 * closing `fi' is copied after the loop.  (Copying the keyword at the
 * bottom of the loop as well made the top of the next iteration swallow
 * the first word of the following list as if it were a keyword.)
 *
 * Returns 0 on success, -1 on any error, including `else' or `elif'
 * appearing after an `else'.
 */
static int x_if(const char **pp, char **vv, int sc)
{
	int mode = 2;

	while (mode) {
		/* if, elif or else */
		if (x_putword(pp, vv, 1, sc))
			return -1;

		if (mode == 2) {
			/* the condition, then drop the `then' */
			if (skip_list_till(pp, vv, "then", 0, 0, sc))
				return -1;
			if (x_putword(pp, vv, 0, sc))
				return -1;
		}

		/* the body, up to whatever keyword ends it */
		if (skip_list_till(pp, vv, "fi", "elif", "else", sc))
			return -1;

		if (**pp == 'f')
			mode = 0;	/* fi -- fin */
		else if ((*pp)[2] == 's') {
			/* third letter tells else from elif */
			if (mode == 1) {
				perr("else after else");
				return -1;
			} else
				mode = 1;	/* else */
		} else if (mode == 1) {
			perr("elif after else");
			return -1;
		} else
			mode = 2;	/* elif */
	}

	/* fi */
	if (x_putword(pp, vv, 1, sc))
		return -1;
	return 0;
}

/*
 * Beginning-of-statement dispatcher: when the word at *pp is one of the
 * control keywords (for, case, {, if, while, until) run the matching x_*
 * handler and return its result.  Any other text is left alone and 0 is
 * returned.
 */
static int do_the_bos_thing(const char **pp, char **vv, int sc)
{
	const char *word = *pp;

	if (cmp(word, "for"))
		return x_for(pp, vv, sc);

	if (cmp(word, "case"))
		return x_case(pp, vv, sc);

	if (cmp(word, "{"))
		return x_b(pp, vv, sc);

	if (cmp(word, "if"))
		return x_if(pp, vv, sc);

	/* until shares the grammar of while */
	if (cmp(word, "while") || cmp(word, "until"))
		return x_while(pp, vv, sc);

	return 0;
}

/*
 * First-level expansion of an input line.
 *
 * Expands $ parameters and `` command substitutions, eats `#' comments and
 * rewrites all quoting (' " \) into backslash-style quoting.
 * TODO: we possibly should expand $ first and then ``
 *
 * Note that this parser cannot produce certain characters unquoted,
 * including ' and ".  An unquoted ' is therefore used to mark verbatim
 * words for the next-level parser.
 *
 * The output is built on the a_arg stack.
 *
 * Control commands at the beginning of a statement are recognised as well
 * and the lists that follow them are quoted, so that
 *   while true do echo; echo; done
 * becomes
 *   while 'true\0 'echo; echo;\0
 * The specific grammar of each command is given at its x_XXX handler,
 * where XXX is the command.
 *
 * When the input ends inside a quote another line is requested with
 * getline(1).  An unquoted newline at the very end of the result is
 * removed.
 *
 * Returns the expanded string, or 0 on error.
 */
char *expand(const char *p)
{
	char *ret, *v;
	int mode, bos = 1;	/* bos: at the beginning of a statement */
	
WIZARD(8, "expin: \"%s\"\n",p);
	mode = switch_mode(-1, p, 0);
	
	v = ret = a_alloc(a_arg, 2);

restart:	
	for (; *p; p++) {
		if (mode == M_PLAIN && (*p == ';' || *p == '\n'))
			bos = 1;
		if (bos && strchr("{fiwuc", *p)) {
			if (do_the_bos_thing(&p, &v, 0))
				return 0;
		}
		if (mode != M_PLAIN || !issep(*p))
			bos = 0;
		switch (*p) {
		case 0:
			p--;	/* something went wrong, we don't want to
			         * pass end of buffer (in ``for (; *p; p++)'')
				 */
			break;
		case '$':
			if (mode == M_SINGLE_QUOTE)
				goto plain;
			else {
				int r;
				
				/* expand_var() wants p behind the `$' and
				 * leaves it behind the reference; the p--
				 * makes up for the loop's p++ */
				p++;
				r = expand_var(&p, &v, mode == M_DOUBLE_QUOTE);
				p--;
				if (r == 1)
					goto plain;	/* lone `$' */
				else if (r == -1)
					return 0;
				break;
			}
		case '`':
			if (mode == M_SINGLE_QUOTE)
				goto plain;
			p++;
			do_backtick(&p, &v, mode == M_DOUBLE_QUOTE);
			p--;
			break;
		case '\'':
		case '"':
			/* a negative result is a mode change; otherwise the
			 * quote is an ordinary character in this context */
			if ((mode = switch_mode(mode, p, &v)) < 0)
				mode = -mode;
			else
				goto plain;
			break;
		case '\\':
			if (p[1] && mode == M_DOUBLE_QUOTE && 
				 strchr("\\$`\"", p[1]))
				p++;
			else if (p[1] == '\n' && mode != M_SINGLE_QUOTE) {
				/* backslash-newline shall be removed if not
				 * in ''.  Step onto the newline only: the
				 * loop's p++ then moves past it.  (p += 2
				 * also dropped the following character and
				 * could step over the terminating NUL.) */
				p++;
				continue;
			}
			goto plain;
		case '#':
			/* NOTE(review): `isatty' is used as a value here, not
			 * called; if it names the libc function this test is
			 * always false and comments are never dropped --
			 * confirm what h.h declares. */
			if (!isatty && mode == M_PLAIN) {
				p = strchr(p, '\n');
				if (p && p[1])
					goto plain;
				else
					p = "x"; /* cause loop to stop */
			} else
				goto plain;
			break;
		default:
		plain:
			if (isalnum((unsigned char)*p) || mode == M_PLAIN) {
				*v++ = *p;
				a_alloc(a_arg, 1);
			} else {
				a_alloc(a_arg, 2);
				*v++ = '\\';	/* quote */
				*v++ = *p;
			}
			break;
		}
	}
	
	if (mode != M_PLAIN) {
		p = getline(1);	/* get a line with '> ' prompt */
		if (!p) {
			perr("unterminated quote.");
			return 0;
		}
		goto restart;
	}
	
	/* eat newline from eos if unquoted; never look in front of the
	 * start of the output when it is shorter than two bytes */
	if (v > ret && v[-1] == '\n' && (v - ret < 2 || v[-2] != '\\'))
		v--;

	*v = 0;
	a_align(a_arg);

#ifndef SMALL
if (wizard_mode&8) {
	/* debug dump: verbatim words (introduced by ') are shown in {{{ }}} */
	printf("expnd: \"");
	v = ret; 
	while (*v) {
		if (*v == '\'') {
			printf("{{{%s}}}", v+1);
			v = strchr(v, 0) + 1;
		} else
			printf("%c", *v++);
	}
	printf("\"\n");
}
#endif

	return ret;
}

/* Defined elsewhere; nested_expand() hands it the result of expand(). */
char *copy_exp(const char *p);

/*
 * Run expand() on a string that is already in memory.
 *
 * The states of the a_arg stack and of stack 0 are pushed around the call
 * and popped afterwards.  gl_eof is raised while expand() runs so that it
 * cannot ask for more input, since the text does not come from a file.
 * A successful result is passed through copy_exp() before the a_arg state
 * is popped.  Returns the value of copy_exp(), or 0 when expand() failed.
 */
char *nested_expand(char *xxx)
{
	char *expanded;
	char *result = 0;

	a_push_state(a_arg);
	a_push_state(0);

	gl_eof++;
	expanded = expand(xxx);
	gl_eof--;

	a_pop_state(0);

	if (expanded)
		result = copy_exp(expanded);

	a_pop_state(a_arg);
	return result;
}
