


/*
	A C representation of an XMLScanner


"""
	Breaks the raw text of an xml file into production objects. These are
	simple tokens which can be either tags (anything inside a <>) or
	text (anything outside <>).
"""
class XMLScanner:
	def __init__(self):
		self.inside_tag = false
		self.inside_comment = false
		self.prod_vect = []
		self.textbuf, self.tagbuf = "", ""

	# return and purge productions from list
	def move_production(self): 
		r =  self.prod_vect
		self.prod_vect = []
		return r

	def push(self, buf):
		if buf == "": return
		p = production(buf, self.inside_tag)
		self.prod_vect.append( p )		
	# determine if we are inside a comment	
	def check_for_comment(self, cnt, text):
		if cnt > 4:
			if   text[cnt-3:cnt+1] == "<!--":
				self.inside_comment = true
			elif text[cnt-2:cnt+1] == "-->":
				self.inside_comment = false					
	
	
	# feed some text and produce production objects					
	def feed(self , text):
		cnt = 0
		for ch in text:
			self.check_for_comment(cnt, text)
			if   ch == "<" and self.inside_comment == false:
				self.push( self.textbuf )
				self.inside_tag = true
				self.textbuf = ""
			elif ch == ">" and self.inside_comment == false:
				self.push( self.tagbuf )
				self.inside_tag = false
				self.tagbuf = ""
			else:
				if self.inside_tag == true:
					self.tagbuf = self.tagbuf + ch
				else:
					self.textbuf = self.textbuf + ch
			cnt = cnt + 1
*/
#include <limits.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>

/*
 * Boolean constants used for the scanner's flag fields.
 * false is 0 and true is 1, matching the implicit enumerator numbering.
 */
enum { false = 0, true = 1 };

/*
 * State carried by the scanner between calls.
 */
typedef struct _XMLScanner
{
	/* flag: set to true by feed on '<' and back to false on '>' */
	char inside_tag;
	/* flag: maintained by check_for_comment around "<!--" ... "-->" */
	char inside_comment;
	/* NUL-terminated characters collected by feed while inside_tag is true */
	char *tagbuf;
	/* NUL-terminated characters collected by feed while inside_tag is false */
	char *textbuf;
	/* size, excluding the terminating NUL, the two buffers were last allocated with */
	int largestsize;
} XMLScanner;


/*
 * Put a scanner into its starting state: outside any tag, outside any
 * comment, and with no buffers allocated.
 *
 * self: scanner to initialise; must not be NULL.
 *
 * Buffers that self may already point to are not freed here; the
 * pointers are simply overwritten with NULL.
 */
void XMLScanner_init(XMLScanner *self)
{
	/* no storage yet -- XMLScanner_feed allocates the buffers on demand */
	self->largestsize = 0;
	self->tagbuf = self->textbuf = NULL;

	/* scanning starts in plain text, outside of a comment */
	self->inside_comment = self->inside_tag = false;
}

/*
 * Update self->inside_comment according to the characters of text that
 * END at index cnt (the character currently being scanned).
 *
 *   - if text[cnt-3 .. cnt] is "<!--" the scanner enters a comment;
 *   - otherwise, if text[cnt-2 .. cnt] is "-->" it leaves the comment.
 *
 * This is the window used by the reference implementation in the header
 * comment (text[cnt-3:cnt+1] and text[cnt-2:cnt+1]).  It matters that the
 * window includes the current character: the flag must already be
 * cleared when the caller examines the closing '>' of "-->", otherwise
 * that '>' is treated as comment content and the tag is never closed.
 *
 * self: scanner whose inside_comment flag is updated.
 * cnt:  index of the current character; text[cnt] must be readable.
 * text: NUL-terminated chunk being scanned.
 *
 * Only the given chunk is inspected, so a "<!--" or "-->" marker that is
 * split across two separate chunks is not recognised.
 */
void check_for_comment(XMLScanner *self, int cnt, char *text)
{
	if (self == NULL || text == NULL) {
		return;
	}

	/* the index guards keep the window from starting before text[0] */
	if (cnt >= 3 && strncmp(&text[cnt - 3], "<!--", 4) == 0) {
		self->inside_comment = true;
	} else if (cnt >= 2 && strncmp(&text[cnt - 2], "-->", 3) == 0) {
		self->inside_comment = false;
	}
}


/*
 * Placeholder for emitting a production; currently does nothing.
 *
 * In the reference implementation in the header comment, push wraps a
 * non-empty buffer in a production object and appends it to the
 * scanner's production list.  That has not been ported: both arguments
 * are ignored.
 *
 * self: scanner the production would belong to (unused).
 * x:    buffer contents that would become the production (unused).
 */
void XMLScanner_push(XMLScanner *self, char *x)
{
	/* intentionally empty: mark the parameters as deliberately unused */
	(void) self;
	(void) x;
}


/*
 * Feed one NUL-terminated chunk of XML text to the scanner.
 *
 * Every character is examined in order.  Outside of a comment, '<'
 * switches the scanner into a tag and clears textbuf, and '>' switches it
 * out of a tag and clears tagbuf.  Any other character (and any '<' or
 * '>' seen while inside_comment is set) is appended to tagbuf when inside
 * a tag, or to textbuf otherwise.  check_for_comment is called before
 * each character to keep inside_comment up to date.
 *
 * self: scanner state, previously set up with XMLScanner_init.
 * text: chunk to scan.  A NULL or empty chunk is a no-op.
 *
 * Buffer management: tagbuf and textbuf always have the same capacity,
 * recorded in largestsize (excluding the terminating NUL).  Before
 * scanning, both are grown, if necessary, to hold whatever is still
 * pending from earlier calls plus the whole new chunk, so the scan loop
 * itself never allocates.  Growing uses realloc, so pending content is
 * kept and the old blocks are not leaked.
 *
 * Failure handling: if memory cannot be obtained, or the required size
 * does not fit in an int, the function returns without scanning any of
 * text and the scanner's flags and buffer contents are left as they were.
 *
 * NOTE(review): this function never frees the buffers and no destructor
 * is visible in this file; the owner of the scanner has to free tagbuf
 * and textbuf.
 */
void XMLScanner_feed(XMLScanner *self, char *text)
{
	size_t textlen, taglen, txtlen, pending, needed, capacity;
	char *grown;
	char ch;
	int cnt;

	if (self == NULL || text == NULL) {
		return;
	}

	textlen = strlen(text);
	if (textlen == 0) {
		return;
	}
	/* cnt and largestsize are ints, so refuse chunks they cannot index */
	if (textlen > (size_t) INT_MAX) {
		return;
	}

	/* content left over from earlier calls (e.g. a tag still open) */
	taglen = (self->tagbuf != NULL) ? strlen(self->tagbuf) : 0;
	txtlen = (self->textbuf != NULL) ? strlen(self->textbuf) : 0;
	pending = (taglen > txtlen) ? taglen : txtlen;
	if (pending > (size_t) INT_MAX - textlen) {
		return;
	}

	/* worst case: every character of text lands in the fuller buffer */
	needed = pending + textlen;
	capacity = (self->largestsize > 0) ? (size_t) self->largestsize : 0;
	if (needed < capacity) {
		needed = capacity;	/* never shrink below the recorded size */
	}

	if (self->tagbuf == NULL || self->textbuf == NULL || capacity < needed) {
		/* realloc(NULL, n) allocates; on failure the old block stays valid */
		grown = realloc(self->tagbuf, needed + 1);
		if (grown == NULL) {
			return;
		}
		grown[taglen] = '\0';
		self->tagbuf = grown;

		grown = realloc(self->textbuf, needed + 1);
		if (grown == NULL) {
			return;
		}
		grown[txtlen] = '\0';
		self->textbuf = grown;

		/* only recorded once BOTH buffers are known to be this large */
		self->largestsize = (int) needed;
	}

	for (cnt = 0; cnt < (int) textlen; cnt++) {
		ch = text[cnt];

		/* toggle a flag to tell when we are in or out of a comment */
		check_for_comment(self, cnt, text);

		if (ch == '<' && self->inside_comment == false) {
			/* TODO: XMLScanner_push(self, self->textbuf); -- disabled while push is a stub */
			self->inside_tag = true;
			txtlen = 0;
			self->textbuf[0] = '\0';
		} else if (ch == '>' && self->inside_comment == false) {
			/* TODO: XMLScanner_push(self, self->tagbuf); -- disabled while push is a stub */
			self->inside_tag = false;
			taglen = 0;
			self->tagbuf[0] = '\0';
		} else if (self->inside_tag == true) {
			/* lengths are tracked locally, so appending is O(1) */
			self->tagbuf[taglen++] = ch;
			self->tagbuf[taglen] = '\0';
		} else {
			self->textbuf[txtlen++] = ch;
			self->textbuf[txtlen] = '\0';
		}
	}
}
