"""
Author: David W. Schere, XML toolkit
Copyright (C) 1998 DIDX llc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  
02111-1307, USA.

    Uses an XML file as raw material for a factory that produces production
objects. A production is any XML entity (text, tag, etc.). Each is
represented by a class. This object borrows some concepts from the Aelfred
XML parser (see 'http://www.microstar.com/XML') in that it aims to be as
lightweight as possible. It is meant to be a front end for an XML
processor that would evaluate and execute tags.

    Works in phases:

    1) Transform a piece of an XML file, up to the '>' or EOF,
       into two tokens: one representing the text before the tag,
       the other the text inside the tag.
    2) Evaluate the tokens, creating a production object for each.
    3) Call the XMLProcessor, giving it a production object.
"""

# Exception identifiers.  They are plain strings and are raised elsewhere in
# this file with the `raise <name>, <message>` statement form.
# NOTE(review): SyntaxError rebinds the builtin exception name inside this
# module.
XPointerSyntax = "XFactory.XPointerSyntax"
SyntaxError = "XMLFactory.SyntaxError"
UnknownObjectType = "XMLFactory.UnknownObjectType"
HandlerException = "XMLFactory.HandlerException"

import regsub, regex, strop
# Integer stand-ins for boolean values, used throughout this file.
false, true = 0, 1

# global helper functions
def inside_frame(p, header, trailer):
    """Return the part of p that is enclosed by header and trailer.

    p       -- string to examine
    header  -- text p has to start with
    trailer -- text p has to end with

    Returns the text between header and trailer, or None when p does not
    start with header, does not end with trailer, or is too short to hold
    both of them without overlap.
    """
    inner_end = len(p) - len(trailer)

    # Header and trailer may not share characters.  Without this check a
    # lone "?" would count as framed by "?" ... "?" with an empty body.
    if inner_end < len(header):
        return None

    if p[:len(header)] == header and p[inner_end:] == trailer:
        return p[len(header):inner_end]
    return None


# one or more blanks, tabs, newlines or carriage returns
spaces_compiler = regex.compile("[ \t\n\r]+")
def skip_spaces(text):
    """Return text with its leading white space removed.

    The compiled pattern is matched at the start of text; the result of
    match() is used as the number of characters to drop, and -1 means that
    text does not start with white space and is returned as it is.
    """
    width = spaces_compiler.match(text)
    if width != -1:
        return text[width:]
    return text



"""
    Base class for all productions.
    Converts the aliases for '<', '>', '&' and the double quote into literals.
    Sets up the framework for derived objects.

    Note:
    Must have a no-argument constructor for the shelve
    library. The XMLProcessor might be a remote object.
"""

# Numeric character reference: "&#" followed either by decimal digits or by
# "x"/"X" and hexadecimal digits.  The closing ";" is handled in eval_ascii.
ascii_compile = regex.compile("&#\\([0-9]+\\|[xX][0-9a-fA-F]+\\)")
# basic token representing an entity within an xml file
class production:
    """One scanner token: the text inside a tag or the text between tags.

    The raw text is stored with its entity references already replaced by
    the characters they stand for.
    """

    def __init__(self, raw = None, istag = false):
        """raw   -- untranslated text of the token (None gives an empty one)
        istag -- true when the text was found between '<' and '>'
        """
        # Always create both attributes so that istag(), raw() and
        # linecnt() also work on an object built without arguments.
        self._istag = istag
        self._raw = ""
        if raw is None: return
        self._raw = self.translate(raw)

    def linecnt(self):
        """Return the number of newline characters in the token text."""
        import string
        return string.count(self._raw, "\n")

    def eval_ascii(self, t):
        """Return t with its numeric character references decoded.

        Decimal (&#65;) and hexadecimal (&#x41;) references are replaced by
        the character with that code.  A reference whose code does not fit
        chr() (outside 0..255) is left in the text unchanged.
        """
        done = ""
        while 1:
            pos = ascii_compile.search(t)

            # the only way to escape!
            if pos == -1:
                return done + t

            width = ascii_compile.match(t[pos:])
            token = t[pos:pos + width]
            end = pos + width
            # swallow the terminating ';' when there is one
            if t[end:end + 1] == ";":
                end = end + 1

            try:
                if token[2] in ("x", "X"):
                    code = strop.atoi(token[3:], 16)
                else:
                    code = strop.atoi(token[2:])
            except ValueError:
                # number too large to convert
                code = -1

            if 0 <= code <= 255:
                piece = chr(code)
            else:
                piece = t[pos:end]

            # Only the text after the reference is scanned again, so a
            # decoded '&' can never start a new reference.
            done = done + t[:pos] + piece
            t = t[end:]

    def translate(self, raw):
        """Return raw with the predefined and numeric references decoded."""
        x = raw
        for (f, r) in [("&quot;", "\""), ("&lt;", "<"), ("&gt;", ">")]:
            x = regsub.gsub(f, r, x)

        # Numeric references are decoded before "&amp;" so that text such
        # as "&amp;#60;" ends up as "&#60;" and is not decoded a second time.
        # NOTE(review): "&#38;amp;" is still decoded twice by this scheme.
        x = self.eval_ascii(x)
        return regsub.gsub("&amp;", "&", x)

    def istag(self):
        """Return the istag flag given to the constructor."""
        return self._istag

    def raw(self):
        """Return the translated text of the token."""
        return self._raw


		

"""
    Breaks the raw text of an XML file into production objects. These are
    simple tokens which can be either tags (anything inside a <>) or
    text (anything outside a <>).
"""
class XMLScanner:
	def __init__(self):
		self.inside_tag = false
		self.inside_comment = false
		self.prod_vect = []
		self.textbuf, self.tagbuf = "", ""

	# return and purge productions from list
	def move_production(self): 
		r =  self.prod_vect
		self.prod_vect = []
		return r

	def push(self, buf):
		if buf == "": return
#*** not been tested in the case of no C extension so I commented it out
#		try:
#			# see if the C optimization is here
#			import production
#			p = production.init(buf, self.inside_tag)
#		except:		
#			# if not use the python class
  		p = production(buf, self.inside_tag)
		self.prod_vect.append( p )		
	# determine if we are inside a comment	
	def check_for_comment(self, cnt, text):
		if cnt > 4:
			if   text[cnt-3:cnt+1] == "<!--":
				self.inside_comment = true
			elif text[cnt-2:cnt+1] == "-->":
				self.inside_comment = false					
	
	
	# feed some text and produce production objects					
	def feed(self , text):
		cnt = 0
		for ch in text:
			self.check_for_comment(cnt, text)
			if   ch == "<" and self.inside_comment == false:
				self.push( self.textbuf )
				self.inside_tag = true
				self.textbuf = ""
			elif ch == ">" and self.inside_comment == false:
				if self.inside_tag == false:
					print "Error: Malformed document! '>' without '<'"
				else:	
					self.push( self.tagbuf )
					self.inside_tag = false
					self.tagbuf = ""
			else:
				if self.inside_tag == true:
					self.tagbuf = self.tagbuf + ch
				else:
					self.textbuf = self.textbuf + ch
			cnt = cnt + 1

"""
    Abstract class for attributes. These are Symbols,
    Literals or Numbers.
"""
class Attribute:
    """Interface of the attribute parsers; every method is a stub.

    make_attributes() calls new() and process() on each of its choices, so
    both are part of the interface.
    """

    def __init__(self):
        pass

    def process(self, text):
        """Try to parse an attribute at the start of text.

        Returns the unparsed remainder; returning text unchanged means
        that nothing was recognised, which is what this stub does.
        """
        return text

    def typeOf(self):
        return None

    def value(self):
        return None

    def nameOf(self):
        return None

    def new(self):
        return None


"""
	These classes represent attributes of a tag
	They can be combined to form expressions
"""
num_compiler = regex.compile("[0-9.]+")
class Number(Attribute):
	def __init__(self):
		Attribute.__init__(self)

	def process(self, text):
		t = num_compiler.match(text)
		if t == -1: return text
		token = num_compiler.group(0)

		import string
		cnt = string.count(token,".")
		if   cnt == 0:
			self.val = strop.atoi( token )
		elif cnt == 1:
			self.val = strop.atof( token )
		else:
			msg = "Too many decimals in floating point %s" % (token)
			raise SyntaxError, msg

		return text[t:]

	def typeOf(self):
		return self.__class__
	def value(self):
		return self.val
        def new(self):
                return Number()

# optional '#'s, a name made of letters, digits, '_' and '.', then any
# number of '?', '+' or '*' characters
sym_compiler = regex.compile("[#]*[.a-zA-Z_][.a-zA-Z0-9_]*[?+*]*")
class Symbol(Attribute):
    """A bare name; its text serves both as its name and as its value."""

    def __init__(self):
        Attribute.__init__(self)

    def new(self):
        return Symbol()

    def typeOf(self):
        return self.__class__

    def process(self, text):
        """Parse a symbol at the start of text.

        Returns the text after the symbol, or text unchanged when it does
        not start with one.
        """
        width = sym_compiler.match(text)
        if width != -1:
            # keep the text the pattern just matched
            self.val = sym_compiler.group(0)
            return text[width:]
        return text

    def nameOf(self):
        return self.val

    def value(self):
        return self.val

LitError = "XMLFactory.LitError"

class Literal(Attribute):
	def __init__(self):
		Attribute.__init__(self)
	def process(self, text):
                if len(text) == 0: return text
		if text[0] != "\"": return text

                self.val = ""
		t = 1
		inside_quotes = true
		prev_char = ""

		while inside_quotes == true:
                        try:
                                ch = text[t]
                        except:
                                raise LitError, "Unbounded literal"

                        if ch == "\"":
                                return text[t+1:]
			else:
				self.val = self.val + ch
			prev_char = ch
			t = t + 1

		raise LitError, "Unbalanced quote! starting at col %s " % (t) 
	def typeOf(self):
		return self.__class__
	def value(self):
		return self.val
        def new(self):
                return Literal()
"""
    Produces a list of attribute objects based on a list of choices.
"""
def make_attributes(text, choices):
    """Parse text into a list of attribute objects.

    text    -- the string to parse
    choices -- prototype objects, tried in the given order; each has to
               provide new() and process()

    At every position the first choice that consumes some text wins: a
    fresh object made by its new() is added to the result and the white
    space after it is skipped.  Parsing ends as soon as no choice consumes
    anything, which includes the case of an empty list of choices.
    """
    attrs = []
    matched = true
    while matched == true:
        matched = false
        for prototype in choices:
            candidate = prototype.new()
            rest = candidate.process(text)
            # a changed text means that the candidate matched
            if rest != text:
                attrs.append(candidate)
                # eat up white space till next token
                text = skip_spaces(rest)
                matched = true
                break
    return attrs

# classes that use combinations of Symbols, Literals and Numbers

# <Symbol>=<Value>
class Association(Attribute):
    """A name = value pair; the value is a Symbol, Literal or Number."""

    def __init__(self):
        Attribute.__init__(self)
        # both stay None until process() has recognised a pair
        self.name = None
        self.val = None

    def process(self, text):
        """Parse "symbol = value" at the start of text.

        White space is allowed on both sides of the equals sign.  Returns
        the text after the value, or text unchanged when it does not start
        with a complete pair.
        """
        # get the symbol at the left of equals
        left = Symbol()
        rest = left.process(text)
        if rest == text:
            return text

        # must be an equals, possibly after some space
        rest = skip_spaces(rest)
        if rest[:1] != "=":
            return text

        # go beyond the = and the space that may follow it
        rest = skip_spaces(rest[1:])

        # try to match either a Symbol, Literal or Number
        for right in [Symbol(), Literal(), Number()]:
            p = right.process(rest)
            if p != rest:
                self.name = left.value()
                self.val = right.value()
                return p

        return text

    def typeOf(self):
        return self.__class__

    def value(self):
        return self.val

    def nameOf(self):
        return self.name

    def new(self):
        return Association()

		
ExprError = "XMLFactory.ExprError"

# (<symbol>,<symbol> .. >) or (<symbol> | <symbol> | ... )
class Expr(Attribute):
	def __init__(self):
		Attribute.__init__(self)
	def process(self, text):
		# see if in expression
                if len(text) == 0: return text
		if text[0] != "(": return text
	
		lastpos = strop.find(text,")")
		if lastpos == -1: return text

		# get what inside ()
		token = text[1:lastpos]
		# remove spaces
#		token = regsub.gsub("[ \t\r\n]+","",token)

		# create attribute list
		self.attr = []
                def func(self, token, type, delim):
			self.type = type
			for subtoken in strop.splitfields(token, delim):
				subtoken = strop.strip(subtoken)
				a = make_attributes(subtoken,[Symbol(),Literal(),Number()])				
				self.attr.append( a[0] )
		
		if strop.find(token,",") != -1:		
                        func(self, token, "and", ",")
		if strop.find(text,"|") != -1:
                        func(self, token, "or", "|")
								
		# see if there is a modifier + or * sign after the )
		x = regex.compile("\([*+]\)")
		pos = x.search(text[lastpos:])
		if pos != -1:
			self.modifier = text[lastpos+pos]
			return text[lastpos+pos+1:]
		else:
			self.modifier = None

		return text[lastpos:]
					
	def exprType(self):
		return self.type			
        def typeOf(self):
		return self.__class__
	def value(self):
		return self.attr
        def new(self):
                return Expr()
	def getModifier(self):
		return self.modifier

# --------------- end attributes --------------------------


#------- Entity classes ------------------------------------


class entity:
    """Base class of the objects that are handed to the client.

    Derived classes store their parsed attributes in self.attr; the first
    one is taken as the name, the others as the value.
    """

    def __init__(self):
        pass

    def process(self, prod):
        """Stub: derived classes examine the production prod here."""
        pass

    def nameOf(self):
        """Return the value of the first attribute, or None without one."""
        try:
            return self.attr[0].value()
        except (AttributeError, IndexError):
            # nothing processed yet, or no attribute was found
            return None

    def value(self):
        """Return the attributes after the first one.

        An empty string is returned when nothing has been processed yet.
        """
        try:
            return self.attr[1:]
        except AttributeError:
            return ""

    def clone(self):
        """Return this same object; no copy is made."""
        return self


class CDATA(entity):
    """A tag production of the form ![CDATA[ ... ]]."""

    def __init__(self):
        entity.__init__(self)

    def nameOf(self):
        return None

    def value(self):
        return self.text

    def process(self, prod):
        """Keep the text between "![CDATA[" and "]]" of a tag production.

        Returns true when prod is such a tag and false otherwise.
        """
        matched = false
        if prod.istag() != false:
            body = inside_frame(prod.raw(), "![CDATA[", "]]")
            if body is not None:
                self.text = body
                matched = true
        return matched
		

# text found outside of any tag: kklfjlk ... <
class rawText(entity):
    """A production that is not a tag."""

    def __init__(self):
        entity.__init__(self)

    def nameOf(self):
        return None

    def value(self):
        return self.text

    def process(self, prod):
        """Keep the text of prod unless prod is a tag.

        Returns false for a tag and true otherwise.
        """
        if prod.istag() != true:
            self.text = prod.raw()
            return true
        return false

# <!-- .... --->
class comment(entity):
    """A tag production of the form !-- ... --."""

    def __init__(self):
        entity.__init__(self)

    def nameOf(self):
        return None

    def value(self):
        return self.text

    def process(self, prod):
        """Keep the text between "!--" and "--" of a tag production.

        Returns true when prod is such a tag and false otherwise.
        """
        matched = false
        if prod.istag() != false:
            body = inside_frame(prod.raw(), "!--", "--")
            if body is not None:
                self.text = body
                matched = true
        return matched
	

# <!... >
class dtd(entity):
    """A tag production that starts with '!' and is not a CDATA section."""

    def __init__(self):
        entity.__init__(self)

    def process(self, prod):
        """Parse the text after the '!' into attributes.

        Returns true when prod is a tag starting with '!' (CDATA sections
        excluded) and false otherwise.
        """
        if prod.istag() == false: return false
        raw = prod.raw()
        # slicing keeps an empty production from raising IndexError
        if raw[:1] != "!": return false

        # check for CDATA
        if raw[:len("![CDATA[")] == "![CDATA[": return false

        self.attr = make_attributes(raw[1:], [Expr(), Literal(), Symbol()])
        return true

    def has_val(self, val):
        """Return true when an attribute after the first one has a value
        whose string form equals the string form of val."""
        wanted = "%s" % (val,)
        for obj in self.value():
            if "%s" % (obj.value(),) == wanted:
                return true
        return false
						
	
#<? .. ?>	
class pi(entity):
    """A tag production of the form ? ... ?."""

    def __init__(self):
        entity.__init__(self)

    def process(self, prod):
        """Parse the text between the two '?' into attributes.

        Returns true when prod is such a tag and false otherwise.
        """
        matched = false
        if prod.istag() != false:
            body = inside_frame(prod.raw(), "?", "?")
            if body is not None:
                kinds = [Association(), Literal(), Symbol()]
                self.attr = make_attributes(body, kinds)
                matched = true
        return matched


# Allows for other resources to be linked in
class XPointer(entity):
    """A tag production that starts with XPOINTER, in any letter case."""

    def __init__(self):
        entity.__init__(self)
        # result of resolve(); stays None until process() succeeds
        self.target = None

    def resolve(self, pointer_text):
        """Run the pointer described by pointer_text and return the result.

        pointer_text is parsed as an Expr whose items are the command, the
        url, the pattern and, as an optional fourth item, the number of
        matches.  Raises XPointerSyntax when pointer_text is not such an
        expression or holds fewer than three items.
        """
        e = Expr()
        p = e.process(pointer_text)
        if p == pointer_text:
            raise XPointerSyntax, "Syntax error %s" % (pointer_text)

        attr = e.value()
        # command, url and pattern are required
        if len(attr) < 3:
            raise XPointerSyntax, "Syntax error %s" % (pointer_text)

        from XMLProcessor import XPointerProcessor

        nummatches = 0
        if len(attr) == 4:
            nummatches = strop.atoi("%s" % (attr[3].value()))

        command = attr[0].value()
        url = attr[1].value()
        pattern = attr[2].value()

        x = XPointerProcessor()
        return x.run(command, url, pattern, nummatches)

    def process(self, prod):
        """Resolve the pointer of an XPOINTER tag and keep it in target.

        Returns true when prod is such a tag and false otherwise.
        """
        if prod.istag() == false: return false

        p = strop.upper(prod.raw()[:len("XPOINTER")])
        if p != "XPOINTER": return false

        xpointer_text = strop.strip(prod.raw()[len("XPOINTER"):])
        self.target = self.resolve(xpointer_text)

        return true
				 				

# <...> or <.../>			 
class tag(entity):
    """A start tag, end tag or empty tag.

    The attributes of the form name=value can be read and changed with the
    index syntax; names are compared without regard to letter case.
    """

    def __init__(self):
        entity.__init__(self)
        self._text = ""

    def process(self, prod):
        """Parse a tag production into a name and its attributes.

        A trailing '/' marks an empty tag and a leading '/' an end tag.
        Returns false when prod is not a tag and true otherwise.
        """
        if prod.istag() == false: return false
        text = prod.raw()

        # see if it ends with / for an empty tag
        if text[-1:] == "/":
            text = text[:-1]
            self.empty = true
        else:
            self.empty = false

        # slicing keeps a production that was just "/" from raising
        if text[:1] == "/":
            text = text[1:]
            self.endtag = true
        else:
            self.endtag = false

        self.source = text
        self.attr = make_attributes(text, [Association(), Symbol()])
        return true

    def emptyTag(self):
        return self.empty

    def endTag(self):
        return self.endtag

    def __getitem__(self, key):
        """Return the value of the attribute named key, or None."""
        k = strop.upper(key)
        for obj in self.value():
            if obj.typeOf() == Association:
                if strop.upper(obj.nameOf()) == k:
                    return obj.value()
        return None

    def items(self):
        """Return a list of (name, value) pairs, one for each attribute."""
        pairs = []
        for obj in self.value():
            pairs.append((obj.nameOf(), obj.value()))
        return pairs

    def keys(self):
        """Return a list with the name of each attribute."""
        names = []
        for obj in self.value():
            names.append(obj.nameOf())
        return names

    def __setitem__(self, key, data):
        """Give the attribute named key the value data.

        Nothing happens when the tag has no name=value attribute of that
        name.
        """
        k = strop.upper(key)
        for obj in self.value():
            if obj.typeOf() == Association:
                # same comparison as in __getitem__; the object is shared
                # with self.attr, so changing it in place is enough
                if strop.upper(obj.nameOf()) == k:
                    obj.val = data

    def has_key(self, key):
        """Tell whether the tag has the attribute(s) named by key.

        key is one name, or a tuple or list of names that all have to be
        present.  An attribute counts as present when its value is not
        None.  Any other kind of key gives false.
        """
        if type(key) == type(""):
            if self.__getitem__(key) == None: return false
            return true
        elif type(key) == type(()) or type(key) == type([]):
            for k in key:
                if self.__getitem__(k) == None:
                    return false
            return true
        return false

    def setText(self, text):
        self._text = text

    def getText(self):
        return self._text

				
"""
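    Gets fed text and spits out entity objects.

    The client object must provide the following methods;
    each will receive an entity object:

    text
    pi
    emptyTag
    nonEmptyTag
    endTag
    dtd
    CDATA
    comment

    The factory also calls canProcess (with an entity object) and
    abortProcess and endfile (without arguments) on the client.
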
"""		
class XMLFactory:
	def __init__(self, client):
		self.s = XMLScanner()
		self.client = client
		self.linecnt = 0	

	def getClient(self):
		return self.client

	# stub function that can be overloaded to inspect each object
	# before it reaches the client
	def inspect(self, obj):
		pass

	# call client		
	def handle_rawText(self, obj):
		self.inspect(obj)
		self.client.text( obj )
	def handle_pi(self, obj):
		self.inspect(obj)
		self.client.pi( obj )
	def handle_dtd(self, obj):
		self.inspect(obj)
		self.client.dtd( obj )
	def handle_tag(self, obj):
		self.inspect(obj)
		if obj.emptyTag() == true:
			self.client.emptyTag( obj )
		elif obj.endTag() == true:
			self.client.endTag( obj )
		else:
                        # push stack
			self.client.nonEmptyTag( obj )
	def handle_comment(self, obj):
		self.inspect(obj)
		self.client.comment( obj )	
	def handle_CDATA(self, obj):
		self.inspect(obj)
		self.client.CDATA( obj )
	def endfile(self):
		self.client.endfile()

	def externalFeed(self, obj):
		if   obj.__class__ == tag().__class__:
			self.handle_tag(obj)
		elif obj.__class__ == rawText().__class__:
			self.handle_rawText(obj)
		elif obj.__class__ == dtd().__class__:
			self.handle_dtd(obj)
		elif obj.__class__ == pi().__class__:
			self.handle_pi(obj)
		elif obj.__class__ == CDATA().__class__:
			self.handle_CDATA(obj)
		elif obj.__class__ == comment().__class__:
			self.handle_comment(obj)
		else:
			msg = "XMLFactory.externalFeed - illegal object"
			raise UnknownObjectType, msg

	def feed(self, text):				
		self.s.feed(text)
		plist = self.s.move_production()
		

		def match(self, p):
			choices = [(rawText(), "handle_rawText"),
				(comment(), "handle_comment"),
				(pi(), "handle_pi"),
				(dtd(), "handle_dtd"),
				(CDATA(), "handle_CDATA"),
				(tag(), "handle_tag")]

			for (obj, cb) in choices:
				x = XPointer()
				# see if its an XPointer
				if x.process(p) == true:
					# if it is then see if the pointer target is valid
					if x.target != None:
						# The target is a production which has already
						# been processed. send it to the client
						for (obj, cb) in choices:
							if obj.__class__ == x.target.__class__:
								func = getattr(self, cb)
								func(x.target)	
					 			return true
				
				if obj.process(p) == true:
					if self.client.canProcess(obj) == true:
						func = getattr(self, cb)
						func(obj)
						return true
			return false

		# for each production
		for p in plist:
			match(self, p)
			self.linecnt = self.linecnt + p.linecnt()

			if self.client.abortProcess():
				return true
				
		return false
			

#-------------------------- test harness -------------------------
"""

URLNotFound = "XMLFactory.UrlNotFound"

# test XMLProcessor
class XMLProcessor:
	def __init__(self, bufsize=4096):
		self.bufsize = bufsize
	def run(self, client, url):
		import urllib

		self.factory = XMLFactory(client)
		file = None
		
		# get the url 
		self.fn, h = urllib.urlretrieve(url)
		# open the file
		try:
			file = open(self.fn, 'r')
		except:
			raise URLNotFound, "Unable to open %s" % (url)
			
		while 1:
			text = file.read(self.bufsize)
			if text == None:
				break
			if len(text) == 0:
				break
			self.factory.feed( text )

		del file
		self.factory.endfile()

#from XMLClient import ClientBase
import XMLClient

class TestClient(XMLClient.ClientBase):
	def __init__(self):
		XMLClient.ClientBase.__init__(self)
        def dump(self, obj):
                if hasattr(obj,"attr"):
               	        for a in obj.attr:
                       	        print vars(a)
	def text(self, obj):
		print "text: ", vars(obj)
	def pi(self, obj):
		print "pi = ", vars(obj)
	def emptyTag(self, obj):
		print "endTag = ", vars(obj)
	def nonEmptyTag(self, obj):
       	        print "nonEmptyTag = ", strop.strip( obj.nameOf() ), self.dump(obj)
	def endTag(self, obj):
		print "endTag = ", vars(obj)
	def dtd(self, obj):
       	        print "dtd = ", self.dump(obj)
	def CDATA(self, obj):
		print "CDATA = ", vars(obj)
	def comment(self, obj):
		print "comment = ", vars(obj)
	def endfile(self):
		pass	



if __name__ == '__main__':
	import sys



#	file = open(sys.argv[1],"r")
	XMLProcessor().run( TestClient(), sys.argv[1] )
"""