"""
Author: David W. Schere, XML toolkit
Copyright (C) 1998 DIDX llc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  
02111-1307, USA.


	This is a collection of clients that can be swapped in and out
	of the factory. The main purpose of the client is validation.
	All implementation is done through a registry schema; the client
	is there as a middleman processing Entity objects from the
	XMLFactory. 
	
	
Added validation support. 
Note: The <!DOCTYPE tagname SYSTEM " ... *.dtd " > form works,
but the <!DOCTYPE tagname [ .... ]> form does not; supporting it would mean
a major patch to the underlying parser ;(
	
"""

# String exceptions used throughout this module; they are raised as
# `raise <name>, <message>`.

# ClientBase: end tag with an empty stack, or unclosed tags at end of file
Unexpected = "XMLClient.Client"
# XPointerClient.__init__: unusable pattern or unknown command
XPointerErr = "XMLClient.XPointerClient"
# XPointerClient: raised as soon as the requested target has been located
FoundTarget = "XMLClient.FoundTarget"
# XPointerClient.endfile: the document ended without a target
TargetNotFound = "XMLClient.TargetNotFound"
# attrlist_rec: an ATTLIST names an element that has no entry in the vtable
NoElement = "XMLClient.NoElement"

# -- for validating mode
# element_rec: a child tag is not allowed, or occurs too often / too rarely
InvalidElement = "XMLClient.InValidElement"
# attrlist_rec: malformed ATTLIST declaration or missing required attribute
InvalidAttribute = "XMLClient.InvalidAttribute"
# attrlist_rec.test_optional: attribute value is not one of the choices
InvalidAttrValue = "XMLClient.InvalidAttrValue"
# ClientBase validation: tag name has no entry in the validation table
InvalidTagname = "XMLClient.InvalidTagname"

from XMLFactory import Association, Symbol, Literal, Number, Expr, true, false
import strop


## ----- for validation 
# This determines what tags can be in a document. It also 
# determines what tags may be children of a nonempty tag.
class element_rec:
	def nameOf(self):
		return self.elmname

	# called on an end tag if we are validating.
	def checksum(self):
		x = ""
		for (key, (cnt,min,max)) in self.table.items():
			if cnt == 0 and min > 0:
				x = "No reference to "+key
			elif cnt < min:
				x = "Not enough references to "+key
			if x != "":
				msg = "In "+self.elmname+": "+x
				raise InvalidElement, msg	

	# called whenever a nonEmpty or emptyTag method is called if we
	# we are validating
	def reference(self, obj):
		if not self.table.has_key( obj.nameOf() ):
			msg = "Tag "+obj.nameOf()+" can not be a sibling of tag "+self.elmname
			raise InvalidElement, msg
		(cnt,min,max) = self.table[obj.nameOf()]
		cnt = cnt + 1
		if cnt > max:
			msg = "Too many references to tag "+obj.nameOf()+\
				"within the scope of tag "+self.elmname
			raise InvalidElement, msg
		self.table[obj.nameOf()] = (cnt,min,max)


	# adds a list of possible siblings from an expression
	def add_expr(self, e):
		m = e.getModifier()
		for s in e.attr:
			# modifier exists then distribute it alongst all
			# symbols else use the symbol modifier
			if m == "":
				self.add( s.value(), s.getModifier())
			else:
				self.add( s.value(), s.getModifier(), m)

	def add(self, v, m, expr_m = ""):
		minnum,maxnum=0,99999
		cnt = 0
		if m == "" and expr_m == "": 
			minnum,maxnum=1,1
		if m == "+": 
			minnum = 1
		if m == "*":
			minmum = 0	
			
		# remove modifier
		if m != "":
			v = v[: len(v) - 1]					
			
		self.table[v] = (cnt,minnum,maxnum)

	def __getitem__(self, key):
		return self.attr[ key ]

	def __setitem__(self, key, data):
		self.attr[key] = data

	def __init__(self, obj):
		self.table = {}
		self.attr = {}

		x,y = obj.attr[1], obj.attr[2]

		# get first element
		self.elmname = x.value()

		# the next element may be a symbol or an expression
		# like (<sym> | <sym> ... ) <modifier>
		if y.__class__  == Expr().__class__:
			self.add_expr( y )
		else:
			n = y.value()
			if n != "EMPTY":
				self.add( n, y.getModifier() )
			

# Keeps track of which attributes may appear in a tag. These records
# are associated with their element by the element's tag name.
class attrlist_rec:
	def nameOf(self):
		return self.elmname

	def add( self, stk ):
		n = stk[0]
		if len(stk) == 2:
			choices = None
			default = stk[1]
		elif len(stk) == 3:
			l = []
			if stk[1].__class__  == Expr().__class__:
				for a in stk[1].attr[1:]:
					l.append( a.value() )
			choices = l
			default = stk[2]	
		else:
			msg= "Inside of ATTLIST wrong number of declarations"
			raise InvalidAttribute, msg
		self.table[n] = (choices, default)

	def test_required(self, obj, n):
		if obj.has_key(n) == false:
			name = obj.nameOf()
			msg ="In tag " + name
			msg = msg + ", required attribute "
			msg = msg + n + " missing"
			raise InvalidAttribute, msg

	def test_optional(self, obj, n, choices, default):	
		if obj.has_key(n) == false:
			# set default value
			obj.attr.append( default )
		else:
			# see if this attribute has a valid value
			if not "..." in choices:
				if not obj[n] in choices:
					msg = "Attribute %s has an invalid value of %s, in tag %s" % (n, obj[n], self.elmname)
					raise InvalidAttrValue, msg
				

	# Test attributes of a tag, insert implied attribute values into
	# a tag if they are not defined
	def test_attributes(self, obj):
		for (k, (choices,default)) in self.table.items():
			if default == None:
				continue
				
			if default.__class__ == Symbol().__class__:
				if   strop.upper(default.value()) == "#REQUIRED":
					self.test_required(obj,k.value())
			else:
				self.test_optional(obj, choices, default)
			
	def __init__(self, obj=None, vtable=None):
		if obj == None: return

		self.elmname = obj[1]
		self.table = {}

		# the token beginning with #,
		# or a literal, marks the end of a 
		# declaration.
		stk = []
		for t in obj.attr[2:]:
			stk.append( t )
			if t.__class__ == Symbol().__class__:
				if t.value()[0] == '#':
					self.add( stk )
					stk = []

			if t.__class__ == Literal().__class__ or \
			   t.__class__ == Number().__class__:
				self.add( stk )
				stk = []

		if not vtable.has_key(self.elmname):
			msg = "No match for element name "+\
				self.elmname+" in ATTLIST "
			raise NoElement, msg


# base class for all client classes, receives entity object from factory 
# and process them.
class ClientBase:		

	def getEntities(self):
		return self.entities

	def __init__(self):
		self.stack = []

		# set default entities
		self.entities = {}
		self.entities["quot"] = "\""
		self.entities["lt"] = "<"
		self.entities["gt"] = ">"
		self.entities["amp"] = "&"

		self._texthandler = []
		self.abortFlag = false
		self.entityMask = []
		self.entityNameMask = []
		# if true reject any object that is referrenced in a
		# mask, else pass any object that is referenced in a mask
		# and reject the ones that match
		self.reject = true 
		self.validation = false
		
		# for validation. Contains a referrence table that determines
		# what attributes are valid and which are not
		# This feature is triggered by
		# setValidation()
		
		# validation table (not virtual table ;) )
		self.vtable = {}
		
		# a mapping of validation tables generated by
		# doc types. If a tag has been found who's tagname
		# is a key to this table then validation will be triggered 
		# for this tag and it's siblings.
		self.vt_mapping = {}
		self.current_vt = None
		self.current_vtn = ""		
		self.vstk = []
		
	def setValidation(self):
		self.validation = true		
		
	def getStack(self):
		return self.stack
		
	def stackLevel(self):
		return len(self.stack)		
		
	def print_stack(self):
		print "stack -> ",
		for obj in self.stack:
			print obj.nameOf(),"::",
		print ""

	def setAbort(self):
		self.abortFlag = true
		
	def abortProcess(self):
		return self.abortFlag			

	def maskEntityByClass(self, obj):
		self.entityMask.append( obj.__class__ )
	def maskEntityByName(self, name):
#		self.entityNameMask.append( obj.nameOf() )
		self.entityNameMask.append( strop.upper(name) )
	# default mode
	def maskRejectAllThatMatch(self):
		self.reject = true
	def maskRejectAllThatDontMatch(self):
		self.reject = false

	def _doCanProcess(self, obj):
		if obj.__class__ in self.entityMask: return false
		if obj.nameOf() == None: return true		

		n = strop.upper(obj.nameOf())
		if n != None:
			if n in self.entityNameMask:
				return false
		return true

	# called by XMLFactory to see whether or not it can process an
	# entitity
	def canProcess(self, obj):
		p = self._doCanProcess(obj)
		if self.reject == false:
			if p == true:
				p = false
			else:
				p = true
		return p

	# passes characters including markup back to application.
	# This not part of the w3c spec, but it makes this parser useful 
	# as a tool for a web crawler where we are spidering through 
	# documents and dumping them into a database.
	def chars(self, ch):
		pass	 
	
	def setTextHandler(self, func):
		self._texthandler.append( func )

	def text(self, obj):
		s = len(self._texthandler)
		if s > 0:
			func = self._texthandler[s-1]
			func(obj)
			del self._texthandler[s-1]	

	def pi(self, obj):
		pass	

	def emptyTag(self, obj):
		self.validate( obj, false )	

	#----------- validation routines -----------------

	def gt_vt_tables(self, obj):
		if not self.current_vt.has_key(obj.nameOf()):
			msg = "Invalid tag name: "+ obj.nameOf()
			raise InvalidTagname, msg

		# fetch a table describing the tag
		(this_e,this_a) = self.current_vt[ obj.nameOf() ]

		# see if there is a parent to evaluate
		if len( self.vstk ) > 0:
			# get the name of the parent
			pname = self.vstk[len( self.vstk ) - 1]
			# look it up in current_vt
			(parent_e,d) = self.current_vt[ pname ]
		else:
			parent_e = None
			
		# return tables for this tag and it's parent	
		return (this_a, parent_e)

	# make sure that this tag can be a sibling tag of the
	# parent
	def validate_sibling(self, obj, parent_e):
		if parent_e == None: return
		#print "validate_sibling ", vars(parent_e)
		parent_e.reference( obj )		

	# entry point for tag evaluations
	def validate(self, obj, validate_as_sibling = true):
		if self.vt_mapping.has_key( obj.nameOf() ):
			self.current_vt = self.vt_mapping[ obj.nameOf() ]
	
		if self.current_vt != None:
			#(this_a, parent_e) = self.gt_vt_tables( obj )
			if not self.vt_mapping.has_key(obj.nameOf()):
				msg = "Invalid tag name: "+ obj.nameOf()
				raise InvalidTagname, msg

			(this_e, this_a) = self.vt_mapping[ obj.nameOf() ]
			if len( self.stack ) > 0:
				pname = self.stack[ len(self.stack) - 1].nameOf()
				(parent_e, dummy) = self.vt_mapping[ pname ]
			else:				
				parent_e = None	
			
			if validate_as_sibling == true:
				self.validate_sibling( obj, parent_e )
			this_a.test_attributes( obj )
			self.vstk.append( obj.nameOf() )
			
			
	# -------------------------------------------------	
	
		

	# called for < ... > tags
	def nonEmptyTag(self, obj):
		self.validate( obj )	
		self.stack.append(obj)

	def endTag(self, obj):
		if len(self.stack) == 0:
			#print "name = ", obj.nameOf()
			raise Unexpected, "Unexpected end tag, stack is empty"
	
		# pop the vstk for validation	
		if len(self.vstk) > 0:
			if len(self.vstk) == 1:
				self.vstk = []
				self.current_vt = None
			else:
				self.vstk = self.vstk[0:len(self.vstk)-1]
				
		self.stack = self.stack[0:len(self.stack)-1]

	def hentity(self, obj):
		prev, i = "", 1
		while i < len(obj):
			s = obj[i]
			#print "dtd ", s, i
			if s[0] == "%": # we are declaring
				k = s[1:]
				v = obj[i+1]
				self.entities[k] = v
				i = i + 2
			else:
				i = i + 1
		

	# defines a new element in the document
	#	defines what sibling tags are allowed
	def helement(self, obj):
		e = element_rec(obj)
		self.vtable[ e.nameOf() ] = (e, None)

	def hattlist(self, obj):
		a = attrlist_rec( obj, self.vtable )
		(e, dummy) = self.vtable[ a.nameOf() ]		
		self.vtable[ a.nameOf() ] = (e,a)
		
	# link in a dtd file for a set of tags	
	def hdoctype(self, obj):
		#<!DOCTYPE tagname SYSTEM "hello.dtd">
		# is currently the only tag implemented.
	
		# get the tag name that matches this dtd
		tagname = obj[1]
		
		# if system then we are directed to link in
		# a dtd file 
		if obj.attr[2].__class__ == Symbol().__class__:
			if obj[2] == "SYSTEM":
				dtdfile = obj[3]
				c = ClientBase()
				from XMLProcessor import *
				p = XMLProcessor( c )
				p.run( dtdfile )
				#self.vt_mapping[tagname] = c.vtable
				self.vt_mapping = c.vtable
				#print "Tagname", tagname

	# <! .... >
	def dtd(self, obj):
		if   obj.nameOf() == "ENTITY":
			self.hentity(obj)
		elif obj.nameOf() == "ELEMENT":
			self.helement( obj )	
		elif obj.nameOf() == "ATTLIST":
			self.hattlist( obj )
		elif obj.nameOf() == "DOCTYPE":		
			self.hdoctype( obj )
				

	def CDATA(self, obj):
		pass

	def comment(self, obj):
		pass
		
	def endfile(self):
		if len(self.stack) > 0:
			msg = "stack: "
			for p in self.stack:
				msg = msg + "%s::" % (strop.upper(p.nameOf())) 
			raise Unexpected, "Nonempty tag without endtag unexpected end of file"
	
# generic null class which is generated dynamically to represent an xml
# namespace
class namespace:
	"""Empty attribute holder; instances are filled with setattr()."""

import strop

"""
	Utility used to create a table of namespace objects.
"""
class namespaceGenerator(ClientBase):
	"""
	Client that files every tag named "<prefix>:<member>" as attribute
	<member> of a namespace object stored under <prefix>.
	"""

	def __init__(self):
		ClientBase.__init__(self)
		# most recently opened tag, waiting for its text
		self.curr_o = ""
		# prefix -> namespace object
		self.table = {}

	def getNameSpace(self, n):
		"""Return the namespace object stored under prefix n."""
		return self.table[n]

	def add(self, obj):
		"""Store tag obj under the prefix and member parts of its name."""
		pieces = strop.splitfields(obj.nameOf(),":")
		prefix = pieces[0]
		member = pieces[1]
		if self.table.has_key( prefix ):
			holder = self.table[prefix]
		else:
			holder = namespace()
			self.table[prefix] = holder
		setattr( holder, member, obj )

	def text_handler(self, obj):
		"""Attach the text to the pending tag and file the tag."""
		self.curr_o.text = obj.value()
		pending = self.curr_o
		self.add( pending )

	def emptyTag( self, obj ):
		# an empty tag has no text to wait for
		ClientBase.emptyTag( self, obj )
		self.add( obj )

	def nonEmptyTag(self, obj):
		# the tag is filed once its text arrives
		ClientBase.nonEmptyTag(self, obj)
		self.curr_o = obj
		ClientBase.setTextHandler(self,  self.text_handler )





#****
# html files have special nonEmptyTags that behave like emptyTags
# these are input, hr, p, br.
class htmlClientBase(ClientBase):
	"""
	Client for html input: INPUT, HR, P and BR are handled as empty
	tags and unclosed tags at end of file are not reported.
	"""

	def __init__(self):
		ClientBase.__init__(self)

	def nonEmptyTag(self, obj):
		# pick the base class handler by the upper-cased tag name
		tagname = strop.upper(obj.nameOf())
		if tagname in ("INPUT","HR","P","BR"):
			handler = ClientBase.emptyTag
		else:
			handler = ClientBase.nonEmptyTag
		handler(self, obj)

	def endfile(self):
		# deliberately no check for open tags
		pass



"""
Test harness
"""
class TestClient(ClientBase):
	def __init__(self):
		ClientBase.__init__(self)
        def dump(self, obj):
                if hasattr(obj,"attr"):
               	        for a in obj.attr:
                       	        print "   ",a.value()
	def text(self, obj):
		ClientBase.text( self, obj )
		print "text: ", vars(obj)
	def pi(self, obj):
		ClientBase.pi( self, obj )
		print "pi = ", vars(obj)
	def emptyTag(self, obj):
		ClientBase.emptyTag( self, obj )
		print "endTag = ", vars(obj)
	def nonEmptyTag(self, obj):
		ClientBase.nonEmptyTag( self, obj )
       	        print "nonEmptyTag = ", obj.nameOf() , self.dump(obj)
	def endTag(self, obj):
		ClientBase.endTag( self, obj )
		print "endTag = ", vars(obj)
	def dtd(self, obj):
		ClientBase.dtd(self, obj)
       	        print "dtd = ", self.dump(obj)
	def CDATA(self, obj):
		print "CDATA = ", vars(obj)
	def comment(self, obj):
		print "comment = ", vars(obj)
	def endfile(self):
		pass	





		
"""
A special client for use in resolving an XPointer.
Give me the tag object within a document by its
	tag id                    (command "tagid")
	attribute name=<value>    (command "attribute")
	or the text after such a tag
	                          (commands "textafter_tagid", "textafter_attribute")
	(text before a tag is not supported)

The constructor takes (command, pattern, nummatches); the document itself
is not passed to the client.

I want to find the first <table> tag in the document:

x = XPointerClient("tagid", "table")

I want the first tag with the attribute name whose value is "command":
x = XPointerClient("attribute", "name=command")

"""
class XPointerClient(ClientBase):
	def endfile(self):
		ClientBase.endfile(self)
		if self.target == None:
			raise TargetNotFound

	def text(self, obj):
		if self.trap_next_text == true:
			self.target = obj
			raise FoundTarget

	def _tagid(self, tag):
		if self.symbol.value() == tag.nameOf():
			self.nummatches = self.nummatches - 1
			if self.nummatches <= 0:
				if self.command == "textafter_tagid":
					self.trap_next_text = t
				else:
					self.target = tag
					raise FoundTarget

	def _attribute(self, tag):
		attrlist = tag.value()
		for a in attrlist:
			if a.typeOf() == Association().typeOf():
				if a.nameOf() == self.assoc.nameOf() and a.value() == self.assoc.value():
					self.nummatches = self.nummatches - 1
					if self.nummatches == 0:
						if self.command == "textafter_attribute":
							self.trap_next_text = true
						else:
							self.target = tag				
							raise FoundTarget
				
					
	def __init__(self, command, pattern, nummatches = 1):
		ClientBase.__init__(self)
		self.command = command
		self.pattern = pattern
		self.nummatches = nummatches
		self.trap_next_text = false
		self.textafter = ""
		self.target = None		

		if self.command in ("attribute", "textafter_attribute"):
			self.assoc = Association()
			p = self.assoc.process(self.pattern)
			if p == self.pattern:
				raise XPointerErr, "Pattern %s is bogus" % (self.pattern)			
			self.cb = self._attribute
		elif self.command in ("tagid", "textafter_tagid"):
			self.symbol = Symbol()			
			p = self.symbol.process(self.pattern)
			if p == self.pattern:
				raise XPointerErr, "Invalid tag id %s" % (self.pattern) 
			self.cb = self._tagid
		else:
			raise XPointerErr, "Unknown command %s" % (self.command) 				

	def emptyTag(self, obj):
		self.cb( obj )
	def nonEmptyTag(self, obj):
		ClientBase.nonEmptyTag(self, obj)
		self.cb( obj )






"""
	Stuff that doesn't belong in ClientBase but will receive a new home
	sometime soon.

	def python(self, obj):
		text = obj.value()
		x = compile(text, "<string>", "exec")
		exec(x)
		
		for line in strop.splitfields(text,"\n"):
			if line[:4] == "def ":
				t = strop.find(line,"(")
				funcname = line[4:t]
				cmd = "self._%s = %s" % (funcname, funcname)
				exec(cmd)
		

	def nonEmptyTag(self, obj):
		if obj.nameOf() == "PYTHON":
			self.setTextHandler( self.python )
		# in html there are tags which should end with /> but don't
		implied_endtags = ("p","br")
		if obj.nameOf() in implied_endtags:
			self.emptyTag(obj)
		else:
			self.stack.append(obj)


"""


