#!/usr/bin/python

"""
Author: David W. Schere, XML toolkit
Copyright (C) 1998 DIDX llc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  
02111-1307, USA.


	The XMLServer is a proxy server which is a client
	to the Paos C/S database and a gateway to the web. 
	It keeps a cache of previously processed documents 
	and allows for multiple documents to be processed 
	simultaneously. 

	The transaction is totally hidden from the XMLClient
	objects.
	
	To start the Paos server just go. Values of PAOSPORT and
	CACHEDB are defined below.

	nohup python ./Paos-1.4/Server.py PAOSPORT CACHEDB &
	chmod 666 CACHEDB



	Notes:
	I have tried and tried to work out a cache scheme that relies
	on the web server to tell me when a file has been modified.
	As idiotic as it sounds *NOBODY* is following the standards
	so I can't key on anything to tell me when a stupid url
	has been updated so I can dump my cache!

	The end result is that the cache has to be freed by human
	intervention ;(( . This really pisses me off!

"""

# Integer truth values used throughout this module.
false = 0
true = 1

##################################
###### Generated by StartServer.py 
###### DO NOT TOUCH - Use python ./StartServer.py to launch a configuration
###### GUI. This segment gets altered.
#__BEGIN__
PAOSPORT=2201
PAOSHOST=""
SERVERPATH="./Paos-1.4/Server.py"
CACHEDB="./cache.dbm"
#__END__
########
########
##################################

#-- use the Paos database engine
from Schema import DBobject
import posix, paos_inter



#-- xml 
from XMLProcessor import XMLProcessor, URLNotFound
from XMLFactory import XMLFactory, strop


# String exceptions raised by this module.
UnableToConnect = "XMLServer.UnableToConnect"
UnableToStore = "XMLServer.UnableToStore"
# Raised by Connection.rmcache once its retries are used up. The name was
# referenced there without ever being defined, so the raise itself died
# with a NameError.
UnableToDelCache = "XMLServer.UnableToDelCache"


# HTTP header names.
CONTENT_LENGTH = "Content-length"
LAST_MODIFIED = "last-modified"


#*****
# Identifies a document in the database; contains the url, a deleted flag and the id shared with its images
class DocHeader(DBobject):
	"""Database record that identifies one cached document."""

	def __init__(self):
		DBobject.__init__(self)

	def setup(self, url, id):
		"""Fill in the record for `url`.

		A freshly set up header is not deleted. `id` is the key that the
		DocImage records belonging to this header carry as well.
		"""
		self.url = url
		self.deleted = false
		self.id = id

# Prefer the C implementation of pickle when it is installed.
try:
	import cPickle
	pickle = cPickle
except ImportError:
	# Only a missing module should trigger the fallback; any other
	# failure is a real error and must not be hidden.
	import pickle

#******	
# Represents a fragment of a document
class DocImage(DBobject):
	"""Database record holding one fragment of a document in pickled form."""

	def __init__(self):
		DBobject.__init__(self)
		self.list = []

	def setup(self, url, seq, list, id):
		"""Populate the record; `list` is stored as its pickled string."""
		self.url = url
		self.seq = seq
		pickled = pickle.dumps(list)
		self.list = pickled
		self.id = id

	def fixup(self):
		"""Replace the pickled string in `self.list` by the object it encodes."""
		pickled = self.list
		self.list = pickle.loads(pickled)

import Utilities

#******
# A special XMLFactory that records objects sent to a client
# then ships them to the database.
class MyXMLFactory(XMLFactory):
	def __init__(self, ready_conn):
		from XMLClient import ClientBase
		XMLFactory.__init__(self, ClientBase())
		self.ready_conn = ready_conn
		self.list = []
		self.cnt = 0
	# overload XMLFactory stub
	def inspect(self, obj):
		self.list.append(obj)
		self.externalFeed(obj)
	def setUrl(self, url):
		self.url = url
	def store(self, paos, image_key):
		d = DocImage()
		d.setup(self.url, self.cnt, self.list, image_key)
		if paos.add([d]) != paos_inter.SUCCESS:
			msg= "Failure to add DocImage"
			raise UnableToStore, msg 

		# setup for next pass
		self.cnt = self.cnt + 1
		self.list = []
	
	def externalFeed(self, obj):
		print "externalFeed Sending:",obj
		Utilities.SEND( self.ready_conn, pickle.dumps(obj) )
#**** Unable to support abort
#		if obj != None:
#			x = Utilities.RECV( self.ready_conn, 20000 )
#			print "Recieved ",x
#			return pickle.loads(x)
			
	def endfile(self):
		Utilities.SEND( self.ready_conn, pickle.dumps(None) )
		 

#*****
# Seamless interface to Paos. The client has no knowledge of the
# database's existence. Any previously accessed document is
# loaded from cache.
class XMLServer(XMLProcessor):
	def __init__(self, ready_conn):
		self.bufsize = 4096
		self.factory = MyXMLFactory(ready_conn)
		self.paos = paos_inter.paos_inter(PAOSHOST,PAOSPORT)			

	def createDocHeader(self, url, id):
		print id
		d = DocHeader()
		d.setup(url, id)

		if self.paos.add([d]) != paos_inter.SUCCESS:		
			msg = "Unable to store document fragments, is server down ?"
			raise UnableToStore, msg
					
	def createDocImage(self, id):
		print id
		self.factory.store( self.paos, id )
			
	# determine if the server gave us enough information to cache
	# this url. If we can generate a key to be used to identify DocImage 	
	# objects.
	def canCache(self, h):
		if h == None: return (false, None)

		#Hack since the web servers aren't sending me the
		# modification time
		return (true, "----")	

# Another failed experiment - Web servers using the Expires variable
# occationaly as the system time. 
#		for (k,v) in h.items():
#			k = strop.upper(k)
#			if k == "EXPIRES" and v != None:
#				return (true, v)
#			if k == "CONTENT-LENGTH" and v != None:
#				return (true, v)

		return (false, None)		
		

	def useCache(self, url, id):
		print "before paos.get"
		import Store
		c = self.factory.ready_conn
		x = Store.load(c, 'rw', "XMLServer.DocHeader", [("url","==",url),("deleted","==",false)])
#		x = self.paos.get("XMLServer.DocHeader",[("url","==",url),("deleted","==",false)],'rw')
		print "after paos.get"
		
		if len(x) == 0: 
			return false

		if id != x[0].id:
			x[0].deleted = true
			if self.paos.update(x) != paos_inter.SUCCESS:
				msg = "Unable to update database!"
				raise UnableToStore, msg
			return false

		print "Using cache ... "
		return true
		
	def feedFromArchive(self, url, id):
		import Store
#		answer = self.paos.get("XMLServer.DocImage",[("url","==",url),("id","==",id)])
		c = self.factory.ready_conn
		answer = Store.load(c,'r',"XMLServer.DocImage",[("url","==",url),("id","==",id)]) 

		def comp(x,y):
			if x.seq > y.seq: return 1
			if x.seq < y.seq: return -1
			return 0
		# no gurantee of order so sort
		answer.sort( comp )
		for image in answer:
			# send in preprocessed objects
			image.fixup()
			for obj in image.list:
				if self.factory.externalFeed( obj ) == true:
					return
		# gets interpreted as a endfile
		self.factory.externalFeed( None )

	def run(self, url):
		import urllib, time
	
		# make a note of the url
		self.factory.setUrl(url)

		# get the url 
		self.fn, h = urllib.urlretrieve(url)

		# Has the web server given us enough information to cache
		# this url ?
		canCache, id = self.canCache(h)
		if canCache == true:
			# if so, then has the url changed since we
			# visted it last ?
			if self.useCache(url, id) == true:
				# It hasn't changed, lets use cache.
				self.feedFromArchive(url, id)
				return
			#-- the url has changed, we must re-cache

		# open the file
		try:
			file = open(self.fn, 'r')
		except:
			raise URLNotFound, "Unable to open %s" % (url)
   	
		# store DocHeader
		if canCache == true: 
			self.image_key = self.createDocHeader(url, id)

		starttime = time.time()
		totalbytes, exitflag = 0, false
		print "[XMLProcessor] begining to process %s" % (url)	

		while 1:
			data = file.read( self.bufsize )

			if data == None: break
			if len(data) == 0: break

			totalbytes = totalbytes + len(data)
			exitflag = self.factory.feed(data)

			if canCache == true:
				# create Document Fragment Image				
				self.createDocImage(id)

			if exitflag == true:
				break
			
		del file
		if self.factory.client.abortProcess() == false:
			self.factory.endfile()
		
		print ""		
		print "[XMLProcessor] total execution time ",time.time()-starttime," seconds. File size = ", totalbytes

import Client

# This gets called by the Paos server in response to a procURL
def processURL( ready_conn, url ):
	print "Called processURL"
	XMLServer(ready_conn).run( url )

def recv(s, retries):
	"""Receive from `s`, trying again after every failure.

	Returns what Utilities.RECV returns. Once `retries` attempts have
	failed, prints "Timeout error" and returns None.
	"""
	remaining = retries
	while remaining > 0:
		try:
			payload = Utilities.RECV(s, 20000)
		except:
			remaining = remaining - 1
		else:
			return payload
	print "Timeout error"
	return None
 

#*****
# Remove the cache for a url from the database
def rmcache(client, url):
	"""Delete the cached DocHeader and DocImage records of `url`.

	At most one pickled (command, text) reply is sent to `client`:
	("Error", ...) when no header exists for the url, ("Retry", ...)
	when the header or the images are locked. Nothing is deleted
	unless both the header and the images can be deleted, so a retry
	always starts from a consistent state.

	Always returns ("exit", "").

	NOTE(review): `db` and `not_locked_by_other` are not defined in this
	module; presumably the Paos server supplies them - confirm.
	"""
	import Store
	global db

	answer = Store.load(client,'r',"XMLServer.DocHeader",[("url","==",url)])
	if len(answer) == 0:
		msg = ("Error", "Cache not present for URL %s" % (url))
		Utilities.SEND( client, pickle.dumps(msg) )
		# Nothing to delete. Falling through used to index answer[0]
		# on an empty result.
		return ("exit", "")

	if not not_locked_by_other(client, answer):
		msg = ("Retry", "DocHeader for %s locked" %(url))
		Utilities.SEND(client,pickle.dumps(msg))
		return ("exit", "")

	# Check the images before touching the header: deleting the header
	# first would leave the images unreachable for the retry.
	objlist = Store.load(client,'rw',"XMLServer.DocImage",[("url","==",url)])
	if not not_locked_by_other(client, objlist):
		msg = ("Retry", "DocImage for url %s locked" % (url))
		Utilities.SEND(client,pickle.dumps(msg))
		return ("exit", "")

	# delete the header, then its images
	del db[ answer[0].db_id ]
	for obj in objlist:
		del db[obj.db_id]

	return ("exit", "")

#*****
# This class represents the end-user namespace
class Connection(Client.Connection):
	def __init__(self, host, port):
		Client.Connection.__init__(self, host, port, 'XMLServer')
	def rmcache(self, url, retries = 5):
		while retries > 0:
			data = pickle.dumps((self.client, 'rmcache', url))
			Utilities.SEND(self.s, data)
			x = Utilities.RECV(self.s, 20000)
			cmd,msg = x[0], x[1]

			if cmd == "Error":
				print msg
				return true
			elif cmd == "Retry":
				retries = retries - 1
				print msg
			else:

				print "Success, removed cache for ",url
				return false

		msg = "Unable to delete cache for url %s" % (url)
		raise UnableToDelCache, msg
		
	def getPlugin(self, name):
		answer = Store.load(self.client,'r',"plugin.plugin",\
			[("name","==",name)])
		if len(answer) == 0: return None
		return answer[0] 		 	
		
	def procURL(self, url, client):
		# we are now in the user namespace
		data = pickle.dumps((self.client, 'xml', url))
		Utilities.SEND(self.s, data)

		import XMLFactory
		f = XMLFactory.XMLFactory(client)
		while 1:
			data = recv(self.s, 5)
			
			obj = pickle.loads(data)
			if obj == None: break

			f.externalFeed( obj )			

		if f.client.abortProcess() == false:
			f.endfile()


def test():
	"""Manual check: process the url given as first command-line
	argument with a TestClient, then remove its cache again."""
	import sys

	# first connection: process the url
	conn = Connection(PAOSHOST,PAOSPORT)
	from XMLFactory import TestClient
	conn.procURL( sys.argv[1], TestClient() )
	conn.close()

	# second connection: drop the cache entry again
	conn = Connection(PAOSHOST,PAOSPORT)
	conn.rmcache( sys.argv[1] )
	conn.close()

def start_server():
	cmd = "nohup python %s %s %s &" % (PAOSPATH, PAOSPORT, CACHEDB)
	import os
	print cmd
	os.system(cmd)

	

# Run the manual check when executed as a script.
if __name__ == "__main__":
	test()
