from string   import find, ljust
from regex    import search
from os       import mkdir, path, chdir
from sys      import exit
from urlparse import urljoin, urlunparse

from config   import *
from utils    import *
from parse    import HIParser

###############################################
## Constants
# Prefix prepended to every message emitted by Globale.print_warning.
WARNING_MSG = 'WARNING : '
# Prefix prepended to every message emitted by Globale.print_error.
ERROR_MSG   = 'ERROR : '

################################################
## Class holding the application's global state
class Globale:
    #################################
	## init and check user parameters
	def __init__(self):
		"""Validate the user settings and prepare the download session.

		Reads the module-level settings INIT_URL, RES_DIR, MAX_FILE_LENGTH,
		LOG_FILE, VERBOSITY, CHECK_MODIFIED, CHECK_MISSING and LOCALIZE_URLS,
		creates the save directory (named after the host of INIT_URL) if
		needed and makes it the current directory, opens the optional log
		file, prints a summary of the settings, and finally creates the
		HTML parser and queues the initial URL.

		The process is terminated through exit(0) when the host name is
		empty, the save directory cannot be created, the initial URL is
		invalid, or the initial URL is outside the directory restriction.
		NOTE(review): these error paths exit with status 0; kept unchanged
		because callers may depend on it -- confirm whether a non-zero
		status is wanted.
		"""
		## Initial checks
		initUrl            = check_string(INIT_URL)
		self.host          = host_from_url(INIT_URL)
		self.resDir        = check_string(RES_DIR)
		self.maxFileLength = check_integer(MAX_FILE_LENGTH, 0)
		logFile            = check_string(LOG_FILE)
		self.verbosity     = check_bounded_integer(VERBOSITY, -1, 1, 0)
		self.checkModified = check_bounded_integer(CHECK_MODIFIED, 0, 1, 1)
		self.checkMissing  = check_bounded_integer(CHECK_MISSING, 0, 1, 1)
		self.localizeURL   = check_bounded_integer(LOCALIZE_URLS, 0, 1, 0)

		## Create/check the save directory before logging starts: the log
		## file is opened after the chdir below, so a relative log file
		## path ends up inside the save directory.
		self.log = None
		if not self.host:
			self.print_error('host name is empty')
			exit(0)
		saveDirExists = path.isdir(self.host)
		if not saveDirExists:
			try:
				mkdir(self.host, 493)   # 493 is 755 in octal
			except OSError:
				# print_error takes a single, already formatted message
				self.print_error('cannot create save directory %s'
								 %self.host)
				exit(0)
		chdir(self.host)

		## Create the log file (if one was requested).  A failure to open
		## it is not fatal: self.log stays None and the warning is issued
		## in the "Log file" section below.
		if logFile:
			try:
				self.log = open(logFile, 'w')
			except IOError:
				self.log = None

		self.print_separator('%s version %s' %(APP_NAME, APP_VERSION))
		self.print_msg('', -1)

		## Initial URL
		if not initUrl:
			self.print_error('invalid initiale URL')
			exit(0)
		self.print_msg('initial URL : %s' %initUrl, -1)
		# NOTE(review): a scheme mismatch is reported but, unlike the other
		# errors, does not stop the program -- confirm this is intended.
		if scheme_from_url(initUrl)!=SCHEME:
			self.print_error('scheme must be "%s"' %SCHEME)

		## Save directory
		self.print_normal(N0, 'save directory : %s' %self.host)
		if saveDirExists:
			self.print_normal(N1, 'save directory already exists')

		## Restriction on directory
		if self.resDir:
			self.print_msg('restricted to the directory : %s' %self.resDir, -1)
			if self.resDir[0]=='/': self.resDir = self.resDir[1:]
			# the restriction has to match at offset 0 of the URL path
			if search(self.resDir, path_from_url(initUrl))!=0:
				self.print_error('initial URL is not in the directory restriction')
				exit(0)
		else: self.print_msg('no directory restriction', -1)

		## File length restriction (None means "no limit")
		if self.maxFileLength<=0:
			self.maxFileLength = None
			self.print_msg('no limit on file size', -1)
		else:
			self.print_msg('file size limited to : %d bytes'
						   %self.maxFileLength, -1)

		## Log file
		if logFile:
			self.print_msg('messages logged in : %s' %logFile, -1)
			if not self.log: self.print_warning('cannot write to log file')
		else: self.print_msg('messages not logged', -1)

		## Check modified
		if not self.checkModified:
			self.print_msg('do not check the server for already downloaded files', -1)

		## Check missing
		if not self.checkMissing:
			self.print_msg('do not check the server for missing links', -1)

		self.print_msg('', -1)
		self.print_separator('Begin work')
		self.print_normal(N0, '')

		self.parser = HIParser(self)
		self.parser.append_todo(initUrl)

	def end(self):
		"""Print the closing banner and close the log file if one is open."""
		self.print_normal(N0, '')
		self.print_separator('All work done')
		logHandle = self.log
		if not logHandle: return
		logHandle.close()

	#############################
    ## Print messages
	def print_msg(self, msg, verb_level):
		if verb_level>self.verbosity: return 
		print msg
		if self.log: self.log.write(msg + '\n')
		
	def print_normal(self, n, msg):
		t = ''
		for i in range(n-1): t = t + ' '
		self.print_msg(t + msg, n-1)
	
	def print_warning(self, msg):
		"""Print msg prefixed with WARNING_MSG at verbosity level -1."""
		text = WARNING_MSG + msg
		self.print_msg(text, -1)
	
	def print_error(self, msg):
		"""Print msg prefixed with ERROR_MSG at verbosity level -1."""
		text = ERROR_MSG + msg
		self.print_msg(text, -1)
	
	def print_separator(self, msg):
		t = ''
		for i in range(len(msg)+8):
			t = t + '*' 
		self.print_msg(t, 0)
		self.print_msg('*** %s ***' %msg, -1)
		self.print_msg(t, 0)

	def treat_return(self, r):
		if r.msgType:
			if r.msgType==NE:  self.print_error(r.msg)
			elif r.msgType==NW: self.print_warning(r.msg)
			else: self.print_normal(r.msgType, r.msg)
		return not r.error()

    ################################################
    ## Retrieve a file with HTTP.
    ## If the file already exists : retrieve it only if it is newer.
	def retrieve_page(self, anchor, fileName):
		"""Retrieve anchor from self.host over HTTP into a file object.

		A HEAD request is sent first (to check the size against
		self.maxFileLength), followed by a GET request which carries the
		date of the local copy when fileName already exists.

		Returns None when nothing is to be done or on failure, ['rep']
		when the server answers 302 (the anchor is a directory), and
		otherwise the list [type, file object, date, mustSave] where
		mustSave is 1 if fresh data came from the server and 0 if the
		local copy is used.
		"""
		# check file and get its date if it already exists
		r = check_file(fileName)
		if not self.treat_return(r): return
		date = r.value
		if not date and not self.checkMissing:
			self.print_normal(N0, 'Missing link (not checked)')
			return
		if date and not self.checkModified:
			f = open(fileName, 'r')
			self.print_normal(N0,
							  'Already present (not rechecked from server)')
			return [determine_type(fileName), f, date, 0]

		# Send HEAD request
		# We need this request to check the length
		r = send_request(self.host, 1, anchor, None)
		if not self.treat_return(r): return
		if not r.value:
			self.print_error('error when connecting host')
			return
		h_errcode, h_errmsg, h_header = r.value.getreply()
		# the length is only known when the HEAD request answers 200
		length = None
		if h_errcode==200:
			# check length
			r = check_length(h_header, self.maxFileLength)
			if not self.treat_return(r): return
			length = r.value[1]
			if not r.value[0]:
				self.print_normal(N0, '%s (%d) [%s] size=%d : Over size limit'
								  %(h_errmsg, h_errcode, h_header.gettype(),
									length))
				return
		elif h_errcode!=302:
			self.print_normal(N0, '%s (%d) [%s]' %(h_errmsg, h_errcode,
												   h_header.gettype()))
			return

		# Send GET request
		# We need this request even if the file is not modified (as we could
		# have asked in the HEAD request) because the HEAD request with the
		# "Last-modified" field seems to NOT return the right type for the
		# file (and we need to know it if possible)
		# NOTE(review): strftime is not imported explicitly by this file;
		# presumably it is provided by one of the star imports -- confirm.
		if date: sdate = strftime('%a, %d %b %Y %H:%M:%S GMT', date)
		else: sdate = None
		r = send_request(self.host, 0, anchor, sdate)
		if not self.treat_return(r): return
		# same guard as for the HEAD request above
		if not r.value:
			self.print_error('error when connecting host')
			return
		g_errcode, g_errmsg, g_header = r.value.getreply()
		if g_errcode!=200 and g_errcode!=302 and g_errcode!=304: return

		# Act upon each error code
		mustSave = 0
		msg = ''
		if g_errcode==200:
			# length is unknown when HEAD answered 302 but GET answers 200
			if length is not None: msg = 'size=%d' %length
			try: f = r.value.getfile()
			except:
				self.print_error('error when reading the data via HTTP')
				return
			date = g_header.getdate('Last-modified')
			if not date:
				self.print_error('no "Last-modified" field in HTTP response')
				return
			mustSave = 1
		elif g_errcode==302: msg = ': directory (retry with an ending /)'
		elif g_errcode==304: f = open(fileName, 'r')

		# the type reported is the one from the HEAD reply (see above)
		self.print_normal(N0, '%s (%d) [%s] %s' %(g_errmsg, g_errcode,
												   h_header.gettype(), msg))
		if g_errcode==302: return ['rep']
		else: return [h_header.gettype(), f, date, mustSave]

	# handle URL
	def handle_url(self, _url, param):
		"""Handle one URL found in the HTML document being parsed.

		_url is resolved against self.base and checked with check_url;
		when check_url reports no status for it, its path is appended to
		the parser's TODO list.  The outcome is printed at level N2.

		When self.localizeURL is set and the URL uses SCHEME, the URL is
		made relative to self.currentPath (if it is on self.host) and the
		text of the document accumulated in self.page is updated with the
		new URL; self.i records how far the raw data has been copied.
		"""
		# we construct the real URL
		url = urljoin(self.base, _url, 0)

		# check if the link is to be added to the TODO list
		r = check_url(url, param, self.host, self.resDir)
		if not self.treat_return(r): return
		status = r.value[0]
		if not status: s = self.parser.append_todo(r.value[1][2])
		else: s = status
		self.print_normal(N2, '"' + url + '"' + ' : ' + s)

		if not self.localizeURL: return
		# simplify the URL (make it relative to the current url dir)
		parts = r.value[1]
		if parts[0]!=SCHEME: return
		if parts[1]!=self.host: newUrl = url
		else:
			# slicing instead of indexing: the path may be empty
			if parts[2][:1]=='/': s = parts[2][1:]
			else: s = parts[2]
			fp = path.split(s)
			p = relativePath(fp[0], self.currentPath)
			newUrl = path.join(p, fp[1])
		if newUrl==_url: return   # URL needs no modification

		# save the modified HTML document
		k = find(self.parser.rawdata, _url, self.parser.i)
		# find returns -1 when the URL text cannot be located in the raw
		# data; leave the document untouched rather than splice at -1
		if k<0: return
		self.page = self.page + self.parser.rawdata[self.i:k] + newUrl
		self.i = k + len(_url)

	# treat HTML data
	def treat_html(self, page, url):
		"""Feed the HTML text page, located at url, to the parser.

		Before parsing, self.base is set to the complete default base URL
		built from SCHEME, self.host and url, self.currentPath to the
		directory part of the URL path, self.page to '' and self.i to 0;
		self.l stores the page length.  If self.page is non-empty once the
		parser has been fed, the remainder of the original text is
		appended to it and a message is printed.
		"""
		self.parser.reset()
		# construct a complete URL for default base URL
		defaultBase = (SCHEME, self.host, url, '', '', '')
		self.base = urlunparse(defaultBase)
		# NOTE(review): urlparse is not imported explicitly by this file;
		# presumably it is provided by one of the star imports -- confirm.
		urlPath = urlparse(url)[2]
		self.currentPath = path.dirname(urlPath)
		self.page = ''
		self.i = 0
		self.l = len(page)
		self.parser.feed(page)
		if self.page:
			tail = page[self.i:]
			self.page = self.page + tail
			self.print_normal(N0, 'Some URLs were localized (HTML file modified)')
		self.parser.close()

	# treat BASE tag
	def handle_base(self, url):
		"""Handle a BASE tag whose URL is url.

		self.base is replaced only when the path of url starts with '/';
		any other URL is ignored and the default base is kept.  When
		self.localizeURL is set, the raw data up to the parser position is
		copied to self.page and self.i is moved just past the next '>',
		which leaves the BASE tag out of the rewritten document.
		"""
		# the BASE field must contain an absolute URL
		# so a relative URL is ignored (we keep the default one)
		# NOTE(review): urlparse is not imported explicitly by this file;
		# presumably it is provided by one of the star imports -- confirm.
		basePath = urlparse(url)[2]
		if basePath[:1]=='/':
			self.base = urljoin(self.base, url)

		if self.localizeURL:
			# if we localized : eliminate the BASE field
			raw = self.parser.rawdata
			tagPos = self.parser.i
			self.page = self.page + raw[self.i:tagPos]
			self.i = find(raw, '>', tagPos)+1
		
