#!/usr/local/bin/python
"""
mkwwwcatalog - Spunk HTML Catalog generating utility
Jack Jansen, CWI, 03-Oct-95.
23-Oct-95 - Fixed multiple indices with the same name
16-Dec-95 - Updated for new directory structure and names
"""
import sys
import os
import string
import time
import getopt
def curdate():
    """Return today's local date as a DD-Mmm-YY string, e.g. 03-Oct-95."""
    # A fixed English table keeps the output independent of the locale;
    # the empty first slot lets the 1-based month number index directly.
    month_names = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    year, month, day = time.localtime(time.time())[:3]
    two_digit_year = year % 100
    return '%02d-%s-%02d' % (day, month_names[month], two_digit_year)
# Date stamp computed once at import time; written into the footer of every
# generated page.
CURDATE=curdate()
# Optional note on where the actual documents were retrieved from; main()
# overwrites it with the value of the -w option.
WHERE=''
# Threshold compared against item counts in Directory.calcsizes() to decide
# which directories get an index page of their own.
MAXLINES=40
# Output fragments: list start/end, the per-entry prefixes (LIST_ITEM_TXT and
# LIST_ITEM_IMG are passed to run() as `fmt`; LIST_ITEM_DIR prefixes
# directory entries) and a horizontal separator.
# NOTE(review): each literal below spans two physical lines and is not valid
# Python as written; it looks like the markup inside the quotes was stripped
# by an extraction step. TODO: restore the literals from the original source.
LIST_START='
\n'
LIST_ITEM_TXT='-
'
LIST_ITEM_IMG='-
'
LIST_ITEM_DIR='-
'
LIST_END='
\n'
RULER='
'
# Base names already handed out by mkfilename(); maps name -> 1.
NamesUsed = {}


def mkfilename(name):
    """Return a unique '<name>.html' file name for an index page.

    If `name` has been handed out before, a numeric suffix (0, 1, 2, ...)
    is appended until an unused name is found, and the renaming is
    reported on standard output. The name finally chosen is recorded in
    NamesUsed so that later calls cannot collide with it.
    """
    # `in`, repr() and sys.stdout.write() replace has_key(), backticks and
    # the print statement, none of which exist in Python 3; the text
    # written is unchanged.
    if name in NamesUsed:
        suffix = 0
        candidate = name + repr(suffix)
        while candidate in NamesUsed:
            suffix = suffix + 1
            candidate = name + repr(suffix)
        sys.stdout.write('Index renamed to %s\n' % candidate)
        name = candidate
    NamesUsed[name] = 1
    return name + '.html'
class Thing:
    """Common base for catalog entries.

    Only stores the five attributes every entry carries; all behaviour
    lives in the subclasses.
    """

    def __init__(self, name, path, description, fullsize, fmt):
        # fmt is the prefix string that subclasses put in front of the
        # entry's output line.
        self.name, self.path = name, path
        self.description = description
        self.fullsize, self.fmt = fullsize, fmt
class Document(Thing):
    """A single catalogued file (a leaf of the catalog tree)."""

    def output(self):
        """Return the list-item line for this document.

        When no description is known the file name is used instead (and
        stored in self.description).
        """
        if not self.description:
            self.description = self.name
        # NOTE(review): the format string as found had two %s for three
        # arguments (TypeError at runtime); the anchor markup appears to
        # have been stripped. Reconstructed as a hyperlink to self.path --
        # confirm against the original source.
        return self.fmt + '<A HREF="%s">%s</A> [%s]' % (
            self.path, self.description, self.getfullsize())

    def numeric_output(self, dict):
        """Store this document's list-item line in `dict` under its name."""
        # NOTE(review): same reconstruction as in output(); the string as
        # found had three %s for four arguments.
        dict[self.name] = self.fmt + '<A HREF="%s">%s</A> %s [%s]' % (
            self.path, self.name, self.description, self.getfullsize())

    def calcsizes(self):
        """A document always counts as one line in a listing."""
        return 1

    def getfullsize(self):
        """Return the file size as a short human-readable string.

        Sizes are rounded to the nearest unit using decimal (1000-based)
        Kb and Mb; anything under 1000 bytes is reported as '< 1Kb'.
        """
        if self.fullsize < 1000:
            return '< 1Kb'
        if self.fullsize < 1000000:
            # Floor division keeps the value an integer on Python 3 too.
            return '%d Kb' % ((self.fullsize + 500) // 1000)
        return '%d Mb' % ((self.fullsize + 500000) // 1000000)

    def getnumitems(self):
        """A document is exactly one item."""
        return 1
# A catalog node that holds other Things (Documents and sub-Directories) and
# renders them either inline or as a separate HTML index file.
# NOTE(review): indentation has been lost in this copy of the file, and
# several string literals below span physical lines or have fewer %s than
# arguments -- it looks like HTML markup (tags, link targets) was stripped
# from inside the quotes. TODO: restore them from the original source.
class Directory(Thing):
def __init__(self, name, path, description):
# Directories are created with fullsize 0 and no list-item prefix (fmt None).
Thing.__init__(self, name, path, description, 0, None)
self.children = []
# parent, index and main are initialised here but not referenced anywhere
# else in the visible code.
self.parent = None
self.index = None
self.main = None
# Set to 1 by calcsizes(); makes output() choose big_output().
self.isbig = 0
# Total number of items below this directory (sum over all children).
def getnumitems(self):
sum = 0
for ch in self.children:
sum = sum + ch.getnumitems()
return sum
# Directories report an item count instead of a byte size.
def getfullsize(self):
return '%d items'%self.getnumitems()
def _big_o_header(self):
"""Output the header of an HTML index file"""
# Writes title and heading (both built from self.name), the description if
# there is one, a ruler, the menu hook and the list opener to self.fp.
self.fp.write(
"Spunk Press Catalog: %s\n"%
self.name)
self.fp.write("\n")
self.fp.write("Spunk Press Catalog: %s
\n"%self.name)
if self.description:
self.fp.write("%s
\n"%self.description)
self.fp.write(RULER)
self._big_o_menu()
self.fp.write(LIST_START)
def _big_o_footer(self):
"""Output footer for index file"""
# Closes the list, then writes the two "Back to ..." lines, the copyright
# line, the generation date and (if WHERE is set) the retrieval note.
self.fp.write(LIST_END)
self.fp.write(RULER)
self.fp.write('
'+
'Back to the top level catalog ')
self.fp.write('
'+
'Back to the Spunk Press homepage
\n')
self.fp.write(RULER)
self.fp.write('Spunk Press, 1995.\n')
self.fp.write('Catalog generated: %s
\n'%CURDATE)
if WHERE:
self.fp.write('(actual documents retrieved from %s)\n'%WHERE)
self.fp.write('\n')
self.fp.write('\n')
# Hook called from _big_o_header(); does nothing here.
def _big_o_menu(self):
pass
# Write this directory to its own index file (name chosen by mkfilename())
# and return the list-item line that refers to that file.
# NOTE(review): self.fp is opened here and never closed in the visible code.
def big_output(self):
filename = mkfilename(self.name)
self.fp = open(filename, 'w')
self._big_o_header()
for ch in self.children:
self.fp.write(ch.output()+'\n')
self._big_o_footer()
# Fall back to the directory name when there is no description.
if not self.description:
self.description = self.name
# NOTE(review): two %s but three arguments as it stands -- presumably the
# link markup around the first two was stripped.
rv = LIST_ITEM_DIR + '%s [%s]' % \
(filename, self.description, self.getfullsize())
return rv
# Return this directory rendered inline: a heading item ("name - description:")
# followed by a nested list holding every child's output.
def small_output(self):
name = self.name
if self.description:
name = name + ' - ' + self.description
name = name + ':'
rv = LIST_ITEM_DIR + ('%s\n'%name) + LIST_START
for ch in self.children:
rv = rv + ch.output() + '\n'
rv = rv + LIST_END
return rv
# Decide which directories become separate index pages and return the number
# of lines this directory contributes to its parent's listing.
# Read in statement order: the children's line counts are summed; if that
# exceeds MAXLINES every child is flagged isbig and the count becomes the
# number of children; if the count still exceeds MAXLINES this directory is
# flagged isbig itself and counts as a single line.
# NOTE(review): with the indentation lost, the exact nesting of the two
# MAXLINES tests cannot be confirmed from this copy.
def calcsizes(self):
sum = 0
for ch in self.children:
sum = sum + ch.calcsizes()
if sum > MAXLINES:
for ch in self.children:
ch.isbig = 1
sum = len(self.children)
if sum > MAXLINES:
self.isbig = 1
sum = 1
return sum
# Render according to the isbig flag set by calcsizes().
def output(self):
if self.isbig:
return self.big_output()
else:
return self.small_output()
# Let every child add its entry to dict; the directory itself adds nothing.
def numeric_output(self, dict):
for ch in self.children:
ch.numeric_output(dict)
def addchild(self, child):
self.children.append(child)
# Return the first direct child whose path equals name, or None.
def findchild(self, name):
for child in self.children:
if child.path == name:
return child
return None
def parseindex(filename):
    """Parse an index file, return a dictionary and the directory descr.

    The first line describes the directory itself; when its second
    whitespace-separated field is '-', the first two fields are dropped
    and the rest is the description. Every later line that starts in
    column 0 has the form '<name> <description ...>'; the name is
    lower-cased and used as the dictionary key. A line starting with a
    space or tab continues the description of the preceding entry.
    Blank lines are ignored and runs of whitespace collapse to one space.

    Returns a tuple (entries, descr).
    """
    entries = {}
    lastkey = None
    # 'with' closes the file even if parsing fails; it used to be left open.
    with open(filename) as fp:
        fields = fp.readline().split()
        if len(fields) > 1 and fields[1] == '-':
            fields = fields[2:]
        descr = ' '.join(fields)
        for line in fp:
            parts = line.split()
            if not parts:
                continue
            if line[0] in (' ', '\t'):
                # Continuation line: append to the previous entry. One that
                # appears before any entry has nothing to attach to and is
                # skipped.
                # NOTE(review): the separator literal was corrupted in the
                # copy this was restored from (its tag was stripped);
                # '<BR>' is assumed -- confirm against the original source.
                if lastkey:
                    entries[lastkey] = (entries[lastkey] + '<BR>\n' +
                                        ' '.join(parts))
                continue
            name = parts[0].lower()
            lastkey = name
            entries[name] = ' '.join(parts[1:])
    return entries, descr
def run(obj, prefix, dir, dstdir, fmt):
    """Examine (recursively) the contents of 'dir', storing them in obj.

    obj    -- Directory object that receives the entries found
    prefix -- root of the source tree on disk
    dir    -- directory to scan, relative to prefix ('' for the root)
    dstdir -- prefix used to build each entry's path (dstdir + '/' + name)
    fmt    -- list-item prefix handed to every Document created

    Problems (unexpected files, a missing index, files without a
    description, index entries without a file) are reported on
    sys.stderr; none of them stops the scan.
    """
    srcdir = os.path.join(prefix, dir)
    index = {}
    files = []
    dirs = []
    #
    # We look for Index.txt, sp* and directories. The rest is ignored.
    #
    for f in os.listdir(srcdir):
        # Skip hidden files and editor backups.
        if f[0] == '.' or f[-1] == '~':
            continue
        full = os.path.join(srcdir, f)
        lname = f.lower()
        if os.path.isdir(full):
            dirs.append(f)
        elif lname == 'index.txt':
            index, obj.description = parseindex(full)
        elif lname[:2] == 'sp':
            files.append(f)
        else:
            sys.stderr.write('Extraneous file: %s\n' % full)
    if files and not index:
        sys.stderr.write('No index in %s\n' % dir)
    #
    # Next, fill our object
    #
    for f in files:
        full = os.path.join(srcdir, f)
        dstfull = dstdir + '/' + f
        lname = f.lower()
        if lname in index:
            # Consume the entry so that leftovers can be reported below.
            desc = index[lname]
            del index[lname]
        else:
            desc = ''
            sys.stderr.write('No description for %s\n' % full)
        # Element 6 of the stat result is the file size in bytes.
        obj.addchild(Document(f, dstfull, desc, os.stat(full)[6], fmt))
    for f in dirs:
        full = os.path.join(dir, f)
        dstfull = dstdir + '/' + f
        # Reuse an existing child with the same relative path, so that
        # several calls on the same obj merge into one Directory.
        n = obj.findchild(full)
        if not n:
            n = Directory(f, full, '')  # XXXX
            obj.addchild(n)
        run(n, prefix, full, dstfull, fmt)
        lname = f.lower()
        if lname in index:
            del index[lname]
    if index:
        sys.stderr.write('Extraneous entries in %s\n' %
                         os.path.join(srcdir, 'index.txt'))
        for k in index:
            sys.stderr.write('\t' + k + '\n')
# Write 'Numeric.html' in the current directory: a header, the values of
# `dict` one per line in sorted-key order, and a footer.
# main() passes the mapping filled in by the numeric_output() methods, which
# is keyed by entry name.
# NOTE(review): several string literals below span physical lines; it looks
# like HTML markup (tags and link targets) was stripped from inside the
# quotes. TODO: restore them from the original source.
# NOTE(review): fp is never closed in the visible code.
def numeric(dict):
fp = open('Numeric.html', 'w')
fp.write(
"Spunk Press numeric catalog\n")
fp.write('\n')
fp.write("Spunk Press numeric catalog
\n")
fp.write(RULER)
fp.write(LIST_START)
# Relies on keys() returning a list that can be sorted in place (Python 2).
items = dict.keys()
items.sort()
for i in items:
fp.write(dict[i]+'\n')
fp.write(LIST_END)
fp.write(RULER)
fp.write('
'+
'Back to the top level catalog ')
fp.write('
'+
'Back to the Spunk Press homepage
\n')
fp.write(RULER)
fp.write('Spunk Press, 1995.\n')
fp.write('Catalog generated: %s
\n'%CURDATE)
# The retrieval note is only written when WHERE is non-empty.
if WHERE:
fp.write('(actual documents retrieved from %s)\n'%WHERE)
fp.write('\n')
fp.write('\n')
def argerror():
    """Print the usage message on standard output and exit with status 1."""
    # sys.stdout.write() emits exactly what the former print statement
    # did, and is valid syntax on both Python 2 and Python 3.
    sys.stdout.write(
        'Usage: GenCatalog -s sourcetree -p URLprefix [-w where]\n')
    sys.exit(1)
def main():
    """Command-line entry point.

    Parses -s (source tree), -p (URL prefix) and the optional -w (sets
    WHERE), scans the 'texts' and 'images' subtrees into one top-level
    Directory, then writes the index pages and the numeric catalog.
    Exits through argerror() on bad or missing options.
    """
    global WHERE
    srcroot = urlroot = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], 's:p:w:')
    except getopt.error:
        argerror()
    for flag, value in opts:
        if flag == '-s':
            srcroot = value
        elif flag == '-p':
            urlroot = value
        elif flag == '-w':
            WHERE = value
    # Both the source tree and the URL prefix are mandatory.
    if not (srcroot and urlroot):
        argerror()
    top = Directory('Toplevel', None, 'Full catalog')
    for subtree, item_fmt in (('texts', LIST_ITEM_TXT),
                              ('images', LIST_ITEM_IMG)):
        run(top, os.path.join(srcroot, subtree), '',
            urlroot + '/' + subtree, item_fmt)
    # Return values are not needed here; calcsizes() is called for the
    # isbig flags it sets, big_output() for the files it writes.
    top.calcsizes()
    top.big_output()
    by_name = {}
    top.numeric_output(by_name)
    numeric(by_name)


if __name__ == '__main__':
    main()