#!/usr/local/bin/python
"""
mkwwwcatalog - Spunk HTML Catalog generating utility

Jack Jansen, CWI, 03-Oct-95.
23-Oct-95 - Fixed multiple indices with the same name
16-Dec-95 - Updated for new directory structure and names

Walks the 'texts' and 'images' subtrees of a source tree, reads the
'index.txt' file found in each directory, and writes a set of HTML index
pages plus 'Numeric.html' into the current working directory.

Usage: -s sourcetree -p URLprefix [-w where]

The code sticks to constructs that are valid on both Python 2.6+ and
Python 3.
"""

import sys
import os
import string  # no longer used by this file; kept so the import block is unchanged
import time
import getopt


def curdate():
    """Return date in 00-Mmm-99 form"""
    months = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    now = time.localtime(time.time())
    # now[2] = day of month, now[1] = month (1..12), now[0] = full year.
    return '%02.2d-%s-%02.2d' % (now[2], months[now[1]], now[0] % 100)


CURDATE = curdate()     # computed once at import; stamped into every page
WHERE = ''              # set by the -w option; shown in the footer if set
MAXLINES = 40           # a directory listing longer than this gets its own page
TOP_NAME = 'Toplevel'   # name of the root index; its page is TOP_NAME + '.html'

# NOTE(review): the HTML tags in every string literal of this file had been
# stripped before this revision (format strings had fewer '%s' than
# arguments, and the "HTML" output contained no markup).  The tags below and
# elsewhere in the file are a reconstruction from the constant names, the
# argument counts and the text that survived.  The three LIST_ITEM_* values
# were probably distinct originally (e.g. different icons) -- confirm against
# a pristine copy of the script if one exists.
LIST_START = '<UL>\n'
LIST_ITEM_TXT = '<LI> '
LIST_ITEM_IMG = '<LI> '
LIST_ITEM_DIR = '<LI> '
LIST_END = '</UL>\n'
RULER = '<HR>\n'

# Index names handed out so far (used as a set; values are always 1).
NamesUsed = {}


def mkfilename(name):
    """Return a unique '<name>.html' filename for an index page.

    If 'name' was handed out before, a numeric suffix (0, 1, 2, ...) is
    appended until an unused name is found, and the rename is reported on
    stdout.  The chosen name is recorded in NamesUsed.
    """
    if name in NamesUsed:
        i = 0
        candidate = name + str(i)
        while candidate in NamesUsed:
            i = i + 1
            candidate = name + str(i)
        sys.stdout.write('Index renamed to %s\n' % candidate)
        name = candidate
    NamesUsed[name] = 1
    return name + '.html'


def _write_head(fp, title):
    """Write the page opening (title, body start, main heading) to fp."""
    fp.write("<HTML><HEAD><TITLE>%s</TITLE></HEAD>\n" % title)
    fp.write("<BODY>\n")
    fp.write("<H1>%s</H1>\n" % title)


def _write_footer(fp):
    """Close the item list and write the footer shared by all pages."""
    fp.write(LIST_END)
    fp.write(RULER)
    # All index pages are written to the same directory, so a relative
    # link to the top-level page is sufficient.
    fp.write('<A HREF="%s.html">Back to the top level catalog</A> ' % TOP_NAME)
    # NOTE(review): the homepage link target was lost together with the
    # markup; the text is emitted without an anchor until it is known.
    fp.write('Back to the Spunk Press homepage<BR>\n')
    fp.write(RULER)
    fp.write('Spunk Press, 1995.\n')
    fp.write('Catalog generated: %s<BR>\n' % CURDATE)
    if WHERE:
        fp.write('(actual documents retrieved from %s)\n' % WHERE)
    fp.write('</BODY>\n')
    fp.write('</HTML>\n')


class Thing:
    """Common base of catalog entries (documents and directories)."""

    def __init__(self, name, path, description, fullsize, fmt):
        self.name = name
        self.path = path
        self.description = description
        self.fullsize = fullsize
        self.fmt = fmt


class Document(Thing):
    """A single file in the catalog; 'path' is the URL it is linked to."""

    def output(self):
        """Return the list-item line for this document.

        An empty description is replaced (permanently) by the file name.
        """
        if not self.description:
            self.description = self.name
        # '%' binds tighter than '+': self.fmt is a literal prefix.
        return self.fmt + '<A HREF="%s">%s</A> [%s]' % (
            self.path, self.description, self.getfullsize())

    def numeric_output(self, dict):
        """Store this document's line for the numeric index in dict[name]."""
        dict[self.name] = self.fmt + '<A HREF="%s">%s</A> %s [%s]' % (
            self.path, self.name, self.description, self.getfullsize())

    def calcsizes(self):
        """A document always occupies one line in a listing."""
        return 1

    def getfullsize(self):
        """Return the file size as a rounded, human-readable string."""
        if self.fullsize < 1000:
            return '< 1Kb'
        if self.fullsize < 1000000:
            # '//' keeps the integer rounding the same on Python 2 and 3.
            return '%d Kb' % ((self.fullsize + 500) // 1000)
        return '%d Mb' % ((self.fullsize + 500000) // 1000000)

    def getnumitems(self):
        """A document counts as one item."""
        return 1


class Directory(Thing):
    """A catalog directory holding Document and Directory children."""

    def __init__(self, name, path, description):
        Thing.__init__(self, name, path, description, 0, None)
        self.children = []
        self.parent = None
        self.index = None
        self.main = None
        self.isbig = 0      # set by calcsizes(): 1 = gets its own page

    def getnumitems(self):
        """Return the number of documents below this directory."""
        return sum(ch.getnumitems() for ch in self.children)

    def getfullsize(self):
        """Return the item count as display text."""
        return '%d items' % self.getnumitems()

    def _big_o_header(self):
        """Out header for a html index file"""
        _write_head(self.fp, "Spunk Press Catalog: %s" % self.name)
        if self.description:
            self.fp.write("<H2>%s</H2>\n" % self.description)
        self.fp.write(RULER)
        self._big_o_menu()
        self.fp.write(LIST_START)

    def _big_o_footer(self):
        """Output footer for index file"""
        _write_footer(self.fp)

    def _big_o_menu(self):
        """Hook called between the header ruler and the list; does nothing."""
        pass

    def big_output(self):
        """Write this directory to its own index page.

        Returns the list-item line linking to that page, for use in the
        parent's listing.  An empty description is replaced by the name.
        """
        filename = mkfilename(self.name)
        self.fp = open(filename, 'w')
        try:
            self._big_o_header()
            for ch in self.children:
                # Children flagged as big write their own pages recursively.
                self.fp.write(ch.output() + '\n')
            self._big_o_footer()
        finally:
            self.fp.close()
        if not self.description:
            self.description = self.name
        return LIST_ITEM_DIR + '<A HREF="%s">%s</A> [%s]' % (
            filename, self.description, self.getfullsize())

    def small_output(self):
        """Return this directory as an inline nested list."""
        name = self.name
        if self.description:
            name = name + ' - ' + self.description
        name = name + ':'
        pieces = [LIST_ITEM_DIR, '%s\n' % name, LIST_START]
        for ch in self.children:
            pieces.append(ch.output() + '\n')
        pieces.append(LIST_END)
        return ''.join(pieces)

    def calcsizes(self):
        """Decide which directories get their own page.

        Returns the number of lines this directory contributes to its
        parent's listing, setting 'isbig' on children and/or on this
        directory whenever a listing would exceed MAXLINES.
        """
        total = 0
        for ch in self.children:
            total = total + ch.calcsizes()
        if total > MAXLINES:
            # Too long when expanded inline: push every child to its own
            # page, leaving one line per child here.
            for ch in self.children:
                ch.isbig = 1
            total = len(self.children)
        if total > MAXLINES:
            # Still too long: this directory becomes a page of its own and
            # takes a single line in the parent.
            self.isbig = 1
            total = 1
        return total

    def output(self):
        """Return this directory's entry, as a page link or inline list."""
        if self.isbig:
            return self.big_output()
        return self.small_output()

    def numeric_output(self, dict):
        """Collect the numeric-index lines of all documents below here."""
        for ch in self.children:
            ch.numeric_output(dict)

    def addchild(self, child):
        """Append child to this directory."""
        self.children.append(child)

    def findchild(self, name):
        """Return the child whose 'path' equals name, or None."""
        for child in self.children:
            if child.path == name:
                return child
        return None


def parseindex(filename):
    """Parse an index file, return a dictionary and the directory descr

    The first line is the directory description; a leading '<word> - '
    is dropped from it.  Each later line is '<name> <description...>';
    names are lower-cased.  A line starting with a space or tab continues
    the previous entry.  Whitespace runs are collapsed to single spaces.
    """
    with open(filename) as fp:
        lines = fp.readlines()

    fields = lines[0].split() if lines else []
    if len(fields) > 1 and fields[1] == '-':
        fields = fields[2:]
    descr = ' '.join(fields)

    entries = {}
    lastkey = None
    for desc in lines[1:]:
        parts = desc.split()
        if not parts:
            continue
        if desc[0] in (' ', '\t'):
            # Continuation line; silently dropped if no entry precedes it.
            if lastkey:
                entries[lastkey] = entries[lastkey] + '<BR>\n' + ' '.join(parts)
            continue
        name = parts[0].lower()
        lastkey = name
        entries[name] = ' '.join(parts[1:])
    return entries, descr


def run(obj, prefix, dir, dstdir, fmt):
    """Examine (recursively) the contents of 'dir' storing in obj

    obj    -- Directory that receives the entries found
    prefix -- filesystem root; 'dir' is relative to it
    dir    -- directory to scan, relative to prefix
    dstdir -- URL prefix under which the files of 'dir' are published
    fmt    -- list-item prefix given to every Document created

    Problems (missing index, undescribed or extraneous files, unused
    index entries) are reported on stderr.
    """
    srcdir = os.path.join(prefix, dir)
    index = {}
    files = []
    dirs = []
    #
    # We look for Index.txt, sp* and directories. The rest is ignored.
    # Sorted so the generated catalog does not depend on listdir() order.
    #
    for f in sorted(os.listdir(srcdir)):
        if f[0] == '.' or f[-1] == '~':
            continue
        full = os.path.join(srcdir, f)
        lower = f.lower()
        if os.path.isdir(full):
            dirs.append(f)
        elif lower == 'index.txt':
            index, obj.description = parseindex(full)
        elif lower[:2] == 'sp':
            files.append(f)
        else:
            sys.stderr.write('Extraneous file: %s\n' % full)
    if files and not index:
        sys.stderr.write('No index in %s\n' % dir)
    #
    # Next, fill our object
    #
    for f in files:
        full = os.path.join(srcdir, f)
        dstfull = dstdir + '/' + f
        lower = f.lower()
        if lower in index:
            # Consume the entry so leftovers can be reported below.
            desc = index.pop(lower)
        else:
            desc = ''
            sys.stderr.write('No description for %s\n' % full)
        obj.addchild(Document(f, dstfull, desc, os.path.getsize(full), fmt))
    for f in dirs:
        full = os.path.join(dir, f)
        dstfull = dstdir + '/' + f
        # Reuse a directory of the same relative path if an earlier run()
        # (on another prefix) already created it.
        n = obj.findchild(full)
        if n is None:
            # Description starts empty; the recursive run() sets it from
            # that directory's own index file, if it has one.
            n = Directory(f, full, '')
            obj.addchild(n)
        run(n, prefix, full, dstfull, fmt)
        index.pop(f.lower(), None)
    if index:
        sys.stderr.write('Extraneous entries in %s\n' %
                         os.path.join(srcdir, 'index.txt'))
        for k in index.keys():
            sys.stderr.write('\t' + k + '\n')


def numeric(dict):
    """Write 'Numeric.html': every line in dict, ordered by key."""
    with open('Numeric.html', 'w') as fp:
        _write_head(fp, "Spunk Press numeric catalog")
        fp.write(RULER)
        fp.write(LIST_START)
        for key in sorted(dict.keys()):
            fp.write(dict[key] + '\n')
        _write_footer(fp)


def argerror():
    """Print the usage message and exit with status 1."""
    sys.stdout.write(
        'Usage: GenCatalog -s sourcetree -p URLprefix [-w where]\n')
    sys.exit(1)


def main():
    """Parse the command line and generate the complete catalog."""
    global WHERE
    srcroot = realdir = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], 's:p:w:')
    except getopt.error:
        argerror()
    for o, a in opts:
        if o == '-s':
            srcroot = a
        if o == '-p':
            realdir = a
        if o == '-w':
            WHERE = a
    if not srcroot or not realdir:
        argerror()
    top = Directory(TOP_NAME, None, 'Full catalog')
    # Both trees are collected into the same root; directories with the
    # same relative path are shared between them (see run()).
    run(top, os.path.join(srcroot, 'texts'), '', realdir + '/texts',
        LIST_ITEM_TXT)
    run(top, os.path.join(srcroot, 'images'), '', realdir + '/images',
        LIST_ITEM_IMG)
    top.calcsizes()
    # The root always gets its own page, whatever calcsizes() decided.
    top.big_output()
    lines = {}
    top.numeric_output(lines)
    numeric(lines)


if __name__ == '__main__':
    main()