#!/usr/local/bin/perl
#
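# builds the ghindex.html file for an archive directory
# input:
#		<indexdir>   (only this argument is read; the title, url and the
#		             subdirectory-index flag come from the first line of
#		             archive.cfg inside <indexdir>)
# output:
#		ghindex.html in <indexdir> and, when the subdirectory-index flag
#		is 1, in every subdirectory below it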
###############################################################

# Location of httpd directory
# (used below only to build $MASTERTEMPLATE)
$GLIMPSEHTTP_HOME="/usr2/local/glimpsehttp2";

# This is URL path where the scripts are located...
# (the search form posts to "/$CGIBIN/aglimpse")
$CGIBIN = "cgi-bin2";

# get the paths for perl and glimpse
# NOTE(review): $GLIMPSE_LOC is not referenced anywhere else in this
# script as shown -- confirm before removing.
$GLIMPSE_LOC = "/usr2/local/glimpse";

####################################################################
#                                                                  #
# configuration below this line should be done by hand (if at all) #
#                                                                  #
####################################################################

# default options for glimpseindex
# NOTE(review): not referenced anywhere else in this script as shown.
$GLIMPSEIDX_OPT = "-o";

# index template
# $MASTERTEMPLATE is the installation-wide copy (not referenced elsewhere in
# this script as shown); $TEMPLATE is the per-archive copy that this script
# actually opens, as "<indexdir>/$TEMPLATE".
$MASTERTEMPLATE=$GLIMPSEHTTP_HOME."/ghtemplate.html";
$TEMPLATE=".ghtemplate.html";

# description file, we try to find it in every directory
# (tab-separated "name<TAB>description" lines; lines starting with '@'
# are copied verbatim into the generated page)
$DESCFILE=".description";

# name of the index file (the file this script writes in each directory)
$HTMLINDEX="ghindex.html";

# name of config file
# (first line is read as: title <TAB> url <TAB> subindex-flag)
$CONFIGFILE = "archive.cfg";

# name of the re-indexing script kept in the archive directory; this script
# only uses the name to leave that file out of the generated listings
# (presumably it is the script run from cron -- TODO confirm)
$CRONFILE = "ghreindex";

# gif file (shown in the search form as "<url>/$EYEGIF")
$EYEGIF = ".gheye.gif";


##############################################
#                                            #
# no configuration is needed below this line #
#                                            #
##############################################

# permission information: files created from here on get no group/other
# write permission; the previous umask is kept in $umaskval
$umaskval = umask(0022);
# for executables
$xmodval = 0755;

# the directory to index is the only command-line argument that is read
$indexdir = $ARGV[0];

if(!$indexdir){
	print "Directory not specified.  Quitting.\n";
	exit -1;
}

# remember where we started.  `pwd` output ends in a newline; it has to be
# stripped or the chdir() back at the end of the script can never succeed.
$oldpwd = `pwd`;
chomp $oldpwd;

# try to change the directory to indexdir
if(!chdir($indexdir)){
	print "Cannot change directory to $indexdir.  Quitting.\n";
	exit -3;
}

# get the 'real' path
$indexdir = `pwd`;
chomp $indexdir;

# read the first line of the config file: title <TAB> url <TAB> subindex
open(CFG, '<', "$indexdir/$CONFIGFILE")
	|| die "Cannot open $indexdir/$CONFIGFILE: $!\n";
$line = <CFG>;
close(CFG);
$line = "" unless defined($line);
# drop the line terminator so it does not become part of the last field
# (it would otherwise end up inside $url or $subindex)
$line =~ s/\r?\n$//;
($title,$url,$subindex) = split(/\t/, $line);

$archivename = $title;

# set the variables BEFORE making the template
$searchurl = "/".$CGIBIN."/aglimpse";
$indexdir_length = length($indexdir);

# open TEMPLATE; make_ghindices rewinds and re-reads this handle for
# every index file it writes
open(TEMPLATE, '<', "$indexdir/$TEMPLATE") ||
    die "Cannot open template file "."$TEMPLATE: $!\n";

# make the indices
print "\n\nMaking index files...\n";
&make_ghindices($indexdir,$title);
close(TEMPLATE);

print "\n\nGeneration of ghindex.html files has been successfully completed.\n";

#change the dir back
chdir($oldpwd);


##############################################################################
## Subroutines
##############################################################################

# make_ghindices($absdir, $title)
#
# Recursively walks $absdir, and for the root directory (and for every
# subdirectory when $subindex is 1) writes "$absdir/$HTMLINDEX" by expanding
# the <!GH_...> marker lines of the already-open TEMPLATE filehandle.
#
#   $absdir  absolute path of the directory to process
#   $title   page title; when empty one is built from $archivename and the
#            path of $absdir relative to $indexdir
#
# Returns the list ($count, $bytes): $count is the number of files found in
# and below $absdir plus one for every directory visited, $bytes is the sum
# of the sizes of those files.
#
# Reads the globals $HTMLINDEX, $CRONFILE, $CONFIGFILE, $TEMPLATE, $DESCFILE,
# $subindex, $indexdir, $indexdir_length and $archivename, and sets the
# global $relpath, which output_form reads when it emits the search form.
#
# NOTE(review): -d follows symbolic links, so a symlinked directory is
# recursed into even though symlinked plain files are skipped; a link that
# points back up the tree would recurse without end -- confirm whether
# symlinked directories should be skipped too.
sub make_ghindices {
	my($absdir,$title) = @_;
	my($file,$dir,$line,$name,$text,$size,$thistitle);
	my($dirfiles,$dirsize,$subdir);
	my($totfiles,$totsize) = (0,0);
	my($fileshere,$dirshere) = ("","");
	my(@files,@subdirs,@verbatim);
	# per-directory descriptions; kept lexical so that entries from one
	# directory's description file cannot leak into another directory
	my(%desc);

	# read the directory
	chdir($absdir);
	print "In directory $absdir...\n";
	if (!opendir(DIR,$absdir)) {
		warn "Cannot read directory $absdir: $!\n";
		return ($totfiles+1,$totsize);
	}
	# defined() is required: a file named "0" must not end the loop
	while (defined($file = readdir(DIR))) {
		# skip the file if it starts with a '.' or is one of OUR config files
		next if $file =~ /^\./;
		next if $file eq $HTMLINDEX;
		next if $file eq $CRONFILE;
		next if $file eq $CONFIGFILE;
		next if $file eq $TEMPLATE;

		# test absolute paths so the result does not depend on the
		# chdir() above having succeeded
		if (-d "$absdir/$file") {
			# it's a subdir
			print "  directory: $file\n";
			push(@subdirs, $file);
		} else {
			next if (-l "$absdir/$file");  # skip if it's a symbolic link

			# it's a file
			print "  file: $file\n";
			push(@files, $file);
		}
	}
	closedir(DIR);

	# Load the description file BEFORE the listings are built, so that its
	# entries are actually used for this directory's files and subdirs.
	# Lines starting with '@' are kept (without the '@') and copied verbatim
	# into the page at the <!GH_DESCRIPTION> marker; all other lines are
	# "name<TAB>description" pairs.
	if (open(DESC, '<', "$absdir/$DESCFILE")) {
		while (defined($line = <DESC>)) {
			if ($line =~ s/^\@//) {
				push(@verbatim, $line);
				next;
			}
			$line =~ s/\r?\n$//;
			($name,$text) = split(/\t+/, $line);
			next unless defined($name) && $name =~ /^\w/;
			$desc{$name} = $text;
		}
		close(DESC);
	}

	# process the files
	foreach $file (@files) {
		# no explicit description: for names containing "htm" fall back
		# to the document's <title> (same precedence as used for subdirs)
		if (!$desc{$file} && $file =~ m#htm[^/]*#i){
			$desc{$file} = &gettitle("$absdir/$file");
		}

		# get the size (element 7 of stat); treat a failed stat as 0 bytes
		$size = (stat("$absdir/$file"))[7];
		$size = 0 unless defined($size);
		$totsize += $size;

		# make the descriptions
		$thistitle = $desc{$file} ? $desc{$file} : "$file";
		$thistitle .= " ($size bytes)";
		$fileshere .= "<li><a href=\"$file\">".
			"$thistitle</a>\n";
	}
	$totfiles = scalar(@files);
	if($fileshere eq ""){
		$fileshere = "<dt>No files in this directory.\n";
	}

	# process the subdirs
	### TO DO -- decide if this is the best way to do the subdirs
	foreach $dir (@subdirs) {
		$thistitle = $desc{$dir};
		$thistitle = &gettitle("$absdir/$dir/index.html")
			if !$thistitle;
		$thistitle = &gettitle("$absdir/$dir/index.htm")
			if !$thistitle;
		# RECURSE
		($dirfiles,$dirsize) = &make_ghindices("$absdir/$dir", $thistitle);
		$totfiles += $dirfiles;
		$totsize += $dirsize;
		if(!$thistitle){
			$thistitle = "Subdirectory '$dir'";
		}

		# add the number of files and file size to ALL directories
		# ($dirfiles is always at least 1, see the return value)
		$thistitle .= " ($dirfiles files, $dirsize bytes)";

		$dirshere .= "<li><a href=\"$dir/$HTMLINDEX\">".
			"$thistitle</a>\n";
	}
	if($dirshere eq ""){
		$dirshere = "<dt>No subdirectories.\n";
	}

	# check to make sure we should make the file
	# if we want the subdirs OR we're the root
	if($subindex==1 || $absdir eq $indexdir){
		# set the $relpath to our directory (global: read by output_form)
		$relpath = "/$indexdir_length$absdir";
		# generate output
		if (!$title){
			# we need to make a subdir title
			$subdir = substr($absdir, $indexdir_length);
			$title = "$archivename (directory: $subdir)";
		}

		# open() reports failure through its return value, not by dying,
		# so test it directly; a failure is only a warning, not fatal
		if (!open(OUT, '>', "$absdir/$HTMLINDEX")) {
			warn "Cannot open file ".
				"$absdir/$HTMLINDEX for writing: $!\n";

			# return!
			return ($totfiles+1,$totsize);
		}

		# rewind the template
		seek(TEMPLATE,0,0);

		while (defined($line = <TEMPLATE>)) {
			# look for the key words
			### TO DO -- might want a bit more robust checking

			if ($line =~ /^\s*<\!GH_TITLE>\s*$/) {
				print OUT $title,"\n";
			}elsif ($line =~ /^\s*<\!GH_DESCRIPTION>\s*$/) {
				# the '@' lines collected from the description file
				print OUT @verbatim;
			}elsif ($line =~ /^\s*<\!GH_SUBDIRS>\s*$/) {
				print OUT "<ul>\n";
				print OUT $dirshere;
				print OUT "</ul>\n";
			}elsif ($line =~ /^\s*<\!GH_SEARCH>\s*$/) {
				&output_form(\*OUT);
			}elsif ($line =~ /^\s*<\!GH_FILES>\s*$/) {
				print OUT "<ul>\n";
				print OUT $fileshere;
				print OUT "</ul>\n";
			}else{
				print OUT $line;
			}
		}

		# buffered write errors (e.g. disk full) only show up at close
		close(OUT) ||
			warn "Error writing $absdir/$HTMLINDEX: $!\n";
	}

	return ($totfiles+1,$totsize);
}

# output_form($OUT)
#
# Prints the Glimpse HTTP search form (an HTML table) to the filehandle
# $OUT, which may be a glob reference, a lexical handle or the name of a
# filehandle.  Returns nothing useful.
#
# Reads the globals $url and $EYEGIF (image location), and $searchurl and
# $relpath (form ACTION).  Those interpolated attribute values are
# HTML-escaped and double-quoted so that paths containing spaces or markup
# characters do not break the tag.
sub output_form{
	##################################################################
	# NOTE: DO NOT REMOVE THE REFERENCE TO GLIMPSE HTTP              #
	#       OR YOU WILL BE IN VIOLATION OF THE COPYRIGHT             #
	#       You may change the reference format, if you'd like, but  #
	#       the words "Glimpse HTTP" must still appear               #
	##################################################################
	my($OUT) = @_;
	my($imgsrc,$action) = ("$url/$EYEGIF", "$searchurl$relpath");
	foreach ($imgsrc, $action) {
		s/&/&amp;/g;
		s/"/&quot;/g;
		s/</&lt;/g;
		s/>/&gt;/g;
	}

	print $OUT "<! *******************************************************\n";
	print $OUT "   * NOTE: DO NOT REMOVE THE REFERENCE TO GLIMPSE HTTP   *\n";
	print $OUT "   * OR YOU WILL BE IN VIOLATION OF THE COPYRIGHT        *\n";
	print $OUT "   * You may change the reference format, if you'd like, *\n";
	print $OUT "   * but the words \"Glimpse HTTP\" must still appear      *\n";
	print $OUT "   *******************************************************>\n";
	print $OUT "<center>\n";
	print $OUT "<table border=5>\n";
	print $OUT "<tr border=0>\n";
	print $OUT "<td align=center valign=middle>\n";
	print $OUT "<a href=http://glimpse.cs.arizona.edu/ghttp>\n";
	# the anchor around the image has to be closed before the cell ends
	print $OUT "<img src=\"$imgsrc\" align=middle></a></td>\n";
	print $OUT "<td align=center valign=middle>\n";
	# <font> encloses the whole heading, <a> only the product name
	print $OUT "<font size=+3><a href=http://glimpse.cs.arizona.edu/ghttp>\n";
	print $OUT "Glimpse HTTP</a> Search<br></font></td>\n";
	print $OUT "</tr>\n";
	print $OUT "<tr><td colspan=2>\n";
	print $OUT "<FORM method=get ACTION=\"$action\">\n";
	print $OUT "String to search for: <INPUT NAME=query size=30>\n";
	print $OUT "<INPUT TYPE=submit VALUE=Submit>\n";
	print $OUT "<br>\n";
	print $OUT "<center>\n";
	print $OUT "<INPUT NAME=case TYPE=checkbox>Case sensitive\n";
	print $OUT "<!SPACES>&#160;&#160;&#160;&#160;&#160;&#160;&#160;\n";
	print $OUT "<!SPACES>&#160;&#160;&#160;&#160;&#160;&#160;&#160;\n";
	print $OUT "<INPUT NAME=whole TYPE=checkbox>Partial match\n";
	print $OUT "<!SPACES>&#160;&#160;&#160;&#160;&#160;&#160;&#160;\n";
	print $OUT "<!SPACES>&#160;&#160;&#160;&#160;&#160;&#160;&#160;\n";
	print $OUT "<SELECT NAME=errors align=right>\n";
	print $OUT "<OPTION>0\n";
	print $OUT "<OPTION>1\n";
	print $OUT "<OPTION>2\n";
	print $OUT "</SELECT>\n";
	print $OUT "misspellings allowed\n";
	print $OUT "<br>\n";
	print $OUT "</center>\n";
	print $OUT "Return only files modified within the last <INPUT NAME=age size=5>\n";
	print $OUT "days.\n";
	print $OUT "<br>\n";

	print $OUT "Maximum number of files returned:\n";
	print $OUT "<SELECT NAME=maxfiles>\n";
	print $OUT "<OPTION>10\n";
	print $OUT "<OPTION selected>50\n";
	print $OUT "<OPTION>100\n";
	print $OUT "<OPTION>1000\n";
	print $OUT "</SELECT>\n";

	print $OUT "<br>Maximum number of matches per file returned:\n";
	print $OUT "<SELECT NAME=maxlines>\n";
	print $OUT "<OPTION>10\n";
	print $OUT "<OPTION selected>30\n";
	print $OUT "<OPTION>50\n";
	print $OUT "<OPTION>500\n";
	print $OUT "</SELECT>\n";

	print $OUT "<br>\n";
	print $OUT "</FORM>\n";
	print $OUT "</td></tr>\n";
	print $OUT "<tr><td colspan=2>\n";
	print $OUT "<center>\n";
	print $OUT "<font size=-2><a href=http://glimpse.cs.arizona.edu>\n";
	print $OUT "Glimpse</a> and <a href=http://glimpse.cs.arizona.edu/ghttp>\n";
	print $OUT "GlimpseHTTP</a>, Copyright &copy; 1996, \n";
	print $OUT "University of Arizona\n";
	# close in the reverse order of opening: font first, then center
	print $OUT "</font>\n";
	print $OUT "</center>\n";
	print $OUT "</td></tr>\n";
	print $OUT "</table></center>\n";
}

# gettitle($fullpath)
#
# Returns the text between <title> and </title> (matched case-insensitively)
# of the file $fullpath.  A title that spans several lines is joined with
# single spaces.  Returns undef when the file cannot be opened or contains
# no <title> tag.  If the tag is never closed, everything from the tag to
# the end of the file is returned.
sub gettitle {
	my($fullpath) = $_[0];
	my($title,$line,$fh);
	my $intitle = 0;

	# three-argument open: the name is used exactly as given, so leading or
	# trailing whitespace and mode characters in it are not reinterpreted
	if (open($fh, '<', $fullpath)) {
		while (defined($line = <$fh>)) {
			# strip only the line terminator; a blind chop would eat the
			# final '>' of "</title>" on a last line without a newline
			$line =~ s/\r?\n$//;
			if ($line =~ /\<title\>(.*)$/i) {
				$intitle = 1;
				$title = $1;
			} elsif ($intitle) {
				$title .= " $line";
			}
			# stop at the closing tag, dropping it and anything after it
			if ($intitle && $title =~ s#</title>.*##i) {
				last;
			}
		}
		close($fh);
	}
	return $title;
}

