#                              -*- Mode: Perl -*- 
# 
# client_common.pl -- common code for waisperl and plain perl 
# ITIID           : $ITI$ $Header $__Header$
# Author          : Ulrich Pfeifer
# Created On      : Thu Jun 23 16:00:30 1994
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Mon Mar 27 15:10:42 1995
# Language        : Perl
# Update Count    : 41
# Status          : Unknown, Use with caution!
# 
# (C) Copyright 1995, Universitt Dortmund, all rights reserved.
# 
# HISTORY
# 20-Jan-1995		Ulrich Pfeifer	
#    Last Modified: Fri Jan 20 14:51:54 1995 #20 (Ulrich Pfeifer)
#    support for verbose headlines

# $Locker:  $
# $Log: client_common.pl,v $
# Revision 1.7  1995/04/08  13:22:13  pfeifer
# added the return after the grep in parse_url which is necessary with
# some perl systems.
# Support for verbose headlines.
#
# Revision 1.6  1995/01/20  15:42:20  pfeifer
# support for verbose headlines
# removed support for compressed multitypes
#

sub parse_url {
    local($_) = @_;
    local($doctype,$server,$database,$start,$end,$path);

    s/^[^\?]*\?//;                       # remove path in front of query string
    s/waisdoc=[^&]&//;                   # remove document tag
    s/%20/ /g;                           # spaces
    ($database) = m:^([^/]+):;           # first path component
    ($doctype)  = m:([^/]*)/[0-9]+/:;    # second path component
    ($server)   = /4=([^;]*);/;
    ($start,$end,$path)    = /6=(\d+)\s+(\d+)\s+([^;]*);/;
    &dprint("parse_url doctype=%s server=%s database=%s start=%s end=%s path=%s\n",
            $doctype,$server,$database,$start,$end,$path);

    grep(($_ = &decode($_),1), ($server,$database,$doctype,$start,$end,$path));
    # some implementations need this line. do not know why.
    return ($server,$database,$doctype,$start,$end,$path);
}

sub get_chunk {
    local($no, $url, $max) = @_;
    local($csize,$cstart,$cend,$next,
          $server,$database,$doctype,$start,$end,$path);

    $csize = $max - 232;                # seems to be what freeWAIS does
    ($server,$database,$doctype,$start,$end,$path) = &parse_url($url);
    $end     = $end-$start;              # chunks are relative
    $start   = 0;
    $cstart  = $no*$csize;
    $cend    = $cstart+$csize;
    if ($cend >= $end) {
        $cend = $end;
    } elsif ($cend < $end) {
        $next = $no+1;                   # a chunk follows
    }
    &dprint ("get_chunk: start=%d end=%d next=%d\n", $cstart,$cend,$next);
    &log("get_chunk: %d,%d start=%d end=%d next=%d\n", $start,$end, $cstart,$cend,$next) if $next;
    ($cstart,$cend,$next);
}
    
sub parse_wsrc {
    local($_) = join('',@_);
    local($databasename,$ipname,$tcpport,$description) =
        ($serverdb, $server, $defport, 'No description');

    $databasename  = $1 if m/:database-name\s+\"([^\"]*)\"/; 
    $ipname        = $1 if m/:ip-name\s+\"([^\"]*)\"/;
    $tcpport       = $1 if m/:tcp-port\s+([0-9]+)/;
    $description   = $1 if m/:description\s+\"([^\"]*)\"/;
    ($databasename,$ipname,$tcpport,$description);
}

sub init_plain {
    local($num,$type) = @_;
    $init_plain_type = $type;

    if ($init_plain_type && $verbose_headlines) {
        $init_plain_type = 'DL';
    }
    print <<"EOHL"
The selected databases contain the following $num items relevant to
your query.
<$init_plain_type>
EOHL
    ;
}

sub exit_plain {
    if ($init_plain_type) {
        print "</$init_plain_type>\n";
        $init_plain_type = '';
    }
}

sub init_form {
    print <<"EOHL"
Select some of the following databases and enter your query.
<form METHOD=GET>

<HR>
<DL>
EOHL
    ;                          
}

sub exit_form {
    print <<"EOHL"
</DL>
<HR><H1>Enter your query here</H1>
<TEXTAREA NAME="text" TYPE=TEXT ROWS=5 COLS=40 size=40,5>

</textarea>
<p>
<input type="submit" value="start search">
<input type="reset"  value="reset query">

</form>
EOHL
    ;        
}
    
sub form_entry {
    local($ipname,$tcpport,$databasename) = @_;

    print <<"EOHL"
<DT> 
<input NAME="database"
       TYPE="checkbox"
       VALUE="$ipname:$tcpport/$databasename"><b>$databasename</b>
<DD> <PRE>$description</PRE>
EOHL
    ;
}

sub print_server_descriptions {
    local($score, $lines, $url, $headline, $text, $type);
    local($databasename,$ipname,$tcpport,$description);
    local($form_inited);
    local(@other_headlines);

    # handle server descriptions
    for (sort {&scoresort} @headlines) {
        ($score, $lines, $url, $headline, $text) = split(/$WAISfldsepr/, $_);
        ($type) = ($url =~ m:[^/]*/([^/]*)/:);
        if ($type eq "WSRC") {
            &init_form unless $form_inited++;
            ($databasename,$ipname,$tcpport,$description) = 
                &parse_wsrc($text);
            &form_entry($ipname,$tcpport,$databasename);
        } else {
            push(@other_headlines, $_);
        }
    } 
    &exit_form if $form_inited;
    @headlines = @other_headlines; # the should be a better way!
    $#other_headlines+1;
}

sub print_headlines {
    local($_, $score, $lines, $headline, @documenttext);
    local($url, $newurl, $num, $plain_inited);
    local($server,$database,$type,$start,$end,$path,$size,$rank);
    local($urlprefix) = "http:$htbin/$me?waisdoc=1&";

    if ($directwais) {
        $urlprefix = "wais://$server:$port/";
    } elsif ($convert && !$directget && !$directwais) {
        $urlprefix .= "$convert_field=".(&encode($convert)).'&';
    }
    $num = $#headlines - $[ + 1;

    return unless $num;

    # handle server descriptions
    $num = &print_server_descriptions;
    $num = $WAISmaxdoc if $num > $WAISmaxdoc;

    # handle other document types
    for (sort {&scoresort} @headlines) {
        $rank++;                          
        ($score, $lines, $url, $headline, $text) = split($WAISfldsepr, $_);
        ## ($type) = ($url =~ m:[^/]*/([^/]*)/:);
        ($server,$database,$type,$start,$end,$path) = &parse_url($url);
        $size = $end - $start;
        &init_plain($num, ($text)?'':'PRE') unless $plain_inited++ || $num <= 0;
        if ($text) {                              # we have the text already
            &print_text($type,$text,$headline) || # something went wrong?
                                                  # give a pointer instead. 
                &print_anchor($urlprefix.$url,$headline,$database,$size,$type,$rank);
                #print "<A HREF=\"${urlprefix}$url\">$headline</A>\n";
        } else {
            if ($type=~ /,/) {
                # multiple types
                @types = split(/,/, $type);
                foreach $newtype (@types) {
                    $newurl = $url;    # Original URL with A,B,C,... types
                    $newurl =~ s:/$type/:/$newtype/:; 
                    /`/;    # to fool perl-mode
                    $newurl =~ s:(6|3)=(\d+)(\s|%20)(\d+):$1=$2%20${maximum_message_size}0:g;
                    /`/;
                    $headline = $newtype unless $headline;
                    foreach $fileext (@types) {
                        $headline =~ s:.$fileext:.$newtype:;
                    } 
                    print "<A HREF=\"${urlprefix}$newurl\">$headline</A>";
                    &print_anchor($urlprefix.$newurl,$headline,$database,$size,$type,$rank);
                }
            } else {
                if ($type eq 'URL') {
                    $_ = &decode($url);
                    ($url) = ($_ =~ /3=\d+\s*\d+\s*([^;]+);/);
                    $url = $headline unless $url =~ /^\w{1,8}:/;
                    # print "<A HREF=\"$url\">$headline</A>\n";
                    &print_anchor($url,$headline,$database,$size,$type,$rank);
                } else {
                    &print_anchor($urlprefix.$url,$headline,$database,$size,$type,$rank);
                }
            }
            last unless --$num;
        }
    }
    &exit_plain;
}

sub print_anchor {
    local($url,$headline,$database,$size,$type,$num) = @_;
    
    if ($size > 1024) {
        $size = sprintf("%6.1f kbytes", $size/1024);
    } else {
        $size .= " bytes";
    }
    if ($verbose_headlines) {
        print <<"EOS"
<DT> <b>$num: <A HREF=\"$url\">$headline</A></b>
<DD> Database: <b>$database</b>, Size: <b>$size</b>, Type: <b>$type</b>
EOS
    } else {
        print "<A HREF=\"$url\">$headline</A>\n";
    }
}
    

# sort after the first digit in the arguments
$a = $b = 0; # sort uses this variables, but perl -w compains :-);
sub scoresort {
    local($sa,$sb);

    ($sa) = ($a =~ /\D*(\d+)/);
    ($sb) = ($b =~ /\D*(\d+)/);
    $sb <=> $sa;
}


sub add_diag {
    local($server,$database,$message) = @_;

    # global array %diagnostics  *must* be deleted before next run of client
    $diagnostics{$server} .= "<dt><b>$database</b><dd> $code $message<p>\n";
}

sub print_diag {
    local($server);

    return unless (keys %diagnostics);

    print <<"EOD"
<H1>Diagnostics</H1>
EOD
    ;
    for $server (keys %diagnostics) {
        print <<"EOS"
Server <b>$server</b> returns the following diagnostics:<p>
<dl>$diagnostics{$server}</dl>
EOS
    ;
    }
}

sub add_text {
    local($text) = @_;
    
    push(@documenttext,$text);
}

# here is some work to do, to give the appropriat mime content types
# Currently 
sub print_text {
    local($type, $text, $inline) = @_;

    local($title) = ("Wais document (plain text)");
    local($is_html);

    $text = join('', @documenttext) unless $text;

    if ($type =~ /GIF/) {
        return(0) if $inline;
        print "Content-Type: image/gif\n\n$text";
        return(1);
    } elsif ($type =~ /MIME/) {
        return(0) if $inline;
        print "Content-Type: application/metamail\n\n$text";
        return(1);
    } elsif ($type =~ /PS/) {
        return(0) if $inline;
        print "Content-Type: application/postscript\n\n$text";
        return(1);
    } elsif ($type =~ /HTML|URL/) {           # remove original header
        $text =~ s:</?(BODY|HTML|HEAD)>::ig;
        $text =~ s:<TITLE>((\n|.)*)</TITLE>::ig;
        $title = $1;
        if ($inline) {
            $text =~ s:<H([0-9])>((\n|.)*)</H\1>::ig;
            $inline = $2 if $2;
            $inline = $inline || $title;
        }
        $is_html = 1;
    }
    if ($type =~ /TEXT|HTML|URL|WSRC/) {
        if ($inline) {
            print "<HR><h1>$inline</h1>\n";
        } else {
            &print_header ($title);
        }
        if ($convert) {
        	$text = &convert($text);
        	$is_html = $text =~ /<\w+>/; # sanity check for dump
                                             # converters
		}
        if ($is_html) {
            print $text;
        } else {
            # TS - '<' (and '&') are special in HTML documents. 
            # Replace them with '&amp;' and '&lt;' :
            # $text =~ s:&:&amp;:g;
            $text =~ s:<:&lt;:g;
            print "<PRE>\n$text\n</PRE>\n";
        }
        &print_footer unless $inline;
        return(1);
    } else {
        return(0) if $inline;
        print $text;
    }
}

sub convert {
    local($text) = @_;

    if ($converter{$convert}) { # external converter
        local($tmpfile,$_) = ("/usr/tmp/SFgate.$$");
        open(TMP, ">$tmpfile") ||
            die "Could not open $tmpfile: $!\n";
        print  TMP $text;
        close (TMP);

        # $text = '<PRE>';
        $text = '';
        open(CONVERT, "$converter{$convert} $tmpfile|") ||
            die "Could not call converter: $!\n";
        while (<CONVERT>) {
            next if /^%/;
            $text .= $_;
        }
        close(CONVERT);
        unlink($tmpfile);
        # return($text."</PRE>");
        return($text);
    } else {
        if ($convert =~ /label(.*)/) {
            local($sep,$label,$value,$_,$ntext);
            $ntext = "<pre>" unless $is_html;
            $sep = $1;
            for (split('\n',$text)) {
                ($label,$value) = m/^\s*(\w+)$sep(.*)/;
                if ($value && $label =~ /^\s*\w+/) {
                    $ntext .= "<b>$label</b>${sep}${value}\n";
                } else {
                    $ntext .= "$_\n";
                }
            }
            $ntext .= "\n</pre>" unless $is_html;
            return($ntext);
        } 
    }
    $text;
}


## general utilities

# logging messages

sub log {
    if ($logging == 1 && !$opt_t) {
        if (!open (TMP, ">> $log_file")) {
            $logging=0;
            return; 
# die ("Can't open log file $log_file: $!\nstopped");
        }
	print TMP  &Time_to_String (time, "GMT");
	print TMP " ";
	printf (TMP @_);
	print TMP "\n";
	close (TMP);
    }
}

# debugging messages
sub dprint {
    if ($debugging == 1)   {
	printf @_;
    }
}

sub ddump {
    local ($message, $buffer) = @_;
    if ($debugging == 1) {
	print "$message\n";
	local ($line1, $line2);
	$line1 = sprintf ("%06o  ", 0);
	$line2 = "        ";
	for ($i = 0; $i < length ($buffer); $i++) {
	    $byte = unpack ("C", substr ($buffer, $i, 1));
	    $line1 .= sprintf ("%3d ", $byte);
	    if (($byte < 32) || ($byte > 126)) {
		$line2 .= sprintf ("%3o ", $byte);}
	    else { $line2 .= sprintf ("%3c ", $byte);}
	    if (($i % 16) == 15) {
		print "$line1\n$line2\n";
		$line1 = sprintf ("%06o  ", $i+1);
		$line2 = "        " ;
		}}
	print "$line1\n$line2\n";
    }
}



@day_names = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat");

@month_names = 	     ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
		      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");

sub Time_to_String {
    local ($time, $zone) = @_;
    local ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $iddst);
    local ($zone_string);
    if ($zone eq "local") {
	$zone_string = "";
	($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $iddst) = 
	    localtime ($time);}
    elsif ($zone eq "" || $zone eq "GMT") {
	$zone_string = " GMT";
	($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $iddst) = 
	    gmtime($time);}
    else {die "Time zone $zone is not supported";}
    sprintf ("%s, %d %s %02d %02d:%02d:%02d%s",
	     $day_names[$wday],
	     $mday,
	     $month_names [$mon],
	     $year, $hour, $min, $sec,
	     $zone_string);
}

