#                              -*- Mode: Perl -*- 
# waisclient.pl -- 
# ITIID           : $ITI$ $Header $__Header$
# Author          : Miao-Jane Lin <mjlin@cs.cornell.edu>
# Created On      : Wed Apr 27 08:30:01 1994
# Last Modified By: Ulrich Pfeifer
# Last Modified On: Mon Dec 12 11:37:13 1994
# Update Count    : 388
# Status          : Unknown, Use with caution!
# 

# Copyright 1994 by Universty of Dortmund 

# HISTORY 
# 1-May-1994		Ulrich Pfeifer	
#    Last Modified: Sun May  1 13:08:56 1994 #40 (Ulrich Pfeifer)
#    Modified print_headline to print forms when document type eq WSRC
# 27-Apr-1994		Ulrich Pfeifer	
#    Added client code for use as CGI script. The added functions just
#    use the convenience functions of the original code. Most
#    functions just do the reverse of some original functions. The
#    server code remains untouched and should run as before.
#
#    INTERFACE:
#
#    Main interface function is 
#
#    	client($them, $database, $port, $queryType, $query)
#
#    where $them is the host to connect, $database is the database to
#    search, $port the port to use. $queryType ist the type of query
#    to send. It must be one of $TextRetrievalQuery or
#    $RelevanceFeedbackQuery. $query contains the query to send or the
#    document specification for a document to retrieve.
#
#    client is called by two convenience functions:
#
#    	do_waisq ($query, @databases)
#
#    where $query is the query to send and @databases is an array of
#    "server:port/database" connnection descriptions.
#
#    	get_text($url)
#
#    retrieves a document specified by a direct wais url. These look
#    like: "http://...?database/document type/size/4=connection;
#    5=database;6=docid;7=%00". See function parse_url for details.
#



#-------------------------------------------------------------------
# reverse of main
# connect a server, send searchAPU, receive and interprete response
#-------------------------------------------------------------------

sub client {
    local ($them, $database, $port, $queryType, $query) = @_;
    local ($name,$proto,$aliases,$type,$len,$thisaddr,$thataddr,$this,$that);
    $port = $defport unless $port;
    $them = $defserver unless $them;
    $database = $serverdb unless $database;

    &dprint("af_inet=%d sock_stream=%d sockaddr=%s hostname=%s\n",
            $af_inet, $sock_stream, $sockaddr, $hostname);
    ($name,$aliases,$proto) = getprotobyname('tcp');
    ($name,$aliases,$type,$len,$thisaddr) =
	gethostbyname($hostname);
    ($name,$aliases,$type,$len,$thataddr) = 
        gethostbyname($them);

    $this = pack($sockaddr, $af_inet, 0, $thisaddr);
    &dprint("thisaddr=%d,this=%d\n", $thisaddr,$this);
    $that = pack($sockaddr, $af_inet, $port, $thataddr);

# Make the socket filehandle.

    if (socket(S, $af_inet, $sock_stream, $proto='tcp')) { 
        &dprint("socket ok\n");
    }
    else {
        die $!;
    }

# Give the socket an address.

    if (bind(S, $this)) {
        &dprint ("bind ok\n");
    }
    else {
        die $!;
    }

# Call up the server.

    if (connect(S,$that)) {
        &dprint ("connect ok\n");
    }
    else {
        die $!;
    }

# Set socket to be command buffered.

    select(S); $| = 1; select(STDOUT);

    #&log("Connect: $them");

    &call_server(S, S, $database, $queryType, $query); 
    &directget(S, S, $database);
    close(S); # close connection with client
}

sub directget {
    local($in_file, $out_file, $database) = @_;
    local($no,$be_friendly,@documenttext);

    for $no ($[.. $#headlines) {
        ($score, $lines, $url, $headline) = split('\|', $headlines[$no]);
        ($type) = ($url =~ m:[^/]*/([^/]*)/:);
        if ($directget || $type eq "WSRC") {
            @documenttext = ();
            &call_server($in_file, $out_file, $database, $TextRetrievalQuery,
                         $url);
            $text = join('',@documenttext);
            $headlines[$no] = join('|', $score, $lines, $url, $headline, $text);
        }
    }
}
              

#-----------------------------------------------------------------
# help functions, interfaces to global variables
# global variables: $server, $database, $port
#-----------------------------------------------------------------

# add a headline to the global array
sub add_headline {
    local($documentID, $HeaderVersion, $score, $bestMatch, 
          $documentLength, $lines, $type, $source, $date, 
          $headline, $originCity) = @_;
    local($url,$connection,$edatabase);

    $connection = &encode("$server:$port");

    $documentID =~ s/4=[^;]*/4=$connection/ unless $server eq "$defserver";
    $documentID =~ s/\+/%2B/g;

    &dprint ("
documentID      = $documentID 
HeaderVersion   = $HeaderVersion 
score           = $score 
bestMatch       = $bestMatch 
documentLength  = $documentLength 
lines           = $lines 
type            = $type 
source          = $source 
date            = $date 
headline        = $headline 
originCity      = $originCity
");
    $documentID =~ s/\s/%20/g;                    # maybe we schuld encode all ?
    $edatabase = &encode($database);
#    if ($type eq "URL") {
#        ($url) = ($documentID =~ m/3=\d+%20\d+%20([^;]*)/);
#    } else {
        $documentID =~ s/:/%3A/g;
        $url = "$edatabase/$type/$documentLength/$documentID;";
#    }
    # global array @headlines *must* be deleted before next run of client
    push(@headlines, join('|', $score, $lines, $url, $headline));
}

# sub print_headlines {
#     local($_, $score, $headline, $url, $newurl, $type, $num, @documenttext);
#     local($databasename,$ipname,$tcpport,$description,$plain_inited,$form_inited);
# 
#     $num = $#headlines - $[ + 1;
# 
#     return unless $num;
# 
#     $num = $maxhits if $num > $maxhits;
#     # handle server descriptions
#     for (sort {&scoresort} @headlines) {
#         # handle server descriptions
#         ($score, $headline, $url, $type,$text) = split('\|', $_);
#         if ($type eq "WSRC") {
#             &init_form($num) unless $form_inited++;
#             ($databasename,$ipname,$tcpport,$description) = 
#                 &parse_wsrc($text);
#             &form_entry($ipname,$tcpport,$databasename);
#             last unless --$num;
#         }
#     } 
#     &exit_form if $form_inited;
#     for (sort {&scoresort} @headlines) {
#         # handle other document types
#         ($score, $headline, $url, $type, $text) = split('\|', $_);
#         #$url = $headline if $type eq "URL";
#         $headline =~ s/^>//;    # Bug in freeWAIS
#         if ($type ne "WSRC") {
#             if ($text) {
#                 &init_plain($num) unless $plain_inited++;
#                 &print_text($type,$text,$headline) ||
#                     print "<A HREF=\"$url\">$headline</A>\n";
#             } else {
#                 &init_plain($num,"<PRE>") unless $plain_inited++;
#                 if ($type=~ /,/) {
#                     # multiple types
#                     # JS>Don't do this. We don't know whether TEXT is even part of the document set.
#                     #                    $url =~ s:/$type/:/TEXT/:g;
#                     # JS>We need to know types for patching matching filenames               
#                     # UP>agreed. Never used filenames in headline
# 		    @types = split(/,/, $type);
#                     foreach $newtype (@types) {
#                         $newurl = $url;    # Original URL with A,B,C,... types
#                         $newurl =~ s:/$type/:/$newtype/:;  # JS>Replace multiple types with next in loop.
#                         # JS>and request max. doc size 
#                         /`/;    # to fool perl-mode
#                         $newurl =~ s:(6|3)=(\d+)(\s|%20)(\d+):$1=$2%20${maximum_message_size}0:g;
#                         /`/;
#                         # JS>Again, can't do this, because we don't know whether TEXT is in the set.
#                         # JS>We could key on the FIRST type. I believe freeWAIS does the same...
#                         #   ... unless $newtype eq 'TEXT';
#                         # remove unvalid chunk definition for other types
#                         # JS> Not sure what that does - can this happen?
#                         # UP> Dont like to use multiple lines for the sematic equal results
#                         $headline = $newtype unless $headline;
#                         # JS>OK, let's patch the filenames with the types
#                         # JS>assuming that types and filename extensions match...
#                         # UP>again not needed, if using one line for display, second and following links
#                         # UP>just containing the fileextension
#                         if ($multiple_multiline) {
#                             foreach $fileext (@types) {
#                                 $headline =~ s:.$fileext:.$newtype:;
#                             }
#                         }
#                         # JS>One line per document type...
#                         # JS>I'd also like to swap the two parts of $headline to get a nicer output.
#                         # UP> prefer one line for all links
#                         print "<A HREF=\"$newurl\">$headline</A>";
#                         # JS>Why?
#                         # UP>for oneline display;
#                         if ($multiple_multiline) {
#                             $headline = '';
#                         } else {
#                             print "\n";
#                         }
#                     }
#                     print "\n" if $multiple_multiline;
#                 } else {
#                     print "<A HREF=\"$url\">$headline</A>\n";
#                 }
#                 last unless --$num;
#             }
#         }
#     }
#     print "</PRE>\n";
# }


#sub add_diag {
#    local($code, $message) = @_;
#
#    # global array %diagnostics  *must* be deleted before next run of client
#    $diagnostics{$server} .= "<dt><b>$database</b><dd> $code $message<p>\n";
#}


# check a wais header and return the message length
sub check_header {
    local($wais_header) = @_;
    local($msg_len, $msg_type, $header_vers, $servertype, $compression,
     $encoding, $check_sum) = &Parse_WAIS_Header ($wais_header);

    if ($header_vers > $HeaderVersion) { 
        die "Incompatable header version. Can't handle it.\n";  
    }

    if ($encoding ne " ") {
        die "Encoded messages aren't supported.\n Stopped ";  
    }
    
    if ($compression ne " ") { 
        die "Compressed messages are not supported.\nStopped ";  
    }

    $msg_len;
}

# check a initResponseAPDU and return the maximum message size
sub check_init_response {
    local($packet) = @_;
    local($versionBM, $pdu_type, $msg_len, $maximum_message_size,
          $optionsBM, $prefSize, $maxMsgSize);

    $msg_len  = &readBinaryNumber(2, *packet);
    $pdu_type = &readBinaryNumber(1, *packet);
    &readBinaryNumber(1, *packet);
    &readBitMap(*versionBM, *packet);
    &readBitMap(*optionsBM, *packet);
    $prefSize = &readNumber(*packet);
    $maximum_message_size = &readNumber(*packet);
    #&dprint("\npdu=%s versionBM=%s optionsBM=%s, prefSize=%s, maxMsgSize=%s\n",
    #        $pdu_type, $versionBM, $optionsBM, $prefSize, $maxMsgSize);
    $maximum_message_size;
}


#-------------------------------------------------------------------
# call_server: reverse of serve_client
# arguments: in_file- file handler for input
#            out_file- file handler for output
#            database
#            queryType
#                $RelevanceFeedbackQuery: search
#                $TextRetrievalQuery: get document
#            query a query or and doctype url
#            chunk_no number of the chunk to retrieve
# it is invoked by parent process. actual server jobs are done here.
# keeps looping to serve client, until client quit, it aborts. 
#-------------------------------------------------------------------
sub call_server {
    local ($in_file, $out_file, $database, $queryType, $query, $chunk_no) = @_;
    local($send_packet, $wais_header, $msg_type, $servertype, $packet,
          $compression, $message, $encoding, $msg_len, $header_vers);
    local($be_friendly, $chunk_start, $chunk_end) = $be_friendly;

    if ($be_friendly) {
        $send_packet = &writeInitAPDU;
        $wais_header = &writeWAISPacketHeader
            (length($send_packet), $msg_type='z', $HeaderVersion, 
             $servertype='wais      ', $compression=" ", $encoding = " ", "0");

        print ($out_file $wais_header);
        print ($out_file $send_packet);

        if (read($in_file, $wais_header, 25) == 0) {  return;  }
        $msg_len = &check_header($wais_header);

        read($in_file, $packet, $msg_len);
        $maximum_message_size = &check_init_response($packet);
    }

    # do not understand why this is necessary - but it seems to
    if ($queryType == $RelevanceFeedbackQuery) {
        $send_packet =                            # ask a query
            &writeSearchAPDU($maxBufSize/2,$maxBufSize,$maxBufSize/2,1,'',
                             $database, $queryType,'', '',$query);
    } else {
        # now let's test if we need to get more than on  chunk
        ($chunk_start, $chunk_end, $chunk_no) =
            &get_chunk($chunk_no,$query,$chars_per_request);
        $send_packet =                            # get a document chunk
            &writeSearchAPDU(10,16,15,1,'FOO',
                             $database, $queryType,' Document Text',
                             pack($any_type,1,'3'),$query,$chunk_start, $chunk_end);
        &dprint("Requesting bytes from %d to %d\n", $chunk_start, $chunk_end);
        #printf(STDERR "Requesting bytes from %d to %d\n", $chunk_start, $chunk_end);
        #&ddump("SENDING request", $send_packet);
    }

    $wais_header = &writeWAISPacketHeader
        (length($send_packet), $msg_type='z', $HeaderVersion,
         $servertype='wais      ', $compression=" ", $encoding = " ", "0");

    print ($out_file $wais_header);
    print ($out_file $send_packet);

    # read wais_header (always 25 bytes long)
    # if read eof, means client quit, so will serve_client
    if (read($in_file, $wais_header, 25) == 0) {  return;  }
    $msg_len = &check_header($wais_header);
    $message='';
    while (length($message) < $msg_len) {                  
        # maybe the net is too slow ?
        read($in_file, $packet, $msg_len-length($message));
        if (($queryType == $TextRetrievalQuery) &&
            ($packet =~ /Missing DocID in request/)) {
            &dprint("Missing DocID in request\n");
            $chunk_no ='';
            $message .= $packet;
            last;
        }
        if (($queryType == $TextRetrievalQuery) &&
            ($packet =~ /Request out of range/)) {
            &dprint ("Request out of range\n");
            #printf(STDERR "Request out of range, Got %d bytes\n", length($packet));
            $chunk_no ='';
            $message .= $packet;
            last;
        }
        if (length($packet)==0) {
            # we didn't get anything, giveup!
            $chunk_no ='';
            last;
        }
        $message .= $packet;
    } 
    &dprint("Total got %d bytes\n", length($message));
    #printf(STDERR "Got %d bytes, Total=%d\n", length($message),$TOTAL+=length($message));
    # next, parse the message
    # first two bytes is the length
    &readBinaryNumber (2, *message);

    # read 1 byte pdu type
    $pdu_type = &readBinaryNumber(1, *message);
    &dprint("\nPDU type %d\n",$pdu_type);
    &ddump ("APDU", $message);
    # dispatch on PDU type
    if ($pdu_type == $SearchResponse_PDUtype) {
        $send_packet = &Handle_SearchResponseAPDU($message); }
    else {  
        die "Unknown APDU type $pdu_type.\nStopped";  
    }
    if ($chunk_no) {                     # get the rest
        $be_friendly = 0;                # no more initAPDU's
        &call_server($in_file, $out_file, $database, $queryType, $query, $chunk_no);
    }
}

# do not thrust this :-)
sub writeSearchAPDU {
    local($minMsgSize, $maxMsgSize, $prefMsgSize, $replace, $resultSetName,
          $databaseName, $queryType, $elementSetName, $refID, $query,
          $chunk_start, $chunk_end) = @_;
    local ($buf, $queryInfo, $header);

    # write 1 byte pdu type
    &writeBinaryNumber($Search_PDUtype,1, *buf);
    &writeBinaryNumber($minMsgSize,3,*buf);
    &writeBinaryNumber($maxMsgSize,3,*buf);
    &writeBinaryNumber($prefMsgSize,3,*buf);
    &writeBinaryNumber($replace,1,*buf);
    &writeString($resultSetName, $DT_ResultSetName, *buf);  
    &writeString($databaseName, $DT_DatabaseName, *buf);
    &writeString($queryType, $DT_QueryType, *buf);

    &writeString($elementSetName, $DT_ElementSetName, *buf);
    &writeBitMap($refID, $DT_ReferenceID, *buf);
    # record the length of SearchAPDU in header as 2 bytes ?
    &writeBinaryNumber(length($buf), 2, *header); 

    # attach header in the front of InitResponseAPDU
    $buf = $header . $buf;

    if ($queryType == $RelevanceFeedbackQuery) {
        # ask a query
        &writeString($query,$DT_SeedWord, *queryInfo);
        &writeNumber($WAISmaxdoc,$DT_MaxDocumentsRetrieved, *queryInfo);
        &writeCompressedInteger($DT_UserInfoLength, *buf);
        &writeCompressedInteger(length($queryInfo), *buf);
        $buf . $queryInfo;
    } elsif ($queryType == $TextRetrievalQuery) { 
        # ge a document
        $queryInfo = &unparseQueryTerms($query,$chunk_start, $chunk_end);
        &writeString($queryInfo,$DT_Query, *buf);
        $buf;
    }
}

# do not thrust this :-), the function parseQueryTerms is easier to
# satisfy than a real wais server
sub unparseQueryTerms {
    local($query, $chunk_start, $chunk_end) = @_;
    local($info,$mtmp);
    local($server, $database, $type, $low, $high, $path) = &parse_url($query);
    local($_,$tag,$val);

    local($position, $structure, $truncation, $completeness) =
        (' ig', ' ig', ' ig', ' ig');


    # doc id
    &writeString(pack("a2 a3 a3 a3 a3 a3",
                      ($AT_docID, ' re', $position, $structure, 
                       $truncation, $completeness)),
                 $DT_AttributeList,
                 *info);
    $query =~ s/^[^=]*(\d\=)/$1/;
    &dprint("DOCID: %d %d %s\n",$low, $high, $path);
    &dprint("QUERY: %s\n",$query);
    for (split(/;/, $query)) {
        if (/^(\d)\=([^=]*)/) {
            $tag=$1; $val=$2;
            &dprint("DOCID: %d=%s\n", $tag, &decode($val));
            &writeString (&decode($val),$tag,*mtmp);
        }
    }

    #&writeString ($docid, $DT_OriginalLocalID, *mtmp);
    #&writeString ($database, $DT_DistributorDatabase, *mtmp);
    #&writeNumber ($DocID_COPY_WITHOUT_RESTRICTION, $DT_CopyrightDisposition, *mtmp);
    &writeString($mtmp, $DT_Term, *info);
    
    # type
    &writeString(pack("a2 a3 a3 a3 a3 a3",
                      ($AT_docType, ' re', $position, $structure, 
                       $truncation, $completeness)),
                 $DT_AttributeList,
                *info);
    &writeString($type, $DT_Term, *info);
    &writeString("a", $DT_Operator, *info);

    # low
    &writeString(pack("a2 a3 a3 a3 a3 a3",
                      ($AT_bytePosition, ' ro', $position, $structure, 
                       $truncation, $completeness)),
                 $DT_AttributeList,
                *info);
    &writeString($chunk_start, $DT_Term, *info);
    &writeString("a", $DT_Operator, *info);

    # high
    &writeString(pack("a2 a3 a3 a3 a3 a3",
                      ($AT_bytePosition, ' rl', $position, $structure, 
                       $truncation, $completeness)),
                 $DT_AttributeList,
                *info);
    #&writeString(98975, $DT_Term, *info);
    &writeString($chunk_end, $DT_Term, *info);
    &writeString("a", $DT_Operator, *info);

    $info;
}

sub writeInitAPDU {
    local($initmsg,$header);

    &writeCompressedInteger($Init_PDUtype, *initmsg);
    &writeBitMap(&makeBitMap(1,"1"), $DT_ProtocolVersion, *initmsg);
    &writeBitMap(&makeBitMap(1,"1"), $DT_Option, *initmsg);
    &writeNumber($maximum_message_size,$DT_PrefMsgSize, *initmsg);
    &writeNumber($maximum_message_size,$DT_MaxMsgSize, *initmsg);
    &writeString("$me $revision, from host: $hostname, user:$ENV{'USER'}", 
                 $DT_IDAuthentication, *initmsg);
    &writeString($defaultID, $DT_ImplementationID, *initmsg);
    &writeString($defaultName,
                 $DT_ImplementationName,
                 *initmsg);
    &writeString($defaultVersion, $DT_ImplementationVersion, *initmsg);
    &writeBinaryNumber(length($initmsg), 2, *header);
    $header.$initmsg;
}

sub Handle_SearchResponseAPDU {
    local($buf) = @_;

    local ($searchStatus, $resultCount, $numberOfRecordReturned, 
	   $nextResultSetPosition, $resultSetStatus, $presentStatus,
	   $refID);

    # read 1 byte search status
    $searchStatus = &readBinaryNumber(1, *buf);

    # read 3 bytes result count
    $resultCount = &readBinaryNumber(3, *buf);

    # read 3 bytes number of records returned
    $numberOfRecordReturned = &readBinaryNumber(3, *buf);

    # read 3 bytes next result set position
    $nextResultSetPosition = &readBinaryNumber(3, *buf);

    if (&PeekTag($buf) == $DT_ResultSetStatus) {
        $resultSetStatus = &readNumber(*buf);
    }
    if (&PeekTag($buf) == $DT_PresentStatus) {
        $presentStatus = &readNumber(*buf);
    }
    if (&PeekTag($buf) == $DT_ReferenceID) {
        $refID = &readString(*buf); # was readBitmap buggy?
    }
    &dprint ("
searchStatus = $searchStatus
resultCount  = $resultCount
numberOfRecordReturned = $numberOfRecordReturned
nextResultSetPosition = $nextResultSetPosition
resultSetStatus = $resultSetStatus
presentStatus = $presentStatus
refID = $refID
");
    &readSearchResponseInfo(*buf);
}

# do not thrust this :-)
# Parsing should be the inverse of:
#    $info = $info . $docHeaders . $shortHeaders . $longHeaders . 
#	$text . $headlines . $codes . $diagnostics;
sub readSearchResponseInfo {
    local(*buf) = @_;
    local($len,$seed);

    if (&PeekTag($buf) == $DT_UserInfoLength) {
        &readBinaryNumber(1,*buf);
        $len = &readCompressedInteger(*buf); # ignore
        &dprint ("readSearchResponseInfo: len = $len\n");
        if (&PeekTag($buf) == $DT_SeedWordUsed) {
            $seed = &readString(*buf);
            &dprint ("seed = $seed\n");
        }
    }
    &readWAISDocumentHeader(*buf);
    &readWAISDocumentText(*buf);
    &readDiagnostics(*buf);
    &ddump ("Rest APDU", $buf) if length ($buf);
    #print "Next tag is ", &PeekTag($buf), "\n"; 
}

# do not thrust this :-)
# This was generated by trial !
sub readWAISDocumentText {
    local (*inbuf) = @_;
    local ($xbuf,$docany);
    local ($docid,$version,$text);

    return unless &PeekTag($inbuf) == $DT_DocumentTextGroup;
    $xbuf = &readString(*inbuf);

    if (&PeekTag($xbuf) == $DT_DocumentID) {
        $docany = &readString(*xbuf);
        $docid = &readDocAny($docany);
    }
    if (&PeekTag($xbuf) == $DT_VersionNumber) {
        $version = &readNumber(*xbuf);
    }
    if (&PeekTag($xbuf) == $DT_DocumentText) {
        $text = &readString(*xbuf);
        &add_text($text);
    }
    # is there something left?
    &dprint("Next tag is %d\n", &PeekTag($xbuf)) if length($xbuf);
    &ddump ("Rest DocumentText", (substr($xbuf,0,40))) if length ($xbuf);
}

sub readWAISDocumentHeader {
    local (*inbuf) = @_;
    local ($type, $source, $date, $headline, $originCity);
    local ($documentID, $HeaderVersion, $score, $bestMatch, 
           $documentLength, $lines, $tbuf, $tmp);

    while (&PeekTag($inbuf) == $DT_DocumentHeaderGroup) {
        ($documentID, $HeaderVersion, $score, $bestMatch, $documentLength,
         $lines, $type, $source, $date, $headline, $originCity) = ();

        $tbuf = &readString(*inbuf);

        if (&PeekTag($tbuf) == $DT_DocumentID) {
            $tmp = &readString(*tbuf);
            $documentID = &readDocAny($tmp);
        }
        if (&PeekTag($tbuf) == $DT_VersionNumber) {
            $HeaderVersion = &readNumber(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_Score) {
            $score = &readNumber(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_BestMatch) {
            $bestMatch  = &readNumber(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_DocumentLength) {
            $documentLength = &readNumber(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_Lines) {
            $lines = &readNumber(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_TYPE_BLOCK) {
            $tmp = &readString(*tbuf);
            while (&PeekTag($tmp) == $DT_TYPE) {
                $type .= &readString(*tmp).',';
            }
            chop($type);
        }
        if (&PeekTag($tbuf) == $DT_Source) {
            $source = &readString(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_Date) {
            $date= &readString(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_Headline) {
            $headline = &readString(*tbuf);
        }
        if (&PeekTag($tbuf) == $DT_OriginCity) {
            $originCity =  &readString(*tbuf);
        }
        &add_headline($documentID, $HeaderVersion, $score, $bestMatch, 
                      $documentLength, $lines, $type, $source, $date, 
                      $headline, $originCity)
    }
}
        
sub readDiagnostics {
    local(*buf) = @_;
    local($size, $code, $diag);

    if (&PeekTag($buf) == $DT_DatabaseDiagnosticRecord) {
        &readCompressedInteger(*buf); # ignore tag
        $size = &readBinaryNumber(2,*buf);
        $code = substr($buf,0, $diagnosticCodeSize-1);
        $diag = substr($buf,$diagnosticCodeSize-1, $size-$diagnosticCodeSize);
        &dprint ("code=$code diag=$diag\n");
        &add_diag($server,$database,"$code $diag") unless $diag =~ /Request out of range/;
        $buf = substr($buf,$size);
        while ((unpack('C', $buf) != $endOfRecord) && (length($buf))) {
            print "Skipping ", (substr($buf,0,1)), "\n";
            $buf = substr($buf,1);
        }
        $buf = substr($buf,1);
    }
}

#-----------------------------------------------------------------
# here are convenience function for calling client
#-----------------------------------------------------------------

#------------------------------------------------------------------------
# ask a query
#------------------------------------------------------------------------
sub do_waisq {
    local($query, @databases) = @_;
    local($connection, $server, $database, $port);

    # clean global variables (dynamic binding !)
    local(%diagnostics, @headlines, @documenttext);

    for $connection (@databases) {
        if ($connection =~ m#/#) {
            ($server,$database) = split('/', $connection, 2);
        } else {
            $server   = "$defserver:$defport";
            $database = $connection;
        }
        if ($server =~ /:/) {
            ($server,$port) = split(':', $server);
        } else {
            $port = $defport;
        }
        &dprint("client($server, $database, $query, $port)\n");
        &client($server, $database, $port, $RelevanceFeedbackQuery, $query);
    }

    &print_header ("$query in @databases");
    print <<"EOH"
<DL>
<DT>Your query was:
<DD><b>$query</b>
</DL>
EOH
    ;

    &print_diag;
    &print_headlines;
    #print "<HR>\n<A HREF=\"http:$ENV{'PATH_INFO'}\">Back</A> to query form.\n";
    &print_footer;
    0;
}

sub get_text {
    local($url,$noprint) = @_;

    local($connetion, $database, $doctype) = &parse_url($url);
    local($server, $port) = split(/:/, $connetion);

    $port = 210 unless $port;
    $database = &decode($database);
    &dprint("server=%s database=%s port=%s qtype=%s url=%s",
            $server, $database, $port, 
            $TextRetrievalQuery, 
            $url);
    &client($server, $database, $port, 
            $TextRetrievalQuery, 
            $url);
    unless ($noprint){
        &print_diag;
        &print_text($doctype);
    }
}

if ($0 =~ /waisserver/) {
    if ($#ARGV == $[) {
        &get_text(@ARGV);
    } else {
        &do_waisq(@ARGV);
    }
} else {
    1;
}

