#!/usr/local/bin/perl
#
# a perl script to handle WAIS requests through mail.
# this script will use waisq to do its dirty work.

$ENV{'PATH'} = $ENV{'PATH'}.":/proj/wais/latest/bin";

$maxres = 10;
$search = 0;
$retrieve = 0;
$help = 0;
$inheaders = 1;
$indocid = 0;

$waissourcedir = "/proj/wais/wais-sources";
$helpfile = "/users/menlo-park/jonathan/waismail.help";

$tmpfile = "/tmp/waismail.$$";
$outfile = "/tmp/waismail.out.$$";
$errfile = "/tmp/waismail.err.$$";
$msgfile = "/tmp/waismail.in.$$";
$sfile   = "waismail.$$.src";
$logfile = "/var/log/waismail.log";

open(LOG, ">>$logfile");
open(MSG, ">$msgfile");

while (<>) {
 if ($indocid == 1 || $inreldoc == 1) {
  if (!(/^\n/)) {
   if ($indocid == 1) {
    $docid = $docid.$_; chop($docid);
   }
   else {
    $relevant = $relevant.$_; chop($relevant);
   }
  }
  else { 
   if ($indocid == 1) { &doretrieve(); $indocid = 0;}
   else {push(@reldocs, $relevant); $inreldoc = 0;}
  }
 }
 else {
  if ($inheaders == 1) {
   if (/^Subject:/) {
    $subject = $';
    chop($subject);
   }
   if (/^from |^From |^From: |^Reply-to: /) {
    $from = $'; chop($from);
   }
   if (/^\n/) {
    $inheaders = 0;
   }
  }
  else {
   printf MSG "%s", $_;
   if (/^[ \t]{0,}help|^[ \t]{0,}Help|^[ \t]{0,}HELP/) {
    $help = 1;
   }
   if (/^maxres (\d+)/) {
    $maxres = $1;
   }
   if (/^search |^Search |^SEARCH /) {
    @sources = ();
    $search = 1;
    @words = split(' ', $');
    push(@sources, shift(@words));
    if (/"(.*)"/) {
      @sources = split(' ', $1);
      @words = split(' ', $');
    }
    @ss = ();
    foreach $source (@sources) {
     $_ = $source;
     s/.src//g; push(@ss, $_);
    }
    @sources = @ss;
    &dosearch();
   }
   if (/^retrieve |^Retrieve |^RETRIEVE |^[ \t]{0,}DocID: /) {
    $retrieve = 1; $docid = $'; chop($docid); $indocid = 1;
   }
   if (/^like |^Like |^LIKE /) {
    $retrieve = 0; $relevant = $'; chop($relevant); $inreldoc = 1;
   }
  }
 }
}

close(MSG);

if ($indocid == 1) {
   $indocid = 0;
   &doretrieve();
}

# didn't understand anything in the message.  Say so.
if ($help == 0 && $search == 0 && $retrieve == 0) {
 &logdate();
 printf LOG "Error: unknown message from $from\n";
 open(TMP, ">$tmpfile");
 printf TMP "From: WAISmail\nTo: $from\nSubject: Your WAIS Request: $subject\n\n";
 printf TMP "*****Your request:\n";
 printf TMP "______________________________________________________________________\n";
 open(MSG, "$msgfile");
 while (<MSG>) { printf TMP "%s", $_;}
 close (MSG);
 printf TMP "______________________________________________________________________\n";
 printf TMP "\nwas unknown to me, so...*****\n";
 printf TMP "Send a message containing the word \"help\" for more information.\n";
 close(TMP);
 $_ = $from;
 if (!(/WAISmail|waismail/)) {
  system("cat $tmpfile | /usr/lib/sendmail '$from'");
 }
 unlink($tmpfile);
} 

unlink("$msgfile");

if ($help == 1) {
 &logdate();
 printf LOG "Sending help to $from\n";
 open(TMP, ">$tmpfile");
 printf TMP "From: WAISmail\nTo: $from\nSubject: Your WAIS Request: $subject\n\n";
 close(TMP);
 $_ = $from;
 if (!(/WAISmail|waismail/)) {
   system("cat $tmpfile $helpfile | /usr/lib/sendmail '$from'");
 }
 unlink($tmpfile);
} 

if ($retrieve == 1 || $search == 1) {
 open(TMP, ">$tmpfile");
 printf TMP "From: WAISmail\nTo: $from\nSubject: Your WAIS Request: $subject\n\n";
 close(TMP);
 $_ = $from;
 if (!(/WAISmail|waismail/)) {
  system("cat $tmpfile $outfile | /usr/lib/sendmail '$from'");
 }
 unlink($tmpfile);
 unlink($outfile);
}

close (LOG);

sub dosearch {
 foreach $source (@sources) {
  if(!(-f "$waissourcedir/$source.src")) {
   &badsource();
  }
 }
# while(<SRC>) {
#	/:ip-name[ \t]{0,}"(.*)"/ && ($ipname = $1);
#	/:database-name[ \t]{0,}"(.*)"/ && ($databasename = $1);
#	/:tcp-port[ \t]{0,}"(.*)"/ && ($tcpport = $1);
#	/:tcp-port[ \t]{0,}(\d+)/ && ($tcpport = $1);
#	/:maintainer[ \t]{0,}"(.*)"/ && ($maintainer = $1);
# }
 open(TMP, ">$tmpfile");
 printf TMP "(:question :version  2\n :seed-words \"";
 foreach $w (@words) { printf TMP "$w ";};
 printf TMP "\"\n :relevant-documents\n ( ";
 if ($relevant) {
  foreach $rel (@reldocs) {
   $_ = $rel;
   /@/ &&  ($_ = $`) && (/:/) && ($id = $`) && ($db = $');
   printf TMP "\n  (:document-id \n   :document \n   (:document \n    :doc-id \n";
   printf TMP "     (:doc-id \n      :original-database %s \n      :original-local-id %s\n     )\n", 
	     &stringtoany($db), &stringtoany($id);
   printf TMP "    :source (:source-id :filename \"$source.src\" )\n";
   printf TMP "    ) )\n";
  }
 }
 printf TMP " )\n";
 printf TMP " :sourcepath \"$waissourcedir/:\" \n";
 printf TMP " :sources (\n";
 foreach $source (@sources) {
  printf TMP "  (:source-id :filename \"$source.src\" )\n";
 }
 printf TMP " )\n";
 printf TMP " :maximum-results %d )\n", $maxres;
 close(TMP);
 &logdate(); printf LOG "Searching @sources, words: \"";
 foreach $w (@words) { printf LOG "$w "; };
 if ($relevant) {
  foreach $rel (@reldocs) {
   $_ = $rel;
   { printf LOG "RelDocID: \"$rel\" ";}
  }
 }
 printf LOG "\" for $from\n";
 open (OUT, ">>$outfile");
 printf OUT "Searching: ";
 foreach $source (@sources) {
  printf OUT "$source ";
 }
 printf OUT "\nKeywords: ";
 foreach $w (@words) { printf OUT "$w "; };
 if ($relevant) {
  foreach $rel (@reldocs) {
   $_ = $rel;
   { printf OUT "\nRelDocID: \"$rel\"";}
  }
 }
 printf OUT "\n";
 system("waisq -f $tmpfile -g >> /dev/null 2> $errfile");
 open(ERR, "$errfile");
 while (<ERR>) { 
  if (/Connect to socket did not work:/) {
   &logdate();
   printf LOG "Error Searching @sources for $from: Bad connect (source down?)\n";
   &logdate(); printf LOG "Error: $_";
   printf OUT "\n**** Error Searching @sources: not responding ****\n";
   printf OUT "\tPlease send mail to the maintainer.\n";
  }
 }
 close(ERR);
 unlink($errfile);
 open(TMP, "$tmpfile");
 $inres = 0;
 while(<TMP>) {
   /:result-doc/ && ($inres = 1);
   if ($inres == 1) {
       /:score\s+(\d+)/ && ($score = $1);
       ((/:headline "(.*)"$/ && ($headline = $1)) ||
        (/:headline "(.*)$/ && ($headline = $1)));
       /:number-of-bytes\s+(\d+)/ && ($bytes = $1);
       /:type "(.*)"/ && ($type = $1);
       /:filename "(.*)"/ && ($sourcename = $1);
       /:original-database / && ($database = $');
       /:original-local-id / && ($docid = $');
       /:date "(\d+)"/ && ($date = $1, &docdone);
       }
 }
 printf OUT "\n______________________________________________________________________\n\n";
 close(TMP);
 close(OUT);
 $relevant = ''; @reldocs = '';
 unlink($tmpfile);
 }

sub doretrieve {
 $port = "0";
 $_ = $docid;
 s/^DocID: //g;
 if (/%/) {
  $docid = $`;
  $type = $';
 }
 $_ = $docid;
 /:/ && ($id = $`) && ($db = $');
 /@/ &&  ($_ = $`) && (/:/) && ($id = $`) && ($db = $');
 $_ = $docid;
 /@/ &&  ($_ = $') && (/:/) && ($host = $`) && ($port = $');
 open(SRC, ">/tmp/$sfile");
 printf SRC "(:source :version 3 \n";
 printf SRC " :database-name \"$db\" :ip-name \"$host\" :tcp-port $port)\n";
 close(SRC);
 open(TMP, ">$tmpfile");
 printf TMP "(:question :version 2 :result-documents \n";
 printf TMP "  ( (:document-id :document (:document :doc-id\n";
 printf TMP "    (:doc-id :original-database %s\n", &stringtoany($db);
 printf TMP "     :original-local-id %s )\n", &stringtoany($id);
 printf TMP "     :number-of-bytes -1 :type \"$type\"\n";
 printf TMP "     :source (:source-id :filename \"$sfile\") ) ) ) )\n";
 close(TMP);
 &logdate(); printf LOG "Sending \"$docid%%$type\" to $from\n";
 open(OUT, ">>$outfile");
 printf OUT "Retrieving DocID: \"$docid\"\nType: $type\n\n";
 printf OUT "______________________________________________________________________\n";
 close(OUT);
 $docid = $docid."%".$type;
 if ($type eq "" || $type eq "TEXT" || $type eq "WSRC") {
  $exres = system("waisq -s /tmp/ -f $tmpfile -v 1 >> $outfile 2> $errfile");
 }
 else {
  $exres = system("(waisq -s /tmp/ -f $tmpfile -v 1 | uuencode WAIS.res >> $outfile) 2> $errfile");
 }  
 unlink("/tmp/$sfile");
 open(OUT, ">>$outfile");
 open(ERR, "$errfile");
 while (<ERR>) { 
  if (/Missing DocID in request|Could not find Source/) {
   s/done//g;
   printf OUT "Error getting document:\n $_\n";
   printf OUT "(This is usually a bad DocID,\n or the server has deleted the document since you ran the search)\n";
   &logdate();
   printf LOG "Error Sending \"%s\" to $from: Bad DocID\n", $docid;
  }
 }
 close(ERR);
 unlink($errfile);
 printf OUT "______________________________________________________________________\n";
 close(OUT);
}

sub docdone {
 open(SRC, "$waissourcedir/$sourcename");
 while(<SRC>) {
	/:ip-name[ \t]{0,}"(.*)"/ && ($ipname = $1);
	/:database-name[ \t]{0,}"(.*)"/ && ($databasename = $1);
	/:tcp-port[ \t]{0,}"(.*)"/ && ($tcpport = $1);
	/:tcp-port[ \t]{0,}(\d+)/ && ($tcpport = $1);
	/:maintainer[ \t]{0,}"(.*)"/ && ($maintainer = $1);
 }
 close(SRC);
 select(OUT); chop($database); chop($docid); $num++;
 printf "\nResult #%2d Score:%4d lines:%3d bytes:%7d Date:%6d Type: %s\n", $num,  $score,  $lines, $bytes, $date, $type;
   printf "Headline: %s\n", $headline;
   printf "DocID: %s:%s", &anytostring($docid), &anytostring($database);
   if ($tcpport != 0) { printf "@%s:%d", $ipname, $tcpport; }
   printf "%%$type\n";
   $score = $headline = $lines = $bytes = $type = $date = '';
}

sub anytostring {
 local($any) = pop(@_);
 $res = '';
 $_ = $any;
 if (/:bytes  #\((.*)\)(.*)\)/ && ($string = $1)) {
   @chars = split(' ', $string);
   foreach $c (@chars) {
    $res = $res.sprintf("%c", $c);
   }
 }
 $res;
}

sub stringtoany {
 local($str) = pop(@_);
 $len = length($str);
 $res = sprintf("(:any  :size  %d :bytes #(  ", $len);
 for ($i = 0; $i < $len; $i++) {
  $res = $res.sprintf("%d  ", ord(substr($str,$i,1)));
 }
 $res = $res.")  )";
 $res;
}

sub badsource {
 &logdate();
 printf LOG "Unknown source: $source, sending sources to $from\n";
 open(TMP, ">$tmpfile");
 printf TMP "From: WAISmail\nTo: $from\nSubject: Your WAIS Request: $subject\n\n";
 printf TMP "The source you requested for searching\n\n";
 printf TMP "\t$source\n\n";
 printf TMP "is not available for searching.  You may search the following sources:\n\n";
 close(TMP);
 system("cd $waissourcedir;ls *.src >> $tmpfile");
 system("cat $tmpfile | /usr/lib/sendmail '$from'");
 unlink($tmpfile);
 exit 0;
} 

sub logdate {
 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
 $mon = $mon + 1;
 printf LOG "$mon/$mday/$year %02d:%02d:%02d: ", $hour,$min,$sec;
}
