#!/usr/bin/perl
#-------------------------------------------------------------------
# This file is part of the wwwoffle-sql package
#
#  Version : 0.02  Released : 03/05/99
#
# Author : Peter Marbaise (pema@hippo.fido.de,2:2452/110.20)
#
#              GNU GENERAL PUBLIC LICENSE Version 2
#              (see LICENSE for the complete text)
#
#-------------------------------------------------------------------

# use FileHandle;
use lib '/usr/local/apache/share/htdocs/wwwoffle-sql/';
use wwwofflelib;
use CGI;

# Run-wide state: the output-mode flag (1 = CGI/HTML, 0 = plain text) and
# the two scan counters all start at zero.
( $cgimode, $countpages, $counthosts ) = ( 0, 0, 0 );

# initdb() is not defined in this file (presumably it comes from
# wwwofflelib); its two return values are kept in $dbh and $drh.
( $dbh, $drh ) = initdb();

# InitVar()
#   Resets the package globals that describe a single page record:
#   $length goes back to 0, every string field back to the empty string.
sub InitVar {
    $length = 0;
    $server = $date = $keywords = $page = $version = $usrkeywords = "";
}


# ReadDir( $dir )
#   Returns the names of all entries of directory $dir except "." and "..",
#   in the order readdir() delivers them.  The same list is also left in the
#   package variable @allfiles, as it always was.
#   Dies, including the system error text, if the directory cannot be opened.
sub ReadDir {
    my ($dir) = @_;

    # "or" instead of "||": the high-precedence "||" attached the die to the
    # directory-name argument rather than to opendir, so a failed opendir
    # went unnoticed and the caller silently got an empty list.
    opendir( my $dh, $dir ) or die "couldnt open directory $dir: $!";
    @allfiles = grep { !/^\.\.?$/ } readdir $dh;
    closedir $dh;
    return @allfiles;
}
# PrintHost( $host, $page, $found )
#   Prints the host and page part of one result entry.
#     $host  - URL prefix of the host (scheme plus host name)
#     $page  - path of the page on that host
#     $found - 0 for the first page of a host, in which case the host itself
#              is printed too; any other value for follow-up pages
#   In CGI mode ($cgimode == 1) the output is the opening <TR> and the first
#   two cells of a table row; the row is not closed here.  In plain-text mode
#   the host gets a line of its own and the page is indented by two blanks.
sub PrintHost {
    my ( $host, $page, $found ) = @_;

    if ( $cgimode == 1 ) {
        # The values come from cached URLs, so they are escaped before being
        # placed into markup, and the HREF attributes are quoted.
        my $escape = sub {
            my ($text) = @_;
            $text =~ s/&/&amp;/g;
            $text =~ s/</&lt;/g;
            $text =~ s/>/&gt;/g;
            $text =~ s/"/&quot;/g;
            return $text;
        };
        my $host_html = $escape->($host);
        my $page_html = $escape->($page);

        if ( $found == 0 ) {
            print qq{<TR><TD><A HREF="$host_html">$host_html</A></TD>};
        }
        else {
            # Follow-up rows used to start without <TR>; every row has to
            # open one because each row is closed with </TR> later on.
            print "<TR><TD>&#160;</TD>";
        }
        print qq{<TD><A HREF="$host_html$page_html">$page_html</A></TD>};
    }
    else {
        print "$host\n" if $found == 0;
        print "  $page\n";
    }
}

# ReadFile( $path, $hosttype )
#   Scans the file $path line by line and fills these package globals from
#   what it finds:
#     $server   - value of the "Server:" header
#     $date     - day, month, year and time taken from the "Date:" header
#     $length   - "Content-Length:" value, or the number following an
#                 archive name such as ".zip (" on a line
#     $keywords - gets the <title> text and meta keywords appended
#   Each Server/Date/length hit and each "Content-Type: application/" line
#   counts as one hit; reading stops once 2 hits ($hosttype == 1) or 4 hits
#   (any other $hosttype) are reached.
#   Afterwards quotes and commas in $keywords are replaced and the keywords
#   are printed: as the last table cell plus </TR> in CGI mode, as a
#   "keywords ..." line in plain-text mode (only when not empty).
#   Returns 1 if a "Content-Type: text/html;" line was seen, otherwise 0.
#   Dies, including the system error text, if the file cannot be opened.
sub ReadFile {
    # Both remain package globals, exactly as before.
    $DFILE    = $_[0];
    $hosttype = $_[1];

    my $is_html = 0;
    my $hits    = 0;
    my $wanted  = ( $hosttype == 1 ) ? 2 : 4;

    # Three-argument open on a lexical handle: the name is taken literally
    # (no mode characters or pipes are interpreted) and the error says why.
    open( my $dfh, '<', $DFILE ) or die "can't open $DFILE: $!\n";
    LINE: while ( my $line = <$dfh> ) {
        if ( $line =~ /(^Server\: )([a-zA-Z0-9\-\/\.]*)/ ) {
            $server = $2;
            $hits++;
        }
        if ( $line =~ /^Content-Type\: text\/html;/i ) {
            $is_html = 1;
        }
        if ( $line =~ /^Content-Type\: application\//i ) {
            $hits++;
        }
        if ( $line =~ /(^Date\: .{3},[ ]+)(\d+[ ]+\w+[ ]+\d+)[ ]+(\d+\:\d+\:\d+)/ ) {
            $date = $2 . " " . $3;
            $hits++;
        }
        # title and keywords are not handled for multiple lines so far
        if ( $line =~ /(^\<title\>)(.*)(<\/title\>)/i ) {
            $keywords = $keywords . " " . $2;
        }
        if ( $line =~ /(meta name\=\"keywords\"[ ]*)(content\=\")(.*)(\")/i ) {
            $keywords = $keywords . " " . $3;
        }
        if ( $line =~ /(meta keywords[ ]*)(\=\")(.*)(\")/i ) {
            $keywords = $keywords . " " . $3;
        }
        if ( $line =~ /(^Content\-Length\:[ ]*)(\d+)/i ) {
            $length = $2;
            $hits++;
        }
        if ( $line =~ /(\.zip|\.tgz|\.gz|\.exe|\.bz|\.bz2)([ ]*\()(\d+)/ ) {
            $length = $3;
            $hits++;
        }
        # ">=" so that a line which counts twice cannot step over the limit
        # and keep the loop running to the end of the file.
        last LINE if ( $hits >= $wanted );
    }
    close $dfh;

    # Work on $keywords directly so that the caller's $_ is left alone.
    $keywords =~ s/\'/\"/g;
    $keywords =~ s/,/ /g;

    # NOTE(review): $keywords is written into the HTML as it is.  It is taken
    # from page markup and may already contain entities, so escaping it here
    # needs a decision first.
    if ( $keywords ne "" ) {
        if ( $cgimode == 1 ) {
            print "<TD>$keywords</TD></TR>";
        }
        else {
            print "keywords $keywords\n";
        }
    }
    elsif ( $cgimode == 1 ) {
        print "<TD>&#160;</TD></TR>";
    }
    return ($is_html);
}


# ReadWWWoffle( $spooldir )
#   Walks the directory $spooldir: every entry in it is taken as a host
#   directory, and in each host directory every file whose name starts with
#   "U" is read line by line.  Lines containing "&" or "?" are skipped.  A
#   line is handled when it is
#     - an http URL ending in .html/.htm,
#     - an ftp or http URL ending in an archive suffix (.gz, .zip, ...), or
#     - an http URL ending in "/" (only kept if ReadFile reports text/html).
#   For a handled line the file with the same name but a leading "D" is
#   passed to ReadFile, the entry is printed through PrintHost and stored
#   with insertdb.  $counthosts and $countpages are incremented on the way.
#   Dies if a "U" file cannot be opened.
sub ReadWWWoffle {
    my ($spooldir) = @_;

    # read the hosts
    my @hostnames = ReadDir($spooldir);
    foreach my $hostname ( sort @hostnames ) {
        ++$counthosts;
        my $hostdir = $spooldir . "/" . $hostname;

        # A plain file lying next to the host directories has nothing to scan.
        next unless -d $hostdir;

        # read the files on hosts
        my @entries = ReadDir($hostdir);
        my $found   = 0;    # becomes 1 once a page of this host was printed
        my $host;           # set together with the first page of this host
        foreach my $entry ( sort @entries ) {
            InitVar();
            next unless substr( $entry, 0, 1 ) eq "U";

            my $ufile = $hostdir . "/" . $entry;
            my $dfile = $hostdir . "/D" . substr( $entry, 1 );

            # The message used to interpolate $rfile, which is never set, so
            # the failing file was not named; $! adds the reason.
            open( my $ufh, '<', $ufile ) or die "can't open $ufile: $!\n";
            while ( my $zeile = <$ufh> ) {
                next if $zeile =~ /\&|\?/;

                if ( $zeile =~ /(^http\:\/\/[a-zA-Z0-9\.-]*)(\/.*)(\.html|\.htm)$/ ) {
                    $host = "http://" . $hostname if $found == 0;
                    $page = $2 . $3;
                    $page =~ s/\%7E/\~/;
                    PrintHost( $host, $page, $found );
                    ReadFile( $dfile, 0 );
                    insertdb( $host, $page, $length, $keywords, $date, $server, $version, $usrkeywords );
                    $found = 1;
                    ++$countpages;
                }
                elsif ( $zeile =~ /(ftp\:\/\/[a-zA-Z0-9\.-]*)(\/.*)(\.gz|\.tgz|\.tar\.gz|\.zip|\.exe|\.bz|\.bz2)$/ ) {
                    $host = "ftp://" . $hostname if $found == 0;
                    $page = $2 . $3;
                    PrintHost( $host, $page, $found );
                    ($version) = findversion($page);
                    # the last path component seeds the keywords
                    $page =~ /([a-zA-Z0-9\.-]*)$/;
                    $keywords = $1;
                    ReadFile( $dfile, 1 );
                    insertdb( $host, $page, $length, $keywords, $date, $server, $version, $usrkeywords );
                    $found = 1;
                    ++$countpages;
                }
                elsif ( $zeile =~ /(http\:\/\/[a-zA-Z0-9\.-]*)(\/.*)(\.gz|\.tgz|\.tar\.gz|\.zip|\.exe|\.bz|\.bz2)$/ ) {
                    $host = "http://" . $hostname if $found == 0;
                    $page = $2 . $3;
                    PrintHost( $host, $page, $found );
                    ($version) = findversion($page);
                    # the last path component seeds the keywords
                    $page =~ /([a-zA-Z0-9\.-]*)$/;
                    $keywords = $1;
                    ReadFile( $dfile, 0 );
                    insertdb( $host, $page, $length, $keywords, $date, $server, $version, $usrkeywords );
                    $found = 1;
                    ++$countpages;
                }
                elsif ( $zeile =~ /(^http\:\/\/[a-zA-Z0-9\.-]*)(\/.*)(\/)$/ ) {
                    $host = "http://" . $hostname if $found == 0;
                    $page = $2 . $3;
                    # NOTE(review): ReadFile prints the keywords itself, so in
                    # this branch they are written before the host/page output
                    # and also when the entry turns out not to be HTML.
                    my $rc = ReadFile( $dfile, 0 );
                    if ( $rc == 1 ) {
                        PrintHost( $host, $page, $found );
                        insertdb( $host, $page, $length, $keywords, $date, $server, $version, $usrkeywords );
                        $found = 1;
                        ++$countpages;
                    }
                }
            }
            close $ufh;
        }
    }
}    # end of ReadWWWoffle


#initdb();
# Main program: print a header, scan the ftp and http spool directories,
# print a footer.  HTML is produced when the environment carries a browser
# identification (HTTP_USER_AGENT), plain text otherwise.
if ( defined $ENV{'HTTP_USER_AGENT'} ) {
    $cgimode = 1;
}

if ( $cgimode == 1 ) {
    # Explicit class-method call; the indirect form "new CGI" depends on how
    # the parser happens to resolve the two barewords.
    $query = CGI->new;
    print $query->header;
    htmlheader( "Reading the wwwoffle-database",
                "Results from reading wwwoffle spool dir" );

    # heading row of the result table: Host / Page / Keywords
    print "<TABLE BORDER=0 CELLPADDING=0>";
    print "<TR>";
    print "<TH WIDTH=60 ALIGN=LEFT VALIGN=TOP>",  "Host",     "</TH>";
    print "<TH WIDTH=250 ALIGN=LEFT VALIGN=TOP>", "Page",     "</TH>";
    print "<TH WIDTH=20 ALIGN=LEFT VALIGN=TOP>",  "Keywords", "</TH></TR>";
}
else {
    print "\n WWWoffle-SQL scripts (c) Peter Marbaise 1998\n";
    print "----------------------------------------------\n";
}

ReadWWWoffle("/var/spool/wwwoffle/ftp");
ReadWWWoffle("/var/spool/wwwoffle/http");

if ( $cgimode == 1 ) {
    print "</TABLE><HR>";
    htmlfooter();
}
else {
    print "\n----------------------------------------------\n";
    print " WWWoffle-SQL scripts (c) Peter Marbaise 1998\n";
    print "Release : 0.02 03/05/99\n";
}
exit 0;
# end of READWWWOFFLE 
