#!/usr/bin/perl ######################################################################### #site Search #Version 2.00b #copyright 1997-1998 Krishnan Jayakrishnan. #All Rights Reserved #------------------------------------------------------------------------ #copyright Notice:- #site Search is a shareware program and may be used , modified or #distributed by anyone free of charge. site Search is provided on an #"as-is" basis and Krishnan Jayakrishnan makes no warranties either #expressed or implied with respect to the software's merchantablity or #fitness for any particular purpose. By using site Search, you agree to #indemnify Krishnan Jayakrishnan from any and all incidental or #consequential damages that may arise from its use. Please obtain a #written permission before selling this program.In all cases this #copyright Notice must remain intact. #------------------------------------------------------------------------ #Contact information #krishnan@bayou.uh.edu # #Krishnan . J #Allied Geophysical Labs #University of Houston #Houston TX77204-4231 #------------------------------------------------------------------------ #Version History #Intial Release on September 17th 1997 Version 1.00 #2nd Release on September 23rd 1997 Version 1.01 #3rd Release on September 25th 1997 Version 1.02 #4th Release on January 21nd 1998 Version 2.00b # #The latest version is available at http://www.agl.uh.edu/~saljxk/site_Search #------------------------------------------------------------------------ #Thanks To, # #B. G. Mahesh (www.mahesh.com), #Darron Hutchison (tm)(www.webmakers.com.au), #Cinnamon Chu, Tina Ma, #Bob Lenaerts, Patrice Serrano, #V.A. 
#------------------------------------------------------------------------

# NOTE(review): 'use strict' is not enabled on purpose.  The settings file
# site_Search.conf is loaded with require() and read below as unqualified
# package globals ($base_path, @filetypes, ...), and cgi-lib.pl's
# ReadParse() fills %input through a typeglob.
#
# NOTE(review): several HTML fragments in this file were rebuilt from
# damaged text.  They are marked "markup reconstructed"; compare them with
# the original distribution before release.

# The configuration file must be readable before it is loaded.
if (open(my $conf_fh, '<', 'site_Search.conf')) {
    close($conf_fh);
}
else {
    Error_Module("1");
    print Bottom();
    exit;
}
require("./site_Search.conf");

# require() dies on failure instead of returning false, so it has to be
# trapped for the Error-2 page to be shown at all.
unless (eval { require("cgi-lib.pl"); 1 }) {
    Error_Module("2");
    print Bottom();
    exit;
}

$version = "2.00b";
@months  = qw(January February March April May June July
              August September October November December);

# Fall back to the requesting host when no site URI is configured.
$name_of_URI = "http://" . $ENV{'HTTP_HOST'}
    if (!defined($name_of_URI) || $name_of_URI eq "");

Check_Configurations() if ($check_script eq "1");
Clean_Scratch()        if ($clean_scratch eq "1");
Get_Search_Terms();
Get_Files_to_Search()  if ($skip_search ne "1");
Search_Files();

# Single-page output: nothing found, everything fits on one page, paging
# was not requested, or the requested page size is unusable.
Display_Results1()
    if ((scalar(@file_names_to_display) == 0)
     || (scalar(@file_names_to_display) <= int($get_hits))
     || ($get_hits =~ /all/i)
     || ($get_hits eq "")
     || (int($get_hits) < 1)
     || ($multi_display eq "0"));
Display_Results2();

#                       -:PRIMARY MODULES:-                           #

#########################################################
#    Module Checks for errors in configuration file     #
#########################################################
# Runs every configuration check, reporting each failure through
# Error_Module(), prints a success note when none failed, then exits.
sub Check_Configurations {
    print Header();

    Error_Module("3") if (!(-r "site_Search.desc") && ($desc1 eq "1"));
    Error_Module("4") if (!(-d "$base_path") || !(-r "$base_path"));
    Error_Module("5") if (scalar(@filetypes) == 0);

    if ($multi_display eq "1") {
        Error_Module("6")
            if (($scratch eq "")
             || !(-d "$base_path/$scratch")
             || !(-w "$base_path/$scratch")
             || !(-r "$base_path/$scratch"));
    }
    if ($record_usage eq "1") {
        Error_Module("7")
            if ((-e "site_Search.usage") && !(-w "site_Search.usage"));
    }

    Error_Module("8") if ($path_to_searchform eq "");
    Error_Module("9")
        if ((($output_form eq "1") && ($form_to_use !~ /\binternal\b/))
         && !(-r "$form_to_use"));

    # Markup reconstructed; the wording is the original.
    print <<'ENDPRINT' if ($errors ne "1");
<P>No errors were Found. However, the script only runs a simple check through the configuration file. If you notice the script not working as intended please go through the files,
<UL>
<LI>readme.html
<LI>FAQ.html
</UL>
before sending me a mail (krishnan@bayou.uh.edu).
<P>To run the script set the variable "$check_script" to "0" in the configuration file.
ENDPRINT

    print Bottom();
    exit;    # normal end of the check run; die() would log a CGI error
}

#################################################
#     Module Cleans the Scratch directory       #
#################################################
# Deletes generated pages in the scratch directory whose age in days
# (compared to two decimal places) exceeds $empty_scratch.
sub Clean_Scratch {
    my @generated_pages = glob("$base_path/$scratch/[1-9]*.html");

    $^T = time;    # -M measures age relative to $^T
    foreach my $page (@generated_pages) {
        my $age_in_days = -M $page;
        next unless defined $age_in_days;    # file vanished meanwhile
        unlink($page)
            if (int(100 * $age_in_days) > int(100 * $empty_scratch));
    }
}

#################################################
#         Module Parses the Form Output         #
#################################################
# Reads the submitted form into the search globals ($search_terms,
# @get_search_terms, $get_case, $get_construct, $get_hits).  When the
# request comes from the "narrow the search" form (OUTPUT_FORM present)
# the files to search are taken from a previously written list file.
sub Get_Search_Terms {
    &ReadParse(*input);

    $search_terms = $input{'Searchterms'};
    $search_terms = "" unless defined $search_terms;

    # split(' ') discards leading/repeated blanks; an empty term would
    # otherwise match every file.
    @get_search_terms = split(' ', $search_terms);
    $get_search_terms = join(" ", @get_search_terms);

    $get_case      = $input{'Case'};
    $get_construct = $input{'Construct'};

    if (defined $input{'OUTPUT_FORM'}) {
        $skip_search = $skip_hits = "1";
        $get_hits = "ALL";
        @files_to_be_searched = read_list_file($input{'List_File'});
    }
    $get_hits = $input{'Hits'} if ($skip_hits ne "1");
    $get_hits = "" unless defined $get_hits;

    no_search_terms() if (scalar(@get_search_terms) == 0);
    record_usage()    if ($record_usage eq "1");
    get_desc()        if ($desc1 eq "1");
}

#___Helper Reads a List File Written by output_form___#
# $requested comes straight from the browser, so only its final path
# component is used, it must look like "<pid>_list_.html" (the name
# output_form() generates), and it is always read from the scratch
# directory.  Returns the listed file names, chomped; an empty list when
# the request is invalid or the file cannot be read.
sub read_list_file {
    my ($requested) = @_;
    return () unless defined $requested;

    my ($list_name) = $requested =~ m{(?:\A|/)(\d+_list_\.html)\z};
    return () unless defined $list_name;

    open(my $list_fh, '<', "$base_path/$scratch/$list_name") or return ();
    my @listed = <$list_fh>;
    close($list_fh);

    chomp(@listed);
    return grep { length($_) } @listed;
}

#################################################
#    Module Gathers the Files to be Searched    #
#################################################
# Walks $base_path breadth-first, skipping directories rejected by
# dirs_avoid(), and appends every file accepted by scantype() and
# files_avoid() to @files_to_be_searched, followed by @files_to_include.
sub Get_Files_to_Search {
    @files_from_scandir = scandir($base_path);

    # Index loop: the list grows while it is being walked, which is not
    # safe with foreach.
    for (my $i = 0; $i < @files_from_scandir; $i++) {
        my $entry = $files_from_scandir[$i];
        push(@files_from_scandir, scandir($entry))
            if ((-d $entry) && (dirs_avoid($entry) ne "0"));
    }

    foreach my $entry (@files_from_scandir) {
        push(@files_to_be_searched, $entry)
            if ((scantype($entry) == 1) && (files_avoid($entry) ne "0"));
    }
    push(@files_to_be_searched, @files_to_include);
}

#################################################
#   Module Searches Files for Search Term(s)    #
#################################################
# Reads every file in @files_to_be_searched, records its date, title and
# (optionally) META description, strips the tags and tests the text
# against the search terms.  Matching files are appended to
# @file_names_to_display; %line_array / %hits_array are filled for them
# when $output_line / $output_hits are "1".
sub Search_Files {
    $get_search_terms = join(" ", @get_search_terms);

    # Patterns are compiled once; the terms are quoted so that user input
    # is never interpreted as a regular expression.
    my $phrase_re = term_pattern($get_search_terms, 1);
    my @term_res  = map { term_pattern($_, 0) } @get_search_terms;

    foreach my $file_name (@files_to_be_searched) {
        $mod_time{$file_name} = Get_Date((stat($file_name))[9]);

        open(my $in_fh, '<', $file_name) or next;
        $file_to_search = do { local $/; <$in_fh> };
        close($in_fh);
        $file_to_search = "" unless defined $file_to_search;

        $title_array{$file_name} = "$1"
            if ($file_to_search =~ /<TITLE>([^>]+)<\/TITLE>/i);
        $desc_array{$file_name} = "$1"
            if (($file_to_search =~ /<[^>]*META[^>]+NAME\s*=[ "]*description[ "]+CONTENT\s*=\s*"(([^>"])*)"[^>]*>/i)
             && ($desc2 eq "1"));

        $file_to_search =~ s/<[^>]*>//g;    # drop the markup

        @list_of_lines = ();
        if ($output_line eq "1") {
            @list_of_lines = split(/[.!?]/, $file_to_search)
                if ($smart eq "0");
            @list_of_lines = split(/[a-z]['")]*[.!?]+['")]*\s/, $file_to_search)
                if ($smart eq "1");
        }

        my $matched = 0;
        if ($get_construct eq "As a phrase") {
            $matched = 1
                if (defined($phrase_re) && ($file_to_search =~ $phrase_re));
        }
        elsif ($get_construct eq "Any search term") {
            $matched = 1 if (grep { $file_to_search =~ $_ } @term_res);
        }
        elsif ($get_construct eq "All search terms") {
            my $missing = grep { $file_to_search !~ $_ } @term_res;
            $matched = 1 if (@term_res && !$missing);
        }
        next unless $matched;

        push(@file_names_to_display, $file_name);
        $line_array{$file_name} = get_line(@list_of_lines)
            if ($output_line eq "1");
        $hits_array{$file_name} = get_hits()
            if ($output_hits eq "1");
    }
}

#___Helper Compiles one Search Term into a Pattern___#
# Returns a compiled pattern for $text that honours $get_case, anchored
# at a word start and, when $whole_word is true, also at a word end.
# Returns nothing for an empty term or an unrecognised $get_case, in
# which case callers treat the term as never matching.
sub term_pattern {
    my ($text, $whole_word) = @_;
    return unless (defined($text) && length($text));

    my $source = '\b' . quotemeta($text) . ($whole_word ? '\b' : '');
    return qr/$source/  if ($get_case eq "Sensitive");
    return qr/$source/i if ($get_case eq "Insensitive");
    return;
}

#################################################
#      Modules Display the Search Results       #
#################################################
# Prints all matches on a single page and exits; hands over to
# No_Matches() when nothing was found.
sub Display_Results1 {
    No_Matches() if (scalar(@file_names_to_display) == 0);

    print Header();
    print Top($title_for_search_page);
    print "<CENTER>Search Conditions<HR>";
    print search_conditions();
    print "<B>" . scalar(@files_to_be_searched) . " File(s) Searched "
        . scalar(@file_names_to_display) . " Match(es) Found</B><BR><HR>";
    print "</CENTER>";

    foreach my $file_name (@file_names_to_display) {
        print result_entry($file_name);
    }

    print "</TABLE><BR>";
    print "<CENTER><A HREF=\"$path_to_searchform\">[Search Again]</A></CENTER>";
    print output_form() if ($output_form eq "1");
    print credit_line();
    print Bottom();
    exit;
}

###  ...  Module 2
# Splits the matches into pages of int($get_hits) entries.  The first
# page goes to the browser; every further page is written to the scratch
# directory and reached through the "GET NEXT" button of the page before.
sub Display_Results2 {
    my $hits  = int($get_hits);
    my $total = scalar(@file_names_to_display);

    # A page size below 1 would never advance the loop below.
    Display_Results1() if ($hits < 1);

    my $page = 0;
    for (my $start = 0; $start < $total; $start += $hits, $page++) {
        my $end = $start + $hits;
        $end = $total if ($end > $total);
        my $has_next = ($end < $total);

        # Page N (N >= 1) is stored as file number N and links to N+1.
        my $next_file = $$ . "_" . ($page + 1) . "_of_" . $hits . ".html";
        my $out;
        if ($page == 0) {
            print Header();
            $out = \*STDOUT;
        }
        else {
            my $this_file = $$ . "_" . $page . "_of_" . $hits . ".html";
            unless (open($out, '>', "$base_path/$scratch/$this_file")) {
                warn "site_Search: cannot write $base_path/$scratch/$this_file: $!\n";
                last;
            }
        }

        print {$out} Top($title_for_search_page);
        print {$out} "<CENTER>Search Conditions<HR>";
        print {$out} search_conditions();
        print {$out} "<B>" . scalar(@files_to_be_searched) . " File(s) Searched "
            . $total . " Match(es) Found</B><BR><HR>";
        print {$out} "<P>Match(es) " . ($start + 1) . " to " . $end . " </P>";
        print {$out} "</CENTER>";
        print {$out} "<FORM METHOD=\"GET\" ACTION=\"$name_of_URI/$scratch/$next_file\">"
            if ($has_next);

        foreach my $idx ($start .. $end - 1) {
            last if (length($file_names_to_display[$idx]) == 0);
            print {$out} result_entry($file_names_to_display[$idx]);
        }

        print {$out} "<P><CENTER><INPUT TYPE=\"SUBMIT\" VALUE=\"GET NEXT $hits HITS\"></CENTER></P>"
            if ($has_next);
        print {$out} "<BR><CENTER><A HREF=\"$path_to_searchform\">[Search Again]</A></CENTER>";
        print {$out} "</FORM>" if ($has_next);
        print {$out} output_form() if ($output_form eq "1");
        print {$out} credit_line();
        print {$out} Bottom();

        close($out) if ($page > 0);
    }
}

#___Helper Returns the "Search Conditions" Line___#
# The values originate from the submitted form and are therefore escaped.
sub search_conditions {
    return "Case:" . escape_html($get_case)
         . " | Construct:" . escape_html($get_construct)
         . " | Hits Per Page:" . escape_html($get_hits)
         . " | Terms:" . escape_html($search_terms) . "<BR><BR>";
}

#___Helper Returns the HTML for one Matching File___#
sub result_entry {
    my ($file_name) = @_;

    my $short_name = substr($file_name, rindex($file_name, "/") + 1);
    my $site_path  = substr($file_name, length($base_path));

    my $html = "<BR><TR><TD><B><LI>File Name:</B><A HREF=\"$name_of_URI$site_path\">$short_name</A><BR>";
    $html .= "<I><B>Modified:</B>$mod_time{$file_name}</I><BR>";
    $html .= "<B>Title: </B>$title_array{$file_name}<BR>";
    $html .= "<B>Description:</B> $desc_array{$file_name}<BR>"
        if (($desc1 eq "1") || ($desc2 eq "1"));
    $html .= "$hits_array{$file_name}<BR>" if ($output_hits eq "1");
    $html .= "$line_array{$file_name}"     if ($output_line eq "1");
    $html .= "</TD></TR>";
    return $html;
}

#___Helper Returns the Credit Line Closing Every Page___#
sub credit_line {
    return "<HR><A HREF=\"http://www.agl.uh.edu/~saljxk/site_Search\"> <FONT SIZE=-1>site Search Version $version copyright\&copy\; 1997, Krishnan Jayakrishnan</FONT></A>";
}

#___Helper Escapes Text for Safe Inclusion in HTML___#
sub escape_html {
    my ($text) = @_;
    return "" unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

#########################################################
#                -:SECONDARY MODULES:-                  #
#########################################################

#___Module Prints Error Messages to Help Rectify___#
# Prints the message for error code $_[0] ("1".."9") and sets $errors.
# Codes "1" and "2" are raised before any output exists, so the HTTP
# header is printed for them here.  The caller prints Bottom() once.
sub Error_Module {
    my ($code) = @_;
    $errors = "1";

    my %message_for = (
        "1" => qq{<P> A Configuration file <STRONG> site_Search.conf </STRONG> could not be located in the current<BR> directory, either the file is not present or does not have read permissions. To get help rectifying<BR> the error, read the "Errors" section, (Error-1) of the "readme.html" file.<BR>},
        "2" => qq{<P> Could not load the cgi-lib.pl library. To get help rectifying the error, read<BR> read the "Errors" section, (Error-2) of the "readme.html" file.<BR>},
        "3" => qq{<P> The description file <STRONG> site_Search.desc </STRONG> could not be opened for reading. There<BR> could be a permission problem. To get help rectifying the error, read the "Errors" section, (Error-3) of<BR> of the "readme.html" file.<BR>},
        "4" => qq{<P> There is a problem with the base-path specified to start the search. To get help rectifying the error,<BR> read the "Errors" section, (Error-4) of the "readme.html" file.<BR>},
        "5" => qq{<P> No filetypes have been specified to search. To get help rectifying the error, read the "Errors" section,<BR> (Error-5) of the "readme.html" file.<BR>},
        "6" => qq{<P> There is a problem with the scratch directory specified. To get help rectifying the error, read the <BR> "Errors" section, (Error-6) of the "readme.html" file.<BR>},
        "7" => qq{<P> There is a problem recording the script usage. To get help rectifying the error, read the "Errors" <BR> section, (Error-7) of the "readme.html" file.<BR>},
        "8" => qq{<P> A HTTP Path to the search form was not specified. To get help rectifying the error, read the "Errors" <BR> section, (Error-8) of the "readme.html" file.<BR>},
        "9" => qq{<P> No Valid form was found at $form_to_use . To get help rectifying the error, read the "Errors" <BR> section, (Error-9) of the "readme.html" file.<BR>},
    );

    print Header() if (($code eq "1") || ($code eq "2"));
    print "<FONT SIZE=\"3\" FACE=\"HELVETICA\"><BR>";
    print $message_for{$code} if (exists $message_for{$code});
    print "</FONT>";
}

#_____Module Returns the Output Document Type______#
sub Header {
    return "Content-type: text/html\n\n";
}

#_____Module Returns the Output Document Head_______#
# Returns the opening markup of a page titled $title.
# NOTE(review): markup after <TITLE> reconstructed; only the two uses of
# $title survived in the damaged text.
sub Top {
    my ($title) = @_;
    return <<"ENDPRINT";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
<TITLE>$title</TITLE>
</HEAD>
<BODY>
<CENTER><H2>$title</H2></CENTER>
ENDPRINT
}

#_____Module Returns the Output Document Bottom______#
# NOTE(review): closing tags reconstructed.
sub Bottom {
    return "</BODY>\n</HTML>\n";
}

#____Module Scans Directories for all Files_____#
# Returns every entry directly inside the directory $_[0].
sub scandir {
    my ($directory_to_scan) = @_;
    return glob("$directory_to_scan/*");
}

#__Module Returns Only Files of Specified Type__#
# Returns 1 when the path $_[0] contains one of @filetypes, else 0.
# NOTE(review): the type is matched anywhere in the path, not only as a
# suffix; kept because existing configurations may rely on it.
sub scantype {
    my ($path) = @_;
    foreach my $type (@filetypes) {
        return 1 if (rindex($path, $type) >= 0);
    }
    return 0;
}

#____Module Checks for Validity of Directory____#
# Returns 0 when $_[0] is listed in @directories_to_avoid, else 1.
sub dirs_avoid {
    my ($check_dir) = @_;
    foreach my $dir (@directories_to_avoid) {
        return 0 if ($dir eq $check_dir);
    }
    return 1;
}

#______Module Checks for Validity of File______#
# Returns 0 when $_[0] is listed in @files_to_avoid, else 1.
sub files_avoid {
    my ($check_file) = @_;
    foreach my $file (@files_to_avoid) {
        return 0 if ($file eq $check_file);
    }
    return 1;
}

#_Module Prints Message if no Search Terms are Entered_#
# Markup reconstructed; the wording is the original.
sub no_search_terms {
    print Header();
    print Top("No Search Terms Entered");
    print <<"ENDPRINT";
<P>No search terms were entered for search. Please use your browser's back button to go back to the form and search again or do a <A HREF="$path_to_searchform">New Search</A>.</P>
ENDPRINT
    print credit_line();
    print Bottom();
    exit;
}

#_________Module Records Script Usage___________#
# Appends "date|address|host|terms" to site_Search.usage.  Logging is
# best effort: nothing is written when the file cannot be opened.
sub record_usage {
    open(my $log_fh, '>>', 'site_Search.usage') or return;

    my $stamp = scalar localtime;

    # One request must stay one log line.
    my $logged_terms = $search_terms;
    $logged_terms =~ s/[\r\n]+/ /g;

    print {$log_fh} $stamp . "|" . $ENV{'REMOTE_ADDR'} . "|"
        . $ENV{'REMOTE_HOST'} . "|" . "$logged_terms\n";
    close($log_fh);
}

#_________Module Gathers Descriptions___________#
# Loads "file[sep]description" lines from site_Search.desc into
# %desc_array.
# NOTE(review): the separator is presumably the literal text "[sep]";
# the previous pattern /[sep]/ was a character class that split on any
# "s", "e" or "p".  Confirm against the shipped site_Search.desc.
sub get_desc {
    open(my $desc_fh, '<', 'site_Search.desc') or return;

    while (my $entry = <$desc_fh>) {
        chomp($entry);
        my ($file_name, $description) = split(/\[sep\]/, $entry, 2);
        next unless (defined($file_name) && length($file_name));
        $desc_array{$file_name} = defined($description) ? $description : "";
    }
    close($desc_fh);
}

#_______Module Returns the File Mod_Date________#
# Formats an epoch time as "day Month year".  Uses the optional argument,
# else the global $mod_time, else the current time.
sub Get_Date {
    my ($when) = @_;
    $when = $mod_time unless defined $when;
    $when = time unless ($when);

    my ($day, $month, $year) = (localtime($when))[3, 4, 5];

    # localtime() reports the year as an offset from 1900.
    return "$day $months[$month] " . ($year + 1900);
}

#_Module Returns the Line with the Search Term(s)_#
# Given the sentences of the current file, returns the first sentence
# containing the phrase (phrase searches) or any single term, with the
# hit highlighted and the neighbouring sentences on either side.
# Returns "" when no sentence matches.
# NOTE(review): highlight and trailing markup reconstructed.
sub get_line {
    my @sentences = @_;

    # A phrase search looks for the whole phrase first and falls back to
    # the individual terms, as before.
    my @passes;
    push(@passes, [$get_search_terms]) if ($get_construct eq "As a phrase");
    push(@passes, [@get_search_terms]);

    foreach my $pass (@passes) {
        foreach my $idx (0 .. $#sentences) {
            foreach my $term (@$pass) {
                next unless (defined($term) && length($term));

                my $quoted = quotemeta($term);
                my $marked = $sentences[$idx];
                next unless ($marked =~ s{\b($quoted)\b}{<B><FONT COLOR="#FF0000">$1</FONT></B>}ig);

                my $before = ($idx > 0)           ? $sentences[$idx - 1] : "";
                my $after  = ($idx < $#sentences) ? $sentences[$idx + 1] : "";
                return "$before.$marked.$after<BR>";
            }
        }
    }
    return "";
}

#_________Module Returns the no of Hits__________#
# Returns an HTML list with the number of occurrences in $file_to_search
# of the phrase (phrase searches) or of each search term.
# NOTE(review): list markup reconstructed.
sub get_hits {
    if ($get_construct eq "As a phrase") {
        my $hits = count_matches(term_pattern($get_search_terms, 1));
        return "<UL><LI>$hits Occurence(s) of the search term "
             . escape_html($get_search_terms) . "</UL>";
    }

    my @lines;
    foreach my $search_term (@get_search_terms) {
        my $hits = count_matches(term_pattern($search_term, 0));
        push(@lines, "<LI>$hits occurence(s) of the search term "
                   . escape_html($search_term) . " ");
    }
    return "<UL>" . join("", @lines) . "</UL>";
}

#___Helper Counts Pattern Matches in the Current File___#
# Returns how often $pattern occurs in $file_to_search; 0 when no
# pattern is given.
sub count_matches {
    my ($pattern) = @_;
    return 0 unless defined $pattern;

    my $found = () = $file_to_search =~ /$pattern/g;
    return $found;
}

#__Module Prints a Message if no Matches are Found___#
# Markup reconstructed; the wording is the original.
sub No_Matches {
    print Header();
    print Top("No Matches Found");
    print "<CENTER>" . search_conditions() . "</CENTER><HR>";
    print <<"ENDPRINT";
<P>No successful matches were found after the search. Please use your browser's back button to go back to the form and search again or do a <A HREF="$path_to_searchform">New Search</A>.</P>
ENDPRINT
    print credit_line();
    print Bottom();
    exit;
}

#_Module Outputs a Form to Perform Additional Searches_#
# Writes the matching file names to "<pid>_list_.html" in the scratch
# directory and returns the HTML of a form that searches only those
# files: the lines of $form_to_use when it names a file, otherwise the
# built-in form.
# NOTE(review): the built-in form was rebuilt from the field names that
# Get_Search_Terms() reads (Searchterms, Case, Construct, OUTPUT_FORM,
# List_File); its ACTION assumes the form posts back to this script.
sub output_form {
    my $files = scalar(@file_names_to_display);
    my $list_of_files = $$ . "_list_" . ".html";

    if (open(my $list_fh, '>', "$base_path/$scratch/$list_of_files")) {
        foreach my $file (@file_names_to_display) {
            my $entry = $file;
            chomp($entry);
            print {$list_fh} "$entry\n";
        }
        close($list_fh);
    }

    if ($form_to_use !~ /\binternal\b/) {
        open(my $form_fh, '<', $form_to_use) or return "";
        my @form_lines = <$form_fh>;
        close($form_fh);
        return @form_lines;
    }

    my $action = escape_html($ENV{'SCRIPT_NAME'});
    return <<"ENDPRINT";
<HR>
<CENTER>
<FORM METHOD="POST" ACTION="$action">
<P>$files file(s) matching your search criteria found.<BR>
Narrow the search by searching $files File(s)?</P>
<INPUT TYPE="TEXT" NAME="Searchterms" SIZE="30">
<INPUT TYPE="HIDDEN" NAME="OUTPUT_FORM" VALUE="1">
<INPUT TYPE="HIDDEN" NAME="List_File" VALUE="$list_of_files">
<BR>
Case: <SELECT NAME="Case"><OPTION>Insensitive<OPTION>Sensitive</SELECT>
Construct: <SELECT NAME="Construct"><OPTION>Any search term<OPTION>All search terms<OPTION>As a phrase</SELECT>
<INPUT TYPE="SUBMIT" VALUE="Search">
</FORM>
</CENTER>
ENDPRINT
}