#! /usr/bin/perl
# -*- Mode: perl; -*-
#use Crypt::TripleDES;
#
# Check that coding standards have been followed.  This checks for common
# and easy to overlook errors, such as the use of non-portable system routines
#
# ToDo:
# Add support for
# /* style: allow:routine-name:count sig:value */
# where routine-name is the name of a routine that is on the warning list
# and value is a digital signature computed from the filename and routine-name
# by the lead programmer.  (currently implemented without the signature 
# check).
#
# Add line-length check.  Optionally warn of >80 character lines.
#
# Add option to output all of the "allow" styles encountered, with the file
# name and user (from the sig).
#
# Add an option to check that the "style" include files are used.
# E.g., if mpiimpl.h defines malloc/free, then make sure that mpiimpl.h
# (or a file that includes it) is used.  This could alternately use the
# 

require "maint/parse.sub";

$verbose = 0;
$debug = 0;
$showfiles = 0;
$depth = 1;
$curdir = ".";
%skipfiles = ();
$line_num = 0;
$allowRomioNames = 1;
$withinRomio = 0;
$skipTest = 1; 
$removeFileMissingCopyright = 0;
# Check for special args
@files = ();
#
# Checks to perform (all by default)
$checkComments  = 1;
$checkFunctions = 1;
$checkDefines   = 1;
$checkStyle     = 1;

$missingCopyright = 0;

foreach $arg (@ARGV) {
    if ($arg =~ /^-showfiles/) { $showfiles = 1; }
    elsif( $arg =~ /-debug/) { $debug = 1; }
    elsif ( $arg =~ /-verbose/) { $verbose = 1; }
    elsif ( $arg =~ /-skipfile=(.*)/) { $skipfiles{$1} = 1; }
    elsif ( $arg =~ /-allowromio=no/) { $allowRomioNames = 0; }
    elsif ( $arg =~ /-allowromio/) { $allowRomioNames = 1; }
    elsif ( $arg =~ /-checktest/) { $skipTest = 0; }
    elsif ( $arg =~ /-distcheck/) {
	# Perform minimal checks for the distribution
	$checkComments  = 0;
	$checkFunctions = 0;
	$checkDefines   = 0;
	$checkStyle     = 0;
	# Remove files that are missing a copyright so that the 
        # distribution can continue
        # DO NOT CHANGE THIS.  Files should have a copyright or be on the
        # list of foreign files.
	$removeFileMissingCopyright = 1;
    }
    else {
	print "Adding $arg to files\n" if $debug;
	$files[$#files+1] = $arg;
    }
}

# Call once for each file
sub ProcessFile {
    my $filename = $_[0];
    %AllowedRoutines = ();
    # Include system defined names
    %AllowedDefines = ( 'SA_RESETHAND' => 'sys', 
		        'WORDS_BIGENDIAN' => 'autoconf');
    my $cxx_comments_found = 0;
    my $cxx_header = 0;

    # Check for files that we should skip
    $f2name = "./" . $filename;
    if (defined($skipfiles{$filename}) || defined($skipfiles{$f2name})) {
	return;
    }

    # Check for romio
    if ($filename =~ /romio/) { $withinRomio = 1; }
    else                      { $withinRomio = 0; }

    &ClearPreamble;
    %warningmsgs = ();
    &ClearMismatchedStates;
    &ClearNested;
    $notInNested = 1;
    
    open( FD, "<$filename" ) || die "Could not open $filename";
    $line_num = 0;
    while (<FD>) {
	$line_num++;
	$errsrc = " in $filename";
	# Check for special comments
	if (/\/\*\s*style:\s*allow:(\w+):(\d+)\s+sig:(\d+)/) {
	    # Found a style comment.  Add the routine name to this 
	    # list of allowed routines.  Eventually, this may require
	    # a signed key
	    # Compute as (using the Crypt module)
            # $public = new Crypt::RSA::Key::Public(Filename => $keyfile, );
            # $rsa->verify( Message => "$1:$2:$filename", 
	    # Signature => $3, Key => $public ) && $AllowedRoutines{$1}=$2;
            # But for now, we do:
	    $AllowedRoutines{$1} = $2;
	}
	elsif ( /\/\*\s*style:\s*define:([_\w]*):(\d*)\s+sig:(\d*)/) {
	    # This style comment permits special definitions
	    #print "Allowing define of :$1:\n"; 
	    $AllowedDefines{$1} = $2;
	}
	elsif (/\/\*\s*style:\s*c\+\+ header/) {
	    $cxx_header = 1;
	}
	elsif (/\/\*\s*begin:nested\s*\*\//) {
	    $notInNested = 0;
	}
	elsif (/\/\*\s*end:nested\s*\*\//) {
	    $notInNested = 1;
	}
	# The preamble is within a comment
	&CheckForPreamble( $_ );
	$_ = StripComments( FD, $_ );
	if ($comment_line ne "") { &CheckForPreamble( $comment_line ); }
	# Check for C++ style comments
	if ($checkComments && /\/\//) {
	    $cxx_comments_found ++;
	    if ($cxx_comments_found == 1 && ! $cxx_header) {
		&PrintCurrentDirectory;
		print "C++ comment found in $filename:\n";
		my $line = $_;
		if (/(.*)\n/) {
		    $line = "$1\n";
		}
		else {
		    $line = $_;
		}
		print $line;
	    }
	    s/\/\/.*//;
	}
	# If there is a DEBUG that isn't closed, read ahead
	if ($checkFunctions) {
	    if (/DEBUG\s*\(/) {
		($leading,$result,$remainder) = &GetBalancedParen( FD, $_ );
		$_ = $leading . $remainder;
	    }
	    # Skip any single function calls within DEBUG
	    if (!/DEBUG\([^\(\)]*\([^\(\)]*\)/ && !$withinRomio) {
		# FIXME: eventually we need to include ROMIO in these checks
		&CheckForBadSubs( $_, $filename );
	    }

	    # We also need to check for the use of MPI routines from within
	    # the code (only use NMPI), at least for all of the MPI
	    # implementation code.  Withing src/mpi/*, we could look for
	    # MPI_([A-Z][a-z0-9_]*)[^;]*; and then complain if $1 is not one
	    # of Status, Comm, Group, Win, File, Aint, Fint, Offset, Request,
	    # File, Info, Datatype .
	    if ($notInNested) {
		&CheckForNested( FD, $_, $filename );
	    }			# 
	    if (! $filename =~ /binding\/f77/ &&
		! $filename =~ /include \/nmpi.h/) {
		# Allow PMPI calls within the f77 binding
		&CheckForPMPI( FD, $_, $filename );
	    }
	}

	if ($checkDefines) {
	    &CheckForGuardedDefines( FD, $_, $filename );
	    &CheckForBadDefines( FD, $_, $filename );
	    &CheckForMismatchedStates ( FD, $_, $filename );
	    &CheckForBadDefinitions( FD, $_, $filename );
	}
    }
    # Output summary messages
    foreach $key (keys(%warningmsgs)) {
	&PrintCurrentDirectory;
	print "$key ($warningmsgs{$key} times)\n";
    }
    if ($cxx_comments_found > 1 && !$cxx_header) {
	&PrintCurrentDirectory;
	print "$cxx_comments_found C++ comments found in $filename\n";
    }
    &ReportOnPreamble( $filename );
}

#
# Subroutines that we want to avoid
@avoidsubs = ( "fprintf", "fputs", "puts", "printf", 
               "malloc", "calloc", "free", "strdup", "realloc" );
@guardedsubs = ( "alloca", "closedir", "fnmatch", "getloadavg", 
		 "getmntent", "getpgrp", "memcmp", "mmap", 
		 "setpgrp", "setvbuf", "strcoll", "strftime",
		 "utime", "vfork", "vprintf", "wait3" );
# Removed strcmp because it is ok if one string is guaranteed to 
# have a terminating null.  This is still touchy, but it isn't the risk
# posed by the copy or cat routines.
%bettersubs = ( "strcpy" => "MPIU_Strncpy",
	        "strncpy" => "MPIU_Strncpy",
#		"strcmp" => "strncmp",
		"strcat" => "MPIU_Strnapp",
		);
# We can't recommend snprintf because it isn't standard.  We need an 
# MPIU_Snprintf instead.
#		"sprintf" => "snprintf" );

sub CheckForBadSubs {
    my $curline = $_[0];
    my $filename = $_[1];
    foreach $subname (@avoidsubs) {
	if (/(?<!\w)$subname\s*\(/ && !defined($AllowedRoutines{$subname})) {
	    $msg = "Warning: found $subname in $filename";
	    if (defined($warningmsgs{$msg})) { 
		$warningmsgs{$msg} = $warningmsgs{$msg} + 1;
	    }
	    else {
		$warningmsgs{$msg} = 1;
	    }
	}
    }
    # The guarded subs are ones that GNU autoconf is worried about.
    # This code should look for some sort of configure-style CPP value
    # and allow the ones that are properly guarded.
    # The mysterious (?<!\w) is a "negative look-behind match".  That is,
    # do *not* match a single "word" (alphanumeric+_) character.  This
    # keeps us from matching MPIU_vprintf when vprintf is in the list
    # of test routines and MPIU_vprintf is in the current line.
    foreach $subname (@guardedsubs) {
	if (/(?<!\w)$subname\s*\(/ && !defined($AllowedRoutines{$subname})) {
	    &PrintCurrentDirectory;
	    print "Caution: found reference to routine $subname in $filename\n";
	}
    }
    # See above for an explanation of (?<!\w)
    foreach $subname (@guardsubs) {
        if (/(?<!\w)$subname\s*\(/ && !defined($AllowedRoutines{$subname})) {
            $msg = "Caution: found $subname in $filename";
            if (defined($warningmsgs{$msg})) {
                $warningmsgs{$msg} = $warningmsgs{$msg} + 1;
            }
            else {
                $warningmsgs{$msg} = 1;
            }
        }
    }
#    foreach $subname (keys %bettersubs) {
#	if (/(?<!\w)$subname\s*\(/) {
#	    print "Caution: found reference to routine $subname in $filename\n";
#	    print "Consider using $bettersubs{$subname} instead\n";
#	}
#    }

    foreach $subname (keys %bettersubs) {
        if (/(?<!\w)$subname\s*\(/ && !defined($AllowedRoutines{$subname})) {
            $msg = "Caution: found $subname in $filename, consider $bettersubs{$subname}";
            if (defined($warningmsgs{$msg})) {
                $warningmsgs{$msg} = $warningmsgs{$msg} + 1;
            }
            else {
                $warningmsgs{$msg} = 1;
            }
        }
    }
}


@GoodNames = ( 'HAVE_[A-Za-z0-9_]*',  'USE_[A-Za-z0-9_]*',
	       'NEEDS_[A-Za-z0-9_]*', 'WITH_[A-Z0-9_]*',
	       'MPICH_SINGLE_THREADED', 'CHAR_PTR_IS_ADDRESS', 'FOO',
	       'STDC_HEADERS', '__cplusplus', 'IOV_MAX', 'S_ISLNK', 
	       'MPIR_[A-Za-z0-9_]*',
	       'MPID_[A-Za-z0-9_]*',
	       'MPIU_[A-Za-z0-9_]*',
	       'MPIDI_[A-Za-z0-9_]*',   # Internal MPID names
	       'MPICH_[A-Za-z0-9_]*', 
	       'ROMIO_[A-Za-z0-9_]*', 
	       'MPIO_[A-Za-z0-9_]*', 
	       'DBG_[A-Z0-9_]*',
	       '_H_INCLUDED',           # Ok for header files
	       'DEBUG_[A-Z0-9_]*',
	       'F77_[A-Z_]*',
	       'MPI_BUILD_PROFILING',
	       'MPI_Info_f2c', 'MPI_Info_c2f', 'MPI_File_f2c', 'MPI_File_c2f', 
	       'AFX_.*_INCLUDED_',      # Autogenerated defines for some Windows code
	       'APSTUDIO_INVOKED',      # Autogenerated ?
	       'PVFS_.*',                 # Used in some ROMIO files
	       'SA_RESTART', 'SA_INTERRUPT', # Sigaction values
	       );

@BadNames = ( 'rs6000', 'solaris', 'hpux', 'linux', 'HPUX', 'IRIX', 'LINUX', );

# These aren't really good names, we just want to suppress messages about them
# for now so that we can concentrate on the new MPICH2 code.
@GoodRomioNames = ( 'PROFILE', 'PRINT_ERR_MSG', 'HPUX', 'SPPUX', 'SX4',
		   'AIO_SUN', 'AIO_HANDLE_IN_AIOCB', 'NO_FD_IN_AIOCB',
		   'NO_AIO', 'AIO_PRIORITY_DEFAULT', 'AIO_SIGNOTIFY_NONE',
		   'MPISGI', 'CRAY', 'PARAGON', 'FREEBSD', 'LINUX',
		   'tflops', 'NFS', 'XFS', 'CB_CONFIG_LIST_DEBUG',
		   'SFS', 'HFS', 'UFS', 'MPI_hpux', 'FORTRANCAPS',
		   'MPILAM', 'NEEDS_ADIOCB_T', 'AGG_DEBUG', 'SOLARIS',
		   'IRIX', 'AIX', 'DEC', 'NEEDS_MPI_TEST', 'PFS', 
		   'PIOFS', 'MPICH', 'MPI_OFFSET_IS_INT', 
		   'MPI_COMBINER_NAMED', '_UNICOS', 'MPIHP', 
		   );
sub CheckName {
    my $name = $_[0];
    foreach $good (@GoodNames) {
	if ($name =~ /$good/) { return 0; }
    }
    if ($allowRomioNames && $withinRomio) {
	foreach $good (@GoodRomioNames) {
	    if ($name =~ /$good/) { return 0; }
	}
    }
    foreach $bad (@BadNames) {
	if ($name =~ /$bad/) { return 1; }
    }
    # Check for defining a __ name (not allowed by users)
    if ($name =~ /^__.*/) { return 1; }

    return 1;
}
sub CheckForBadDefines {
    my $FD = $_[0];
    my $curline = $_[1];
    my $filename = $_[2];
    if ($curline =~ /^\s*#if/) {
	while ($curline =~ /\/$/) {
	    $curline .= <$FD>;
	    $line_num ++;
	}
	# Search for either ifdef or defined(...)
	if ($curline =~ /^\s*#ifdef\s*(.*)\s*/) {
	    $defname = $1;
	    $defname =~ s/\r*\n*//g;
	    print "$defname\n" if $debug;
	    if (! defined($AllowedDefines{$defname}) && 
		&CheckName( $defname ) ) {
		&PrintCurrentDirectory;
		print "Warning: ifdef name $defname used in $filename is nonstandard\n"
	    }
	}
	elsif ($curline =~ /^\s*#if\s/ || $curline =~ /^\s*#elif\s/) {
	       while ($curline =~ /defined\(([^\)]*)\)/) {
		   $defname = $1;
		   print "$defname\n" if $debug;
		   if (! defined($AllowedDefines{$defname}) && 
		       &CheckName( $defname ) ) {
		       &PrintCurrentDirectory;
		       print "Warning: ifdef name $defname used in $filename is nonstandard\n"
		       }
		   $curline =~ s/defined\($defname\)//g;
	       }
	}
    }
}
# This checks for definitions that the user should not make (e.g., 
# #define __foo)
sub CheckForBadDefinitions {
    my $FD = $_[0];
    my $curline = $_[1];
    my $filename = $_[2];
    if ($curline =~ /^#\s*define\s+(__\w*)/) {
	$defname = $1;
	print "Warning: definition of name $defname used in $filename is not allowed\n"
    }
}

#
# This checks for
# # ifdef NEEDS_foo_DEFINED
# # define foo
# # endif
# Basically, it allows you to skip over a define that would otherwise
# cause an error report in CheckForBadDefines
sub CheckForGuardedDefines {
    my $FD = $_[0];
    my $curline = $_[1];
    my $filename = $_[2];
    my $allowed_name = "";
    if ($curline =~ /^#\s*ifdef\s+NEEDS_(.*)_DEFINED/ ||
	$curline =~ /^#\s*if\s+defined\(NEEDS_(.*)_DEFINED\)/) {
        $allowed_name = $1;
	# Read *only* the next line
	$curline = <$FD>;
	# Allow some extra underscores before and after 
	if ($curline =~ /^#\s*define\s+_*${allowed_name}_*/) {
	    ;
	}
	else {
	    &PrintCurrentDirectory;
	    print "Warning: unexpected line after NEEDS_${allowed_name}_DEFINED in $filename\n";
        }
    }
}

foreach $file (@files) {
    print "$file\n" if $showfiles;
    if (-d $file) {
	# Skip test directories if requested
	if ($skipTest && $file =~ /test$/) {
	    next;
	}
	&ProcessDir( $file );
    }
    else {
        &ProcessFile( $file );
    }
}

if ($missingCopyright && $removeFileMissingCopyright == 0) {
    print STDERR "Some files are missing the copyright statement\n";
    exit 1;
}

if ($missingCopyright && $removeFileMissingCopyright != 0) {
    print STDERR "\n\nSOME FILES ARE MISSING THE COPYRIGHT STATEMENT.\nABORTING BUILD OF DISTRIBUTION\n\n";
    exit 1;
}

exit 0;

# ----------------------------------------------------------------------------
sub ProcessDir {
    my $DIR = "DIR$depth"; $depth++;
    my $dir = $_[0]; $dir =~ s/\/$//;
    my $savedir = $curdir;
    $curdir =~ s/\/$//;

    my $f2name = "./" . "$curdir/$dir" ;
    if (defined($skipfiles{"$curdir/$dir"}) || defined($skipfiles{$f2name})) {
	print "Skipping directory $curdir\n" if ($verbose);
	$savedir = $curdir;
	return;
    }

    $curdir = "$curdir/$dir";

    print "Processing directory $curdir\n" if ($verbose);
    opendir( $DIR, "$curdir" ) || die "Cannot open directory $curdir\n";
    my @filelist;
    while ($file = readdir( $DIR) ) {
	print "File $file\n" if ($verbose);
	$filelist[$#filelist+1] = $file;
    }
    closedir( $DIR );
    foreach $file (@filelist) {
	print "processing $file\n" if ($verbose);
	if (-d "$curdir/$file") {
	    if (! ($file =~ /^\./) && ! ($file =~ /CVS/) ) { 
		&SetCurrentDirectory( "$curdir/$file" );
		&ProcessDir( $file );
	    }
	}
	elsif ($file =~ /\.[ch]$/) {
	    print "File $file\n" if ($verbose);
	    &ProcessFile( "$curdir/$file" );
	}
    }
    closedir( $DIR );
    $curdir = $savedir;
}
#
# Look for mismatched names in the state definitions 
# MPID_MPI_<optionalmode_>FUNC_EXIT, MPID_MPI_<optionalmode_>FUNC_ENTER
$statename = "";
$funcname  = "";
sub ClearMismatchedStates {
    $statename = "";
    $funcname  = "";
}
sub CheckForMismatchedStates {
    my $FD = $_[0];
    my $curline = $_[1];
    my $filename = $_[2];
    if ($curline =~ /#define\s\s*FUNCNAME\s\s*(.*)\s*$/) {
        $funcname = $1;
        $funcname =~ s/\r//;
	$funcname =~ tr/a-z/A-Z/;
    }
    elsif ($curline =~ /\s*#define/) { return; }
    elsif ($curline =~ /MPID_MPI_[A-Z0-9_]*FUNC_ENTER\(\s*(.*)\s*\)/ ||
	   $curline =~ /MPID_MPI_[A-Z0-9_]*FUNC_ENTER_FRONT\(\s*(.*)\s*\)/ ||
	   $curline =~ /MPID_MPI_[A-Z0-9_]*FUNC_ENTER_BOTH\(\s*(.*)\s*\)/ ||
	   $curline =~ /MPID_MPI_[A-Z0-9_]*FUNC_ENTER_BACK\(\s*(.*)\s*\)/) {
	$statename = $1;
	$funcstatename = "MPID_STATE_".$funcname;
	if ($funcname ne "" && $statename ne $funcstatename) {
	    &PrintCurrentDirectory;
	    print "Warning: State name \"$statename\" does not match function name \"$funcname\"\n";
	}
    }
    elsif ($curline =~ /MPID_MPI_[A-Z0-9_]*FUNC_EXIT\(\s*([^\s]*)\s*\)/) {
	$endname = $1;
	if ($statename ne $endname && $endname ne "stateid") {
	    &PrintCurrentDirectory;
	    print "Warning: State $statename ended with $endname in $filename\[$line_num\]\n";
	}
    }
}
#
# Check for nested routine calls (NMPI) without Nest_incr/decr
# Special feature:
# /* begin:nested */ and /* end:nested */ can be used around
# routines that are only called with by another routine that
# has incremented the nested count
$nested_seen = 0;

sub ClearNested {
    $nested_seen = 0;
}
sub CheckForNested {
    my $FD = $_[0];
    my $curline = $_[1];
    my $filename = $_[2];
    
    # Skip definitions
    if ($curline =~ /#define\s+NMPI/) {
	return;
    }
    if ($curline =~ /NMPI_H_INCLUDED/) { 
	return;
    }
    # Check for usage
    if ($curline =~ /NMPI_([A-Za-z0-9_]*)/) {
	my $name = $1;
	if (!$nested_seen && $name ne "Abort") {
	    # Don't warn about NMPI_Abort
	    &PrintCurrentDirectory;
	    print "Error: NMPI_$name called without MPIR_Nest_incr in $filename\[$line_num\]\n";
	}
    }
    elsif ($curline =~ /MPIR_Nest_incr/) {
	$nested_seen ++;
    }
    elsif ($curline =~ /MPIR_Nest_decr/) {
	# We do not decrement nested seen since we do not analyze program 
	# structure
	# $nested_seen --;
	;
    }
}
# Check for PMPI routine calls (normally, all calls should use NMPI)
sub CheckForPMPI {
    my $FD = $_[0];
    my $curline = $_[1];
    my $filename = $_[2];
    
    # Skip definitions
    if ($curline =~ /#define\s+MPI/) { # Skip #define MPI_xxx PMPI_xxx
	return;
    }
    if ($curline =~ /pragma/) { # Skip pragmas containing PMPI
	return;
    }
    if ($curline =~ /PMPI_LOCAL/) { # Skip the PMPI_LOCAL defn
	return;
    }
    if ($curline =~ /PMPI_([A-Za-z0-9_]*)/) {
	$name = $1;
	&PrintCurrentDirectory;
	print "Error: PMPI_$name called in $filename\[$line_num\]\n";
    }
}

#
# Check for proper preamble.  Record during file reading and check at end
$saw_c_style   = 0;
$saw_cxx_style = 0;
$saw_copyright = 0;
$saw_autogenerated_file = 0;

sub ClearPreamble {
    $saw_c_style   = 0;
    $saw_cxx_style = 0;
    $saw_copyright = 0;
    $saw_autogenerated_file = 0;
}    
sub CheckForPreamble {
    my $curline = $_[0];
    if ($curline =~ /-\*-\s*Mode:\s*C;\s*c-basic-offset:4/) {
	$saw_c_style = 1;
    }
    if ($curline =~ /-\*-\s*Mode:\s*C\+\+;\s*c-basic-offset:4/) {
	$saw_cxx_style = 1;
    }
    if ($curline =~ /See COPYRIGHT/) {
	$saw_copyright = 1;
    }
    if ($curline =~ /Copyright \(C\)/) {
	# This is for files with external copyrights, such as the
	# Etnus/Dolphin files for the Totalview interface
	$saw_copyright = 1;
    }
    if ($curline =~/DO NOT EDIT THIS FILE/) {
	$saw_autogenerated_file = 1;
    }
}
sub ReportOnPreamble {
    my $filename = $_[0];

    # Is this a derived file?
    if (-s "$filename.in") { return 0; }
    if ($checkStyle && ! $saw_c_style && !$saw_cxx_style) {
	&PrintCurrentDirectory;
	print "C style header (c-basic-offset:4) missing in $filename\n";
    }
    if (! $saw_copyright  && ! $saw_autogenerated_file) {
	&PrintCurrentDirectory;
	print "Copyright statement missing from $filename\n";
	$missingCopyright ++;
	if ($removeFileMissingCopyright) {
	    print "Removing $filename\n";
	    unlink $filename;
	}
    }
}
# --------------------------------------------------------------------------
# These routines ensure that the current directory is printed before any
# warning messages.  This is used to reduce the amount of noise that
# comes out of the codingcheck
# --------------------------------------------------------------------------
$currentDirectory = "";
$directoryPrinted = 0;
sub SetCurrentDirectory {
    $currentDirectory =  $_[0];
    $directoryPrinted = 0;
}
sub PrintCurrentDirectory {
    if (!$directoryPrinted) {
	$directoryPrinted = 1;
	print "Directory $currentDirectory\n";
    }
}
#
# ToDo
# Look for overly general .cvsignore patterns.
# E.g., reject Makefile\* 
