#! /usr/bin/perl
# 
require "maint/parse.sub";

$debug = 0;
$entrycount = 0;
%defined_args = ();
$showfiles = 0;
$quiet = 0;
$build_test_pgm = 1;

# Check for special args
@files = ();
$outfile = "";
foreach $arg (@ARGV) {
    if ($arg =~ /^-showfiles/) { $showfiles = 1; }
    elsif( $arg =~ /-debug/) { $debug = 1; }
    elsif( $arg =~ /-quiet/) { $quiet = 1; }
    elsif( $arg =~ /-notest/) { $build_test_pgm = 0; }
    elsif( $arg =~ /-outfile=(.*)/) { $outfile = $1; }
    else {
	print "Adding $arg to files\n" if $debug;
	if (-d $arg) {
	    # Add all .c files from directory $arg to the list of files 
	    # to process (this lets shorten the arg list)
	    @files = (@files, &ExpandDir( $arg ));
	}
	else {
	    $files[$#files+1] = $arg;
	}
    }
}

if ($outfile ne "") {
    $OUTFD = "MyOutFile";
    open( $OUTFD, ">$outfile" ) || die "Could not open $outfile\n";
}
else {
    $OUTFD = STDOUT;
}
# Setup before processing the files
if ($build_test_pgm) {
    open( TESTFD, ">test/mpi/errhan/errcode.c" ) || die "Cannot create test program\n";
    print TESTFD "#include <stdio.h>\n#include \"mpi.h\"\n";
    print TESTFD "#define MPIR_ERR_FATAL 1\n";
    print TESTFD "#define MPIR_ERR_RECOVERABLE 0\n";
    print TESTFD "int MPIR_Err_create_code( int, int, char *, int, int, const char [], const char [], ... );\n";
    print TESTFD "void ChkMsg( int, int, const char [] );\n\n";
    print TESTFD "int main(int argc, char **argv)\n";
    print TESTFD "{\n    int err;\n    MPI_Init( 0, 0 );\n";
}

# Process the definitions
foreach $file (@files) {
    print "$file\n" if $showfiles;
    &ProcessFile( $file );
}
&CreateErrmsgsHeader( $OUTFD );
&CreateErrMsgMapping( $OUTFD );

if ($build_test_pgm) {
    print TESTFD "    MPI_Finalize();\n    return 0;\n}\n";
}    

# From the data collected above, generate the file containing the error message
# text.
# This is a temporary routine; the exact output form will be defined later
sub CreateErrmsgsHeader {
    $FD = $_[0];
    print $FD "/* -*- Mode: C; c-basic-offset:4 ; -*- */\
/*  \
 *  (C) 2001 by Argonne National Laboratory.\
 *      See COPYRIGHT in top-level directory.\
 *\
 * This file automatically created by extracterrmsgs\
 * DO NOT EDIT\
 */\n";
    print $FD "typedef struct {\
        const char *short_name, *long_name; } msgpair;\n"
}
#
# We also need a way to create the records
# We then hash these on the first occurance (or precompute the hashes?)
#
# The error messages are output in the following form:
# typedef struct {const char short[], const long[]} namemap;
# Generic messages
# static const char[] short1 = "";
# static const char[] long1 = "";
# ...
# static const namemap[] = { {short1, long1}, {...} }
sub CreateErrMsgMapping {
    my $OUTFD = $_[0];
    #
    # Read the file with the long names
    #...
    open( FD, "<src/mpi/errhan/errnames.txt" ) || return 0;
    while (<FD>) {
	if (/^\s*\#/) { next; }
	if (/^\s*(\*\*[^:]*):(.*)$/) {
	    my $name = $1;
	    my $repl = $2;
	    $repl =~ s/\r*\n*$//g;
	    while ($repl =~ /\\\s*$/) {
		# If there is a \\ at the end, read another.  
		# Remove the \ at the end (an alternative is to turn
		# it into a \n (newline), but we may want to avoid 
		# multiline messages
		$repl =~ s/\\\s*$//;
		my $inline = <FD>;
		$inline =~ s/^\s*//;   # remove leading spaces
		$repl .= $inline;
		$repl =~ s/[\r\n]*$//g; # remove newlines
	    }
	    my $namehasformat = ($name =~ /%/);
	    my $replhasformat = ($repl =~ /%/);
	    if ($namehasformat != $replhasformat) {
		print STDERR "Warning: format control usage in $name and $repl do not agree in errnames.txt\n";
	    }
	    if (!defined($longnames{"\"$name\""}))
	    {
		$longnames{"\"$name\""} = $repl;
	    }
	    else
	    {
		print STDERR "Warning: attempt to redefine $name.  Duplicate ignored.\n";
	    }
	}
    }
    close( FD );

    # Create a mapping of MPI error classes to the specific error
    # message by index into generic_err_msgs.  This reads the file
    # baseerrnames, looks up the generic message, and maps the MPI error
    # class to the corresponding index.
    # We must do this here because we must ensure that all MPI error
    # classes have been added to the generic messages
    @class_msgs = ();
    open (FD, "<src/mpi/errhan/baseerrnames.txt" ) || 
	die "Could not open src/mpi/errhan/baseerrnames.txt";
    while (<FD>)
    {
	s/#.*$//;
	($mpiname,$num,$shortmsg) = split(/\s\s*/);
	if ($shortmsg ne "")
	{
	    if ($shortmsg =~ /\%/)
	    {
		print STDERR "Warning: generic message $shortmsg in baseerrnames.txt contains format control\n";
	    }

	    $generic_msgs{"\"$shortmsg\""}++;
	    $generic_loc{"\"$shortmsg\""} = ":baseerrnames.txt";

	    $class_msgs[$num] = "\"$shortmsg\"";
	}
    }
    close (FD);

    # Now, output each short,long key
    # Do the generic, followed by the specific, messages
    # The long messages must be available for the generic message output.
    # An alternative is to separate the short from the long messages;
    # the long messages are needed for > MSG_NONE, the short for > MSG_CLASS.
    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_NONE\n";
    print $OUTFD "/* The names are in sorted order, allowing the use of a simple\
  linear search or bisection algorithm to find the message corresponding to\
  a particular message */\n";
    my $num = 0;
    foreach my $key (sort keys %generic_msgs)
    {
	$longvalue = "\"\0\"";
	if (!defined($longnames{$key}))
	{
	    $seenfile = $generic_loc{$key};
	    print STDERR "Shortname $key has no expansion (first seen in file $seenfile)\n";
	    next;
	}
	
	# Escape any naked quotes
	$longvalue = s/(?<!\\)\"/\\\"/;
	$longvalue = "\"" . $longnames{$key} . "\"";

	print $OUTFD "static const char short_gen$num\[\] = $key;\n";
	print $OUTFD "static const char long_gen$num\[\]  = $longvalue;\n";
	# Remember the number assigned to this short string.
	$short_to_num{$key} = $num;
	$num ++;
    }
    # Generate the mapping of short to long names
    print $OUTFD "\nstatic const int generic_msgs_len = $num;\n";
    my $i = 0;
    print $OUTFD "static msgpair generic_err_msgs[] = {\n";
    for (my $i = 0; $i < $num; $i ++) {
	print $OUTFD "{ short_gen$i, long_gen$i }";
	print $OUTFD "," if ($i < $num - 1);
	print $OUTFD "\n";
    }
    print $OUTFD "};\n";
    print $OUTFD "#endif\n\n";

    my $num = 0;
    # Now output the instance specific messages
    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_GENERIC\n";
    foreach $key (sort keys %specific_msgs)
    {
	$longvalue = "\"\0\"";

	if (!defined($longnames{$key}))
	{
	    print STDERR "Shortname $key has no expansion (first seen in file $specific_loc{$key})\n";
	    next;
	}

	# Escape any naked quotes
	$longvalue = s/(?<!\\)\"/\\\"/;
	$longvalue = "\"" . $longnames{$key} . "\"";

	print $OUTFD "static const char short_spc$num\[\] = $key;\n";
	print $OUTFD "static const char long_spc$num\[\]  = $longvalue;\n";
	$num ++;
    }
    # Generate the mapping of short to long names
    my $i = 0;
    print $OUTFD "\nstatic const int specific_msgs_len = $num;\n";
    print $OUTFD "static msgpair specific_err_msgs[] = {\n";
    for ($i = 0; $i < $num ; $i ++) {
	print $OUTFD "{ short_spc$i, long_spc$i }";
	print $OUTFD "," if ($i < $num - 1);
	print $OUTFD "\n";
    }
    print $OUTFD "};\n";
    print $OUTFD "#endif\n\n";

    print $OUTFD "#if MPICH_ERROR_MSG_LEVEL > MPICH_ERROR_MSG_NONE\n";
    $maxval = $#class_msgs + 1;
    print $OUTFD "#define MPIR_MAX_ERROR_CLASS_INDEX $maxval\n";
    print $OUTFD "static int class_to_index[] = {\n";
    for ($i=0; $i<=$#class_msgs; $i++) {
	print $OUTFD "$short_to_num{$class_msgs[$i]}";
	print $OUTFD "," if ($i < $#class_msgs);
	print $OUTFD "\n" if !(($i + 1) % 10);
    }
    print $OUTFD "};\n";
    print $OUTFD "#endif\n";
}
#
# Add a call to test this message for the error message.
# Handle both the generic and specific messages
#
sub AddTestCall {

    my $last_errcode = $_[0];
    my $fatal_flag = $_[1];
    my $fcname = $_[2];
    my $linenum = $_[3];
    my $errclass = $_[4];
    my $generic_msg = $_[5];
    my $specific_msg = $_[6];

    # Ensure that the last_errcode, class and fatal flag are specified.  There are a few places where these are variables.
    if (!($last_errcode =~ /MPI_ERR_/) )
    {
	$last_errcode = "MPI_SUCCESS";
    }
    if (!($errclass =~ /MPI_ERR_/) )
    {
	$errclass = "MPI_ERR_OTHER";
    }
    if (!($fatal_flag =~ /MPIR_ERR_FATAL/) && !($fatal_flag =~ /MPIR_ERR_RECOVERABLE/))
    {
	$fatal_flag = "MPIR_ERR_FATAL";
    }

    # Generic message (first instance only)
    if (!defined($test_generic_msg{$generic_msg}))
    {
	$test_generic_msg{$generic_msg} = $filename;

	print TESTFD "    /* $filename */\n";
	print TESTFD "    err = MPIR_Err_create_code($last_errcode, $fatal_flag, \"errcode::main\", __LINE__, $errclass, ". 
	    "$generic_msg, 0);\n";
	print TESTFD "    ChkMsg( err, $errclass, $generic_msg );\n";
    }

    # Specific messages
    $specific_msg = "0" if ($specific_msg =~ /^\s*NULL\s*$/);
    if ($specific_msg ne "0" && !defined($test_specific_msg{$specific_msg}))
    {
	$test_specific_msg{$specific_msg} = $filename;

	print TESTFD "    {\n";
	print TESTFD "    /* $filename */\n";
	# Use types in the string to create the types with default
	# values
	my $format = $specific_msg;
	my $fullformat = $format;
	my $narg = 0;
	my @args = ();
	while ($format =~ /[^%]*%(.)(.*)/)
	{
	    my $type = $1; 
	    $format  = $2;
	    $narg ++;
	    if ($type eq "d")
	    {
		print TESTFD "    int i$narg = $narg;\n";
		$args[$#args+1] = "i$narg";
	    }
	    elsif ($type eq "s")
	    {
		print TESTFD "    char s$narg\[\] = \"string$narg\";\n";
		$args[$#args+1] = "s$narg";
	    }
	    elsif ($type eq "p")
	    {
		print TESTFD "    char s$narg\[\] = \"string$narg\";\n";
		$args[$#args+1] = "s$narg";
	    }
	    else
	    {
		print STDERR "Unrecognized format type $type for $fullformat in $filename\n";
	    }
	}   
	$actargs = $#_ - 6;
	if ($actargs != $narg)
	{
	    print STDERR "Error: Format $fullformat provides $narg arguments but call has $actargs in $filename\n";
	}
	print TESTFD "     err = MPIR_Err_create_code($last_errcode, $fatal_flag, \"errcode::main\", __LINE__, $errclass, " .
	    "$generic_msg, $specific_msg";
	foreach my $arg (@args) 
	{
	    print TESTFD ", $arg";
	}
	print TESTFD " );\n";
	print TESTFD "    ChkMsg( err, $errclass, $specific_msg );\n    }\n";
	# ToDo: pass another string to ChkMsg that contains the 
	# names of the variables, as a single string (comma separated).
	# This allows us to review the source of the values for the args.
    }
}


# ==========================================================================
# Call this for each file
# This reads a C source or header file and adds does the following:
#   adds any generic message short names encountered to the hash generic_msgs.
#   adds any specific message short names encounter to the hash specific_msgs.
#   adds the filename to the hash generic_loc{msg} as the value (: separated)
#       and the same for hash specific_loc{msg}.
#   The last two are used to provide better error reporting.
#
$filename = "";    # Make global so that other routines can echo filename
sub ProcessFile
{ 
    $filename = $_[0];
    open (FD, "<$filename" ) || die "Could not open $filename\n";

    while (<FD>) {
	# First, remove any comments
	$_ = StripComments( FD, $_ );
	# Skip the definition of the function
	if (/int\s*MPIR_Err_create_code/) { $remainder = ""; next; }
	while (/MPIR_Err_create_code\s*(\(.*$)/) {
	    ($leader, $remainder, @args ) = &GetSubArgs( FD, $1 );
	    if ($debug) {
		foreach $arg (@args) {
		    print "|$arg|\n";
		}
	    }
	    
	    # if signature does not match new function prototype, then skip it
	    if ($#args < 6 || $args[3] ne "__LINE__")
	    {
		if (!defined($bad_syntax_in_file{$filename}))
		{
		    $bad_syntax_in_file{$filename} = 1;
		    print STDERR "Warning: MPIR_Err_create_code() call with too few arguments in $filename\n";
		}
		next;
	    }
	    
	    my $last_errcode = $args[0];
	    my $fatal_flag = $args[1];
	    my $fcname = $args[2];
	    my $linenum = $args[3];
	    my $errclass = $args[4];
	    my $generic_msg = $args[5];
	    my $specific_msg = $args[6];

	    # Check the generic and specific message arguments
	    if ($generic_msg =~ /\s$/)
	    {
		print STDERR "Warning: trailing blank on arg $generic_msg in $filename!\n"; 
	    }
	    if (!($generic_msg =~ /^\"\*\*\S+\"$/))
	    {
		print STDERR "Error: generic message $generic_msg has incorrect format in $filename\n";
		next;
	    }
	    if ($generic_msg =~ /%/) {
		print STDERR "Warning: generic message $generic_msg in $filename contains a format control\n";
	    }

	    $specific_msg = "0" if ($specific_msg =~ /^\s*NULL\s*$/);
	    if ($specific_msg =~ /^[1-9]/)
	    {
		print STDERR "Error: instance specific message $specific_msg in $filename is an invalid integer ". 
		    "(must be 0 or a string)\n";
		next;
	    }
	    if ($specific_msg eq $generic_msg)
	    {
		print STDERR "Warning: generic and instance specific messages must be different " .
		    "(file $filename, message $generic_msg)\n";
	    }
	    if ($specific_msg ne "0" && !($specific_msg =~ /\%/))
	    {
		print STDERR "Warning: instance specific message $specific_msg in $filename contains no format control\n";
	    }

	    if ($build_test_pgm)
	    {
		&AddTestCall( @args )
	    }

	    $generic_msgs{$generic_msg}++;
	    $generic_loc{$generic_msg} .= ":$filename";

	    if ($specific_msg =~ /^\"\*\*/)
	    {
		$specific_msgs{$specific_msg}++;
		$specific_loc{$specific_msg} .= ":$filename";
	    }
	}
	continue
        {
            $_ = $remainder;
        }
    }		
    close FD;
}

# Get all of the .c files from the named directory, including any subdirs
sub ExpandDir {
    my $dir = $_[0];
    my @otherdirs = ();
    my @files = ();
    opendir DIR, "$dir";
    while ($filename = readdir DIR) {
	if ($filename =~ /^\./ || $filename eq "CVS") {
	    next;
	}
	elsif (-d "$dir/$filename") {
	    $otherdirs[$#otherdirs+1] = "$dir/$filename";
	}
	elsif ($filename =~ /(.*\.c)$/) {
	    $files[$#files + 1] = "$dir/$filename";
	}
    }
    closedir DIR;
    # (almost) tail recurse on otherdirs (we've closed the directory handle,
    # so we don't need to worry about it anymore)
    foreach $dir (@otherdirs) {
	@files = (@files, &ExpandDir( $dir ) );
    }
    return @files;
}
#
# Other todos:
# It would be good to keep track of any .N MPI_ERR_xxx names in the structured
# comment and match these against any MPI_ERR_yyy used in the code, emitting a
# warning message for MPI_ERR_yyy values used in the code but not mentioned 
# in the header.  This could even apply to routines that are not at the MPI
# layer, forcing all routines to document all MPI error classes that they might
# return (this is like requiring routines to document the exceptions that 
# they may throw).

