#!/usr/bin/perl -w
#
# This is codegen.pl  $Revision: 1.12 $
#
# See the LICENSE file for conditions of usage
#
# Used to generate Fortran77 code that solves a system
# of linear equations. We use QR factorization and back-substitution.
#
# There are no positional arguments; only the optional flags listed below.
# We take the optimqr.seq file generated by optimqr, and produce a Fortran
# file containing unrolled code that hopefully does the deeds.
#
# The input file is assumed to be named optimqr.seq
# The output files are fastqr.f, regress.system, ordered.system and
# final.system
#
# Usage:
#  ./codegen.pl  [-d] [-uf] [-e] [-ub]
#
# Flags:
#  -d    Write debugging checks in the code
#  -uf   Unroll all loops in factorization
#  -e    Allow extra work in factorization and back substitution, 
#        in order to generate fewer loops
#  -ub   Unroll all loops in back substitution
#

use strict;

#
# Description of the linear system, filled in from optimqr.seq
#
my $dimension = 0;                              # taken from the first input line
my (@row_ordering, @column_ordering);           # position -> index read from the file
my (%row_rev_ordering, %column_rev_ordering);   # index read from the file -> position
my %system;                                     # sparsity pattern keyed by {row,column};
                                                # 'x' marks a nonzero

# Number of FLOPs accounted for while generating code
my $flops = 0;

#
# Tuning constants
#
# ICOST is the cost factor between doing a FLOP in a loop and doing an
# unrolled FLOP.  Unrolled FLOPs are rated as more expensive, because of
# instruction cache thrashing.
#
# MSEQCF / MSEQCB are the Minimal Sequence Counts: the minimal number of
# consecutive elements that can trigger generation of a loop, for the
# Factorization and for the Back-substitution respectively.
#
my ($ICOST, $MSEQCF, $MSEQCB) = (1.5, 3, 4);

#
# Statistics printed at the end of the run.  The f* counters belong to the
# factorization, the b* counters to the back-substitution:
#   singles - elements handled by unrolled statements
#   loops   - number of DO loops generated
#   eloops  - number of elements covered by those DO loops
#
my ($fsingles, $floops, $feloops) = (0, 0, 0);
my ($bsingles, $bloops, $beloops) = (0, 0, 0);

#
# Config flags, set from the command line (-d, -uf, -e, -ub)
#
my ($FLAG_D, $FLAG_UF, $FLAG_EF, $FLAG_UB) = (0, 0, 0, 0);

#
# The overall strategy is:
#
#  1. Read system size
#  2. Read system sparsity pattern
#  3. Read system row-/column ordering
#  4. Generate code pre-amble
#  5. Read the Givens rotation sequence, and generate code as appropriate
#
# Done...
#

# Start and end columns of the sequences currently being worked on.  These
# are shared between the code generators and the sequence defragmenter.
my (@seqstart, @seqend);


#
# Helper routines, for fixing the column/row ordering
#

# Fortran reference to the matrix element at position ($i,$j) of the
# ordered system: both indices are mapped through the orderings and made
# 1-based.
sub A {
    my ($i,$j) = @_;
    my $fortran_row = $row_ordering[$i] + 1;
    my $fortran_col = $column_ordering[$j] + 1;
    return "A($fortran_row,$fortran_col)";
}

# Like A(), but only the row goes through the ordering; $j is used as a
# column index directly (made 1-based).
sub Ap {
    my ($i,$j) = @_;
    return sprintf("A(%s,%s)", $row_ordering[$i] + 1, $j + 1);
}

# Fortran reference to row $i (mapped through the row ordering) with the
# literal loop variable "i" as column; used inside generated DO loops.
sub At {
    my ($i) = @_;
    my $fortran_row = $row_ordering[$i] + 1;
    return "A($fortran_row,i)";
}

# Fortran reference to the right-hand-side element for row position $i.
sub B {
    my ($i) = @_;
    return sprintf("B(%s)", $row_ordering[$i] + 1);
}

# Fortran reference to the solution element for column position $i.
sub X {
    my ($i) = @_;
    return join("", "X(", $column_ordering[$i] + 1, ")");
}

# True if the sparsity pattern holds a nonzero ('x') at ($i,$j); the
# indices are used as they are, without going through the orderings.
sub matrix {
    my ($i,$j) = @_;
    my $cell = $system{$i,$j};
    return $cell eq 'x';
}

# True if the sparsity pattern holds a nonzero ('x') at position ($i,$j)
# of the ordered system, i.e. after mapping both indices through the
# row-/column ordering.
sub sysmatrix {
    my ($i,$j) = @_;
    my ($row, $column) = ($row_ordering[$i], $column_ordering[$j]);
    return $system{$row,$column} eq 'x';
}

# True if the sparsity pattern holds a nonzero ('x') in column $j of the
# row at position $i; only the row goes through the ordering.
sub sysmatrixp {
    my ($i,$j) = @_;
    # $system{$a,$b} is shorthand for a key joined with $;
    my $key = join($;, $row_ordering[$i], $j);
    return $system{$key} eq 'x';
}

#
# Decide whether two neighbouring sequences should be joined
#
# $S and $N are the lengths of the two sequences and $G is the length of
# the gap between them.  Joining saves $S + $N unrolled FLOPs (each rated
# at $ICOST) and costs $S + $G + $N looped FLOPs instead.
#
# Returns 1 if extra work is allowed (-e) and the trade is a strict win,
# and 0 otherwise.
#
sub do_defrag {
    my ($S,$G,$N) = @_;
    return 0 unless $FLAG_EF;
    my $saved = $ICOST * ($S + $N);
    my $lost = $S + $G + $N;
    return ($saved - $lost > 0) ? 1 : 0;
}

#
# Decide whether the rotation of columns $start..$stop should be unrolled
#
# Returns true when the sequence holds fewer than $MSEQCF elements, i.e.
# when it is too short to justify the loop counter stuff.
#
sub unroll_rotation {
    my ($start,$stop) = @_;
    my $length = $stop - $start + 1;
    return $length < $MSEQCF;
}

#
# Decide whether we should unroll the back substitution of columns
# $start..$stop
#
# Same test as unroll_rotation, but against the back-substitution
# threshold $MSEQCB.
#
sub unroll_backsub {
    my ($start,$stop) = @_;
    my $length = $stop - $start + 1;
    return $length < $MSEQCB;
}

#
# The generic sequence defragmenter
#
# Defragments the sequences defined in the global arrays @seqstart and
# @seqend.
#
# On entry the two arrays hold the start and end columns of the sequences
# in ascending order.  Every sequence is compared with the one just before
# it: with S the length of the previous sequence, G the gap between them
# and N the length of the current one, do_defrag(S,G,N) decides whether
# the two are joined into one sequence that also spans the gap.
#
# On exit the arrays hold the resulting sequences in DESCENDING order, so
# that callers can pop() them off in ascending column order.
#
# Dies if the two arrays do not hold the same number of elements.  Empty
# lists are returned unchanged.
#
sub defragment_sequences {
    my $count = scalar(@seqstart);
    if($count != scalar(@seqend)) {
        die "start/stop invariant failed";
    }

    my @newseqstart = ();
    my @newseqend = ();
    for(my $n = 0; $n < $count; $n++) {
        my $start = $seqstart[$n];
        my $stop = $seqend[$n];

        # The first sequence has no predecessor, so there is nothing to
        # join it with and it is never offered to do_defrag.
        if($n > 0) {
            # Length of the previous (original) sequence
            my $S = $seqend[$n-1] - $seqstart[$n-1] + 1;
            # Number of columns between the previous sequence and this one
            my $G = $start - $seqend[$n-1] - 1;
            # Length of the current sequence
            my $N = $stop - $start + 1;

            if(do_defrag($S,$G,$N) == 1) {
                # Join: the sequence registered last simply grows to the
                # end of the current one.
                $newseqend[-1] = $stop;
                next;
            }
        }

        push @newseqstart, $start;
        push @newseqend, $stop;
    }

    # Hand the sequences back reversed, the callers pop() them
    @seqstart = reverse(@newseqstart);
    @seqend = reverse(@newseqend);
}


#
# Annihilate element (j,k) using element (i,k)
#
# Writes Fortran code to OUTF that
#  - computes the Givens rotation coefficients C and S from A(j,k) and
#    A(i,k),
#  - applies the rotation to rows i and j of A, as single statements
#    and/or DO loops,
#  - applies the same rotation to B.
#
# $i and $j are row positions and $k is a column position; A() and B()
# map them through the row-/column ordering.
#
# Side effects: @seqstart and @seqend are rebuilt (and left empty), fill-ins
# and the eliminated element are recorded in %system, and $flops, $fsingles,
# $floops and $feloops are updated.
#
sub givens_rotation {
    my ($i,$j,$k) = @_;

    print OUTF "C\n";
    print OUTF "C     Annihilate ".A($j,$k)." with ".A($i,$k)."\n";
    print OUTF "C\n";
    # An element below 1.d-6 is treated as already eliminated (identity rotation)
    print OUTF "      IF(ABS(".A($j,$k).").LT.1.d-6) THEN\n";
    print OUTF "       C = 1.d0\n";
    print OUTF "       S = 0.d0\n";
    print OUTF "      ELSE\n";
    print OUTF "       IF(ABS(".A($j,$k).").GT.ABS(".A($i,$k).")) THEN\n";
    print OUTF "        T = -".A($i,$k)."/".A($j,$k)."\n";
    $flops += 1;
    print OUTF "        S = 1.d0/SQRT(1.d0+T*T)\n";
    $flops += 4;
    print OUTF "        C = S*T\n";
    $flops += 1;
    print OUTF "       ELSE\n";
    # NOTE(review): FLOPs are counted for the branch above only; presumably
    # because just one of the two branches runs - confirm.
    print OUTF "        T = -".A($j,$k)."/".A($i,$k)."\n";
    print OUTF "        C = 1.d0/SQRT(1.d0+T*T)\n";
    print OUTF "        S = C*T\n";
    print OUTF "       END IF\n";
    print OUTF "      END IF\n";
    print OUTF "C     We now know the Givens rotation coefficients C and S\n";
    print OUTF "C     Apply Givens Rotation on the two rows\n";
    my $ci;

    #
    # The idea now is to find sequences of elements that need
    # rotation. These can be put into do-loops, to minimize
    # the total number of instructions in the code.
    #
    # First, find maximal runs of columns where at least one of the
    # two rows holds a nonzero; a run stops at the first column where
    # both rows hold a zero.
    # Then, join runs (see defragment_sequences), by accepting a little
    # extra work when we rotate columns where both rows hold zeros.
    #

    @seqend = ();
    @seqstart = ();

    # Scan for sequences
    my $ins = 0;
    for($ci = 0; $ci < $dimension; $ci++) {
	if($ins) {
	    # We're in a sequence, look for stop
	    if(!sysmatrixp($j,$ci) && !sysmatrixp($i,$ci)) {
		$ins = 0;
		push @seqend, $ci-1;
	    }
	} else {
	    # We're not in a sequence, look for new
	    if(sysmatrixp($j,$ci) || sysmatrixp($i,$ci)) {
		$ins = 1;
		push @seqstart, $ci;
	    }
	}
    }
    # A sequence still open at the last column ends there
    if($ins) { push @seqend, $dimension-1 }

    #
    # We have the sequences in @seqend and @seqstart
    #
    # Now we should call the defragmenter to have the
    # sequence concatenation optimization done
    #
    defragment_sequences();

    #
    # Finally, generate the loops and the single rotations needed
    # to do what we want to do.
    #
    # The defragmenter hands the lists back reversed, so pop() yields
    # the sequences in ascending column order.
    #
    while($#seqstart != -1) {
	my $start = pop(@seqstart);
	my $stop = pop(@seqend);

	if($FLAG_UF || unroll_rotation($start,$stop)) {
	    for($ci = $start; $ci <= $stop; $ci++) {
#		print "Single $ci\n";
		# Generate single element rotation
		if(sysmatrixp($j,$ci) || sysmatrixp($i,$ci)) {
		    print OUTF "C     Generate single element rotation\n";
		}
		# First statement group: load T (row i) / T2 (row j) as needed
		# and write the new row i element.
		if(sysmatrixp($j,$ci) && sysmatrixp($i,$ci)) {
		    $fsingles ++;
		    # Need complete rotation
		    print OUTF "      T = ".Ap($i,$ci)."\n";
		    print OUTF "      T2 = ".Ap($j,$ci)."\n";
		    print OUTF "      ".Ap($i,$ci)." = C*T - S*T2\n";
		    $flops += 3;
		} elsif(sysmatrixp($j,$ci)) {
		    $fsingles ++;
		    # Only destination is nonzero
		    print OUTF "      T2 = ".Ap($j,$ci)."\n";
		    print OUTF "      ".Ap($i,$ci)." = - S*T2\n";
		    $flops += 1;
		} elsif(sysmatrixp($i,$ci)) {
		    $fsingles ++;
		    # Only source is nonzero
		    print OUTF "      T = ".Ap($i,$ci)."\n";
		    print OUTF "      ".Ap($i,$ci)." = C*T\n";
		    $flops += 1;
		} else {
		    # Neither source nor destination are nonzero
		}
		# Second statement group: write the new row j element.  %system
		# has not been changed yet for this column, so the tests below
		# see the same pattern as the tests above.
		# No need to rotate the element to eliminate to zero, just set it.
		if($ci == $column_ordering[$k]) {
		    if($FLAG_D) {
			# The generated check reuses the T and T2 assigned by the
			# statements emitted just above.
			print OUTF "C     Debugging: We check that we could eliminate\n";
			print OUTF "C     the element...\n";
			print OUTF "      if(abs(S*T+C*T2) .GT. 1.d-6) then\n";
			print OUTF "         print *,'Assertion failed!'\n";
			print OUTF "         print *,'Cannot eliminate element ".Ap($j,$ci)." = ',S*T+C*T2\n";
			print OUTF "         STOP\n";
			print OUTF "      end if\n";
		    }
		    print OUTF "C     We set the elim. element to zero\n";
		    print OUTF "      ".Ap($j,$ci)." = 0\n";
		} else {
		    if(sysmatrixp($j,$ci) && sysmatrixp($i,$ci)) {
			# Complete rotation done
			print OUTF "      ".Ap($j,$ci)." = S*T + C*T2\n";
			$flops += 3;
			# Mark fillins in our matrix structure, for later reference
			$system{$row_ordering[$j],$ci} = 'x';
			$system{$row_ordering[$i],$ci} = 'x';
		    } elsif(sysmatrixp($j,$ci)) {
			# T is zero
			print OUTF "      ".Ap($j,$ci)." = C*T2\n";		    
			$flops += 1;
			# Mark fillins in our matrix structure, for later reference
			$system{$row_ordering[$i],$ci} = 'x';
		    } elsif(sysmatrixp($i,$ci)) {
			# T2 is zero
			print OUTF "      ".Ap($j,$ci)." = S*T\n";
			$flops += 1;
			# Mark fillins in our matrix structure, for later reference
			$system{$row_ordering[$j],$ci} = 'x';
		    } else {
			# Nothing to do
		    }
		}
	    }
	} else {
	    $floops ++;
	    $feloops += $stop - $start + 1;
#	    print "Loop $start - $stop\n";
	    # Generate looped rotation
	    # The loop rotates every column of the sequence; the eliminated
	    # element is computed here, not set to zero explicitly.
	    print OUTF "C     Generate looped rotation\n";
	    print OUTF "      DO i=".($start+1).",".($stop+1)."\n";
	    print OUTF "        T = ".At($i)."\n";
	    print OUTF "        T2 = ".At($j)."\n";
	    print OUTF "        ".At($i)." = C*T - S*T2\n";
	    print OUTF "        ".At($j)." = S*T + C*T2\n";
	    print OUTF "      END DO\n";
	    my $c;
	    # Walk the looped columns to emit the debugging check (if asked
	    # for) and to record the fill-ins.
	    for($c = $start; $c <= $stop; $c++) {
		if($FLAG_D) {
		    if($column_ordering[$k] == $c) {
			# Check that $j,$k is close to zero
			print OUTF "C     Make sure $j,$k is close to zero\n";
			print OUTF "      if (abs(".A($j,$k).") .GT. 1.d-6) then\n";
			print OUTF "         print *,'Fatal elimination error at ".A($j,$k)."'\n";
			print OUTF "         print *,'".A($i,$k)." is ',".A($i,$k)
			    .",' ".A($j,$k)." is ',".A($j,$k)."\n";
			print OUTF "         print *,'S is ',S,' C is ',C\n";
			print OUTF "         STOP\n";
			print OUTF "      endif\n";
		    }
		}
		# A nonzero in either row makes the column nonzero in both rows
		if(sysmatrixp($i,$c) || sysmatrixp($j,$c)) {
		    $system{$row_ordering[$j],$c} = 'x';
		    $system{$row_ordering[$i],$c} = 'x';
		}
	    }
	    # The four statements in the loop body are counted as 6 FLOPs
	    # per column
	    $flops += ($stop - $start +1) * 6;
	}

    }

    # Remove the non-zero we just eliminated from our system structure
    $system{$row_ordering[$j],$column_ordering[$k]} = '0';

    # Also do stuff with B
    print OUTF "C     Remember B\n";
    print OUTF "      T = ".B($i)."\n";
    print OUTF "      T2 = ".B($j)."\n";
    print OUTF "      ".B($i)." = C*T - S*T2\n";
    $flops += 3;
    print OUTF "      ".B($j)." = S*T + C*T2\n";
    $flops += 3;
}

# Dump the sparsity pattern to standard output, one row per line.  Each
# line starts with the row number and the row ordering entry for that
# number; the pattern itself is printed without applying the orderings.
sub writesys {
    for(my $row = 0; $row < $dimension; $row++) {
        my $text = sprintf("%3i,%3i: ", $row, $row_ordering[$row]);
        for(my $column = 0; $column < $dimension; $column++) {
            $text .= matrix($row,$column) ? "x " : "0 ";
        }
        print $text, "\n";
    }
}


#
# Generate the back-substitution code for the row at position $row
#
# The generated Fortran sums X(column) * A(row,column) into T for the
# columns of the row that hold a nonzero and lie after position $row in
# the column ordering, and then solves for X using B and the diagonal
# element.  Long enough runs of columns become DO loops, unless -ub is
# given.
#
# Updates $flops, $bsingles, $bloops and $beloops, and leaves @seqstart
# and @seqend empty when sequences were processed.
#
sub backsubstitute_row {
    my ($row) = @_;

    #
    # Find the runs of columns that take part in the sum
    #
    @seqstart = ();
    @seqend = ();

    my $inside = 0;
    for(my $col = 0; $col < $dimension; $col++) {
        my $wanted = sysmatrixp($row,$col) && ($column_rev_ordering{$col} > $row);
        if($inside && !$wanted) {
            # The run stopped at the previous column
            $inside = 0;
            push @seqend, $col-1;
        } elsif(!$inside && $wanted) {
            # A new run starts here
            $inside = 1;
            push @seqstart, $col;
        }
    }
    push @seqend, $dimension-1 if $inside;

    #
    # With nothing but the diagonal element in the row there is no sum:
    # calculate b(i) <- b(i) / U(i,i)
    #
    if(!@seqstart) {
        print OUTF "      ".X($row)." = ".B($row)." / ".A($row,$row)."\n";
        $flops += 1;
        return;
    }

    #
    # The ordinary sequence optimizer may join two runs across the
    # diagonal element, which must stay out of the sum.  So after using
    # it, look for the diagonal element in the resulting sequences and
    # split any sequence holding it.
    #
    if(!$FLAG_UB) {
        defragment_sequences();

        my $diagonal = $column_ordering[$row];
        my @fixedstart = ();
        my @fixedend = ();
        while(@seqstart) {
            my $first = pop(@seqstart);
            my $last = pop(@seqend);
            if($first == $diagonal) {
                print "** This should not happen! #1\n";
                $first ++;
            } elsif($last == $diagonal) {
                print "** This should not happen! #2\n";
                $last --;
            } elsif($first < $diagonal && $diagonal < $last) {
                # Keep the part before the diagonal as its own sequence,
                # and carry on with the part after it
                push @fixedstart, $first;
                push @fixedend, $diagonal-1;
                $first = $diagonal+1;
            }
            push @fixedstart, $first;
            push @fixedend, $last;
        }
        @seqstart = reverse(@fixedstart);
        @seqend = reverse(@fixedend);
    }

    #
    # Generate code: T = U(i,i+1:n)b(i+1:n)
    #
    print OUTF "      T = 0.d0\n";
    while(@seqstart) {
        my $first = pop(@seqstart);
        my $last = pop(@seqend);
        die "Bummer! In back-substitution code generator" if $last < $first;

        my $length = $last - $first + 1;
        if($FLAG_UB || unroll_backsub($first,$last)) {
            # Unrolled: one statement per column that really takes part
            for(my $col = $first; $col <= $last; $col++) {
                next unless sysmatrixp($row,$col) && ($column_rev_ordering{$col} > $row);
                print OUTF "      T = T + X(".($col+1).") * ".Ap($row,$col)."\n";
                $flops += 2;
                $bsingles ++;
            }
        } else {
            # Looped: every column from $first to $last is included
            print OUTF "      DO i=".($first+1).",".($last+1)."\n";
            print OUTF "        T = T + X(i) * ".At($row)."\n";
            print OUTF "      END DO\n";
            $flops += $length * 2;
            $bloops ++;
            $beloops += $length;
        }
    }

    # Calculate b(i) <- (b(i) - T) / U(i,i)
    print OUTF "      ".X($row)." = (".B($row)." - T) / ".A($row,$row)."\n";
    $flops += 2;
}

#
# Parse command line
#

# Each accepted switch simply sets its config flag; anything else is fatal.
my %flag_for = (
    "-d"  => \$FLAG_D,
    "-uf" => \$FLAG_UF,
    "-ub" => \$FLAG_UB,
    "-e"  => \$FLAG_EF,
);
foreach my $argument (@ARGV) {
    my $flag = $flag_for{$argument} or die "Unknown parameter given.";
    $$flag = 1;
}

# Show the configuration that resulted from the command line
print "---------------------------------------\n";
print "    Unroll all factorization loops: ", ($FLAG_UF ? "yes" : "no"), "\n";
print " Allow extra work (loop minimizer): ", ($FLAG_EF ? "yes" : "no"), "\n";
print " Unroll all backsubstitution loops: ", ($FLAG_UB ? "yes" : "no"), "\n";
print "         Generate debugging checks: ", ($FLAG_D ? "yes" : "no"), "\n";
print "---------------------------------------\n";

#
# Get on with it
#

# Open the sequence file written by optimqr and the Fortran output file.
# Both handles stay barewords: INF is read again further down and the code
# generating subs print to OUTF.
open(INF, '<', "optimqr.seq") or die "Unable to open sequence file: $!";
open(OUTF, '>', "fastqr.f") or die "Unable to create output file: $!";

# Read dimension: the first line must hold a single non-negative integer.
# Only the digits are kept, so stray whitespace never ends up in the
# generated Fortran.
$dimension = <INF>;
defined($dimension) or die "Sequence file is empty";
chomp($dimension);
$dimension =~ /^\s*(\d+)\s*$/
    or die "Bad system dimension in sequence file: '$dimension'";
$dimension = $1;
print "System dimension is ".$dimension."\n";

# Read sparsity pattern: $dimension lines of whitespace separated cells,
# where 'x' marks a nonzero.  Splitting the line (rather than stripping
# "cell + space" off its front) also reads the last cell correctly when
# the line has no trailing space.
my ($i,$j);
my $nz = 0;
for($i = 0; $i < $dimension; $i++) {
    my $line = <INF>;
    defined($line) or die "Sequence file ends inside the sparsity pattern (row $i)";
    my @cells = split(' ', $line);
    scalar(@cells) >= $dimension
        or die "Sparsity pattern row $i holds fewer than $dimension entries";
    for($j = 0; $j < $dimension; $j++) {
        $system{$i,$j} = $cells[$j];
        $nz++ if ($cells[$j] eq "x");
    }
}
print "System holds ".$nz." non-zeros\n";

# Read row-/column ordering: $dimension lines of "<row> <column>".  Line
# number $i gives the row and the column found at position $i; the reverse
# maps go from row/column back to the position.
for($i = 0; $i < $dimension; $i++) {
    my $line = <INF>;
    defined($line) or die "Sequence file ends inside the row-/column ordering (line $i)";
    chomp($line);
    my ($row_index, $column_index) = $line =~ /(\d+) (\d+)/
        or die "Bad row-/column ordering line: '$line'";
    push(@row_ordering, $row_index);
    $row_rev_ordering{$row_index} = $i;
    push(@column_ordering, $column_index);
    $column_rev_ordering{$column_index} = $i;
}
print "Row-/column ordering read\n";

print "Building info file for the regression tester\n";

# regress.system holds the dimensions on the first line, followed by the
# sparsity pattern as read from the input (no ordering applied): one line
# per row, "1" for a nonzero and "0" for anything else.
open(my $regress_fh, '>', "regress.system")
    or die "Couldn't create regress.system file: $!";
print {$regress_fh} "$dimension $dimension \n";
for(my $row = 0; $row < $dimension; $row++) {
    my $text = "";
    for(my $column = 0; $column < $dimension; $column++) {
        $text .= ($system{$row,$column} eq 'x') ? "1 " : "0 ";
    }
    print {$regress_fh} $text, "\n";
}
# Buffered write errors only show up when the file is closed
close($regress_fh) or die "Couldn't write regress.system file: $!";

print "Building ordered-system file\n";

# ordered.system holds the sparsity pattern with the row-/column ordering
# applied: one line per row, "1" for a nonzero and "0" for anything else.
open(my $ordered_fh, '>', "ordered.system")
    or die "Couldn't create ordered.system file: $!";
for(my $row = 0; $row < $dimension; $row++) {
    my $text = "";
    for(my $column = 0; $column < $dimension; $column++) {
        $text .= sysmatrix($row,$column) ? "1 " : "0 ";
    }
    print {$ordered_fh} $text, "\n";
}
# Buffered write errors only show up when the file is closed
close($ordered_fh) or die "Couldn't write ordered.system file: $!";

# Generate start of routine: a comment header describing how the code was
# made, followed by the SUBROUTINE statement and the declarations.
print "Generating code pre-amble\n";

my @preamble = (
    "C",
    "C     This routine was automatically generated by the",
    "C     codegen.pl program, from data calculated by the",
    "C     optimqr sparse-QR optimization program.",
    "C",
    # Single quotes: the dollar signs belong to the revision control keyword
    'C     $Id: codegen.pl,v 1.12 1999/03/08 09:25:06 jakob Exp $ ',
    "C",
    "C     It performs a QR factorization and back-substition",
    "C     on the system given as argument.",
    "C     This code will _only_ work for systems with the ",
    "C     specific structure for which this code was generated.",
    "C",
    "C     Flags: ".join(" ",@ARGV),
    "C",
    "C     -uf and -ub probably are good on vector machines",
    # The switch is called -e on the command line (there is no -ef)
    "C     -e reduces code size, which may help on cache machines",
    "C",
    "      SUBROUTINE FASTQR(A,X,B)",
    "      DOUBLE PRECISION A($dimension,$dimension),",
    "     &                 B($dimension), X($dimension),",
    "     &                 T, T2, C, S",
    "      INTEGER i",
    "",
);
foreach my $text (@preamble) {
    print OUTF $text, "\n";
}


#
# Read Givens Rotation Coordinates, and generate code that fits
#

print "Generating code for QR factorization...\n";

# The rest of the input holds one rotation per line: source row,
# destination row and column.  Both elements must be nonzero in our
# picture of the system before the rotation is generated; $transform
# numbers the rotations for the error message.
my $transform = 1;
while(defined(my $record = <INF>)) {
    chomp($record);
    my ($src, $dst, $col) = $record =~ /(\d+) (\d+) (\d+)/
        or die "Error in input file: $record";
    if(!sysmatrix($src,$col) || !sysmatrix($dst,$col)) {
        writesys();
        print "Source $src,$col is ".sysmatrix($src,$col)."\n";
        print "Destination $dst,$col is ".sysmatrix($dst,$col)."\n";
        die "Integrity error running $transform!";
    }
    givens_rotation($src,$dst,$col);
    $transform++;
}


print "Building final-system file\n";

# final.system holds the sparsity pattern as it looks after all rotations
# (fill-ins added, eliminated elements removed), with the row-/column
# ordering applied: one line per row, "1" for a nonzero and "0" otherwise.
open(my $final_fh, '>', "final.system")
    or die "Couldn't create final.system file: $!";
for(my $row = 0; $row < $dimension; $row++) {
    my $text = "";
    for(my $column = 0; $column < $dimension; $column++) {
        $text .= sysmatrix($row,$column) ? "1 " : "0 ";
    }
    print {$final_fh} $text, "\n";
}
# Buffered write errors only show up when the file is closed
close($final_fh) or die "Couldn't write final.system file: $!";


print "Generating code for back substitution...\n";
print OUTF "C\n";
print OUTF "C     Perform back-substitution\n";
print OUTF "C\n";
# The last row only holds its diagonal element, so it is solved directly;
# the remaining rows are handled bottom-up.
print OUTF "      ".X($dimension-1)." = ".B($dimension-1)."/".A($dimension-1,$dimension-1)."\n";
$flops += 1;
for(my $row = $dimension-2; $row >= 0; $row--) { backsubstitute_row($row) }

# Terminate routine
print "Terminating Fortran routine\n";
print OUTF "      RETURN\n";
print OUTF "      END\n";

# Close the files before reporting success: buffered write errors on the
# generated Fortran file only show up at close time.
close(INF);
close(OUTF) or die "Unable to finish writing output file: $!";

print "-------------------------------\n";
print "Total FLOP count:    $flops\n";
print "In factorization:\n";
print " Looped eliminations: $feloops ($floops)\n";
print " Single eliminations: $fsingles\n";
print "In backsubstitution:\n";
print " Looped eliminations: $beloops ($bloops)\n";
print " Single eliminations: $bsingles\n";
print "-------------------------------\n";

print "Done.\n";

