# l2fit: least squares fit a data file to an expression
# The next line is a hack to include the text file with the awk program.
cat >/dev/null <<'ENDSCHEMAS'
@@@ FORTRAN
c Schema for Fortran l2fit program, with following subs made:
c  N:   Number of x, y observations
c  NP:  Number of p parameters
c  EXP: Expression to be fit
c  SP:  Number of start p parameters at front of file
c  NV:  Size of v vector, from p. 2 of port n2f documentation
	integer i, iv(2000), liv, lty, lv, ui(1), lp
	double precision v(@NV@)
	external dummy, resid
	double precision ty(@N@,3), p(@NP@)
	data liv /2000/, lv /@NV@/
	data lty /@N@/, lp /@NP@/
	do 10 i = 1, lp
		p(i) = 0.0
   10	continue
	do 20 i = 1, @N@
		read (*, *) ty(i,1), ty(i,2), ty(i,3)
   20	continue
	if (@SP@ .le. 0) goto 40
		do 30 i = 1, @SP@
			read (*, *) p(i)
   30		continue
   40	continue
	ui(1) = lty
	iv(1) = 0
	call dn2f(lty, lp, p, resid, iv, liv, lv, v, ui, ty, dummy)
	stop
	end
c
	subroutine dummy
	return
	end
c
	subroutine resid(n, lp, p, nf, r, lty, ty, uf)
	integer n, lp, nf, lty
	double precision p(lp), r(n), ty(lty, 3)
	external uf
	integer i
	double precision x
	do 10 i = 1, n
		x = ty(i, 1)
		r(i) = ty(i,3)*(ty(i, 2) - (@EXP@))
   10	continue
	return
	end
@@@ GRAP
.\" Schema for grap summary of l2fit, with these parameters
.\"  DFILE:	data file name
.\"  IFILE:	input file name
.\"  GEXP:	grap expression
.\"  IEXP:	input expression
.\"  CEXP:	canonical expression
.\"  LOGX:	log x
.\"  LOGY:	log y
.\"  OPARS:	output parameters
.\"  SPARS:	starting parameters
.\"  PARSTR:	names of parameters
.\"  ALGTERM:	algorithm termination
.\"  ERRS:	estimates of standard errors
.\"  WEIGHT:	weighted or unweighted
.\"  CLINE:	command line arguments
.\"  XTEXT:	description of x variable
.\"  YTEXT:	description of y variable
.\"  YBLANKS:	same length as YTEXT, but blanks
.\"  RESID:	residual description
.\"  SHRINK:	shrink factor for graphs
.sp 1
.nf
.ce 1
L2FIT SUMMARY
.sp 1
Command line: \f(CW@CLINE@\fP
Least squares regression type: @WEIGHT@
Input data file: @IFILE@
.EQ
delim %%
.EN
Input expression: @IEXP@         % @EEXP@ %
    Canonical form: @CEXP@         % @ECEXP@ %
.ta 1.2i 2.0i 2.8i 3.6i 4.4i 5.2i 6.0i 6.8i 7.6i
Parameters%@PARSTR@%
    Initial:@SPARS@
    Final:@OPARS@
    Standard errors:@ERRS@
Algorithm termination: @ALGTERM@
.sp 1
.G1
define glog X (log($1)/log(2.718281828459045)) X
define gexp X (2.718281828459045^($1)) X
define abs X (max(($1), 0-($1))) X
frame ht 3.0*@SHRINK@ wid 4.5*@SHRINK@
label bot "Circles: input %(x,y)% pairs.  Line: least squares fit %y ~=~ f(x,p)%.  @XTEXT@"
coord @LOGX@ @LOGY@
label right "@YTEXT@"
draw solid
lastx = 1e30
copy "@DFILE@" thru {
	tx = $1
	circle at tx, $2
	if lastx != tx then { next at tx, (@GEXP@) }
	lastx = tx
}
.G2
.sp 1
.G1
frame ht 2.0*@SHRINK@ wid 4.5*@SHRINK@
label bot "@RESID@"
label right "@YBLANKS@"
coord @LOGX@
draw solid
line1 = 1
copy "@DFILE@" thru {
	maxx = tx = $1
	if line1 == 1 then { line1 = 0; minx = tx }
	circle at $1, ($2-(@GEXP@))/$3
}
line from minx, 0 to maxx, 0
.G2
@@@
ENDSCHEMAS
#
# Now comes the Awk program that does the work
#
cat >l2temp.awk <<'ENDAWK'
BEGIN {
  # Initialize variables
	schemafile = ARGV[1]	# XXX Fix this -- put in complete file path
	squote = "\047"
	progname = "l2fit"
	grapoutfile = "l2fit.out"
	tempdir = ""
	# To make files unique: "echo $$" | getline pid
	fitfile     = tempdir "l2temp.p"
	fortfile    = tempdir "l2temp.f"
	exfile	    = tempdir "l2temp.x"
	grapfile    = tempdir "l2temp.g"
	errfile     = tempdir "l2temp.e"
	initfile    = tempdir "l2temp.ip"  # Initialization Parameters
	idatafile   = tempdir "l2temp.id"  # Input Data: all x, each y, 1
	fdatafile   = tempdir "l2temp.fd"  # Fortran Data: unique x, mean y, weight
	gdatafile   = tempdir "l2temp.gd"  # Grap Data: all x, each y, stddev(y)
	sortfile    = tempdir "l2temp.sd"  # Sorted data, gdatafile format
	usage = "usage: l2fit <-flags>* <expr> <startvals>* <file>?"
	numre = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)([dDeE][+-]?[0-9]+)?$"
	savefiles = 0
	weighted = 0
	wantgrap = 1
	wanttroff = 1
  # Get arguments from command line
	EOFSTR = "EOFeofEOF"
	initlex()
	for (i = 1; i <= ARGC; i++) {
		s = ARGV[i]
		if (s ~ /[ \t]/) s = squote s squote
		cmdline = cmdline " " s
	}
	while (substr(token, 1, 1) == "-") {
		s = substr(token, 2)
		if      (s ~ /^t$/) savefiles = 1
		else if (s ~ /^i$/) wantgrap = 0
		else if (s ~ /^w$/) weighted = 1
		else if (s ~ /^troff$/) wanttroff = 0
		else if (s ~ /^lx$/)  { logx = "log x" }
		else if (s ~ /^ly$/)  { logy = "log y" }
		else if (s ~ /^lxy$/) { logx = "log x"; logy = "log y" }
		else if (s ~ /^x/) { xtext = substr(s, 2) }
		else if (s ~ /^y/) { ytext = substr(s, 2) }
		else if (s ~ /^s/) { shrink = 0+substr(s, 2) }
		else if (s ~ /^c/) { } # Comment
		else warn("unrecognized option: -" s)
		nexttoken()
	}
	if (shrink <= 0 || shrink > 2) shrink = 1
	if (token == EOFSTR) error(usage)
	inexpr = token
	nexttoken()
	startm = 0
	while (token ~ numre) {
		startp[++startm] = float(token, "in argument list")
		nexttoken()
	}
	if (token == EOFSTR)
		inputfile = "-"
	else {
		inputfile = token
		nexttoken()
		if (token != EOFSTR) error(usage)
	}
	ARGV[1] = inputfile
	ARGC = 2
  # Put expression in canonical form
	canexpr = tofortran(inexpr)
  # Compute m = p(i) elements in expr
	m = parmcount(canexpr)
	if (startm > m) {
		warn("too many start values; extras discarded")
		startm = m
	}
	for (i = 1; i <= m; i++)
		startstr = startstr "\t" (0 + startp[i])
  # Read data from input into idatafile
	while (getline > 0) {
		if (NF != 2) warn("data line " NR ": does not have 2 fields")
		x = float($1, "in data line " NR)
		y = float($2, "in data line " NR)
		print x, y, 1 >idatafile
		cnt[x]++
		sigy[x] += y
		sigy2[x] += y*y
		++n
	}
	close(idatafile)
  # Write fdatafile, for input to Fortran program
	if (weighted == 0) {
		fdatafile = idatafile
	} else {
		mins2 = HUGE = 1e30
		distinctx = 0
		for (x in cnt) {
			distinctx++
			tn = cnt[x]
			ts2 = sigy2[x]/tn - sigy[x]*sigy[x]/(tn*tn)
			s2[x] = ts2
			if (ts2 > 0 && ts2 < mins2)
				mins2 = ts2
		}
		if (mins2 == HUGE) mins2 = 1.0
		for (x in cnt) {
			ts2 = s2[x]
			if (ts2 <= 0.0) {
				warn("zero variance at x=" x \
					"; min variance substituted")
				ts2 = s2[x] = mins2
			}
			print x, sigy[x]/cnt[x], sqrt(cnt[x]/ts2) > fdatafile
		}
		close(fdatafile)
	}
  # Write initfile
	if (startm > 0) {
		for (i = 1; i <= startm; i++)
			print startp[i] >initfile
		close(initfile)
	} else {
		initfile = ""
	}
  # Write fortfile
	progn = weighted ? distinctx : n
	subarr["N"] = progn
	subarr["NP"] = m
	subarr["EXP"] = canexpr
	subarr["SP"] = startm
	subarr["NV"] = 200 + m*(progn + 2*m + 20) + 2*progn
	doschema(schemafile, "FORTRAN", fortfile, subarr)
	for (i in subarr) delete subarr[i]
  # Compile and execute program
	if (system("f77 " fortfile " -lport -o " exfile " 2>" errfile)) {
		system("cat " errfile"; rm " errfile)
		error("cannot compile (bad expression?)")
	}
	if (system("cat " fdatafile " " initfile " | " exfile " >" fitfile))
		error("execution error (computed number out of range?)")
  # Extract answers
	state = 0
	while (getline <fitfile > 0) {
		if      (state == 0 && $2 == "FINAL") state = 1
		else if (state == 1 && NF == 0) state = 2
		else if (state == 2 && NF == 0) state = 3
		else if (state == 2 && NF == 4) p[$1] = float($2, "in port output")
		if ($1 == "*****") conv = $0
		if ($1 == "ROW") estr = estr "\t" sqrt(float($NF, "in port output"))
	}
	s = pstr = ""
	for (i = 1; i <= m; i++) {
		pstr = pstr "\t" p[i]
		s = s " " p[i]
	}
	print substr(s, 2)
	gsub(/\*/, "", conv)
	sub(/^[ \t]+/, "", conv)
	for (i = 1; i <= 26; i++)
		gsub(substr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", i, 1),
		     substr("abcdefghijklmnopqrstuvwxyz", i, 1), conv)
	if (conv !~ /convergence/ || conv ~ /singular|false/) {
		warn("convergence problem: " conv)
		conv = "CONVERGENCE FAILURE: " conv
	}
  # Prepare grap summary, if desired
	if (wantgrap) {
	  # Make grap data file gdatafile
		if (weighted) {
			while (getline <idatafile > 0)
				print $1, $2, sqrt(s2[0+$1]) >gdatafile
			close(gdatafile)
		} else {
			gdatafile = idatafile
		}
	  # Sort data file for grap
		system("sort -n " gdatafile " > " sortfile)
	  # Write grapfile from gschemafile
		parstr = ""
		for (i = 1; i <= m; i++)
			parstr = parstr "\t" substr("abcdefghi", i, 1) "=p(" i ")"
		gsub(/\t-/, "\t\\-", pstr)
		gsub(/\t-/, "\t\\-", startstr)
		subarr["DFILE"] = sortfile
		subarr["IFILE"] = inputfile
		subarr["GEXP"] = tograp(canexpr)
		subarr["IEXP"] = inexpr
		subarr["CEXP"] = canexpr
		subarr["EEXP"] = toeqn(inexpr)
		subarr["ECEXP"] = toeqn(canexpr)
		subarr["LOGX"] = logx
		subarr["LOGY"] = logy
		subarr["OPARS"] = pstr
		subarr["SPARS"] = startstr
		subarr["PARSTR"] = toeqn(parstr)
		subarr["ALGTERM"] = conv
		subarr["ERRS"] = estr
		subarr["WEIGHT"] = weighted ? "Weighted" : "Unweighted"
		subarr["CLINE"] = cmdline
		subarr["SHRINK"] = shrink
		subarr["RESID"] = "Circles: weighted residuals %[y ~-~ f(x,p)]/stddev(x)%."
		if (xtext) xtext = "x: " xtext
		subarr["XTEXT"] = xtext
		if (ytext) ytext = "y: " ytext
		subarr["YTEXT"] = ytext
		yblanks = ytext
		gsub(/./, " ", yblanks)
		subarr["YBLANKS"] = yblanks
		if (weighted == 0)
			subarr["RESID"] = "Circles: residuals %y ~-~ f(x,p)%."
		doschema(schemafile, "GRAP", grapfile, subarr)
		for (i in subarr) delete subarr[i]
	  # Compile grapfile
		troffstr = ""
		if (wanttroff) troffstr = " | troff"
		system("grap " grapfile " | pic | eqn" troffstr " > " grapoutfile)
	  # Clean up files
		rmstr = sortfile " " grapfile
		if (weighted) rmstr = rmstr " " gdatafile
		if (savefiles == 0)
			system("rm " rmstr)
	}
  # Clean up files
	rmstr = exfile " " fortfile " " initfile " " idatafile " " \
		fitfile " " errfile
	if (weighted) rmstr = rmstr " " fdatafile
	if (savefiles == 0)
		system("rm " rmstr)
	exit
}

# Support functions for numerical interfaces

# FUNCTIONS TO CONVERT AMONG EQN, GRAP, FORTRAN, ETC.

function toeqn(s) {
	while (match(s, /exp\(/) > 0) {
		b = RSTART+RLENGTH-1
		n = matchpar(s, b)
		s = substr(s,1,n-1) "}" substr(s,n+1)
		sub(/exp\(/, " e sup {", s)
	}
	while (match(s, /(\*\*|\^)\(/) > 0) {
		b = RSTART+RLENGTH-1
		n = matchpar(s, b)
		s = substr(s,1,n-1) "}" substr(s,n+1)
		sub(/(\*\*|\^)\(/, " sup {", s)
	}
	gsub(/\([0-9]+\)/, " sub & ", s)	# this still leaves () around them
	for (i = 1; i <= 9; i++)
		gsub("\\(" i "\\)", i, s)
	gsub(/\(/, "{(", s)
	gsub(/\)/, ")}", s)
	gsub(/\*\*|\^/, " sup ", s)
	gsub(/\*/, "", s)
	gsub(/log/, " log ", s)
	gsub(/\t/, "\"\\t\"", s)
	return s
}

function matchpar(s, n,   i, k, c) {	# find matching paren to one at s[n]
	k = 0
	for (i = n; (c=substr(s,i,1)) != ""; i++) {
		if (c == "(")
			k++
		else if (c == ")") {
			if (--k <= 0)
				return i
		}
	}
	return n
}

function tograp(inputs,  s) {
	s = inputs
	gsub(/[ \t]/, "", s)
	for (i = 1; i <= m; i++)
		gsub("p\\(" i "\\)", "(" p[i] ")", s)
	gsub(/\*\*/, "^", s)
	s = " " s " "
	gsub(/[^a-zA-Z0-9]+/, " & ", s)
	gsub(/ log /, " glog ", s)
	gsub(/ exp /, " gexp ", s)
	gsub(/ x /,   " tx ", s)
	gsub(/ /, "", s)
	return s
}

function tofortran(inputs, s) {
	s = inputs
	gsub(/[ \t]/, "", s)
	gsub(/[^a-zA-Z0-9]+/, " & ", s)
	s = " " s " "
	for (i = 1; i <= 9; i++) {
		gsub(" " substr("abcdefghi", i, 1) " ", " p(" i ") ", s)
		gsub(" p" i " ", " p(" i ") ", s)
	}
	gsub(/\^/, "**", s)
	gsub(/ /, "", s)
	return s
}

function toawk(inputs,  s) {
	s = inputs
	gsub(/[ \t]/, "", s)
	gsub(/\*\*/, "^", s)
	return s
}

function parmcount(expr,  s, seen, i, n) {
	s = expr
	n = 0
	gsub(/[ \t]/, "", s)
	while (match(s, /p\([0-9]+\)/)) {
		i = 0 + substr(s, RSTART+2, RLENGTH-3)
		seen[i] = 1
		if (i > n) n = i
		s = substr(s, RSTART+RLENGTH)
	}
	for (i = 1; i < n; i++)
		if (!(i in seen))
			warn("p(" i ") not in expression")
	return n
}

# LEXICAL ANALYSIS FUNCTIONS

function initlex() {
	if	(ARGC == 2)	error(usage)		# ARGV[1] is prog name
	else if (ARGC == 3)	lexfile = ARGV[2]	# ARGV[1] is prog name
	else			lexfile = ""
	argptr = 1
	nexttoken()
}

function nexttoken(  status) {
	if (lexfile != "") {
		status = (getline token <lexfile)
		if (status == 0) token = EOFSTR
		if (status < 0) error("cannot open file " lexfile)
	} else {
		argptr++
		token = ARGV[argptr]
		if (argptr >= ARGC) token = EOFSTR
	}
	sub(/#.*/, "", token)
	sub(/^[ \t]+/, "", token)
	if (token == "") nexttoken()
}

# PROCESS SCHEMA FILE

function doschema(schemafile, marker, outfile, subarr,  temp, i) {
	while (getline <schemafile > 0)
		if ($1 == "@@@" && $2 == marker) break
	while (getline <schemafile > 0) {
		if ($1 == "@@@") break
		temp = $0
		if (temp ~ /@.*@/)
			for (i in subarr)
				if (index(temp,i))
					gsub("@" i "@", subarr[i], temp)
		print temp >outfile
	}
	close(schemafile)
	close(outfile)
}

# MISC SUPPORT FUNCTIONS

function float(s, where) {
	if (s ~ numre) {
		sub(/[dD]/, "e", s)
	} else {
		warn("unrecognized float: " s " " where)
	}
	return 0+s
}

function error(s) {
	print " " progname " fatal error: " s | "cat  1>&2"
	exit 1
}

function warn(s) {
	print " " progname "  warning: " s | "cat  1>&2"
}
ENDAWK
awk -f l2temp.awk $0 "$@"
rm l2temp.awk
