tlinregress - numtools - perform numerical operations on vectors and matrices in unix pipes
(HTM) git clone git://src.adamsgaard.dk/numtools
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
tlinregress (2493B)
---
1 #!/usr/bin/env python
2 import sys, getopt
3 import numpy
4 import scipy.stats
5
6 def usage():
7 print("usage: {} [-h] [-o outfile] [-s significance_level]".format(sys.argv[0]))
8
9 def read2dstdin():
10 input = numpy.loadtxt(sys.stdin)
11 return input[:,0], input[:, 1]
12
13 def reportregress(res):
14 print("slope =\t{:5g}\nslope_stderr =\t{:5g}".format(res.slope, res.stderr))
15 print("intercept =\t{:5g}\nintercept_stderr =\t{:5g}".format(res.intercept, res.intercept_stderr))
16 print("r_value =\t{:5g}\np_value =\t{:5g}\n".format(res.rvalue, res.pvalue))
17
18 def reportsignif(p, significlvl):
19 print("Checking null hypothesis using the Wald Test with t-distribution of the test statistic.")
20 print("The p-value is ", end="")
21 if (p < significlvl):
22 print("less than the significance level ({:g}),".format(significlvl))
23 print("which means that the null hypothesis of zero slope is REJECTED.\n")
24 else:
25 print("greater or equal than the significance level ({:g}),".format(significlvl))
26 print("which means that the null hypothesis of zero slope CANNOT be rejected.\n")
27
28 def reportcorr(r):
29 print("The correlation coefficient (r-value) denotes ", end="")
30 if (abs(r) < 0.01):
31 print("zero correlation.")
32 else:
33 print("a ", end="")
34 if (abs(r) < 0.2):
35 print("weak", end="")
36 elif (abs(r) < 0.3):
37 print("moderate", end="")
38 elif (abs(r) < 0.4):
39 print("strong", end="")
40 elif (abs(r) < 0.7):
41 print("very strong", end="")
42 print(" positive" if r > 0.0 else " negative")
43 print(" relationship.")
44
45 def plotresult(x, y, res, outfile):
46 import matplotlib.pyplot as plt
47 plt.figure()
48 plt.scatter(x, y, label="data")
49 x_ = numpy.linspace(min(x), max(x))
50 y_fit = res.slope * x_ + res.intercept
51 plt.title("p-value: {:.3g}, corr. coeff.: {:.3g}".format(res.pvalue, res.rvalue))
52 plt.plot(x_, y_fit, "-k", label="slope: {:.3g}, intercept: {:.3g}".format(res.slope, res.intercept))
53 plt.legend()
54 plt.tight_layout()
55 plt.savefig(outfile)
56
57 def main(argv):
58 significlvl = 0.05
59 outfile = ''
60 try:
61 opts, args = getopt.getopt(argv, "ho:s:")
62 except getopt.GetoptError:
63 usage()
64 sys.exit(2)
65 for opt, arg in opts:
66 if opt == "-h":
67 usage()
68 sys.exit(0)
69 elif opt == "-s":
70 significlvl = float(arg)
71 elif opt == "-o":
72 outfile = arg
73
74 x, y = read2dstdin()
75 res = scipy.stats.linregress(x, y, alternative="two-sided")
76 reportregress(res)
77 reportsignif(res.pvalue, significlvl)
78 reportcorr(res.rvalue)
79 if outfile:
80 plotresult(x, y, res, outfile)
81
82 if __name__ == "__main__":
83 main(sys.argv[1:])