tlinregress - numtools - perform numerical operations on vectors and matrices in unix pipes
 (HTM) git clone git://src.adamsgaard.dk/numtools
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       tlinregress (2493B)
       ---
            1 #!/usr/bin/env python
            2 import sys, getopt
            3 import numpy
            4 import scipy.stats
            5 
            6 def usage():
            7         print("usage: {} [-h] [-o outfile] [-s significance_level]".format(sys.argv[0]))
            8 
            9 def read2dstdin():
           10         input = numpy.loadtxt(sys.stdin)
           11         return input[:,0], input[:, 1]
           12 
           13 def reportregress(res):
           14         print("slope =\t{:5g}\nslope_stderr =\t{:5g}".format(res.slope, res.stderr))
           15         print("intercept =\t{:5g}\nintercept_stderr =\t{:5g}".format(res.intercept, res.intercept_stderr))
           16         print("r_value =\t{:5g}\np_value =\t{:5g}\n".format(res.rvalue, res.pvalue))
           17 
           18 def reportsignif(p, significlvl):
           19         print("Checking null hypothesis using the Wald Test with t-distribution of the test statistic.")
           20         print("The p-value is ", end="")
           21         if (p < significlvl):
           22                 print("less than the significance level ({:g}),".format(significlvl))
           23                 print("which means that the null hypothesis of zero slope is REJECTED.\n")
           24         else:
           25                 print("greater or equal than the significance level ({:g}),".format(significlvl))
           26                 print("which means that the null hypothesis of zero slope CANNOT be rejected.\n")
           27 
           28 def reportcorr(r):
           29         print("The correlation coefficient (r-value) denotes ", end="")
           30         if (abs(r) < 0.01):
           31                 print("zero correlation.")
           32         else:
           33                 print("a ", end="")
           34                 if (abs(r) < 0.2):
           35                         print("weak", end="")
           36                 elif (abs(r) < 0.3):
           37                         print("moderate", end="")
           38                 elif (abs(r) < 0.4):
           39                         print("strong", end="")
           40                 elif (abs(r) < 0.7):
           41                         print("very strong", end="")
           42                 print(" positive" if r > 0.0 else " negative")
           43                 print(" relationship.")
           44 
           45 def plotresult(x, y, res, outfile):
           46         import matplotlib.pyplot as plt
           47         plt.figure()
           48         plt.scatter(x, y, label="data")
           49         x_ = numpy.linspace(min(x), max(x))
           50         y_fit = res.slope * x_ + res.intercept
           51         plt.title("p-value: {:.3g}, corr. coeff.: {:.3g}".format(res.pvalue, res.rvalue))
           52         plt.plot(x_, y_fit, "-k", label="slope: {:.3g}, intercept: {:.3g}".format(res.slope, res.intercept))
           53         plt.legend()
           54         plt.tight_layout()
           55         plt.savefig(outfile)
           56 
           57 def main(argv):
           58         significlvl = 0.05
           59         outfile = ''
           60         try:
           61                 opts, args = getopt.getopt(argv, "ho:s:")
           62         except getopt.GetoptError:
           63                 usage()
           64                 sys.exit(2)
           65         for opt, arg in opts:
           66                 if opt == "-h":
           67                         usage()
           68                         sys.exit(0)
           69                 elif opt == "-s":
           70                         significlvl = float(arg)
           71                 elif opt == "-o":
           72                         outfile = arg
           73 
           74         x, y = read2dstdin()
           75         res = scipy.stats.linregress(x, y, alternative="two-sided")
           76         reportregress(res)
           77         reportsignif(res.pvalue, significlvl)
           78         reportcorr(res.rvalue)
           79         if outfile:
           80                 plotresult(x, y, res, outfile)
           81 
           82 if __name__ == "__main__":
           83         main(sys.argv[1:])