tadded python version of linear regression analysis - numtools - perform numerical operations on vectors and matrices in unix pipes
 (HTM) git clone git://src.adamsgaard.dk/numtools
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 0cc78918862ea2153868166130c2c4bf51ee5ae4
 (DIR) parent 980e62e250ba67b8a2e89cf4160b85aa9c14372e
 (HTM) Author: Anders Damsgaard <anders@adamsgaard.dk>
       Date:   Wed, 24 Nov 2021 14:40:40 +0100
       
       added python version of linear regression analysis
       
       Diffstat:
         A linregress                          |      67 +++++++++++++++++++++++++++++++
       
       1 file changed, 67 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/linregress b/linregress
       t@@ -0,0 +1,67 @@
       +#!/usr/bin/env python
       +import sys, getopt
       +import numpy
       +import scipy.stats
       +
       +def usage(arg0):
       +        print("usage: {} [-h] [-s significance_level]".format(sys.argv[0]))
       +
       +def read2dstdin():
       +        input = numpy.loadtxt(sys.stdin)
       +        return input[:,0], input[:, 1]
       +
       +def reportregress(res):
       +        print("slope =\t{:5g}\nslope_stderr =\t{:5g}".format(res.slope, res.stderr))
       +        print("intercept =\t{:5g}\nintercept_stderr =\t{:5g}".format(res.intercept, res.intercept_stderr))
       +        print("r_value =\t{:5g}\np_value =\t{:5g}\n".format(res.rvalue, res.pvalue))
       +
       +def reportsignif(p, significlvl):
       +        print("Checking null hypothesis using the Wald Test with t-distribution of the test statistic.")
       +        print("The p-value is ", end="")
       +        if (p < significlvl):
       +                print("less than the significance level ({:g}),".format(significlvl))
       +                print("which means that the null hypothesis of zero slope is REJECTED.\n")
       +        else:
       +                print("greater or equal than the significance level ({:g}),".format(significlvl))
       +                print("which means that the null hypothesis of zero slope CANNOT be rejected.\n")
       +
       +
       +def reportcorr(r):
       +        print("The correlation coefficient (r-value) denotes ", end="")
       +        if (abs(r) < 0.01):
       +                print("zero correlation.")
       +        else:
       +                print("a ", end="")
       +                if (abs(r) < 0.2):
       +                        print("weak", end="")
       +                elif (abs(r) < 0.3):
       +                        print("moderate", end="")
       +                elif (abs(r) < 0.4):
       +                        print("strong", end="")
       +                elif (abs(r) < 0.7):
       +                        print("very strong", end="")
       +                print(" positive" if r > 0.0 else " negative")
       +                print(" relationship.")
       +
       +def main(argv):
       +        significlvl = 0.05
       +        try:
       +                opts, args = getopt.getopt(argv, "hs:")
       +        except getopt.GetoptError:
       +                usage()
       +                sys.exit(2)
       +        for opt, arg in opts:
       +                if opt == "-h":
       +                        usage()
       +                        sys.exit(0)
       +                elif opt == "-s":
       +                        significlvl = float(arg)
       +
       +        x, y = read2dstdin()
       +        res = scipy.stats.linregress(x, y, alternative="two-sided")
       +        reportregress(res)
       +        reportsignif(res.pvalue, significlvl)
       +        reportcorr(res.rvalue)
       +
       +if __name__ == "__main__":
       +        main(sys.argv[1:])