/*
depend.c Version 1.4.0 - Dependent Probability Test
Copyright (C) 2005  dondalah@ripco.com (Dondalah)

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to:

	Free Software Foundation, Inc.
	59 Temple Place - Suite 330
	Boston, MA  02111-1307, USA.
*/

/* This chi-square test is based on */
/* Lincoln L. Chao */
/* Statistics for Management */
/* Chapter 5, Probability Rules and Functions */
/* Section  3, Dependent Events */
/* Chapter 12, Chi-square Tests */
/* Section  4, Testing for Independence */

/* Lincoln L. Chao was at California State University, */
/* Long Beach, California, when he wrote this book. */

/* sample test: */

/* depend 10000 6 6 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "stat.h"
#include "rnd.h"

#define ZLMT 1.96

/*
 * putstx - print the command-line usage summary on stderr and quit.
 *
 * pgm: program name substituted into the "Usage:" and "Example:"
 *      lines.
 *
 * Never returns: the process is terminated with exit status 1.
 *
 * Written as a prototype-style definition; the old identifier-list
 * form is obsolescent in C11 and no longer accepted by C23.
 */
void putstx(char *pgm)
   {
   fprintf(stderr,"Usage: %s size columns rows\n",
      pgm);
   fprintf(stderr,"Where size is 1000 to 100000\n");
   fprintf(stderr,"Where columns is 2 to 9\n");
   fprintf(stderr,"      rows    is 2 to 9\n");
   fprintf(stderr,"Size is more than 10 times larger "
      "than columns x rows\n");
   fprintf(stderr,"Example: %s 10000 6 6\n",
      pgm);
   /* (6 - 1) * (6 - 1) = 25 for the 6 x 6 example above */
   fprintf(stderr,"Degrees of freedom is 25 in "
      "this example\n");
   exit(1);
   } /* putstx */

/* Comparison callback for qsort: orders two doubles ascending. */
static int bldprob_cmp(const void *a, const void *b)
   {
   double x = *(const double *) a;
   double y = *(const double *) b;
   if (x < y) return(-1);
   if (x > y) return(1);
   return(0);
   } /* bldprob_cmp */

/*
 * bldprob - fill prob[0..size-1] with probabilities summing to 1.0.
 *
 * size: number of probabilities wanted, 2 to 10 inclusive.
 * prob: caller-supplied array with room for "size" doubles;
 *       overwritten with the result.
 * sd:   random generator state, passed unchanged to rndnum().
 *
 * Method: draw size-1 distinct whole percentages (.01 to .99)
 * without replacement, append 1.0, sort ascending so the values
 * are cumulative probabilities, then replace each entry after the
 * first by its difference from the running cumulative total.
 *
 * On an invalid size a message is written to stderr and the
 * process exits with status 1.
 */
void bldprob(int size, double *prob,
   unsigned char *sd)
   {
   int i;
   int pick;
   int pctlmt;
   double cumprob;
   double pctlst[100];    /* 99 entries used, one spare */
   if (size < 2 || size > 10)
      {
      fprintf(stderr,"bldprob: invalid size %d\n",
         size);
      fprintf(stderr,"Valid size is 2 - 10\n");
      exit(1);
      } /* invalid size */
   /* Compute each percentage directly instead of repeatedly */
   /* adding 0.01, which accumulates rounding error. */
   for (i = 0; i < 99; i++)
      pctlst[i] = (double) (i + 1) / 100.0;
   /* pctlmt is the index of the last live entry in pctlst. */
   /* NOTE(review): rndnum() comes from rnd.h and its exact */
   /* range is not visible here.  If it returns 0..pctlmt-1 */
   /* the last live entry is never drawn directly, only after */
   /* it has been moved into a vacated slot -- confirm. */
   pctlmt = 98;  /* 99 - 1 */
   for (i = 0; i < size - 1; i++)
      {
      pick = (int) rndnum(pctlmt,sd);
      prob[i] = pctlst[pick];
      /* remove the drawn value: move the last live entry */
      /* into its slot and shrink the live range by one */
      pctlst[pick] = pctlst[pctlmt];
      pctlst[pctlmt] = 0.0;
      pctlmt--;
      } /* for each cumulative probability */
   prob[size - 1] = 1.0;   /* final cumulative probability */
   qsort(prob, (size_t) size, sizeof *prob, bldprob_cmp);
   /* Convert cumulative probabilities to */
   /* probabilities. */
   cumprob = prob[0];
   for (i = 1; i < size; i++)
      {
      prob[i] -= cumprob;
      cumprob += prob[i];
      } /* for each probability in the array */
   } /* bldprob */

/* Print the " Row   Col  1 Col  2 ...   Total" heading line */
/* for a table that has "cols" columns. */
static void dep_colhdr(int cols)
   {
   int c;
   printf(" Row   ");
   for (c = 1; c <= cols; c++)
      printf("Col %2d ", c);
   printf("  Total\n");
   } /* dep_colhdr */

/* Allocate a zero-filled array of "count" items of "width" bytes; */
/* on failure report "what" on stderr and exit with status 1. */
static void *dep_alloc(size_t count, size_t width, const char *what)
   {
   void *blk = calloc(count, width);
   if (blk == NULL)
      {
      fprintf(stderr,"depend: out of memory "
         "allocating %s.\n", what);
      exit(1);
      } /* out of memory */
   return(blk);
   } /* dep_alloc */

/*
 * main - dependent probability chi-square test.
 *
 * Usage: depend size columns rows
 *    size     number of random samples, 1000 to 100000
 *    columns  table width,  2 to 9
 *    rows     table height, 2 to 9
 *
 * Steps, in order:
 *   1. Build random row probabilities and, for every row, random
 *      contingent (per-row) column probabilities with bldprob().
 *   2. Multiply them into a table of expected joint probabilities
 *      and print the three tables plus column totals.
 *   3. Draw "size" samples (two rndfrac() calls each: one selects
 *      the row, one the column within that row) and count them in
 *      the observed frequency table.
 *   4. Print the observed table, mean/standard deviation figures,
 *      two chi-square statistics, and the range that chirange()
 *      reports for (cols-1)*(rows-1) degrees of freedom.
 *
 * Returns 0 on success.  Invalid arguments go through putstx(),
 * which exits with status 1; allocation failure also exits 1.
 */
int main(int argc, char **argv)
   {
   int i,size,cols,rows;
   int r,c;
   int idx;
   int rowtot;
   int mtxtot;
   int mtxsize;
   int degf;
   int tstrow;
   int tstcol;
   int *mtx;
   /* NOTE(review): no declaration of sd is visible in this */
   /* file, so it is declared here.  If a header also provides */
   /* a file-scope sd, this local one shadows it; the state is */
   /* always passed explicitly below. */
   unsigned char *sd;
   double totprob;
   double cumprob;
   double *rowlst;
   double *collst;
   double *continmtx;
   double *continrow;
   double *expmtx;
   double tstprob;
   double expect;
   double diff;
   double diffsq;
   double dblsz;
   double dblobs;
   double popestx;
   double musq;
   double popestxsqd;
   double popvar;
   double popstdev;
   double obsestx;
   double obsestxsqd;
   double obsvar;
   double obsstdev;
   double dblmtxsz;
   double chistdev;
   double t;
   double negtblv,postblv;

   /* ---- command line ---- */
   if (argc != 4) putstx(*argv);
   size = atoi(argv[1]);
   if (size < 1000 || size > 100000)
      {
      fprintf(stderr,"Invalid size %s.\n",
         argv[1]);
      putstx(*argv);
      } /* bad size */
   dblsz = (double) size;
   cols = atoi(argv[2]);
   if (cols < 2)
      {
      fprintf(stderr,"Columns is too small.\n");
      putstx(*argv);
      } /* not enough cols */
   if (cols > 9)
      {
      fprintf(stderr,"Columns is too large.\n");
      putstx(*argv);
      } /* if cols too large */
   rows = atoi(argv[3]);
   if (rows < 2)
      {
      fprintf(stderr,"Rows is too small.\n");
      putstx(*argv);
      } /* not enough rows */
   if (rows > 9)
      {
      fprintf(stderr,"Rows is too large.\n");
      putstx(*argv);
      } /* if rows too large */
   if (size < cols * rows * 10)
      {
      fprintf(stderr,"Size is too small.\n");
      putstx(*argv);
      } /* if size too small */

   /* ---- set up generator state and tables ---- */
   sd = (unsigned char *) rndinit();
   if (sd == NULL)
      {
      fprintf(stderr,"depend: out of memory "
         "allocating sd.\n");
      exit(1);
      } /* out of mem */
   mtxsize = cols * rows;
   dblmtxsz = (double) mtxsize;
   /* contingency table of observed frequencies (starts at zero) */
   mtx = (int *) dep_alloc((size_t) mtxsize, sizeof *mtx,
      "table of observed frequencies");
   /* table of contingent probabilities */
   continmtx = (double *) dep_alloc((size_t) mtxsize,
      sizeof *continmtx, "contingency probability matrix");
   /* row probability array */
   rowlst = (double *) dep_alloc((size_t) rows, sizeof *rowlst,
      "row probability array");
   /* column probability array */
   collst = (double *) dep_alloc((size_t) cols, sizeof *collst,
      "column probability array");
   /* contingency table of expected frequencies */
   /* each row in the table sums to the row probability */
   expmtx = (double *) dep_alloc((size_t) mtxsize, sizeof *expmtx,
      "table of expected frequencies");

   /* ---- row probabilities ---- */
   bldprob(rows,rowlst,sd);
   printf("             Row Probabilities\n");
   for (r = 1; r <= rows; r++)
      printf("Row %2d ", r);
   printf("  Total\n");
   totprob = 0.0;
   for (r = 0; r < rows; r++)
      {
      printf("%6.4f ", rowlst[r]);
      totprob += rowlst[r];
      } /* for each row probability */
   printf("  %4.2f\n\n", totprob);

   /* ---- contingent probabilities: one distribution per row ---- */
   for (r = 0; r < rows; r++)
      bldprob(cols, continmtx + (r * cols), sd);
   printf("             Contingent Probabilities\n");
   dep_colhdr(cols);
   for (r = 0; r < rows; r++)
      {
      totprob = 0.0;
      printf("%3d    ", r + 1);
      for (c = 0; c < cols; c++)
         {
         idx = (r * cols) + c;
         printf("%6.4f ", continmtx[idx]);
         totprob += continmtx[idx];
         } /* for each column */
      printf("  %4.2f\n", totprob);
      } /* for each row in contingency table */
   printf("\n");

   /* ---- expected joint probabilities ---- */
   /* cell = contingent probability * probability of its row */
   for (r = 0; r < rows; r++)
      {
      for (c = 0; c < cols; c++)
         {
         idx = (r * cols) + c;
         expmtx[idx] = continmtx[idx] * rowlst[r];
         } /* for each column in row */
      } /* for each row in contingency table */
   printf("             Expectant  Probabilities\n");
   dep_colhdr(cols);
   popestx = popestxsqd = 0.0;
   for (r = 0; r < rows; r++)
      {
      printf("%3d    ", r + 1);
      totprob = 0.0;
      for (c = 0; c < cols; c++)
         {
         idx = (r * cols) + c;
         printf("%6.4f ", expmtx[idx]);
         totprob += expmtx[idx];
         /* expected count, weighted by the cell probability */
         expect = expmtx[idx] * dblsz;
         popestx    += (expect * expmtx[idx]);
         popestxsqd += (expect * expect * expmtx[idx]);
         } /* for each column in row */
      printf("  %6.4f\n", totprob);
      } /* for each row in expectancy table */
   printf("\n");
   musq = popestx * popestx;
   popvar = popestxsqd - musq;
   popstdev = sqrt(popvar);

   /* ---- column totals of the expected probabilities ---- */
   for (c = 0; c < cols; c++)
      collst[c] = 0.0;
   for (r = 0; r < rows; r++)
      {
      for (c = 0; c < cols; c++)
         collst[c] += expmtx[(r * cols) + c];
      } /* for each row in contingency table */
   printf("Totals ");
   totprob = 0.0;
   for (c = 0; c < cols; c++)
      {
      printf("%6.4f ", collst[c]);
      totprob += collst[c];
      } /* for each column */
   printf("  %6.4f\n", totprob);

   /* ---- sampling ---- */
   /* Each loop walks a cumulative sum until it reaches the */
   /* random fraction.  The walk is clamped to the last row or */
   /* column: if rounding leaves the full sum marginally below */
   /* the fraction, an unclamped walk would read past the */
   /* probability array and count into a cell outside mtx. */
   for (i = 0; i < size; i++)
      {
      tstprob = rndfrac(sd);
      tstrow = 0;
      cumprob = rowlst[0];
      while (tstprob > cumprob && tstrow < rows - 1)
         {
         tstrow++;
         cumprob += rowlst[tstrow];
         } /* for each row */
      tstprob = rndfrac(sd);
      tstcol = 0;
      continrow = continmtx + (tstrow * cols);
      cumprob = continrow[0];
      while (tstprob > cumprob && tstcol < cols - 1)
         {
         tstcol++;
         cumprob += continrow[tstcol];
         } /* for each column */
      mtx[(tstrow * cols) + tstcol]++;
      } /* generate joint probability matrix */

   /* ---- observed frequencies ---- */
   printf("\n");
   printf("             Observed   Frequencies\n");
   dep_colhdr(cols);
   mtxtot = 0;
   for (r = 0; r < rows; r++)
      {
      printf("%3d    ", r + 1);
      rowtot = 0;
      for (c = 0; c < cols; c++)
         {
         idx = (r * cols) + c;
         printf("%6d ", mtx[idx]);
         rowtot += mtx[idx];
         } /* for each column in row */
      printf("%7d\n", rowtot);
      mtxtot += rowtot;
      } /* for each row in observed table */
   printf("\n");
   printf("Observed Total %d\n", mtxtot);
   printf("\n");

   /* ---- statistics ---- */
   t = 0.0;
   obsestx = obsestxsqd = 0.0;
   for (i = 0; i < mtxsize; i++)
      {
      dblobs = (double) mtx[i];
      expect = expmtx[i] * dblsz;
      if (expect != 0.0)
         {
         /* goodness of fit: sum of (E - O)^2 / E */
         diff   = expect - dblobs;
         diffsq = diff * diff;
         t     += (diffsq / expect);
         } /* if not divide by zero */
      obsestx    += (dblobs * expmtx[i]);
      obsestxsqd += (dblobs * dblobs * expmtx[i]);
      } /* calc chi sq */
   musq = obsestx * obsestx;
   obsvar = obsestxsqd - musq;
   obsstdev = sqrt(obsvar);
   printf("                 Mean "
      "          Stdev\n");
   printf("Population %14.6f %14.6f\n",
      popestx, popstdev);
   printf("Observed   %14.6f %14.6f\n",
      obsestx, obsstdev);
   printf("\n");
   /* variance-ratio statistic: (cells - 1) * observed / expected */
   chistdev = (dblmtxsz - 1.0) * obsvar / popvar;
   printf("Chi-square %f   (Variance)\n",
      chistdev);
   printf("Chi-square %f   (Goodness of Fit)\n", t);
   degf = (cols-1) * (rows-1);
   chirange(degf,&negtblv,&postblv);
   printf("Range at 95 percent: %f  to  %f\n",
      negtblv, postblv);

   /* ---- release everything that was allocated ---- */
   free(expmtx);
   free(collst);
   free(rowlst);
   free(continmtx);
   free(mtx);
   free(sd);
   return(0);
   } /* main */
