/*
pctdiff.c Version 1.4.0 - Difference between 2 proportions
Copyright (C) 2005  dondalah@ripco.com (Dondalah)

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to:

	Free Software Foundation, Inc.
	59 Temple Place - Suite 330
	Boston, MA  02111-1307, USA.
*/

/* This test is based on: */
/* Lincoln L. Chao */
/* Statistics for Management */
/* Palo Alto, CA: The Scientific Press, 1984 */
/* Chapter 10, Hypothesis Testing */
/* Section 5, Testing for the difference between two proportions */

/* Lincoln L. Chao was at California State University, */
/* Long Beach, CA, while writing this book. */

/* Sample test: */

/* pctdiff -a 0.6 -b 0.5 -c 95.0 -m 100 -n 400 -2 */

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "rnd.h"

/* Side of the one tail test, chosen with -t lt or -t ut. */
/* main() starts with side = 0, meaning -t was not given. */
#define LOWER 1
#define UPPER 2
/* Kind of test, chosen with -1 or -2. */
#define ONETAIL 1
#define TWOTAIL 2

/* Print the command line syntax on stderr and terminate the */
/* program with exit status 1.  This routine never returns.  */
/* pgm is the program name shown on the "Usage:" line.       */
/* The text describes the option handling done in main():    */
/* the test is two tailed unless -1 is given, -1 needs -t,   */
/* and -t is refused together with a two tail test.          */
void putstx(const char *pgm)
   {
   static const char *const option_text[] =
      {
      "Options:\n",
      "-h = print this usage message\n",
      "-c cc.c = confidence level\n",
      "   where cc.c is 90.0, 95.0, "
         "98.0, 99.0, 99.5, or 99.9\n",
      "   default = 95.0\n",
      "-m zzzzzz = size of sample 1\n",
      "   where zzzzzz is 30 to 1000000000\n",
      "   default = 100\n",
      "-n zzzzzz = size of sample 2\n",
      "   where zzzzzz is 30 to 1000000000\n",
      "   default = 100\n",
      "-a n.n = probability of success in sample 1\n",
      "   where n.n = 0.001 - 0.999\n",
      "   default = 0.5\n",
      "-b n.n = probability of success in sample 2\n",
      "   where n.n = 0.001 - 0.999\n",
      "   default = 0.5\n",
      "-1 = one tail test (requires -t)\n",
      "-2 = two tail test (default)\n",
      "-t xx = tail used by the one tail test\n",
      "   where xx = lt (lower tail) "
         "or ut (upper tail)\n",
      "   required with -1, not allowed with -2\n"
      };
   size_t line;
   size_t total;
   total = sizeof(option_text) / sizeof(option_text[0]);
   fprintf(stderr,"Usage: %s options\n", pgm);
   for (line = 0; line < total; line++)
      {
      fputs(option_text[line], stderr);
      } /* for each line of the option summary */
   exit(1);
   } /* putstx */

/* Parse text as a base 10 integer.                          */
/* Returns 1 and stores the value in *out when the whole     */
/* string is a number that fits in a long, otherwise 0.      */
static int parse_long(const char *text, long *out)
   {
   char *end;
   long val;
   errno = 0;
   val = strtol(text, &end, 10);
   if (end == text || *end != '\0' || errno == ERANGE) return(0);
   *out = val;
   return(1);
   } /* parse_long */

/* Parse text as a floating point number.                    */
/* Returns 1 and stores the value in *out when the whole     */
/* string is a representable number, otherwise 0.            */
static int parse_double(const char *text, double *out)
   {
   char *end;
   double val;
   errno = 0;
   val = strtod(text, &end);
   if (end == text || *end != '\0' || errno == ERANGE) return(0);
   *out = val;
   return(1);
   } /* parse_double */

/* Step *idx to the value that follows an option and return  */
/* it.  When the command line ends first, print missing_msg  */
/* and the usage text; putstx() exits the program.           */
static char *option_value(int argc, char **argv, int *idx,
   const char *missing_msg)
   {
   (*idx)++;
   if (*idx >= argc)
      {
      fputs(missing_msg, stderr);
      putstx(*argv);
      } /* if out of arguments */
   return(argv[*idx]);
   } /* option_value */

/* Return the critical z value of the standard normal        */
/* distribution for a confidence level given in percent and  */
/* a test kind (ONETAIL or TWOTAIL).  Returns -1.0 when the  */
/* confidence level is not one of the supported levels.      */
static double critical_z(double confidence, int tail)
   {
   static const double level[] =
      {90.0,  95.0,  98.0,  99.0,  99.5,  99.9};
   /* one tail: z with an upper tail area of (100 - level)% */
   static const double one_tail[] =
      {1.282, 1.645, 2.054, 2.326, 2.575, 3.090};
   /* two tail: z with (100 - level)/2 % in each tail */
   static const double two_tail[] =
      {1.645, 1.960, 2.326, 2.575, 2.807, 3.291};
   size_t k;
   for (k = 0; k < sizeof(level) / sizeof(level[0]); k++)
      {
      if (confidence == level[k])
         return((tail == ONETAIL) ? one_tail[k] : two_tail[k]);
      } /* for each supported confidence level */
   return(-1.0);
   } /* critical_z */

/* Simulate two binomial samples and test the difference     */
/* between the two observed proportions against the          */
/* difference between the two given probabilities.           */
/* The command line options are described by putstx().       */
/* Returns 0 after printing the test; every error path       */
/* exits with status 1.                                      */
int main(int argc, char **argv)
   {
   int i;
   int size_1;
   int size_2;
   int tail;
   int side;
   int obs_1;
   int obs_2;
   long count;
   const char *opt;
   const char *val;
   const char *verdict;
   unsigned char *sd;
   double confidence;
   double zval;
   double zscore;
   double popmu_1;
   double popmu_2;
   double prob_1;
   double prob_2;
   double std_err;
   double quot_1;
   double quot_2;
   double diff_obs;
   double diff_pop;
   double net_diff;
   double dblsz_1;
   double dblsz_2;
   double portion_1;
   double portion_2;

   /* defaults */
   size_1 = 100;
   size_2 = 100;
   confidence = 95.0;
   tail = TWOTAIL;
   side = 0;                /* -t not given yet */
   prob_1 = 0.5;
   prob_2 = 0.5;
   count = 0L;
   verdict = "";

   if (argc == 1) putstx(*argv);

   /* putstx() exits, so no branch below falls through */
   /* after reporting an error.                        */
   for (i = 1; i < argc; i++)
      {
      opt = argv[i];
      if (!strcmp(opt,"-h"))
         {
         putstx(*argv);
         } /* if help */
      else if (!strcmp(opt,"-1"))
         {
         tail = ONETAIL;
         } /* if one tail test */
      else if (!strcmp(opt,"-2"))
         {
         tail = TWOTAIL;
         } /* if two tail test */
      else if (!strcmp(opt,"-t"))
         {
         val = option_value(argc, argv, &i,
            "Missing lt or ut for one tail test\n");
         if (strlen(val) != 2)
            {
            fprintf(stderr,"Invalid tail -t "
               "length for one tail test\n");
            putstx(*argv);
            } /* bad upper/lower length */
         if (!strcmp(val,"lt")) side = LOWER;
         else if (!strcmp(val,"ut")) side = UPPER;
         else
            {
            fprintf(stderr,"Invalid tail -t "
               "for one tail test\n");
            putstx(*argv);
            } /* bad upper/lower */
         } /* if one tail test lt or ut */
      else if (!strcmp(opt,"-m"))
         {
         val = option_value(argc, argv, &i,
            "Missing size for sample 1\n");
         if (!parse_long(val,&count)
            || count < 30L || count > 1000000000L)
            {
            fprintf(stderr,"Invalid size for sample 1\n");
            putstx(*argv);
            } /* bad size_1 */
         size_1 = (int) count;
         } /* if size_1 */
      else if (!strcmp(opt,"-n"))
         {
         val = option_value(argc, argv, &i,
            "Missing size for sample 2\n");
         if (!parse_long(val,&count)
            || count < 30L || count > 1000000000L)
            {
            fprintf(stderr,"Invalid size for sample 2\n");
            putstx(*argv);
            } /* bad size_2 */
         size_2 = (int) count;
         } /* if size_2 */
      else if (!strcmp(opt,"-c"))
         {
         val = option_value(argc, argv, &i,
            "Missing confidence level\n");
         /* only the levels known to critical_z() are valid */
         if (!parse_double(val,&confidence)
            || critical_z(confidence,TWOTAIL) < 0.0)
            {
            fprintf(stderr,"Invalid confidence level.\n");
            putstx(*argv);
            } /* bad confidence */
         } /* if confidence level */
      else if (!strcmp(opt,"-a"))
         {
         val = option_value(argc, argv, &i,
            "Missing probability for sample 1\n");
         /* written as !(in range) so that NaN is rejected */
         if (!parse_double(val,&prob_1)
            || !(prob_1 >= 0.001 && prob_1 <= 0.999))
            {
            fprintf(stderr,"Invalid probability for sample 1\n");
            putstx(*argv);
            } /* if invalid prob_1 */
         } /* if probability for sample 1 */
      else if (!strcmp(opt,"-b"))
         {
         val = option_value(argc, argv, &i,
            "Missing probability for sample 2\n");
         if (!parse_double(val,&prob_2)
            || !(prob_2 >= 0.001 && prob_2 <= 0.999))
            {
            fprintf(stderr,"Invalid probability for sample 2\n");
            putstx(*argv);
            } /* if invalid prob_2 */
         } /* if probability for sample 2 */
      else
         {
         fprintf(stderr,"Invalid option %s.\n", opt);
         putstx(*argv);
         } /* unknown option */
      } /* for each argument */

   if (tail != ONETAIL && tail != TWOTAIL)
      {
      fprintf(stderr,"Logic error 001\n");
      putstx(*argv);
      } /* invalid tail */
   zval = critical_z(confidence, tail);
   if (tail == TWOTAIL && side)
      {
      fprintf(stderr,"-t used with -2\n");
      putstx(*argv);
      } /* invalid -t with -2 */

   /* the expected number of successes must be at least 30 */
   dblsz_1 = (double) size_1;
   dblsz_2 = (double) size_2;
   popmu_1 = prob_1 * dblsz_1;
   if (popmu_1 < 30.0)
      {
      fprintf(stderr,"Increase -m size\n");
      putstx(*argv);
      } /* invalid size_1 */
   popmu_2 = prob_2 * dblsz_2;
   if (popmu_2 < 30.0)
      {
      fprintf(stderr,"Increase -n size\n");
      putstx(*argv);
      } /* invalid size_2 */
   if (tail == ONETAIL && side != LOWER && side != UPPER)
      {
      fprintf(stderr,"One tail test: "
         "missing the -t parameter\n");
      putstx(*argv);
      } /* if missing -t */

   /* rndinit() and rndfrac() come from rnd.h.  NOTE(review): */
   /* rndfrac() is presumably a uniform fraction between 0    */
   /* and 1 -- confirm against rnd.h.                         */
   sd = (unsigned char *) rndinit();
   if (sd == NULL)
      {
      fprintf(stderr,"pctdiff: out of memory "
         "allocating sd.\n");
      exit(1);
      } /* out of mem */

   /* count the successes in each simulated sample */
   obs_1 = 0;
   for (i = 0; i < size_1; i++)
      {
      if (rndfrac(sd) < prob_1) obs_1++;
      } /* generate observations for sample 1 */
   obs_2 = 0;
   for (i = 0; i < size_2; i++)
      {
      if (rndfrac(sd) < prob_2) obs_2++;
      } /* generate observations for sample 2 */

   portion_1 = (double) obs_1 / dblsz_1;
   portion_2 = (double) obs_2 / dblsz_2;
   /* standard error = sqrt(p1*q1/n1 + p2*q2/n2) */
   quot_1 = portion_1 * (1.0 - portion_1) / dblsz_1;
   quot_2 = portion_2 * (1.0 - portion_2) / dblsz_2;
   std_err = sqrt(quot_1 + quot_2);
   diff_obs = portion_1 - portion_2;
   diff_pop = prob_1 - prob_2;
   net_diff = diff_obs - diff_pop;
   /* both samples all failures or all successes: */
   /* the z score would be a division by zero     */
   if (!(std_err > 0.0))
      {
      fprintf(stderr,"pctdiff: standard error is zero, "
         "Z score is undefined.\n");
      exit(1);
      } /* if no variation in either sample */
   zscore = net_diff / std_err;

   printf("Difference between two proportions:\n\n");
   printf("               Observations "
      "    Probability "
      "    Sample Size\n");
   printf("Sample 1   %15.6f %15.6f %12.0f\n",
      portion_1, prob_1, dblsz_1);
   printf("Sample 2   %15.6f %15.6f %12.0f\n",
      portion_2, prob_2, dblsz_2);
   printf("Difference %15.6f %15.6f\n\n",
      diff_obs, diff_pop);
   printf("   Net Difference "
      "  Standard Error "
      "     Z Score "
      "     Confidence Level\n");
   printf("%15.6f %15.6f %15.6f %15.3f\n\n",
      net_diff, std_err, zscore, confidence);

   /* the comparisons are inclusive to agree with the */
   /* rejection rule that is printed                   */
   if (tail == ONETAIL)
      {
      if (side == LOWER)
         {
         printf("Reject H(0) if Z <= %6.3f\n",
            -zval);
         verdict = (zscore <= -zval)
            ? "Reject H(0)" : "Do not reject H(0)";
         } /* if lower tail */
      else if (side == UPPER)
         {
         printf("Reject H(0) if Z >= %6.3f\n",
            zval);
         verdict = (zscore >= zval)
            ? "Reject H(0)" : "Do not reject H(0)";
         } /* if upper tail */
      else
         {
         fprintf(stderr,"Logic error 002\n");
         exit(1);
         } /* if missing -t parm */
      } /* if one tail test */
   else
      {
      printf("Reject H(0) if Z <= %6.3f "
         "or Z >= %6.3f\n", -zval, zval);
      verdict = (zscore >= zval || zscore <= -zval)
         ? "Reject H(0)" : "Do not reject H(0)";
      } /* if two tail test */
   printf("%s\n", verdict);
   return(0);
   } /* main */
