/*********************************************************************/
/*        LINEAR TREE for Supervised Learning                        */
/*        Versao 1.0 (10/12/1997)                                    */
/*        Developed by: Joao Gama                                    */
/*                LIACC - Uni.do Porto                               */
/*                jgama@ncc.up.pt                                    */
/*-------------------------------------------------------------------*/
/*   FILE:prune.c                                                    */  
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "Ci_instances.h"
#include "tree.h"
#include "externs.i"

static double PruneTree(DomainInfo *domain, Tree *tree, int (Func)(DomainInfo *, void *, double **), double *nr_erros);
static double AddErrs1(double N, double e);
static double AddErrs(double N, double e);
static float Val[] = {  0,  0.001, 0.005, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00};
static float Dev[] = {100,  3.09,  2.58,  2.33, 1.65, 1.28, 0.84, 0.25, 0.00};

TreeInfo *Prune_Tree\
(DomainInfo *domain, Tree *tree, int (Func)(DomainInfo *, void *, double **), double *erros)
{
  int depth = 0;
  double nr_erros=0;
  TreeInfo *treeinfo;

  PruneTree(domain, tree, Func, &nr_erros);
  treeinfo = Recompute_Nodes(tree, &depth);
  ERRORS(treeinfo) = nr_erros;
  DEPTH(treeinfo) = depth;
  return treeinfo;
}

static double PruneTree\
(DomainInfo *domain, Tree *tree, int (Func)(DomainInfo *, void *, double **), double *nr_erros)
{
  register int i, classe;
  double pessi_error = 0.0, estimated_error = 0.0, *distrib;
  double node_errors, leaf_errors = 0.0, sum_leaf_errors = 0.0;

  classe = Func(domain, tree->data, &distrib);
  node_errors = distrib[0] - distrib[classe];
  if (distrib[0])
    estimated_error = node_errors + AddErrs1(distrib[0], node_errors);
  else
    estimated_error = 0.5;  
  switch(tree->tipo) {
  case leaf:
    *nr_erros = node_errors;
    return estimated_error;
    break;
  case split_discrete:
    sum_leaf_errors = 0.0;
    for(i = 1; i <= NR_DESCENDENTS(tree); i++) {
      pessi_error += PruneTree(domain, DESCENDENT(tree, i), Func, &leaf_errors);
      sum_leaf_errors += leaf_errors;
    }
    break;
  case split_continuous:
  case split_linear:
    pessi_error += PruneTree(domain, DESCENDENT(tree, 1), Func, &leaf_errors);
    sum_leaf_errors = leaf_errors;
    pessi_error += PruneTree(domain, DESCENDENT(tree, 2), Func, &leaf_errors);
    sum_leaf_errors += leaf_errors;
    break;
  }

  *nr_erros = sum_leaf_errors;
  if ((estimated_error <= pessi_error) || (node_errors <= sum_leaf_errors)) {
    tree->tipo = leaf;
    DESCENDENTS(tree) = NULL;
    NR_DESCENDENTS(tree) = 0;
    *nr_erros = node_errors;
    return estimated_error;
  }
  return pessi_error;
}

static double AddErrs1(double N, double e)
{
  double estimate = 0.0;

  estimate = AddErrs(N, e);
  if (e > N / 2.0)
    estimate += AddErrs(N, N / 2.0);
  return estimate;
}
/* ==============================================
   Based on C4.5 Error estimator Routine
   ===============================================*/
static double AddErrs(double N, double e)
{
  register int i;
  static double Coeff=0;
  double Val0, Pr;
  
  if ( ! Coeff ) {
    i = 0;
    while ( CF > Val[i] ) i++;
    Coeff = Dev[i-1] +
      (Dev[i] - Dev[i-1]) * (CF - Val[i-1]) /(Val[i] - Val[i-1]);
    Coeff = Coeff * Coeff;
  }
  
  if ( e < 1E-6 )    {
    return N * (1 - exp(log(CF) / N));
  }
  else
    if ( e < 0.9999 )  {
      Val0 = N * (1 - exp(log(CF) / N));
      return Val0 + e * (AddErrs(N, 1.0) - Val0);
    }
    else
      if ( e + 0.5 >= N )  
        return 0.67 * (N - e);
      else    {
        Pr = (e + 0.5 + Coeff/2
              + sqrt(Coeff * ((e + 0.5) * (1 - (e + 0.5)/N) + Coeff/4)) )
          / (N + Coeff);
        return (N * Pr - e);
      }
}






