unit ClassifyAccuracy;

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
  Dialogs, Basic2, StdCtrls, Spin, Classifier, DB, DBTables, DBClassifier,
  Grids, ComCtrls, ExtCtrls;       


type
  { Form that runs the LC, NB and KNN classifiers over every table of the
    data alias and writes mean +/- standard deviation of the results into
    ResultGrid (see RunButtonClick and RunTest). }
  TAccuracyForm = class(TBasicForm2)
    { Result table: row 0 holds the column headers, row i+1 the results for
      TableNames[i]; columns 1..3 are written per classifier, 4..6 hold the
      extra statistics. }
    ResultGrid: TStringGrid;
    { Database-bound classifier driver; its Classifier property is switched
      between LC, NB and KNN by RunButtonClick. }
    DBLC: TDBClassifier;
    { Table currently under test; opened, filtered and closed by RunTest. }
    DataTable: TTable;
    { The three classifier components that are compared. }
    KNN: TKNearestNeighbors;
    NB: TNaiveBayes;
    LC: TLinearClassifier;
    Label1: TLabel;
    { Number of splits (runs) evaluated per table; read by RunTest. }
    RunsEdit: TSpinEdit;
    Label4: TLabel;
    { Its value is copied to KNN.KNeighbors in KNeighborsEditChange. }
    KNeighborsEdit: TSpinEdit;
    { When checked, RunTest calls LearnData and Prune on the unfiltered table
      before the splits are evaluated (skipped for 'SATI.DB'). }
    PruningBox: TCheckBox;
    { Progress captions updated by RunTest: current table / current split. }
    TableLabel: TLabel;
    SplitLabel: TLabel;
    RunButton: TButton;
    procedure KNeighborsEditChange(Sender: TObject);
    procedure FormCreate(Sender: TObject);
    procedure RunButtonClick(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
  private
    { Runs the classifier currently assigned to DBLC over all tables and
      writes its results into grid column Index. }
    procedure RunTest(Index: integer);
    { Private declarations }
  public
    { Public declarations }
    { Created in FormCreate and released in FormDestroy; after FormCreate it
      holds the '*.DB' table names of the data alias. }
    TableNames: TStringList;
  end;

var
  { Global reference for the form instance. It is not assigned anywhere in
    this unit - presumably set by the code that creates the form; confirm
    before relying on it. }
  AccuracyForm: TAccuracyForm;

implementation

{$R *.dfm}

{ OnChange handler of KNeighborsEdit: forwards the neighbour count entered in
  the spin edit to the k-NN classifier component. }
procedure TAccuracyForm.KNeighborsEditChange(Sender: TObject);
var
  NeighbourCount: integer;
begin
  NeighbourCount := KNeighborsEdit.Value;
  KNN.KNeighbors := NeighbourCount;
end;

{ OnCreate handler.
  - Creates TableNames (released again in FormDestroy).
  - Makes sure the BDE alias 'DewAIDataD' exists; if it is missing it is
    registered as a standard PARADOX alias pointing at the 'Data' directory
    next to the executable.
  - Points DataTable at that alias and loads the names of all '*.DB' tables
    of the alias into TableNames.
  - Fills RichEdit1 with the help text.
  - Disables the owner's 'btnHelp' control, if the owner has one. }
procedure TAccuracyForm.FormCreate(Sender: TObject);
const
    AliasName = 'DewAIDataD';
var DataDir: string;
    HelpControl: TComponent;
begin
    inherited;
    DataDir := ExtractFileDir(Application.ExeName)+'\Data';
    TableNames := TStringList.Create;
    DataTable.Close;
    Session.GetAliasNames(TableNames);
    { IndexOf works on the unsorted list, so no Sort/Find pair is needed; the
      list content is replaced by GetTableNames below anyway. }
    if TableNames.IndexOf(AliasName) < 0 then
      Session.AddStandardAlias(AliasName, DataDir, 'PARADOX');
    DataTable.DatabaseName := AliasName;
    Session.GetTableNames(AliasName,'*.DB',True,false, TableNames);

    With RichEdit1.Lines, RichEdit1 do
    begin
      Clear;
      Add('When you press the learn button an up to 30 cross-run classification tests will be run. ' +
          'K-Neighbours edit box specifies the k-nearest neighbours to be used for classification by the k-NN algorithm. ' +
          'You can compare the obtained results with the "Reference results", because they were run on the same ' +
          'data with the same test method. Each table is split up to 30 times in to a learn and test dataset. ' +
          'Classification accuracy is measured for each split and finally mean and standard deviation over all splits are estimated. ' +
          'This splits are repeated for each classification algorithm. ' +
          'If you check the Prune box, the algorithms will first try to determine the best ' +
          'attributes and only those will be then be used for classification. ' +
          'Examples count denotes the number of examples used for learning. ' +
          'Prior probability gives the percentage of examples belonging to the majority class. ' +
          'Best attribute is the classification success, if the algorithm would be looking ' +
          'at only one attribute, which separates the classes best.');
    end;

    { The form may be created without an owner, or by an owner that has no
      'btnHelp' component; the former unchecked cast dereferenced nil in both
      cases. Enabled is declared on TControl, so no TButton cast is needed. }
    HelpControl := nil;
    if Owner <> nil then HelpControl := Owner.FindComponent('btnHelp');
    if HelpControl is TControl then TControl(HelpControl).Enabled := false;
end;

{ Returns the arithmetic mean of the values in a.
  An empty array yields 0 instead of raising a floating point exception on
  the 0/0 division; this matches ComputeStdDev, which also returns 0 when
  there are too few values. }
function ComputeMean(const a: array of double): double;
var i: integer;
begin
     Result := 0;
     if Length(a) = 0 then Exit;   { nothing to average }
     for i := 0 to High(a) do  Result := Result + a[i];
     Result := Result/Length(a);
end;

{ Returns the sample standard deviation (divisor n-1) of the values in a
  around the supplied Mean. Arrays with fewer than two values yield 0. }
function ComputeStdDev(const a: array of double; Mean: double): double;
var Idx, Count: integer;
    SquareSum: double;
begin
     Count := Length(a);
     SquareSum := 0;
     for Idx := Low(a) to High(a) do
         SquareSum := SquareSum + Sqr(a[Idx]-Mean);
     if Count <= 1 then
         Result := 0
     else
         Result := Sqrt(SquareSum/(Count-1));
end;

{ Formats the mean and the sample standard deviation of Values as
  'mean+/-stddev', both with two decimals. The mean is computed only once. }
function FormatMeanStdDev(const Values: array of double): string;
var Mean: double;
begin
    Mean := ComputeMean(Values);
    Result := FormatFloat('0.00',Mean) + '+/-' + FormatFloat('0.00',ComputeStdDev(Values,Mean));
end;

{ Evaluates the classifier currently assigned to DBLC.Classifier on every
  table listed in TableNames (except 'Test1.DB') and writes the results into
  ResultGrid.

  Index - grid column that receives 'mean+/-stddev' of DBLC.ClassifyTest.

  For each table and each of the RunsEdit.Value splits the table is filtered
  on '[Split<j>] = 0' for learning and on '[Split<j>] = 1' for testing.
  Column 4 receives the test record count, column 5 (NB only) the value of
  NB.MaxPriorProbability and column 6 (LC only) the value of LC.MaxEntropy.
  The row after the last table receives the elapsed time.

  Does nothing when fewer than one run is requested. }
procedure TAccuracyForm.RunTest(Index: integer);
var  i,j,k,Runs: integer;
     a,b,c,d: array of double;
     TimeMark: TDateTime;
begin
    Runs := RunsEdit.Value;
    { Without at least one run there is nothing to average. }
    if Runs < 1 then Exit;
    SetLength(a,Runs);
    SetLength(b,Runs);
    SetLength(c,Runs);
    SetLength(d,Runs);
    TimeMark := Now;
    LC.UsePriorProbability := True;
    for i := 0 to TableNames.Count-1 do
    begin
         if TableNames[i] = 'Test1.DB' then Continue;
//         if NoSatiBox.Checked and (TableNames[i] = 'SATI.DB') then Continue;
         DataTable.TableName := TableNames[i];
         DataTable.Open;
         try
             TableLabel.Caption := 'Current database table: ' + TableNames[i];
             Update;
             DBLC.Clear;
             if PruningBox.Checked and not (DataTable.TableName = 'SATI.DB') then
             begin
                 { Attribute pruning is done once per table on the unfiltered data. }
                 SplitLabel.Caption := 'Optimizing...';
                 Update;
                 LC.UsePriorProbability := True;
                 DataTable.Filtered := False;
                 DBLC.LearnData;
                 DBLC.Classifier.Prune;
             end;
             { With a single run the filter is switched off, so learning and
               testing both see the whole table. }
             DataTable.Filtered := Runs <> 1;
             for j := 0 to Runs-1 do
             begin
                 DataTable.Filter := '[Split' + IntToStr(j) + '] = 0';   {Learn examples}
                 SplitLabel.Caption := 'Split number: ' + IntToStr(j);
                 Update;
                 DBLC.Classifier.SoftReset;
                 DBLC.LearnData;
                 DataTable.Filter := '[Split' + IntToStr(j) + '] = 1';   {Test examples}
                 a[j] := DBLC.ClassifyTest;
                 b[j] := DataTable.RecordCount;
                 { k is never read in this method; presumably an output (var/out)
                   argument of the two calls below - confirm in the Classifier unit. }
                 if DBLC.Classifier = NB then c[j] := NB.MaxPriorProbability(k);
                 if DBLC.Classifier = LC then d[j] := LC.MaxEntropy(k);
             end;
             ResultGrid.Cells[0,i+1] := TableNames[i];
             ResultGrid.Cells[Index,  i+1] := FormatMeanStdDev(a);
             ResultGrid.Cells[4, i+1] := FormatMeanStdDev(b);
             if DBLC.Classifier = NB then
                 ResultGrid.Cells[5, i+1] := FormatMeanStdDev(c);
             if DBLC.Classifier = LC then
                 ResultGrid.Cells[6, i+1] := FormatMeanStdDev(d);
         finally
             { Close the table even if learning or classification raises. }
             DataTable.Close;
         end;
    end;
    TimeMark := Now-TimeMark;
    ResultGrid.Cells[0,TableNames.Count+1] := 'Time [min:sec]';
    ResultGrid.Cells[Index, TableNames.Count+1] := FormatDateTime('nn:ss',TimeMark);
end;

{ OnClick handler of RunButton: resets the data table state, writes the
  column headers and runs the accuracy test once per classifier (LC, NB and
  KNN), each into its own result column. }
procedure TAccuracyForm.RunButtonClick(Sender: TObject);
const
    HeaderRow    = 0;
    { Result columns, one per classifier. }
    LCColumn     = 1;
    NBColumn     = 2;
    KNNColumn    = 3;
    { Columns holding the additional statistics. }
    TestColumn   = 4;
    PriorColumn  = 5;
    BestColumn   = 6;
begin
    TableNames.Sorted := True;

    { Start from a closed table with an empty, active filter. }
    DataTable.Close;
    DataTable.Filter := '';
    DataTable.Filtered := True;

    ResultGrid.Cells[TestColumn,  HeaderRow] := 'Test examples';
    ResultGrid.Cells[PriorColumn, HeaderRow] := 'Prior probability';
    ResultGrid.Cells[BestColumn,  HeaderRow] := 'Best attr. succ.[%]';

    { Linear classifier }
    DBLC.Classifier := LC;
    ResultGrid.Cells[LCColumn, HeaderRow] := 'LC Success [%]';
    RunTest(LCColumn);

    { Naive bayes can only run on discrete attributes }
    DBLC.Classifier := NB;
    ResultGrid.Cells[NBColumn, HeaderRow] := 'NB Success [%]';
    RunTest(NBColumn);

    { k-nearest neighbours }
    DBLC.Classifier := KNN;
    ResultGrid.Cells[KNNColumn, HeaderRow] := 'KNN Success [%]';
    RunTest(KNNColumn);
end;


{ OnDestroy handler: releases the table name list created in FormCreate.
  FreeAndNil is used instead of calling Destroy directly because Destroy on
  a nil reference raises an access violation; TableNames is still nil when
  FormCreate failed before the list was created. }
procedure TAccuracyForm.FormDestroy(Sender: TObject);
begin
  FreeAndNil(TableNames);
  inherited;
end;

initialization
   { Register the form class when the unit is loaded, so that the class can
     be looked up by its name. }
   RegisterClass(TAccuracyForm);
finalization
   { Remove the registration again when the unit is unloaded. }
   UnRegisterClass(TAccuracyForm);

end.
