unit BlockProcessing;

interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
  Dialogs, Basic2, StdCtrls, ComCtrls, ExtCtrls, MtxVec, MtxComCtrls,Probabilities,
  Math387, Basic1, TeeProcs, TeEngine, Chart, Series;

type
  { Demo form that times three implementations of the Maxwell PDF
    (per-element call, whole-vector calls, block-processed vector calls)
    and plots the timings on the chart.
    NOTE(review): Chart1 and RichEdit1 are used by the methods but not
    declared here - presumably inherited from TBasicForm1; confirm. }
  TfrmBlockProc = class(TBasicForm1)
    { Components streamed from the .dfm. }
    Button1: TButton;       { presumably wired to Button1Click in the .dfm - confirm }
    Label1: TLabel;         { shows the 'Progress: N%' text during a benchmark run }
    Series1: TLineSeries;   { NOTE(review): Button1Click addresses the series as }
    Series2: TLineSeries;   { Chart1.Series[0..2]; presumably these three, in }
    Series3: TLineSeries;   { this order - confirm against the .dfm }
    procedure FormCreate(Sender: TObject);
    procedure FormDestroy(Sender: TObject);
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
    { Work vectors: created in FormCreate via CreateIt, released in
      FormDestroy via FreeIt. X is the input, Res the output, tmp a scratch. }
    Res, X, tmp: TVec;
    { Size given to X in FormCreate; not assigned anywhere in this unit. }
    Len: Integer;
    { GetTickCount value captured at the start of each timing routine. }
    Counter: DWord;
    { Each routine runs Iter passes over X and stores the elapsed tick
      count (GetTickCount difference) in Result. }
    procedure MaxwellBlock(Iter: integer; var Result: double);
    procedure MaxwellNoBlock(Iter: integer; var Result: double);
    procedure MaxwellFunction(Iter: integer; var Result: double);
  public
    { Public declarations }
  end;

var
  { Unit-level reference to the form. It is not assigned anywhere in this
    unit; presumably set by whoever instantiates the form - confirm. }
  frmBlockProc: TfrmBlockProc;

implementation

{$R *.dfm}

{ Block process }

{ Block-processed variant of the benchmark.
  Runs Iter passes; every pass walks X and Res block by block
  (BlockInit / BlockEnd / BlockNext) and applies the same chain of vector
  operations as MaxwellNoBlock to each block.
  Iter   - number of passes; zero or negative performs none.
  Result - receives the elapsed GetTickCount difference for all passes. }
procedure TfrmBlockProc.MaxwellBlock(Iter: integer; var Result: double);
var
  passesLeft: Integer;
  scaleParam: TSample;
begin
  Counter := GetTickCount;
  { Maxwell distribution, a = 1.0 }
  scaleParam := 1;
  passesLeft := Iter;
  while passesLeft > 0 do
  begin
    { Restart block iteration on both vectors for this pass. }
    Res.BlockInit;
    X.BlockInit;
    while not X.BlockEnd do
    begin
      { tmp holds the squared block; Res is built from it step by step. }
      tmp.Sqr(X);
      Res.Copy(tmp);
      Res.Scale(-0.5*scaleParam);
      Res.Exp;
      Res.Mul(tmp);
      Res.Scale(Sqrt(4*INVTWOPI));
      { Advance both vectors together so their blocks stay aligned. }
      Res.BlockNext;
      X.BlockNext;
    end;
    Dec(passesLeft);
  end;
  Result := GetTickCount - Counter;
end;

{ Fills the description box with the explanatory text and obtains the
  three work vectors (X, Res, tmp) used by the benchmark routines.
  The vectors are released again in FormDestroy. }
procedure TfrmBlockProc.FormCreate(Sender: TObject);
begin
  inherited;
  RichEdit1.Lines.Clear;
  { The pieces are concatenated verbatim, so every piece except the last
    must end with a space; otherwise words run together on screen. }
  RichEdit1.Lines.Add('Block processing enables you to adapt the length '
    + 'of the memory being accessed by your algorithm to '
    + 'approximately match the size of the CPU cache. If '
    + 'the memory at the same memory location is accessed '
    + 'more than once, the second access will be about 3-6 '
    + 'times faster. The trick is to break long vectors '
    + 'down in to a series of short ones and process each '
    + 'block separately, thus always holding the entire memory '
    + 'block in the CPU cache and radically reducing the number '
    + 'of CPU cache misses. Block processing is demonstrated by '
    + 'timing three versions of the same function. The function '
    + 'being benchmarked is Maxwell PDF.');
  RichEdit1.Lines.Add('');
  RichEdit1.Lines.Add('Block length (size of the sub vector) is automatically set '
    + 'by MtxVec to match the CPU cache size and so is the number '
    + 'of iterations required to process the entire vector length.');
  CreateIt(X,Res,tmp);
  { Len is not assigned anywhere in this unit; Button1Click resizes X
    again before any benchmark runs. }
  X.Size(Len,False);
  Res.Size(X);
end;

{ Releases the three work vectors that FormCreate obtained with CreateIt,
  then runs the inherited destroy handler. }
procedure TfrmBlockProc.FormDestroy(Sender: TObject);
begin
  { Vectors go first; the ancestor's handler runs last. }
  FreeIt(X, Res, tmp);
  inherited;
end;

{ Per-element variant of the benchmark.
  Runs Iter passes; every pass calls MaxwellPdf once per element of X and
  stores the value in the matching element of Res.
  Iter   - number of passes; zero or negative performs none.
  Result - receives the elapsed GetTickCount difference for all passes. }
procedure TfrmBlockProc.MaxwellFunction(Iter: integer; var Result: double);
var
  passesLeft, idx, lastIdx: integer;
  scaleParam: TSample;
begin
  { Maxwell distribution, a = 1.0 }
  scaleParam := 1;
  Counter := GetTickCount;
  passesLeft := Iter;
  while passesLeft > 0 do
  begin
    { Upper bound is read once per pass, as a for-loop bound would be. }
    lastIdx := X.Length - 1;
    for idx := 0 to lastIdx do
      Res.Values[idx] := MaxwellPdf(X.Values[idx], scaleParam);
    Dec(passesLeft);
  end;
  Result := GetTickCount - Counter;
end;

{ Whole-vector variant of the benchmark.
  Runs Iter passes; every pass applies the vector operation chain to the
  full length of X in one go, without block iteration.
  Iter   - number of passes; zero or negative performs none.
  Result - receives the elapsed GetTickCount difference for all passes. }
procedure TfrmBlockProc.MaxwellNoBlock(Iter: integer; var Result: double);
var
  passesLeft: integer;
  scaleParam: TSample;
begin
  { Maxwell distribution, a = 1.0 }
  scaleParam := 1;
  Counter := GetTickCount;
  passesLeft := Iter;
  while passesLeft > 0 do
  begin
    { tmp holds the squared input; Res is built from it step by step. }
    tmp.Sqr(X);
    Res.Copy(tmp);
    Res.Scale(-0.5*scaleParam);
    Res.Exp;
    Res.Mul(tmp);
    Res.Scale(Sqrt(4*INVTWOPI));
    Dec(passesLeft);
  end;
  Result := GetTickCount - Counter;
end;

{ Returns the largest element of a.
  The array is only read, so it is taken as const; callers that pass
  variables keep working unchanged.
  The running maximum is seeded from the first element rather than from
  MINNUM, so the result is correct whatever the value of MINNUM and
  whatever the range of the data.
  For an empty array MINNUM is returned, as before. }
function FindMax(const a: array of double): double;
var i: integer;
begin
  if High(a) < 0 then
  begin
    { Nothing to compare: keep the historical sentinel value. }
    Result := MINNUM;
    Exit;
  end;
  Result := a[0];
  for i := 1 to High(a) do
  begin
    if a[i] > Result then Result := a[i];
  end;
end;

{ Runs the full benchmark and plots the timings.
  Six runs are made. The first uses a vector of 10 elements and 1,000,000
  passes; each following run multiplies the length by 10 and divides the
  pass count by 10. Every run times MaxwellFunction, MaxwellNoBlock and
  MaxwellBlock in that order, and the three timing arrays are then added
  to Chart1.Series[0], [1] and [2].
  The hourglass cursor is restored in a finally block, so an exception in
  a benchmark or chart call cannot leave it stuck. }
procedure TfrmBlockProc.Button1Click(Sender: TObject);
var a1,a2,a3: array [0..5] of double;
    i,Iters: integer;
begin
  Screen.Cursor := crHourGlass;
  try
    X.Size(10, False);
    X.SetVal(2);
    Res.Size(X);
    Iters := 1000000;
    for i := 0 to High(a1) do
    begin
      MaxwellFunction(Iters,a1[i]);
      Label1.Caption := 'Progress: ' + FormatFloat('0',(i+0.33)/6*100) + '%';
      Update;
      MaxwellNoBlock(Iters,a2[i]);
      Label1.Caption := 'Progress: ' + FormatFloat('0',(i+0.66)/6*100) + '%';
      Update;
      MaxwellBlock(Iters,a3[i]);
      Label1.Caption := 'Progress: ' + FormatFloat('0',(i+0.99)/6*100) + '%';
      Update;
      { Prepare the next run only if there is one: growing and filling the
        vectors after the last run would allocate memory nobody uses. }
      if i < High(a1) then
      begin
        Iters := Iters div 10;
        X.Length := X.Length*10;
        Res.Size(X);
        X.SetVal(2);
      end;
    end;
    Label1.Caption := 'Progress: 100%';
    { Manual scaling: the left axis range is set explicitly below. }
    Chart1.LeftAxis.Automatic := False;
    for i := 0 to Chart1.SeriesCount-1 do Chart1.Series[i].Clear;
    { Only the first series carries labels: the vector length of each run
      (10, 100, ... for i = 0, 1, ...). }
    for i := 0 to High(a1) do Chart1.Series[0].AddY(a1[i],FormatFloat('0',Power(10,(i+1))));
    for i := 0 to High(a2) do Chart1.Series[1].AddY(a2[i]);
    for i := 0 to High(a3) do Chart1.Series[2].AddY(a3[i]);
    { Leave 10% headroom above the slowest timing. }
    Chart1.LeftAxis.SetMinMax(0,1.1*Max(Max(FindMax(a1),FindMax(a2)),FindMax(a3)));
  finally
    Screen.Cursor := crDefault;
  end;
end;

initialization
  { Register the form class when the unit is initialised.
    NOTE(review): presumably so the hosting application can look the form
    up by class name - confirm against the caller. }
  RegisterClass(TfrmBlockProc)
  
end.
