// file detection unit

unit filedet;

interface

const
  { file data types }
  data_unknown = 1;
  data_stream  = 2;


function IsFile(abuf: pointer; bufsize: longword; var fileext: string; var datatype: byte): boolean;


implementation

uses common, helpers, sysutils;


function IsFile(abuf: pointer; bufsize: longword; var fileext: string; var datatype: byte): boolean;
var
  CRC: word;

   procedure crc16_addbits(bitstring: longword; len: byte);
   const
     polynomial = $8005;
   var
     bitmask: longword;
   begin
     bitmask:=1 SHL (len-1);
     repeat
       if (NOT(crc AND $8000)) XOR (NOT(bitstring AND bitmask)) > 0 then
       begin
         crc:=crc SHL 1;
         crc:=crc XOR polynomial;
       end else crc:=crc SHL 1;
       bitmask := bitmask SHR 1;
     until (bitmask = 0);
   end;

const
  BitRates: array[0..5, 0..15] of word=
    { Version 2 and V2.5: L3,L2,L1 }
   ((0,  8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160,0),
    (0,  8, 16, 24, 32, 40, 48, 56, 64, 80, 96,112,128,144,160,0),
    (0, 32, 48, 56, 64, 80, 96,112,128,144,160,176,192,224,256,0),

    { Version 1:  L3,L2,L1 }
    (0, 32, 40, 48, 56, 64, 80, 96,112,128,160,192,224,256,320,0),
    (0, 32, 48, 56, 64, 80, 96,112,128,160,192,224,256,320,384,0),
    (0, 32, 64, 96,128,160,192,224,256,288,320,352,384,416,448,0));

  // Sampling rate frequency index (values are in Hz)
  // for MPEG2.5, Reserved, MPEG2, MPEG1
  SampleRates: array[0..3, 0..3] of word =
     ((11025, 12000,  8000, 0),
      (    0,     0,     0, 0),
      (22050, 24000, 16000, 0),
      (44100, 48000, 32000, 0));

  // MPEG Audio version IDs
  MPEGIDVer25     =  0;
  MPEGIDReserved  =  1;
  MPEGIDVer2      =  2;
  MPEGIDVer1      =  3;

  // Layer descriptions
  LayerReserved   = 0;
  LayerIII        = 1;
  LayerII         = 2;
  LayerI          = 3;

  ChannelStereo    = 0;
  ChannelJointStereo = 1;
  ChannelDual        = 2;
  ChannelSingle      = 3;

var
  buf: pbytearray;
  bufpos: pbytearray;
  maxbufpos: pbytearray;
  i, j, row: integer;
  framecount: integer;
  hdr: longword;
  MPEGAudioVer: byte;
  Layer       : byte;
  ChannelMode : byte;
  SampleRate  : word;
  Padding     : byte;
  BitRateIdx, SampleRateIdx: byte;
  BitRate     : word;
  CRCProtected: boolean;
  MPEG_CRC: word;
  Emphasis    : byte;
  FrameLengthInBytes: word;
  invalid: boolean;
  s: shortstring;
  recordno: word;
  vers: byte;
  recorddata: pbytearray;
label fin;
begin
  result:=FALSE; datatype:=data_unknown;

  try
    longword(buf):=longword(abuf);
    // -------------------- BINARY coded files ------------------------------------
    // check for JPG...
    { SOI = Start Of Image = 'FFD8'  This marker must be present in any JPG file *once* at the beginning of the file. (Any JPG file starts with the sequence FFD8.)
      EOI = End Of Image = 'FFD9'    Similar to EOI: any JPG file ends with FFD9. }
    if (buf^[0] = $FF) AND (buf^[1]=$D8) then
    begin
      result:=TRUE; fileext:='JPG';
      goto fin;
    end;

    // check for MS-WORD...
   (*  // Word's File-Information-Block (FIB) structure...
    typedef struct _fib {
    short magicNumber;
    // Word 6.0: 0xA5DC
    // Word 7.0 (95): 0xA5DC
    // Word 8.0 (97): 0xA5EC

    short version;   // >= 101 for Word 6.0 and higher...
    // Word 6.0: 101
    // Word 7.0 (95): 104
    // Word 8.0 (97): 105
   } FIB, *LPFIB; *)
    if (pword(@buf^[0])^ = $A5DC)
      OR (pword(@buf^[0])^ = $A5DB)
      OR (pword(@buf^[0])^ = $A5EC)
      OR ((buf^[0] = $D0) AND (buf^[1]= $CF) AND (buf^[2]=$11) AND (buf^[3]=$E0)) then
    begin
      result:=TRUE; fileext:='DOC';
      goto fin;
    end;

    // check for BIFF...
    recordno:=pword(@buf^[0])^;
    if (recordno AND $00FF) = $0009 then   // BOF record (Beginning of file) ?
    begin
      vers:=recordno SHR 8;
      if (vers = $04) OR (vers = $02) OR (vers = $00) OR  (vers = $08) then  // BIFF 3,4,5,7,8 ?
      begin
        result:=TRUE; fileext:='XLS';               // BIFF => XLS file
        goto fin;
      end;
    end;


    // check for BMP...
    if (buf^[0] = $42) AND (buf^[1]= $4D) then // the header is "BM"
    begin
      result:=TRUE; fileext:='BMP';
      goto fin;
    end;

    // check for GIF...
    if (buf^[0] = $47) AND (buf^[1]= $49) AND (buf^[2]=$46) AND (buf^[3]=$38)
      AND ((buf^[4]=$39) OR (buf^[4]=$37)) AND (buf^[4]=$61) then       // the header is "GIF89a" or "GIF87a"
    begin
      result:=TRUE; fileext:='BMP';
      goto fin;
    end;

    // check for TIFF...
    if ((buf^[0] = $49) AND (buf^[1]= $49))
      OR ((buf^[0]=$4D) AND (buf^[1]=$4D)) then       // the header is "4949" or "4D4D"
    begin
      result:=TRUE; fileext:='TIF';
      goto fin;
    end;

    // check for PNG...
    // magic: 137 80 78 71 13 10 26 10
    if (buf^[0] = 137) AND (buf^[1]= 80) AND (buf^[2]=78) AND (buf^[3]=71)
      AND (buf^[4]=13) AND (buf^[5]=10) AND (buf^[6]=26) AND (buf^[7]=10) then
    begin
      result:=TRUE; fileext:='PNG';
      goto fin;
    end;

    // check for RIFF...
    if IsDataEqual(@buf^[0], 'RIFF') then             // RIFF ID
    begin
      // RIFF file found...

      // check for CDR...
      if IsDataEqual(@buf^[8], 'CDR') then           // the format is "CDR"
      begin
        result:=TRUE; fileext:='CDR';
        goto fin;
      end;
      // check for WAVE...
      if IsDataEqual(@buf^[8], 'WAVE') then           // the format is "WAVE"
      begin
        result:=TRUE; fileext:='WAV';
        goto fin;
      end;
      // check for AVI...
      if IsDataEqual(@buf^[8], 'AVI ') then           // the format is "AVI "
      begin
        result:=TRUE; fileext:='AVI';
        goto fin;
      end;

      // else unknown RIFF...
      result:=TRUE; fileext:='RIFF';
      goto fin;
    end;

    // check for EXE...
    if (buf^[0] = $4D) AND (buf^[1]= $5A) then           // the header is "MZ"
    begin
      result:=TRUE; fileext:='EXE';
      goto fin;
    end;

    // check for DBF...
    if ((buf^[0] = $03) OR (buf^[0]= $83) OR (buf^[0]= $F5) OR (buf^[0]= $8B) OR (buf^[0]= $8E))  // dBase/FoxBase/FoxPro ?
      AND (buf^[12] = 0) AND (buf^[13] = 0)                     // res=0 ?
      AND (buf^[30] = 0) AND (buf^[31] = 0) then                // res=0 ?
    begin
      result:=TRUE; fileext:='DBF';
      goto fin;
    end;

    // check for HLP...
    if plong(@buf^[0])^ = $00035f3f then  // the magic is 0x00035f3f
    begin
      result:=TRUE; fileext:='HLP';
      goto fin;
    end;

    // check for TAR...
    if IsDataEqual(@buf^[257], 'ustar'+#0) then  // the magic is "ustar"+0
    begin
      result:=TRUE; fileext:='TAR';
      goto fin;
    end;

    // check for LZH...
    if IsDataEqual(@buf^[2], '-lh') OR IsDataEqual(@buf^[2], '-lz') then  // the magic is "-lh" or "-lz"
    begin
      result:=TRUE; fileext:='LZH';
      goto fin;
    end;

    // check for ZIP...
    if plong(@buf^[0])^ = $04034b50 then  // the local file header is (0x04034b50)
    begin
      result:=TRUE; fileext:='ZIP';
      goto fin;
    end;

    // check for ARJ...
    if pword(@buf^[0])^ = $EA60 then  // ARJ ID=EA60h
    begin
      result:=TRUE; fileext:='ARJ';
      goto fin;
    end;


    // check for RAR...
    // The marker block is actually considered as a fixed byte
    // sequence: 0x52 0x61 0x72 0x21 0x1a 0x07 0x00
    if (buf^[0] = $52) AND (buf^[1]= $61) AND (buf^[2]=$72) AND (buf^[3]=$21)
      AND (buf^[4]=$1a) AND (buf^[5]=$07) AND (buf^[6]=$00) then
    begin
      result:=TRUE; fileext:='RAR';
      goto fin;
    end;

    // check for PDF...
    // The first line of a PDF file specifies the version number of the PDF specification to
    // which the file adheres. The current version is 1.2; the first line of a 1.2-conforming
    // PDF file should be %PDF-1.2. However, 1.0-conforming files and 1.1-
    // conforming files are also 1.2-conforming files, so an application that understands
    // PDF 1.2 also accepts a file that begins with either %PDF-1.1 or %PDF-1.0.
    if (buf^[0] = $25) AND (buf^[1]= $50) AND (buf^[2]=$44) AND (buf^[3]=$46)
      AND (buf^[4]=$2D) AND (buf^[5]=$31) AND (buf^[6]=$2E)
      AND ((buf^[7]=$32) OR (buf^[7]=$31) OR (buf^[7]=$30))  then
    begin
      result:=TRUE; fileext:='PDF';
      goto fin;
    end;

    // check for MIDI...
    if IsDataEqual(@buf^[0], 'MThd') then             // head chunk
    begin
      result:=TRUE; fileext:='MID';
      goto fin;
    end;

    // check for QuickTime MOV...
    if IsDataEqual(@buf^[4], 'moov') then           // is basic atom type?
    begin
      result:=TRUE; fileext:='MOV';
      goto fin;
    end;



    // -------------------- TEXT coded files ------------------------------------
    // check for HTML...
    i:=0;
    while (i < bufsize-6) do
    begin
      if IsDataEqual(@buf^[i], '<HTML>') then   // HTML Tag found?
      begin
        result:=TRUE; fileext:='HTML';
        goto fin;
      end;
      inc(i);
    end;

    longword(bufpos):=longword(buf);
    maxbufpos:=pointer(longword(bufpos) + 32);
    repeat
      s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
      // check for DXF...
      // (find entries 0, SECTION, 2, HEADER)
      if pos('0', s) <> 0 then
      begin
        s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
        if s = 'SECTION' then
        begin
          s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
          if pos('2', s) <> 0 then
          begin
            s:=GetNextTextLine(pointer(bufpos), pointer(maxbufpos));
            if s = 'HEADER' then
            begin
              result:=TRUE; fileext:='DXF';
              goto fin;
            end;
          end;
        end;
      end
      // check for RTF...
      else if pos('{\rtf', s) <> 0 then
      begin
        result:=TRUE; fileext:='RTF';
        goto fin;
      end;
    until (s ='');


    // check for MPEG...
    { Remember, this is not enough, frame sync can be easily (and very frequently) found in any binary file.
      Also it is likely that MPEG file contains garbage on it's beginning which also may contain false sync.
      Thus, you have to check two or more frames in a row to assure you are really dealing with MPEG audio file }
    framecount:=0;
    i:=0;
    while i < bufsize-4 do
    begin
      if (buf^[i]=$FF) AND (buf^[i+1] AND $E0=$E0) then
      begin
        // sync found...
        invalid:=TRUE;
        hdr:=plong(@buf^[i])^;
        MPEGAudioVer :=(buf^[i+1] SHR 3) AND 3;
        if MPEGAudioVer <> MPEGIDReserved then
        begin
          Layer        :=(buf^[i+1] SHR 1) AND 3;
          if Layer <> LayerReserved then
          begin
            CRCProtected :=boolean((buf^[i+1]) AND 1);
            BitRateIdx   :=(buf^[i+2] SHR 4) AND $F;
            if (BitRateIdx <> $F) AND (BitRateIdx <> 0) then
            begin
              case MPEGAudioVer of
                MPEGIDVer25, MPEGIDVer2: row := 0;
                MPEGIDVer1             : row := 3;
              end;
              BitRate:=BitRates[row + (Layer-1), BitRateIdx];
              SampleRateIdx:=(buf^[i+2] SHR 2) AND 3;
              if SampleRateIdx <> 3 then
              begin
                row:=MPEGAudioVer;
                SampleRate:=SampleRates[row, SampleRateIdx];
                Padding :=((buf^[i+2]) SHR 1) AND 1;
                ChannelMode:=(buf^[i+3] SHR 6) AND 3;
                Emphasis:=(buf^[i+3] AND 3);
                if Emphasis <> 2 then
                begin
                  // calculate frame lenth (length of compressed frame)...
                  if Layer = LayerI then
                    FrameLengthInBytes := (12 * BitRate*1000 div SampleRate + Padding) * 4
                  else if Layer IN [LayerII, LayerIII] then
                    FrameLengthInBytes := 144 * BitRate*1000 div SampleRate + Padding;
                  if FrameLengthInBytes >= 32 then
                  begin
                    if CRCProtected then
                    begin
                      MPEG_CRC:=pword(@buf^[i+4])^;

                      if MPEG_CRC <> 0 then
                      begin
                        // check CRC...
                        crc:=0;
                        for j:=i+4 to i+FrameLengthInBytes -1 do
                        begin
                          crc16_addbits(buf^[j], 8);
                        end;
                        if (CRC >= MPEG_CRC-8) AND (CRC <= MPEG_CRC+8) then invalid:=FALSE;
                      end else invalid:=FALSE;
                    end else invalid:=FALSE;
                  end;
                end;
              end;
            end;
          end;
        end;
        if invalid then
        begin
          inc(i);
          framecount:=0; // reset frame count to zero
          break;
        end  else
        begin
          // MP3 header is valid...
          inc(i, FrameLengthInBytes);
          inc(framecount);
        end
      end else inc(i);
    end;
    if (framecount >= bufsize DIV 512) then
    begin
      result:=TRUE; fileext:='MP3'; datatype:=data_stream;
      goto fin;
    end;

    fin:

  except
    on E : Exception do
    begin
      result:=FALSE;
    end;
  end;
end;


end.
