%    Copyright (C) 1994, 1996, 1997 Aladdin Enterprises.  All rights reserved.
%
% This file is part of Aladdin Ghostscript.
%
% Aladdin Ghostscript is distributed with NO WARRANTY OF ANY KIND.  No author
% or distributor accepts any responsibility for the consequences of using it,
% or for whether it serves any particular purpose or works at all, unless he
% or she says so in writing.  Refer to the Aladdin Ghostscript Free Public
% License (the "License") for full details.
%
% Every copy of Aladdin Ghostscript must include a copy of the License,
% normally in a plain ASCII text file named PUBLIC.  The License grants you
% the right to copy, modify and redistribute Aladdin Ghostscript, but only
% under certain conditions described in the License.  Among other things, the
% License requires that the copyright notice and this notice be preserved on
% all copies.

% pdf_base.ps
% Basic parser for PDF reader.

% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% and streams; it doesn't include any facilities for making marks on
% the page.

% NOTE: every definition below must start on its own line.  A '%' comment
% runs to the end of the line, so joining these lines turns the code that
% follows a comment into comment text.

/.setlanguagelevel where { pop 2 .setlanguagelevel } if
% The previous global-allocation flag stays on the operand stack until the
% matching .setglobal at the end of the file.
.currentglobal true .setglobal
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin

% We rebind #, #?, #dsc, and #dscfile later if we're writing out PostScript.
% Default '#': discard the top operand, then convert the next operand to
% executable form and execute it.
/#			% ... <opname> <any> # -
 { pop cvx exec } bind def
/#?			% - #? false
 { false } bind def
/#dsc			% mark ... #dsc -
 { cleartomark } bind def
/#dscfile		% <any> #dscfile -
 { pop } bind def

% Define the name interpretation dictionary for reading values.
% Keys are the executable names that may appear in a PDF value; each value
% is what gets executed when that name is read.
/valueopdict mark
  (<<) cvn { mark } bind	% don't push an actual mark!
  (>>) cvn /.dicttomark load
  ([) cvn { mark } bind		% ditto
  (]) cvn dup load
  /true true
  /false false
  /null null
  /F dup cvx		% see Objects section below
  /R dup cvx		% see Objects section below
  /stream dup cvx	% see Streams section below
.dicttomark readonly def

% ------ Utilities ------ %

% Define a scratch string.
% The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
/pdfstring 255 string def

% Read the previous line of a file.  If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
% Returns the file position at which the returned line starts and the line
% itself (the substring produced by readline into pdfstring).
/prevline		% - prevline <startpos> <substring>
 { PDFfile fileposition dup () pdfstring
		% Back up 257 bytes (but not before position 0) and read
		% forward line by line until the initial position is reached.
   2 index 257 sub 0 .max PDFfile exch setfileposition
    {		% Stack: initpos linepos line string
      PDFfile fileposition
      PDFfile 2 index readline pop
      dup length 0 gt
		% Non-blank line: it replaces the remembered linepos/line.
       { 3 2 roll 5 -2 roll pop pop 2 index }
       { pop }
      ifelse
		% Stack: initpos linepos line string startpos
      PDFfile fileposition 5 index ge { exit } if
      pop
    }
   loop pop pop 3 -1 roll pop
 } bind def

% Read a token from a file, recognizing the PDF 1.2 #nn escape convention.
% This should be done in C!
% Only literal names are examined for '#'; executable names and all other
% token types are returned unchanged.
/.pdftoken		% <file> .pdftoken <obj> -true-
			% <file> .pdftoken -false-
 { token
    { dup type /nametype eq
       { dup xcheck
	  { true
	  }
	  { dup .namestring (#) search
	     { name#escape cvn exch pop
	     }
	     { pop
	     }
	    ifelse true
	  }
	 ifelse
       }
       { true
       }
      ifelse
    }
    { false
    }
   ifelse
 } bind def

% Expand #nn escapes.  The operands are the three results of
% '(#) search' on the name string: the text after the '#', the matched
% '#' itself, and the text before it.  The two hex digits that start
% <post> are decoded into one character, and any further '#' in the
% remainder is handled recursively.
/name#escape		% <post> <(#)> <pre> name#escape <string>
{ exch pop
  1 index 2 () /SubFileDecode filter dup (x) readhexstring
		% Stack: post pre stream char t/f
  not { /.pdftoken cvx /syntaxerror signalerror } if
  exch closefile concatstrings
		% Drop the two hex digits from the front of post.
  exch 2 1 index length 2 sub getinterval
  (#) search { name#escape } if concatstrings
} bind def

% Execute a file, interpreting its executable names in a given
% dictionary.  The name procedures may do whatever they want
% to the operand stack.
/.pdfrun			% <file> <opdict> .pdfrun -
 {	% Construct a procedure with the stack depth, file and opdict
	% bound into it.
	% The three values are placed above a mark so that .packtomark
	% prepends them to the loop body; every iteration therefore starts
	% by pushing count, opdict and file.
   1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
    {	% Stack: ..operands.. count opdict file
		% At end of input, substitute the executable name %%EOF.
      .pdftoken not { (%%EOF) cvn cvx } if
      dup xcheck
       { DEBUG { dup == flush } if
		% Executable token: look it up in opdict.
	 2 copy .knownget
	  { exch pop exch pop exch pop exec }
	  { BXlevel 0 le
	     { (%stderr) (w) file
	       dup (****************Unknown operator: ) writestring
	       dup 2 index .writecvs dup (\n) writestring flushfile
	     }
	    if pop pop
	    count exch sub { pop } repeat	% pop all the operands
	  }
	 ifelse
       }
		% Literal token: drop count and opdict, leaving the token
		% on the stack as an operand.
       { exch pop exch pop DEBUG { dup ==only ( ) print flush } if
       }
      ifelse
    }
   aload pop .packtomark cvx
   /loop cvx 2 packedarray cvx
		% Wrap the loop so that the previous value of PDFsource is
		% stored back afterwards and a stop raised inside the loop
		% is re-raised.
    { stopped /PDFsource } aload pop
   PDFsource
    { store { stop } if } aload pop .packtomark cvx
		% Make the file the current PDFsource and run the wrapper.
   /PDFsource 3 -1 roll store exec
 } bind def

% ------ File reading ------ %

% Read the cross-reference entry for an (unresolved) object.
% The caller must save and restore the PDFfile position if desired.
% For invalid (free) objects, we return 0.
/readxrefentry		% <obj#> readxrefentry <objpos>
 { dup Objects exch lget
		% Objects[obj#] is used as the file position of the entry.
   PDFfile exch setfileposition
   PDFfile token pop		% object position
   PDFfile token pop		% generation #
   PDFfile token pop		% n or f
   dup /n eq
    { pop 1 add dup 255 gt
		% 1+gen# is above 255, the largest value a string
		% element can hold.
       { Generations ltype /stringtype eq
	  {		% Convert Generations from a string to an array.
	    larray Generations llength lgrowto dup
	    0 1 2 index llength 1 sub
	     { Generations 1 index lget lput dup
	     }
	    for pop /Generations exch store
	  }
	 if
       }
      if
    }
    { /f eq
		% Free entry: record 0 in place of 1+gen#.
       { pop 0 }
       { /readxrefentry cvx /syntaxerror signalerror }
      ifelse
    }
   ifelse
		% Stack: obj# objpos 1+gen#
		% Store the value in Generations[obj#]; objpos is left as
		% the result.
   Generations 4 -1 roll 3 -1 roll lput
 } bind def

% ================================ Objects ================================ %

% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
% Geometry of the 2-D sequences: an index is split into a top-level index
% (index shifted right by lshift bits) and an index within the sub-array
% (the low lshift bits).
/lshift 9 def				% bits of index per sub-array
/lnshift 0 lshift sub def		% negated shift, for right shifts
/lsubmask -1 lshift bitshift not def	% mask selecting the low lshift bits
/lsublen 1 lshift bitshift def		% length of a full sub-array
% Create an empty 2-D sequence of arrays: a one-element top-level array
% whose single sub-array has length 0.
/larray {	% - larray <lseq>
  0 array 1 array astore
} bind def
% Create an empty 2-D sequence of strings: a one-element top-level array
% whose single element is an empty string.
/lstring {	% - lstring <lseq>
  () 1 array astore
} bind def
% Return the type of the elements' container (stringtype or arraytype),
% taken from the first sub-sequence.
/ltype {	% <lseq> ltype <type>
  0 get type
} bind def
% Fetch element <index>: the high bits of the index select the
% sub-sequence, the low lshift bits select the element within it.
/lget {		% <lseq> <index> lget <value>
  dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
} bind def
% Store <value> at element <index>: the high bits of the index select the
% sub-sequence, the low lshift bits select the slot within it.
/lput {		% <lseq> <index> <value> lput -
		% Fetch the sub-sequence first.
  3 -1 roll 2 index //lnshift bitshift get
		% Stack: index value subseq
  3 -1 roll //lsubmask and
		% Stack: value subseq subindex
  3 -1 roll put
} bind def
% Total number of elements: the length of the last sub-sequence plus
% (number of preceding sub-sequences) shifted left by lshift bits.
/llength {	% <lseq> llength <length>
  dup length 1 sub 2 copy get length
		% Stack: lseq ntop-1 lastlen
  3 1 roll exch pop //lshift bitshift add
} bind def
% lgrowto assumes newlength > llength(lseq)
% Allocate a new string or array (matching the type of the operand) of the
% given length and copy the old contents into its start.
/growto {	% <string/array> <length> growto <string'/array'>
  1 index type /stringtype ne { array } { string } ifelse
		% Stack: old new
  dup 3 -1 roll exch copy pop
} bind def
% Grow a 2-D sequence to <newlength> elements.  If more sub-sequences are
% needed, a new top-level array is built; otherwise only the last
% sub-sequence is replaced by a longer copy.
/lgrowto {	% <lseq> <newlength> lgrowto <lseq'>
		% newtoplen = number of sub-sequences needed for newlength.
    dup //lsubmask add //lnshift bitshift dup 3 index length gt {
	% Add more sub-arrays.  Start by completing the last existing one.
		% Stack: lseq newlen newtoplen
		% (recursive call with the length rounded up to the end
		% of the last existing sub-array)
    3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
		% Stack: newlen newtoplen lseq
		% Build the new top-level array: the existing sub-arrays,
		% then full-length ones, then one empty one that is
		% expanded below.
    [ exch aload pop
    counttomark 2 add -1 roll		% newtoplen
    counttomark sub { dup 0 0 getinterval lsublen growto } repeat
    dup 0 0 getinterval ] exch
  } {
    pop
  } ifelse
	% Expand the last sub-array.
		% newsublen = ((newlen - 1) mod lsublen) + 1
  1 sub //lsubmask and 1 add
  exch dup dup length 1 sub 2 copy
		% Stack: newsublen lseq lseq len-1 lseq len-1
  get 5 -1 roll growto put
} bind def

% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}.  This is not a possible PDF object, because PDF has
% no way to represent procedures.  Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/unresolved?		% <obj#> unresolved? <bool>
 {		% True iff Objects[obj#] is an executable integer.
   Objects exch lget
   dup type /integertype eq exch xcheck and
 } bind def
% 'Forcing' an object is simply executing it.
/oforce //exec def	% <object> oforce <object'>
% Fetch an element, forcing it if it is executable; the forced value is
% stored back into the container so it is only resolved once.
/oget		% <array> <index> oget <object>
		% <dict> <key> oget <object>
 { 2 copy get dup xcheck not
    {		% Already a plain value: discard the container and key.
      3 1 roll pop pop
    }
    {		% Execute it, keep one copy as the result, store the other.
      exec dup 4 1 roll put
    }
   ifelse
 } bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget		% <dict> <key> knownoget <object> true
			% <dict> <key> knownoget false
 { 2 copy known not
    { pop pop false }
    {		% Present: fetch it; a null value counts as absent.
      oget dup null ne
      dup not { exch pop } if
    }
   ifelse
 } bind def

% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F		% <any> <any> <any> F null
 {		% Some PDF 1.1 files use F as a synonym for f!
		% With fewer than 3 operands on the stack, run f instead.
   count 3 ge { pop pop pop null } { f } ifelse
 } bind def

% We keep track of objects in a pair of arrays, Objects and Generations.
% Generations[N] is 1+ the current generation number for object number N.
% (As far as we can tell, this is needed only for error checking.)
% If object N is loaded, Objects[N] is the actual object;
% otherwise, Objects[N] is an executable integer giving the file offset
% of the object's entry in the cross-reference table.
% For free objects, Generations[N] is 0.
/checkgeneration  % <obj#> <generation#> checkgeneration <obj#> <bool>
 {		% Compare Generations[obj#] - 1 with the given generation.
   Generations 2 index lget 1 sub 1 index ne
    {		% Mismatch: print the warning with obj# and generation#.
      (Warning: wrong generation: ) print 1 index =only ( ) print = false
    }
    { pop true
    }
   ifelse
 } bind def
/R		% <obj#> <generation#> R <object>
 { 1 index unresolved? not
    {		% Already loaded: return it (null if the generation is wrong).
      checkgeneration { Objects exch lget } { pop null } ifelse
    }
    {		% Not loaded: build the deferred reference
		% {obj# gen# resolveR}.
      /resolveR cvx 3 packedarray cvx
    }
   ifelse
 } bind def

% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
% Operator dictionary used while reading an object definition:
% everything in valueopdict, plus endobj.
/objopdict
  mark
    valueopdict { } forall
    /endobj /endobj cvx
  .dicttomark readonly
def
% Interpret PDFfile with objopdict.  The operands are not touched here;
% they stay on the stack for endobj.
/obj			% <obj#> <generation#> obj <object>
{
  PDFfile objopdict .pdfrun
} bind def
% Record a just-read object in Objects[obj#] and return it; if the
% generation number does not match, return null and store nothing.
/endobj			% <obj#> <generation#> <object> endobj <object>
 { 3 1 roll
		% Stack: object obj# gen#
		% Read the xref entry if we haven't yet done so.
		% This is only needed for generation # checking.
   1 index unresolved?
    { PDFfile fileposition
      2 index readxrefentry pop
		% Put the file back exactly where it was.  The saved value
		% came from fileposition, so it must be restored unchanged
		% (no PDFoffset adjustment), as resolveR does.
      PDFfile exch setfileposition
    } if
   checkgeneration { Objects exch 2 index lput } { pop pop null } ifelse
 } bind def

% When resolving an object reference, we stop at the endobj.
% Operator dictionary used while resolving a reference: everything in
% valueopdict, plus an endobj that also exits the interpretation loop.
/resolveopdict
  mark
    valueopdict { } forall
    /endobj { endobj exit } bind
  .dicttomark readonly
def
/resolveR		% <obj#> <generation#> resolveR <object>
 { DEBUG { (%Resolving: ) print 2 copy 2 array astore == } if
   1 index unresolved?
		% Not yet loaded: remember the current file position, read
		% the xref entry, then read the object itself.
    { PDFfile fileposition 3 1 roll
      1 index readxrefentry
      3 1 roll checkgeneration
       {		% Stack: savepos objpos obj#
	 exch PDFoffset add PDFfile exch setfileposition
		% The object must start with "<obj#> <gen#> obj".
	 PDFfile token pop 2 copy ne
	  { (xref error!\n) print /resolveR cvx /rangecheck signalerror
	  }
	 if pop PDFfile token pop
	 PDFfile token pop /obj ne
	  { (xref error!\n) print /resolveR cvx /rangecheck signalerror
	  }
	 if
	 pdf_run_resolve	% PDFfile resolveopdict .pdfrun
       }
		% Wrong generation: store null in Objects[obj#] and
		% return null.
       { Objects exch null lput pop null
       }
		% Stack: savepos object -- restore the file position.
      ifelse exch PDFfile exch setfileposition
    }
		% Already loaded: discard gen# and return Objects[obj#].
    { pop Objects exch lget
    }
   ifelse
 } bind def

%================================ Streams ================================ %

% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
%	/File - the file or string where the stream contents are stored;
%	/FilePosition - iff File is a file, the position in the file
%	  where the contents start.
%	/StreamKey - the key used to decrypt this stream if any
% We do the real work of constructing the data stream only when the
% contents are needed.

% Construct a stream.  The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably.  We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
/stream			% <dict> stream <streamdict>
 { PDFsource PDFfile eq
		% Reading directly from PDFfile: record where the data
		% starts and skip Length bytes past it.
    { dup /File PDFfile put
      dup /FilePosition PDFfile fileposition put
      DEBUG { (%FilePosition: ) print dup /FilePosition get == } if
      PDFfile fileposition 1 index /Length oget add
        PDFfile exch setfileposition
    }
    {	% We're already reading from a stream, which we can't reposition.
	% Capture the sub-stream contents in a string.
      dup /Length oget string PDFsource exch readstring
      not
       { (Unexpected EOF in stream!\n) print
	 /stream cvx /rangecheck signalerror
       }
      if
		% Store the captured string as /File.
      1 index exch /File exch put
    }
   ifelse
		% The next token must be endstream.
   PDFsource token pop
     /endstream ne { /stream cvx /syntaxerror signalerror } if
		% Make the dictionary executable to mark it as a stream.
   cvx
 } bind def

% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
%	- If we are going to interpret their contents, we let endstream
%	  terminate the interpretation loop;
%	- If we are just going to read data from them, we impose
%	  a SubFileDecode filter that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream		% <streamdict> <readdata?> resolvestream <stream>
 { exch dup /FilePosition .knownget
		% If a FilePosition was recorded, reposition File to it.
    { 1 index /File get exch setfileposition }
   if
		% Stack: readdata? dict
   dup /DecodeParms .knownget not { null } if
   1 index /Filter .knownget not { {} } if
		% A single filter name is wrapped in a 1-element array,
		% and so are its parameters if there are any.
   dup type /nametype eq
    { 1 array astore
      1 index null ne { exch 1 array astore exch } if
    }
   if
		% Stack: readdata? dict parms filternames
   2 index /File get exch
		% Stack: readdata? dict parms file/string filternames
   pdf_decrypt_stream		% add decryption if needed
   dup length 0 eq
    {		% All the PDF filters have EOD markers, but in this case
		% there is no specified filter.
      pop exch pop
		% Stack: readdata? dict file/string
      2 index
       {	% We're going to read data; use a SubFileDecode filter.
	 1 index /Length oget () /SubFileDecode filter
       }
       { dup type /filetype ne
	  {	% Use a SubFileDecode filter to read from a string.
		% The filter name must be a literal (/SubFileDecode);
		% without the slash it is an executable name and
		% raises an undefined error.
	    0 () /SubFileDecode filter
	  }
	 if
       }
      ifelse
    }
    { 2 index null eq
		% No parameters at all: apply each filter by name.
       { { filter }
       }
       {	% Stack: readdata? dict parms file/string filtername
		% Apply the filter with the first element of parms
		% (unless it is null), then drop that element of parms.
         { 2 index 0 get dup null eq { pop } { exch } ifelse filter
	   exch dup length 1 sub 1 exch getinterval exch
	 }
       }
      ifelse forall exch pop
    }
   ifelse
		% Stack: readdata? dict file
   exch pop exch pop
 } bind def
% Executing endstream exits the enclosing loop.
/endstream		% - endstream -
 { exit } def

end			% pdfdict
% Restore the global-allocation flag saved on the operand stack at the
% top of the file.
.setglobal