12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132 |
- % Copyright (C) 1994-2006 Artifex Software, Inc. All rights reserved.
- %
- % This software is provided AS-IS with no warranty, either express or
- % implied.
- %
- % This software is distributed under license and may not be copied,
- % modified or distributed except as expressly authorized under the terms
- % of the license contained in the file LICENSE in this distribution.
- %
- % For more information about licensing, please refer to
- % http://www.ghostscript.com/licensing/. For information on
- % commercial licensing, go to http://www.artifex.com/licensing/ or
- % contact Artifex Software, Inc., 101 Lucas Valley Road #110,
- % San Rafael, CA 94903, U.S.A., +1(415)492-9861.
-
- % $Id: pdf_base.ps 10498 2009-12-13 01:31:59Z alexcher $
- % pdf_base.ps
- % Basic parser for PDF reader.
-
- % This handles basic parsing of the file (including the trailer
- % and cross-reference table), as well as objects, object references,
- % streams, and name/number trees; it doesn't include any facilities for
- % making marks on the page.
-
- % Switch to language level 2 when the interpreter provides .setlanguagelevel.
- /.setlanguagelevel where { pop 2 .setlanguagelevel } if
- % Leave the previous VM allocation mode on the stack and allocate globally.
- .currentglobal true .setglobal
- % Create pdfdict (100 initial entries) unless it is already defined somewhere.
- /pdfdict where { pop } { /pdfdict 100 dict def } ifelse
- % The definitions that follow are made inside pdfdict.
- pdfdict begin
-
- % Define the name interpretation dictionary for reading values.
- % valueopdict maps the executable names that can occur inside a PDF value
- % to the procedures that handle them. The result is read-only.
- /valueopdict mark
- (<<) cvn { mark } bind % don't push an actual mark!
-
- % '>>' closes a dictionary built from the key/value items above the mark.
- (>>) cvn { { counttomark } stopped {
- ( **** File has an unbalanced >> \(close dictionary\).\n)
- pdfformaterror
- } if
- % NOTE(review): if counttomark was stopped, no count has been pushed and the
- % comparison below consumes whatever happens to be on top of the stack.
- 131068 le {
- .dicttomark
- } {
- % More than 131068 items above the mark.
- % Checks for other resources can be added if needed.
- % Only handled when the object just below the mark is /XObject.
- counttomark 1 add index /XObject eq {
- % Insert a second mark directly above the original one, then repeatedly
- % turn the topmost (at most 131068) items into a dictionary and move that
- % dictionary below the second mark. When nothing is left above the second
- % mark, drop it and collect the dictionaries into an array with ']'.
- counttomark mark exch 1 add 1 roll
- { counttomark
- dup 0 eq {
- pop pop ] exit
- } if
- 131068 .min
- mark exch 1 add 1 roll .dicttomark
- counttomark 1 add 1 roll
- } loop
- } {
- .dicttomark % will fail
- } ifelse
- } ifelse
- } bind
-
- ([) cvn { mark } bind % ditto
- % ']' keeps whatever definition it has at this point.
- (]) cvn dup load
- % /true true % see .pdfexectoken below
- % /false false % ibid.
- % /null null % ibid.
- % The next three keys map to executable names of the same spelling, so the
- % actual procedure is looked up when the token is executed.
- /F dup cvx % see Objects section below
- /R dup cvx % see Objects section below
- /stream dup cvx % see Streams section below
- .dicttomark readonly def
-
- % ------ Utilities ------ %
-
- % Define a scratch string. The PDF language definition says that
- % no line in a PDF file can exceed 255 characters, but this string
- % is also used to search for %PDF-, which needs 1024 characters.
- % Shared 1024-byte scratch buffer (used by prevline below).
- /pdfstring 1024 string def
-
- % Read the previous line of a file. If we aren't at a line boundary,
- % read the line containing the current position.
- % Skip any blank lines.
- % Reads from PDFfile. Returns the start position and contents of the last
- % non-empty line found before the scan reaches the initial file position.
- /prevline % - prevline <startpos> <substring>
- { PDFfile fileposition dup () pdfstring
- % Back up at most 257 bytes (never before offset 0) and scan forward.
- 2 index 257 sub 0 .max PDFfile exch setfileposition
- { % Stack: initpos linepos line string
- PDFfile fileposition
- PDFfile 2 index readline pop
- % A non-empty line replaces the remembered (linepos, line) pair;
- % an empty line is discarded.
- dup length 0 gt
- { 3 2 roll 5 -2 roll pop pop 2 index }
- { pop }
- ifelse
- % Stack: initpos linepos line string startpos
- % Stop once the file position has reached or passed the initial position.
- PDFfile fileposition 5 index ge { exit } if
- pop
- }
- loop pop pop 3 -1 roll pop
- } bind def
-
- % Handle the PDF 1.2 #nn escape convention when reading from a file.
- % This should eventually be done in C.
- % Names are only rewritten when PDFversion is at least 1.2 and the name
- % contains a '#'; every other name is returned unchanged.
- /.pdffixname { % <execname> .pdffixname <execname'>
- PDFversion 1.2 ge {
- dup .namestring (#) search {
- % Stack: execname post (#) pre -- decode, convert back to a name, and
- % drop the original name.
- name#escape cvn exch pop
- } {
- % No '#': discard the name string, keep the original name.
- pop
- } ifelse
- } if
- } bind def
- % Takes the three results of a successful (#) search and returns the string
- % with this and every following #nn escape decoded.
- /name#escape % <post> <(#)> <pre> name#escape <string>
- { exch pop
- % Read one hex-encoded byte from a filter limited to the first 2 bytes of post.
- 1 index 2 () /SubFileDecode filter dup (x) readhexstring
- % Stack: post pre stream char t/f
- not { % tolerate, but complain about bad syntax
- % Keep the '#' literally and leave post untouched.
- pop closefile (#) concatstrings exch
- ( **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
- pdfformaterror
- } {
- % Append the decoded byte to pre and skip the two hex digits in post.
- exch closefile concatstrings
- exch 2 1 index length 2 sub getinterval
- } ifelse
- % Decode any further escapes in the remainder, then join the two parts.
- (#) search { name#escape } if concatstrings
- } bind def
-
- % Execute a file, interpreting its executable names in a given
- % dictionary. The name procedures may do whatever they want
- % to the operand stack.
- % Report an executable name that is not in opdict.
- % When BXlevel is 0 or less, the token text is rescanned with the
- % PDFScanInvNum user parameter enabled; if that yields an integer or real,
- % the number is left on the stack in place of the three operands.
- % Otherwise (or when BXlevel is above 0) the three operands are removed and
- % the operand stack is popped back down to <count> entries.
- /.pdftokenerror { % <count> <opdict> <errtoken> .pdftokenerror -
- BXlevel 0 le {
- ( **** Unknown operator: ') pdfformaterror
- dup =string cvs pdfformaterror
- % Attempt a retry scan of the element after changing to PDFScanInvNum
- << /PDFScanInvNum true >> setuserparams
- =string cvs
- token pop exch pop
- % Reset to the default scanning rules as soon as the retry scan is done,
- % whether or not it produced a number. Previously the reset happened only
- % in the success branch, so a failed retry left PDFScanInvNum enabled.
- << /PDFScanInvNum null >> setuserparams
- dup type
- dup /integertype eq exch /realtype eq or {
- % Stack: count opdict number -- keep only the number.
- exch pop exch pop
- (', processed as number, value: ) pdfformaterror
- dup =string cvs pdfformaterror (\n) pdfformaterror
- false % suppress any stack cleanup
- } {
- % error was non-recoverable with modified scanning rules
- ('\n) pdfformaterror
- true
- } ifelse
- } {
- true
- } ifelse
- { % clean up the operand stack if this was non-recoverable
- pop pop count exch sub { pop } repeat % pop all the operands
- } if
- } bind def
- % Execute one executable-name token.
- % Lookup order: opdict; then the names true/false/null (taken from
- % systemdict); then a name that starts with "endobj" (missing whitespace);
- % anything else goes to .pdftokenerror.
- % With PDFDEBUG set the token is echoed first; with PDFSTEP also set, the
- % procedure can pause and read a step count from %stdin.
- /.pdfexectoken { % <count> <opdict> <exectoken> .pdfexectoken ?
- PDFDEBUG {
- pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
- PDFSTEP {
- % Count every token seen while stepping.
- pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput
- PDFSTEPcount 1 gt {
- % Still skipping: just decrement the remaining step count.
- pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput
- } {
- dup ==only
- ( step # ) print PDFtokencount =only
- ( ? ) print flush 1 false .outputpage
- % Read a line from stdin; a leading token on it becomes the new
- % PDFSTEPcount, an empty line means 1.
- (%stdin) (r) file 255 string readline {
- token {
- exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput
- } {
- pdfdict /PDFSTEPcount 1 .forceput
- } ifelse % token
- } {
- pop /PDFSTEP false def % EOF on stdin
- } ifelse % readline
- } ifelse % PDFSTEPcount > 1
- } {
- dup ==only () = flush
- } ifelse % PDFSTEP
- } if % PDFDEBUG
- 2 copy .knownget {
- % Found in opdict: drop count, opdict and the token, run the value.
- exch pop exch pop exch pop exec
- } {
- % Normally, true, false, and null would appear in opdict
- % and be treated as "operators". However, there is a
- % special fast case in the PostScript interpreter for names
- % that are defined in, and only in, systemdict and/or
- % userdict: putting these three names in the PDF dictionaries
- % destroys this property for them, slowing down their
- % interpretation in all PostScript code. Therefore, we
- % check for them explicitly here instead.
- dup dup dup /true eq exch /false eq or exch /null eq or {
- exch pop exch pop //systemdict exch get
- } {
- % Hackish fix to detect missing whitespace after "endobj". Yet another
- % problem that (you guessed it!) Adobe Acrobat ignores silently
- 256 string cvs (endobj) anchorsearch {
- ( **** Missing whitespace after 'endobj'.\n) pdfformaterror
- % Stack: count opdict post (endobj) -- look up endobj in opdict and run it.
- exch pop cvn get exch pop exec
- } {
- .pdftokenerror
- } ifelse
- } ifelse
- } ifelse
- } bind def
- % Prebuilt setuserparams arguments used by .pdfrun to switch the
- % PDFScanRules user parameter on (true) and back to null.
- /PDFScanRules_true << /PDFScanRules true >> def
- /PDFScanRules_null << /PDFScanRules null >> def
- % Read tokens from <file> until the constructed loop is exited.
- % Executable names are dispatched through .pdfexectoken with <opdict>;
- % literal names go through .pdffixname and stay on the stack; all other
- % tokens stay on the stack unchanged.
- % If token returns false, or the scan is stopped, the name %%EOF is
- % dispatched through .pdfexectoken instead.
- % PDFsource is stored as <file> while the constructed procedure runs and
- % the previous value is stored back afterwards; a stop raised inside the
- % loop is re-raised after that restore.
- /.pdfrun { % <file> <opdict> .pdfrun -
- % Construct a procedure with the stack depth, file and opdict
- % bound into it.
- 1 index cvlit count 2 sub 3 1 roll mark
- /PDFScanRules .getuserparam //null eq {
- //PDFScanRules_true { setuserparams } 0 get % force PDF scanning mode
- mark 7 4 roll
- } {
- mark 5 2 roll
- } ifelse
- { % Stack: ..operands.. count opdict file
- { token } stopped {
- dup type /filetype eq { pop } if % pop the operand if it is restored
- ( **** Error reading a content stream. The page may be incomplete.\n)
- pdfformaterror
- //false
- } if {
- dup type /nametype eq {
- dup xcheck {
- .pdfexectoken
- } {
- .pdffixname
- exch pop exch pop PDFDEBUG {
- PDFSTEPcount 1 le {
- dup ==only ( ) print flush
- } if
- } if
- } ifelse
- } {
- exch pop exch pop PDFDEBUG {
- PDFSTEPcount 1 le {
- dup ==only ( ) print flush
- } if
- } if
- } ifelse
- } {
- (%%EOF) cvn cvx .pdfexectoken
- } ifelse
- }
- aload pop .packtomark cvx
- % Wrap the body as { <body> loop } and follow it with 'stopped'.
- { loop } 0 get 2 packedarray cvx
- { stopped } 0 get
- /PDFScanRules .getuserparam //null eq {
- //PDFScanRules_null { setuserparams } 0 get % reset PDF scanning mode if it was off
- } if
- % Tail of the constructed procedure: restore PDFsource, then re-raise stop
- % if 'stopped' returned true.
- /PDFsource PDFsource
- { store { stop } if } aload pop .packtomark cvx
- /PDFsource 3 -1 roll store exec
- } bind def
-
- % Execute a file, like .pdfrun, for a marking context.
- % This temporarily rebinds LocalResources and DefaultQstate.
- % Runs .pdfrun on <file> <opdict> with LocalResources stored as <resdict>
- % and DefaultQstate stored as the current value of qstate; both previous
- % values are stored back once .pdfrun returns.
- /.pdfruncontext { % <resdict> <file> <opdict> .pdfruncontext -
- % Push the .pdfrun procedure and the two values to be restored later.
- /.pdfrun load LocalResources DefaultQstate
- /LocalResources 7 -1 roll store
- /DefaultQstate qstate store
- % NOTE(review): presumably .execn executes the top 3 objects in order, i.e.
- % runs .pdfrun and then re-pushes the two saved values -- confirm.
- 3 .execn
- /DefaultQstate exch store
- /LocalResources exch store
- } bind def
-
- % Get the depth of the PDF operand stack. The caller sets pdfemptycount
- % before calling .pdfrun or .pdfruncontext. It is initially set by
- % pdf_main, and is also set by any routine which changes the operand
- % stack depth (currently .pdfpaintproc, although there are other callers
- % of .pdfrun{context} which have not been checked for opstack depth).
- % Returns the current operand stack depth minus pdfemptycount.
- /.pdfcount { % - .pdfcount <count>
- count pdfemptycount sub
- } bind def
-
- % Read a token, but simply return false (no token read) in the case of an
- % error. This is messy because 'token' either may or may not pop its operand
- % if an error occurs, and because the return values are different depending
- % on whether the source is a file or a string. To avoid closing the file
- % check for '{' before trying 'token'.
- % Dispatch table keyed by the next (peeked) character of the file.
- % Space, tab, CR, LF and NUL share one procedure that consumes a single
- % character from the file; '{' pushes null and true and exits the caller's loop.
- /token_nofail_dict mark
- ( ) { dup ( ) readstring pop pop } bind
- (\t) 1 index
- (\r) 1 index
- (\n) 1 index
- (\000) 1 index
- ({) { //null //true exit } bind
- .dicttomark def
-
- % Like 'token', but returns false instead of raising an error.
- % For a file source, leading characters listed in token_nofail_dict are
- % consumed one at a time; a '{' (or a failed peek) is reported as failure
- % without calling 'token'.
- /token_nofail { % <file|string> token_nofail false
- % <file> token_nofail <token> true
- % <string> token_nofail <post> <token> true
- dup type /filetype eq {
- % A failed .peekstring is treated like '{'.
- { dup ( ) .peekstring not { ({) } if
- //token_nofail_dict exch .knownget not {
- % Any other character: run 'token' under .internalstopped and leave the loop.
- //null 1 index { token } .internalstopped exit
- } if
- exec
- } loop
- { % stack: source null [source]
- % Drop the operand if 'token' left it behind, then the null and the source.
- //null ne { pop } if pop //false
- } { % stack: source null ([post] token true | false)
- { 3 1 roll pop pop //true }
- { pop pop //false }
- ifelse
- } ifelse
- } {
- //null 1 index % stack: source null source
- { token } .internalstopped { % stack: source null [source]
- //null ne { pop } if pop //false
- } { % stack: source null ([post] token true | false)
- { 4 2 roll pop pop //true }
- { pop pop //false }
- ifelse
- } ifelse
- } ifelse
- } bind def
-
- % token_nofail holds the table as an immediate (//) reference, so the name
- % itself can be removed from the current dictionary.
- currentdict /token_nofail_dict .undef
-
- % ================================ Objects ================================ %
-
- % Since we may have more than 64K objects, we have to use a 2-D array to
- % hold them (and the parallel Generations structure).
- % An index is split into a top-level slot (index >> lshift) and a position
- % inside that slot's sub-sequence (index & lsubmask).
- /lshift 9 def
- /lnshift lshift neg def % negative shift count, for shifting right
- /lsubmask 1 lshift bitshift 1 sub def % 2^lshift - 1
- /lsublen lsubmask 1 add def % 2^lshift, the length of a full sub-sequence
- % Creates an empty array-backed sequence: one top-level slot holding an
- % empty array.
- /larray { % - larray <larray>
- [ [] ]
- } bind def
- % Creates an empty string-backed sequence: one top-level slot holding an
- % empty string.
- /lstring { % - lstring <lstring>
- [ () ]
- } bind def
- % Returns the type of the first sub-sequence (array or string).
- /ltype { % <lseq> ltype <type>
- 0 get type
- } bind def
- % Fetch element <index>: the high bits of the index select the sub-sequence,
- % the low bits select the position inside it.
- /lget { % <lseq> <index> lget <value>
- 2 copy //lnshift bitshift get % Stack: lseq index subseq
- exch //lsubmask and get % Stack: lseq value
- exch pop
- } bind def
- % Store <value> at element <index>: the high bits of the index select the
- % sub-sequence, the low bits select the position inside it.
- /lput { % <lseq> <index> <value> lput -
- exch dup //lsubmask and % Stack: lseq value index pos
- exch //lnshift bitshift % Stack: lseq value pos slot
- 4 -1 roll exch get % Stack: value pos subseq
- 3 1 roll exch % Stack: subseq pos value
- put
- } bind def
- % Total element count: every slot before the last one counts as a full
- % sub-sequence; the length of the last sub-sequence is added to that.
- /llength { % <lseq> llength <length>
- dup length 1 sub % Stack: lseq lastslot
- 2 copy get length % Stack: lseq lastslot lastlen
- exch //lshift bitshift add % Stack: lseq total
- exch pop
- } bind def
- % Note: lgrowto (defined below, after growto) assumes newlength > llength(lseq).
- % Allocate a new string or array (matching the operand's type) of the given
- % length and copy the old contents into its beginning.
- /growto { % <string/array> <length> growto <string'/array'>
- 1 index type /stringtype eq { string } { array } ifelse
- % Stack: old new
- dup 3 -1 roll exch % Stack: new old new
- copy pop % discard the sub-interval returned by copy
- } bind def
- % Grow a large sequence to <newlength> elements.
- % If more top-level slots are required, the last existing sub-sequence is
- % first completed (by a recursive call), the intermediate slots are filled
- % with full sub-sequences of lsublen elements, and an empty sub-sequence is
- % appended; the last sub-sequence is then expanded to its final length.
- /lgrowto { % <lseq> <newlength> lgrowto <lseq'>
- % Number of top-level slots needed for newlength elements.
- dup //lsubmask add //lnshift bitshift dup 3 index length gt {
- % Add more sub-arrays. Start by completing the last existing one.
- % Stack: lseq newlen newtoplen
- 3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
- % Stack: newlen newtoplen lseq
- [ exch aload pop
- counttomark 2 add -1 roll % newtoplen
- % 'dup 0 0 getinterval' yields an empty sequence of the same type.
- counttomark sub { dup 0 0 getinterval lsublen growto } repeat
- dup 0 0 getinterval ] exch
- } {
- pop
- } ifelse
- % Expand the last sub-array.
- 1 sub //lsubmask and 1 add
- exch dup dup length 1 sub 2 copy
- % Stack: newsublen lseq lseq len-1 lseq len-1
- get 5 -1 roll growto put
- } bind def
- % Applies <proc> to every element by building { <proc> forall } and running
- % it over each sub-sequence in turn.
- /lforall { % <lseq> <proc> lforall -
- /forall cvx 2 packedarray cvx forall
- } bind def
-
- % We keep track of PDF objects using the following PostScript variables:
- %
- % Generations (lstring): Generations[N] holds 1+ the current
- % generation number for object number N. (As far as we can tell,
- % this is needed only for error checking.) For free objects,
- % Generations[N] is 0.
- %
- % Objects (larray): If object N is loaded, Objects[N] is the actual
- % object; otherwise, Objects[N] is an executable integer giving
- % the file offset of the object's location in the file. If
- % ObjectStream[N] is non-zero then Objects[N] contains the index
- % into the object stream instead of the file offset of the object.
- %
- % ObjectStream (larray): If object N is in an object stream then
- % ObjectStream[N] holds the object number of the object stream.
- % Otherwise ObjectStream[N] contains 0. If ObjectStream[N]
- % is non-zero then Objects[N] contains the index into the object
- % stream.
- %
- % GlobalObjects (dictionary): If object N has been resolved in
- % global VM, GlobalObjects[N] is the same as Objects[N]
- % (except that GlobalObjects itself is stored in global VM,
- % so the entry will not be deleted at the end of the page).
- %
- % IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
- % global VM. This is an accelerator to avoid having to do a
- % dictionary lookup in GlobalObjects when resolving every object.
-
- % Initialize the PDF object tables.
- % Create empty object tables in the current dictionary.
- % GlobalObjects is the only one allocated with global VM forced on; the
- % previous allocation mode is put back immediately afterwards.
- /initPDFobjects { % - initPDFobjects -
- .currentglobal true .setglobal
- /GlobalObjects 20 dict def
- .setglobal
- % The remaining tables use whatever allocation mode the caller had.
- /IsGlobal lstring def
- /Generations lstring def
- /Objects larray def
- /ObjectStream larray def
- } bind def
-
- % Grow the tables to a specified size.
- % Ensure each of the four object tables holds at least <minsize> elements.
- % A table that is already long enough is left untouched; a grown table is
- % redefined under the same name in the current dictionary.
- /growPDFobjects { % <minsize> growPDFobjects -
- { /ObjectStream /Objects /Generations /IsGlobal } {
- % Stack: minsize tablename
- 2 copy load llength gt {
- dup load 2 index lgrowto def
- } {
- pop
- } ifelse
- } forall
- pop
- } bind def
-
- % We represent an unresolved object reference by a procedure of the form
- % {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
- % no way to represent procedures. Since PDF in fact has no way to represent
- % any PostScript object that doesn't evaluate to itself, we can 'force'
- % a possibly indirect object painlessly with 'exec'.
- % Note that since we represent streams by executable dictionaries
- % (see below), we need both an xcheck and a type check to determine
- % whether an object has been resolved.
- % Reports whether object N already has a value in Objects. An executable
- % integer entry means "not loaded" unless IsGlobal[N] is non-zero, in which
- % case the value is copied from GlobalObjects into Objects and returned.
- /resolved? { % <object#> resolved? <value> true
- % <object#> resolved? false
- Objects 1 index lget dup xcheck { % Check if executable
- dup type /integertype eq { % Check if an integer
- % Check whether the object is in GlobalObjects.
- pop IsGlobal 1 index lget 0 eq { % 0 --> Not in GlobalObjects
- pop false % The object is not resolved
- } { % The object is in GlobalObjects
- % Update Objects from GlobalObjects
- PDFDEBUG { (%Global=>local: ) print dup == } if
- GlobalObjects 1 index get dup Objects 4 1 roll lput true
- } ifelse
- } { % Else object is executable but not integer
- exch pop true % Therefore must be executable dict. (stream)
- } ifelse
- } { % Else object is not executable.
- exch pop true % Therefore it must have been resolved.
- } ifelse
- } bind def
- % oforce is the current value of 'exec': executing an unresolved reference
- % (a procedure) resolves it.
- /oforce /exec load def
- % 'get' followed by oforce. The container itself is not modified.
- /oget { % <array> <index> oget <object>
- % <dict> <key> oget <object>
- % Before release 6.20, this procedure stored the resolved
- % object back into the referring slot. In order to support
- % PDF linearization, we no longer do this.
- get oforce
- } bind def
- % Returns a new array holding the oforce'd value of every element.
- /oforce_array { % <array> oforce_array <array>
- [ exch { oforce } forall ]
- } bind def
- % Pushes the oforce'd value of every element onto the stack, in order.
- /oforce_elems { % <array> oforce_elems <first> ... <last>
- { oforce } forall
- } bind def
- % Forces the operand and, for arrays and dictionaries, rebuilds the
- % container with every element (and every dictionary key) forced as well.
- % Other types are returned as forced.
- /oforce_recursive { % <any> oforce_recursive <any>
- oforce dup type dup /arraytype eq {
- pop [ exch { oforce_recursive } forall ]
- } {
- /dicttype eq {
- % Values are forced recursively; keys are forced with a plain oforce.
- << exch { oforce_recursive exch oforce exch } forall >>
- } if
- } ifelse
- } bind def
-
- % A null value in a dictionary is equivalent to an omitted key;
- % we must check for this specially.
- % Look up <key> and force the value. A missing key and a value that forces
- % to null both yield just false.
- /knownoget { % <dict> <key> knownoget <value> true
- % <dict> <key> knownoget false
- % See oget above regarding this procedure.
- .knownget {
- oforce dup //null ne % Stack: value notnull
- dup not { exch pop } if % a null value leaves only false
- } {
- //false
- } ifelse
- } bind def
-
- % See /knownoget above.
- % True only when <key> is present and its forced value is not null.
- /oknown { % <dict> <key> oknown <bool>
- % A false result from .knownget is returned as it stands.
- .knownget dup { pop oforce //null ne } if
- } bind def
-
-
- % Same as knownoget, except that a value which is not a dictionary is
- % discarded and reported as false.
- /knownogetdict { % <dict> <key> knownogetdict <dict> true
- % <dict> <key> knownogetdict false
- //knownoget exec {
- dup type /dicttype eq % Stack: value isdict
- dup not { exch pop } if
- } {
- //false
- } ifelse
- } bind def
-
-
- % PDF 1.1 defines a 'foreign file reference', but not its meaning.
- % Per the specification, we convert these to nulls.
- % With at least three PDF operands present, this is a foreign file
- % reference: the three operands are replaced by null.
- % With fewer, F is treated as the f operator.
- /F { % <file#> <object#> <generation#> F <object>
- .pdfcount 3 ge {
- pop pop pop null
- } {
- % Some PDF 1.1 files use F as a synonym for f!
- f
- } ifelse
- } bind def
-
- % Verify the generation number for a specified object
- % Note: Each value in Generations is the generation number plus 1.
- % If the value in Generations is zero then the object is free.
- % Compares <generation#> with Generations[object#] - 1. A mismatch produces
- % a warning through pdfformaterror unless QUIET is set. The result is true
- % in every case.
- /checkgeneration { % <object#> <generation#> checkgeneration <object#> <OK>
- Generations 2 index lget 1 sub 1 index eq { % If generation # match ...
- pop true % Then return true
- } { % Else not a match ...
- QUIET not { % Create warning message if not QUIET
- Generations 2 index lget 0 eq { % Check if object is free ...
- ( **** Warning: reference to free object: )
- 2 index =string cvs concatstrings ( ) concatstrings % put obj #
- exch =string cvs concatstrings ( R\n) concatstrings % put gen #
- } {
- ( **** Warning: wrong generation: )
- 2 index =string cvs concatstrings ( ) concatstrings % put obj #
- exch =string cvs concatstrings % put gen #
- (, xref gen#: ) concatstrings 1 index Generations % put xref gen #
- exch lget 1 sub =string cvs concatstrings (\n) concatstrings
- } ifelse
- pdfformaterror % Output warning message
- } { % Else QUIET ...
- pop % Pop generation number
- } ifelse
- % We should return false for an incorrect generation number, however
- % we are simply printing a warning and then returning true. This makes
- % Ghostscript tolerant of bad generation numbers.
- true
- } ifelse
- } bind def
- % Builds the executable packed array { obj# gen# resolveR }; nothing is
- % resolved until that procedure is executed.
- /R { % <object#> <generation#> R <object>
- /resolveR cvx 3 packedarray cvx
- } bind def
-
- % If we encounter an object definition while reading sequentially,
- % we just store it away and keep going.
- % Every entry of valueopdict, plus endobj mapped to the executable name
- % endobj. Read-only.
- /objopdict mark
- valueopdict { } forall
- /endobj dup cvx
- .dicttomark readonly def
-
- % Reads the object body from PDFfile using objopdict.
- /obj { % <object#> <generation#> obj <object>
- PDFfile objopdict .pdfrun
- } bind def
-
- % Stores the object that was just read into Objects[object#] and leaves it
- % on the stack. A dictionary allocated in global VM is also recorded in
- % GlobalObjects, and IsGlobal[object#] is set to 1.
- /endobj { % <object#> <generation#> <object> endobj <object>
- 3 1 roll
- % Read the xref entry if we haven't yet done so.
- % This is only needed for generation # checking.
- 1 index resolved? {
- pop
- } if
- checkgeneration {
- % The only global objects we bother to save are
- % (resource) dictionaries.
- 1 index dup gcheck exch type /dicttype eq and {
- PDFDEBUG { (%Local=>global: ) print dup == } if
- GlobalObjects 1 index 3 index put
- IsGlobal 1 index 1 put
- } if
- Objects exch 2 index lput
- } {
- % Generation check failed: discard the object and return null.
- pop pop null
- } ifelse
- } bind def
-
- % When resolving an object reference in an object stream, we stop at
- % the end of file. Note: Objects in an object stream have neither
- % a starting 'obj' nor an ending 'endobj'.
- % valueopdict plus two entries: %%EOF exits the reading loop, and a stray
- % endobj only produces a warning. Read-only.
- /resolveobjstreamopdict mark
- valueopdict { } forall
- (%%EOF) cvn { exit } bind
- /endobj { % bug 689795
- ( **** Warning: Objects in an object stream should not have 'endobj'.\n)
- pdfformaterror
- } bind
- .dicttomark readonly def
-
- % Note: This version of this function is not currently being used.
- % Resolve all objects in an object stream
- % NOTE(review): a second definition of /resolveobjectstream appears later in
- % this file and replaces this one.
- % This version reads the (object#, offset) table at the head of the stream,
- % then reads each object's bytes separately, using the difference between
- % consecutive offsets as the object size.
- /resolveobjectstream { % <object stream #> resolveobjectstream -
- PDFDEBUG { (%Resolving object stream: ) print } if
- 0 resolveR % Get the objectstream dict, all objstrms use 0 as the gen #
- dup /First get % Save location of first object onto the stack
- 1 index /N get % Save number of objects onto the stack
- 2 index false resolvestream % Convert stream dict into a stream
- /ReusableStreamDecode filter % We need to be able to position stream
- % Objectstreams begin with list of object numbers and locations
- % Create two arrays to hold object numbers and stream location
- 1 index array % Array for holding object number
- 2 index array % Array for holding stream object location
- % Get the object numbers and locations.
- 0 1 5 index 1 sub { % Loop and collect obj # and locations
- % Stack: objstreamdict First N objectstream [obj#] [loc] index
- 2 index 1 index % Setup to put obj# into object number array
- 5 index token pop put % Get stream, then get obj# and put into array
- 1 index 1 index % Setup to put object loc into location array
- 5 index token pop put % Get stream, get obj loc and put into array
- pop % Remove loop index
- } for
- % Create a bytestring big enough for reading any object data
- % Scan for the size of the largest object
- 0 0 % Init max object size and previous location
- 2 index { % Loop through all object locations
- % Stack: ... maxsize prevloc currentloc
- dup 4 1 roll % Save copy of object location into stack
- exch sub % Object size = currentloc - prevloc
- .max % Determine maximum object size
- exch % Put max size under previous location
- } forall
- pop % Remove previous location
- .bigstring % Create bytestring based upon max obj size
- % Move to the start of the object data
- 3 index 6 index % Get objectstream and start of first object
- setfileposition % Move to the start of the data
- % Read the data for all objects except the last. We do
- % not know the size of the last object so we need to treat
- % it as a special case.
- 0 1 6 index 2 sub {
- dup 4 index exch get % Get our current object number
- % Stack: objstreamdict First N objectstream [obj#] [loc]
- % bytestring loopindex object#
- dup resolved? { % If we already have this object
- % NOTE(review): debugging leftover. This prints the stack and then executes
- % the name xxx, which is not defined anywhere in the visible source, so
- % reaching this branch presumably raises an error -- confirm it is a
- % deliberate trap.
- (yyy) = pstack (yyy) = flush xxx
- pop pop % Remove object and object number
- 1 add 2 index exch get % Get location of next object
- 6 index add 6 index exch % Form location of next object and get stream
- setfileposition % Move to the start of the next object data
- } { % Else this is a new object ...
- % We are going to create a string for reading the object
- 2 index 0 % use our working string
- % Determine the size of the object
- 5 index 4 index 1 add get % Get location of the next object
- 6 index 5 index get % Get location of this object
- sub % Size of object = next loc - this loc
- getinterval % Create string for reading object
- 6 index exch readstring pop % Read object
- /ReusableStreamDecode filter % Convert string into a stream
- resolveobjstreamopdict .pdfrun % Get PDF object
- Objects exch 2 index exch lput % Put object into Objects array
- pop pop % Remove object # and loop index
- } ifelse
- } for
- pop pop % Remove our working string and loc array
- % Now read the last object in the object stream. Since it
- % is the last object, we can use the original stream and
- % terminate when we hit the end of the stream
- % Stack: objstreamdict First N objectstream [obj#]
- 2 index 1 sub get % Get our current object number
- dup resolved? not { % If we do not already have this object
- exch % Get our object stream
- resolveobjstreamopdict .pdfrun % Get PDF object
- Objects exch 2 index exch lput % Put object into Objects array
- } if
- pop pop pop pop % Clear stack
- } bind def
-
- % Read-only dictionary that, once pushed on the dictionary stack, makes
- % PDFDEBUG look up as false.
- /no_debug_dict <<
- /PDFDEBUG false
- >> readonly def
-
- % Resolve all objects in an object stream
- % Resolve all objects in an object stream.
- % Reads the N object numbers from the head of the stream, parses all object
- % data in a single .pdfrun pass, checks that exactly N objects were produced,
- % and stores each one into Objects -- but only if ObjectStream[obj#] names
- % this stream and the object is not already resolved.
- % Signals typecheck if /Type is not /ObjStm, and rangecheck if the number of
- % objects read does not match N.
- /resolveobjectstream { % <object stream #> resolveobjectstream -
- PDFDEBUG { (%Resolving object stream: ) print } if
- dup 0 resolveR % Get the objectstream dict, all objstrms use 0 as the gen #
- dup /Type get /ObjStm ne { % Verify type is object stream
- ( **** Incorrect Type in object stream dictionary.\n) pdfformaterror
- /resolveobjectstream cvx /typecheck signalerror
- } if
- dup /N get % Save number of objects onto the stack
- 1 index false resolvestream % Convert stream dict into a stream
- /ReusableStreamDecode filter % We need to be able to position stream
- % Objectstreams begin with list of object numbers and locations
- 1 index array % Create array for holding object number
- % Get the object numbers
- 0 1 4 index 1 sub { % Loop and collect obj numbers
- % Stack: strm# objstreamdict N objectstream [obj#] loopindex
- 1 index 1 index % Setup to put obj# into object number array
- 4 index token pop put % Get stream, then get obj# and put into array
- 2 index token pop pop pop % Get stream, get obj loc and clear stack
- } for
- % Move to the start of the object data
- 1 index 4 index /First get % Get objectstream and start of first object
- setfileposition % Move to the start of the data
- % We disable PDFDEBUG while reading the data stream. We will
- % print the data later
- PDFDEBUG { //no_debug_dict begin } if
- % Read the data for all objects. We check to see if we get
- % the number of objects that we expect.
- % Stack: strm# objstreamdict N objectstream [obj#]
- mark 3 -1 roll % Get objectstream
- count 4 index add % Determine stack depth with objects
- 3 1 roll
- % Stack: strm# objstreamdict N [obj#] expecteddepth mark objectstream
- resolveobjstreamopdict .pdfrun % Get PDF objects
- count counttomark 1 add index ne { % Check stack depth
- ( **** Incorrect object count in object stream.\n) pdfformaterror
- /resolveobjectstream cvx /rangecheck signalerror
- } if
- % We have the object data
- counttomark array astore % Put objects into an array
- exch pop exch pop % Remove mark and count
- currentdict //no_debug_dict eq { end } if % Restore debug context
- % Save the objects into Objects
- 0 1 2 index length 1 sub { % Loop through all objects
- % Stack: strm# objstreamdict N [obj#] [objects] loopindex
- dup 3 index exch get % Get our current object number
- % Stack: strm# objstreamdict N [obj#] [objects] loopindex obj#
- % Only accept the object if the xref says it lives in this stream.
- dup ObjectStream exch lget 7 index eq {
- dup resolved? { % If we already have this object
- pop pop % Remove object and object number
- } { % Else if we do not have this object
- PDFDEBUG { (%Resolving compressed object: [) print dup =only ( 0]) = } if
- Objects exch 3 index % Put the object into Objects
- 3 index get
- PDFDEBUG { dup === flush } if
- lput
- } ifelse
- } {
- pop % Ignore old object; remove object number.
- } ifelse
- pop % Remove loop index
- } for
- pop pop pop pop pop % Remove strm#, objstreamdict, N, [obj#], and [objects]
- } bind def
-
- % resolveobjectstream refers to the dictionary through //no_debug_dict, so
- % the name itself can be removed.
- currentdict /no_debug_dict undef
-
- % When resolving an object reference, we stop at the endobj or endstream.
- % valueopdict plus the terminators that finish one object. Each terminator
- % runs endobj and then exits the reading loop. Read-only.
- /resolveopdict mark
- valueopdict { } forall
- /endstream { endobj exit } bind
- /endobj { endobj exit } bind
- /endjobj { % Bug 689876.
- ( **** Operator 'endobj' is misspelled as 'endjobj'.\n) pdfformaterror
- endobj exit
- } bind
- /enbobj { % Bug 690397.
- ( **** Operator 'endobj' is misspelled as 'enbobj'.\n) pdfformaterror
- endobj exit
- } bind
- % OmniForm generates PDF file with endobj missing in some
- % objects. AR ignores this. So we have to do it too.
- % The two operands of the unexpected 'obj' are discarded first.
- /obj { pop pop endobj exit } bind
- .dicttomark readonly def
-
- % Resolve an indirect reference.
- % An object that is already resolved is returned directly. Otherwise the
- % current PDFfile position is saved, the object is read either from its file
- % offset or through its object stream, and the file position is put back.
- % Signals rangecheck when the "<obj#> <gen#> obj" header at the recorded
- % offset does not match.
- /resolveR { % <object#> <generation#> resolveR <object>
- PDFDEBUG {
- PDFSTEPcount 1 le {
- (%Resolving: ) print 2 copy 2 array astore ==
- } if
- } if
- 1 index resolved? { % If object has already been resolved ...
- exch pop exch pop % then clear stack and return object
- } { % Else if not resolved ...
- PDFfile fileposition 3 1 roll % Save current file position
- 1 index Objects exch lget % Get location of object from xref
- 3 1 roll checkgeneration { % Verify the generation number
- % Stack: savepos objpos obj#
- ObjectStream 1 index lget dup 0 eq { % Check if obj is not in an objstream
- pop exch PDFoffset add PDFfile exch setfileposition
- % The first token at the offset must equal obj#.
- PDFfile token pop 2 copy ne
- { ( **** Unrecoverable error in xref!\n) pdfformaterror
- /resolveR cvx /rangecheck signalerror
- }
- if pop PDFfile token pop
- % The third token must be the name obj.
- PDFfile token pop /obj ne
- { ( **** Unrecoverable error in xref!\n) pdfformaterror
- /resolveR cvx /rangecheck signalerror
- }
- if
- pdf_run_resolve % PDFfile resolveopdict .pdfrun
- } { % Else the object is in an ObjectStream
- % Process an objectstream object. We are going to resolve all
- % of the objects in the stream and place them into the Objects
- % array.
- % Stack: savepos objpos obj# objectstream#
- resolveobjectstream
- resolved? { % If object has already been resolved ...
- exch pop % Remove object pos from stack.
- } {
- pop pop null % Pop objpos and obj#, put null for object
- } ifelse
- } ifelse
- } { % Else the generation number is wrong
- % Don't cache if the generation # is wrong.
- pop pop null % Pop objpos and obj#, put null for object
- } ifelse % ifelse generation number is correct
- exch PDFfile exch setfileposition % Return to original file position
- } ifelse
- } bind def
-
- % ================================ Streams ================================ %
-
- % We represent a stream by an executable dictionary that contains,
- % in addition to the contents of the original stream dictionary:
- % /File - the file or string where the stream contents are stored,
- % if the stream is not an external one.
- % /FilePosition - iff File is a file, the position in the file
- % where the contents start.
- % /StreamKey - the key used to decrypt this stream, if any.
- % We do the real work of constructing the data stream only when the
- % contents are needed.
-
- % Construct a stream. The length is not reliable in the face of
- % different end-of-line conventions, but it's all we've got.
- %
- % PDF files are inconsistent about what may fall between the 'stream' keyword
- % and the actual stream data, and it appears that no one algorithm can
- % detect this reliably. We used to try to guess whether the file included
- % extraneous \r and/or \n characters, but we no longer attempt to do so,
- % especially since the PDF 1.2 specification states flatly that the only
- % legal terminators following the 'stream' keyword are \n or \r\n, both of
- % which are properly skipped and discarded by the token operator.
- % Unfortunately, this doesn't account for other whitespace characters that
- % may have preceded the EOL, such as spaces or tabs. Thus we back up one
- % character and scan until we find the \n terminator.
- /stream { % <dict> stream <modified_dict>
- dup /Length oget 0 eq { % zero-length stream:
- dup /Filter undef % don't confuse any filters that require data
- } if
- dup /F known dup PDFsource PDFfile eq or { % taken when the dict has /F, or when we are reading directly from PDFfile
- not { % only for dicts without /F: the data is in PDFfile itself
- dup /File PDFfile put
- % make sure that we are just past the EOL \n character
- PDFfile dup fileposition 1 sub setfileposition % back up one
-
- PDFfile read pop % re-read the byte just before the previous position
- dup 13 eq {
- % If there had been a \n, token would have advanced over it
- % thus, if the terminator was \r, we have a format error!
- ( **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
- pop % fileposition is OK (just past the \r).
- } {
- % Otherwise, scan past \n
- { 10 eq { exit } if % stop as soon as a \n has been consumed
- PDFfile read pop
- } loop
- } ifelse
- dup /FilePosition PDFfile fileposition put % remember where the stream data starts
- PDFDEBUG {
- PDFSTEPcount 1 le {
- (%FilePosition: ) print dup /FilePosition get ==
- } if
- } if
- } if
- % Some (bad) PDF files have invalid stream lengths. This causes problems
- % if we reposition beyond the end of the file. So we compare the given
- % length to number of bytes left in the file.
- dup /Length oget
- dup PDFfile bytesavailable lt { % compare to bytes left in file
- PDFfile fileposition % reposition to the end of stream
- add PDFfile exch setfileposition
- } {
- pop % bad stream length - do not reposition.
- % This will force a length warning below
- } ifelse
- } {
- pop % discard the leftover '/F known' flag
- % We're already reading from a stream, which we can't reposition.
- % Capture the sub-stream contents in a string.
- dup /Length oget string PDFsource exch readstring % read Length bytes from PDFsource into a new string
- not {
- ( **** Warning: Unexpected EOF in stream!\n) pdfformaterror
- /stream cvx /rangecheck signalerror
- } if
- 1 index exch /File exch put % store the captured string as /File
- } ifelse
- PDFsource token_nofail not { //null } if % next token after the data; null is substituted when token_nofail reports failure
- dup /endobj eq {
- % Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
- ( **** Warning: stream missing 'endstream'.\n) pdfformaterror
- pop /endstream % fake a valid endstream
- } if
- /endstream ne {
- ( **** Warning: stream Length incorrect.\n) pdfformaterror
- dup /Length undef % prevent the use of the incorrect length.
- cvx endobj exit % exit from .pdfrun now.
- } {
- PDFsource (??) .peekstring pop (>>) eq { % Bug 690161, sample #1
- ( **** Warning: Spurious '>>' after 'endstream' ignored.\n) pdfformaterror
- PDFsource (12) readstring pop pop % consume those two characters
- } if
- } ifelse
- cvx % return the stream dictionary as an executable dictionary
- } bind def
- /endstream { % executing 'endstream' as an operator does nothing but
- exit % leave the innermost enclosing loop
- } bind def
-
- % Acrobat Reader accepts the abbreviated filter names everywhere, not only
- % for in-line images as the published PDF (1.3) specification says, and
- % some applications (notably htmldoc) rely on this.  This read-only table
- % maps each abbreviation to the full filter name.
- /unabbrevfilterdict mark
-   /AHx /ASCIIHexDecode
-   /A85 /ASCII85Decode
-   /CCF /CCITTFaxDecode
-   /DCT /DCTDecode
-   /Fl  /FlateDecode
-   /LZW /LZWDecode
-   /RL  /RunLengthDecode
- .dicttomark readonly def
-
- % Extract and apply filters.
- /filterparms { % <dict> <DPkey> <Fkey> filterparms
- % <dict> <parms> <filternames>
- 2 index exch knownoget { % look up <Fkey> in the dict
- oforce_recursive % helper defined outside this chunk; presumably resolves indirect references inside the value -- confirm
- exch 2 index exch knownoget { % look up <DPkey> in the dict
- % Both filters and parameters.
- oforce_recursive
- exch dup type /nametype eq { % a single filter name rather than an array:
- 1 array astore exch % wrap the name in a one-element array
- dup type /arraytype ne { 1 array astore } if exch % and wrap the parameters too unless they already are an array
- } if
- } {
- % Filters, but no parameters.
- //null exch
- dup type /nametype eq { 1 array astore } if % wrap a single filter name in a one-element array
- } ifelse
- } {
- % No filters: ignore parameters, if any.
- pop //null { } % drop <DPkey>; return null parameters and an empty filter list
- } ifelse
- } bind def
- /filtername { % <filtername> filtername <filtername'>
- //unabbrevfilterdict 1 index .knownget { exch pop } if % replace an abbreviated name by its full name
- dup /Filter resourcestatus { pop pop } { % keep the name if a /Filter resource of that name exists
- Repaired exch % this error is not the creator's fault
- ( **** ERROR: Unable to process ) pdfformaterror
- 64 string cvs pdfformaterror % the unsupported filter name is printed as part of the message
- ( data. Page will be missing data.\n) pdfformaterror
- /Repaired exch store % restore the previous "Repaired" state
- % provide a filter that returns EOF (no data)
- /.EOFDecode
- } ifelse
- } bind def
-
- % Read-only parameter dictionary handed to ASCII85Decode filters that have
- % no parameter dictionary of their own.  It is embedded in add_A85_param
- % with // and removed from the current dictionary again further down.
- /pdf_rules_dict << /PDFRules //true >> readonly def
-
- % Add the PDFRules option to the parameters of an ASCII85Decode filter.
- %   <source> <name> add_A85_param <source> <dict'> <name>
- %   <source> <dict> <name> add_A85_param <source> <dict'> <name>
- % For any other filter name the operands are left untouched.
- /add_A85_param {
-   dup /ASCII85Decode eq {
-     1 index type /dicttype eq {
-       % Stack: source dict name
-       % The parameter dictionary is the SECOND operand, so it has to be
-       % brought up with exch.  (Rolling three elements would bring <source>
-       % to the top and make 'length'/'copy' operate on the data source.)
-       exch                          % source name dict
-       dup length 1 add dict copy    % copy with room for one more key, so the
-                                     % caller's dictionary is never modified
-       dup /PDFRules //true put
-       exch                          % source dict' name
-     } {
-       % No parameter dictionary supplied: use the shared read-only one.
-       //pdf_rules_dict exch         % source dict' name
-     } ifelse
-   } if
- } bind def
-
- currentdict /pdf_rules_dict undef
-
- % Layer the filters named in <filternames> over <source>, first name first.
- %   <parms> is either null (no filter has parameters) or an array with one
- %   entry per filter; an entry may itself be null.
- /applyfilters { % <parms> <source> <filternames> applyfilters <stream>
-   2 index //null eq {
-     % No parameters at all.
-     { filtername add_A85_param filter }
-   } {
-     {   % Stack: parms source filtername
-       % Normalize the name first, so that abbreviations are expanded and
-       % unavailable filters are replaced even when this filter's parameter
-       % entry is null (same treatment as in the no-parameters case).
-       filtername
-       2 index 0 oget dup //null eq {
-         pop                         % parms source filtername'
-       } {
-         exch                        % parms source parm filtername'
-         dup /JBIG2Decode eq { exch jbig2cachectx exch } if
-       } ifelse
-       add_A85_param filter
-       % Stack: parms stream -- drop the parameter entry just consumed.
-       exch dup length 1 sub 1 exch getinterval exch
-     }
-   } ifelse forall exch pop
- } bind def
-
- % JBIG2 streams have an optional 'globals' stream obj for
- % sharing redundant data between page images. Here we resolve
- % that stream reference (if any) and run it through the decoder,
- % creating a special -jbig2globalctx- postscript object our
- % JBIG2Decode filter implementation looks for in the parm dict.
- /jbig2cachectx { % <parmdict> jbig2cachectx <parmdict>
- dup /JBIG2Globals knownoget { % nothing is done when the dict has no /JBIG2Globals
- % make global ctx
- PDFfile fileposition exch % resolvestream is not reentrant
- true resolvestream % stack after: PDFfileposition -file-
- % Read the data in a loop until EOF so we can move the strings into a bytestring
- [ { counttomark 1 add index 60000 string readstring not { exit } if } loop ] % array of chunks of up to 60000 bytes each
- exch pop 0 1 index { length add } forall % compute the total length
- % now copy the data from the array of strings into a bytestring
- .bytestring exch 0 exch { 3 copy putinterval length add } forall pop % the running offset advances by each chunk's length
- .jbig2makeglobalctx
- PDFfile 3 -1 roll setfileposition % restore the PDFfile position saved above
- 1 index exch
- /.jbig2globalctx exch put % store the context in the parm dict
- } if
- } bind def
-
- % When used with a PDF image dict, the JPXDecode filter needs to know
- % about any ColorSpace entries, since this overrides whatever is in
- % the image stream itself. We therefore propagate any such key into
- % a filter's DecodeParms.
- /jpxparmfix { % <streamdict> <readdata?> jpxparmfix <streamdict> <readdata?>
- 1 index /Filter .knownget
- { /JPXDecode eq % we only need to do this for JPXDecode filters
- % TODO: handle filter arrays
- {
- 1 index /ColorSpace knownoget { % only when the stream dict has a /ColorSpace
- 2 index /DecodeParms knownoget {
- % insert in the existing DecodeParms dict
- /ColorSpace 3 -1 roll put
- }{
- 1 dict % need to create a custom DecodeParms dict
- dup /ColorSpace 4 -1 roll put
- 2 index exch /DecodeParms exch put % attach the new dict to the stream dict
- } ifelse
- } if
- } if
- } if
- } bind def
-
- % Resolve a stream dictionary to a PostScript stream.
- % Streams with no filters require special handling:
- % - Whether we are going to interpret the stream or just read data
- % from it, we impose a SubFileDecode filter that reads just the
- % requisite amount of data.
- % Note that, in general, resolving a stream repositions PDFfile.
- % Clients must save and restore the position of PDFfile themselves.
- /resolvestream { % <streamdict> <readdata?> resolvestream <stream>
- jpxparmfix
- 1 index /F knownoget {
- % This stream is stored on an external file.
- (r) file 3 -1 roll % open the file named by /F for reading
- /FDecodeParms /FFilter filterparms
- % Stack: readdata? file dict parms filternames
- 4 -1 roll exch % reorder to: readdata? dict parms file filternames
- pdf_decrypt_stream
- applyfilters
- } {
- exch
- dup /Length knownoget { 0 eq } { //false } ifelse { % explicit /Length of 0:
- () 0 () /SubFileDecode filter % return a SubFileDecode stream over an empty string
- } {
- dup /FilePosition .knownget {
- 1 index /File get exch setfileposition % seek /File to the recorded start of the data
- } if
- % Stack: readdata? dict
- /DecodeParms /Filter filterparms
- % Stack: readdata? dict parms filternames
- 2 index /File get exch
- % Stack: readdata? dict parms file/string filternames
- dup length 0 eq {
- % All the PDF filters have EOD markers, but in this case
- % there is no specified filter.
- exch dup type /filetype eq 5 index or {
- % Use length for any files or reading data from any source.
- 3 index /Length knownoget not { 0 } if
- } {
- 0 % Otherwise length of 0 for whole string
- } ifelse
- 4 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse % dicts carrying /IDFlag are left without the SubFileDecode wrapper
- exch
- pdf_decrypt_stream % add decryption if needed
- pop exch pop % drop filternames and parms
- } {
- % Stack: readdata? dict parms source filternames
- exch 3 index /Length knownoget {
- () /SubFileDecode filter % limit the source to /Length bytes when a length is given
- } if exch
- pdf_decrypt_stream % add decryption if needed
- applyfilters
- } ifelse
- } ifelse
- } ifelse
- % Stack: readdata? dict file
- exch pop exch pop
- } bind def
-
- % ============================ Name/number trees ============================ %
-
- % Look up <key> in a name tree by handing it to .treeget with /Names as
- % the key under which leaf nodes keep their pairs array.
- /nameoget { % <nametree> <key> nameoget <obj|null>
-   /Names 3 -1 roll    % Stack: key /Names nametree
-   .treeget
- } bind def
-
- % Look up <key> in a number tree by handing it to .treeget with /Nums as
- % the key under which leaf nodes keep their pairs array.
- /numoget { % <numtree> <key> numoget <obj|null>
-   /Nums 3 -1 roll     % Stack: key /Nums numtree
-   .treeget
- } bind def
-
- % Look up <key> in one node of a name or number tree.
- %   <leafkey> is the dictionary key (/Names or /Nums) under which a leaf
- %   node stores its flat [key value key value ...] array.
- % Returns null when the key is not found.
- /.treeget { % <key> <leafkey> <tree> .treeget <obj|null>
-   dup /Kids knownoget {
-     % Intermediate node: search among its kids.
-     exch pop .branchget
-   } {
-     % Leaf node.  Fetch the pairs array with knownoget so that an
-     % indirectly referenced array is resolved before it is searched, and
-     % so that a node with neither /Kids nor the leaf key yields null
-     % instead of raising an error.
-     exch knownoget {
-       .leafget                % Stack: key pairs
-     } {
-       pop null                % Stack: key
-     } ifelse
-   } ifelse
- } bind def
-
- /.branchget { % <key> <leafkey> <kids> .branchget <obj|null> -- binary search of <kids> using each kid's /Limits
- dup length 0 eq {
- pop pop pop null % no kids left to search: not found
- } {
- dup length -1 bitshift 2 copy oget % mid = length/2; fetch the middle kid
- % Stack: key leafkey kids mid kids[mid]
- dup /Limits oget aload pop % push the two elements of the kid's /Limits array
- % Stack: key leafkey kids mid kids[mid] min max
- 6 index lt { % max < key: continue in the kids after mid
- pop pop
- 1 add 1 index length 1 index sub getinterval .branchget
- } {
- 5 index gt { % min > key: continue in the kids before mid
- pop
- 0 exch getinterval .branchget
- } {
- exch pop exch pop .treeget % key lies within this kid's limits: descend into it
- } ifelse
- } ifelse
- } ifelse
- } bind def
-
- % Binary search of a leaf node's flat [key value key value ...] array.
- % Returns the value paired with <key>, or null when the key is absent.
- /.leafget { % <key> <pairs> .leafget <obj|null>
-   dup length 2 lt {
-     % Empty (or truncated) array: nothing can match.  Without this guard an
-     % empty array raises rangecheck below and a one-element array recurses
-     % on itself forever.
-     pop pop null
-   } {
-     dup length 2 eq {
-       % A single pair left: it either matches or the key is absent.
-       % oget is used for the key as well, as for the midpoint below.
-       dup 0 oget 2 index eq { 1 oget } { pop null } ifelse
-       exch pop
-     } {
-       % mid = index of the key of the middle pair (always even).
-       dup length -1 bitshift -2 and 2 copy oget
-       % Stack: key pairs mid pairs[mid]
-       3 index gt {
-         0 exch                        % pairs[mid] > key: keep pairs[0 .. mid)
-       } {
-         1 index length 1 index sub    % otherwise keep pairs[mid .. end)
-       } ifelse
-       getinterval .leafget
-     } ifelse
-   } ifelse
- } bind def
-
- end % pdfdict
- .setglobal % consumes a value left on the stack before this chunk (presumably the allocation mode saved at the top of the file -- confirm)
|