1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
|
% Copyright (C) 1994-2006 Artifex Software, Inc. All rights reserved.
%
% This software is provided AS-IS with no warranty, either express or
% implied.
%
% This software is distributed under license and may not be copied,
% modified or distributed except as expressly authorized under the terms
% of the license contained in the file LICENSE in this distribution.
%
% For more information about licensing, please refer to
% http://www.ghostscript.com/licensing/. For information on
% commercial licensing, go to http://www.artifex.com/licensing/ or
% contact Artifex Software, Inc., 101 Lucas Valley Road #110,
% San Rafael, CA 94903, U.S.A., +1(415)492-9861.
% $Id$
% pdf_base.ps
% Basic parser for PDF reader.
% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% streams, and name/number trees; it doesn't include any facilities for
% making marks on the page.
% Switch to language level 2 when the interpreter provides .setlanguagelevel.
/.setlanguagelevel where { pop 2 .setlanguagelevel } if
% Leave the previous VM allocation mode on the operand stack and allocate
% in global VM from here on.
.currentglobal true .setglobal
% Create pdfdict on first use, then make it the current dictionary so that
% the definitions below are stored in it.
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin
% Define the name interpretation dictionary for reading values.
% Each key is an executable name that may appear where a PDF value is read;
% the associated value is what .pdfexectoken executes for that name.
/valueopdict mark
(<<) cvn { mark } bind % don't push an actual mark!
% Close a dictionary.  A failing .dicttomark (e.g. no matching <<) is caught
% by 'stopped' and reported as a format error instead of propagating.
(>>) cvn { { .dicttomark } stopped {
( **** File has an unbalanced >> \(close dictionary\).\n)
pdfformaterror
} if
} bind
([) cvn { mark } bind % ditto
% Map ] to whatever ] is bound to at the time this file is loaded.
(]) cvn dup load
% /true true % see .pdfexectoken below
% /false false % ibid.
% /null null % ibid.
% The next three entries map a name to the executable form of the same name,
% so the actual definition is looked up when the token is executed.
/F dup cvx % see Objects section below
/R dup cvx % see Objects section below
/stream dup cvx % see Streams section below
.dicttomark readonly def
% ------ Utilities ------ %
% Define a scratch string. The PDF language definition says that
% no line in a PDF file can exceed 255 characters, but this string
% is also used to search for %PDF-, which needs 1024 characters.
% prevline (below) uses it as the buffer for readline.
/pdfstring 1024 string def
% Read the previous line of a file. If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
/prevline % - prevline <startpos> <substring>
% Returns the file position at which the selected line starts, and the line
% itself as a substring of pdfstring.  The scan starts up to 257 bytes before
% the current position of PDFfile and reads forward one line at a time; the
% most recent non-empty line is remembered, and the loop stops once the read
% position has reached or passed the initial position.  If no non-empty line
% is seen, the initial position and an empty string are returned.
{ PDFfile fileposition dup () pdfstring
% Back up at most 257 bytes, but never before the start of the file.
2 index 257 sub 0 .max PDFfile exch setfileposition
{ % Stack: initpos linepos line string
PDFfile fileposition
% The boolean returned by readline is discarded.
PDFfile 2 index readline pop
dup length 0 gt
% Non-empty line: replace the remembered linepos/line with this one.
{ 3 2 roll 5 -2 roll pop pop 2 index }
% Blank line: ignore it and keep what was remembered.
{ pop }
ifelse
% Stack: initpos linepos line string startpos
PDFfile fileposition 5 index ge { exit } if
pop
}
% Drop startpos, the buffer and initpos, leaving linepos and line.
loop pop pop 3 -1 roll pop
} bind def
% Handle the PDF 1.2 #nn escape convention when reading from a file.
% This should eventually be done in C.
/.pdffixname {          % <name> .pdffixname <name'>
  % For PDF 1.2 and later, decode #nn escapes in a name.  A name that
  % contains no '#' (or any name in an older file) is returned unchanged.
  PDFversion 1.2 ge {
    dup .namestring (#) search not {
      pop                       % Stack: name   (nothing to decode)
    } {
      name#escape cvn           % Stack: name name'
      exch pop                  % Stack: name'
    } ifelse
  } if
} bind def
/name#escape % <post> <(#)> <pre> name#escape <string>
% Takes the three results of '(#) search' on a name string and returns the
% string with every #nn escape replaced by the byte it encodes.  Calls itself
% for each further '#' found in the remainder.
{ exch pop
% Read the two characters following '#' as one hex-encoded byte.
1 index 2 () /SubFileDecode filter dup (x) readhexstring
% Stack: post pre stream char t/f
not { % tolerate, but complain about bad syntax
% Keep the '#' literally: append it to pre and leave post untouched.
pop closefile (#) concatstrings exch
( **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
pdfformaterror
} {
% Append the decoded byte to pre, then drop the two hex digits from post.
exch closefile concatstrings
exch 2 1 index length 2 sub getinterval
} ifelse
% Stack: pre' post'.  Decode any remaining escapes, then join the pieces.
(#) search { name#escape } if concatstrings
} bind def
% Execute a file, interpreting its executable names in a given
% dictionary. The name procedures may do whatever they want
% to the operand stack.
/.pdftokenerror {       % <count> <opdict> <errtoken> .pdftokenerror -
                        % <count> <opdict> <errtoken> .pdftokenerror <number>
  % Report an unknown operator.  Outside a BX/EX section (BXlevel <= 0) the
  % token is re-scanned with the PDFScanInvNum user parameter enabled; if it
  % then scans as an integer or real, that number is returned as an operand.
  % Otherwise every operand pushed above the depth recorded in <count> is
  % discarded.
  BXlevel 0 le {
    ( **** Unknown operator: ') pdfformaterror
    dup =string cvs pdfformaterror
    % Attempt a retry scan of the element after changing to PDFScanInvNum
    << /PDFScanInvNum true >> setuserparams
    =string cvs
    token pop exch pop          % Stack: count opdict token
    % Reset to the default scanning rules as soon as the retry scan is done,
    % so the relaxed rules do not stay enabled when the retry fails as well.
    << /PDFScanInvNum null >> setuserparams
    dup type
    dup /integertype eq exch /realtype eq or {
      exch pop exch pop         % Stack: number
      (', processed as number, value: ) pdfformaterror
      dup =string cvs pdfformaterror (\n) pdfformaterror
      false                     % suppress any stack cleanup
    } {
      % error was non-recoverable with modified scanning rules
      ('\n) pdfformaterror
      true
    } ifelse
  } {
    true
  } ifelse
  { % clean up the operand stack if this was non-recoverable
    pop pop count exch sub { pop } repeat       % pop all the operands
  } if
} bind def
/.pdfexectoken { % <count> <opdict> <exectoken> .pdfexectoken ?
% Execute one executable name read from a PDF stream.  The name is looked up
% in opdict; true/false/null are taken from systemdict; a name that merely
% starts with 'endobj' is treated as endobj; anything else is handed to
% .pdftokenerror.  What is left on the stack depends on the operator run.
PDFDEBUG {
% Make sure PDFSTEPcount exists before it is read below.
pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
PDFSTEP {
% Count the tokens executed so far in PDFtokencount.
pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput
PDFSTEPcount 1 gt {
% Still skipping: just count this token down.
pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput
} {
% Show the token and prompt on stdin; a token typed in reply becomes the
% new PDFSTEPcount, an empty line sets it to 1.
dup ==only
( step # ) print PDFtokencount =only
( ? ) print flush 1 false .outputpage
(%stdin) (r) file 255 string readline {
token {
exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput
} {
pdfdict /PDFSTEPcount 1 .forceput
} ifelse % token
} {
pop /PDFSTEP false def % EOF on stdin
} ifelse % readline
} ifelse % PDFSTEPcount > 1
} {
dup ==only () = flush
} ifelse % PDFSTEP
} if % PDFDEBUG
2 copy .knownget {
% Known operator: drop count, opdict and the name, then run the definition.
exch pop exch pop exch pop exec
} {
% Normally, true, false, and null would appear in opdict
% and be treated as "operators". However, there is a
% special fast case in the PostScript interpreter for names
% that are defined in, and only in, systemdict and/or
% userdict: putting these three names in the PDF dictionaries
% destroys this property for them, slowing down their
% interpretation in all PostScript code. Therefore, we
% check for them explicitly here instead.
dup dup dup /true eq exch /false eq or exch /null eq or {
exch pop exch pop //systemdict exch get
} {
% Hackish fix to detect missing whitespace after "endobj". Yet another
% problem that (you guessed it!) Adobe Acrobat ignores silently
256 string cvs (endobj) anchorsearch {
( **** Missing whitespace after 'endobj'.\n) pdfformaterror
% Discard the text after 'endobj' and run opdict's endobj entry.
exch pop cvn get exch pop exec
} {
% Note: at this point the token has been converted to a string.
.pdftokenerror
} ifelse
} ifelse
} ifelse
} bind def
% Pre-built user-parameter dictionaries; .pdfrun passes them to setuserparams
% to switch the PDFScanRules parameter on and to set it back to null.
/PDFScanRules_true << /PDFScanRules true >> def
/PDFScanRules_null << /PDFScanRules null >> def
/.pdfrun { % <file> <opdict> .pdfrun -
% Read tokens from <file> until end of data, executing executable names
% through .pdfexectoken with <opdict> and leaving every other token on the
% operand stack.  PDFsource is set to <file> for the duration of the run and
% put back to its previous value afterwards.
% Construct a procedure with the stack depth, file and opdict
% bound into it.
1 index cvlit count 2 sub 3 1 roll mark
% If the PDFScanRules user parameter is currently null, prepend code that
% turns it on before the token loop starts.
/PDFScanRules .getuserparam //null eq {
//PDFScanRules_true { setuserparams } 0 get % force PDF scanning mode
mark 7 4 roll
} {
mark 5 2 roll
} ifelse
% Template for the body of the token loop; it is unpacked and re-packed
% below together with the count, opdict and file pushed above.
{ % Stack: ..operands.. count opdict file
{ token } stopped {
dup type /filetype eq { pop } if % pop the operand if it is restored
( **** Error reading a content stream. The page may be incomplete.\n)
pdfformaterror
% Treat a scanner error like end of data.
//false
} if {
dup type /nametype eq {
dup xcheck {
.pdfexectoken
} {
% Literal name: decode #nn escapes and leave the name as an operand.
.pdffixname
exch pop exch pop PDFDEBUG {
PDFSTEPcount 1 le {
dup ==only ( ) print flush
} if
} if
} ifelse
} {
% Any other token is an operand: drop count and opdict, keep the token.
exch pop exch pop PDFDEBUG {
PDFSTEPcount 1 le {
dup ==only ( ) print flush
} if
} if
} ifelse
} {
% No more tokens: dispatch the pseudo-operator %%EOF through opdict.
(%%EOF) cvn cvx .pdfexectoken
} ifelse
}
aload pop .packtomark cvx
% Wrap the body as { {body} loop } and arrange for it to run under 'stopped'.
{ loop } 0 get 2 packedarray cvx
{ stopped } 0 get
/PDFScanRules .getuserparam //null eq {
//PDFScanRules_null { setuserparams } 0 get % reset PDF scanning mode if it was off
} if
% After the loop: store the saved PDFsource back, then repeat the 'stop' if
% the loop was terminated by one.
/PDFsource PDFsource
{ store { stop } if } aload pop .packtomark cvx
% Make the file the current PDFsource and run the constructed procedure.
/PDFsource 3 -1 roll store exec
} bind def
% Execute a file, like .pdfrun, for a marking context.
% This temporarily rebinds LocalResources and DefaultQstate.
/.pdfruncontext { % <resdict> <file> <opdict> .pdfruncontext -
% Push the .pdfrun procedure followed by the current LocalResources and
% DefaultQstate values.
/.pdfrun load LocalResources DefaultQstate
% Install <resdict> as LocalResources and the current qstate as DefaultQstate.
/LocalResources 7 -1 roll store
/DefaultQstate qstate store
% NOTE(review): .execn is not defined in this file; presumably it executes
% the three objects pushed above in order, so that .pdfrun consumes
% <file> <opdict> and the two saved values end up back on the stack.
3 .execn
% Restore the saved values.
/DefaultQstate exch store
/LocalResources exch store
} bind def
% Get the depth of the PDF operand stack. The caller sets pdfemptycount
% before calling .pdfrun or .pdfruncontext. It is initially set by
% pdf_main, and is also set by any routine which changes the operand
% stack depth (currently .pdfpaintproc, although there are other callers
% of .pdfrun{context} which have not been checked for opstack depth).
/.pdfcount { % - .pdfcount <count>
% 'count' is sampled before pdfemptycount is pushed, so the result is the
% number of operands above the baseline recorded in pdfemptycount.
count pdfemptycount sub
} bind def
% Read a token, but simply return false (no token read) in the case of an
% error. This is messy because 'token' either may or may not pop its operand
% if an error occurs, and because the return values are different depending
% on whether the source is a file or a string. To avoid closing the file
% check for '{' before trying 'token'.
% Dispatch table keyed by the next character of a file (as a one-character
% string).  White space and NUL are read and discarded; '{' abandons the
% scan so that 'token' is never run on the start of a procedure body.
/token_nofail_dict mark
( ) { dup ( ) readstring pop pop } bind
% The next four entries reuse the procedure defined for the space character.
(\t) 1 index
(\r) 1 index
(\n) 1 index
(\000) 1 index
% Leaves the same 'null true' shape as a failed 'token' call below and
% exits the skipping loop.
({) { //null //true exit } bind
.dicttomark def
/token_nofail { % <file|string> token_nofail false
% <file> token_nofail <token> true
% <string> token_nofail <post> <token> true
dup type /filetype eq {
% File source: peek at one character at a time.  If nothing can be peeked,
% the '{' entry is used, which makes the result false.
{ dup ( ) .peekstring not { ({) } if
//token_nofail_dict exch .knownget not {
% Not white space and not '{': run 'token' with errors trapped.  The null
% pushed first marks the stack so the cleanup below can tell whether the
% operand was restored after an error.
//null 1 index { token } .internalstopped exit
} if
exec
} loop
{ % stack: source null [source]
//null ne { pop } if pop //false
} { % stack: source null ([post] token true | false)
{ 3 1 roll pop pop //true }
{ pop pop //false }
ifelse
} ifelse
} {
% String source: no skipping is needed, just trap errors from 'token'.
//null 1 index % stack: source null source
{ token } .internalstopped { % stack: source null [source]
//null ne { pop } if pop //false
} { % stack: source null ([post] token true | false)
{ 4 2 roll pop pop //true }
{ pop pop //false }
ifelse
} ifelse
} ifelse
} bind def
% The table was bound into token_nofail with //, so the name can be removed.
currentdict /token_nofail_dict .undef
% ================================ Objects ================================ %
% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
% An index into a 2-D sequence is split into a top-level index (index
% shifted right by lshift bits) and an index within the sub-sequence (the
% low lshift bits).  With lshift = 9 each sub-sequence has 512 entries.
/lshift 9 def
/lnshift lshift neg def
% Mask selecting the low lshift bits (511).
/lsubmask 1 lshift bitshift 1 sub def
% Number of entries in a full sub-sequence (512).
/lsublen lsubmask 1 add def
/larray {               % - larray <larray>
  % A new, empty 2-D array: one top-level slot holding an empty sub-array.
  0 array 1 array astore
} bind def
/lstring {              % - lstring <lstring>
  % A new, empty 2-D string: one top-level slot holding an empty string.
  () 1 array astore
} bind def
/ltype { % <lseq> ltype <type>
% The element type is that of the first sub-sequence (arraytype for an
% larray, stringtype for an lstring).
0 get type
} bind def
/lget {                 % <lseq> <index> lget <value>
  % Fetch the sub-sequence selected by the high bits of the index, then the
  % element selected by the low bits.
  exch 1 index                  % Stack: index lseq index
  //lnshift bitshift get        % Stack: index subseq
  exch //lsubmask and get       % Stack: value
} bind def
/lput {                 % <lseq> <index> <value> lput -
  % Store the value in the sub-sequence selected by the high bits of the
  % index, at the position given by the low bits.
  3 1 roll                      % Stack: value lseq index
  exch 1 index                  % Stack: value index lseq index
  //lnshift bitshift get        % Stack: value index subseq
  exch //lsubmask and           % Stack: value subseq subindex
  3 -1 roll put
} bind def
/llength {              % <lseq> llength <length>
  % Every sub-sequence except the last is counted as full; the last one
  % contributes its actual length.
  dup length 1 sub              % Stack: lseq last
  2 copy get length             % Stack: lseq last lastlen
  exch //lshift bitshift add    % Stack: lseq length
  exch pop
} bind def
% lgrowto assumes newlength > llength(lseq)
/growto {               % <string/array> <length> growto <string'/array'>
  % Allocate a new string or array of the requested length (matching the
  % type of the original) and copy the original contents into its start.
  1 index type /stringtype ne { array } { string } ifelse
                                % Stack: old new
  dup 3 -1 roll exch            % Stack: new old new
  copy pop                      % Stack: new
} bind def
/lgrowto { % <lseq> <newlength> lgrowto <lseq'>
% Grow a 2-D sequence to newlength entries.  If more top-level slots are
% needed, the last existing sub-sequence is first completed (by a recursive
% call), full sub-sequences are appended, and an empty one is added at the
% end; the final step then expands the last sub-sequence to the required
% remainder.
% Compute the number of sub-sequences needed and compare it with the
% current number.
dup //lsubmask add //lnshift bitshift dup 3 index length gt {
% Add more sub-arrays. Start by completing the last existing one.
% Stack: lseq newlen newtoplen
3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
% Stack: newlen newtoplen lseq
[ exch aload pop
counttomark 2 add -1 roll % newtoplen
% 'dup 0 0 getinterval' makes an empty sequence of the same type as the
% previous sub-sequence; growto turns it into a full-size one.
counttomark sub { dup 0 0 getinterval lsublen growto } repeat
dup 0 0 getinterval ] exch
} {
pop
} ifelse
% Expand the last sub-array.
1 sub //lsubmask and 1 add
exch dup dup length 1 sub 2 copy
% Stack: newsublen lseq lseq len-1 lseq len-1
get 5 -1 roll growto put
} bind def
/lforall { % <lseq> <proc> lforall -
% Build the procedure {<proc> forall} and apply it to every sub-sequence,
% so <proc> is run on each element of each sub-sequence in order.
/forall cvx 2 packedarray cvx forall
} bind def
% We keep track of PDF objects using the following PostScript variables:
%
% Generations (lstring): Generations[N] holds 1+ the current
% generation number for object number N. (As far as we can tell,
% this is needed only for error checking.) For free objects,
% Generations[N] is 0.
%
% Objects (larray): If object N is loaded, Objects[N] is the actual
% object; otherwise, Objects[N] is an executable integer giving
% the file offset of the object's location in the file. If
% ObjectStream[N] is non-zero then Objects[N] contains the index
% into the object stream instead of the file offset of the object.
%
% ObjectStream (larray): If object N is in an object stream then
% ObjectStream[N] holds the object number of the object stream.
% Otherwise ObjectStream[N] contains 0. If ObjectStream[N]
% is non-zero then Objects[N] contains the index into the object
% stream.
%
% GlobalObjects (dictionary): If object N has been resolved in
% global VM, GlobalObjects[N] is the same as Objects[N]
% (except that GlobalObjects itself is stored in global VM,
% so the entry will not be deleted at the end of the page).
%
% IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
% global VM. This is an accelerator to avoid having to do a
% dictionary lookup in GlobalObjects when resolving every object.
% Initialize the PDF object tables.
/initPDFobjects {       % - initPDFobjects -
  % Start with empty per-object tables, allocated in the current VM mode.
  /Objects larray def
  /ObjectStream larray def
  /Generations lstring def
  /IsGlobal lstring def
  % GlobalObjects itself is always allocated in global VM; the previous
  % allocation mode is restored afterwards.
  .currentglobal true .setglobal
  /GlobalObjects 20 dict def
  .setglobal
} bind def
% Grow the tables to a specified size.
/growPDFobjects {       % <minsize> growPDFobjects -
  % Grow each object table that is currently shorter than <minsize>; tables
  % that are already long enough are left alone.
  { /ObjectStream /Objects /Generations /IsGlobal } {
                                % Stack: minsize tablename
    2 copy load llength gt {
      2 copy load exch lgrowto  % Stack: minsize tablename lseq'
      def                       % Stack: minsize
    } {
      pop                       % Stack: minsize
    } ifelse
  } forall
  pop
} bind def
% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
% no way to represent procedures. Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/resolved? {            % <object#> resolved? <value> true
                        % <object#> resolved? false
  % Report whether object N has been loaded.  An executable integer in
  % Objects[N] means "not loaded yet" unless IsGlobal[N] is non-zero, in
  % which case the value is copied from GlobalObjects into Objects and
  % returned.  Anything else in Objects[N] is the loaded object itself.
  Objects 1 index lget                  % Stack: obj# entry
  dup xcheck {                          % Check if executable
    dup type /integertype eq {          % Check if an integer
      % Check whether the object is in GlobalObjects.
      pop IsGlobal 1 index lget 0 eq {  % 0 --> Not in GlobalObjects
        pop false                       % The object is not resolved
      } {                               % The object is in GlobalObjects
        % Update Objects from GlobalObjects
        PDFDEBUG { (%Global=>local: ) print dup == } if
        GlobalObjects 1 index get       % Stack: obj# value
        % lput consumes exactly <lseq> <index> <value>, so push copies of
        % obj# and value after Objects and keep the originals underneath.
        Objects 2 index 2 index lput    % Stack: obj# value
        exch pop true                   % Stack: value true
      } ifelse
    } {                                 % Else object is executable but not integer
      exch pop true                     % Therefore must be executable dict. (stream)
    } ifelse
  } {                                   % Else object is not executable.
    exch pop true                       % Therefore it must have been resolved.
  } ifelse
} bind def
% Forcing a possibly indirect object is just 'exec': oforce is bound directly
% to the exec operator.
/oforce /exec load def
/oget { % <array> <index> oget <object>
% <dict> <key> oget <object>
% Fetch an element and force it with oforce.
% Before release 6.20, this procedure stored the resolved
% object back into the referring slot. In order to support
% PDF linearization, we no longer do this.
get oforce
} bind def
/oforce_array { % <array> oforce_array <array>
% Build a new array whose elements are the forced elements of the original;
% the original array is not modified.
[ exch { oforce } forall ]
} bind def
/oforce_elems { % <array> oforce_elems <first> ... <last>
% Push every element of the array, forced, onto the operand stack.
{ oforce } forall
} bind def
/oforce_recursive {     % <any> oforce_recursive <any>
  % Force the object; if the result is a dictionary or an array, rebuild it
  % with every key/value (or element) forced as well, recursively for values
  % and elements.  Objects of any other type are returned as forced.
  oforce dup type /dicttype eq {
    << exch { oforce_recursive exch oforce exch } forall >>
  } {
    dup type /arraytype eq {
      [ exch { oforce_recursive } forall ]
    } if
  } ifelse
} bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget {            % <dict> <key> knownoget <value> true
                        % <dict> <key> knownoget false
  % Like .knownget followed by oforce, except that a value which forces to
  % null is reported as absent.
  % See oget above regarding this procedure.
  .knownget {
    oforce dup //null ne        % Stack: value present?
    dup not { exch pop } if     % drop a null value, leaving just false
  } {
    //false
  } ifelse
} bind def
/knownogetdict {        % <dict> <key> knownogetdict <dict> true
                        % <dict> <key> knownogetdict false
  % knownoget restricted to dictionary values: a value of any other type is
  % discarded and reported as absent.
  //knownoget exec {
    dup type /dicttype eq {
      //true
    } {
      pop //false
    } ifelse
  } {
    //false
  } ifelse
} bind def
% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F {                    % <file#> <object#> <generation#> F <object>
  % With at least three operands on the PDF operand stack this is taken as a
  % foreign file reference and replaced by null.
  % Some PDF 1.1 files use F as a synonym for f, so with fewer than three
  % operands the f operator is run instead.
  .pdfcount 3 ge {
    pop pop pop null
  } {
    f
  } ifelse
} bind def
% Verify the generation number for a specified object
% Note: The values in Generations is the generation number plus 1.
% If the value in Generations is zero then the object is free.
/checkgeneration { % <object#> <generation#> checkgeneration <object#> <OK>
% Compare <generation#> with the value recorded in Generations.  A mismatch
% produces a warning (unless QUIET) but the result is true in every case.
Generations 2 index lget 1 sub 1 index eq { % If generation # match ...
pop true % Then return true
} { % Else not a match ...
QUIET not { % Create warning message if not QUIET
% The message is built by appending to a string left on the stack.
Generations 2 index lget 0 eq { % Check if object is free ...
( **** Warning: reference to free object: )
2 index =string cvs concatstrings ( ) concatstrings % put obj #
exch =string cvs concatstrings ( R\n) concatstrings % put gen #
} {
( **** Warning: wrong generation: )
2 index =string cvs concatstrings ( ) concatstrings % put obj #
exch =string cvs concatstrings % put gen #
(, xref gen#: ) concatstrings 1 index Generations % put xref gen #
exch lget 1 sub =string cvs concatstrings (\n) concatstrings
} ifelse
pdfformaterror % Output warning message
} { % Else QUIET ...
pop % Pop generation number
} ifelse
% We should return false for an incorrect generation number, however
% we are simply printing a warning and then returning true. This makes
% Ghostscript tolerant of bad generation numbers.
true
} ifelse
} bind def
/R { % <object#> <generation#> R <object>
% Build the executable packed array {obj# gen# resolveR}; nothing is read
% from the file until that procedure is executed.
/resolveR cvx 3 packedarray cvx
} bind def
% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
% valueopdict extended with endobj; used by 'obj' below.
/objopdict mark
% Copy every key/value pair of valueopdict.
valueopdict { } forall
% Map endobj to the executable name, looked up when the token is executed.
/endobj dup cvx
.dicttomark readonly def
/obj { % <object#> <generation#> obj <object>
% Continue reading tokens from PDFfile with objopdict; the object number
% and generation stay on the stack for endobj.
PDFfile objopdict .pdfrun
} bind def
/endobj {               % <object#> <generation#> <object> endobj <object>
  % Record a freshly read object in Objects (and, for dictionaries allocated
  % in global VM, in GlobalObjects / IsGlobal) and return it.
  3 1 roll                              % Stack: object obj# gen#
  % Read the xref entry if we haven't yet done so.
  % This is only needed for generation # checking.
  1 index resolved? {
    pop
  } if
  checkgeneration {                     % Stack: object obj#
    % The only global objects we bother to save are
    % (resource) dictionaries.
    1 index dup gcheck exch type /dicttype eq and {
      PDFDEBUG { (%Local=>global: ) print dup == } if
      GlobalObjects 1 index 3 index put
      % IsGlobal is an lstring (a 2-D sequence), so it has to be written
      % with lput; a plain put would index its top-level array instead.
      IsGlobal 1 index 1 lput
    } if
    Objects exch 2 index lput           % Stack: object
  } {
    pop pop null
  } ifelse
} bind def
% When resolving an object reference in an object stream, we stop at
% the end of file. Note: Objects in an object stream do not have either
% a starting 'obj' or an ending 'endobj'.
/resolveobjstreamopdict mark
% Copy every key/value pair of valueopdict.
valueopdict { } forall
% .pdfrun dispatches %%EOF when no more tokens can be read; exit its loop.
(%%EOF) cvn { exit } bind
% A stray endobj is reported and otherwise ignored.
/endobj { % bug 689795
( **** Warning: Objects in an object stream should not have 'endobj'.\n)
pdfformaterror
} bind
.dicttomark readonly def
% Note: This version of this function is not currently being used.
% Resolve all objects in an object stream
% NOTE(review): this definition is replaced by the definition of the same
% name that follows it, so it is never the one in effect.  Its stack
% handling has not been checked; review it before reviving it.
/resolveobjectstream { % <object stream #> resolveobjectstream -
PDFDEBUG { (%Resolving object stream: ) print } if
0 resolveR % Get the objectstream dict, all objstrms use 0 as the gen #
dup /First get % Save location of first object onto the stack
1 index /N get % Save number of objects onto the stack
2 index false resolvestream % Convert stream dict into a stream
/ReusableStreamDecode filter % We need to be able to position stream
% Objectstreams begin with list of object numbers and locations
% Create two arrays to hold object numbers and stream location
1 index array % Array for holding object number
2 index array % Array for holding stream object location
% Get the object numbers and locations.
0 1 5 index 1 sub { % Loop and collect obj # and locations
% Stack: objstreamdict First N objectstream [obj#] [loc] index
2 index 1 index % Setup to put obj# into object number array
5 index token pop put % Get stream, then get obj# and put into array
1 index 1 index % Setup to put object loc into location array
5 index token pop put % Get stream, get obj loc and put into array
pop % Remove loop index
} for
% Create a bytestring big enough for reading any object data
% Scan for the size of the largest object
0 0 % Init max object size and previous location
2 index { % Loop through all object locations
% Stack: ... maxsize prevloc currentloc
dup 4 1 roll % Save copy of object location into stack
exch sub % Object size = currentloc - prevloc
.max % Determine maximum object size
exch % Put max size under previous location
} forall
pop % Remove previous location
.bigstring % Create bytestring based upon max obj size
% Move to the start of the object data
3 index 6 index % Get objectstream and start of first object
setfileposition % Move to the start of the data
% Read the data for all objects except the last. We do
% not know the size of the last object so we need to treat
% it as a special case.
0 1 6 index 2 sub {
dup 4 index exch get % Get our current object number
% Stack: objstreamdict First N objectstream [obj#] [loc]
% bytestring loopindex object#
dup resolved? { % If we already have this object
% NOTE(review): the next line is leftover debugging output.  'xxx' is not
% defined anywhere in the visible source, so executing it would raise an
% error -- presumably a deliberate trap.
(yyy) = pstack (yyy) = flush xxx
pop pop % Remove object and object number
1 add 2 index exch get % Get location of next object
6 index add 6 index exch % Form location of next object and get stream
setfileposition % Move to the start of the next object data
} { % Else this is a new object ...
% We are going to create a string for reading the object
2 index 0 % use our working string
% Determine the size of the object
5 index 4 index 1 add get % Get location of the next object
6 index 5 index get % Get location of this object
sub % Size of object = next loc - this loc
getinterval % Create string for reading object
6 index exch readstring pop % Read object
/ReusableStreamDecode filter % Convert string into a stream
resolveobjstreamopdict .pdfrun % Get PDF object
Objects exch 2 index exch lput % Put object into Objects array
pop pop % Remove object # and loop index
} ifelse
} for
pop pop % Remove our working string and loc array
% Now read the last object in the object stream. Since it
% is the last object, we can use the original stream and
% terminate when we hit the end of the stream
% Stack: objstreamdict First N objectstream [obj#]
2 index 1 sub get % Get our current object number
dup resolved? not { % If we do not already have this object
exch % Get our object stream
resolveobjstreamopdict .pdfrun % Get PDF object
Objects exch 2 index exch lput % Put object into Objects array
} if
pop pop pop pop % Clear stack
} bind def
% Resolve all objects in an object stream
/resolveobjectstream { % <object stream #> resolveobjectstream -
% Read every object of the given object stream in one pass and store each
% one that is not yet resolved into Objects.  Signals typecheck if the
% stream's /Type is not /ObjStm, and rangecheck if the number of objects
% read differs from /N.
PDFDEBUG { (%Resolving object stream: ) print } if
0 resolveR % Get the objectstream dict, all objstrms use 0 as the gen #
dup /Type get /ObjStm ne { % Verify type is object stream
( **** Incorrect Type in object stream dictionary.\n) pdfformaterror
/resolveobjectstream cvx /typecheck signalerror
} if
dup /N get % Save number of objects onto the stack
1 index false resolvestream % Convert stream dict into a stream
/ReusableStreamDecode filter % We need to be able to position stream
% Objectstreams begin with list of object numbers and locations
1 index array % Create array for holding object number
% Get the object numbers
0 1 4 index 1 sub { % Loop and collect obj numbers
% Stack: objstreamdict N objectstream [obj#] loopindex
1 index 1 index % Setup to put obj# into object number array
4 index token pop put % Get stream, then get obj# and put into array
2 index token pop pop pop % Get stream, get obj loc and clear stack
} for
% Move to the start of the object data
1 index 4 index /First get % Get objectstream and start of first object
setfileposition % Move to the start of the data
% We disable PDFDEBUG while reading the data stream. We will
% print the data later
PDFDEBUG /PDFDEBUG false def % Save PDFDEBUG and disable it while reading
% Read the data for all objects. We check to see if we get
% the number of objects that we expect.
% Stack: objstreamdict N objectstream [obj#] PDFDEBUG
mark 4 -1 roll % Get objectstream
% Expected depth = current depth plus N, sampled just before .pdfrun.
count 5 index add % Determine stack depth with objects
/PDFObjectStkCount exch def
resolveobjstreamopdict .pdfrun % Get PDF objects
PDFObjectStkCount count ne { % Check stack depth
( **** Incorrect object count in object stream.\n) pdfformaterror
/resolveobjectstream cvx /rangecheck signalerror
} if
% We have the object data
counttomark array astore % Put objects into an array
exch pop % Remove mark
exch /PDFDEBUG exch def % Restore PDFDEBUG flag
% Save the objects into Objects
0 1 2 index length 1 sub { % Loop through all objects
% Stack: objstreamdict N [obj#] [objects] loopindex
dup 3 index exch get % Get our current object number
dup resolved? { % If we already have this object
pop pop % Remove object and object number
} { % Else if we do not have this object
PDFDEBUG { (%Resolving compressed object: [) print dup =only ( 0]) = } if
Objects exch 3 index % Put the object into Objects
3 index get
PDFDEBUG { dup === flush } if
lput
} ifelse
pop % Remove loop index
} for
pop pop pop pop % Remove objstream, N, [obj#], and [objects]
} bind def
% When resolving an object reference, we stop at the endobj or endstream.
/resolveopdict mark
% Copy every key/value pair of valueopdict.
valueopdict { } forall
% Each terminator records the object with endobj and then leaves the
% .pdfrun token loop.
/endstream { endobj exit } bind
/endobj { endobj exit } bind
/endjobj { % Bug 689876.
( **** Operator 'endobj' is misspelled as 'endjobj'.\n) pdfformaterror
endobj exit
} bind
% OmniForm generates PDF file with endobj missing in some
% objects. AR ignores this. So we have to do it too.
% The two operands of the unexpected 'obj' are discarded first.
/obj { pop pop endobj exit } bind
.dicttomark readonly def
/resolveR {             % <object#> <generation#> resolveR <object>
  % Return the object with the given number, reading it from PDFfile (or
  % from its object stream) if it has not been loaded yet.  The position of
  % PDFfile is saved before reading and restored afterwards.  Returns null
  % when the object cannot be found in its object stream.
  PDFDEBUG {
    PDFSTEPcount 1 le {
      (%Resolving: ) print 2 copy 2 array astore ==
    } if
  } if
  1 index resolved? {           % If object has already been resolved ...
    exch pop exch pop           % then clear stack and return object
  } {                           % Else if not resolved ...
    PDFfile fileposition 3 1 roll       % Save current file position
    1 index Objects exch lget           % Get location of object from xref
    3 1 roll checkgeneration {          % Verify the generation number
      % Stack: savepos objpos obj#
      ObjectStream 1 index lget dup 0 eq {      % Check if obj is not in an objstream
        pop exch PDFoffset add PDFfile exch setfileposition
        % The object must start with "<obj#> <gen#> obj"; the object number
        % read from the file has to match the one requested.
        PDFfile token pop 2 copy ne
        { ( **** Unrecoverable error in xref!\n) pdfformaterror
          /resolveR cvx /rangecheck signalerror
        }
        if pop PDFfile token pop
        PDFfile token pop /obj ne
        { ( **** Unrecoverable error in xref!\n) pdfformaterror
          /resolveR cvx /rangecheck signalerror
        }
        if
        pdf_run_resolve         % PDFfile resolveopdict .pdfrun
      } {                       % Else the object is in an ObjectStream
        % Process an objectstream object.  We are going to resolve all
        % of the objects in the stream and place them into the Objects
        % array.
        % Stack: savepos objpos obj# objectstream#
        resolveobjectstream     % Stack: savepos objpos obj#
        resolved? {             % If the object is now resolved ...
                                % Stack: savepos objpos object
          exch pop              % Remove object pos from stack.
        } {
          % resolved? has already consumed obj#, so only objpos is left
          % above savepos; popping twice here would discard the saved file
          % position.
          pop null              % Pop objpos, put null for object
        } ifelse
      } ifelse
    } {                         % Else the generation number is wrong
      % Don't cache if the generation # is wrong.
      pop pop null              % Pop objpos and obj#, put null for object
    } ifelse                    % ifelse generation number is correct
    exch PDFfile exch setfileposition   % Return to original file position
  } ifelse
} bind def
% ================================ Streams ================================ %
% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
% /File - the file or string where the stream contents are stored,
% if the stream is not an external one.
% /FilePosition - iff File is a file, the position in the file
% where the contents start.
% /StreamKey - the key used to decrypt this stream, if any.
% We do the real work of constructing the data stream only when the
% contents are needed.
% Construct a stream. The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably. We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
% Unfortunately, this doesn't account for other whitespace characters that
% may have preceded the EOL, such as spaces or tabs. Thus we back up one
% character and scan until we find the \n terminator.
/stream { % <dict> stream <modified_dict>
% Turn a stream dictionary into the executable-dictionary representation of
% a stream: record where the data lives (File / FilePosition), skip over the
% data, and check that 'endstream' follows.  If it does not, /Length is
% removed, the object is finished with endobj and the .pdfrun loop is left.
% A zero-length stream has no data for a filter to process.
dup /Length oget 0 eq {
dup /Filter undef % don't confuse any filters that require data
} if
% Take the first branch when the dictionary has /F or when the current
% source is PDFfile itself.
dup /F known dup PDFsource PDFfile eq or {
% Without /F the data is in PDFfile: record the file and the data position.
not {
dup /File PDFfile put
% make sure that we are just past the EOL \n character
PDFfile dup fileposition 1 sub setfileposition % back up one
PDFfile read pop
dup 13 eq {
% If there had been a \n, token would have advanced over it
% thus, if the terminator was \r, we have a format error!
( **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
pop % fileposition is OK (just past the \r).
} {
% Otherwise, scan past \n
{ 10 eq { exit } if
PDFfile read pop
} loop
} ifelse
dup /FilePosition PDFfile fileposition put
PDFDEBUG {
PDFSTEPcount 1 le {
(%FilePosition: ) print dup /FilePosition get ==
} if
} if
} if
% Some (bad) PDF files have invalid stream lengths. This causes problems
% if we reposition beyond the end of the file. So we compare the given
% length to number of bytes left in the file.
dup /Length oget
dup PDFfile bytesavailable lt { % compare to bytes left in file
PDFfile fileposition % reposition to the end of stream
add PDFfile exch setfileposition
} {
pop % bad stream length - do not reposition.
% This will force a length warning below
} ifelse
} {
pop
% We're already reading from a stream, which we can't reposition.
% Capture the sub-stream contents in a string.
dup /Length oget string PDFsource exch readstring
not {
( **** Warning: Unexpected EOF in stream!\n) pdfformaterror
/stream cvx /rangecheck signalerror
} if
1 index exch /File exch put
} ifelse
% The next token should be 'endstream'; a read failure counts as a mismatch.
PDFsource token_nofail not { null } if
dup /endobj eq {
% Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
( **** Warning: stream missing 'endstream'.\n) pdfformaterror
pop /endstream % fake a valid endstream
} if
/endstream ne {
( **** Warning: stream Length incorrect.\n) pdfformaterror
dup /Length undef % prevent the use of the incorrect length.
cvx endobj exit % exit from .pdfrun now.
} if
% Mark the dictionary executable: this is how streams are told apart from
% ordinary dictionaries.
cvx
} bind def
% Default meaning of endstream: leave the enclosing loop.
/endstream {
exit
} bind def
% Contrary to the published PDF (1.3) specification, Acrobat Reader
% accepts abbreviated filter names everywhere, not just for in-line images,
% and some applications (notably htmldoc) rely on this.
% Maps each abbreviated filter name to its full name; used by filtername.
/unabbrevfilterdict mark
/AHx /ASCIIHexDecode /A85 /ASCII85Decode /CCF /CCITTFaxDecode
/DCT /DCTDecode /Fl /FlateDecode /LZW /LZWDecode /RL /RunLengthDecode
.dicttomark readonly def
% Extract and apply filters.
/filterparms { % <dict> <DPkey> <Fkey> filterparms
% <dict> <parms> <filternames>
% Look up the filter entry (<Fkey>) and the parameter entry (<DPkey>) of
% <dict>.  <filternames> is always an array-like object (empty when there
% are no filters); <parms> is null when there are no filters or no
% parameters.  A single filter name is wrapped in a one-element array, and
% in that case a non-array parameter value is wrapped as well.
2 index exch knownoget {
oforce_recursive
exch 2 index exch knownoget {
% Both filters and parameters.
oforce_recursive
exch dup type /nametype eq {
1 array astore exch
dup type /arraytype ne { 1 array astore } if exch
} if
} {
% Filters, but no parameters.
//null exch
dup type /nametype eq { 1 array astore } if
} ifelse
} {
% No filters: ignore parameters, if any.
pop //null { }
} ifelse
} bind def
/filtername {		% <filtername> filtername <filtername'>
  % Map a filter name to the name that should be handed to 'filter':
  % an abbreviation listed in unabbrevfilterdict (e.g. /Fl) is replaced by
  % its full name; a name with no /Filter resource is reported and
  % replaced by /.EOFDecode.
  dup //unabbrevfilterdict exch .knownget { exch pop } if
  dup /Filter resourcestatus not {
    % Unsupported filter.  Save the current value of Repaired around the
    % messages, because this error is not the creator's fault.
    Repaired exch		% Stack: savedRepaired name
    ( **** ERROR: Unable to process ) pdfformaterror
    64 string cvs pdfformaterror
    ( data. Page will be missing data.\n) pdfformaterror
    /Repaired exch store	% restore the previous "Repaired" state
    % Substitute a filter that returns EOF (no data).
    /.EOFDecode
  } {
    % The filter exists: discard the status and size values.
    pop pop
  } ifelse
} bind def
/applyfilters {		% <parms> <source> <filternames> applyfilters <stream>
  % Stack the filters named in <filternames> on top of <source>, in order.
  % <parms> is either null (no filter has parameters) or an array that
  % runs parallel to <filternames>; an individual entry may itself be null.
  % Every filter name is passed through 'filtername' before use, so
  % abbreviated names are expanded and unsupported filters are replaced.
  2 index null eq {
    % No parameters at all.  Stack inside the loop: null source filtername
    { filtername filter }
  } {
    {		% Stack: parms source filtername
      2 index 0 oget dup null eq {
        % This filter has no parameters of its own.  The name must still
        % be normalized, exactly as in the parameterless case above;
        % otherwise an abbreviated or unsupported name reaches 'filter'
        % unchanged and raises an error.
        pop filtername
      } {
        % Stack: parms source filtername parmdict
        exch filtername dup /JBIG2Decode eq { exch jbig2cachectx exch } if
      } ifelse filter
      % Stack: parms stream
      % Drop the parameter entry just consumed, so that element 0 of
      % <parms> always belongs to the next filter name.
      exch dup length 1 sub 1 exch getinterval exch
    }
  } ifelse forall exch pop
} bind def
% JBIG2 streams have an optional 'globals' stream obj for
% sharing redundant data between page images. Here we resolve
% that stream reference (if any) and run it through the decoder,
% creating a special -jbig2globalctx- postscript object our
% JBIG2Decode filter implementation looks for in the parm dict.
/jbig2cachectx { % <parmdict> jbig2cachectx <parmdict>
% If <parmdict> has a /JBIG2Globals entry, read that stream completely,
% build a global context from its bytes with .jbig2makeglobalctx, and
% store the result in <parmdict> under /.jbig2globalctx.
% The position of PDFfile is saved first and restored afterwards.
% A <parmdict> without /JBIG2Globals is returned untouched.
dup /JBIG2Globals knownoget {
% make global ctx
PDFfile fileposition exch % resolvestream is not reentrant
true resolvestream % stack after: PDFfileposition -file-
% Read the data in a loop until EOF so that we can move the strings into
% a bytestring.  'counttomark 1 add index' fetches the file object that
% sits just below the mark; the loop ends on the first short read, whose
% (possibly empty) string is still kept in the array.
[ { counttomark 1 add index 60000 string readstring not { exit } if } loop ]
% Stack: parmdict PDFfileposition -file- [strings]
exch pop 0 1 index { length add } forall % compute the total length
% now copy the data from the array of strings into a bytestring
% Stack inside the forall: bytestring offset string
.bytestring exch 0 exch { 3 copy putinterval length add } forall pop
% Stack: parmdict PDFfileposition bytestring
.jbig2makeglobalctx
% Put PDFfile back where it was before the globals stream was read.
PDFfile 3 -1 roll setfileposition
% Stack: parmdict globalctx
1 index exch
/.jbig2globalctx exch put
} if
} bind def
% When used with a PDF image dict, the JPXDecode filter needs to know
% about any ColorSpace entries, since this overrides whatever is in
% the image stream itself. We therefore propagate any such key into
% a filter's DecodeParms.
/jpxparmfix {		% <streamdict> <readdata?> jpxparmfix <streamdict> <readdata?>
  % When the stream's /Filter is exactly the name /JPXDecode and the
  % stream dictionary has a /ColorSpace, copy that ColorSpace into the
  % stream's DecodeParms dictionary, creating the dictionary if needed.
  % Both operands are left on the stack.
  % TODO: handle filter arrays
  1 index /Filter .knownget { /JPXDecode eq } { //false } ifelse {
    1 index /ColorSpace knownoget {
      % Stack: streamdict readdata? colorspace
      2 index /DecodeParms knownoget {
        % Insert into the existing DecodeParms dictionary.
        exch /ColorSpace exch put
      } {
        % No DecodeParms yet: build a one-entry dictionary holding the
        % ColorSpace, then attach it to the stream dictionary.
        /ColorSpace exch 1 dict dup 4 2 roll put
        % Stack: streamdict readdata? newparms
        2 index /DecodeParms 3 -1 roll put
      } ifelse
    } if
  } if
} bind def
% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
% - Whether we are going to interpret the stream or just read data
% from it, we impose a SubFileDecode filter that reads just the
% requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream { % <streamdict> <readdata?> resolvestream <stream>
% Turn a stream dictionary into a readable PostScript stream with
% decryption (via pdf_decrypt_stream) and all of its filters applied.
% Three cases are handled:
% 1. the dictionary has /F: the data is read from that external file,
% using /FFilter and /FDecodeParms;
% 2. /Length is present and equal to 0: an empty stream is returned;
% 3. otherwise the data comes from the dictionary's /File entry, using
% /Filter and /DecodeParms.
% First propagate any ColorSpace into the DecodeParms of a JPXDecode stream.
jpxparmfix
1 index /F knownoget {
% This stream is stored on an external file.
% NOTE(review): the file name comes from the PDF itself and is opened
% for reading without any check visible here -- confirm this is guarded
% elsewhere when processing untrusted documents.
(r) file 3 -1 roll
/FDecodeParms /FFilter filterparms
% Stack: readdata? file dict parms filternames
4 -1 roll exch
% Stack: readdata? dict parms file filternames
pdf_decrypt_stream
applyfilters
} {
exch
% Stack: readdata? dict
dup /Length knownoget { 0 eq } { //false } ifelse {
% Zero-length stream: supply an empty data source.
() 0 () /SubFileDecode filter
} {
% If the dictionary records where its data starts, seek /File there.
dup /FilePosition .knownget {
1 index /File get exch setfileposition
} if
% Stack: readdata? dict
/DecodeParms /Filter filterparms
% Stack: readdata? dict parms filternames
2 index /File get exch
% Stack: readdata? dict parms file/string filternames
dup length 0 eq {
% All the PDF filters have EOD markers, but in this case
% there is no specified filter.
% '5 index' below is the readdata? flag.
exch dup type /filetype eq 5 index or {
% Use length for any files or reading data from any source.
3 index /Length knownoget not { 0 } if
} {
0 % Otherwise length of 0 for whole string
} ifelse
% Stack: readdata? dict parms filternames source length
% When the dictionary has /IDFlag the length is discarded and the source
% is used as it is; otherwise the source is limited by SubFileDecode.
4 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse
exch
pdf_decrypt_stream % add decryption if needed
% Discard the (empty) filter list and the parameters.
pop exch pop
} {
% Stack: readdata? dict parms source filternames
% Limit the raw data to /Length bytes (when known) before decoding.
exch 3 index /Length knownoget {
() /SubFileDecode filter
} if exch
pdf_decrypt_stream % add decryption if needed
applyfilters
} ifelse
} ifelse
} ifelse
% Stack: readdata? dict file
% Leave only the resulting stream.
exch pop exch pop
} bind def
% ============================ Name/number trees ============================ %
/nameoget {		% <nametree> <key> nameoget <obj|null>
  % Look <key> up in a name tree: leaf nodes keep their key/value pairs
  % under /Names.  Rearranges the operands to <key> /Names <nametree>.
  /Names 3 -1 roll .treeget
} bind def
/numoget {		% <numtree> <key> numoget <obj|null>
  % Look <key> up in a number tree: leaf nodes keep their key/value pairs
  % under /Nums.  Rearranges the operands to <key> /Nums <numtree>.
  /Nums 3 -1 roll .treeget
} bind def
/.treeget {		% <key> <leafkey> <tree> .treeget <obj|null>
  % Look <key> up in the tree node <tree>.  <leafkey> is the name under
  % which leaf nodes store their key/value array (/Names or /Nums).
  % An intermediate node (one with /Kids) is searched with .branchget;
  % a leaf node is searched with .leafget.
  dup /Kids knownoget {
    % Stack: key leafkey tree kids
    exch pop .branchget
  } {
    % Stack: key leafkey tree
    % Fetch the leaf array with knownoget rather than a plain 'get', so
    % that an indirectly referenced array is resolved like every other
    % object fetched in this file, and so that a malformed node with
    % neither /Kids nor <leafkey> gives null instead of an 'undefined'
    % error.
    exch knownoget { .leafget } { pop null } ifelse
  } ifelse
} bind def
/.branchget { % <key> <leafkey> <kids> .branchget <obj|null>
% Binary-search the array of child nodes <kids> for the child whose
% /Limits range contains <key>, then continue the lookup inside that
% child with .treeget.  The result is null once no children remain.
dup length 0 eq {
% No children left to examine: the key is not in the tree.
pop pop pop null
} {
% Pick the middle child (index = length / 2) and fetch it.
dup length -1 bitshift 2 copy oget
% Stack: key leafkey kids mid kids[mid]
% Unpack the child's /Limits array.
dup /Limits oget aload pop
% Stack: key leafkey kids mid kids[mid] min max
6 index lt {
% max < key: discard min and kids[mid], search the children after mid.
pop pop
1 add 1 index length 1 index sub getinterval .branchget
} {
5 index gt {
% min > key: discard kids[mid], search the children before mid.
pop
0 exch getinterval .branchget
} {
% min <= key <= max: drop kids and mid, descend into kids[mid].
exch pop exch pop .treeget
} ifelse
} ifelse
} ifelse
} bind def
/.leafget {		% <key> <pairs> .leafget <obj|null>
  % Binary-search <pairs>, a flat array [key1 value1 key2 value2 ...]
  % sorted by key, and return the value stored with <key>, or null if
  % the key is not present.
  dup length 2 lt {
    % Empty leaf array, or a lone key with no value: nothing can match.
    % Without this guard the search below would index an empty array
    % (rangecheck) or recurse forever on a 1-element array.
    pop pop null
  } {
    dup length 4 lt {
      % Exactly one complete pair remains.  A malformed trailing element
      % (length 3) is ignored here; letting it reach the split below
      % would give mid = 0 and an interval that never shrinks.
      dup 0 get 2 index eq { 1 oget } { pop null } ifelse
      exch pop
    } {
      % Length >= 4: mid is the even index nearest the middle, so it is
      % at least 2 and both halves are strictly shorter than <pairs>.
      dup length -1 bitshift -2 and 2 copy oget
		% Stack: key pairs mid pairs[mid]
      % pairs[mid] > key: keep the elements before mid;
      % otherwise keep the elements from mid to the end.
      3 index gt { 0 exch } { 1 index length 1 index sub } ifelse
      getinterval .leafget
    } ifelse
  } ifelse
} bind def
% Pop the dictionary that the definitions above were made in off the
% dictionary stack.
end % pdfdict
% NOTE(review): presumably restores the allocation mode that was saved
% earlier in this file -- the matching save is not visible here; confirm.
.setglobal
|