1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
/* Copyright (C) 2001-2006 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
implied.
This software is distributed under license and may not be copied, modified
or distributed except as expressly authorized under the terms of that
license. Refer to licensing information at http://www.artifex.com/
or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
*/
/* $Id$ */
/* Internal CMap structure definitions */
/* This file should be called gxcmap.h, except that name is already used. */
#ifndef gxfcmap_INCLUDED
# define gxfcmap_INCLUDED
#include "gsfcmap.h"
#include "gsuid.h"
#include "gxcid.h"
/*
* CMaps are the structures that map (possibly variable-length) characters
* appearing in a text string to glyph numbers in some font-specific space.
* The structure defined here generally follows Adobe's specifications, but
* the actual implementation of the code space and the lookup tables is
* virtual, so that the same interface can be used for direct access to the
* corresponding "cmap" structure in TrueType fonts, rather than having to
* convert that structure to the Adobe-based one.
*/
/*
* A CMap conceptually consists of three parts:
*
* - The code space, used for parsing the input string into (possibly
* variable-length) characters.
*
* - A 'def' map, which maps defined parsed characters to values.
*
* - A 'notdef' map, which maps parsed but undefined characters to
* values.
*
* The value of a character may be a string, a name, or a CID. For more
* information, see the Adobe documentation.
*/
/* ---------------- Code space ranges ---------------- */
/*
* A code space is a non-empty, lexicographically sorted sequence of
* code space ranges. Ranges must not overlap. In each range,
* first[i] <= last[i] for 0 <= i < size.
*/
/* Upper bound, in bytes, on the length of one character code. */
#define MAX_CMAP_CODE_SIZE 4
/*
* One code space range. first[] and last[] hold the two bounds of the
* range, one byte per code position; the ordering constraint
* first[i] <= last[i] applies to positions 0 .. size-1.
*/
typedef struct gx_code_space_range_s {
byte first[MAX_CMAP_CODE_SIZE];
byte last[MAX_CMAP_CODE_SIZE];
int size; /* 1 .. MAX_CMAP_CODE_SIZE */
} gx_code_space_range_t;
/* ---------------- Lookup tables ---------------- */
/*
* A lookup table is a non-empty sequence of lookup ranges. Each range has
* an associated sorted lookup table, indexed by the num_key_bytes low-order
* code bytes. If key_is_range is true, each key is a range (2 x key_size
* bytes); if false, each key is a single code (key_size bytes).
*
* The only difference between CODE_VALUE_CID and CODE_VALUE_NOTDEF is
* that after looking up a CID in a table, for CODE_VALUE_CID the result
* is incremented by the difference between the input code and the key
* (i.e., a single CODE_VALUE_CID entry actually represents a range of
* CIDs), whereas for CODE_VALUE_NOTDEF, the result is not incremented.
* The defined-character map for a CMap uses the former behavior; the
* notdef map uses the latter.
*
* CODE_VALUE_GLYPH and CODE_VALUE_CHARS are reserved for
* rearranged font CMaps, which are not implemented yet.
*/
/*
 * Kind of value stored in a lookup entry. The numeric values are the
 * ones the compiler would assign implicitly; they are spelled out here
 * only for readability.
 */
typedef enum {
    CODE_VALUE_CID = 0,     /* CIDs */
    CODE_VALUE_GLYPH = 1,   /* glyphs */
    CODE_VALUE_CHARS = 2,   /* character(s) */
    CODE_VALUE_NOTDEF = 3   /* CID - for notdef(char|range) dst */
} gx_cmap_code_value_type_t;
/* Largest enumerator of gx_cmap_code_value_type_t. */
#define CODE_VALUE_MAX CODE_VALUE_NOTDEF
/*
* One lookup entry (key plus value), as delivered in the entry member of
* gs_cmap_lookups_enum_t. According to the enumeration notes later in
* this file, next_lookup fills in key_size, key_is_range, value_type and
* font_index, while next_entry fills in key[0], key[1] and value.
*/
typedef struct gx_cmap_lookup_entry_s {
/* Key */
byte key[2][MAX_CMAP_CODE_SIZE]; /* [key_is_range + 1][key_size] */
int key_size; /* 0 .. MAX_CMAP_CODE_SIZE */
/* true: the key is a range (both rows of key[]); false: a single code (key[0] only) */
bool key_is_range;
/* Value */
gx_cmap_code_value_type_t value_type;
gs_const_string value;
int font_index; /* for rearranged fonts */
} gx_cmap_lookup_entry_t;
/* ---------------- CMaps proper ---------------- */
/*
* Define the elements common to all CMaps. Currently we include all
* elements from the Adobe specification except for the actual code space
* ranges and lookup tables.
*
* CMapType and id are common to all CMapTypes. We really only support the
* single Adobe standard CMap format. Note that the only documented values
* of CMapType in the PLRM are 0 and 1, which are equivalent; however, in
* the second PDF Reference, the CMapType for the example ToUnicode CMap is
* 2.
*
* glyph_name and glyph_name_data are only used if the CMap has lookup
* entries of type CODE_VALUE_GLYPH. We deliberately chose to make
* glyph_name a function pointer rather than including it in the procs
* virtual functions. The rationale is that the virtual functions are
* dependent on the representation of the CMap, so they should be set by the
* code that must work with this structure. However, glyph_name is not
* dependent on the representation of the CMap: it does not need to know
* anything about how the CMap is stored. Rather, it is meant to be used by
* the client who constructs the CMap, who decides how stored
* CODE_VALUE_GLYPH values correspond to printable glyph names. The same
* glyph_name procedure can, in principle, be used with multiple different
* subclasses of gs_cmap_t.
*/
/*
* Forward declaration of gs_cmap_t. The gs_cmap_DEFINED guard keeps the
* typedef from being repeated if it has already been emitted elsewhere.
*/
#ifndef gs_cmap_DEFINED
# define gs_cmap_DEFINED
typedef struct gs_cmap_s gs_cmap_t;
#endif
#define GS_CMAP_COMMON\
int CMapType; /* must be first */\
gs_id id; /* internal ID (no relation to UID) */\
/* End of entries common to all CMapTypes */\
gs_const_string CMapName;\
gs_cid_system_info_t *CIDSystemInfo; /* [num_fonts] */\
int num_fonts;\
float CMapVersion;\
gs_uid uid; /* XUID or nothing */\
long UIDOffset;\
int WMode;\
bool from_Unicode; /* if true, characters are Unicode */\
bool ToUnicode; /* if true, it is a ToUnicode CMap */\
gs_glyph_name_proc_t glyph_name; /* glyph name procedure for printing */\
void *glyph_name_data; /* closure data */\
const gs_cmap_procs_t *procs
/*
* Structure descriptor for gs_cmap_t. extern_st declares st_cmap here;
* public_st_cmap() is the matching definition, to be expanded in gsfcmap.c.
* The descriptor lists CMapName via GC_CONST_STRING_ELT and CIDSystemInfo,
* uid.xvalues and glyph_name_data via GC_OBJ_ELT3.
* NOTE(review): glyph_name and procs are not listed -- presumably because
* they do not refer to collectable objects; confirm before adding members.
*/
extern_st(st_cmap);
#define public_st_cmap() /* in gsfcmap.c */\
BASIC_PTRS(cmap_ptrs) {\
GC_CONST_STRING_ELT(gs_cmap_t, CMapName),\
GC_OBJ_ELT3(gs_cmap_t, CIDSystemInfo, uid.xvalues, glyph_name_data)\
};\
gs_public_st_basic(st_cmap, gs_cmap_t, "gs_cmap_t", cmap_ptrs, cmap_data)
/*
* Forward declarations of the two enumerator types, needed by the
* procedure signatures in gs_cmap_procs_t. The structures themselves are
* defined in the "Enumerators" section of this file.
*/
typedef struct gs_cmap_ranges_enum_s gs_cmap_ranges_enum_t;
typedef struct gs_cmap_lookups_enum_s gs_cmap_lookups_enum_t;
/*
* Virtual procedures of a CMap. Every CMap refers to one such table
* through the procs member of GS_CMAP_COMMON.
*/
typedef struct gs_cmap_procs_s {
/*
* Decode and map a character from a string using a CMap.
* See gsfcmap.h for details.
*/
int (*decode_next)(const gs_cmap_t *pcmap, const gs_const_string *str,
uint *pindex, uint *pfidx,
gs_char *pchr, gs_glyph *pglyph);
/*
* Initialize an enumeration of code space ranges. See below.
*/
void (*enum_ranges)(const gs_cmap_t *pcmap,
gs_cmap_ranges_enum_t *penum);
/*
* Initialize an enumeration of lookups. See below.
* NOTE(review): which presumably has the same meaning as for
* gs_cmap_lookups_enum_init (0 = defined characters, 1 = notdef).
*/
void (*enum_lookups)(const gs_cmap_t *pcmap, int which,
gs_cmap_lookups_enum_t *penum);
/*
* Check if the cmap is identity.
* NOTE(review): the meaning of font_index_only is not documented in
* this header; see the implementations.
*/
bool (*is_identity)(const gs_cmap_t *pcmap, int font_index_only);
} gs_cmap_procs_t;
/*
* The base CMap structure: it consists of exactly the common members and
* nothing else.
*/
struct gs_cmap_s {
GS_CMAP_COMMON;
};
/* ---------------- Enumerators ---------------- */
/*
* Define enumeration structures for code space ranges and lookup tables.
* Since all current and currently envisioned implementations are very
* simple, we don't bother to make this fully general, with subclasses
* or a "finish" procedure.
*/
/*
* Procedure table of a code space range enumerator.
* NOTE(review): next_range presumably implements gs_cmap_enum_next_range
* and follows its convention (0 if OK, 1 if finished, <0 if error);
* confirm against the implementation.
*/
typedef struct gs_cmap_ranges_enum_procs_s {
int (*next_range)(gs_cmap_ranges_enum_t *penum);
} gs_cmap_ranges_enum_procs_t;
/*
* State of an enumeration over the code space ranges of one CMap.
* Only the range member is meant for clients.
*/
struct gs_cmap_ranges_enum_s {
/*
* Return the next code space range here.
*/
gx_code_space_range_t range;
/*
* The rest of the information is private to the implementation.
*/
const gs_cmap_t *cmap;
const gs_cmap_ranges_enum_procs_t *procs;
uint index; /* starts at 0 per the gs_cmap_ranges_enum_setup notes */
};
/*
* Procedure table of a lookup enumerator.
* NOTE(review): next_lookup and next_entry presumably implement
* gs_cmap_enum_next_lookup and gs_cmap_enum_next_entry respectively, with
* the same convention (0 if OK, 1 if finished, <0 if error); confirm
* against the implementation.
*/
typedef struct gs_cmap_lookups_enum_procs_s {
int (*next_lookup)(gs_cmap_lookups_enum_t *penum);
int (*next_entry)(gs_cmap_lookups_enum_t *penum);
} gs_cmap_lookups_enum_procs_t;
/*
* State of an enumeration over the lookups (and their entries) of one
* CMap. Only the entry member is meant for clients.
*/
struct gs_cmap_lookups_enum_s {
/*
* Return the next lookup and entry here.
*/
gx_cmap_lookup_entry_t entry;
/*
* The rest of the information is private to the implementation.
*/
const gs_cmap_t *cmap;
const gs_cmap_lookups_enum_procs_t *procs;
uint index[2];
/*
* Scratch bytes, large enough for one gs_glyph or one gs_char.
* NOTE(review): presumably the storage "allocated locally (in the
* enumerator itself)" that entry.value may point to -- confirm.
*/
byte temp_value[max(sizeof(gs_glyph), sizeof(gs_char))];
};
/*
* Define a vacuous next_lookup procedure, useful for the notdef lookups
* for CMaps that don't have any.
*/
/*
* NOTE(review): the object is a complete gs_cmap_lookups_enum_procs_t
* (next_lookup and next_entry); it is only declared here, the definition
* lives in another translation unit.
*/
extern const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs;
/* ---------------- Client procedures ---------------- */
/*
* Initialize the enumeration of the code space ranges, and enumerate
* the next range. gs_cmap_enum_next_range returns 0 if OK, 1 if finished,
* <0 if error.
* The intended usage is:
*
* for (gs_cmap_ranges_enum_init(pcmap, &renum);
* (code = gs_cmap_enum_next_range(&renum)) == 0; ) {
* ...
* }
* if (code < 0) <<error>>
*/
/* Start enumerating the code space ranges of pcmap, using *penum as state. */
void gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap,
gs_cmap_ranges_enum_t *penum);
/* Fetch the next range into penum->range; 0 if OK, 1 if finished, <0 if error. */
int gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum);
/*
* Initialize the enumeration of the lookups, and enumerate the next
* lookup or entry. which = 0 for defined characters, which = 1 for
* notdef. gs_cmap_enum_next_lookup and gs_cmap_enum_next_entry return
* 0 if OK, 1 if finished, <0 if error. The intended usage is:
*
* for (gs_cmap_lookups_enum_init(pcmap, which, &lenum);
* (code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) {
* while ((code = gs_cmap_enum_next_entry(&lenum)) == 0) {
* ...
* }
* if (code < 0) <<error>>
* }
* if (code < 0) <<error>>
*
* Note that next_lookup sets (at least) penum->entry.
* key_size, key_is_range, value_type, font_index
* whereas next_entry sets penum->entry.
* key[0][*], key[1][*], value
* Clients must not modify any members of the enumerator.
* The bytes of the value string may be allocated locally (in the enumerator
* itself) and not survive from one call to the next.
*/
/* Start enumerating the lookups of pcmap selected by which (0 = def, 1 = notdef). */
void gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which,
gs_cmap_lookups_enum_t *penum);
/* Advance to the next lookup; 0 if OK, 1 if finished, <0 if error. */
int gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum);
/* Advance to the next entry of the current lookup; same return convention. */
int gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum);
/* ---------------- Implementation procedures ---------------- */
/*
* Initialize a just-allocated CMap, to ensure that all pointers are clean
* for the GC. Note that this only initializes the common part.
*/
/*
* NOTE(review): num_fonts presumably becomes the num_fonts member, i.e. the
* length of the CIDSystemInfo array -- confirm against the definition.
*/
void gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts);
/*
* Allocate and initialize (the common part of) a CMap.
*/
/*
* NOTE(review): judging by the parameter names only -- *ppcmap receives the
* new CMap, pstype selects the structure type to allocate, map_name and
* name_size give the CMap name, pcidsi refers to num_fonts system-info
* records, and procs becomes the CMap's procedure table. The int result is
* presumably 0 or a negative error code. Confirm against the definition.
*/
int gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype,
int wmode, const byte *map_name, uint name_size,
const gs_cid_system_info_t *pcidsi, int num_fonts,
const gs_cmap_procs_t *procs, gs_memory_t *mem);
/*
* Free a CMap.
* NOTE(review): which members are released, and the meaning of the int
* result, are not visible in this header -- confirm against the definition.
*/
int gs_cmap_free(gs_cmap_t *pcmap, gs_memory_t *mem);
/*
* Initialize an enumerator with convenient defaults (index = 0).
*/
/* Set up a code space range enumerator for pcmap with the given procs. */
void gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum,
const gs_cmap_t *pcmap,
const gs_cmap_ranges_enum_procs_t *procs);
/*
* Set up a lookup enumerator for pcmap with the given procs.
* NOTE(review): presumably clears both elements of index -- confirm.
*/
void gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum,
const gs_cmap_t *pcmap,
const gs_cmap_lookups_enum_procs_t *procs);
/*
* Check for identity CMap. Uses a fast check for special cases.
*/
/*
* NOTE(review): the meaning of font_index_only is not documented in this
* header; the parameter mirrors the one of the is_identity procedure in
* gs_cmap_procs_t.
*/
bool gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only);
/*
* For an arbitrary CMap, compute whether it is identity.
* It is not applicable to gs_cmap_ToUnicode_t due to
* different sizes of domain keys and range values.
*/
/*
* NOTE(review): same signature as the is_identity member of
* gs_cmap_procs_t, so presumably usable as that procedure -- confirm.
*/
bool gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only);
#endif /* gxfcmap_INCLUDED */
|