1 : // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "cmap.h"
6 :
7 : #include <algorithm>
8 : #include <set>
9 : #include <utility>
10 : #include <vector>
11 :
12 : #include "maxp.h"
13 : #include "os2.h"
14 :
15 : // cmap - Character To Glyph Index Mapping Table
16 : // http://www.microsoft.com/opentype/otspec/cmap.htm
17 :
18 : namespace {
19 :
// One entry of the cmap encoding-record directory, together with the format
// and length that ots_cmap_parse() later reads from the subtable it points at.
struct CMAPSubtableHeader {
  uint16_t platform;  // platform ID from the encoding record
  uint16_t encoding;  // encoding ID from the encoding record
  uint32_t offset;    // byte offset of the subtable from the start of the table
  uint16_t format;    // subtable format, read from the subtable itself
  uint32_t length;    // subtable length in bytes; 0 for formats we don't handle
};
27 :
// One segment of a format 4 subtable, as collected by ParseFormat4().
struct Subtable314Range {
  uint16_t start_range;      // first character code of the segment
  uint16_t end_range;        // last character code of the segment (inclusive)
  int16_t id_delta;          // added to the code when id_range_offset is 0
  uint16_t id_range_offset;  // byte offset towards the glyph id array, or 0
  // Position, within the subtable, of the id_range_offset field itself. The
  // glyph id lookup is computed relative to this position.
  uint32_t id_range_offset_offset;
};
35 :
// The maximum number of groups in format 12, 13 or 14 subtables.
// Note: 0xFFFF is the maximum number of glyphs in a single font file.
const unsigned kMaxCMAPGroups = 0xFFFF;

// Glyph array size for the Mac Roman (format 0) table.
const size_t kFormat0ArraySize = 256;

// The upper limit of the Unicode code point.
const uint32_t kUnicodeUpperLimit = 0x10FFFF;

// The maximum number of UVS records. This is the total number of code points
// in the three variation selector ranges listed below (3 + 16 + 240).
const uint32_t kMaxCMAPSelectorRecords = 259;
// The ranges of Unicode variation selectors (UVSes) are:
//   0x180B-0x180D   (3 code points)
//   0xFE00-0xFE0F   (16 code points)
//   0xE0100-0xE01EF (240 code points)
const uint32_t kMongolianVSStart = 0x180B;
const uint32_t kMongolianVSEnd = 0x180D;
const uint32_t kVSStart = 0xFE00;
const uint32_t kVSEnd = 0xFE0F;
const uint32_t kIVSStart = 0xE0100;
const uint32_t kIVSEnd = 0xE01EF;
// The largest value representable in 24 bits; used as the upper bound for
// the end of a default UVS range (start value + additional count).
const uint32_t kUVSUpperLimit = 0xFFFFFF;
59 :
60 : // Parses Format 4 tables
61 0 : bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding,
62 : const uint8_t *data, size_t length, uint16_t num_glyphs) {
63 0 : ots::Buffer subtable(data, length);
64 :
65 : // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
66 : // whole thing and recompacting it, we validate it and include it verbatim
67 : // in the output.
68 :
69 0 : if (!file->os2) {
70 0 : return OTS_FAILURE();
71 : }
72 :
73 0 : if (!subtable.Skip(4)) {
74 0 : return OTS_FAILURE();
75 : }
76 0 : uint16_t language = 0;
77 0 : if (!subtable.ReadU16(&language)) {
78 0 : return OTS_FAILURE();
79 : }
80 0 : if (language) {
81 : // Platform ID 3 (windows) subtables should have language '0'.
82 0 : return OTS_FAILURE();
83 : }
84 :
85 : uint16_t segcountx2, search_range, entry_selector, range_shift;
86 0 : segcountx2 = search_range = entry_selector = range_shift = 0;
87 0 : if (!subtable.ReadU16(&segcountx2) ||
88 0 : !subtable.ReadU16(&search_range) ||
89 0 : !subtable.ReadU16(&entry_selector) ||
90 0 : !subtable.ReadU16(&range_shift)) {
91 0 : return OTS_FAILURE();
92 : }
93 :
94 0 : if (segcountx2 & 1 || search_range & 1) {
95 0 : return OTS_FAILURE();
96 : }
97 0 : const uint16_t segcount = segcountx2 >> 1;
98 : // There must be at least one segment according the spec.
99 0 : if (segcount < 1) {
100 0 : return OTS_FAILURE();
101 : }
102 :
103 : // log2segcount is the maximal x s.t. 2^x < segcount
104 0 : unsigned log2segcount = 0;
105 0 : while (1u << (log2segcount + 1) <= segcount) {
106 0 : log2segcount++;
107 : }
108 :
109 0 : const uint16_t expected_search_range = 2 * 1u << log2segcount;
110 0 : if (expected_search_range != search_range) {
111 0 : return OTS_FAILURE();
112 : }
113 :
114 0 : if (entry_selector != log2segcount) {
115 0 : return OTS_FAILURE();
116 : }
117 :
118 0 : const uint16_t expected_range_shift = segcountx2 - search_range;
119 0 : if (range_shift != expected_range_shift) {
120 0 : return OTS_FAILURE();
121 : }
122 :
123 0 : std::vector<Subtable314Range> ranges(segcount);
124 :
125 0 : for (unsigned i = 0; i < segcount; ++i) {
126 0 : if (!subtable.ReadU16(&ranges[i].end_range)) {
127 0 : return OTS_FAILURE();
128 : }
129 : }
130 :
131 : uint16_t padding;
132 0 : if (!subtable.ReadU16(&padding)) {
133 0 : return OTS_FAILURE();
134 : }
135 0 : if (padding) {
136 0 : return OTS_FAILURE();
137 : }
138 :
139 0 : for (unsigned i = 0; i < segcount; ++i) {
140 0 : if (!subtable.ReadU16(&ranges[i].start_range)) {
141 0 : return OTS_FAILURE();
142 : }
143 : }
144 0 : for (unsigned i = 0; i < segcount; ++i) {
145 0 : if (!subtable.ReadS16(&ranges[i].id_delta)) {
146 0 : return OTS_FAILURE();
147 : }
148 : }
149 0 : for (unsigned i = 0; i < segcount; ++i) {
150 0 : ranges[i].id_range_offset_offset = subtable.offset();
151 0 : if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
152 0 : return OTS_FAILURE();
153 : }
154 :
155 0 : if (ranges[i].id_range_offset & 1) {
156 : // Some font generators seem to put 65535 on id_range_offset
157 : // for 0xFFFF-0xFFFF range.
158 : // (e.g., many fonts in http://www.princexml.com/fonts/)
159 0 : if (i == segcount - 1u) {
160 : OTS_WARNING("bad id_range_offset");
161 0 : ranges[i].id_range_offset = 0;
162 : // The id_range_offset value in the transcoded font will not change
163 : // since this table is not actually "transcoded" yet.
164 : } else {
165 0 : return OTS_FAILURE();
166 : }
167 : }
168 : }
169 :
170 : // ranges must be ascending order, based on the end_code. Ranges may not
171 : // overlap.
172 0 : for (unsigned i = 1; i < segcount; ++i) {
173 0 : if ((i == segcount - 1u) &&
174 0 : (ranges[i - 1].start_range == 0xffff) &&
175 0 : (ranges[i - 1].end_range == 0xffff) &&
176 0 : (ranges[i].start_range == 0xffff) &&
177 0 : (ranges[i].end_range == 0xffff)) {
178 : // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
179 : // We'll accept them as an exception.
180 : OTS_WARNING("multiple 0xffff terminators found");
181 0 : continue;
182 : }
183 :
184 : // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
185 : // unsorted table...
186 0 : if (ranges[i].end_range <= ranges[i - 1].end_range) {
187 0 : return OTS_FAILURE();
188 : }
189 0 : if (ranges[i].start_range <= ranges[i - 1].end_range) {
190 0 : return OTS_FAILURE();
191 : }
192 :
193 : // On many fonts, the value of {first, last}_char_index are incorrect.
194 : // Fix them.
195 0 : if (file->os2->first_char_index != 0xFFFF &&
196 0 : ranges[i].start_range != 0xFFFF &&
197 0 : file->os2->first_char_index > ranges[i].start_range) {
198 0 : file->os2->first_char_index = ranges[i].start_range;
199 : }
200 0 : if (file->os2->last_char_index != 0xFFFF &&
201 0 : ranges[i].end_range != 0xFFFF &&
202 0 : file->os2->last_char_index < ranges[i].end_range) {
203 0 : file->os2->last_char_index = ranges[i].end_range;
204 : }
205 : }
206 :
207 : // The last range must end at 0xffff
208 0 : if (ranges[segcount - 1].end_range != 0xffff) {
209 0 : return OTS_FAILURE();
210 : }
211 :
212 : // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
213 : // each code-point defined in the table and make sure that they are all valid
214 : // glyphs and that we don't access anything out-of-bounds.
215 0 : for (unsigned i = 1; i < segcount; ++i) {
216 0 : for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
217 0 : const uint16_t code_point = cp;
218 0 : if (ranges[i].id_range_offset == 0) {
219 : // this is explictly allowed to overflow in the spec
220 0 : const uint16_t glyph = code_point + ranges[i].id_delta;
221 0 : if (glyph >= num_glyphs) {
222 0 : return OTS_FAILURE();
223 : }
224 : } else {
225 0 : const uint16_t range_delta = code_point - ranges[i].start_range;
226 : // this might seem odd, but it's true. The offset is relative to the
227 : // location of the offset value itself.
228 0 : const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
229 0 : ranges[i].id_range_offset +
230 0 : range_delta * 2;
231 : // We need to be able to access a 16-bit value from this offset
232 0 : if (glyph_id_offset + 1 >= length) {
233 0 : return OTS_FAILURE();
234 : }
235 : uint16_t glyph;
236 0 : memcpy(&glyph, data + glyph_id_offset, 2);
237 0 : glyph = ntohs(glyph);
238 0 : if (glyph >= num_glyphs) {
239 0 : return OTS_FAILURE();
240 : }
241 : }
242 : }
243 : }
244 :
245 : // We accept the table.
246 : // TODO(yusukes): transcode the subtable.
247 0 : if (platform == 3 && encoding == 0) {
248 0 : file->cmap->subtable_3_0_4_data = data;
249 0 : file->cmap->subtable_3_0_4_length = length;
250 0 : } else if (platform == 3 && encoding == 1) {
251 0 : file->cmap->subtable_3_1_4_data = data;
252 0 : file->cmap->subtable_3_1_4_length = length;
253 0 : } else if (platform == 0 && encoding == 3) {
254 0 : file->cmap->subtable_0_3_4_data = data;
255 0 : file->cmap->subtable_0_3_4_length = length;
256 : } else {
257 0 : return OTS_FAILURE();
258 : }
259 :
260 0 : return true;
261 : }
262 :
263 0 : bool Parse31012(ots::OpenTypeFile *file,
264 : const uint8_t *data, size_t length, uint16_t num_glyphs) {
265 0 : ots::Buffer subtable(data, length);
266 :
267 : // Format 12 tables are simple. We parse these and fully serialise them
268 : // later.
269 :
270 0 : if (!subtable.Skip(8)) {
271 0 : return OTS_FAILURE();
272 : }
273 0 : uint32_t language = 0;
274 0 : if (!subtable.ReadU32(&language)) {
275 0 : return OTS_FAILURE();
276 : }
277 0 : if (language) {
278 0 : return OTS_FAILURE();
279 : }
280 :
281 0 : uint32_t num_groups = 0;
282 0 : if (!subtable.ReadU32(&num_groups)) {
283 0 : return OTS_FAILURE();
284 : }
285 0 : if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
286 0 : return OTS_FAILURE();
287 : }
288 :
289 : std::vector<ots::OpenTypeCMAPSubtableRange> &groups
290 0 : = file->cmap->subtable_3_10_12;
291 0 : groups.resize(num_groups);
292 :
293 0 : for (unsigned i = 0; i < num_groups; ++i) {
294 0 : if (!subtable.ReadU32(&groups[i].start_range) ||
295 0 : !subtable.ReadU32(&groups[i].end_range) ||
296 0 : !subtable.ReadU32(&groups[i].start_glyph_id)) {
297 0 : return OTS_FAILURE();
298 : }
299 :
300 0 : if (groups[i].start_range > kUnicodeUpperLimit ||
301 0 : groups[i].end_range > kUnicodeUpperLimit ||
302 0 : groups[i].start_glyph_id > 0xFFFF) {
303 0 : return OTS_FAILURE();
304 : }
305 :
306 : // [0xD800, 0xDFFF] are surrogate code points.
307 0 : if (groups[i].start_range >= 0xD800 &&
308 0 : groups[i].start_range <= 0xDFFF) {
309 0 : return OTS_FAILURE();
310 : }
311 0 : if (groups[i].end_range >= 0xD800 &&
312 0 : groups[i].end_range <= 0xDFFF) {
313 0 : return OTS_FAILURE();
314 : }
315 0 : if (groups[i].start_range < 0xD800 &&
316 0 : groups[i].end_range > 0xDFFF) {
317 0 : return OTS_FAILURE();
318 : }
319 :
320 : // We assert that the glyph value is within range. Because of the range
321 : // limits, above, we don't need to worry about overflow.
322 0 : if (groups[i].end_range < groups[i].start_range) {
323 0 : return OTS_FAILURE();
324 : }
325 0 : if ((groups[i].end_range - groups[i].start_range) +
326 0 : groups[i].start_glyph_id > num_glyphs) {
327 0 : return OTS_FAILURE();
328 : }
329 : }
330 :
331 : // the groups must be sorted by start code and may not overlap
332 0 : for (unsigned i = 1; i < num_groups; ++i) {
333 0 : if (groups[i].start_range <= groups[i - 1].start_range) {
334 0 : return OTS_FAILURE();
335 : }
336 0 : if (groups[i].start_range <= groups[i - 1].end_range) {
337 0 : return OTS_FAILURE();
338 : }
339 : }
340 :
341 0 : return true;
342 : }
343 :
344 0 : bool Parse31013(ots::OpenTypeFile *file,
345 : const uint8_t *data, size_t length, uint16_t num_glyphs) {
346 0 : ots::Buffer subtable(data, length);
347 :
348 : // Format 13 tables are simple. We parse these and fully serialise them
349 : // later.
350 :
351 0 : if (!subtable.Skip(8)) {
352 0 : return OTS_FAILURE();
353 : }
354 0 : uint16_t language = 0;
355 0 : if (!subtable.ReadU16(&language)) {
356 0 : return OTS_FAILURE();
357 : }
358 0 : if (language) {
359 0 : return OTS_FAILURE();
360 : }
361 :
362 0 : uint32_t num_groups = 0;
363 0 : if (!subtable.ReadU32(&num_groups)) {
364 0 : return OTS_FAILURE();
365 : }
366 :
367 : // We limit the number of groups in the same way as in 3.10.12 tables. See
368 : // the comment there in
369 0 : if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
370 0 : return OTS_FAILURE();
371 : }
372 :
373 : std::vector<ots::OpenTypeCMAPSubtableRange> &groups
374 0 : = file->cmap->subtable_3_10_13;
375 0 : groups.resize(num_groups);
376 :
377 0 : for (unsigned i = 0; i < num_groups; ++i) {
378 0 : if (!subtable.ReadU32(&groups[i].start_range) ||
379 0 : !subtable.ReadU32(&groups[i].end_range) ||
380 0 : !subtable.ReadU32(&groups[i].start_glyph_id)) {
381 0 : return OTS_FAILURE();
382 : }
383 :
384 : // We conservatively limit all of the values to protect some parsers from
385 : // overflows
386 0 : if (groups[i].start_range > kUnicodeUpperLimit ||
387 0 : groups[i].end_range > kUnicodeUpperLimit ||
388 0 : groups[i].start_glyph_id > 0xFFFF) {
389 0 : return OTS_FAILURE();
390 : }
391 :
392 0 : if (groups[i].start_glyph_id >= num_glyphs) {
393 0 : return OTS_FAILURE();
394 : }
395 : }
396 :
397 : // the groups must be sorted by start code and may not overlap
398 0 : for (unsigned i = 1; i < num_groups; ++i) {
399 0 : if (groups[i].start_range <= groups[i - 1].start_range) {
400 0 : return OTS_FAILURE();
401 : }
402 0 : if (groups[i].start_range <= groups[i - 1].end_range) {
403 0 : return OTS_FAILURE();
404 : }
405 : }
406 :
407 0 : return true;
408 : }
409 :
410 0 : bool Parse0514(ots::OpenTypeFile *file,
411 : const uint8_t *data, size_t length, uint16_t num_glyphs) {
412 : // Unicode Variation Selector table
413 0 : ots::Buffer subtable(data, length);
414 :
415 : // Format 14 tables are simple. We parse these and fully serialise them
416 : // later.
417 :
418 : // Skip format (USHORT) and length (ULONG)
419 0 : if (!subtable.Skip(6)) {
420 0 : return OTS_FAILURE();
421 : }
422 :
423 0 : uint32_t num_records = 0;
424 0 : if (!subtable.ReadU32(&num_records)) {
425 0 : return OTS_FAILURE();
426 : }
427 0 : if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
428 0 : return OTS_FAILURE();
429 : }
430 :
431 : std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
432 0 : = file->cmap->subtable_0_5_14;
433 0 : records.resize(num_records);
434 :
435 0 : for (unsigned i = 0; i < num_records; ++i) {
436 0 : if (!subtable.ReadU24(&records[i].var_selector) ||
437 0 : !subtable.ReadU32(&records[i].default_offset) ||
438 0 : !subtable.ReadU32(&records[i].non_default_offset)) {
439 0 : return OTS_FAILURE();
440 : }
441 : // Checks the value of variation selector
442 0 : if (!((records[i].var_selector >= kMongolianVSStart &&
443 0 : records[i].var_selector <= kMongolianVSEnd) ||
444 0 : (records[i].var_selector >= kVSStart &&
445 0 : records[i].var_selector <= kVSEnd) ||
446 0 : (records[i].var_selector >= kIVSStart &&
447 0 : records[i].var_selector <= kIVSEnd))) {
448 0 : return OTS_FAILURE();
449 : }
450 0 : if (i > 0 &&
451 0 : records[i-1].var_selector >= records[i].var_selector) {
452 0 : return OTS_FAILURE();
453 : }
454 :
455 : // Checks offsets
456 0 : if (!records[i].default_offset && !records[i].non_default_offset) {
457 0 : return OTS_FAILURE();
458 : }
459 0 : if (records[i].default_offset &&
460 0 : records[i].default_offset >= length) {
461 0 : return OTS_FAILURE();
462 : }
463 0 : if (records[i].non_default_offset &&
464 0 : records[i].non_default_offset >= length) {
465 0 : return OTS_FAILURE();
466 : }
467 : }
468 :
469 0 : for (unsigned i = 0; i < num_records; ++i) {
470 : // Checks default UVS table
471 0 : if (records[i].default_offset) {
472 0 : subtable.set_offset(records[i].default_offset);
473 0 : uint32_t num_ranges = 0;
474 0 : if (!subtable.ReadU32(&num_ranges)) {
475 0 : return OTS_FAILURE();
476 : }
477 0 : if (!num_ranges || num_ranges > kMaxCMAPGroups) {
478 0 : return OTS_FAILURE();
479 : }
480 :
481 0 : uint32_t last_unicode_value = 0;
482 : std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
483 0 : = records[i].ranges;
484 0 : ranges.resize(num_ranges);
485 :
486 0 : for (unsigned j = 0; j < num_ranges; ++j) {
487 0 : if (!subtable.ReadU24(&ranges[j].unicode_value) ||
488 0 : !subtable.ReadU8(&ranges[j].additional_count)) {
489 0 : return OTS_FAILURE();
490 : }
491 : const uint32_t check_value =
492 0 : ranges[j].unicode_value + ranges[j].additional_count;
493 0 : if (ranges[j].unicode_value == 0 ||
494 0 : ranges[j].unicode_value > kUnicodeUpperLimit ||
495 : check_value > kUVSUpperLimit ||
496 : (last_unicode_value &&
497 0 : ranges[j].unicode_value <= last_unicode_value)) {
498 0 : return OTS_FAILURE();
499 : }
500 0 : last_unicode_value = check_value;
501 : }
502 : }
503 :
504 : // Checks non default UVS table
505 0 : if (records[i].non_default_offset) {
506 0 : subtable.set_offset(records[i].non_default_offset);
507 0 : uint32_t num_mappings = 0;
508 0 : if (!subtable.ReadU32(&num_mappings)) {
509 0 : return OTS_FAILURE();
510 : }
511 0 : if (!num_mappings || num_mappings > kMaxCMAPGroups) {
512 0 : return OTS_FAILURE();
513 : }
514 :
515 0 : uint32_t last_unicode_value = 0;
516 : std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
517 0 : = records[i].mappings;
518 0 : mappings.resize(num_mappings);
519 :
520 0 : for (unsigned j = 0; j < num_mappings; ++j) {
521 0 : if (!subtable.ReadU24(&mappings[j].unicode_value) ||
522 0 : !subtable.ReadU16(&mappings[j].glyph_id)) {
523 0 : return OTS_FAILURE();
524 : }
525 0 : if (mappings[j].glyph_id == 0 ||
526 0 : mappings[j].unicode_value == 0 ||
527 0 : mappings[j].unicode_value > kUnicodeUpperLimit ||
528 : (last_unicode_value &&
529 0 : mappings[j].unicode_value <= last_unicode_value)) {
530 0 : return OTS_FAILURE();
531 : }
532 0 : last_unicode_value = mappings[j].unicode_value;
533 : }
534 : }
535 : }
536 :
537 0 : if (subtable.offset() != length) {
538 0 : return OTS_FAILURE();
539 : }
540 0 : file->cmap->subtable_0_5_14_length = subtable.offset();
541 0 : return true;
542 : }
543 :
544 0 : bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) {
545 : // Mac Roman table
546 0 : ots::Buffer subtable(data, length);
547 :
548 0 : if (!subtable.Skip(4)) {
549 0 : return OTS_FAILURE();
550 : }
551 0 : uint16_t language = 0;
552 0 : if (!subtable.ReadU16(&language)) {
553 0 : return OTS_FAILURE();
554 : }
555 0 : if (language) {
556 : // simsun.ttf has non-zero language id.
557 : OTS_WARNING("language id should be zero: %u", language);
558 : }
559 :
560 0 : file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize);
561 0 : for (size_t i = 0; i < kFormat0ArraySize; ++i) {
562 0 : uint8_t glyph_id = 0;
563 0 : if (!subtable.ReadU8(&glyph_id)) {
564 0 : return OTS_FAILURE();
565 : }
566 0 : file->cmap->subtable_1_0_0.push_back(glyph_id);
567 : }
568 :
569 0 : return true;
570 : }
571 :
572 : } // namespace
573 :
574 : namespace ots {
575 :
576 0 : bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
577 0 : Buffer table(data, length);
578 0 : file->cmap = new OpenTypeCMAP;
579 :
580 0 : uint16_t version = 0;
581 0 : uint16_t num_tables = 0;
582 0 : if (!table.ReadU16(&version) ||
583 0 : !table.ReadU16(&num_tables)) {
584 0 : return OTS_FAILURE();
585 : }
586 :
587 0 : if (version != 0) {
588 0 : return OTS_FAILURE();
589 : }
590 0 : if (!num_tables) {
591 0 : return OTS_FAILURE();
592 : }
593 :
594 0 : std::vector<CMAPSubtableHeader> subtable_headers;
595 :
596 : // read the subtable headers
597 0 : subtable_headers.reserve(num_tables);
598 0 : for (unsigned i = 0; i < num_tables; ++i) {
599 : CMAPSubtableHeader subt;
600 :
601 0 : if (!table.ReadU16(&subt.platform) ||
602 0 : !table.ReadU16(&subt.encoding) ||
603 0 : !table.ReadU32(&subt.offset)) {
604 0 : return OTS_FAILURE();
605 : }
606 :
607 0 : subtable_headers.push_back(subt);
608 : }
609 :
610 0 : const size_t data_offset = table.offset();
611 :
612 : // make sure that all the offsets are valid.
613 0 : uint32_t last_id = 0;
614 0 : for (unsigned i = 0; i < num_tables; ++i) {
615 0 : if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
616 0 : return OTS_FAILURE();
617 : }
618 0 : if (subtable_headers[i].offset < data_offset ||
619 0 : subtable_headers[i].offset >= length) {
620 0 : return OTS_FAILURE();
621 : }
622 :
623 : // check if the table is sorted first by platform ID, then by encoding ID.
624 : uint32_t current_id
625 0 : = (subtable_headers[i].platform << 16) + subtable_headers[i].encoding;
626 0 : if ((i != 0) && (last_id >= current_id)) {
627 0 : return OTS_FAILURE();
628 : }
629 0 : last_id = current_id;
630 : }
631 :
632 : // the format of the table is the first couple of bytes in the table. The
633 : // length of the table is stored in a format-specific way.
634 0 : for (unsigned i = 0; i < num_tables; ++i) {
635 0 : table.set_offset(subtable_headers[i].offset);
636 0 : if (!table.ReadU16(&subtable_headers[i].format)) {
637 0 : return OTS_FAILURE();
638 : }
639 :
640 0 : uint16_t len = 0;
641 0 : switch (subtable_headers[i].format) {
642 : case 0:
643 : case 4:
644 0 : if (!table.ReadU16(&len)) {
645 0 : return OTS_FAILURE();
646 : }
647 0 : subtable_headers[i].length = len;
648 0 : break;
649 : case 12:
650 : case 13:
651 0 : if (!table.Skip(2)) {
652 0 : return OTS_FAILURE();
653 : }
654 0 : if (!table.ReadU32(&subtable_headers[i].length)) {
655 0 : return OTS_FAILURE();
656 : }
657 0 : break;
658 : case 14:
659 0 : if (!table.ReadU32(&subtable_headers[i].length)) {
660 0 : return OTS_FAILURE();
661 : }
662 0 : break;
663 : default:
664 0 : subtable_headers[i].length = 0;
665 0 : break;
666 : }
667 : }
668 :
669 : // Now, verify that all the lengths are sane
670 0 : for (unsigned i = 0; i < num_tables; ++i) {
671 0 : if (!subtable_headers[i].length) continue;
672 0 : if (subtable_headers[i].length > 1024 * 1024 * 1024) {
673 0 : return OTS_FAILURE();
674 : }
675 : // We know that both the offset and length are < 1GB, so the following
676 : // addition doesn't overflow
677 : const uint32_t end_byte
678 0 : = subtable_headers[i].offset + subtable_headers[i].length;
679 0 : if (end_byte > length) {
680 0 : return OTS_FAILURE();
681 : }
682 : }
683 :
684 : // check that the cmap subtables are not overlapping.
685 0 : std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
686 0 : std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
687 0 : for (unsigned i = 0; i < num_tables; ++i) {
688 : const uint32_t end_byte
689 0 : = subtable_headers[i].offset + subtable_headers[i].length;
690 :
691 0 : if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
692 0 : end_byte)).second) {
693 : // Sometimes Unicode table and MS table share exactly the same data.
694 : // We'll allow this.
695 0 : continue;
696 : }
697 : overlap_checker.push_back(
698 0 : std::make_pair(subtable_headers[i].offset,
699 0 : static_cast<uint8_t>(1) /* start */));
700 : overlap_checker.push_back(
701 0 : std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
702 : }
703 0 : std::sort(overlap_checker.begin(), overlap_checker.end());
704 0 : int overlap_count = 0;
705 0 : for (unsigned i = 0; i < overlap_checker.size(); ++i) {
706 0 : overlap_count += (overlap_checker[i].second ? 1 : -1);
707 0 : if (overlap_count > 1) {
708 0 : return OTS_FAILURE();
709 : }
710 : }
711 :
712 : // we grab the number of glyphs in the file from the maxp table to make sure
713 : // that the character map isn't referencing anything beyound this range.
714 0 : if (!file->maxp) {
715 0 : return OTS_FAILURE();
716 : }
717 0 : const uint16_t num_glyphs = file->maxp->num_glyphs;
718 :
719 : // We only support a subset of the possible character map tables. Microsoft
720 : // 'strongly recommends' that everyone supports the Unicode BMP table with
721 : // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
722 : // Platform ID Encoding ID Format
723 : // 0 0 4 (Unicode Default)
724 : // 0 3 4 (Unicode BMP)
725 : // 0 3 12 (Unicode UCS-4)
726 : // 0 5 14 (Unicode Variation Sequences)
727 : // 1 0 0 (Mac Roman)
728 : // 3 0 4 (MS Symbol)
729 : // 3 1 4 (MS Unicode BMP)
730 : // 3 10 12 (MS Unicode UCS-4)
731 : // 3 10 13 (MS UCS-4 Fallback mapping)
732 : //
733 : // Note:
734 : // * 0-0-4 table is (usually) written as a 3-1-4 table. If 3-1-4 table
735 : // also exists, the 0-0-4 table is ignored.
736 : // * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
737 : // Some fonts which include 0-5-14 table seems to be required 0-3-4
738 : // table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists.
739 : // * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
740 : // exists, the 0-3-12 table is ignored.
741 : //
742 :
743 0 : for (unsigned i = 0; i < num_tables; ++i) {
744 0 : if (subtable_headers[i].platform == 0) {
745 : // Unicode platform
746 :
747 0 : if ((subtable_headers[i].encoding == 0) &&
748 0 : (subtable_headers[i].format == 4)) {
749 : // parse and output the 0-0-4 table as 3-1-4 table. Sometimes the 0-0-4
750 : // table actually points to MS symbol data and thus should be parsed as
751 : // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
752 : // recovered in ots_cmap_serialise().
753 0 : if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset,
754 0 : subtable_headers[i].length, num_glyphs)) {
755 0 : return OTS_FAILURE();
756 : }
757 0 : } else if ((subtable_headers[i].encoding == 3) &&
758 0 : (subtable_headers[i].format == 4)) {
759 : // parse and output the 0-3-4 table as 0-3-4 table.
760 0 : if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset,
761 0 : subtable_headers[i].length, num_glyphs)) {
762 0 : return OTS_FAILURE();
763 : }
764 0 : } else if ((subtable_headers[i].encoding == 3) &&
765 0 : (subtable_headers[i].format == 12)) {
766 : // parse and output the 0-3-12 table as 3-10-12 table.
767 0 : if (!Parse31012(file, data + subtable_headers[i].offset,
768 0 : subtable_headers[i].length, num_glyphs)) {
769 0 : return OTS_FAILURE();
770 : }
771 0 : } else if ((subtable_headers[i].encoding == 5) &&
772 0 : (subtable_headers[i].format == 14)) {
773 0 : if (!Parse0514(file, data + subtable_headers[i].offset,
774 0 : subtable_headers[i].length, num_glyphs)) {
775 0 : return OTS_FAILURE();
776 : }
777 : }
778 0 : } else if (subtable_headers[i].platform == 1) {
779 : // Mac platform
780 :
781 0 : if ((subtable_headers[i].encoding == 0) &&
782 0 : (subtable_headers[i].format == 0)) {
783 : // parse and output the 1-0-0 table.
784 0 : if (!Parse100(file, data + subtable_headers[i].offset,
785 0 : subtable_headers[i].length)) {
786 0 : return OTS_FAILURE();
787 : }
788 : }
789 0 : } else if (subtable_headers[i].platform == 3) {
790 : // MS platform
791 :
792 0 : switch (subtable_headers[i].encoding) {
793 : case 0:
794 : case 1:
795 0 : if (subtable_headers[i].format == 4) {
796 : // parse 3-0-4 or 3-1-4 table.
797 0 : if (!ParseFormat4(file, subtable_headers[i].platform,
798 0 : subtable_headers[i].encoding,
799 0 : data + subtable_headers[i].offset,
800 0 : subtable_headers[i].length, num_glyphs)) {
801 0 : return OTS_FAILURE();
802 : }
803 : }
804 0 : break;
805 : case 10:
806 0 : if (subtable_headers[i].format == 12) {
807 0 : file->cmap->subtable_3_10_12.clear();
808 0 : if (!Parse31012(file, data + subtable_headers[i].offset,
809 0 : subtable_headers[i].length, num_glyphs)) {
810 0 : return OTS_FAILURE();
811 : }
812 0 : } else if (subtable_headers[i].format == 13) {
813 0 : file->cmap->subtable_3_10_13.clear();
814 0 : if (!Parse31013(file, data + subtable_headers[i].offset,
815 0 : subtable_headers[i].length, num_glyphs)) {
816 0 : return OTS_FAILURE();
817 : }
818 : }
819 0 : break;
820 : }
821 : }
822 : }
823 :
824 0 : return true;
825 : }
826 :
827 0 : bool ots_cmap_should_serialise(OpenTypeFile *file) {
828 0 : return file->cmap != NULL;
829 : }
830 :
831 0 : bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) {
832 0 : const bool have_034 = file->cmap->subtable_0_3_4_data != NULL;
833 0 : const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0;
834 0 : const bool have_100 = file->cmap->subtable_1_0_0.size() != 0;
835 0 : const bool have_304 = file->cmap->subtable_3_0_4_data != NULL;
836 : // MS Symbol and MS Unicode tables should not co-exist.
837 : // See the comment above in 0-0-4 parser.
838 0 : const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data;
839 0 : const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0;
840 0 : const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0;
841 : const unsigned num_subtables = static_cast<unsigned>(have_034) +
842 : static_cast<unsigned>(have_0514) +
843 : static_cast<unsigned>(have_100) +
844 : static_cast<unsigned>(have_304) +
845 : static_cast<unsigned>(have_314) +
846 : static_cast<unsigned>(have_31012) +
847 0 : static_cast<unsigned>(have_31013);
848 0 : const off_t table_start = out->Tell();
849 :
850 : // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
851 : // (e.g., old fonts for Mac). We don't support them.
852 0 : if (!have_304 && !have_314 && !have_034) {
853 0 : return OTS_FAILURE();
854 : }
855 :
856 0 : if (!out->WriteU16(0) ||
857 0 : !out->WriteU16(num_subtables)) {
858 0 : return OTS_FAILURE();
859 : }
860 :
861 0 : const off_t record_offset = out->Tell();
862 0 : if (!out->Pad(num_subtables * 8)) {
863 0 : return OTS_FAILURE();
864 : }
865 :
866 0 : const off_t offset_034 = out->Tell();
867 0 : if (have_034) {
868 0 : if (!out->Write(file->cmap->subtable_0_3_4_data,
869 0 : file->cmap->subtable_0_3_4_length)) {
870 0 : return OTS_FAILURE();
871 : }
872 : }
873 :
874 0 : const off_t offset_0514 = out->Tell();
875 0 : if (have_0514) {
876 : const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
877 0 : = file->cmap->subtable_0_5_14;
878 0 : const unsigned num_records = records.size();
879 0 : if (!out->WriteU16(14) ||
880 0 : !out->WriteU32(file->cmap->subtable_0_5_14_length) ||
881 0 : !out->WriteU32(num_records)) {
882 0 : return OTS_FAILURE();
883 : }
884 0 : for (unsigned i = 0; i < num_records; ++i) {
885 0 : if (!out->WriteU24(records[i].var_selector) ||
886 0 : !out->WriteU32(records[i].default_offset) ||
887 0 : !out->WriteU32(records[i].non_default_offset)) {
888 0 : return OTS_FAILURE();
889 : }
890 : }
891 0 : for (unsigned i = 0; i < num_records; ++i) {
892 0 : if (records[i].default_offset) {
893 : const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
894 0 : = records[i].ranges;
895 0 : const unsigned num_ranges = ranges.size();
896 0 : if (!out->Seek(records[i].default_offset + offset_0514) ||
897 0 : !out->WriteU32(num_ranges)) {
898 0 : return OTS_FAILURE();
899 : }
900 0 : for (unsigned j = 0; j < num_ranges; ++j) {
901 0 : if (!out->WriteU24(ranges[j].unicode_value) ||
902 0 : !out->WriteU8(ranges[j].additional_count)) {
903 0 : return OTS_FAILURE();
904 : }
905 : }
906 : }
907 0 : if (records[i].non_default_offset) {
908 : const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
909 0 : = records[i].mappings;
910 0 : const unsigned num_mappings = mappings.size();
911 0 : if (!out->Seek(records[i].non_default_offset + offset_0514) ||
912 0 : !out->WriteU32(num_mappings)) {
913 0 : return OTS_FAILURE();
914 : }
915 0 : for (unsigned j = 0; j < num_mappings; ++j) {
916 0 : if (!out->WriteU24(mappings[j].unicode_value) ||
917 0 : !out->WriteU16(mappings[j].glyph_id)) {
918 0 : return OTS_FAILURE();
919 : }
920 : }
921 : }
922 : }
923 : }
924 :
925 0 : const off_t offset_100 = out->Tell();
926 0 : if (have_100) {
927 0 : if (!out->WriteU16(0) || // format
928 0 : !out->WriteU16(6 + kFormat0ArraySize) || // length
929 0 : !out->WriteU16(0)) { // language
930 0 : return OTS_FAILURE();
931 : }
932 0 : if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) {
933 0 : return OTS_FAILURE();
934 : }
935 : }
936 :
937 0 : const off_t offset_304 = out->Tell();
938 0 : if (have_304) {
939 0 : if (!out->Write(file->cmap->subtable_3_0_4_data,
940 0 : file->cmap->subtable_3_0_4_length)) {
941 0 : return OTS_FAILURE();
942 : }
943 : }
944 :
945 0 : const off_t offset_314 = out->Tell();
946 0 : if (have_314) {
947 0 : if (!out->Write(file->cmap->subtable_3_1_4_data,
948 0 : file->cmap->subtable_3_1_4_length)) {
949 0 : return OTS_FAILURE();
950 : }
951 : }
952 :
953 0 : const off_t offset_31012 = out->Tell();
954 0 : if (have_31012) {
955 : std::vector<OpenTypeCMAPSubtableRange> &groups
956 0 : = file->cmap->subtable_3_10_12;
957 0 : const unsigned num_groups = groups.size();
958 0 : if (!out->WriteU16(12) ||
959 0 : !out->WriteU16(0) ||
960 0 : !out->WriteU32(num_groups * 12 + 16) ||
961 0 : !out->WriteU32(0) ||
962 0 : !out->WriteU32(num_groups)) {
963 0 : return OTS_FAILURE();
964 : }
965 :
966 0 : for (unsigned i = 0; i < num_groups; ++i) {
967 0 : if (!out->WriteU32(groups[i].start_range) ||
968 0 : !out->WriteU32(groups[i].end_range) ||
969 0 : !out->WriteU32(groups[i].start_glyph_id)) {
970 0 : return OTS_FAILURE();
971 : }
972 : }
973 : }
974 :
975 0 : const off_t offset_31013 = out->Tell();
976 0 : if (have_31013) {
977 : std::vector<OpenTypeCMAPSubtableRange> &groups
978 0 : = file->cmap->subtable_3_10_13;
979 0 : const unsigned num_groups = groups.size();
980 0 : if (!out->WriteU16(13) ||
981 0 : !out->WriteU16(0) ||
982 0 : !out->WriteU32(num_groups * 12 + 14) ||
983 0 : !out->WriteU32(0) ||
984 0 : !out->WriteU32(num_groups)) {
985 0 : return OTS_FAILURE();
986 : }
987 :
988 0 : for (unsigned i = 0; i < num_groups; ++i) {
989 0 : if (!out->WriteU32(groups[i].start_range) ||
990 0 : !out->WriteU32(groups[i].end_range) ||
991 0 : !out->WriteU32(groups[i].start_glyph_id)) {
992 0 : return OTS_FAILURE();
993 : }
994 : }
995 : }
996 :
997 0 : const off_t table_end = out->Tell();
998 : // We might have hanging bytes from the above's checksum which the OTSStream
999 : // then merges into the table of offsets.
1000 0 : OTSStream::ChecksumState saved_checksum = out->SaveChecksumState();
1001 0 : out->ResetChecksum();
1002 :
1003 : // Now seek back and write the table of offsets
1004 0 : if (!out->Seek(record_offset)) {
1005 0 : return OTS_FAILURE();
1006 : }
1007 :
1008 0 : if (have_034) {
1009 0 : if (!out->WriteU16(0) ||
1010 0 : !out->WriteU16(3) ||
1011 0 : !out->WriteU32(offset_034 - table_start)) {
1012 0 : return OTS_FAILURE();
1013 : }
1014 : }
1015 :
1016 0 : if (have_0514) {
1017 0 : if (!out->WriteU16(0) ||
1018 0 : !out->WriteU16(5) ||
1019 0 : !out->WriteU32(offset_0514 - table_start)) {
1020 0 : return OTS_FAILURE();
1021 : }
1022 : }
1023 :
1024 0 : if (have_100) {
1025 0 : if (!out->WriteU16(1) ||
1026 0 : !out->WriteU16(0) ||
1027 0 : !out->WriteU32(offset_100 - table_start)) {
1028 0 : return OTS_FAILURE();
1029 : }
1030 : }
1031 :
1032 0 : if (have_304) {
1033 0 : if (!out->WriteU16(3) ||
1034 0 : !out->WriteU16(0) ||
1035 0 : !out->WriteU32(offset_304 - table_start)) {
1036 0 : return OTS_FAILURE();
1037 : }
1038 : }
1039 :
1040 0 : if (have_314) {
1041 0 : if (!out->WriteU16(3) ||
1042 0 : !out->WriteU16(1) ||
1043 0 : !out->WriteU32(offset_314 - table_start)) {
1044 0 : return OTS_FAILURE();
1045 : }
1046 : }
1047 :
1048 0 : if (have_31012) {
1049 0 : if (!out->WriteU16(3) ||
1050 0 : !out->WriteU16(10) ||
1051 0 : !out->WriteU32(offset_31012 - table_start)) {
1052 0 : return OTS_FAILURE();
1053 : }
1054 : }
1055 :
1056 0 : if (have_31013) {
1057 0 : if (!out->WriteU16(3) ||
1058 0 : !out->WriteU16(10) ||
1059 0 : !out->WriteU32(offset_31013 - table_start)) {
1060 0 : return OTS_FAILURE();
1061 : }
1062 : }
1063 :
1064 0 : if (!out->Seek(table_end)) {
1065 0 : return OTS_FAILURE();
1066 : }
1067 0 : out->RestoreChecksum(saved_checksum);
1068 :
1069 0 : return true;
1070 : }
1071 :
1072 0 : void ots_cmap_free(OpenTypeFile *file) {
1073 0 : delete file->cmap;
1074 0 : }
1075 :
1076 : } // namespace ots
|