1 : /* GRAPHITE2 LICENSING
2 :
3 : Copyright 2010, SIL International
4 : All rights reserved.
5 :
6 : This library is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU Lesser General Public License as published
8 : by the Free Software Foundation; either version 2.1 of License, or
9 : (at your option) any later version.
10 :
11 : This program is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : Lesser General Public License for more details.
15 :
16 : You should also have received a copy of the GNU Lesser General Public
17 : License along with this library in the file named "LICENSE".
18 : If not, write to the Free Software Foundation, 51 Franklin Street,
19 : Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
20 : internet at http://www.fsf.org/licenses/lgpl.html.
21 :
22 : Alternatively, the contents of this file may be used under the terms of the
23 : Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
24 : License, as published by the Free Software Foundation, either version 2
25 : of the License or (at your option) any later version.
26 : */
27 : #include "inc/Main.h"
28 : #include "inc/debug.h"
29 : #include "inc/Endian.h"
30 : #include "inc/Pass.h"
31 : #include <cstring>
32 : #include <cstdlib>
33 : #include <cassert>
34 : #include "inc/Segment.h"
35 : #include "inc/Code.h"
36 : #include "inc/Rule.h"
37 :
38 : using namespace graphite2;
39 : using vm::Machine;
40 : typedef Machine::Code Code;
41 :
42 :
43 0 : Pass::Pass()
44 : :
45 : m_silf(0),
46 : m_cols(0),
47 : m_rules(0),
48 : m_ruleMap(0),
49 : m_startStates(0),
50 : m_sTable(0),
51 0 : m_states(0)
52 : {
53 0 : }
54 :
55 0 : Pass::~Pass()
56 : {
57 0 : free(m_cols);
58 0 : free(m_startStates);
59 0 : free(m_sTable);
60 0 : free(m_states);
61 0 : free(m_ruleMap);
62 :
63 0 : delete [] m_rules;
64 0 : }
65 :
66 0 : bool Pass::readPass(void *pass, size_t pass_length, size_t subtable_base, const Face & face)
67 : {
68 0 : const byte * p = reinterpret_cast<const byte *>(pass),
69 0 : * const pass_start = p,
70 0 : * const pass_end = p + pass_length;
71 : size_t numRanges;
72 :
73 0 : if (pass_length < 40) return false;
74 : // Read in basic values
75 0 : m_immutable = (*p++) & 0x1U;
76 0 : m_iMaxLoop = *p++;
77 0 : p++; // skip maxContext
78 0 : p += sizeof(byte); // skip maxBackup
79 0 : m_numRules = be::read<uint16>(p);
80 0 : p += sizeof(uint16); // not sure why we would want this
81 0 : const byte * const pcCode = pass_start + be::read<uint32>(p) - subtable_base,
82 0 : * const rcCode = pass_start + be::read<uint32>(p) - subtable_base,
83 0 : * const aCode = pass_start + be::read<uint32>(p) - subtable_base;
84 0 : p += sizeof(uint32);
85 0 : m_sRows = be::read<uint16>(p);
86 0 : m_sTransition = be::read<uint16>(p);
87 0 : m_sSuccess = be::read<uint16>(p);
88 0 : m_sColumns = be::read<uint16>(p);
89 0 : numRanges = be::read<uint16>(p);
90 : p += sizeof(uint16) // skip searchRange
91 : + sizeof(uint16) // skip entrySelector
92 0 : + sizeof(uint16); // skip rangeShift
93 0 : assert(p - pass_start == 40);
94 : // Perform some sanity checks.
95 0 : if ( m_sTransition > m_sRows
96 : || m_sSuccess > m_sRows
97 : || m_sSuccess + m_sTransition < m_sRows)
98 0 : return false;
99 :
100 0 : if (p + numRanges * 6 - 4 > pass_end) return false;
101 0 : m_numGlyphs = be::peek<uint16>(p + numRanges * 6 - 4) + 1;
102 : // Caculate the start of vairous arrays.
103 0 : const byte * const ranges = p;
104 0 : p += numRanges*sizeof(uint16)*3;
105 0 : const byte * const o_rule_map = p;
106 0 : p += (m_sSuccess + 1)*sizeof(uint16);
107 :
108 : // More sanity checks
109 0 : if ( reinterpret_cast<const byte *>(o_rule_map) > pass_end
110 : || p > pass_end)
111 0 : return false;
112 0 : const size_t numEntries = be::peek<uint16>(o_rule_map + m_sSuccess*sizeof(uint16));
113 0 : const byte * const rule_map = p;
114 0 : p += numEntries*sizeof(uint16);
115 :
116 0 : if (p > pass_end) return false;
117 0 : m_minPreCtxt = *p++;
118 0 : m_maxPreCtxt = *p++;
119 0 : const byte * const start_states = p;
120 0 : p += (m_maxPreCtxt - m_minPreCtxt + 1)*sizeof(int16);
121 0 : const uint16 * const sort_keys = reinterpret_cast<const uint16 *>(p);
122 0 : p += m_numRules*sizeof(uint16);
123 0 : const byte * const precontext = p;
124 0 : p += m_numRules;
125 0 : p += sizeof(byte); // skip reserved byte
126 :
127 0 : if (p > pass_end) return false;
128 0 : const size_t pass_constraint_len = be::read<uint16>(p);
129 0 : const uint16 * const o_constraint = reinterpret_cast<const uint16 *>(p);
130 0 : p += (m_numRules + 1)*sizeof(uint16);
131 0 : const uint16 * const o_actions = reinterpret_cast<const uint16 *>(p);
132 0 : p += (m_numRules + 1)*sizeof(uint16);
133 0 : const byte * const states = p;
134 0 : p += m_sTransition*m_sColumns*sizeof(int16);
135 0 : p += sizeof(byte); // skip reserved byte
136 0 : if (p != pcCode || p >= pass_end) return false;
137 0 : p += pass_constraint_len;
138 0 : if (p != rcCode || p >= pass_end) return false;
139 0 : p += be::peek<uint16>(o_constraint + m_numRules);
140 0 : if (p != aCode || p >= pass_end) return false;
141 0 : if (size_t(rcCode - pcCode) != pass_constraint_len) return false;
142 :
143 : // Load the pass constraint if there is one.
144 0 : if (pass_constraint_len)
145 : {
146 : m_cPConstraint = vm::Machine::Code(true, pcCode, pcCode + pass_constraint_len,
147 0 : precontext[0], be::peek<uint16>(sort_keys), *m_silf, face);
148 0 : if (!m_cPConstraint) return false;
149 : }
150 0 : if (!readRanges(ranges, numRanges)) return false;
151 0 : if (!readRules(rule_map, numEntries, precontext, sort_keys,
152 0 : o_constraint, rcCode, o_actions, aCode, face)) return false;
153 0 : return readStates(start_states, states, o_rule_map);
154 : }
155 :
156 :
157 0 : bool Pass::readRules(const byte * rule_map, const size_t num_entries,
158 : const byte *precontext, const uint16 * sort_key,
159 : const uint16 * o_constraint, const byte *rc_data,
160 : const uint16 * o_action, const byte * ac_data,
161 : const Face & face)
162 : {
163 0 : const byte * const ac_data_end = ac_data + be::peek<uint16>(o_action + m_numRules);
164 0 : const byte * const rc_data_end = rc_data + be::peek<uint16>(o_constraint + m_numRules);
165 :
166 0 : if (!(m_rules = new Rule [m_numRules])) return false;
167 0 : precontext += m_numRules;
168 0 : sort_key += m_numRules;
169 0 : o_constraint += m_numRules;
170 0 : o_action += m_numRules;
171 :
172 : // Load rules.
173 0 : const byte * ac_begin = 0, * rc_begin = 0,
174 0 : * ac_end = ac_data + be::peek<uint16>(o_action),
175 0 : * rc_end = rc_data + be::peek<uint16>(o_constraint);
176 0 : Rule * r = m_rules + m_numRules - 1;
177 0 : for (size_t n = m_numRules; n; --n, --r, ac_end = ac_begin, rc_end = rc_begin)
178 : {
179 0 : r->preContext = *--precontext;
180 0 : r->sort = be::peek<uint16>(--sort_key);
181 : #ifndef NDEBUG
182 0 : r->rule_idx = n - 1;
183 : #endif
184 0 : if (r->sort > 63 || r->preContext >= r->sort || r->preContext > m_maxPreCtxt || r->preContext < m_minPreCtxt)
185 0 : return false;
186 0 : ac_begin = ac_data + be::peek<uint16>(--o_action);
187 0 : rc_begin = *--o_constraint ? rc_data + be::peek<uint16>(o_constraint) : rc_end;
188 :
189 0 : if (ac_begin > ac_end || ac_begin > ac_data_end || ac_end > ac_data_end
190 : || rc_begin > rc_end || rc_begin > rc_data_end || rc_end > rc_data_end)
191 0 : return false;
192 0 : r->action = new vm::Machine::Code(false, ac_begin, ac_end, r->preContext, r->sort, *m_silf, face);
193 0 : r->constraint = new vm::Machine::Code(true, rc_begin, rc_end, r->preContext, r->sort, *m_silf, face);
194 :
195 0 : if (!r->action || !r->constraint
196 0 : || r->action->status() != Code::loaded
197 0 : || r->constraint->status() != Code::loaded
198 0 : || !r->constraint->immutable())
199 0 : return false;
200 : }
201 :
202 : // Load the rule entries map
203 0 : RuleEntry * re = m_ruleMap = gralloc<RuleEntry>(num_entries);
204 0 : for (size_t n = num_entries; n; --n, ++re)
205 : {
206 0 : const ptrdiff_t rn = be::read<uint16>(rule_map);
207 0 : if (rn >= m_numRules) return false;
208 0 : re->rule = m_rules + rn;
209 : }
210 :
211 0 : return true;
212 : }
213 :
214 0 : static int cmpRuleEntry(const void *a, const void *b) { return (*(RuleEntry *)a < *(RuleEntry *)b ? -1 :
215 0 : (*(RuleEntry *)b < *(RuleEntry *)a ? 1 : 0)); }
216 :
217 0 : bool Pass::readStates(const byte * starts, const byte *states, const byte * o_rule_map)
218 : {
219 0 : m_startStates = gralloc<State *>(m_maxPreCtxt - m_minPreCtxt + 1);
220 0 : m_states = gralloc<State>(m_sRows);
221 0 : m_sTable = gralloc<State *>(m_sTransition * m_sColumns);
222 :
223 0 : if (!m_startStates || !m_states || !m_sTable) return false;
224 : // load start states
225 0 : for (State * * s = m_startStates,
226 0 : * * const s_end = s + m_maxPreCtxt - m_minPreCtxt + 1; s != s_end; ++s)
227 : {
228 0 : *s = m_states + be::read<uint16>(starts);
229 0 : if (*s < m_states || *s >= m_states + m_sRows) return false; // true;
230 : }
231 :
232 : // load state transition table.
233 0 : for (State * * t = m_sTable,
234 0 : * * const t_end = t + m_sTransition*m_sColumns; t != t_end; ++t)
235 : {
236 0 : *t = m_states + be::read<uint16>(states);
237 0 : if (*t < m_states || *t >= m_states + m_sRows) return false;
238 : }
239 :
240 0 : State * s = m_states,
241 0 : * const transitions_end = m_states + m_sTransition,
242 0 : * const success_begin = m_states + m_sRows - m_sSuccess;
243 0 : const RuleEntry * rule_map_end = m_ruleMap + be::peek<uint16>(o_rule_map + m_sSuccess*sizeof(uint16));
244 0 : for (size_t n = m_sRows; n; --n, ++s)
245 : {
246 0 : s->transitions = s < transitions_end ? m_sTable + (s-m_states)*m_sColumns : 0;
247 0 : RuleEntry * const begin = s < success_begin ? 0 : m_ruleMap + be::read<uint16>(o_rule_map),
248 0 : * const end = s < success_begin ? 0 : m_ruleMap + be::peek<uint16>(o_rule_map);
249 :
250 0 : if (begin >= rule_map_end || end > rule_map_end || begin > end)
251 0 : return false;
252 : #ifndef NDEBUG
253 0 : s->index = (s - m_states);
254 : #endif
255 0 : s->rules = begin;
256 : s->rules_end = (end - begin <= FiniteStateMachine::MAX_RULES)? end :
257 0 : begin + FiniteStateMachine::MAX_RULES;
258 0 : qsort(begin, end - begin, sizeof(RuleEntry), &cmpRuleEntry);
259 : }
260 :
261 0 : return true;
262 : }
263 :
264 0 : bool Pass::readRanges(const byte * ranges, size_t num_ranges)
265 : {
266 0 : m_cols = gralloc<uint16>(m_numGlyphs);
267 0 : memset(m_cols, 0xFF, m_numGlyphs * sizeof(uint16));
268 0 : for (size_t n = num_ranges; n; --n)
269 : {
270 0 : const uint16 first = be::read<uint16>(ranges),
271 0 : last = be::read<uint16>(ranges),
272 0 : col = be::read<uint16>(ranges);
273 : uint16 *p;
274 :
275 0 : if (first > last || last >= m_numGlyphs || col >= m_sColumns)
276 0 : return false;
277 :
278 0 : for (p = m_cols + first; p <= m_cols + last; )
279 0 : *p++ = col;
280 : }
281 0 : return true;
282 : }
283 :
284 :
285 0 : void Pass::runGraphite(Machine & m, FiniteStateMachine & fsm) const
286 : {
287 0 : Slot *s = m.slotMap().segment.first();
288 0 : if (!s || !testPassConstraint(m)) return;
289 0 : Slot *currHigh = s->next();
290 :
291 : #if !defined GRAPHITE2_NTRACING
292 : if (dbgout) *dbgout << "rules" << json::array;
293 : json::closer rules_array_closer = dbgout;
294 : #endif
295 :
296 0 : m.slotMap().highwater(currHigh);
297 0 : int lc = m_iMaxLoop;
298 0 : do
299 : {
300 0 : findNDoRule(s, m, fsm);
301 0 : if (s && (m.slotMap().highpassed() || s == m.slotMap().highwater() || --lc == 0)) {
302 0 : if (!lc)
303 : {
304 : // if (dbgout) *dbgout << json::item << json::flat << rule_event(-1, s, 1);
305 0 : s = m.slotMap().highwater();
306 : }
307 0 : lc = m_iMaxLoop;
308 0 : if (s)
309 0 : m.slotMap().highwater(s->next());
310 : }
311 : } while (s);
312 : }
313 :
314 0 : inline uint16 Pass::glyphToCol(const uint16 gid) const
315 : {
316 0 : return gid < m_numGlyphs ? m_cols[gid] : 0xffffU;
317 : }
318 :
319 0 : bool Pass::runFSM(FiniteStateMachine& fsm, Slot * slot) const
320 : {
321 0 : fsm.reset(slot, m_maxPreCtxt);
322 0 : if (fsm.slots.context() < m_minPreCtxt)
323 0 : return false;
324 :
325 0 : const State * state = m_startStates[m_maxPreCtxt - fsm.slots.context()];
326 0 : do
327 : {
328 0 : fsm.slots.pushSlot(slot);
329 0 : if (fsm.slots.size() >= SlotMap::MAX_SLOTS) return false;
330 0 : const uint16 col = glyphToCol(slot->gid());
331 0 : if (col == 0xffffU || !state->is_transition()) return true;
332 :
333 0 : state = state->transitions[col];
334 0 : if (state->is_success())
335 0 : fsm.rules.accumulate_rules(*state);
336 :
337 0 : slot = slot->next();
338 : } while (state != m_states && slot);
339 :
340 0 : fsm.slots.pushSlot(slot);
341 0 : return true;
342 : }
343 :
344 0 : void Pass::findNDoRule(Slot * & slot, Machine &m, FiniteStateMachine & fsm) const
345 : {
346 0 : assert(slot);
347 :
348 0 : if (runFSM(fsm, slot))
349 : {
350 : // Search for the first rule which passes the constraint
351 0 : const RuleEntry * r = fsm.rules.begin(),
352 0 : * const re = fsm.rules.end();
353 0 : for (; r != re && !testConstraint(*r->rule, m); ++r);
354 :
355 : #if !defined GRAPHITE2_NTRACING
356 : if (dbgout)
357 : {
358 : if (fsm.rules.size() != 0)
359 : {
360 : *dbgout << json::item << json::object;
361 : dumpRuleEventConsidered(fsm, *r);
362 : if (r != re)
363 : {
364 : const int adv = doAction(r->rule->action, slot, m);
365 : dumpRuleEventOutput(fsm, *r->rule, slot);
366 : if (r->rule->action->deletes()) fsm.slots.collectGarbage();
367 : adjustSlot(adv, slot, fsm.slots);
368 : *dbgout << "cursor" << slotid(slot)
369 : << json::close // Close "output" object
370 : << json::close; // Close RuelEvent object
371 :
372 : return;
373 : }
374 : else
375 : *dbgout << json::close // close "considered" array
376 : << "output" << json::object
377 : << "slots" << json::array << json::close
378 : << "cursor" << slotid(slot->next())
379 : << json::close
380 : << json::close;
381 : }
382 : }
383 : else
384 : #endif
385 : {
386 0 : if (r != re)
387 : {
388 0 : const int adv = doAction(r->rule->action, slot, m);
389 0 : if (r->rule->action->deletes()) fsm.slots.collectGarbage();
390 0 : adjustSlot(adv, slot, fsm.slots);
391 0 : return;
392 : }
393 : }
394 : }
395 :
396 0 : slot = slot->next();
397 : }
398 :
399 : #if !defined GRAPHITE2_NTRACING
400 :
401 : inline
402 : Slot * input_slot(const SlotMap & slots, const int n)
403 : {
404 : Slot * s = slots[slots.context() + n];
405 : if (!s->isCopied()) return s;
406 :
407 : return s->prev() ? s->prev()->next() : s->next()->prev();
408 : }
409 :
410 : inline
411 : Slot * output_slot(const SlotMap & slots, const int n)
412 : {
413 : Slot * s = slots[slots.context() + n - 1];
414 : return s ? s->next() : slots.segment.first();
415 : }
416 :
417 :
418 : void Pass::dumpRuleEventConsidered(const FiniteStateMachine & fsm, const RuleEntry & re) const
419 : {
420 : *dbgout << "considered" << json::array;
421 : for (const RuleEntry *r = fsm.rules.begin(); r != &re; ++r)
422 : {
423 : if (r->rule->preContext > fsm.slots.context()) continue;
424 : *dbgout << json::flat << json::object
425 : << "id" << r->rule - m_rules
426 : << "failed" << true
427 : << "input" << json::flat << json::object
428 : << "start" << slotid(input_slot(fsm.slots, -r->rule->preContext))
429 : << "length" << r->rule->sort
430 : << json::close // close "input"
431 : << json::close; // close Rule object
432 : }
433 : }
434 :
435 :
436 : void Pass::dumpRuleEventOutput(const FiniteStateMachine & fsm, const Rule & r, Slot * const last_slot) const
437 : {
438 : *dbgout << json::item << json::flat << json::object
439 : << "id" << &r - m_rules
440 : << "failed" << false
441 : << "input" << json::flat << json::object
442 : << "start" << slotid(input_slot(fsm.slots, 0))
443 : << "length" << r.sort - r.preContext
444 : << json::close // close "input"
445 : << json::close // close Rule object
446 : << json::close // close considered array
447 : << "output" << json::object
448 : << "slots" << json::array;
449 : fsm.slots.segment.positionSlots(0);
450 : for(Slot * slot = output_slot(fsm.slots, 0); slot != last_slot; slot = slot->next())
451 : *dbgout << dslot(&fsm.slots.segment, slot);
452 : *dbgout << json::close; // close "slots";
453 : }
454 :
455 : #endif
456 :
457 :
458 : inline
459 0 : bool Pass::testPassConstraint(Machine & m) const
460 : {
461 0 : if (!m_cPConstraint) return true;
462 :
463 0 : assert(m_cPConstraint.constraint());
464 :
465 0 : vm::slotref * map = m.slotMap().begin();
466 0 : *map = m.slotMap().segment.first();
467 0 : const uint32 ret = m_cPConstraint.run(m, map);
468 :
469 : #if !defined GRAPHITE2_NTRACING
470 : if (dbgout)
471 : *dbgout << "constraint" << (ret || m.status() != Machine::finished);
472 : #endif
473 :
474 0 : return ret || m.status() != Machine::finished;
475 : }
476 :
477 :
478 0 : bool Pass::testConstraint(const Rule &r, Machine & m) const
479 : {
480 0 : if ((r.sort - r.preContext) > (m.slotMap().size() - m.slotMap().context())) return false;
481 0 : if (m.slotMap().context() - r.preContext < 0) return false;
482 0 : if (!*r.constraint) return true;
483 0 : assert(r.constraint->constraint());
484 :
485 0 : vm::slotref * map = m.slotMap().begin() + m.slotMap().context() - r.preContext;
486 0 : for (int n = r.sort; n && map; --n, ++map)
487 : {
488 0 : if (!*map) continue;
489 0 : const int32 ret = r.constraint->run(m, map);
490 0 : if (!ret || m.status() != Machine::finished)
491 0 : return false;
492 : }
493 :
494 0 : return true;
495 : }
496 :
497 :
498 0 : void SlotMap::collectGarbage()
499 : {
500 0 : for(Slot **s = begin(), *const *const se = end() - 1; s != se; ++s) {
501 0 : Slot *& slot = *s;
502 0 : if(slot->isDeleted() || slot->isCopied())
503 0 : segment.freeSlot(slot);
504 : }
505 0 : }
506 :
507 :
508 :
509 0 : int Pass::doAction(const Code *codeptr, Slot * & slot_out, vm::Machine & m) const
510 : {
511 0 : assert(codeptr);
512 0 : if (!*codeptr) return 0;
513 0 : SlotMap & smap = m.slotMap();
514 0 : vm::slotref * map = &smap[smap.context()];
515 0 : smap.highpassed(false);
516 :
517 0 : int32 ret = codeptr->run(m, map);
518 :
519 0 : if (m.status() != Machine::finished)
520 : {
521 0 : slot_out = NULL;
522 0 : smap.highwater(0);
523 0 : return 0;
524 : }
525 :
526 0 : slot_out = *map;
527 0 : return ret;
528 : }
529 :
530 :
531 0 : void Pass::adjustSlot(int delta, Slot * & slot_out, SlotMap & smap) const
532 : {
533 0 : if (delta < 0)
534 : {
535 0 : if (!slot_out)
536 : {
537 0 : slot_out = smap.segment.last();
538 0 : ++delta;
539 0 : if (smap.highpassed() && !smap.highwater())
540 0 : smap.highpassed(false);
541 : }
542 0 : while (++delta <= 0 && slot_out)
543 : {
544 0 : if (smap.highpassed() && smap.highwater() == slot_out)
545 0 : smap.highpassed(false);
546 0 : slot_out = slot_out->prev();
547 : }
548 : }
549 0 : else if (delta > 0)
550 : {
551 0 : if (!slot_out)
552 : {
553 0 : slot_out = smap.segment.first();
554 0 : --delta;
555 : }
556 0 : while (--delta >= 0 && slot_out)
557 : {
558 0 : slot_out = slot_out->next();
559 0 : if (slot_out == smap.highwater() && slot_out)
560 0 : smap.highpassed(true);
561 : }
562 : }
563 0 : }
564 :
|