1<?php
2
3namespace Mpdf\Shaper;
4
/**
 * Static helpers for shaping Myanmar-script text runs.
 *
 * The class classifies code points into shaping categories, splits a run into
 * syllables, and reorders the glyph records of each syllable by visual
 * position. Constants and lookup tables follow the hb-ot-shape-complex-*
 * sources named in the section comments below.
 *
 * Glyph records are plain arrays. The keys used here are 'uni' (code point),
 * 'syllable', 'myanmar_category', 'myanmar_position' and, optionally,
 * 'is_ligature'.
 */
class Myanmar
{
	/* FROM hb-ot-shape-complex-indic-private.hh */

	// indic_category
	const OT_X = 0;
	const OT_C = 1;
	const OT_V = 2;
	const OT_N = 3;
	const OT_H = 4;
	const OT_ZWNJ = 5;
	const OT_ZWJ = 6;
	const OT_M = 7;  /* Matra or Dependent Vowel */
	const OT_SM = 8;
	const OT_VD = 9;
	const OT_A = 10;
	const OT_NBSP = 11;
	const OT_DOTTEDCIRCLE = 12; /* Not in the spec, but special in Uniscribe. /Very very/ special! */
	const OT_RS = 13;   /* Register Shifter, used in Khmer OT spec */
	const OT_COENG = 14;
	const OT_REPHA = 15;
	const OT_RA = 16;   /* Not explicitly listed in the OT spec, but used in the grammar. */
	const OT_CM = 17;

	/* FROM hb-ot-shape-complex-myanmar.hh */

	// myanmar_category
	const OT_DB = 3;  // same as Indic::OT_N; /* Dot below */
	const OT_GB = 12;  // same as Indic::OT_DOTTEDCIRCLE;
	const OT_AS = 18; /* Asat */
	const OT_D = 19; /* Digits except zero */
	const OT_D0 = 20; /* Digit zero */
	const OT_MH = 21; /* Various consonant medial types */
	const OT_MR = 22; /* Various consonant medial types */
	const OT_MW = 23; /* Various consonant medial types */
	const OT_MY = 24; /* Various consonant medial types */
	const OT_PT = 25; /* Pwo and other tones */

	const OT_VABV = 26;
	const OT_VBLW = 27;
	const OT_VPRE = 28;
	const OT_VPST = 29;
	const OT_VS = 30; /* Variation selectors */

	/* Visual positions in a syllable from left to right. */
	/* FROM hb-ot-shape-complex-myanmar-private.hh */

	// myanmar_position
	const POS_START = 0;

	const POS_RA_TO_BECOME_REPH = 1;
	const POS_PRE_M = 2;
	const POS_PRE_C = 3;

	const POS_BASE_C = 4;
	const POS_AFTER_MAIN = 5;

	const POS_ABOVE_C = 6;

	const POS_BEFORE_SUB = 7;
	const POS_BELOW_C = 8;
	const POS_AFTER_SUB = 9;

	const POS_BEFORE_POST = 10;
	const POS_POST_C = 11;
	const POS_AFTER_POST = 12;

	const POS_FINAL_C = 13;
	const POS_SMVD = 14;

	const POS_END = 15;

	// syllable_type (stored in the low 4 bits of a record's 'syllable' value)
	const CONSONANT_SYLLABLE = 0;
	const BROKEN_CLUSTER = 3;
	const NON_MYANMAR_CLUSTER = 4;

	// Based on myanmar_category used to make string to find syllables
	// OT_ to string character (using e.g. OT_C from MYANMAR) hb-ot-shape-complex-myanmar-private.hh
	//
	// The array index is the category constant; the value is the one-byte
	// symbol that set_syllables() matches with its regular expressions.
	// 'x' marks categories that take no part in the syllable grammar.
	public static $myanmar_category_char = [
		'x', /*  0 OT_X */
		'C', /*  1 OT_C */
		'V', /*  2 OT_V */
		'N', /*  3 OT_N / OT_DB */
		'H', /*  4 OT_H */
		'Z', /*  5 OT_ZWNJ */
		'J', /*  6 OT_ZWJ */
		'x', /*  7 OT_M */
		'S', /*  8 OT_SM */
		'x', /*  9 OT_VD */
		'A', /* 10 OT_A */
		'x', /* 11 OT_NBSP */
		'D', /* 12 OT_DOTTEDCIRCLE / OT_GB */
		'x', /* 13 OT_RS */
		'x', /* 14 OT_COENG */
		'x', /* 15 OT_REPHA */
		'R', /* 16 OT_RA */
		'x', /* 17 OT_CM */
		'a', /* 18 OT_AS: As Asat */
		'd', /* 19 OT_D: Digits except zero */
		'o', /* 20 OT_D0: Digit zero */
		'k', /* 21 OT_MH: Medial types */
		'l', /* 22 OT_MR: Medial types */
		'm', /* 23 OT_MW: Medial types */
		'n', /* 24 OT_MY: Medial types */
		'p', /* 25 OT_PT: Pwo and other tones */
		'v', /* 26 OT_VABV: Vowel aboVe */
		'b', /* 27 OT_VBLW: Vowel Below */
		'e', /* 28 OT_VPRE: Vowel prE */
		't', /* 29 OT_VPST: Vowel posT */
		's', /* 30 OT_VS: variation Selector */
	];

	/**
	 * Classify one glyph record for Myanmar shaping.
	 *
	 * Reads the code point from $info['uni'] and stores the resulting
	 * category in $info['myanmar_category'] and the position in
	 * $info['myanmar_position'].
	 *
	 * @param array $info Glyph record; modified in place.
	 * @return void
	 */
	public static function set_myanmar_properties(&$info)
	{
		$u = $info['uni'];
		$type = self::myanmar_get_categories($u);
		// Table values are packed: category in the low 7 bits, position above bit 8.
		$cat = ($type & 0x7F);
		$pos = ($type >> 8);

		/*
		 * Re-assign category
		 * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze
		 */
		if (self::in_range($u, 0xFE00, 0xFE0F)) {
			$cat = self::OT_VS;
		} elseif ($u == 0x200C) {
			$cat = self::OT_ZWNJ;
		} elseif ($u == 0x200D) {
			$cat = self::OT_ZWJ;
		}

		// Per-code-point overrides of the table category.
		switch ($u) {
			case 0x002D:
			case 0x00A0:
			case 0x00D7:
			case 0x2012:
			case 0x2013:
			case 0x2014:
			case 0x2015:
			case 0x2022:
			case 0x25CC:
			case 0x25FB:
			case 0x25FC:
			case 0x25FD:
			case 0x25FE:
				$cat = self::OT_GB;
				break;

			case 0x1004:
			case 0x101B:
			case 0x105A:
				$cat = self::OT_RA;
				break;

			case 0x1032:
			case 0x1036:
				$cat = self::OT_A;
				break;

			case 0x103A:
				$cat = self::OT_AS;
				break;

			case 0x1041:
			case 0x1042:
			case 0x1043:
			case 0x1044:
			case 0x1045:
			case 0x1046:
			case 0x1047:
			case 0x1048:
			case 0x1049:
			case 0x1090:
			case 0x1091:
			case 0x1092:
			case 0x1093:
			case 0x1094:
			case 0x1095:
			case 0x1096:
			case 0x1097:
			case 0x1098:
			case 0x1099:
				$cat = self::OT_D;
				break;

			case 0x1040:
				$cat = self::OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
				break;

			case 0x103E:
			case 0x1060:
				$cat = self::OT_MH;
				break;

			case 0x103C:
				$cat = self::OT_MR;
				break;

			case 0x103D:
			case 0x1082:
				$cat = self::OT_MW;
				break;

			case 0x103B:
			case 0x105E:
			case 0x105F:
				$cat = self::OT_MY;
				break;

			case 0x1063:
			case 0x1064:
			case 0x1069:
			case 0x106A:
			case 0x106B:
			case 0x106C:
			case 0x106D:
			case 0xAA7B:
				$cat = self::OT_PT;
				break;

			case 0x1038:
			case 0x1087:
			case 0x1088:
			case 0x1089:
			case 0x108A:
			case 0x108B:
			case 0x108C:
			case 0x108D:
			case 0x108F:
			case 0x109A:
			case 0x109B:
			case 0x109C:
				$cat = self::OT_SM;
				break;
		}

		// A generic matra is split into one of the four vowel categories
		// according to its table position. Any other position leaves it OT_M.
		if ($cat == self::OT_M) {
			switch ($pos) {
				case self::POS_PRE_C:
					$cat = self::OT_VPRE;
					$pos = self::POS_PRE_M;
					break;
				case self::POS_ABOVE_C:
					$cat = self::OT_VABV;
					break;
				case self::POS_BELOW_C:
					$cat = self::OT_VBLW;
					break;
				case self::POS_POST_C:
					$cat = self::OT_VPST;
					break;
			}
		}

		$info['myanmar_category'] = $cat;
		$info['myanmar_position'] = $pos;
	}

	/**
	 * Split a category string into syllables and tag each record.
	 *
	 * $s holds one byte per record (the symbols of $myanmar_category_char).
	 * For every byte index $i the method writes
	 * $o[$i]['syllable'] = (serial << 4) | syllable_type, where the serial
	 * cycles through 1..15 and the type is one of CONSONANT_SYLLABLE,
	 * BROKEN_CLUSTER or NON_MYANMAR_CLUSTER.
	 *
	 * @param array  $o                Records, indexed like the bytes of $s; modified in place.
	 * @param string $s                Category string to scan.
	 * @param bool   $broken_syllables Set to true if a broken cluster was found, false otherwise.
	 * @return void
	 */
	public static function set_syllables(&$o, $s, &$broken_syllables)
	{
		$broken_syllables = false;

		// Tail shared by both patterns: a lone 'H', or the optional
		// asat / medial / vowel / tone / sign groups and one trailing joiner.
		// Character classes list plain symbols only; a '|' inside a class
		// would be matched literally and is not a category symbol.
		$tail = '(H|[a]*[n]?[l]?((m[k]?|k)[a]?)?[e]*[v]*[b]*[A]*(N[a]?)?(t[k]?[a]*[v]*[A]*(N[a]?)?)*(p[A]*(N[a]?)?)*S*[JZ]?)';
		// From OT spec: optional "RaH", a base, then stacked "H base" pairs, then the tail.
		$consonant_pattern = '/^(RaH)?([CR]|V|d|D)[s]?(H([CRV])[s]?)*' . $tail . '/';
		// Same as above but with no base at all.
		$broken_pattern = '/^(RaH)?s?' . $tail . '/';

		$length = strlen($s);
		$ptr = 0;
		$syllable_serial = 1;

		while ($ptr < $length) {
			$rest = substr($s, $ptr);

			// Default: a single unrecognised symbol forms its own cluster.
			$syllable_length = 1;
			$syllable_type = self::NON_MYANMAR_CLUSTER;

			if (preg_match($consonant_pattern, $rest, $ma)) {
				$syllable_length = strlen($ma[0]);
				$syllable_type = self::CONSONANT_SYLLABLE;
			} elseif (preg_match($broken_pattern, $rest, $ma) && strlen($ma[0])) {
				// The broken pattern may match the empty string; that case
				// falls through to the default above.
				$syllable_length = strlen($ma[0]);
				$syllable_type = self::BROKEN_CLUSTER;
				$broken_syllables = true;
			}

			$syllable = ($syllable_serial << 4) | $syllable_type;
			for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
				$o[$i]['syllable'] = $syllable;
			}

			$ptr += $syllable_length;
			$syllable_serial++;
			if ($syllable_serial == 16) {
				$syllable_serial = 1;
			}
		}
	}

	/**
	 * Reorder every syllable of a run.
	 *
	 * Optionally inserts dotted circles in front of broken clusters first,
	 * then calls reordering_syllable() once for each maximal stretch of
	 * records that share the same 'syllable' value.
	 *
	 * @param array $info             Glyph records; modified in place.
	 * @param mixed $GSUBdata         Passed through to reordering_syllable().
	 * @param bool  $broken_syllables Whether set_syllables() reported a broken cluster.
	 * @param mixed $dottedcircle     Records to insert before broken clusters; a falsy value disables insertion.
	 * @return void
	 */
	public static function reordering(&$info, $GSUBdata, $broken_syllables, $dottedcircle)
	{
		if ($broken_syllables && $dottedcircle) {
			self::insert_dotted_circles($info, $dottedcircle);
		}
		$count = count($info);
		if (!$count) {
			return;
		}
		$last = 0;
		$last_syllable = $info[0]['syllable'];
		for ($i = 1; $i < $count; $i++) {
			if ($last_syllable != $info[$i]['syllable']) {
				self::reordering_syllable($info, $GSUBdata, $last, $i);
				$last = $i;
				$last_syllable = $info[$last]['syllable'];
			}
		}
		self::reordering_syllable($info, $GSUBdata, $last, $count);
	}

	/**
	 * Insert $dottedcircle in front of every broken cluster.
	 *
	 * The inserted record at index 0 of $dottedcircle receives the
	 * 'syllable' value of the cluster it precedes, so it becomes part of
	 * that cluster.
	 *
	 * @param array $info         Glyph records; modified in place.
	 * @param array $dottedcircle List of records to splice in; element 0 is tagged with the syllable value.
	 * @return void
	 */
	public static function insert_dotted_circles(&$info, $dottedcircle)
	{
		$idx = 0;
		$last_syllable = 0;
		// count() is re-evaluated on purpose: the array grows while we splice.
		while ($idx < count($info)) {
			$syllable = $info[$idx]['syllable'];
			$syllable_type = ($syllable & 0x0F);
			if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) {
				// First record of a broken cluster: insert here and do not
				// advance. The next iteration sees the inserted record, which
				// now matches $last_syllable, and steps over it.
				$last_syllable = $syllable;
				$dottedcircle[0]['syllable'] = $syllable;
				array_splice($info, $idx, 0, $dottedcircle);
			} else {
				$idx++;
			}
		}
		// The loop inspects every record, including the last one, so a broken
		// cluster at the end of the run is already handled. Nothing may be
		// read at $info[$idx] here: $idx equals count($info).
	}

	/* Rules from:
	 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */

	/**
	 * Assign visual positions inside one syllable and sort it by position.
	 *
	 * Non-Myanmar clusters are left untouched. A broken cluster whose last
	 * record is a dotted circle is also left untouched.
	 *
	 * @param array $info     Glyph records; modified in place.
	 * @param mixed $GSUBdata Not used by this method.
	 * @param int   $start    Index of the first record of the syllable.
	 * @param int   $end      Index one past the last record of the syllable.
	 * @return void
	 */
	public static function reordering_syllable(&$info, $GSUBdata, $start, $end)
	{
		/* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! */
		/* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */

		$syllable_type = ($info[$start]['syllable'] & 0x0F);
		if ($syllable_type == self::NON_MYANMAR_CLUSTER) {
			return;
		}
		/* For dotted-circle, this is what Uniscribe does:
		 * If dotted-circle is the last glyph, it just does nothing.
		 * i.e. It doesn't form Reph. */
		if ($syllable_type == self::BROKEN_CLUSTER
			&& $info[$end - 1]['myanmar_category'] == self::OT_DOTTEDCIRCLE) {
			return;
		}

		// A leading Ra + Asat + Halant triple is treated as a unit ("reph").
		$has_reph = ($start + 3 <= $end)
			&& $info[$start]['myanmar_category'] == self::OT_RA
			&& $info[$start + 1]['myanmar_category'] == self::OT_AS
			&& $info[$start + 2]['myanmar_category'] == self::OT_H;
		$limit = $has_reph ? $start + 3 : $start;

		// Base is the first consonant at or after $limit; $start if there is none.
		$base = $start;
		for ($i = $limit; $i < $end; $i++) {
			if (self::is_consonant($info[$i])) {
				$base = $i;
				break;
			}
		}

		/* Reorder! */
		$i = $start;
		for (; $i < $start + ($has_reph ? 3 : 0); $i++) {
			$info[$i]['myanmar_position'] = self::POS_AFTER_MAIN;
		}
		for (; $i < $base; $i++) {
			$info[$i]['myanmar_position'] = self::POS_PRE_C;
		}
		if ($i < $end) {
			$info[$i]['myanmar_position'] = self::POS_BASE_C;
			$i++;
		}

		// $pos is the running position given to records after the base.
		// It only moves forward: AFTER_MAIN -> BELOW_C -> AFTER_SUB.
		$pos = self::POS_AFTER_MAIN;
		for (; $i < $end; $i++) {
			$cat = $info[$i]['myanmar_category'];

			if ($cat == self::OT_MR) { /* Pre-base reordering */
				$info[$i]['myanmar_position'] = self::POS_PRE_C;
				continue;
			}
			if ($info[$i]['myanmar_position'] < self::POS_BASE_C) { /* Left matra */
				continue;
			}

			if ($pos == self::POS_AFTER_MAIN && $cat == self::OT_VBLW) {
				// First below-base vowel opens the below-base stretch.
				$pos = self::POS_BELOW_C;
			} elseif ($pos == self::POS_BELOW_C) {
				if ($cat == self::OT_A) {
					// Sorts in front of the below-base vowel; $pos is unchanged.
					$info[$i]['myanmar_position'] = self::POS_BEFORE_SUB;
					continue;
				}
				if ($cat != self::OT_VBLW) {
					// Anything else closes the below-base stretch.
					$pos = self::POS_AFTER_SUB;
				}
			}
			$info[$i]['myanmar_position'] = $pos;
		}

		/* Sit tight, rock 'n roll! */
		self::bubble_sort($info, $start, $end - $start);
	}

	/**
	 * Test whether a record's category is contained in a flag mask.
	 *
	 * @param array $info  Glyph record with 'myanmar_category'.
	 * @param int   $flags Bitwise OR of FLAG() values.
	 * @return bool False for records whose 'is_ligature' is set and truthy.
	 */
	public static function is_one_of($info, $flags)
	{
		if (isset($info['is_ligature']) && $info['is_ligature']) {
			return false;
		} /* If it ligated, all bets are off. */
		return !!(self::FLAG($info['myanmar_category']) & $flags);
	}

	/* Vowels and placeholders treated as if they were consonants. */

	/**
	 * Test whether a record may serve as a syllable base.
	 *
	 * @param array $info Glyph record with 'myanmar_category'.
	 * @return bool True for OT_C, OT_CM, OT_RA, OT_V, OT_NBSP and OT_GB records that are not ligatures.
	 */
	public static function is_consonant($info)
	{
		return self::is_one_of($info, (self::FLAG(self::OT_C) | self::FLAG(self::OT_CM) | self::FLAG(self::OT_RA) | self::FLAG(self::OT_V) | self::FLAG(self::OT_NBSP) | self::FLAG(self::OT_GB)));
	}

	// From hb-private.hh
	/**
	 * Test whether $lo <= $u <= $hi.
	 *
	 * When $lo and $hi differ only in a block of low bits that are all 0 in
	 * $lo and all 1 in $hi, the test is done with a single mask comparison;
	 * otherwise with two ordinary comparisons.
	 *
	 * @param int $u  Value to test.
	 * @param int $lo Lower bound (inclusive).
	 * @param int $hi Upper bound (inclusive).
	 * @return bool
	 */
	public static function in_range($u, $lo, $hi)
	{
		$diff = $lo ^ $hi;
		if (($diff & $lo) == 0 && ($diff & $hi) == $diff && ($diff & ($diff + 1)) == 0) {
			return ($u & ~$diff) == $lo;
		}
		return $lo <= $u && $u <= $hi;
	}

	// From hb-private.hh
	/**
	 * Return an integer with only bit $x set.
	 *
	 * @param int $x Bit index.
	 * @return int
	 */
	public static function FLAG($x)
	{
		return (1 << ($x));
	}

	/**
	 * Return an integer with bits $x through $y (inclusive) set.
	 *
	 * @param int $x Lowest bit index.
	 * @param int $y Highest bit index.
	 * @return int
	 */
	public static function FLAG_RANGE($x, $y)
	{
		return self::FLAG($y + 1) - self::FLAG($x);
	}

	// BELOW from hb-ot-shape-complex-indic.cc
	// see INDIC for details
	//
	// One packed value per code point: (position << 8) | category.
	// Entries 0..159 cover U+1000..U+109F, entries 160..191 cover U+AA60..U+AA7F.
	public static $myanmar_table = [
		/* Myanmar  (1000..109F) */

		/* 1000 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* 1008 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* 1010 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* 1018 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* 1020 */ 3841, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
		/* 1028 */ 3842, 3842, 3842, 2823, 2823, 1543, 1543, 2055,
		/* 1030 */ 2055, 775, 1543, 1543, 1543, 1543, 3848, 3843,
		/* 1038 */ 3848, 3844, 1540, 3857, 3857, 3857, 3857, 3841,
		/* 1040 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
		/* 1048 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
		/* 1050 */ 3841, 3841, 3842, 3842, 3842, 3842, 2823, 2823,
		/* 1058 */ 2055, 2055, 3841, 3841, 3841, 3841, 3857, 3857,
		/* 1060 */ 3857, 3841, 2823, 3843, 3843, 3841, 3841, 2823,
		/* 1068 */ 2823, 3843, 3843, 3843, 3843, 3843, 3841, 3841,
		/* 1070 */ 3841, 1543, 1543, 1543, 1543, 3841, 3841, 3841,
		/* 1078 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* 1080 */ 3841, 3841, 3857, 2823, 775, 1543, 1543, 3843,
		/* 1088 */ 3843, 3843, 3843, 3843, 3843, 3843, 3841, 3843,
		/* 1090 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
		/* 1098 */ 3840, 3840, 3843, 3843, 2823, 1543, 3840, 3840,
		/* Myanmar Extended-A  (AA60..AA7F) */

		/* AA60 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* AA68 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
		/* AA70 */ 3840, 3841, 3841, 3841, 3840, 3840, 3840, 3840,
		/* AA78 */ 3840, 3840, 3841, 3843, 3840, 3840, 3840, 3840,
	];

	// from "hb-ot-shape-complex-indic-table.cc"
	/**
	 * Look up the packed (position << 8) | category value of a code point.
	 *
	 * @param int $u Unicode code point.
	 * @return int Table value for the Myanmar blocks, 3851 for U+00A0 and U+25CC, 3840 for anything else.
	 */
	public static function myanmar_get_categories($u)
	{
		if (0x1000 <= $u && $u <= 0x109F) {
			return self::$myanmar_table[$u - 0x1000 + 0]; // offset 0 for Most "myanmar"
		}
		if (0xAA60 <= $u && $u <= 0xAA7F) {
			return self::$myanmar_table[$u - 0xAA60 + 160]; // offset for extensions
		}
		if ($u == 0x00A0 || $u == 0x25CC) {
			return 3851; // (ISC_CP | (IMC_x << 8))
		}
		return 3840; // (ISC_x | (IMC_x << 8))
	}

	/**
	 * Stable in-place sort of a slice of records by 'myanmar_position'.
	 *
	 * Only adjacent records that are strictly out of order are swapped, so
	 * records with equal positions keep their relative order.
	 *
	 * @param array $arr   Glyph records; modified in place.
	 * @param int   $start Index of the first record of the slice.
	 * @param int   $len   Number of records in the slice.
	 * @return void
	 */
	public static function bubble_sort(&$arr, $start, $len)
	{
		if ($len < 2) {
			return;
		}
		// After each pass the largest remaining record sits at $k + 1,
		// so the next pass can stop one position earlier.
		for ($k = $start + $len - 2; $k >= $start; $k--) {
			$swapped = false;
			for ($j = $start; $j <= $k; $j++) {
				if ($arr[$j]['myanmar_position'] > $arr[$j + 1]['myanmar_position']) {
					$t = $arr[$j];
					$arr[$j] = $arr[$j + 1];
					$arr[$j + 1] = $t;
					$swapped = true;
				}
			}
			if (!$swapped) {
				// A pass without swaps means the slice is already sorted.
				break;
			}
		}
	}
}
544