<?php

namespace Mpdf\Shaper;

/**
 * Myanmar script shaping helpers: per-character classification, syllable
 * segmentation and in-syllable reordering.
 *
 * The logic follows HarfBuzz's Myanmar shaper (see the "FROM hb-..." markers
 * next to each section). Buffers are plain PHP arrays: every entry of an
 * "$info" array is an associative array holding at least the keys read by the
 * method in question ('uni', 'syllable', 'myanmar_category',
 * 'myanmar_position', and optionally 'is_ligature').
 */
class Myanmar
{
    /* FROM hb-ot-shape-complex-indic-private.hh */

    // indic_category
    const OT_X = 0;
    const OT_C = 1;
    const OT_V = 2;
    const OT_N = 3;
    const OT_H = 4;
    const OT_ZWNJ = 5;
    const OT_ZWJ = 6;
    const OT_M = 7; /* Matra or Dependent Vowel */
    const OT_SM = 8;
    const OT_VD = 9;
    const OT_A = 10;
    const OT_NBSP = 11;
    const OT_DOTTEDCIRCLE = 12; /* Not in the spec, but special in Uniscribe. /Very very/ special! */
    const OT_RS = 13; /* Register Shifter, used in Khmer OT spec */
    const OT_COENG = 14;
    const OT_REPHA = 15;
    const OT_RA = 16; /* Not explicitly listed in the OT spec, but used in the grammar. */
    const OT_CM = 17;

    /* FROM hb-ot-shape-complex-myanmar.hh */

    // myanmar_category
    const OT_DB = 3; // same as Indic::OT_N; /* Dot below */
    const OT_GB = 12; // same as Indic::OT_DOTTEDCIRCLE;
    const OT_AS = 18; /* Asat */
    const OT_D = 19; /* Digits except zero */
    const OT_D0 = 20; /* Digit zero */
    const OT_MH = 21; /* Various consonant medial types */
    const OT_MR = 22; /* Various consonant medial types */
    const OT_MW = 23; /* Various consonant medial types */
    const OT_MY = 24; /* Various consonant medial types */
    const OT_PT = 25; /* Pwo and other tones */

    const OT_VABV = 26;
    const OT_VBLW = 27;
    const OT_VPRE = 28;
    const OT_VPST = 29;
    const OT_VS = 30; /* Variation selectors */

    /* Visual positions in a syllable from left to right. */
    /* FROM hb-ot-shape-complex-myanmar-private.hh */

    // myanmar_position
    const POS_START = 0;

    const POS_RA_TO_BECOME_REPH = 1;
    const POS_PRE_M = 2;
    const POS_PRE_C = 3;

    const POS_BASE_C = 4;
    const POS_AFTER_MAIN = 5;

    const POS_ABOVE_C = 6;

    const POS_BEFORE_SUB = 7;
    const POS_BELOW_C = 8;
    const POS_AFTER_SUB = 9;

    const POS_BEFORE_POST = 10;
    const POS_POST_C = 11;
    const POS_AFTER_POST = 12;

    const POS_FINAL_C = 13;
    const POS_SMVD = 14;

    const POS_END = 15;

    // syllable_type (stored in the low 4 bits of $info[..]['syllable'])
    const CONSONANT_SYLLABLE = 0;
    const BROKEN_CLUSTER = 3;
    const NON_MYANMAR_CLUSTER = 4;

    // Based on myanmar_category used to make string to find syllables
    // OT_ to string character (using e.g. OT_C from MYANMAR) hb-ot-shape-complex-myanmar-private.hh
    // The array index is the category constant (OT_X = 0 ... OT_VS = 30).
    public static $myanmar_category_char = [
        'x',
        'C',
        'V',
        'N',
        'H',
        'Z',
        'J',
        'x',
        'S',
        'x',
        'A',
        'x',
        'D',
        'x',
        'x',
        'x',
        'R',
        'x',
        'a', /* As Asat */
        'd', /* Digits except zero */
        'o', /* Digit zero */
        'k', /* Medial types */
        'l', /* Medial types */
        'm', /* Medial types */
        'n', /* Medial types */
        'p', /* Pwo and other tones */
        'v', /* Vowel aboVe */
        'b', /* Vowel Below */
        'e', /* Vowel prE */
        't', /* Vowel posT */
        's', /* variation Selector */
    ];

    /**
     * Classify one character and store the result on its buffer entry.
     *
     * Reads $info['uni'] (the code point) and writes
     * $info['myanmar_category'] and $info['myanmar_position'].
     *
     * @param array $info Buffer entry for a single character; modified in place.
     *
     * @return void
     */
    public static function set_myanmar_properties(&$info)
    {
        $u = $info['uni'];
        $type = self::myanmar_get_categories($u);
        // Packed table value: category in the low 7 bits, position from bit 8 up.
        $cat = ($type & 0x7F);
        $pos = ($type >> 8);

        /*
         * Re-assign category
         * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze
         */
        if (self::in_range($u, 0xFE00, 0xFE0F)) {
            $cat = self::OT_VS;
        } elseif ($u == 0x200C) {
            $cat = self::OT_ZWNJ;
        } elseif ($u == 0x200D) {
            $cat = self::OT_ZWJ;
        }

        switch ($u) {
            case 0x002D: case 0x00A0: case 0x00D7: case 0x2012:
            case 0x2013: case 0x2014: case 0x2015: case 0x2022:
            case 0x25CC: case 0x25FB: case 0x25FC: case 0x25FD:
            case 0x25FE:
                $cat = self::OT_GB;
                break;

            case 0x1004: case 0x101B: case 0x105A:
                $cat = self::OT_RA;
                break;

            case 0x1032: case 0x1036:
                $cat = self::OT_A;
                break;

            case 0x103A:
                $cat = self::OT_AS;
                break;

            case 0x1041: case 0x1042: case 0x1043: case 0x1044:
            case 0x1045: case 0x1046: case 0x1047: case 0x1048:
            case 0x1049: case 0x1090: case 0x1091: case 0x1092:
            case 0x1093: case 0x1094: case 0x1095: case 0x1096:
            case 0x1097: case 0x1098: case 0x1099:
                $cat = self::OT_D;
                break;

            case 0x1040:
                $cat = self::OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
                break;

            case 0x103E: case 0x1060:
                $cat = self::OT_MH;
                break;

            case 0x103C:
                $cat = self::OT_MR;
                break;

            case 0x103D: case 0x1082:
                $cat = self::OT_MW;
                break;

            case 0x103B: case 0x105E: case 0x105F:
                $cat = self::OT_MY;
                break;

            case 0x1063: case 0x1064: case 0x1069: case 0x106A:
            case 0x106B: case 0x106C: case 0x106D: case 0xAA7B:
                $cat = self::OT_PT;
                break;

            case 0x1038: case 0x1087: case 0x1088: case 0x1089:
            case 0x108A: case 0x108B: case 0x108C: case 0x108D:
            case 0x108F: case 0x109A: case 0x109B: case 0x109C:
                $cat = self::OT_SM;
                break;
        }

        // Generic matras are split into the four positional vowel categories.
        if ($cat == self::OT_M) {
            switch ($pos) {
                case self::POS_PRE_C:
                    $cat = self::OT_VPRE;
                    $pos = self::POS_PRE_M;
                    break;
                case self::POS_ABOVE_C:
                    $cat = self::OT_VABV;
                    break;
                case self::POS_BELOW_C:
                    $cat = self::OT_VBLW;
                    break;
                case self::POS_POST_C:
                    $cat = self::OT_VPST;
                    break;
            }
        }

        $info['myanmar_category'] = $cat;
        $info['myanmar_position'] = $pos;
    }

    /**
     * Split the category string into syllables and tag every buffer entry.
     *
     * $s is scanned left to right; offset $i of $s is written to $o[$i], so
     * the string is expected to hold one category character per buffer entry.
     * Each entry receives 'syllable' = (serial << 4) | syllable_type, where the
     * serial cycles through 1..15.
     *
     * @param array  $o                Buffer; $o[$i]['syllable'] is set for every offset of $s.
     * @param string $s                Category string (see $myanmar_category_char).
     * @param bool   $broken_syllables Output flag: true when at least one broken cluster was found.
     *
     * @return void
     */
    public static function set_syllables(&$o, $s, &$broken_syllables)
    {
        $ptr = 0;
        $syllable_serial = 1;
        $broken_syllables = false;
        $length = strlen($s);

        while ($ptr < $length) {
            $rest = substr($s, $ptr);
            // Default: a single character that is not part of any Myanmar syllable.
            $syllable_length = 1;
            $syllable_type = self::NON_MYANMAR_CLUSTER;

            // CONSONANT_SYLLABLE Consonant syllable
            // From OT spec:
            if (preg_match('/^(RaH)?([C|R]|V|d|D)[s]?(H([C|R|V])[s]?)*(H|[a]*[n]?[l]?((m[k]?|k)[a]?)?[e]*[v]*[b]*[A]*(N[a]?)?(t[k]?[a]*[v]*[A]*(N[a]?)?)*(p[A]*(N[a]?)?)*S*[J|Z]?)/', $rest, $ma)) {
                $syllable_length = strlen($ma[0]);
                $syllable_type = self::CONSONANT_SYLLABLE;
            } // BROKEN_CLUSTER syllable
            elseif (preg_match('/^(RaH)?s?(H|[a]*[n]?[l]?((m[k]?|k)[a]?)?[e]*[v]*[b]*[A]*(N[a]?)?(t[k]?[a]*[v]*[A]*(N[a]?)?)*(p[A]*(N[a]?)?)*S*[J|Z]?)/', $rest, $ma)) {
                if (strlen($ma[0])) { // May match blank
                    $syllable_length = strlen($ma[0]);
                    $syllable_type = self::BROKEN_CLUSTER;
                    $broken_syllables = true;
                }
            }

            for ($i = $ptr; $i < $ptr + $syllable_length; $i++) {
                $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type;
            }
            $ptr += $syllable_length;

            // The serial wraps from 15 back to 1.
            $syllable_serial++;
            if ($syllable_serial == 16) {
                $syllable_serial = 1;
            }
        }
    }

    /**
     * Reorder every syllable of the buffer into visual order.
     *
     * When broken clusters were detected and a dotted-circle entry is
     * supplied, the dotted circles are inserted first. The buffer is then cut
     * wherever the 'syllable' value changes and each run is handed to
     * reordering_syllable().
     *
     * @param array $info             Buffer; modified in place.
     * @param mixed $GSUBdata         Passed through to reordering_syllable() unchanged.
     * @param bool  $broken_syllables Flag produced by set_syllables().
     * @param mixed $dottedcircle     Falsy, or an array whose element 0 is the dotted-circle entry.
     *
     * @return void
     */
    public static function reordering(&$info, $GSUBdata, $broken_syllables, $dottedcircle)
    {
        if ($broken_syllables && $dottedcircle) {
            self::insert_dotted_circles($info, $dottedcircle);
        }

        $count = count($info);
        if (!$count) {
            return;
        }

        $last = 0;
        $last_syllable = $info[0]['syllable'];
        for ($i = 1; $i < $count; $i++) {
            if ($last_syllable != $info[$i]['syllable']) {
                self::reordering_syllable($info, $GSUBdata, $last, $i);
                $last = $i;
                $last_syllable = $info[$last]['syllable'];
            }
        }
        self::reordering_syllable($info, $GSUBdata, $last, $count);
    }

    /**
     * Insert a dotted circle in front of every broken cluster.
     *
     * The inserted entry is a copy of $dottedcircle[0] carrying the same
     * 'syllable' value as the cluster it precedes.
     *
     * The loop visits every entry, so a broken cluster at the very end of the
     * buffer is handled like any other. No look-up past the last entry is
     * made: the former post-loop check read $info[count($info)], which never
     * exists and only produced "undefined offset" diagnostics.
     *
     * @param array $info         Buffer; modified in place.
     * @param array $dottedcircle Array whose element 0 is the dotted-circle entry.
     *
     * @return void
     */
    public static function insert_dotted_circles(&$info, $dottedcircle)
    {
        $idx = 0;
        $last_syllable = 0;
        // count() is re-evaluated on purpose: the buffer grows while we iterate.
        while ($idx < count($info)) {
            $syllable = $info[$idx]['syllable'];
            $syllable_type = ($syllable & 0x0F);
            if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) {
                $last_syllable = $syllable;
                $dottedcircle[0]['syllable'] = $syllable;
                // $idx is not advanced: the next pass sees the inserted entry,
                // whose syllable now equals $last_syllable, and steps over it.
                array_splice($info, $idx, 0, $dottedcircle);
            } else {
                $idx++;
            }
        }
    }

    /* Rules from:
     * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */

    /**
     * Assign visual positions inside one syllable and sort it by position.
     *
     * @param array $info     Buffer; entries $start .. $end - 1 are modified in place.
     * @param mixed $GSUBdata Not used by this method.
     * @param int   $start    Index of the first entry of the syllable.
     * @param int   $end      Index one past the last entry of the syllable.
     *
     * @return void
     */
    public static function reordering_syllable(&$info, $GSUBdata, $start, $end)
    {
        /* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! */
        /* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */

        $syllable_type = ($info[$start]['syllable'] & 0x0F);
        if ($syllable_type == self::NON_MYANMAR_CLUSTER) {
            return;
        }
        if ($syllable_type == self::BROKEN_CLUSTER) {
            /* For dotted-circle, this is what Uniscribe does:
             * If dotted-circle is the last glyph, it just does nothing.
             * i.e. It doesn't form Reph. */
            if ($info[$end - 1]['myanmar_category'] == self::OT_DOTTEDCIRCLE) {
                return;
            }
        }

        // A leading Ra + Asat + Halant sequence is treated as a reph (kinzi).
        $has_reph = ($start + 3 <= $end) &&
            $info[$start]['myanmar_category'] == self::OT_RA &&
            $info[$start + 1]['myanmar_category'] == self::OT_AS &&
            $info[$start + 2]['myanmar_category'] == self::OT_H;

        // The base is the first consonant at or after $limit; it stays at
        // $start when no consonant is found.
        $limit = $has_reph ? $start + 3 : $start;
        $base = $start;
        for ($i = $limit; $i < $end; $i++) {
            if (self::is_consonant($info[$i])) {
                $base = $i;
                break;
            }
        }

        /* Reorder! */
        $i = $start;
        for (; $i < $start + ($has_reph ? 3 : 0); $i++) {
            $info[$i]['myanmar_position'] = self::POS_AFTER_MAIN;
        }
        for (; $i < $base; $i++) {
            $info[$i]['myanmar_position'] = self::POS_PRE_C;
        }
        if ($i < $end) {
            $info[$i]['myanmar_position'] = self::POS_BASE_C;
            $i++;
        }
        $pos = self::POS_AFTER_MAIN;
        /* The following loop may be ugly, but it implements all of
         * Myanmar reordering! */
        for (; $i < $end; $i++) {
            if ($info[$i]['myanmar_category'] == self::OT_MR) { /* Pre-base reordering */
                $info[$i]['myanmar_position'] = self::POS_PRE_C;
                continue;
            }
            if ($info[$i]['myanmar_position'] < self::POS_BASE_C) { /* Left matra */
                continue;
            }

            if ($pos == self::POS_AFTER_MAIN && $info[$i]['myanmar_category'] == self::OT_VBLW) {
                $pos = self::POS_BELOW_C;
                $info[$i]['myanmar_position'] = $pos;
                continue;
            }

            if ($pos == self::POS_BELOW_C && $info[$i]['myanmar_category'] == self::OT_A) {
                $info[$i]['myanmar_position'] = self::POS_BEFORE_SUB;
                continue;
            }
            if ($pos == self::POS_BELOW_C && $info[$i]['myanmar_category'] == self::OT_VBLW) {
                $info[$i]['myanmar_position'] = $pos;
                continue;
            }
            if ($pos == self::POS_BELOW_C && $info[$i]['myanmar_category'] != self::OT_A) {
                $pos = self::POS_AFTER_SUB;
                $info[$i]['myanmar_position'] = $pos;
                continue;
            }
            $info[$i]['myanmar_position'] = $pos;
        }

        /* Sit tight, rock 'n roll! */
        self::bubble_sort($info, $start, $end - $start);
    }

    /**
     * Test whether an entry's category is contained in a bit mask.
     *
     * @param array $info  Buffer entry; reads 'myanmar_category' and, when set, 'is_ligature'.
     * @param int   $flags Bit mask built from FLAG() values.
     *
     * @return bool Always false for an entry flagged as a ligature.
     */
    public static function is_one_of($info, $flags)
    {
        if (isset($info['is_ligature']) && $info['is_ligature']) {
            return false;
        } /* If it ligated, all bets are off. */
        return !!(self::FLAG($info['myanmar_category']) & $flags);
    }

    /* Vowels and placeholders treated as if they were consonants. */

    /**
     * Test whether an entry counts as a consonant for base detection.
     *
     * @param array $info Buffer entry.
     *
     * @return bool
     */
    public static function is_consonant($info)
    {
        $consonant_flags = self::FLAG(self::OT_C)
            | self::FLAG(self::OT_CM)
            | self::FLAG(self::OT_RA)
            | self::FLAG(self::OT_V)
            | self::FLAG(self::OT_NBSP)
            | self::FLAG(self::OT_GB);

        return self::is_one_of($info, $consonant_flags);
    }

    // From hb-private.hh
    /**
     * Inclusive range test: $lo <= $u <= $hi.
     *
     * When $lo and $hi differ only in a run of low bits that are all clear in
     * $lo and all set in $hi, the test is done with a single mask compare;
     * otherwise with two ordinary comparisons.
     *
     * @param int $u
     * @param int $lo
     * @param int $hi
     *
     * @return bool
     */
    public static function in_range($u, $lo, $hi)
    {
        if ((($lo ^ $hi) & $lo) == 0 && (($lo ^ $hi) & $hi) == ($lo ^ $hi) && (($lo ^ $hi) & (($lo ^ $hi) + 1)) == 0) {
            return ($u & ~($lo ^ $hi)) == $lo;
        } else {
            return $lo <= $u && $u <= $hi;
        }
    }

    // From hb-private.hh
    /**
     * Bit mask with only bit $x set.
     *
     * @param int $x
     *
     * @return int
     */
    public static function FLAG($x)
    {
        return (1 << ($x));
    }

    /**
     * Bit mask with bits $x through $y (inclusive) set.
     *
     * @param int $x
     * @param int $y
     *
     * @return int
     */
    public static function FLAG_RANGE($x, $y)
    {
        return self::FLAG($y + 1) - self::FLAG($x);
    }

    // BELOW from hb-ot-shape-complex-indic.cc
    // see INDIC for details
    // Each value packs the category in its low bits and the position from
    // bit 8 up (see set_myanmar_properties()).
    public static $myanmar_table = [
        /* Myanmar (1000..109F) */

        /* 1000 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* 1008 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* 1010 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* 1018 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* 1020 */ 3841, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
        /* 1028 */ 3842, 3842, 3842, 2823, 2823, 1543, 1543, 2055,
        /* 1030 */ 2055, 775, 1543, 1543, 1543, 1543, 3848, 3843,
        /* 1038 */ 3848, 3844, 1540, 3857, 3857, 3857, 3857, 3841,
        /* 1040 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
        /* 1048 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
        /* 1050 */ 3841, 3841, 3842, 3842, 3842, 3842, 2823, 2823,
        /* 1058 */ 2055, 2055, 3841, 3841, 3841, 3841, 3857, 3857,
        /* 1060 */ 3857, 3841, 2823, 3843, 3843, 3841, 3841, 2823,
        /* 1068 */ 2823, 3843, 3843, 3843, 3843, 3843, 3841, 3841,
        /* 1070 */ 3841, 1543, 1543, 1543, 1543, 3841, 3841, 3841,
        /* 1078 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* 1080 */ 3841, 3841, 3857, 2823, 775, 1543, 1543, 3843,
        /* 1088 */ 3843, 3843, 3843, 3843, 3843, 3843, 3841, 3843,
        /* 1090 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
        /* 1098 */ 3840, 3840, 3843, 3843, 2823, 1543, 3840, 3840,
        /* Myanmar Extended-A (AA60..AA7F) */

        /* AA60 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* AA68 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
        /* AA70 */ 3840, 3841, 3841, 3841, 3840, 3840, 3840, 3840,
        /* AA78 */ 3840, 3840, 3841, 3843, 3840, 3840, 3840, 3840,
    ];

    // from "hb-ot-shape-complex-indic-table.cc"
    /**
     * Look up the packed category/position value for a code point.
     *
     * @param int $u Unicode code point.
     *
     * @return int Packed value; 3840 for code points without a table entry.
     */
    public static function myanmar_get_categories($u)
    {
        if (0x1000 <= $u && $u <= 0x109F) {
            return self::$myanmar_table[$u - 0x1000 + 0]; // offset 0 for Most "myanmar"
        }
        if (0xAA60 <= $u && $u <= 0xAA7F) {
            return self::$myanmar_table[$u - 0xAA60 + 160]; // offset for extensions
        }
        if ($u == 0x00A0) {
            return 3851; // (ISC_CP | (IMC_x << 8))
        }
        if ($u == 0x25CC) {
            return 3851; // (ISC_CP | (IMC_x << 8))
        }
        return 3840; // (ISC_x | (IMC_x << 8))
    }

    /**
     * Sort a slice of the buffer by ascending 'myanmar_position'.
     *
     * Only adjacent entries are swapped, and only when strictly out of order,
     * so entries with equal positions keep their relative order.
     *
     * @param array $arr   Buffer; modified in place.
     * @param int   $start Index of the first entry of the slice.
     * @param int   $len   Number of entries in the slice.
     *
     * @return void
     */
    public static function bubble_sort(&$arr, $start, $len)
    {
        if ($len < 2) {
            return;
        }
        $k = $start + $len - 2;
        while ($k >= $start) {
            for ($j = $start; $j <= $k; $j++) {
                if ($arr[$j]['myanmar_position'] > $arr[$j + 1]['myanmar_position']) {
                    $t = $arr[$j];
                    $arr[$j] = $arr[$j + 1];
                    $arr[$j + 1] = $t;
                }
            }
            $k--;
        }
    }
}