1<?php 2 3namespace Mpdf\Shaper; 4 5use Mpdf\Ucdn; 6 7class Indic 8{ 9 /* FROM hb-ot-shape-complex-indic-private.hh */ 10 11 // indic_category 12 const OT_X = 0; 13 const OT_C = 1; 14 const OT_V = 2; 15 const OT_N = 3; 16 const OT_H = 4; 17 const OT_ZWNJ = 5; 18 const OT_ZWJ = 6; 19 const OT_M = 7; /* Matra or Dependent Vowel */ 20 const OT_SM = 8; 21 const OT_VD = 9; 22 const OT_A = 10; 23 const OT_NBSP = 11; 24 const OT_DOTTEDCIRCLE = 12; /* Not in the spec, but special in Uniscribe. /Very very/ special! */ 25 const OT_RS = 13; /* Register Shifter, used in Khmer OT spec */ 26 const OT_COENG = 14; 27 const OT_REPHA = 15; 28 29 const OT_RA = 16; /* Not explicitly listed in the OT spec, but used in the grammar. */ 30 const OT_CM = 17; 31 32 /* Visual positions in a syllable from left to right. */ 33 /* FROM hb-ot-shape-complex-indic-private.hh */ 34 35 // indic_position 36 const POS_START = 0; 37 38 const POS_RA_TO_BECOME_REPH = 1; 39 const POS_PRE_M = 2; 40 const POS_PRE_C = 3; 41 42 const POS_BASE_C = 4; 43 const POS_AFTER_MAIN = 5; 44 45 const POS_ABOVE_C = 6; 46 47 const POS_BEFORE_SUB = 7; 48 const POS_BELOW_C = 8; 49 const POS_AFTER_SUB = 9; 50 51 const POS_BEFORE_POST = 10; 52 const POS_POST_C = 11; 53 const POS_AFTER_POST = 12; 54 55 const POS_FINAL_C = 13; 56 const POS_SMVD = 14; 57 58 const POS_END = 15; 59 60 /* 61 * Basic features. 62 * These features are applied in order, one at a time, after initial_reordering. 63 */ 64 65 /* 66 * Must be in the same order as the indic_features array. Ones starting with _ are F_GLOBAL 67 * Ones without the _ are only applied where the mask says! 
68 */ 69 70 const _NUKT = 0; 71 const _AKHN = 1; 72 const RPHF = 2; 73 const _RKRF = 3; 74 const PREF = 4; 75 const BLWF = 5; 76 const HALF = 6; 77 const ABVF = 7; 78 const PSTF = 8; 79 const CFAR = 9; // Khmer only 80 const _VATU = 10; 81 const _CJCT = 11; 82 const INIT = 12; 83 84 // Based on indic_category used to make string to find syllables 85 // OT_ to string character (using e.g. OT_C from INDIC) hb-ot-shape-complex-indic-private.hh 86 public static $indic_category_char = [ 87 'x', 88 'C', 89 'V', 90 'N', 91 'H', 92 'Z', 93 'J', 94 'M', 95 'S', 96 'v', 97 'A', /* Spec gives Andutta U+0952 as OT_A. However, testing shows that Uniscribe 98 * treats U+0951..U+0952 all as OT_VD - see set_indic_properties */ 99 's', 100 'D', 101 'F', /* Register shift Khmer only */ 102 'G', /* Khmer only */ 103 'r', /* 0D4E (dot reph) only one in Malayalam */ 104 'R', 105 'm', /* Consonant medial only used in Indic 0A75 in Gurmukhi (0A00..0A7F) : also in Lao, Myanmar, Tai Tham, Javanese & Cham */ 106 ]; 107 108 public static function set_indic_properties(&$info, $scriptblock) 109 { 110 $u = $info['uni']; 111 $type = self::indic_get_categories($u); 112 $cat = ($type & 0x7F); 113 $pos = ($type >> 8); 114 115 /* 116 * Re-assign category 117 */ 118 119 if ($u == 0x17D1) { 120 $cat = self::OT_X; 121 } 122 123 if ($cat == self::OT_X && self::in_range($u, 0x17CB, 0x17D3)) { /* Khmer Various signs */ 124 /* These are like Top Matras. */ 125 $cat = self::OT_M; 126 $pos = self::POS_ABOVE_C; 127 } 128 129 if ($u == 0x17C6) { 130 $cat = self::OT_N; 131 } /* Khmer Bindu doesn't like to be repositioned. */ 132 133 if ($u == 0x17D2) { 134 $cat = self::OT_COENG; 135 } /* Khmer coeng */ 136 137 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe 138 * treats U+0951..U+0952 all as OT_VD. 
139 * TESTS: 140 * U+092E,U+0947,U+0952 141 * U+092E,U+0952,U+0947 142 * U+092E,U+0947,U+0951 143 * U+092E,U+0951,U+0947 144 * */ 145 //if ($u == 0x0952) $cat = self::OT_A; 146 if (self::in_range($u, 0x0951, 0x0954)) { 147 $cat = self::OT_VD; 148 } 149 150 if ($u == 0x200C) { 151 $cat = self::OT_ZWNJ; 152 } elseif ($u == 0x200D) { 153 $cat = self::OT_ZWJ; 154 } elseif ($u == 0x25CC) { 155 $cat = self::OT_DOTTEDCIRCLE; 156 } elseif ($u == 0x0A71) { 157 $cat = self::OT_SM; 158 } /* GURMUKHI ADDAK. More like consonant medial. like 0A75. */ 159 160 if ($cat == self::OT_REPHA) { 161 /* There are two kinds of characters marked as Repha: 162 * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer) 163 * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam) 164 * 165 * We recategorize the first kind to look like a Nukta and attached to the base directly. 166 */ 167 if ($info['general_category'] == Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { 168 $cat = self::OT_N; 169 } 170 } 171 172 /* 173 * Re-assign position. 174 */ 175 176 if ((self::FLAG($cat) & (self::FLAG(self::OT_C) | self::FLAG(self::OT_CM) | self::FLAG(self::OT_RA) | self::FLAG(self::OT_V) | self::FLAG(self::OT_NBSP) | self::FLAG(self::OT_DOTTEDCIRCLE)))) { // = CONSONANT_FLAGS like is_consonant 177 if ($scriptblock == Ucdn::SCRIPT_KHMER) { 178 $pos = self::POS_BELOW_C; 179 } /* Khmer differs from Indic here. */ 180 else { 181 $pos = self::POS_BASE_C; 182 } /* Will recategorize later based on font lookups. */ 183 184 if (self::is_ra($u)) { 185 $cat = self::OT_RA; 186 } 187 } elseif ($cat == self::OT_M) { 188 $pos = self::matra_position($u, $pos); 189 } elseif ($cat == self::OT_SM || $cat == self::OT_VD) { 190 $pos = self::POS_SMVD; 191 } 192 193 if ($u == 0x0B01) { 194 $pos = self::POS_BEFORE_SUB; 195 } /* Oriya Bindu is BeforeSub in the spec. 
*/ 196 197 $info['indic_category'] = $cat; 198 $info['indic_position'] = $pos; 199 } 200 201 // syllable_type 202 const CONSONANT_SYLLABLE = 0; 203 const VOWEL_SYLLABLE = 1; 204 const STANDALONE_CLUSTER = 2; 205 const BROKEN_CLUSTER = 3; 206 const NON_INDIC_CLUSTER = 4; 207 208 public static function set_syllables(&$o, $s, &$broken_syllables) 209 { 210 $ptr = 0; 211 $syllable_serial = 1; 212 $broken_syllables = false; 213 214 while ($ptr < strlen($s)) { 215 $match = ''; 216 $syllable_length = 1; 217 $syllable_type = self::NON_INDIC_CLUSTER; 218 // CONSONANT_SYLLABLE Consonant syllable 219 // From OT spec: 220 if (preg_match('/^([CR]m*[N]?(H[ZJ]?|[ZJ]H))*[CR]m*[N]?[A]?(H[ZJ]?|[M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) { 221 // From HarfBuzz: 222 //if (preg_match('/^r?([CR]J?(Z?[N]{0,2})?[ZJ]?H(J[N]?)?){0,4}[CR]J?(Z?[N]{0,2})?A?((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) { 223 $syllable_length = strlen($ma[0]); 224 $syllable_type = self::CONSONANT_SYLLABLE; 225 } // VOWEL_SYLLABLE Vowel-based syllable 226 // From OT spec: 227 elseif (preg_match('/^(RH|r)?V[N]?([ZJ]?H[CR]m*|J[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) { 228 // From HarfBuzz: 229 //else if (preg_match('/^(RH|r)?V(Z?[N]{0,2})?(J|([ZJ]?H(J[N]?)?[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2})/', substr($s,$ptr), $ma)) { 230 $syllable_length = strlen($ma[0]); 231 $syllable_type = self::VOWEL_SYLLABLE; 232 } /* Apply only if it's a word start. 
*/ 233 // STANDALONE_CLUSTER Stand Alone syllable at start of word 234 // From OT spec: 235 elseif (($ptr == 0 || 236 $o[$ptr - 1]['general_category'] < Ucdn::UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER || 237 $o[$ptr - 1]['general_category'] > Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK 238 ) && (preg_match('/^(RH|r)?[sD][N]?([ZJ]?H[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma))) { 239 // From HarfBuzz: 240 // && (preg_match('/^(RH|r)?[sD](Z?[N]{0,2})?(([ZJ]?H(J[N]?)?)[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})?(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) { 241 $syllable_length = strlen($ma[0]); 242 $syllable_type = self::STANDALONE_CLUSTER; 243 } // BROKEN_CLUSTER syllable 244 elseif (preg_match('/^(RH|r)?[N]?([ZJ]?H[CR])?([M]*[N]?[H]?)?[S]?[v]{0,2}/', substr($s, $ptr), $ma)) { 245 // From HarfBuzz: 246 //else if (preg_match('/^(RH|r)?(Z?[N]{0,2})?(([ZJ]?H(J[N]?)?)[CR]J?(Z?[N]{0,2})?){0,4}((([ZJ]?H(J[N]?)?)|HZ)|(HJ)?([ZJ]{0,3}M[N]?(H|JHJR)?){0,4})(S[Z]?)?[v]{0,2}/', substr($s,$ptr), $ma)) { 247 if (strlen($ma[0])) { // May match blank 248 $syllable_length = strlen($ma[0]); 249 $syllable_type = self::BROKEN_CLUSTER; 250 $broken_syllables = true; 251 } 252 } 253 254 for ($i = $ptr; $i < $ptr + $syllable_length; $i++) { 255 $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type; 256 } 257 $ptr += $syllable_length; 258 $syllable_serial++; 259 if ($syllable_serial == 16) { 260 $syllable_serial = 1; 261 } 262 } 263 } 264 265 public static function set_syllables_sinhala(&$o, $s, &$broken_syllables) 266 { 267 $ptr = 0; 268 $syllable_serial = 1; 269 $broken_syllables = false; 270 271 while ($ptr < strlen($s)) { 272 $match = ''; 273 $syllable_length = 1; 274 $syllable_type = self::NON_INDIC_CLUSTER; 275 // CONSONANT_SYLLABLE Consonant syllable 276 // From OT spec: 277 if (preg_match('/^([CR]HJ|[CR]JH){0,8}[CR][HM]{0,3}[S]{0,1}/', substr($s, $ptr), $ma)) { 278 $syllable_length = strlen($ma[0]); 279 
$syllable_type = self::CONSONANT_SYLLABLE; 280 } // VOWEL_SYLLABLE Vowel-based syllable 281 // From OT spec: 282 elseif (preg_match('/^V[S]{0,1}/', substr($s, $ptr), $ma)) { 283 $syllable_length = strlen($ma[0]); 284 $syllable_type = self::VOWEL_SYLLABLE; 285 } 286 287 for ($i = $ptr; $i < $ptr + $syllable_length; $i++) { 288 $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type; 289 } 290 $ptr += $syllable_length; 291 $syllable_serial++; 292 if ($syllable_serial == 16) { 293 $syllable_serial = 1; 294 } 295 } 296 } 297 298 public static function set_syllables_khmer(&$o, $s, &$broken_syllables) 299 { 300 $ptr = 0; 301 $syllable_serial = 1; 302 $broken_syllables = false; 303 304 while ($ptr < strlen($s)) { 305 $match = ''; 306 $syllable_length = 1; 307 $syllable_type = self::NON_INDIC_CLUSTER; 308 // CONSONANT_SYLLABLE Consonant syllable 309 if (preg_match('/^r?([CR]J?((Z?F)?[N]{0,2})?[ZJ]?G(JN?)?){0,4}[CR]J?((Z?F)?[N]{0,2})?A?((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', substr($s, $ptr), $ma)) { 310 $syllable_length = strlen($ma[0]); 311 $syllable_type = self::CONSONANT_SYLLABLE; 312 } // VOWEL_SYLLABLE Vowel-based syllable 313 elseif (preg_match('/^(RH|r)?V((Z?F)?[N]{0,2})?(J|([ZJ]?G(JN?)?[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2})/', substr($s, $ptr), $ma)) { 314 $syllable_length = strlen($ma[0]); 315 $syllable_type = self::VOWEL_SYLLABLE; 316 } // BROKEN_CLUSTER syllable 317 elseif (preg_match('/^(RH|r)?((Z?F)?[N]{0,2})?(([ZJ]?G(JN?)?)[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', substr($s, $ptr), $ma)) { 318 if (strlen($ma[0])) { // May match blank 319 $syllable_length = strlen($ma[0]); 320 $syllable_type = self::BROKEN_CLUSTER; 321 $broken_syllables = true; 322 } 323 } 324 325 for ($i = $ptr; $i < $ptr + $syllable_length; 
$i++) { 326 $o[$i]['syllable'] = ($syllable_serial << 4) | $syllable_type; 327 } 328 $ptr += $syllable_length; 329 $syllable_serial++; 330 if ($syllable_serial == 16) { 331 $syllable_serial = 1; 332 } 333 } 334 } 335 336 public static function initial_reordering(&$info, $GSUBdata, $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle) 337 { 338 339 self::update_consonant_positions($info, $GSUBdata); 340 341 if ($broken_syllables && $dottedcircle) { 342 self::insert_dotted_circles($info, $dottedcircle); 343 } 344 345 $count = count($info); 346 if (!$count) { 347 return; 348 } 349 $last = 0; 350 $last_syllable = $info[0]['syllable']; 351 for ($i = 1; $i < $count; $i++) { 352 if ($last_syllable != $info[$i]['syllable']) { 353 self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $i); 354 $last = $i; 355 $last_syllable = $info[$last]['syllable']; 356 } 357 } 358 self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $count); 359 } 360 361 public static function update_consonant_positions(&$info, $GSUBdata) 362 { 363 $count = count($info); 364 for ($i = 0; $i < $count; $i++) { 365 if ($info[$i]['indic_position'] == self::POS_BASE_C) { 366 $c = $info[$i]['uni']; 367 // If would substitute... 
368 if (isset($GSUBdata['pref'][$c])) { 369 $info[$i]['indic_position'] = self::POS_POST_C; 370 } elseif (isset($GSUBdata['blwf'][$c])) { 371 $info[$i]['indic_position'] = self::POS_BELOW_C; 372 } elseif (isset($GSUBdata['pstf'][$c])) { 373 $info[$i]['indic_position'] = self::POS_POST_C; 374 } 375 } 376 } 377 } 378 379 public static function insert_dotted_circles(&$info, $dottedcircle) 380 { 381 $idx = 0; 382 $last_syllable = 0; 383 while ($idx < count($info)) { 384 $syllable = $info[$idx]['syllable']; 385 $syllable_type = ($syllable & 0x0F); 386 if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) { 387 $last_syllable = $syllable; 388 389 $dottedcircle[0]['syllable'] = $info[$idx]['syllable']; 390 391 /* Insert dottedcircle after possible Repha. */ 392 while ($idx < count($info) && $last_syllable == $info[$idx]['syllable'] && $info[$idx]['indic_category'] == self::OT_REPHA) { 393 $idx++; 394 } 395 array_splice($info, $idx, 0, $dottedcircle); 396 } else { 397 $idx++; 398 } 399 } 400 401 // I am not sue how this code below got in here, since $idx should now be > count($info) and thus invalid. 402 // In case I am missing something(!) I'll leave a warning here for now: 403 if (isset($info[$idx])) { 404 throw new \Mpdf\MpdfException('Unexpected error occurred in Indic processing'); 405 } 406 // In case of final bloken cluster... 407 //$syllable = $info[$idx]['syllable']; 408 //$syllable_type = ($syllable & 0x0F); 409 //if ($last_syllable != $syllable && $syllable_type == self::BROKEN_CLUSTER) { 410 // $dottedcircle[0]['syllable'] = $info[$idx]['syllable']; 411 // array_splice($info, $idx, 0, $dottedcircle); 412 //} 413 } 414 415 /* Rules from: 416 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ 417 418 public static function initial_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end) 419 { 420 /* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! 
*/ 421 /* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */ 422 /* standalone_cluster: We treat NBSP/dotted-circle as if they are consonants, so we should just chain. */ 423 424 $syllable_type = ($info[$start]['syllable'] & 0x0F); 425 if ($syllable_type == self::NON_INDIC_CLUSTER) { 426 return; 427 } 428 if ($syllable_type == self::BROKEN_CLUSTER || $syllable_type == self::STANDALONE_CLUSTER) { 429 //if ($uniscribe_bug_compatible) { 430 /* For dotted-circle, this is what Uniscribe does: 431 * If dotted-circle is the last glyph, it just does nothing. 432 * i.e. It doesn't form Reph. */ 433 if ($info[$end - 1]['indic_category'] == self::OT_DOTTEDCIRCLE) { 434 return; 435 } 436 } 437 438 /* 1. Find base consonant: 439 * 440 * The shaping engine finds the base consonant of the syllable, using the 441 * following algorithm: starting from the end of the syllable, move backwards 442 * until a consonant is found that does not have a below-base or post-base 443 * form (post-base forms have to follow below-base forms), or that is not a 444 * pre-base reordering Ra, or arrive at the first consonant. The consonant 445 * stopped at will be the base. 446 * 447 * o If the syllable starts with Ra + Halant (in a script that has Reph) 448 * and has more than one consonant, Ra is excluded from candidates for 449 * base consonants. 450 */ 451 452 $base = $end; 453 $has_reph = false; 454 $limit = $start; 455 456 if ($scriptblock != Ucdn::SCRIPT_KHMER) { 457 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 458 * and has more than one consonant, Ra is excluded from candidates for 459 * base consonants. */ 460 if (count($GSUBdata['rphf']) /* ?? 
$indic_plan->mask_array[RPHF] */ && $start + 3 <= $end && 461 ( 462 ($indic_config[4] == self::REPH_MODE_IMPLICIT && !self::is_joiner($info[$start + 2])) || 463 ($indic_config[4] == self::REPH_MODE_EXPLICIT && $info[$start + 2]['indic_category'] == self::OT_ZWJ) 464 )) { 465 /* See if it matches the 'rphf' feature. */ 466 //$glyphs = array($info[$start]['uni'], $info[$start + 1]['uni']); 467 //if ($indic_plan->rphf->would_substitute ($glyphs, count($glyphs), true, face)) { 468 if (isset($GSUBdata['rphf'][$info[$start]['uni']]) && self::is_halant_or_coeng($info[$start + 1])) { 469 $limit += 2; 470 while ($limit < $end && self::is_joiner($info[$limit])) { 471 $limit++; 472 } 473 $base = $start; 474 $has_reph = true; 475 } 476 } elseif ($indic_config[4] == self::REPH_MODE_LOG_REPHA && $info[$start]['indic_category'] == self::OT_REPHA) { 477 $limit += 1; 478 while ($limit < $end && self::is_joiner($info[$limit])) { 479 $limit++; 480 } 481 $base = $start; 482 $has_reph = true; 483 } 484 } 485 486 switch ($indic_config[2]) { // base_pos 487 case self::BASE_POS_LAST: 488 /* -> starting from the end of the syllable, move backwards */ 489 $i = $end; 490 $seen_below = false; 491 do { 492 $i--; 493 /* -> until a consonant is found */ 494 if (self::is_consonant($info[$i])) { 495 /* -> that does not have a below-base or post-base form 496 * (post-base forms have to follow below-base forms), */ 497 if ($info[$i]['indic_position'] != self::POS_BELOW_C && ($info[$i]['indic_position'] != self::POS_POST_C || $seen_below)) { 498 $base = $i; 499 break; 500 } 501 if ($info[$i]['indic_position'] == self::POS_BELOW_C) { 502 $seen_below = true; 503 } 504 505 /* -> or that is not a pre-base reordering Ra, 506 * 507 * IMPLEMENTATION NOTES: 508 * 509 * Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped 510 * by the logic above already. 511 */ 512 513 /* -> or arrive at the first consonant. The consonant stopped at will 514 * be the base. 
*/ 515 $base = $i; 516 } else { 517 /* A ZWJ after a Halant stops the base search, and requests an explicit 518 * half form. 519 * [A ZWJ before a Halant, requests a subjoined form instead, and hence 520 * search continues. This is particularly important for Bengali 521 * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya] */ 522 if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWJ && $info[$i - 1]['indic_category'] == self::OT_H) { 523 if (!defined("OMIT_INDIC_FIX_1") || OMIT_INDIC_FIX_1 != 1) { 524 $base = $i; 525 } // INDIC_FIX_1 526 break; 527 } 528 // ZKI8 529 if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWNJ) { 530 break; 531 } 532 } 533 } while ($i > $limit); 534 break; 535 536 case self::BASE_POS_FIRST: 537 /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */ 538 539 if (!$has_reph) { 540 $base = $limit; 541 } 542 543 /* Find the last base consonant that is not blocked by ZWJ. If there is 544 * a ZWJ right before a base consonant, that would request a subjoined form. */ 545 for ($i = $limit; $i < $end; $i++) { 546 if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) { 547 if ($limit < $i && $info[$i - 1]['indic_category'] == self::OT_ZWJ) { 548 break; 549 } else { 550 $base = $i; 551 } 552 } 553 } 554 555 /* Mark all subsequent consonants as below. */ 556 for ($i = $base + 1; $i < $end; $i++) { 557 if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) { 558 $info[$i]['indic_position'] = self::POS_BELOW_C; 559 } 560 } 561 break; 562 //default: 563 //assert (false); 564 /* fallthrough */ 565 } 566 567 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 568 * and has more than one consonant, Ra is excluded from candidates for 569 * base consonants. 570 * 571 * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. 
*/ 572 if ($scriptblock != Ucdn::SCRIPT_KHMER) { 573 if ($has_reph && $base == $start && $limit - $base <= 2) { 574 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ 575 $has_reph = false; 576 } 577 } 578 579 /* 2. Decompose and reorder Matras: 580 * 581 * Each matra and any syllable modifier sign in the cluster are moved to the 582 * appropriate position relative to the consonant(s) in the cluster. The 583 * shaping engine decomposes two- or three-part matras into their constituent 584 * parts before any repositioning. Matra characters are classified by which 585 * consonant in a conjunct they have affinity for and are reordered to the 586 * following positions: 587 * 588 * o Before first half form in the syllable 589 * o After subjoined consonants 590 * o After post-form consonant 591 * o After main consonant (for above marks) 592 * 593 * IMPLEMENTATION NOTES: 594 * 595 * The normalize() routine has already decomposed matras for us, so we don't 596 * need to worry about that. 597 */ 598 599 600 /* 3. Reorder marks to canonical order: 601 * 602 * Adjacent nukta and halant or nukta and vedic sign are always repositioned 603 * if necessary, so that the nukta is first. 604 * 605 * IMPLEMENTATION NOTES: 606 * 607 * Use the combining Class from Unicode categories? to bubble_sort. 608 */ 609 610 /* Reorder characters */ 611 612 for ($i = $start; $i < $base; $i++) { 613 $info[$i]['indic_position'] = min(self::POS_PRE_C, $info[$i]['indic_position']); 614 } 615 616 if ($base < $end) { 617 $info[$base]['indic_position'] = self::POS_BASE_C; 618 } 619 620 /* Mark final consonants. A final consonant is one appearing after a matra, 621 * ? only in Khmer. 
*/ 622 for ($i = $base + 1; $i < $end; $i++) { 623 if ($info[$i]['indic_category'] == self::OT_M) { 624 for ($j = $i + 1; $j < $end; $j++) { 625 if (self::is_consonant($info[$j])) { 626 $info[$j]['indic_position'] = self::POS_FINAL_C; 627 break; 628 } 629 } 630 break; 631 } 632 } 633 634 /* Handle beginning Ra */ 635 if ($scriptblock != Ucdn::SCRIPT_KHMER) { 636 if ($has_reph) { 637 $info[$start]['indic_position'] = self::POS_RA_TO_BECOME_REPH; 638 } 639 } 640 641 642 /* For old-style Indic script tags, move the first post-base Halant after 643 * last consonant. Only do this if there is *not* a Halant after last 644 * consonant. Otherwise it becomes messy. */ 645 if ($is_old_spec) { 646 for ($i = $base + 1; $i < $end; $i++) { 647 if ($info[$i]['indic_category'] == self::OT_H) { 648 for ($j = $end - 1; $j > $i; $j--) { 649 if (self::is_consonant($info[$j]) || $info[$j]['indic_category'] == self::OT_H) { 650 break; 651 } 652 } 653 if ($info[$j]['indic_category'] != self::OT_H && $j > $i) { 654 /* Move Halant to after last consonant. */ 655 self::_move_info_pos($info, $i, $j + 1); 656 } 657 break; 658 } 659 } 660 } 661 662 /* Attach misc marks to previous char to move with them. */ 663 $last_pos = self::POS_START; 664 for ($i = $start; $i < $end; $i++) { 665 if ((self::FLAG($info[$i]['indic_category']) & (self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ) | self::FLAG(self::OT_N) | self::FLAG(self::OT_RS) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG) ))) { 666 $info[$i]['indic_position'] = $last_pos; 667 if ($info[$i]['indic_category'] == self::OT_H && $info[$i]['indic_position'] == self::POS_PRE_M) { 668 /* 669 * Uniscribe doesn't move the Halant with Left Matra. 670 * TEST: U+092B,U+093F,U+094DE 671 * We follow. This is important for the Sinhala 672 * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA 673 * where U+0DD9 is a left matra and U+0DCA is the virama. 674 * We don't want to move the virama with the left matra. 
675 * TEST: U+0D9A,U+0DDA 676 */ 677 for ($j = $i; $j > $start; $j--) { 678 if ($info[$j - 1]['indic_position'] != self::POS_PRE_M) { 679 $info[$i]['indic_position'] = $info[$j - 1]['indic_position']; 680 break; 681 } 682 } 683 } 684 } elseif ($info[$i]['indic_position'] != self::POS_SMVD) { 685 $last_pos = $info[$i]['indic_position']; 686 } 687 } 688 689 /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */ 690 $last_halant = $end; 691 for ($i = $base + 1; $i < $end; $i++) { 692 if (self::is_halant_or_coeng($info[$i])) { 693 $last_halant = $i; 694 } elseif (self::is_consonant($info[$i])) { 695 for ($j = $last_halant; $j < $i; $j++) { 696 if ($info[$j]['indic_position'] != self::POS_SMVD) { 697 $info[$j]['indic_position'] = $info[$i]['indic_position']; 698 } 699 } 700 } 701 } 702 703 704 if ($scriptblock == Ucdn::SCRIPT_KHMER) { 705 /* KHMER_FIX_2 */ 706 /* Move Coeng+RO (Halant,Ra) sequence before base consonant. */ 707 for ($i = $base + 1; $i < $end; $i++) { 708 if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) { 709 $info[$i]['indic_position'] = self::POS_PRE_C; 710 $info[$i + 1]['indic_position'] = self::POS_PRE_C; 711 break; 712 } 713 } 714 } 715 716 717 /* 718 if (!defined("OMIT_INDIC_FIX_2") || OMIT_INDIC_FIX_2 != 1) { 719 // INDIC_FIX_2 720 $ZWNJ_found = false; 721 $POST_ZWNJ_c_found = false; 722 for ($i = $base + 1; $i < $end; $i++) { 723 if ($info[$i]['indic_category'] == self::OT_ZWNJ) { $ZWNJ_found = true; } 724 else if ($ZWNJ_found && $info[$i]['indic_category'] == self::OT_C) { $POST_ZWNJ_c_found = true; } 725 else if ($POST_ZWNJ_c_found && $info[$i]['indic_position'] == self::POS_BEFORE_SUB) { $info[$i]['indic_position'] = self::POS_AFTER_SUB; } 726 } 727 } 728 */ 729 730 /* Setup masks now */ 731 for ($i = $start; $i < $end; $i++) { 732 $info[$i]['mask'] = 0; 733 } 734 735 736 if ($scriptblock == Ucdn::SCRIPT_KHMER) { 737 /* Find a Coeng+RO (Halant,Ra) sequence and mark it for pre-base processing. 
*/ 738 $mask = self::FLAG(self::PREF); 739 for ($i = $base; $i < $end - 1; $i++) { /* KHMER_FIX_1 From $start (not base) */ 740 if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) { 741 $info[$i]['mask'] |= self::FLAG(self::PREF); 742 $info[$i + 1]['mask'] |= self::FLAG(self::PREF); 743 744 /* Mark the subsequent stuff with 'cfar'. Used in Khmer. 745 * Read the feature spec. 746 * This allows distinguishing the following cases with MS Khmer fonts: 747 * U+1784,U+17D2,U+179A,U+17D2,U+1782 [C+Coeng+RO+Coeng+C] => Should activate CFAR 748 * U+1784,U+17D2,U+1782,U+17D2,U+179A [C+Coeng+C+Coeng+RO] => Should NOT activate CFAR 749 */ 750 for ($j = ($i + 2); $j < $end; $j++) { 751 $info[$j]['mask'] |= self::FLAG(self::CFAR); 752 } 753 754 break; 755 } 756 } 757 } 758 759 760 761 /* Sit tight, rock 'n roll! */ 762 self::bubble_sort($info, $start, $end - $start); 763 764 /* Find base again */ 765 $base = $end; 766 for ($i = $start; $i < $end; $i++) { 767 if ($info[$i]['indic_position'] == self::POS_BASE_C) { 768 $base = $i; 769 break; 770 } 771 } 772 773 if ($scriptblock != Ucdn::SCRIPT_KHMER) { 774 /* Reph */ 775 for ($i = $start; $i < $end; $i++) { 776 if ($info[$i]['indic_position'] == self::POS_RA_TO_BECOME_REPH) { 777 $info[$i]['mask'] |= self::FLAG(self::RPHF); 778 } 779 } 780 781 /* Pre-base */ 782 $mask = self::FLAG(self::HALF); 783 for ($i = $start; $i < $base; $i++) { 784 $info[$i]['mask'] |= $mask; 785 } 786 } 787 788 /* Post-base */ 789 $mask = (self::FLAG(self::BLWF) | self::FLAG(self::ABVF) | self::FLAG(self::PSTF)); 790 for ($i = $base + 1; $i < $end; $i++) { 791 $info[$i]['mask'] |= $mask; 792 } 793 794 795 if ($scriptblock != Ucdn::SCRIPT_KHMER) { 796 if (!defined("OMIT_INDIC_FIX_3") || OMIT_INDIC_FIX_3 != 1) { 797 /* INDIC_FIX_3 */ 798 /* Find a (pre-base) Consonant, Halant,Ra sequence and mark Halant|Ra for below-base BLWF processing. 
*/ 799 // TEST CASE ক্র্ক in FreeSans versus Vrinda 800 if (($base - $start) >= 3) { 801 for ($i = $start; $i < ($base - 2); $i++) { 802 if (self::is_consonant($info[$i])) { 803 if (self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i + 2]['uni'])) { 804 // If would substitute Halant+Ra...BLWF 805 if (isset($GSUBdata['blwf'][$info[$i + 2]['uni']])) { 806 $info[$i + 1]['mask'] |= self::FLAG(self::BLWF); 807 $info[$i + 2]['mask'] |= self::FLAG(self::BLWF); 808 } /* If would not substitute as blwf, mark Ra+Halant for RPHF using following Halant (if present) */ elseif (self::is_halant_or_coeng($info[$i + 3])) { 809 $info[$i + 2]['mask'] |= self::FLAG(self::RPHF); 810 $info[$i + 3]['mask'] |= self::FLAG(self::RPHF); 811 } 812 break; 813 } 814 } 815 } 816 } 817 } 818 } 819 820 821 822 if ($is_old_spec && $scriptblock == Ucdn::SCRIPT_DEVANAGARI) { 823 /* Old-spec eye-lash Ra needs special handling. From the spec: 824 * "The feature 'below-base form' is applied to consonants 825 * having below-base forms and following the base consonant. 826 * The exception is vattu, which may appear below half forms 827 * as well as below the base glyph. The feature 'below-base 828 * form' will be applied to all such occurrences of Ra as well." 829 * 830 * Test case: U+0924,U+094D,U+0930,U+094d,U+0915 831 * with Sanskrit 2003 font. 832 * 833 * However, note that Ra,Halant,ZWJ is the correct way to 834 * request eyelash form of Ra, so we wouldbn't inhibit it 835 * in that sequence. 
836 * 837 * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 838 */ 839 for ($i = $start; ($i + 1) < $base; $i++) { 840 if ($info[$i]['indic_category'] == self::OT_RA && $info[$i + 1]['indic_category'] == self::OT_H && 841 ($i + 2 == $base || $info[$i + 2]['indic_category'] != self::OT_ZWJ)) { 842 $info[$i]['mask'] |= self::FLAG(self::BLWF); 843 $info[$i + 1]['mask'] |= self::FLAG(self::BLWF); 844 } 845 } 846 } 847 848 if ($scriptblock != Ucdn::SCRIPT_KHMER) { 849 if (count($GSUBdata['pref']) && $base + 2 < $end) { 850 /* Find a Halant,Ra sequence and mark it for pre-base processing. */ 851 for ($i = $base + 1; $i + 1 < $end; $i++) { 852 // If old_spec find Ra-Halant... 853 if ((isset($GSUBdata['pref'][$info[$i + 1]['uni']]) && self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni']) ) || 854 ($is_old_spec && isset($GSUBdata['pref'][$info[$i]['uni']]) && self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i]['uni']) ) 855 ) { 856 $info[$i++]['mask'] |= self::FLAG(self::PREF); 857 $info[$i++]['mask'] |= self::FLAG(self::PREF); 858 break; 859 } 860 } 861 } 862 } 863 864 865 /* Apply ZWJ/ZWNJ effects */ 866 for ($i = $start + 1; $i < $end; $i++) { 867 if (self::is_joiner($info[$i])) { 868 $non_joiner = ($info[$i]['indic_category'] == self::OT_ZWNJ); 869 $j = $i; 870 while ($j > $start) { 871 if (defined("OMIT_INDIC_FIX_4") && OMIT_INDIC_FIX_4 == 1) { 872 // INDIC_FIX_4 = do nothing - carry on // 873 // ZWNJ should block H C from forming blwf post-base - need to unmask backwards beyond first consonant arrived at // 874 if (!self::is_consonant($info[$j])) { 875 break; 876 } 877 } 878 $j--; 879 880 /* ZWJ/ZWNJ should disable CJCT. They do that by simply 881 * being there, since we don't skip them for the CJCT 882 * feature (ie. F_MANUAL_ZWJ) */ 883 884 /* A ZWNJ disables HALF. 
*/
                    if ($non_joiner) {
                        $info[$j]['mask'] &= ~(self::FLAG(self::HALF) | self::FLAG(self::BLWF));
                    }
                }
            }
        }
    }

    /**
     * Run the final reordering pass over every syllable in $info.
     *
     * Consecutive entries sharing the same 'syllable' value form one run;
     * each run is handed to final_reordering_syllable() as the half-open
     * index range [start, end).
     *
     * @param array $info         Glyph info records, reordered in place.
     * @param array $GSUBdata     GSUB lookup data; only the 'pref' entry is read here.
     * @param array $indic_config Row of self::$indic_configs for the script (index 3 = reph_pos).
     * @param int   $scriptblock  Script identifier compared against Ucdn::SCRIPT_* constants.
     * @param bool  $is_old_spec  Passed through unchanged to final_reordering_syllable().
     */
    public static function final_reordering(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec)
    {
        $count = count($info);
        if (!$count) {
            return;
        }
        $last = 0;
        $last_syllable = $info[0]['syllable'];
        for ($i = 1; $i < $count; $i++) {
            if ($last_syllable != $info[$i]['syllable']) {
                self::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $i);
                $last = $i;
                $last_syllable = $info[$last]['syllable'];
            }
        }
        /* Flush the trailing syllable. */
        self::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $last, $count);
    }

    /**
     * Final reordering of one syllable, $info[$start] .. $info[$end - 1].
     *
     * Steps, in order: locate the base again, move pre-base matras, move the
     * reph, move a pre-base reordering consonant, and flag a syllable-initial
     * left matra for the 'init' feature.
     *
     * @param array $info         Glyph info records, reordered in place.
     * @param array $GSUBdata     GSUB lookup data; only count($GSUBdata['pref']) is consulted.
     * @param array $indic_config Row of self::$indic_configs; index 3 (reph_pos) is read.
     * @param int   $scriptblock  Script identifier compared against Ucdn::SCRIPT_* constants.
     * @param bool  $is_old_spec  Not read by this method; kept for signature compatibility.
     * @param int   $start        Index of the first glyph of the syllable.
     * @param int   $end          Index one past the last glyph of the syllable.
     */
    public static function final_reordering_syllable(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end)
    {

        /* 4. Final reordering:
         *
         * After the localized forms and basic shaping forms GSUB features have been
         * applied (see below), the shaping engine performs some final glyph
         * reordering before applying all the remaining font features to the entire
         * cluster.
         */

        /* Find base again */
        for ($base = $start; $base < $end; $base++) {
            if ($info[$base]['indic_position'] >= self::POS_BASE_C) {
                if ($start < $base && $info[$base]['indic_position'] > self::POS_BASE_C) {
                    $base--;
                }
                break;
            }
        }
        if ($base == $end && $start < $base && $info[$base - 1]['indic_category'] != self::OT_ZWJ) {
            $base--;
        }
        while ($start < $base && isset($info[$base]) && ($info[$base]['indic_category'] == self::OT_H || $info[$base]['indic_category'] == self::OT_N)) {
            $base--;
        }


        /* o Reorder matras:
         *
         * If a pre-base matra character had been reordered before applying basic
         * features, the glyph can be moved closer to the main consonant based on
         * whether half-forms had been formed. Actual position for the matra is
         * defined as "after last standalone halant glyph, after initial matra
         * position and before the main consonant". If ZWJ or ZWNJ follow this
         * halant, position is moved after it.
         */


        if ($start + 1 < $end && $start < $base) { /* Otherwise there can't be any pre-base matra characters. */
            /* If we lost track of base, alas, position before last thingy. */
            $new_pos = ($base == $end) ? $base - 2 : $base - 1;

            /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
             * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
             * We want to position matra after them.
             */
            if ($scriptblock != Ucdn::SCRIPT_MALAYALAM && $scriptblock != Ucdn::SCRIPT_TAMIL) {
                while ($new_pos > $start && !(self::is_one_of($info[$new_pos], (self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG))))) {
                    $new_pos--;
                }

                /* If we found no Halant we are done.
                 * Otherwise only proceed if the Halant does
                 * not belong to the Matra itself! */
                if (self::is_halant_or_coeng($info[$new_pos]) && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
                    /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
                    if ($new_pos + 1 < $end && self::is_joiner($info[$new_pos + 1])) {
                        $new_pos++;
                    }
                } else {
                    $new_pos = $start;
                } /* No move. */
            }

            if ($start < $new_pos && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
                /* Now go see if there's actually any matras... */
                for ($i = $new_pos; $i > $start; $i--) {
                    if ($info[$i - 1]['indic_position'] == self::POS_PRE_M) {
                        $old_pos = $i - 1;
                        //memmove (&info[$old_pos], &info[$old_pos + 1], ($new_pos - $old_pos) * sizeof ($info[0]));
                        self::_move_info_pos($info, $old_pos, $new_pos + 1);

                        if ($old_pos < $base && $base <= $new_pos) { /* Shouldn't actually happen. */
                            $base--;
                        }
                        $new_pos--;
                    }
                }
            }
        }


        /* o Reorder reph:
         *
         * Reph's original position is always at the beginning of the syllable,
         * (i.e. it is not reordered at the character reordering stage). However,
         * it will be reordered according to the basic-forms shaping results.
         * Possible positions for reph, depending on the script, are; after main,
         * before post-base consonant forms, and after post-base consonant forms.
         */

        /* If there's anything after the Ra that has the REPH pos, it ought to be halant.
         * Which means that the font has failed to ligate the Reph. In which case, we
         * shouldn't move. */
        if ($start + 1 < $end &&
            $info[$start]['indic_position'] == self::POS_RA_TO_BECOME_REPH && $info[$start + 1]['indic_position'] != self::POS_RA_TO_BECOME_REPH) {
            $reph_pos = $indic_config[3];
            $skip_to_reph_step_5 = false;
            $skip_to_reph_move = false;

            /* 1. If reph should be positioned after post-base consonant forms,
             * proceed to step 5.
             */
            if ($reph_pos == self::REPH_POS_AFTER_POST) {
                $skip_to_reph_step_5 = true;
            }

            /* 2. If the reph repositioning class is not after post-base: target
             * position is after the first explicit halant glyph between the
             * first post-reph consonant and last main consonant. If ZWJ or ZWNJ
             * are following this halant, position is moved after it. If such
             * position is found, this is the target position. Otherwise,
             * proceed to the next step.
             *
             * Note: in old-implementation fonts, where classifications were
             * fixed in shaping engine, there was no case where reph position
             * will be found on this step.
             */

            if (!$skip_to_reph_step_5) {
                $new_reph_pos = $start + 1;

                while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
                    $new_reph_pos++;
                }

                if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
                    /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
                    if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
                        $new_reph_pos++;
                    }
                    $skip_to_reph_move = true;
                }
            }

            /* 3. If reph should be repositioned after the main consonant: find the
             * first consonant not ligated with main, or find the first
             * consonant that is not a potential pre-base reordering Ra.
             */
            if ($reph_pos == self::REPH_POS_AFTER_MAIN && !$skip_to_reph_move && !$skip_to_reph_step_5) {
                $new_reph_pos = $base;
                /* XXX Skip potential pre-base reordering Ra. */
                while ($new_reph_pos + 1 < $end && $info[$new_reph_pos + 1]['indic_position'] <= self::POS_AFTER_MAIN) {
                    $new_reph_pos++;
                }
                if ($new_reph_pos < $end) {
                    $skip_to_reph_move = true;
                }
            }

            /* 4. If reph should be positioned before post-base consonant, find
             * first post-base classified consonant not ligated with main. If no
             * consonant is found, the target position should be before the
             * first matra, syllable modifier sign or vedic sign.
             */
            /* This is our take on what step 4 is trying to say (and failing, BADLY). */
            if ($reph_pos == self::REPH_POS_AFTER_SUB && !$skip_to_reph_move && !$skip_to_reph_step_5) {
                $new_reph_pos = $base;
                /* Only look at glyphs of this syllable ($new_reph_pos + 1 < $end, the
                 * same bound step 3 uses). The previous bound ($new_reph_pos < $end
                 * guarded by isset()) inspected the first glyph of the following
                 * syllable, so the outcome depended on what came after this one. */
                while ($new_reph_pos + 1 < $end &&
                    !(self::FLAG($info[$new_reph_pos + 1]['indic_position']) & (self::FLAG(self::POS_POST_C) | self::FLAG(self::POS_AFTER_POST) | self::FLAG(self::POS_SMVD)))) {
                    $new_reph_pos++;
                }
                if ($new_reph_pos < $end) {
                    $skip_to_reph_move = true;
                }
            }

            /* 5. If no consonant is found in steps 3 or 4, move reph to a position
             * immediately before the first post-base matra, syllable modifier
             * sign or vedic sign that has a reordering class after the intended
             * reph position. For example, if the reordering position for reph
             * is post-main, it will skip above-base matras that also have a
             * post-main position.
             */
            if (!$skip_to_reph_move) {
                /* Copied from step 2. */
                $new_reph_pos = $start + 1;
                while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
                    $new_reph_pos++;
                }

                if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
                    /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
                    if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
                        $new_reph_pos++;
                    }
                    $skip_to_reph_move = true;
                }
            }


            /* 6. Otherwise, reorder reph to the end of the syllable.
             */
            if (!$skip_to_reph_move) {
                $new_reph_pos = $end - 1;
                while ($new_reph_pos > $start && $info[$new_reph_pos]['indic_position'] == self::POS_SMVD) {
                    $new_reph_pos--;
                }

                /*
                 * If the Reph is to be ending up after a Matra,Halant sequence,
                 * position it before that Halant so it can interact with the Matra.
                 * However, if it's a plain Consonant,Halant we shouldn't do that.
                 * Uniscribe doesn't do this.
                 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
                 */
                //if (!$hb_options.uniscribe_bug_compatible && self::is_halant_or_coeng($info[$new_reph_pos])) {
                if (self::is_halant_or_coeng($info[$new_reph_pos])) {
                    for ($i = $base + 1; $i < $new_reph_pos; $i++) {
                        if ($info[$i]['indic_category'] == self::OT_M) {
                            /* Ok, got it. */
                            $new_reph_pos--;
                        }
                    }
                }
            }


            /* Move */
            self::_move_info_pos($info, $start, $new_reph_pos + 1);

            if ($start < $base && $base <= $new_reph_pos) {
                $base--;
            }
        }


        /* o Reorder pre-base reordering consonants:
         *
         * If a pre-base reordering consonant is found, reorder it according to
         * the following rules:
         */


        if (count($GSUBdata['pref']) && $base + 1 < $end) { /* Otherwise there can't be any pre-base reordering Ra. */
            for ($i = $base + 1; $i < $end; $i++) {
                if ($info[$i]['mask'] & self::FLAG(self::PREF)) {
                    /* 1. Only reorder a glyph produced by substitution during application
                     * of the <pref> feature. (Note that a font may shape a Ra consonant with
                     * the feature generally but block it in certain contexts.)
                     */
                    // ??? Need to TEST if actual substitution has occurred
                    if ($i + 1 == $end || ($info[$i + 1]['mask'] & self::FLAG(self::PREF)) == 0) {
                        /*
                         * 2. Try to find a target position the same way as for pre-base matra.
                         * If it is found, reorder pre-base consonant glyph.
                         *
                         * 3. If position is not found, reorder immediately before main
                         * consonant.
                         */
                        $new_pos = $base;
                        /* Malayalam / Tamil do not have "half" forms or explicit virama forms.
                         * The glyphs formed by 'half' are Chillus or ligated explicit viramas.
                         * We want to position matra after them.
                         */
                        if ($scriptblock != Ucdn::SCRIPT_MALAYALAM && $scriptblock != Ucdn::SCRIPT_TAMIL) {
                            while ($new_pos > $start &&
                                !(self::is_one_of($info[$new_pos - 1], self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG)))) {
                                $new_pos--;
                            }

                            /* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
                             * split matra, it should be reordered to *before* the left part of such matra. */
                            if ($new_pos > $start && $info[$new_pos - 1]['indic_category'] == self::OT_M) {
                                /* Scan with a separate index: PHP has no block scope, so reusing
                                 * $i here would overwrite the index of the pre-base glyph and the
                                 * move below would relocate the matra instead. */
                                for ($j = $base + 1; $j < $i; $j++) {
                                    if ($info[$j]['indic_category'] == self::OT_M) {
                                        $new_pos--;
                                        break;
                                    }
                                }
                            }
                        }

                        if ($new_pos > $start && self::is_halant_or_coeng($info[$new_pos - 1])) {
                            /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
                            if ($new_pos < $end && self::is_joiner($info[$new_pos])) {
                                $new_pos++;
                            }
                        }

                        $old_pos = $i;
                        self::_move_info_pos($info, $old_pos, $new_pos);

                        if ($new_pos <= $base && $base < $old_pos) {
                            $base++;
                        }
                    }

                    break;
                }
            }
        }


        /* Apply 'init' to the Left Matra if it's a word start. */
        if ($info[$start]['indic_position'] == self::POS_PRE_M &&
            ($start == 0 ||
            ($info[$start - 1]['general_category'] < Ucdn::UNICODE_GENERAL_CATEGORY_FORMAT || $info[$start - 1]['general_category'] > Ucdn::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
            )) {
            $info[$start]['mask'] |= self::FLAG(self::INIT);
        }


        /*
         * Finish off and go home!
         */
    }

    /**
     * Move one element of $info so that it ends up immediately before the
     * element that was at index $to before the call.
     *
     * When $from > $to the element lands at index $to; when $from < $to it
     * lands at index $to - 1 (everything in between shifts down by one);
     * when $from == $to the array is left unchanged.
     *
     * @param array $info List to modify in place (re-indexed by array_splice).
     * @param int   $from Index of the element to move.
     * @param int   $to   Insertion index, expressed in pre-move coordinates.
     */
    public static function _move_info_pos(&$info, $from, $to)
    {
        $t = [];
        $t[0] = $info[$from];
        if ($from > $to) {
            array_splice($info, $from, 1);
            array_splice($info, $to, 0, $t);
        } else {
            /* Insert first so that $from still addresses the original element. */
            array_splice($info, $to, 0, $t);
            array_splice($info, $from, 1);
        }
    }

    /* Code points treated as the consonant Ra, keyed by code point. */
    public static $ra_chars = [
        0x0930 => 1, /* Devanagari */
        0x09B0 => 1, /* Bengali */
        0x09F0 => 1, /* Bengali (Assamese) */
        0x0A30 => 1, /* Gurmukhi */ /* No Reph */
        0x0AB0 => 1, /* Gujarati */
        0x0B30 => 1, /* Oriya */
        0x0BB0 => 1, /* Tamil */ /* No Reph */
        0x0C30 => 1, /* Telugu */ /* Reph formed only with ZWJ */
        0x0CB0 => 1, /* Kannada */
        0x0D30 => 1, /* Malayalam */ /* No Reph, Logical Repha */
        0x0DBB => 1, /* Sinhala */ /* Reph formed only with ZWJ */
        0x179A => 1, /* Khmer */ /* No Reph, Visual Repha */
    ];

    /**
     * Whether code point $u is listed in self::$ra_chars.
     *
     * @param int $u Unicode code point.
     * @return bool
     */
    public static function is_ra($u)
    {
        return isset(self::$ra_chars[$u]);
    }

    /**
     * Whether the glyph's indic_category is one of the categories in $flags.
     *
     * A glyph whose 'is_ligature' entry is set and truthy never matches.
     *
     * @param array $info  Single glyph info record.
     * @param int   $flags Bitwise OR of self::FLAG(self::OT_*) values.
     * @return bool
     */
    public static function is_one_of($info, $flags)
    {
        if (isset($info['is_ligature']) && $info['is_ligature']) {
            return false;
        } /* If it ligated, all bets are off. */
        return !!(self::FLAG($info['indic_category']) & $flags);
    }

    /**
     * Whether the glyph is categorised as ZWJ or ZWNJ.
     *
     * @param array $info Single glyph info record.
     * @return bool
     */
    public static function is_joiner($info)
    {
        return self::is_one_of($info, (self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ)));
    }

    /* Vowels and placeholders treated as if they were consonants.
*/

    /**
     * Whether the glyph counts as a consonant for syllable analysis.
     *
     * Matches categories C, CM, RA, V, NBSP and DOTTEDCIRCLE.
     *
     * @param array $info Single glyph info record.
     * @return bool
     */
    public static function is_consonant($info)
    {
        $consonant_like = self::FLAG(self::OT_C)
            | self::FLAG(self::OT_CM)
            | self::FLAG(self::OT_RA)
            | self::FLAG(self::OT_V)
            | self::FLAG(self::OT_NBSP)
            | self::FLAG(self::OT_DOTTEDCIRCLE);

        return self::is_one_of($info, $consonant_like);
    }

    /**
     * Whether the glyph is categorised as a halant (H) or a coeng.
     *
     * @param array $info Single glyph info record.
     * @return bool
     */
    public static function is_halant_or_coeng($info)
    {
        $halant_like = self::FLAG(self::OT_H) | self::FLAG(self::OT_COENG);

        return self::is_one_of($info, $halant_like);
    }

    // From hb-private.hh
    /**
     * Inclusive range test: is $lo <= $u <= $hi?
     *
     * When [$lo, $hi] is a block whose differing bits form a contiguous
     * low-order mask, the test is done with a single mask-and-compare;
     * otherwise it is a plain two-sided comparison.
     *
     * @param int $u  Value to test.
     * @param int $lo Lower bound (inclusive).
     * @param int $hi Upper bound (inclusive).
     * @return bool
     */
    public static function in_range($u, $lo, $hi)
    {
        $span = $lo ^ $hi;

        $is_aligned_block = ($span & $lo) == 0
            && ($span & $hi) == $span
            && ($span & ($span + 1)) == 0;

        if (!$is_aligned_block) {
            return $lo <= $u && $u <= $hi;
        }

        return ($u & ~$span) == $lo;
    }

    // From hb-private.hh
    /**
     * Bit mask with only bit number $x set.
     *
     * @param int $x Bit index.
     * @return int
     */
    public static function FLAG($x)
    {
        return 1 << $x;
    }

    // BELOW from hb-ot-shape-complex-indic.cc

    /*
     * Indic configurations.
     */

    // base_position
    const BASE_POS_FIRST = 0;
    const BASE_POS_LAST = 1;

    // reph_position
    const REPH_POS_DEFAULT = 10; // POS_BEFORE_POST,

    const REPH_POS_AFTER_MAIN = 5; // POS_AFTER_MAIN,

    const REPH_POS_BEFORE_SUB = 7; // POS_BEFORE_SUB,
    const REPH_POS_AFTER_SUB = 9; // POS_AFTER_SUB,
    const REPH_POS_BEFORE_POST = 10; // POS_BEFORE_POST,
    const REPH_POS_AFTER_POST = 12; // POS_AFTER_POST

    // reph_mode
    const REPH_MODE_IMPLICIT = 0; /* Reph formed out of initial Ra,H sequence. */
    const REPH_MODE_EXPLICIT = 1; /* Reph formed out of initial Ra,H,ZWJ sequence. */
    const REPH_MODE_VIS_REPHA = 2; /* Encoded Repha character, no reordering needed. */
    const REPH_MODE_LOG_REPHA = 3; /* Encoded Repha character, needs reordering.
*/ 1320 1321 /* 1322 struct of indic_configs{ 1323 KEY - script; 1324 0 - has_old_spec; 1325 1 - virama; 1326 2 - base_pos; 1327 3 - reph_pos; 1328 4 - reph_mode; 1329 }; 1330 */ 1331 1332 public static $indic_configs = [/* index is SCRIPT_number from UCDN */ 1333 9 => [true, 0x094D, 1, 10, 0], 1334 10 => [true, 0x09CD, 1, 9, 0], 1335 11 => [true, 0x0A4D, 1, 7, 0], 1336 12 => [true, 0x0ACD, 1, 10, 0], 1337 13 => [true, 0x0B4D, 1, 5, 0], 1338 14 => [true, 0x0BCD, 1, 12, 0], 1339 15 => [true, 0x0C4D, 1, 12, 1], 1340 16 => [true, 0x0CCD, 1, 12, 0], 1341 17 => [true, 0x0D4D, 1, 5, 3], 1342 18 => [false, 0x0DCA, 0, 5, 1], /* Sinhala */ 1343 30 => [false, 0x17D2, 0, 10, 2], /* Khmer */ 1344 84 => [false, 0xA9C0, 1, 10, 0], /* Javanese */ 1345 ]; 1346 1347 1348 1349 /* 1350 1351 // from "hb-ot-shape-complex-indic-table.cc" 1352 1353 1354 const ISC_A = 0; // INDIC_SYLLABIC_CATEGORY_AVAGRAHA Avagraha 1355 const ISC_Bi = 8; // INDIC_SYLLABIC_CATEGORY_BINDU Bindu 1356 const ISC_C = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT Consonant 1357 const ISC_CD = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD Consonant_Dead 1358 const ISC_CF = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL Consonant_Final 1359 const ISC_CHL = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER Consonant_Head_Letter 1360 const ISC_CM = 17; // INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL Consonant_Medial 1361 const ISC_CP = 11; // INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER Consonant_Placeholder 1362 const ISC_CR = 15; // INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA Consonant_Repha 1363 const ISC_CS = 1; // INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED Consonant_Subjoined 1364 const ISC_ML = 0; // INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER Modifying_Letter 1365 const ISC_N = 3; // INDIC_SYLLABIC_CATEGORY_NUKTA Nukta 1366 const ISC_x = 0; // INDIC_SYLLABIC_CATEGORY_OTHER Other 1367 const ISC_RS = 13; // INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER Register_Shifter 1368 const ISC_TL = 0; // INDIC_SYLLABIC_CATEGORY_TONE_LETTER 
Tone_Letter 1369 const ISC_TM = 3; // INDIC_SYLLABIC_CATEGORY_TONE_MARK Tone_Mark 1370 const ISC_V = 4; // INDIC_SYLLABIC_CATEGORY_VIRAMA Virama 1371 const ISC_Vs = 8; // INDIC_SYLLABIC_CATEGORY_VISARGA Visarga 1372 const ISC_Vo = 2; // INDIC_SYLLABIC_CATEGORY_VOWEL Vowel 1373 const ISC_M = 7; // INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT Vowel_Dependent 1374 const ISC_VI = 2; // INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT Vowel_Independent 1375 1376 const IMC_B = 8; // INDIC_MATRA_CATEGORY_BOTTOM Bottom 1377 const IMC_BR = 11; // INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT Bottom_And_Right 1378 const IMC_I = 15; // INDIC_MATRA_CATEGORY_INVISIBLE Invisible 1379 const IMC_L = 3; // INDIC_MATRA_CATEGORY_LEFT Left 1380 const IMC_LR = 11; // INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT Left_And_Right 1381 const IMC_x = 15; // INDIC_MATRA_CATEGORY_NOT_APPLICABLE Not_Applicable 1382 const IMC_O = 5; // INDIC_MATRA_CATEGORY_OVERSTRUCK Overstruck 1383 const IMC_R = 11; // INDIC_MATRA_CATEGORY_RIGHT Right 1384 const IMC_T = 6; // INDIC_MATRA_CATEGORY_TOP Top 1385 const IMC_TB = 8; // INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM Top_And_Bottom 1386 const IMC_TBR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT Top_And_Bottom_And_Right 1387 const IMC_TL = 6; // INDIC_MATRA_CATEGORY_TOP_AND_LEFT Top_And_Left 1388 const IMC_TLR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT Top_And_Left_And_Right 1389 const IMC_TR = 11; // INDIC_MATRA_CATEGORY_TOP_AND_RIGHT Top_And_Right 1390 const IMC_VOL = 2; // INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT Visual_Order_Left 1391 1392 If in original table = _(C,x), that = ISC_C,IMC_x 1393 Value is IMC_x << 8 (or IMC_x * 256) = 3840 1394 plus ISC_C = 1, so = 3841 1395 1396 */ 1397 1398 public static $indic_table = [ 1399 /* Devanagari (0900..097F) */ 1400 1401 /* 0900 */ 3848, 3848, 3848, 3848, 3842, 3842, 3842, 3842, 1402 /* 0908 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842, 1403 /* 0910 */ 3842, 3842, 3842, 3842, 3842, 3841, 3841, 3841, 1404 /* 0918 */ 3841, 
3841, 3841, 3841, 3841, 3841, 3841, 3841, 1405 /* 0920 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1406 /* 0928 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1407 /* 0930 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1408 /* 0938 */ 3841, 3841, 1543, 2823, 3843, 3840, 2823, 775, 1409 /* 0940 */ 2823, 2055, 2055, 2055, 2055, 1543, 1543, 1543, 1410 /* 0948 */ 1543, 2823, 2823, 2823, 2823, 2052, 775, 2823, 1411 /* 0950 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 2055, 1412 /* 0958 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1413 /* 0960 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840, 1414 /* 0968 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1415 /* 0970 */ 3840, 3840, 3842, 3842, 3842, 3842, 3842, 3842, 1416 /* 0978 */ 3840, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1417 /* Bengali (0980..09FF) */ 1418 1419 /* 0980 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842, 1420 /* 0988 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842, 1421 /* 0990 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841, 1422 /* 0998 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1423 /* 09A0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1424 /* 09A8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1425 /* 09B0 */ 3841, 3840, 3841, 3840, 3840, 3840, 3841, 3841, 1426 /* 09B8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775, 1427 /* 09C0 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775, 1428 /* 09C8 */ 775, 3840, 3840, 2823, 2823, 2052, 3841, 3840, 1429 /* 09D0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823, 1430 /* 09D8 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841, 1431 /* 09E0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840, 1432 /* 09E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1433 /* 09F0 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840, 1434 /* 09F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1435 /* Gurmukhi (0A00..0A7F) */ 1436 1437 /* 0A00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842, 1438 /* 0A08 */ 3842, 3842, 
3842, 3840, 3840, 3840, 3840, 3842, 1439 /* 0A10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841, 1440 /* 0A18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1441 /* 0A20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1442 /* 0A28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1443 /* 0A30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3840, 1444 /* 0A38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775, 1445 /* 0A40 */ 2823, 2055, 2055, 3840, 3840, 3840, 3840, 1543, 1446 /* 0A48 */ 1543, 3840, 3840, 1543, 1543, 2052, 3840, 3840, 1447 /* 0A50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1448 /* 0A58 */ 3840, 3841, 3841, 3841, 3841, 3840, 3841, 3840, 1449 /* 0A60 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1450 /* 0A68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1451 /* 0A70 */ 3848, 3840, 13841, 13841, 3840, 3857, 3840, 3840, 1452 /* 0A78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1453 /* Gujarati (0A80..0AFF) */ 1454 1455 /* 0A80 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842, 1456 /* 0A88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3840, 3842, 1457 /* 0A90 */ 3842, 3842, 3840, 3842, 3842, 3841, 3841, 3841, 1458 /* 0A98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1459 /* 0AA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1460 /* 0AA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1461 /* 0AB0 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841, 1462 /* 0AB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 775, 1463 /* 0AC0 */ 2823, 2055, 2055, 2055, 2055, 1543, 3840, 1543, 1464 /* 0AC8 */ 1543, 2823, 3840, 2823, 2823, 2052, 3840, 3840, 1465 /* 0AD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1466 /* 0AD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1467 /* 0AE0 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840, 1468 /* 0AE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1469 /* 0AF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1470 /* 0AF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1471 
/* Oriya (0B00..0B7F) */ 1472 1473 /* 0B00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842, 1474 /* 0B08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3840, 3842, 1475 /* 0B10 */ 3842, 3840, 3840, 3842, 3842, 3841, 3841, 3841, 1476 /* 0B18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1477 /* 0B20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1478 /* 0B28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1479 /* 0B30 */ 3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841, 1480 /* 0B38 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543, 1481 /* 0B40 */ 2823, 2055, 2055, 2055, 2055, 3840, 3840, 775, 1482 /* 0B48 */ 1543, 3840, 3840, 2823, 2823, 2052, 3840, 3840, 1483 /* 0B50 */ 3840, 3840, 3840, 3840, 3840, 3840, 1543, 2823, 1484 /* 0B58 */ 3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841, 1485 /* 0B60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840, 1486 /* 0B68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1487 /* 0B70 */ 3840, 3841, 3840, 3840, 3840, 3840, 3840, 3840, 1488 /* 0B78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1489 /* Tamil (0B80..0BFF) */ 1490 1491 /* 0B80 */ 3840, 3840, 3848, 3840, 3840, 3842, 3842, 3842, 1492 /* 0B88 */ 3842, 3842, 3842, 3840, 3840, 3840, 3842, 3842, 1493 /* 0B90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3840, 3840, 1494 /* 0B98 */ 3840, 3841, 3841, 3840, 3841, 3840, 3841, 3841, 1495 /* 0BA0 */ 3840, 3840, 3840, 3841, 3841, 3840, 3840, 3840, 1496 /* 0BA8 */ 3841, 3841, 3841, 3840, 3840, 3840, 3841, 3841, 1497 /* 0BB0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1498 /* 0BB8 */ 3841, 3841, 3840, 3840, 3840, 3840, 2823, 2823, 1499 /* 0BC0 */ 1543, 2055, 2055, 3840, 3840, 3840, 775, 775, 1500 /* 0BC8 */ 775, 3840, 2823, 2823, 2823, 1540, 3840, 3840, 1501 /* 0BD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823, 1502 /* 0BD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1503 /* 0BE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1504 /* 0BE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1505 /* 0BF0 */ 
3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1506 /* 0BF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1507 /* Telugu (0C00..0C7F) */ 1508 1509 /* 0C00 */ 3840, 3848, 3848, 3848, 3840, 3842, 3842, 3842, 1510 /* 0C08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842, 1511 /* 0C10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841, 1512 /* 0C18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1513 /* 0C20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1514 /* 0C28 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1515 /* 0C30 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841, 1516 /* 0C38 */ 3841, 3841, 3840, 3840, 3840, 3840, 1543, 1543, 1517 /* 0C40 */ 1543, 2823, 2823, 2823, 2823, 3840, 1543, 1543, 1518 /* 0C48 */ 2055, 3840, 1543, 1543, 1543, 1540, 3840, 3840, 1519 /* 0C50 */ 3840, 3840, 3840, 3840, 3840, 1543, 2055, 3840, 1520 /* 0C58 */ 3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840, 1521 /* 0C60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840, 1522 /* 0C68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1523 /* 0C70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1524 /* 0C78 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1525 /* Kannada (0C80..0CFF) */ 1526 1527 /* 0C80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842, 1528 /* 0C88 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842, 1529 /* 0C90 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841, 1530 /* 0C98 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1531 /* 0CA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1532 /* 0CA8 */ 3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1533 /* 0CB0 */ 3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841, 1534 /* 0CB8 */ 3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543, 1535 /* 0CC0 */ 2823, 2823, 2823, 2823, 2823, 3840, 1543, 2823, 1536 /* 0CC8 */ 2823, 3840, 2823, 2823, 1543, 1540, 3840, 3840, 1537 /* 0CD0 */ 3840, 3840, 3840, 3840, 3840, 2823, 2823, 3840, 1538 /* 0CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3841, 3840, 1539 /* 0CE0 */ 3842, 
3842, 2055, 2055, 3840, 3840, 3840, 3840, 1540 /* 0CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1541 /* 0CF0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1542 /* 0CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1543 /* Malayalam (0D00..0D7F) */ 1544 1545 /* 0D00 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842, 1546 /* 0D08 */ 3842, 3842, 3842, 3842, 3842, 3840, 3842, 3842, 1547 /* 0D10 */ 3842, 3840, 3842, 3842, 3842, 3841, 3841, 3841, 1548 /* 0D18 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1549 /* 0D20 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1550 /* 0D28 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1551 /* 0D30 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1552 /* 0D38 */ 3841, 3841, 3841, 3840, 3840, 3840, 2823, 2823, 1553 /* 0D40 */ 2823, 2823, 2823, 2055, 2055, 3840, 775, 775, 1554 /* 0D48 */ 775, 3840, 2823, 2823, 2823, 1540, 3855, 3840, 1555 /* 0D50 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823, 1556 /* 0D58 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1557 /* 0D60 */ 3842, 3842, 2055, 2055, 3840, 3840, 3840, 3840, 1558 /* 0D68 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1559 /* 0D70 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1560 /* 0D78 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1561 /* Sinhala (0D80..0DFF) */ 1562 1563 /* 0D80 */ 3840, 3840, 3848, 3848, 3840, 3842, 3842, 3842, 1564 /* 0D88 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842, 1565 /* 0D90 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3840, 1566 /* 0D98 */ 3840, 3840, 3841, 3841, 3841, 3841, 3841, 3841, 1567 /* 0DA0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1568 /* 0DA8 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1569 /* 0DB0 */ 3841, 3841, 3840, 3841, 3841, 3841, 3841, 3841, 1570 /* 0DB8 */ 3841, 3841, 3841, 3841, 3840, 3841, 3840, 3840, 1571 /* 0DC0 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3840, 1572 /* 0DC8 */ 3840, 3840, 1540, 3840, 3840, 3840, 3840, 2823, 1573 /* 0DD0 */ 2823, 2823, 
1543, 1543, 2055, 3840, 2055, 3840, 1574 /* 0DD8 */ 2823, 775, 1543, 775, 2823, 2823, 2823, 2823, 1575 /* 0DE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1576 /* 0DE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1577 /* 0DF0 */ 3840, 3840, 2823, 2823, 3840, 3840, 3840, 3840, 1578 /* 0DF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1579 /* Vedic Extensions (1CD0..1CFF) */ 1580 1581 /* 1CD0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1582 /* 1CD8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1583 /* 1CE0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1584 /* 1CE8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1585 /* 1CF0 */ 3840, 3840, 3848, 3848, 3840, 3840, 3840, 3840, 1586 /* 1CF8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1587 ]; 1588 1589 public static $khmer_table = [ 1590 /* Khmer (1780..17FF) */ 1591 1592 /* 1780 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1593 /* 1788 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1594 /* 1790 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1595 /* 1798 */ 3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841, 1596 /* 17A0 */ 3841, 3841, 3841, 3842, 3842, 3842, 3842, 3842, 1597 /* 17A8 */ 3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842, 1598 /* 17B0 */ 3842, 3842, 3842, 3842, 3840, 3840, 2823, 1543, 1599 /* 17B8 */ 1543, 1543, 1543, 2055, 2055, 2055, 1543, 2823, 1600 /* 17C0 */ 2823, 775, 775, 775, 2823, 2823, 3848, 3848, 1601 /* 17C8 */ 2823, 3853, 3853, 3840, 3855, 3840, 3840, 3840, 1602 /* 17D0 */ 3840, 1540, 3844, 3840, 3840, 3840, 3840, 3840, 1603 /* 17D8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1604 /* 17E0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1605 /* 17E8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1606 /* 17F0 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1607 /* 17F8 */ 3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840, 1608 ]; 1609 1610 // from "hb-ot-shape-complex-indic-table.cc" 1611 public static function 
indic_get_categories($u)
    {
        /*
         * Look up the packed category/position value for code point $u.
         *
         * The low byte is the indic category and the bits above it the matra
         * position (see the "$type & 0x7F" / "$type >> 8" split in
         * set_indic_properties). Code points outside the tables fall back to
         * 3840 (ISC_x | (IMC_x << 8)).
         */

        // U+0900..U+0DFF (Devanagari .. Sinhala) occupy the first 1280 entries.
        if (0x0900 <= $u && $u <= 0x0DFF) {
            return self::$indic_table[$u - 0x0900];
        }

        // Vedic Extensions (U+1CD0..U+1CFF, 48 entries) are stored directly after
        // the Sinhala rows. The offset is therefore the size of the first range
        // (1280); the former offset 1152 pointed at the Sinhala rows, and the
        // former upper bound 0x1D00 was one past the end of the block.
        if (0x1CD0 <= $u && $u <= 0x1CFF) {
            $vedic_offset = 0x0DFF - 0x0900 + 1; // 1280
            return self::$indic_table[$u - 0x1CD0 + $vedic_offset];
        }

        if (0x1780 <= $u && $u <= 0x17FF) {
            return self::$khmer_table[$u - 0x1780]; // Khmer
        }
        if ($u == 0x00A0) {
            return 3851; // (ISC_CP | (IMC_x << 8))
        }
        if ($u == 0x25CC) {
            return 3851; // (ISC_CP | (IMC_x << 8))
        }
        return 3840; // (ISC_x | (IMC_x << 8))
    }

    // BELOW from hb-ot-shape-complex-indic.cc
    /*
     * Indic shaper.
     */

    /**
     * Whether $u lies in the 128-code-point block that starts at $Base.
     *
     * @param int $u    Unicode code point.
     * @param int $Base First code point of the block (low seven bits clear).
     * @return bool
     */
    public static function IN_HALF_BLOCK($u, $Base)
    {
        return (($u & ~0x7F) == $Base);
    }

    /* Per-script block tests; each checks the block starting at the given base. */

    public static function IS_DEVA($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0900);
    }

    public static function IS_BENG($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0980);
    }

    public static function IS_GURU($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0A00);
    }

    public static function IS_GUJR($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0A80);
    }

    public static function IS_ORYA($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0B00);
    }

    public static function IS_TAML($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0B80);
    }

    public static function IS_TELU($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0C00);
    }

    public static function IS_KNDA($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0C80);
    }

    public static function IS_MLYM($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0D00);
    }

    public static function IS_SINH($u)
    {
        return self::IN_HALF_BLOCK($u, 0x0D80);
    }

    public static function IS_KHMR($u)
    {
        return self::IN_HALF_BLOCK($u, 0x1780);
    }

    /* Left matras always take the pre-base matra position, whatever the script. */
    public static function MATRA_POS_LEFT($u)
    {
return self::POS_PRE_M;
    }

    /**
     * Position class for a matra that sits to the right of the base,
     * chosen by the script block of $u.
     *
     * @param int $u Unicode code point of the matra.
     * @return int One of the self::POS_* constants.
     */
    public static function MATRA_POS_RIGHT($u)
    {
        if (self::IS_DEVA($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_BENG($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_GURU($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_GUJR($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_ORYA($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_TAML($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_TELU($u)) {
            /* Telugu: code points up to U+0C42 go before the below-base forms. */
            return ($u <= 0x0C42) ? self::POS_BEFORE_SUB : self::POS_AFTER_SUB;
        }
        if (self::IS_KNDA($u)) {
            /* Kannada: only U+0CC3..U+0CD6 go after the below-base forms. */
            return ($u < 0x0CC3 || $u > 0xCD6) ? self::POS_BEFORE_SUB : self::POS_AFTER_SUB;
        }
        if (self::IS_MLYM($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_SINH($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_KHMR($u)) {
            return self::POS_AFTER_POST;
        }

        return self::POS_AFTER_SUB; /* default */
    }

    /**
     * Position class for a matra that sits above the base,
     * chosen by the script block of $u.
     *
     * @param int $u Unicode code point of the matra.
     * @return int One of the self::POS_* constants.
     */
    public static function MATRA_POS_TOP($u)
    {
        /* BENG and MLYM don't have top matras. */
        if (self::IS_DEVA($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_GURU($u)) {
            return self::POS_AFTER_POST; /* Deviate from spec */
        }
        if (self::IS_GUJR($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_ORYA($u)) {
            return self::POS_AFTER_MAIN;
        }
        if (self::IS_TAML($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_TELU($u)) {
            return self::POS_BEFORE_SUB;
        }
        if (self::IS_KNDA($u)) {
            return self::POS_BEFORE_SUB;
        }
        if (self::IS_SINH($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_KHMR($u)) {
            return self::POS_AFTER_POST;
        }

        return self::POS_AFTER_SUB; /* default */
    }

    /**
     * Position class for a matra that sits below the base,
     * chosen by the script block of $u.
     *
     * @param int $u Unicode code point of the matra.
     * @return int One of the self::POS_* constants.
     */
    public static function MATRA_POS_BOTTOM($u)
    {
        if (self::IS_DEVA($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_BENG($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_GURU($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_GUJR($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_ORYA($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_TAML($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_TELU($u)) {
            return self::POS_BEFORE_SUB;
        }
        if (self::IS_KNDA($u)) {
            return self::POS_BEFORE_SUB;
        }
        if (self::IS_MLYM($u)) {
            return self::POS_AFTER_POST;
        }
        if (self::IS_SINH($u)) {
            return self::POS_AFTER_SUB;
        }
        if (self::IS_KHMR($u)) {
            return self::POS_AFTER_POST;
        }

        return self::POS_AFTER_SUB; /* default */
    }

    /**
     * Refine a matra's coarse side into a script-specific position class.
     *
     * $side values POS_PRE_C, POS_POST_C, POS_ABOVE_C and POS_BELOW_C are
     * dispatched to the LEFT / RIGHT / TOP / BOTTOM helpers; any other value
     * is returned unchanged.
     *
     * @param int $u    Unicode code point of the matra.
     * @param int $side One of the self::POS_* constants.
     * @return int
     */
    public static function matra_position($u, $side)
    {
        if ($side == self::POS_PRE_C) {
            return self::MATRA_POS_LEFT($u);
        }
        if ($side == self::POS_POST_C) {
            return self::MATRA_POS_RIGHT($u);
        }
        if ($side == self::POS_ABOVE_C) {
            return self::MATRA_POS_TOP($u);
        }
        if ($side == self::POS_BELOW_C) {
            return self::MATRA_POS_BOTTOM($u);
        }

        return $side;
    }

    // vowel matras that have to be split into two parts.
    // From Harfbuzz (old)
    // New HarfBuzz uses /src/hb-ucdn/ucdn.c and unicodedata_db.h for full method of decomposition for all characters
    // Should always fully decompose and then recompose back, but we will just do the split matras
    public static function decompose_indic($ab)
    {
        $sub = [];
        switch ($ab) {
            /*
             * Decompose split matras.
             */
            /* bengali */
            case 0x9cb:
                $sub[0] = 0x9c7;
                $sub[1] = 0x9be;
                return $sub;
            case 0x9cc:
                $sub[0] = 0x9c7;
                $sub[1] = 0x9d7;
                return $sub;
            /* oriya */
            case 0xb48:
                $sub[0] = 0xb47;
                $sub[1] = 0xb56;
                return $sub;
            case 0xb4b:
                $sub[0] = 0xb47;
                $sub[1] = 0xb3e;
                return $sub;
            case 0xb4c:
                $sub[0] = 0xb47;
                $sub[1] = 0xb57;
                return $sub;
            /* tamil */
            case 0xbca:
                $sub[0] = 0xbc6;
                $sub[1] = 0xbbe;
                return $sub;
            case 0xbcb:
                $sub[0] = 0xbc7;
                $sub[1] = 0xbbe;
                return $sub;
            case 0xbcc:
                $sub[0] = 0xbc6;
                $sub[1] = 0xbd7;
                return $sub;
            /* telugu */
            case 0xc48:
                $sub[0] = 0xc46;
                $sub[1] = 0xc56;
                return $sub;
            /* kannada */
            case 0xcc0:
                $sub[0] = 0xcbf;
                $sub[1] = 0xcd5;
                return $sub;
            case 0xcc7:
                $sub[0] = 0xcc6;
                $sub[1] = 0xcd5;
                return $sub;
            case 0xcc8:
                $sub[0] = 0xcc6;
                $sub[1] = 0xcd6;
                return
$sub; 1829 case 0xcca: 1830 $sub[0] = 0xcc6; 1831 $sub[1] = 0xcc2; 1832 return $sub; 1833 case 0xccb: 1834 $sub[0] = 0xcc6; 1835 $sub[1] = 0xcc2; 1836 $sub[2] = 0xcd5; 1837 return $sub; 1838 /* malayalam */ 1839 case 0xd4a: 1840 $sub[0] = 0xd46; 1841 $sub[1] = 0xd3e; 1842 return $sub; 1843 case 0xd4b: 1844 $sub[0] = 0xd47; 1845 $sub[1] = 0xd3e; 1846 return $sub; 1847 case 0xd4c: 1848 $sub[0] = 0xd46; 1849 $sub[1] = 0xd57; 1850 return $sub; 1851 /* sinhala */ 1852 // NB Some fonts break with these Sinhala decomps (although this is Uniscribe spec) 1853 // Can check if character would be substituted by pstf and only decompose if true 1854 // e.g. if (isset($GSUBdata['pstf'][$ab])) - would need to pass $GSUBdata as parameter to this function 1855 case 0xdda: 1856 $sub[0] = 0xdd9; 1857 $sub[1] = 0xdca; 1858 return $sub; 1859 case 0xddc: 1860 $sub[0] = 0xdd9; 1861 $sub[1] = 0xdcf; 1862 return $sub; 1863 case 0xddd: 1864 $sub[0] = 0xdd9; 1865 $sub[1] = 0xdcf; 1866 $sub[2] = 0xdca; 1867 return $sub; 1868 case 0xdde: 1869 $sub[0] = 0xdd9; 1870 $sub[1] = 0xddf; 1871 return $sub; 1872 /* khmer */ 1873 case 0x17be: 1874 $sub[0] = 0x17c1; 1875 $sub[1] = 0x17be; 1876 return $sub; 1877 case 0x17bf: 1878 $sub[0] = 0x17c1; 1879 $sub[1] = 0x17bf; 1880 return $sub; 1881 case 0x17c0: 1882 $sub[0] = 0x17c1; 1883 $sub[1] = 0x17c0; 1884 return $sub; 1885 1886 case 0x17c4: 1887 $sub[0] = 0x17c1; 1888 $sub[1] = 0x17c4; 1889 return $sub; 1890 case 0x17c5: 1891 $sub[0] = 0x17c1; 1892 $sub[1] = 0x17c5; 1893 return $sub; 1894 /* tibetan - included here although does not use Inidc shaper in other ways */ 1895 case 0xf73: 1896 $sub[0] = 0xf71; 1897 $sub[1] = 0xf72; 1898 return $sub; 1899 case 0xf75: 1900 $sub[0] = 0xf71; 1901 $sub[1] = 0xf74; 1902 return $sub; 1903 case 0xf76: 1904 $sub[0] = 0xfb2; 1905 $sub[1] = 0xf80; 1906 return $sub; 1907 case 0xf77: 1908 $sub[0] = 0xfb2; 1909 $sub[1] = 0xf81; 1910 return $sub; 1911 case 0xf78: 1912 $sub[0] = 0xfb3; 1913 $sub[1] = 0xf80; 1914 return $sub; 
1915 case 0xf79: 1916 $sub[0] = 0xfb3; 1917 $sub[1] = 0xf71; 1918 $sub[2] = 0xf80; 1919 return $sub; 1920 case 0xf81: 1921 $sub[0] = 0xf71; 1922 $sub[1] = 0xf80; 1923 return $sub; 1924 } 1925 return false; 1926 } 1927 1928 public static function bubble_sort(&$arr, $start, $len) 1929 { 1930 if ($len < 2) { 1931 return; 1932 } 1933 $k = $start + $len - 2; 1934 while ($k >= $start) { 1935 for ($j = $start; $j <= $k; $j++) { 1936 if ($arr[$j]['indic_position'] > $arr[$j + 1]['indic_position']) { 1937 $t = $arr[$j]; 1938 $arr[$j] = $arr[$j + 1]; 1939 $arr[$j + 1] = $t; 1940 } 1941 } 1942 $k--; 1943 } 1944 } 1945} 1946