1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Polyfill\Intl\Idn;
13
14use Exception;
15use Normalizer;
16use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
17use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;
18
19/**
20 * @see https://www.unicode.org/reports/tr46/
21 *
22 * @internal
23 */
24final class Idn
25{
    // Error bit flags. Each failure detected while processing a domain name is OR-ed into
    // Info::$errors and handed back to the caller through the 'errors' key of $idna_info.
    // Note: the two ERROR_CONTEXTO_* flags are declared but never set anywhere in this class.
    public const ERROR_EMPTY_LABEL = 1;
    public const ERROR_LABEL_TOO_LONG = 2;
    public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
    public const ERROR_LEADING_HYPHEN = 8;
    public const ERROR_TRAILING_HYPHEN = 0x10;
    public const ERROR_HYPHEN_3_4 = 0x20;
    public const ERROR_LEADING_COMBINING_MARK = 0x40;
    public const ERROR_DISALLOWED = 0x80;
    public const ERROR_PUNYCODE = 0x100;
    public const ERROR_LABEL_HAS_DOT = 0x200;
    public const ERROR_INVALID_ACE_LABEL = 0x400;
    public const ERROR_BIDI = 0x800;
    public const ERROR_CONTEXTJ = 0x1000;
    public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
    public const ERROR_CONTEXTO_DIGITS = 0x4000;

    // Values accepted for the $variant argument of idn_to_ascii() and idn_to_utf8().
    public const INTL_IDNA_VARIANT_2003 = 0;
    public const INTL_IDNA_VARIANT_UTS46 = 1;

    // Bit flags for the $options argument of idn_to_ascii() and idn_to_utf8().
    // Note: IDNA_ALLOW_UNASSIGNED is declared but not consulted anywhere in this class.
    public const IDNA_DEFAULT = 0;
    public const IDNA_ALLOW_UNASSIGNED = 1;
    public const IDNA_USE_STD3_RULES = 2;
    public const IDNA_CHECK_BIDI = 4;
    public const IDNA_CHECK_CONTEXTJ = 8;
    public const IDNA_NONTRANSITIONAL_TO_ASCII = 16;
    public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32;

    // Length limits, in bytes, enforced by validateDomainAndLabelLength().
    public const MAX_DOMAIN_SIZE = 253;
    public const MAX_LABEL_SIZE = 63;

    // Parameters of the Punycode routines (punycodeEncode(), punycodeDecode(), adaptBias()).
    // MAX_INT is the upper bound used by their integer overflow checks.
    public const BASE = 36;
    public const TMIN = 1;
    public const TMAX = 26;
    public const SKEW = 38;
    public const DAMP = 700;
    public const INITIAL_BIAS = 72;
    public const INITIAL_N = 128;
    public const DELIMITER = '-';
    public const MAX_INT = 2147483647;
65
    /**
     * Maps a byte value (0-255) to the numeric value of the Punycode digit it represents, in the
     * range 0 to BASE-1, or to -1 if the byte does not represent a digit.
     *
     * Both "A"-"Z" and "a"-"z" map to 0-25 and "0"-"9" map to 26-35. The table is indexed by
     * punycodeDecode() with the raw byte value of each input character.
     *
     * @var array<int, int>
     */
    private static $basicToDigit = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    ];
97
    /**
     * Lookup table keyed by code point, loaded from Resources/unidata/virama.php on the first
     * call to isValidContextJ(). Only the presence of a key is tested.
     *
     * @var array<int, int>
     */
    private static $virama;

    /**
     * Code point => replacement string, for code points with status "mapped". Loaded from
     * Resources/unidata/mapped.php by lookupCodePointStatus().
     *
     * @var array<int, string>
     */
    private static $mapped;

    /**
     * Code points with status "ignored". Loaded from Resources/unidata/ignored.php by
     * lookupCodePointStatus().
     *
     * @var array<int, bool>
     */
    private static $ignored;

    /**
     * Code point => replacement string, for code points with status "deviation". Loaded from
     * Resources/unidata/deviation.php by lookupCodePointStatus().
     *
     * @var array<int, string>
     */
    private static $deviation;

    /**
     * Code points with status "disallowed". Loaded from Resources/unidata/disallowed.php by
     * lookupCodePointStatus(), which additionally consults DisallowedRanges::inRange().
     *
     * @var array<int, bool>
     */
    private static $disallowed;

    /**
     * Code point => replacement string, for code points that are "disallowed" when
     * UseSTD3ASCIIRules is on and "mapped" otherwise. Loaded from
     * Resources/unidata/disallowed_STD3_mapped.php by lookupCodePointStatus().
     *
     * @var array<int, string>
     */
    private static $disallowed_STD3_mapped;

    /**
     * Code points that are "disallowed" when UseSTD3ASCIIRules is on and "valid" otherwise.
     * Loaded from Resources/unidata/disallowed_STD3_valid.php by lookupCodePointStatus().
     *
     * @var array<int, bool>
     */
    private static $disallowed_STD3_valid;

    /**
     * Set to true once lookupCodePointStatus() has loaded the mapping tables above, so that the
     * resource files are only required once.
     *
     * @var bool
     */
    private static $mappingTableLoaded = false;
137
138    /**
139     * @see https://www.unicode.org/reports/tr46/#ToASCII
140     *
141     * @param string $domainName
142     * @param int    $options
143     * @param int    $variant
144     * @param array  $idna_info
145     *
146     * @return string|false
147     */
148    public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
149    {
150        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
151            @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
152        }
153
154        $options = [
155            'CheckHyphens' => true,
156            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
157            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
158            'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
159            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
160            'VerifyDnsLength' => true,
161        ];
162        $info = new Info();
163        $labels = self::process((string) $domainName, $options, $info);
164
165        foreach ($labels as $i => $label) {
166            // Only convert labels to punycode that contain non-ASCII code points
167            if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
168                try {
169                    $label = 'xn--'.self::punycodeEncode($label);
170                } catch (Exception $e) {
171                    $info->errors |= self::ERROR_PUNYCODE;
172                }
173
174                $labels[$i] = $label;
175            }
176        }
177
178        if ($options['VerifyDnsLength']) {
179            self::validateDomainAndLabelLength($labels, $info);
180        }
181
182        $idna_info = [
183            'result' => implode('.', $labels),
184            'isTransitionalDifferent' => $info->transitionalDifferent,
185            'errors' => $info->errors,
186        ];
187
188        return 0 === $info->errors ? $idna_info['result'] : false;
189    }
190
191    /**
192     * @see https://www.unicode.org/reports/tr46/#ToUnicode
193     *
194     * @param string $domainName
195     * @param int    $options
196     * @param int    $variant
197     * @param array  $idna_info
198     *
199     * @return string|false
200     */
201    public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
202    {
203        if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
204            @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
205        }
206
207        $info = new Info();
208        $labels = self::process((string) $domainName, [
209            'CheckHyphens' => true,
210            'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
211            'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
212            'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
213            'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
214        ], $info);
215        $idna_info = [
216            'result' => implode('.', $labels),
217            'isTransitionalDifferent' => $info->transitionalDifferent,
218            'errors' => $info->errors,
219        ];
220
221        return 0 === $info->errors ? $idna_info['result'] : false;
222    }
223
224    /**
225     * @param string $label
226     *
227     * @return bool
228     */
229    private static function isValidContextJ(array $codePoints, $label)
230    {
231        if (!isset(self::$virama)) {
232            self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
233        }
234
235        $offset = 0;
236
237        foreach ($codePoints as $i => $codePoint) {
238            if (0x200C !== $codePoint && 0x200D !== $codePoint) {
239                continue;
240            }
241
242            if (!isset($codePoints[$i - 1])) {
243                return false;
244            }
245
246            // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
247            if (isset(self::$virama[$codePoints[$i - 1]])) {
248                continue;
249            }
250
251            // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
252            // True;
253            // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
254            if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
255                $offset += \strlen($matches[1][0]);
256
257                continue;
258            }
259
260            return false;
261        }
262
263        return true;
264    }
265
266    /**
267     * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
268     *
269     * @param string              $input
270     * @param array<string, bool> $options
271     *
272     * @return string
273     */
274    private static function mapCodePoints($input, array $options, Info $info)
275    {
276        $str = '';
277        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
278        $transitional = $options['Transitional_Processing'];
279
280        foreach (self::utf8Decode($input) as $codePoint) {
281            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
282
283            switch ($data['status']) {
284                case 'disallowed':
285                    $info->errors |= self::ERROR_DISALLOWED;
286
287                    // no break.
288
289                case 'valid':
290                    $str .= mb_chr($codePoint, 'utf-8');
291
292                    break;
293
294                case 'ignored':
295                    // Do nothing.
296                    break;
297
298                case 'mapped':
299                    $str .= $data['mapping'];
300
301                    break;
302
303                case 'deviation':
304                    $info->transitionalDifferent = true;
305                    $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));
306
307                    break;
308            }
309        }
310
311        return $str;
312    }
313
314    /**
315     * @see https://www.unicode.org/reports/tr46/#Processing
316     *
317     * @param string              $domain
318     * @param array<string, bool> $options
319     *
320     * @return array<int, string>
321     */
322    private static function process($domain, array $options, Info $info)
323    {
324        // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
325        // we need to respect the VerifyDnsLength option.
326        $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];
327
328        if ($checkForEmptyLabels && '' === $domain) {
329            $info->errors |= self::ERROR_EMPTY_LABEL;
330
331            return [$domain];
332        }
333
334        // Step 1. Map each code point in the domain name string
335        $domain = self::mapCodePoints($domain, $options, $info);
336
337        // Step 2. Normalize the domain name string to Unicode Normalization Form C.
338        if (!Normalizer::isNormalized($domain, Normalizer::FORM_C)) {
339            $domain = Normalizer::normalize($domain, Normalizer::FORM_C);
340        }
341
342        // Step 3. Break the string into labels at U+002E (.) FULL STOP.
343        $labels = explode('.', $domain);
344        $lastLabelIndex = \count($labels) - 1;
345
346        // Step 4. Convert and validate each label in the domain name string.
347        foreach ($labels as $i => $label) {
348            $validationOptions = $options;
349
350            if ('xn--' === substr($label, 0, 4)) {
351                try {
352                    $label = self::punycodeDecode(substr($label, 4));
353                } catch (Exception $e) {
354                    $info->errors |= self::ERROR_PUNYCODE;
355
356                    continue;
357                }
358
359                $validationOptions['Transitional_Processing'] = false;
360                $labels[$i] = $label;
361            }
362
363            self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
364        }
365
366        if ($info->bidiDomain && !$info->validBidiDomain) {
367            $info->errors |= self::ERROR_BIDI;
368        }
369
370        // Any input domain name string that does not record an error has been successfully
371        // processed according to this specification. Conversely, if an input domain_name string
372        // causes an error, then the processing of the input domain_name string fails. Determining
373        // what to do with error input is up to the caller, and not in the scope of this document.
374        return $labels;
375    }
376
377    /**
378     * @see https://tools.ietf.org/html/rfc5893#section-2
379     *
380     * @param string $label
381     */
382    private static function validateBidiLabel($label, Info $info)
383    {
384        if (1 === preg_match(Regex::RTL_LABEL, $label)) {
385            $info->bidiDomain = true;
386
387            // Step 1. The first character must be a character with Bidi property L, R, or AL.
388            // If it has the R or AL property, it is an RTL label
389            if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
390                $info->validBidiDomain = false;
391
392                return;
393            }
394
395            // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
396            // CS, ET, ON, BN, or NSM are allowed.
397            if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
398                $info->validBidiDomain = false;
399
400                return;
401            }
402
403            // Step 3. In an RTL label, the end of the label must be a character with Bidi property
404            // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
405            if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
406                $info->validBidiDomain = false;
407
408                return;
409            }
410
411            // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
412            if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
413                $info->validBidiDomain = false;
414
415                return;
416            }
417
418            return;
419        }
420
421        // We are a LTR label
422        // Step 1. The first character must be a character with Bidi property L, R, or AL.
423        // If it has the L property, it is an LTR label.
424        if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
425            $info->validBidiDomain = false;
426
427            return;
428        }
429
430        // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
431        // ES, CS, ET, ON, BN, or NSM are allowed.
432        if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
433            $info->validBidiDomain = false;
434
435            return;
436        }
437
438        // Step 6.In an LTR label, the end of the label must be a character with Bidi property L or
439        // EN, followed by zero or more characters with Bidi property NSM.
440        if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
441            $info->validBidiDomain = false;
442
443            return;
444        }
445    }
446
447    /**
448     * @param array<int, string> $labels
449     */
450    private static function validateDomainAndLabelLength(array $labels, Info $info)
451    {
452        $maxDomainSize = self::MAX_DOMAIN_SIZE;
453        $length = \count($labels);
454
455        // Number of "." delimiters.
456        $domainLength = $length - 1;
457
458        // If the last label is empty and it is not the first label, then it is the root label.
459        // Increase the max size by 1, making it 254, to account for the root label's "."
460        // delimiter. This also means we don't need to check the last label's length for being too
461        // long.
462        if ($length > 1 && '' === $labels[$length - 1]) {
463            ++$maxDomainSize;
464            --$length;
465        }
466
467        for ($i = 0; $i < $length; ++$i) {
468            $bytes = \strlen($labels[$i]);
469            $domainLength += $bytes;
470
471            if ($bytes > self::MAX_LABEL_SIZE) {
472                $info->errors |= self::ERROR_LABEL_TOO_LONG;
473            }
474        }
475
476        if ($domainLength > $maxDomainSize) {
477            $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
478        }
479    }
480
481    /**
482     * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
483     *
484     * @param string              $label
485     * @param array<string, bool> $options
486     * @param bool                $canBeEmpty
487     */
488    private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
489    {
490        if ('' === $label) {
491            if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
492                $info->errors |= self::ERROR_EMPTY_LABEL;
493            }
494
495            return;
496        }
497
498        // Step 1. The label must be in Unicode Normalization Form C.
499        if (!Normalizer::isNormalized($label, Normalizer::FORM_C)) {
500            $info->errors |= self::ERROR_INVALID_ACE_LABEL;
501        }
502
503        $codePoints = self::utf8Decode($label);
504
505        if ($options['CheckHyphens']) {
506            // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
507            // in both the thrid and fourth positions.
508            if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
509                $info->errors |= self::ERROR_HYPHEN_3_4;
510            }
511
512            // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
513            // HYPHEN-MINUS character.
514            if ('-' === substr($label, 0, 1)) {
515                $info->errors |= self::ERROR_LEADING_HYPHEN;
516            }
517
518            if ('-' === substr($label, -1, 1)) {
519                $info->errors |= self::ERROR_TRAILING_HYPHEN;
520            }
521        }
522
523        // Step 4. The label must not contain a U+002E (.) FULL STOP.
524        if (false !== strpos($label, '.')) {
525            $info->errors |= self::ERROR_LABEL_HAS_DOT;
526        }
527
528        // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
529        if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
530            $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
531        }
532
533        // Step 6. Each code point in the label must only have certain status values according to
534        // Section 5, IDNA Mapping Table:
535        $transitional = $options['Transitional_Processing'];
536        $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
537
538        foreach ($codePoints as $codePoint) {
539            $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
540            $status = $data['status'];
541
542            if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
543                continue;
544            }
545
546            $info->errors |= self::ERROR_DISALLOWED;
547
548            break;
549        }
550
551        // Step 7. If CheckJoiners, the label must satisify the ContextJ rules from Appendix A, in
552        // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
553        // [IDNA2008].
554        if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
555            $info->errors |= self::ERROR_CONTEXTJ;
556        }
557
558        // Step 8. If CheckBidi, and if the domain name is a  Bidi domain name, then the label must
559        // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
560        if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
561            self::validateBidiLabel($label, $info);
562        }
563    }
564
565    /**
566     * @see https://tools.ietf.org/html/rfc3492#section-6.2
567     *
568     * @param string $input
569     *
570     * @return string
571     */
572    private static function punycodeDecode($input)
573    {
574        $n = self::INITIAL_N;
575        $out = 0;
576        $i = 0;
577        $bias = self::INITIAL_BIAS;
578        $lastDelimIndex = strrpos($input, self::DELIMITER);
579        $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
580        $inputLength = \strlen($input);
581        $output = [];
582        $bytes = array_map('ord', str_split($input));
583
584        for ($j = 0; $j < $b; ++$j) {
585            if ($bytes[$j] > 0x7F) {
586                throw new Exception('Invalid input');
587            }
588
589            $output[$out++] = $input[$j];
590        }
591
592        if ($b > 0) {
593            ++$b;
594        }
595
596        for ($in = $b; $in < $inputLength; ++$out) {
597            $oldi = $i;
598            $w = 1;
599
600            for ($k = self::BASE; /* no condition */; $k += self::BASE) {
601                if ($in >= $inputLength) {
602                    throw new Exception('Invalid input');
603                }
604
605                $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];
606
607                if ($digit < 0) {
608                    throw new Exception('Invalid input');
609                }
610
611                if ($digit > intdiv(self::MAX_INT - $i, $w)) {
612                    throw new Exception('Integer overflow');
613                }
614
615                $i += $digit * $w;
616
617                if ($k <= $bias) {
618                    $t = self::TMIN;
619                } elseif ($k >= $bias + self::TMAX) {
620                    $t = self::TMAX;
621                } else {
622                    $t = $k - $bias;
623                }
624
625                if ($digit < $t) {
626                    break;
627                }
628
629                $baseMinusT = self::BASE - $t;
630
631                if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
632                    throw new Exception('Integer overflow');
633                }
634
635                $w *= $baseMinusT;
636            }
637
638            $outPlusOne = $out + 1;
639            $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);
640
641            if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
642                throw new Exception('Integer overflow');
643            }
644
645            $n += intdiv($i, $outPlusOne);
646            $i %= $outPlusOne;
647            array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]);
648        }
649
650        return implode('', $output);
651    }
652
653    /**
654     * @see https://tools.ietf.org/html/rfc3492#section-6.3
655     *
656     * @param string $input
657     *
658     * @return string
659     */
660    private static function punycodeEncode($input)
661    {
662        $n = self::INITIAL_N;
663        $delta = 0;
664        $out = 0;
665        $bias = self::INITIAL_BIAS;
666        $inputLength = 0;
667        $output = '';
668        $iter = self::utf8Decode($input);
669
670        foreach ($iter as $codePoint) {
671            ++$inputLength;
672
673            if ($codePoint < 0x80) {
674                $output .= \chr($codePoint);
675                ++$out;
676            }
677        }
678
679        $h = $out;
680        $b = $out;
681
682        if ($b > 0) {
683            $output .= self::DELIMITER;
684            ++$out;
685        }
686
687        while ($h < $inputLength) {
688            $m = self::MAX_INT;
689
690            foreach ($iter as $codePoint) {
691                if ($codePoint >= $n && $codePoint < $m) {
692                    $m = $codePoint;
693                }
694            }
695
696            if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
697                throw new Exception('Integer overflow');
698            }
699
700            $delta += ($m - $n) * ($h + 1);
701            $n = $m;
702
703            foreach ($iter as $codePoint) {
704                if ($codePoint < $n && 0 === ++$delta) {
705                    throw new Exception('Integer overflow');
706                }
707
708                if ($codePoint === $n) {
709                    $q = $delta;
710
711                    for ($k = self::BASE; /* no condition */; $k += self::BASE) {
712                        if ($k <= $bias) {
713                            $t = self::TMIN;
714                        } elseif ($k >= $bias + self::TMAX) {
715                            $t = self::TMAX;
716                        } else {
717                            $t = $k - $bias;
718                        }
719
720                        if ($q < $t) {
721                            break;
722                        }
723
724                        $qMinusT = $q - $t;
725                        $baseMinusT = self::BASE - $t;
726                        $output .= self::encodeDigit($t + ($qMinusT) % ($baseMinusT), false);
727                        ++$out;
728                        $q = intdiv($qMinusT, $baseMinusT);
729                    }
730
731                    $output .= self::encodeDigit($q, false);
732                    ++$out;
733                    $bias = self::adaptBias($delta, $h + 1, $h === $b);
734                    $delta = 0;
735                    ++$h;
736                }
737            }
738
739            ++$delta;
740            ++$n;
741        }
742
743        return $output;
744    }
745
746    /**
747     * @see https://tools.ietf.org/html/rfc3492#section-6.1
748     *
749     * @param int  $delta
750     * @param int  $numPoints
751     * @param bool $firstTime
752     *
753     * @return int
754     */
755    private static function adaptBias($delta, $numPoints, $firstTime)
756    {
757        // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
758        $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
759        $delta += intdiv($delta, $numPoints);
760        $k = 0;
761
762        while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
763            $delta = intdiv($delta, self::BASE - self::TMIN);
764            $k += self::BASE;
765        }
766
767        return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
768    }
769
770    /**
771     * @param int  $d
772     * @param bool $flag
773     *
774     * @return string
775     */
776    private static function encodeDigit($d, $flag)
777    {
778        return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
779    }
780
781    /**
782     * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
783     * invalid byte sequences will be replaced by a U+FFFD replacement code point.
784     *
785     * @see https://encoding.spec.whatwg.org/#utf-8-decoder
786     *
787     * @param string $input
788     *
789     * @return array<int, int>
790     */
791    private static function utf8Decode($input)
792    {
793        $bytesSeen = 0;
794        $bytesNeeded = 0;
795        $lowerBoundary = 0x80;
796        $upperBoundary = 0xBF;
797        $codePoint = 0;
798        $codePoints = [];
799        $length = \strlen($input);
800
801        for ($i = 0; $i < $length; ++$i) {
802            $byte = \ord($input[$i]);
803
804            if (0 === $bytesNeeded) {
805                if ($byte >= 0x00 && $byte <= 0x7F) {
806                    $codePoints[] = $byte;
807
808                    continue;
809                }
810
811                if ($byte >= 0xC2 && $byte <= 0xDF) {
812                    $bytesNeeded = 1;
813                    $codePoint = $byte & 0x1F;
814                } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
815                    if (0xE0 === $byte) {
816                        $lowerBoundary = 0xA0;
817                    } elseif (0xED === $byte) {
818                        $upperBoundary = 0x9F;
819                    }
820
821                    $bytesNeeded = 2;
822                    $codePoint = $byte & 0xF;
823                } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
824                    if (0xF0 === $byte) {
825                        $lowerBoundary = 0x90;
826                    } elseif (0xF4 === $byte) {
827                        $upperBoundary = 0x8F;
828                    }
829
830                    $bytesNeeded = 3;
831                    $codePoint = $byte & 0x7;
832                } else {
833                    $codePoints[] = 0xFFFD;
834                }
835
836                continue;
837            }
838
839            if ($byte < $lowerBoundary || $byte > $upperBoundary) {
840                $codePoint = 0;
841                $bytesNeeded = 0;
842                $bytesSeen = 0;
843                $lowerBoundary = 0x80;
844                $upperBoundary = 0xBF;
845                --$i;
846                $codePoints[] = 0xFFFD;
847
848                continue;
849            }
850
851            $lowerBoundary = 0x80;
852            $upperBoundary = 0xBF;
853            $codePoint = ($codePoint << 6) | ($byte & 0x3F);
854
855            if (++$bytesSeen !== $bytesNeeded) {
856                continue;
857            }
858
859            $codePoints[] = $codePoint;
860            $codePoint = 0;
861            $bytesNeeded = 0;
862            $bytesSeen = 0;
863        }
864
865        // String unexpectedly ended, so append a U+FFFD code point.
866        if (0 !== $bytesNeeded) {
867            $codePoints[] = 0xFFFD;
868        }
869
870        return $codePoints;
871    }
872
873    /**
874     * @param int  $codePoint
875     * @param bool $useSTD3ASCIIRules
876     *
877     * @return array{status: string, mapping?: string}
878     */
879    private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
880    {
881        if (!self::$mappingTableLoaded) {
882            self::$mappingTableLoaded = true;
883            self::$mapped = require __DIR__.'/Resources/unidata/mapped.php';
884            self::$ignored = require __DIR__.'/Resources/unidata/ignored.php';
885            self::$deviation = require __DIR__.'/Resources/unidata/deviation.php';
886            self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php';
887            self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
888            self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
889        }
890
891        if (isset(self::$mapped[$codePoint])) {
892            return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
893        }
894
895        if (isset(self::$ignored[$codePoint])) {
896            return ['status' => 'ignored'];
897        }
898
899        if (isset(self::$deviation[$codePoint])) {
900            return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
901        }
902
903        if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
904            return ['status' => 'disallowed'];
905        }
906
907        $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);
908
909        if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
910            $status = 'disallowed';
911
912            if (!$useSTD3ASCIIRules) {
913                $status = $isDisallowedMapped ? 'mapped' : 'valid';
914            }
915
916            if ($isDisallowedMapped) {
917                return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]];
918            }
919
920            return ['status' => $status];
921        }
922
923        return ['status' => 'valid'];
924    }
925}
926