1<?php
2
3/*
4 * This file is part of the league/commonmark package.
5 *
6 * (c) Colin O'Dell <colinodell@gmail.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12declare(strict_types=1);
13
14namespace League\CommonMark\Normalizer;
15
/**
 * Normalize text input using the steps given by the CommonMark spec to normalize labels
 *
 * The text is trimmed, every run of internal whitespace is collapsed to a
 * single space, and the result is Unicode case-folded.
 *
 * @see https://spec.commonmark.org/0.29/#matches
 */
final class TextNormalizer implements TextNormalizerInterface
{
    /**
     * Fallback case-folding table used when MB_CASE_FOLD is unavailable.
     *
     * Index 0 lists the characters to search for; index 1 lists, at the same
     * positions, the strings they are replaced with.
     *
     * Source: https://github.com/symfony/polyfill-mbstring/blob/master/Mbstring.php
     *
     * @var array<int, array<int, string>>
     */
    private const CASE_FOLD = [
        ['µ', 'ſ', "\xCD\x85", 'ς', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE", "\xC3\x9F", "\xE1\xBA\x9E"],
        ['μ', 's', 'ι',        'σ', 'β',        'θ',        'φ',        'π',        'κ',        'ρ',        'ε',        "\xE1\xB9\xA1", 'ι',            'ss',       'ss'],
    ];

    /**
     * {@inheritdoc}
     *
     * Trims the text, collapses each run of whitespace into a single space and
     * then applies Unicode case folding.
     *
     * @param string $text    The text to normalize
     * @param mixed  $context Not used by this implementation
     *
     * @return string The normalized text
     */
    public function normalize(string $text, $context = null): string
    {
        // Remove leading/trailing whitespace first
        $trimmed = \trim($text);

        // Collapse internal whitespace to a single space.
        //
        // An explicit ASCII whitespace class is used instead of `\s`: without
        // the `u` modifier, `\s` follows PCRE's locale-dependent character
        // tables, so under a non-"C" LC_CTYPE locale bytes such as 0xA0 may be
        // treated as whitespace, which would split multi-byte UTF-8 characters.
        $collapsed = \preg_replace('/[ \t\n\x0B\f\r]+/', ' ', $trimmed);
        if ($collapsed === null) {
            // preg_replace() signals failure with null; keep the trimmed text
            // rather than passing null on to the string functions below.
            $collapsed = $trimmed;
        }

        if (\defined('MB_CASE_FOLD')) {
            return \mb_convert_case($collapsed, \MB_CASE_FOLD, 'UTF-8');
        }

        // We're not on a version of PHP (7.3+) which has this feature, so
        // apply the special-case foldings by hand before lower-casing.
        $folded = \str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $collapsed);

        return \mb_strtolower($folded, 'UTF-8');
    }
}
52