xref: /dokuwiki/_test/tests/inc/sort_without_collator.test.php (revision e32b1b0ffe5e0c48ff8faed9c74099599659774c)
1<?php
2
3use dokuwiki\Utf8\Sort;
4
5require_once __DIR__ . '/sort_with_collator.test.php';
6
7/**
8 * Based on sort_with_collator.test.php.
9 *
10 * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
11 * @author Andreas Gohr <andi@splitbrain.org>
12 */
class sort_without_collator_test extends sort_with_collator_test
{
    /**
     * Run the inherited class-level setup, then switch the "intl" extension
     * off so that every test in this class exercises the fallback sort.
     */
    public static function setUpBeforeClass() : void
    {
        parent::setUpBeforeClass();
        Sort::useIntl(false);
    }

    /**
     * Switch the "intl" extension back on, then run the inherited
     * class-level teardown.
     */
    public static function tearDownAfterClass() : void
    {
        Sort::useIntl(true);
        parent::tearDownAfterClass();
    }

    /**
     * The fallback sort is always used in this class, so the availability
     * of the "intl" extension is deliberately not checked here.
     */
    public function testIntlExtensionAvailability()
    {
        // a trivial assertion keeps the test from being flagged as risky
        $this->assertTrue(true);
    }

    /**
     * Yield [language, first word, second word] triples made of real words
     * of the tested languages (when possible).
     *
     * The pairs document what the fallback sort can and cannot do, given
     * that it simply follows character codes. In particular, every accented
     * character (diacritic) used in a language should appear in a pair that
     * shows it being WRONGLY sorted after Z.
     *
     * @return Generator|array
     * @see testStrcmp
     */
    public function provideWordPairs()
    {
        static $pairsByLang = [
            // Esperanto
            'eo' => [
                // c < ĉ comes out right with the fallback sort, ĉ < d does not (and so on)
                ['celo', 'ĉapo'],
                ['glacio', 'ĝirafo'],
                ['horo', 'ĥameleono'],
                ['jes', 'ĵaŭdo'],
                ['seka', 'ŝako'],
                ['urso', 'ŭaŭ'],
                // ĉ/ĝ/ĥ/ĵ/ŝ/ŭ are WRONGLY placed after z
                ['zorio', 'ĉokolado'],
                ['zorio', 'ĝojo'],
                ['zorio', 'ĥoro'],
                ['zorio', 'ĵurnalo'],
                ['zorio', 'ŝuo'],
                ['zorio', 'ŭo'],
                // natural (numeric) ordering is unaffected
                ['paĝo 2', 'paĝo 10'],
                ['paĝo 51', 'paĝo 100'],
            ],

            // German
            'de' => [
                // ä/ö/ü/ß are WRONGLY placed after z
                ['Zebra', 'Äpfel'],
                ['Zebra', 'Öl'],
                ['Zebra', 'Übersetzung'],
                ['Weizen', 'weiß'],
                // natural (numeric) ordering is unaffected
                ['Seite 2', 'Seite 10'],
                ['Seite 51', 'Seite 100'],
            ],

            // Portuguese
            'pt' => [
                // accented letters are WRONGLY placed after z
                ['zebra', 'às'],
                ['zebra', 'água'],
                ['zebra', 'âmbar'],
                ['zebra', 'épico'],
                ['zebra', 'ênclise'],
                ['zebra', 'índio'],
                ['zebra', 'ótimo'],
                ['zebra', 'ônibus'],
                ['zebra', 'último'],
                ['pizza', 'pião'],
                ['pizza', 'piões'],
                ['azar', 'aço'],
                // natural (numeric) ordering is unaffected
                ['página 2', 'página 10'],
                ['página 51', 'página 100'],
            ],

            // Spanish
            'es' => [
                // n < ñ comes out right with the fallback sort, ñ < o does not
                ['nube', 'ñu'],
                // accented letters are WRONGLY placed after z
                ['zapato', 'ácido'],
                ['zapato', 'él'],
                ['zapato', 'íntimo'],
                ['zapato', 'óptimo'],
                ['zapato', 'último'],
                ['pizza', 'piña'],
                // natural (numeric) ordering is unaffected
                ['página 2', 'página 10'],
                ['página 51', 'página 100'],
            ],
        ];

        foreach ($pairsByLang as $lang => $wordPairs) {
            foreach ($wordPairs as [$first, $second]) {
                yield [$lang, $first, $second];
            }
        }
    }

    /**
     * Yield [language, character list] pairs holding the WRONG sort order
     * that the fallback sort produces by simply following character codes.
     *
     * Each list is the corresponding sequence from class "sort_with_collator"
     * (kept as a comment above the entry) reordered: first the plain letters
     * A-Z, then the accented characters in character code order.
     *
     * @return Generator|array
     * @see testSort
     * @see testKSort
     * @see testASort
     * @see testASortFnUrl
     * @see testASortFnSafe
     * @see testASortFnUtf8
     */
    public function provideSortedCharList()
    {
        static $charsByLang = [
            // Esperanto
            //      'a b c ĉ d e f g ĝ h ĥ i j ĵ k l m n o p r s ŝ t u ŭ v z'
            'eo' => 'a b c d e f g h i j k l m n o p r s t u v z ĉ ĝ ĥ ĵ ŝ ŭ',

            // German
            //      'a ä b c d e f g h i j k l m n o ö p q r s ß t u ü v w x y z'
            'de' => 'a b c d e f g h i j k l m n o p q r s t u v w x y z ß ä ö ü',

            // Portuguese
            //      'a á à â ã b c ç d e é ê f g h i í j k l m n o ó ô õ p q r s t u ú ü v w x y z'
            'pt' => 'a b c d e f g h i j k l m n o p q r s t u v w x y z à á â ã ç é ê í ó ô õ ú ü',

            // Spanish
            //      'a á b c d e é f g h i í j k l m n ñ o ó p q r s t u ú v w x y z'
            'es' => 'a b c d e f g h i j k l m n o p q r s t u v w x y z á é í ñ ó ú',
        ];

        foreach (array_keys($charsByLang) as $lang) {
            yield [$lang, $charsByLang[$lang]];
        }
    }
}
149