1<?php
2
3use dokuwiki\Utf8\Sort;
4
5/**
6 * @author Moisés Braga Ribeiro <moisesbr@gmail.com>
7 * @author Andreas Gohr <andi@splitbrain.org>
8 */
9class sort_with_collator_test extends DokuWikiTest
10{
11    /*
12     * Dependency for tests that need "intl" extension.
13     */
14    public function testIntlExtensionAvailability()
15    {
16        if (!class_exists('\Collator')) {
17            $this->markTestSkipped('Skipping all sort tests with collator, as they need "intl" extension');
18        }
19        $this->assertTrue(true); // avoid being marked as risky for having no assertion
20    }
21
22    /**
23     * Provide real word pairs of the languages being tested (when possible).
24     * Everything which is beyond the usual A-Z order should be checked,
25     * including every character with an accent (diacritic) used in the language.
26     *
27     * CHECKING NON-EQUIVALENT CHARACTERS (X < Y)
28     *
29     * In this case, the words are always sorted according to the character pair.
30     * Craft word pairs to double-check the collator, such that sort by the next
31     * character yields the opposite result.
32     *
33     *   Esperanto example: ĉ < d
34     *   ĉokolado, dento ==> ĉ < d ==> ĉokolado < dento
35     *   (if ĉ < d would fail, o < e would also fail ==> collator failure)
36     *
37     * CHECKING EQUIVALENT CHARACTERS (X = Y)
38     *
39     * If the sole difference between the words is the character pair, the sort
40     * will be as if X < Y. Otherwise the characters will be treated as the same.
41     * Craft two word pairs to test both conditions.
42     *
43     *   German example: a = ä
44     *   Sole diff.: Apfel, Äpfel ==> a < ä        ==> Apfel < Äpfel
45     *   Otherwise:  Ämter, Arzt  ==> a = ä, m < r ==> Ämter < Arzt
46     *
47     * CHECKING MULTIPLE EQUIVALENT CHARACTERS (X = Y = Z = ...)
48     *
49     * An extension of the above case. If the sole difference between the words is
50     * a character pair from the given set, the sort will be as if X < Y < Z < ...
51     * Otherwise the characters will be treated as the same.
52     * Craft at least one word pair to test the first case and as many as possible
53     * to test the other case.
54     *
55     *   Portuguese example: e = é = ê
56     *   Sole diff.: de, dê         ==> e < ê                  ==> de < dê
57     *   Otherwise:  pé, pedra      ==> é = e, end of word < d ==> pé < pedra
58     *               pêssego, peste ==> ê = e, s = s, s < t    ==> pêssego < peste
59     *
60     * @return Generator|array
61     * @see testStrcmp
62     */
63    public function provideWordPairs()
64    {
65        static $pairs = [
66            // Esperanto
67            'eo' => [
68                // c < ĉ < d
69                ['celo', 'ĉapo'], ['ĉokolado', 'dento'],
70                // g < ĝ < h < ĥ < i
71                ['glacio', 'ĝirafo'], ['ĝojo', 'haro'], ['horo', 'ĥameleono'], ['ĥoro', 'iam'],
72                // j < ĵ < k
73                ['jes', 'ĵaŭdo'], ['ĵurnalo', 'kapo'],
74                // s < ŝ < t
75                ['seka', 'ŝako'], ['ŝuo', 'tablo'],
76                // u < ŭ < v
77                ['urso', 'ŭaŭ'], ['ŭo', 'vino'],
78                // natural sort
79                ['paĝo 2', 'paĝo 10'], ['paĝo 51', 'paĝo 100']
80            ],
81
82            // German
83            'de' => [
84                // a = ä
85                ['Apfel', 'Äpfel'], ['Ämter', 'Arzt'],
86                // o = ö
87                ['Tochter', 'Töchter'], ['Öl', 'Orange'],
88                // u = ü
89                ['Mutter', 'Mütter'], ['Übersetzung', 'Uhrzeit'],
90                // ß = ss
91                ['weiss', 'weiß'], ['Fuchs', 'Fuß'], ['Fraß', 'Frau'],
92                // natural sort
93                ['Seite 2', 'Seite 10'], ['Seite 51', 'Seite 100']
94            ],
95
96            // Portuguese
97            'pt' => [
98                // a = á = à = â = ã
99                ['a', 'à'], ['água', 'amor'], ['às', 'ato'], ['âmbar', 'arte'], ['lã', 'lata'],
100                // e = é = ê
101                ['de', 'dê'], ['pé', 'pedra'], ['pêssego', 'peste'],
102                // i = í
103                ['liquido', 'líquido'], ['índio', 'indireto'],
104                // o = ó = ô = õ
105                ['avó', 'avô'], ['ótimo', 'ovo'], ['ônibus', 'osso'], ['limões', 'limonada'],
106                // u = ú = ü (ü appears in old texts)
107                ['numero', 'número'], ['último', 'um'], ['tranqüila', 'tranquilamente'],
108                // c = ç
109                ['faca', 'faça'], ['taça', 'taco'],
110                // natural sort
111                ['página 2', 'página 10'], ['página 51', 'página 100']
112            ],
113
114            // Spanish
115            'es' => [
116                // n < ñ < o
117                ['nube', 'ñoño'], ['ñu', 'ojo'],
118                // a = á
119                ['mas', 'más'], ['ácido', 'agua'],
120                // e = é
121                ['de', 'dé'], ['él', 'elefante'],
122                // i = í
123                ['mi', 'mí'], ['íntimo', 'isla'],
124                // o = ó
125                ['como', 'cómo'], ['óptimo', 'oreja'],
126                // u = ú
127                ['tu', 'tú'], ['último', 'uno'],
128                // natural sort
129                ['página 2', 'página 10'], ['página 51', 'página 100']
130            ],
131        ];
132
133        foreach ($pairs as $lang => $list) {
134            foreach ($list as $pair) {
135                yield [$lang, $pair[0], $pair[1]];
136            }
137        }
138    }
139
140    /**
141     * Provide the sorted sequences of all characters used in the languages being tested.
142     * Everything which is beyond the usual A-Z order should be checked.
143     *
144     * CHECKING NON-EQUIVALENT CHARACTERS (X < Y)
145     *
146     * Add a 2nd character to double-check the collator, such that sort by the 2nd
147     * character yields the opposite result.
148     *
149     *   Esperanto example: ĉ < d
150     *   2nd character: ĉe, da ==> ĉ < d ==> ĉe < da
151     *   (if ĉ < d would fail, e < a would also fail ==> collator failure)
152     *
153     * CHECKING EQUIVALENT CHARACTERS (X = Y = Z)
154     *
155     * Don't add a 2nd character, because it would break the test. The lone characters
156     * will be sorted as words with a sole difference, that is, as if X < Y < Z.
157     *
158     *   German example: a = ä
159     *   Sole difference: a, ä ==> a < ä
160     *
161     * @return Generator|array
162     * @see testSort
163     * @see testKSort
164     * @see testASort
165     * @see testASortFnUrl
166     * @see testASortFnSafe
167     * @see testASortFnUtf8
168     */
169    public function provideSortedCharList()
170    {
171        static $lists = [
172            // Esperanto
173            // c < ĉ < d
174            // g < ĝ < h < ĥ < i
175            // j < ĵ < k
176            // s < ŝ < t
177            // u < ŭ < v
178            'eo' => 'a b ci ĉe da e f gu ĝo hi ĥe ia ju ĵo ke l m n o p r so ŝi te us ŭo ve z',
179
180            // German
181            // a = ä
182            // o = ö
183            // u = ü
184            // ß = ss
185            'de' => 'a ä b c d e f g h i j k l m n o ö p q r s ss ß st t u ü v w x y z',
186
187            // Portuguese
188            // a = á = à = â = ã
189            // e = é = ê
190            // i = í
191            // o = ó = ô = õ
192            // u = ú = ü (ü appears in old texts)
193            // c = ç
194            'pt' => 'a á à â ã b c ç d e é ê f g h i í j k l m n o ó ô õ p q r s t u ú ü v w x y z',
195
196            // Spanish
197            // n < ñ < o
198            // a = á
199            // e = é
200            // i = í
201            // o = ó
202            // u = ú
203            'es' => 'a á b c d e é f g h i í j k l m nu ño oh óh p q r s t u ú v w x y z',
204        ];
205
206        foreach ($lists as $lang => $list) {
207            yield [$lang, $list];
208        }
209    }
210
211    /**
212     * @depends      testIntlExtensionAvailability
213     * @dataProvider provideWordPairs
214     * @param string $lang
215     * @param string $word1
216     * @param string $word2
217     */
218    public function testStrcmp($lang, $word1, $word2)
219    {
220        global $conf;
221        $conf['lang'] = $lang;
222
223        $this->assertLessThan(0, Sort::strcmp($word1, $word2));
224    }
225
226    /**
227     * @dataProvider provideSortedCharList
228     * @depends      testIntlExtensionAvailability
229     * @param string $lang
230     * @param string $list
231     */
232    public function testSort($lang, $list)
233    {
234        global $conf;
235        $conf['lang'] = $lang;
236
237        $sorted = explode(' ', $list);
238        $random = explode(' ', $list);
239        shuffle($random);
240        Sort::sort($random);
241        $this->assertEquals(array_values($random), array_values($sorted));
242    }
243
244    /**
245     * @dataProvider provideSortedCharList
246     * @depends      testIntlExtensionAvailability
247     * @param string $lang
248     * @param string $list
249     */
250    public function testKSort($lang, $list)
251    {
252        global $conf;
253        $conf['lang'] = $lang;
254
255        $sorted = array_flip(explode(' ', $list));
256        $random = explode(' ', $list);
257        shuffle($random);
258        $random = array_flip($random);
259        Sort::ksort($random);
260        $this->assertEquals(array_keys($random), array_keys($sorted));
261    }
262
263    /**
264     * @dataProvider provideSortedCharList
265     * @depends      testIntlExtensionAvailability
266     * @param string $lang
267     * @param string $list
268     */
269    public function testASort($lang, $list)
270    {
271        global $conf;
272        $conf['lang'] = $lang;
273
274        $sorted = explode(' ', $list);
275        $keys = array_keys($sorted);
276        shuffle($keys);
277        foreach ($keys as $key) {
278            $random[$key] = $sorted[$key];
279        }
280        Sort::asort($random);
281        $this->assertEquals(array_values($random), array_values($sorted));
282        $this->assertEquals(array_keys($random), array_keys($sorted));
283    }
284
285    /**
286     * @dataProvider provideSortedCharList
287     * @depends      testIntlExtensionAvailability
288     * @param string $lang
289     * @param string $list
290     */
291    public function testASortFnUrl($lang, $list)
292    {
293        global $conf;
294        $conf['fnencode'] = 'url';
295        $conf['lang'] = $lang;
296
297        $sorted = explode('+', urlencode($list));
298        $keys = array_keys($sorted);
299        shuffle($keys);
300        foreach ($keys as $key) {
301            $random[$key] = $sorted[$key];
302        }
303        Sort::asortFN($random);
304        $this->assertEquals(array_values($random), array_values($sorted));
305        $this->assertEquals(array_keys($random), array_keys($sorted));
306    }
307
308    /**
309     * @dataProvider provideSortedCharList
310     * @depends      testIntlExtensionAvailability
311     * @param string $lang
312     * @param string $list
313     */
314    public function testASortFnSafe($lang, $list)
315    {
316        global $conf;
317        $conf['fnencode'] = 'safe';
318        $conf['lang'] = $lang;
319
320        $sorted = explode(' ', $list);
321        foreach (array_keys($sorted) as $key) {
322            $sorted[$key] = SafeFN::encode($sorted[$key]);
323        }
324        $keys = array_keys($sorted);
325        shuffle($keys);
326        foreach ($keys as $key) {
327            $random[$key] = $sorted[$key];
328        }
329        Sort::asortFN($random);
330        $this->assertEquals(array_values($random), array_values($sorted));
331        $this->assertEquals(array_keys($random), array_keys($sorted));
332    }
333
334    /**
335     * @dataProvider provideSortedCharList
336     * @depends      testIntlExtensionAvailability
337     * @param string $lang
338     * @param string $list
339     */
340    public function testASortFnUtf8($lang, $list)
341    {
342        global $conf;
343        $conf['fnencode'] = 'utf-8';
344        $conf['lang'] = $lang;
345
346        $sorted = explode(' ', $list);
347        $keys = array_keys($sorted);
348        shuffle($keys);
349        foreach ($keys as $key) {
350            $random[$key] = $sorted[$key];
351        }
352        Sort::asortFN($random);
353        $this->assertEquals(array_values($random), array_values($sorted));
354        $this->assertEquals(array_keys($random), array_keys($sorted));
355    }
356}
357