xref: /dokuwiki/_test/tests/Parsing/ModeRegistryTest.php (revision 47a02a102092be9e1e6f1ddaf158bdfffdb13d4f)
1<?php
2
3namespace dokuwiki\test\Parsing;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\ModeRegistry;
7use dokuwiki\Parsing\Parser;
8use dokuwiki\Parsing\ParserMode\AbstractMode;
9
/**
 * Tests for ModeRegistry: the category taxonomy, per-instance mode
 * registration, the syntax-dependent mode list, block-EOL bookkeeping and
 * the sub-parser pool.
 */
class ModeRegistryTest extends \DokuWikiTest
{
    /** @var ModeRegistry registry under test, rebuilt in setUp() for every test */
    private ModeRegistry $registry;

    /**
     * Build a fresh registry from the configured `$conf['syntax']` before each test.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $this->registry = new ModeRegistry($conf['syntax']);
    }

    /**
     * Building the mode list must publish the category map to $PARSER_MODES.
     */
    public function testGetModesPublishesGlobalMirror(): void
    {
        // The deprecated global mirror is published when the mode list is built.
        global $PARSER_MODES;
        (new ModeRegistry('dw'))->getModes();
        $this->assertIsArray($PARSER_MODES);
        $this->assertArrayHasKey('container', $PARSER_MODES);
        $this->assertArrayHasKey('formatting', $PARSER_MODES);
        $this->assertArrayHasKey('substition', $PARSER_MODES);
        $this->assertArrayHasKey('protected', $PARSER_MODES);
        $this->assertArrayHasKey('disabled', $PARSER_MODES);
        $this->assertArrayHasKey('paragraphs', $PARSER_MODES);
        $this->assertArrayHasKey('baseonly', $PARSER_MODES);
    }

    /**
     * The published global must be identical to the instance's own categories.
     */
    public function testGlobalMirrorMatchesInstanceTaxonomy(): void
    {
        // After the mode list is built, the global mirror equals the instance's
        // taxonomy (defaults + plugin modes).
        global $PARSER_MODES;
        $registry = new ModeRegistry('dw');
        $registry->getModes();
        $this->assertSame($registry->getCategories(), $PARSER_MODES);
    }

    /**
     * getSyntax() hands back exactly the string given to the constructor.
     */
    public function testGetSyntaxReturnsConstructorArgument(): void
    {
        $this->assertSame('md+dw', (new ModeRegistry('md+dw'))->getSyntax());
    }

    /**
     * getCategories() exposes the well-known category keys.
     */
    public function testGetCategories(): void
    {
        $cats = $this->registry->getCategories();
        $this->assertArrayHasKey('container', $cats);
        $this->assertArrayHasKey('formatting', $cats);
        $this->assertArrayHasKey('baseonly', $cats);
    }

    /**
     * Asking for one category returns the modes registered in it.
     */
    public function testGetModesForSingleCategory(): void
    {
        $modes = $this->registry->getModesForCategories([ModeRegistry::CATEGORY_CONTAINER]);
        $this->assertContains('listblock', $modes);
        $this->assertContains('table', $modes);
        $this->assertContains('gfm_quote', $modes);
        $this->assertContains('gfm_hr', $modes);
    }

    /**
     * Asking for several categories returns modes from each of them.
     */
    public function testGetModesForMultipleCategories(): void
    {
        $modes = $this->registry->getModesForCategories([
            ModeRegistry::CATEGORY_CONTAINER,
            ModeRegistry::CATEGORY_BASEONLY,
        ]);
        $this->assertContains('listblock', $modes);
        $this->assertContains('header', $modes);
    }

    /**
     * Naming the same category twice must not list any mode twice.
     */
    public function testGetModesForCategoriesDeduplicates(): void
    {
        $modes = $this->registry->getModesForCategories([
            ModeRegistry::CATEGORY_CONTAINER,
            ModeRegistry::CATEGORY_CONTAINER,
        ]);
        // Without this guard an empty result would skip the loop below and
        // the test would pass without having checked anything.
        $this->assertNotEmpty($modes);

        foreach (array_count_values($modes) as $mode => $count) {
            $this->assertSame(1, $count, "Mode '$mode' is listed $count times");
        }
    }

    /**
     * An unknown category name yields an empty list.
     */
    public function testGetModesForUnknownCategoryReturnsEmpty(): void
    {
        $modes = $this->registry->getModesForCategories(['nonexistent']);
        $this->assertSame([], $modes);
    }

    /**
     * A mode registered into a category shows up when that category is queried.
     */
    public function testRegisterMode(): void
    {
        $this->registry->registerMode(ModeRegistry::CATEGORY_CONTAINER, 'testmode');
        $this->assertContains(
            'testmode',
            $this->registry->getModesForCategories([ModeRegistry::CATEGORY_CONTAINER])
        );
    }

    /**
     * Mode registrations are instance state, not shared between registries.
     */
    public function testRegisterModeIsPerInstance(): void
    {
        // Registering on one registry must not leak into another.
        $this->registry->registerMode(ModeRegistry::CATEGORY_CONTAINER, 'leaktest');
        $other = new ModeRegistry('dw');
        $this->assertNotContains(
            'leaktest',
            $other->getModesForCategories([ModeRegistry::CATEGORY_CONTAINER])
        );
    }

    /**
     * getModes() returns its entries ordered by ascending 'sort' value.
     */
    public function testGetModesReturnsSortedArray(): void
    {
        $modes = $this->registry->getModes();
        $this->assertNotEmpty($modes);

        $sortValues = array_column($modes, 'sort');
        $sorted = $sortValues;
        sort($sorted);
        $this->assertSame($sorted, $sortValues);
    }

    /**
     * Every getModes() entry carries an int 'sort', a string 'mode' and an
     * AbstractMode 'obj'.
     */
    public function testGetModesContainsExpectedKeys(): void
    {
        $modes = $this->registry->getModes();
        // Guard against a vacuous pass: the per-entry checks below never run
        // on an empty list.
        $this->assertNotEmpty($modes);

        foreach ($modes as $entry) {
            $this->assertArrayHasKey('sort', $entry);
            $this->assertArrayHasKey('mode', $entry);
            $this->assertArrayHasKey('obj', $entry);
            $this->assertIsInt($entry['sort']);
            $this->assertIsString($entry['mode']);
            $this->assertInstanceOf(AbstractMode::class, $entry['obj']);
        }
    }

    /**
     * A 'dw' registry contains a sample of the built-in and data-driven modes.
     */
    public function testGetModesContainsBuiltinModes(): void
    {
        $modes = (new ModeRegistry('dw'))->getModes();
        $modeNames = array_column($modes, 'mode');
        $this->assertContains('strong', $modeNames);
        $this->assertContains('header', $modeNames);
        $this->assertContains('listblock', $modeNames);
        $this->assertContains('eol', $modeNames);
        $this->assertContains('smiley', $modeNames);
        $this->assertContains('acronym', $modeNames);
        $this->assertContains('entity', $modeNames);
    }

    /**
     * sortModes() is a comparator over the 'sort' key: negative, positive
     * and zero for smaller, larger and equal entries.
     */
    public function testSortModes(): void
    {
        $a = ['sort' => 10, 'mode' => 'a'];
        $b = ['sort' => 20, 'mode' => 'b'];
        $this->assertLessThan(0, ModeRegistry::sortModes($a, $b));
        $this->assertGreaterThan(0, ModeRegistry::sortModes($b, $a));
        $this->assertEquals(0, ModeRegistry::sortModes($a, $a));
    }

    /**
     * A fresh registry has no block-EOL modes registered.
     */
    public function testBlockEolModesEmptyByDefault(): void
    {
        $this->assertSame([], $this->registry->getBlockEolModes());
    }

    /**
     * Registered block-EOL modes are returned in registration order.
     */
    public function testRegisterBlockEolMode(): void
    {
        $this->registry->registerBlockEolMode('listblock');
        $this->registry->registerBlockEolMode('table');
        $this->assertSame(['listblock', 'table'], $this->registry->getBlockEolModes());
    }

    /**
     * Block-EOL registrations do not leak into another registry instance.
     */
    public function testBlockEolModesArePerRegistry(): void
    {
        $this->registry->registerBlockEolMode('listblock');
        $fresh = new ModeRegistry('dw');
        $this->assertSame([], $fresh->getBlockEolModes());
    }

    /**
     * The default syntax setting must produce the exact same mode set as before
     * the syntax setting was introduced (no-op guarantee).
     */
    public function testGetModesDefaultSyntaxMatchesLegacy(): void
    {
        $modes = (new ModeRegistry('dw'))->getModes();
        $modeNames = array_column($modes, 'mode');

        // All original built-in modes must be present (with `quote`
        // and `hr` replaced by the unified `gfm_quote` and `gfm_hr`
        // that cover both DW and GFM dialects).
        $expected = [
            'listblock', 'preformatted', 'notoc', 'nocache',
            'header', 'table', 'linebreak', 'footnote',
            'gfm_hr', 'unformatted', 'code', 'file', 'gfm_quote',
            'internallink', 'rss', 'media', 'externallink',
            'emaillink', 'windowssharelink', 'eol',
            'strong', 'emphasis', 'underline', 'monospace',
            'subscript', 'superscript', 'deleted',
            'smiley', 'acronym', 'entity',
        ];
        foreach ($expected as $mode) {
            $this->assertContains($mode, $modeNames, "Mode '$mode' missing in dw syntax setting");
        }
    }

    /** DW-only modes must be absent when syntax is 'md' */
    public function testGetModesDwModesSkippedInMarkdownOnly(): void
    {
        $modes = (new ModeRegistry('md'))->getModes();
        $modeNames = array_column($modes, 'mode');

        $dwOnly = [
            'emphasis', 'deleted', 'code', 'header',
            'linebreak', 'internallink', 'media', 'listblock', 'table',
            'monospace', 'unformatted', 'file',
        ];
        foreach ($dwOnly as $mode) {
            $this->assertNotContains($mode, $modeNames, "DW mode '$mode' should not load in md-only mode");
        }
    }

    /** Always-loaded modes must still be present in md-only mode */
    public function testGetModesAlwaysModesPresentInMarkdownOnly(): void
    {
        $modes = (new ModeRegistry('md'))->getModes();
        $modeNames = array_column($modes, 'mode');

        $always = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'gfm_quote', 'gfm_hr', 'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
            'smiley', 'acronym', 'entity',
        ];
        foreach ($always as $mode) {
            $this->assertContains($mode, $modeNames, "Always-loaded mode '$mode' missing in md syntax setting");
        }
    }

    /** In mixed modes, DW modes must still load (except those that are
     * preference-gated — see provideModeLoadingCases for the per-mode rules) */
    public function testGetModesMixedModesLoadDwModes(): void
    {
        // DW modes that load in both dw+md and md+dw (no MD-side conflict)
        $dwAlways = [
            'emphasis', 'deleted', 'code', 'header',
            'linebreak', 'internallink', 'media', 'table',
            'monospace', 'unformatted', 'file',
        ];

        foreach (['dw+md', 'md+dw'] as $syntax) {
            $modes = (new ModeRegistry($syntax))->getModes();
            $modeNames = array_column($modes, 'mode');

            foreach ($dwAlways as $mode) {
                $this->assertContains($mode, $modeNames, "DW mode '$mode' missing in '$syntax' syntax setting");
            }
        }
    }

    /**
     * Two registries built with different syntaxes in the same request must
     * produce different mode lists — the guarantee that the registry is a
     * per-parse value, not shared global state.
     */
    public function testRegistriesWithDifferentSyntaxesDiffer(): void
    {
        $dw = array_column((new ModeRegistry('dw'))->getModes(), 'mode');
        $md = array_column((new ModeRegistry('md'))->getModes(), 'mode');

        $this->assertContains('internallink', $dw);
        $this->assertNotContains('internallink', $md);
        $this->assertContains('gfm_emphasis', $md);
        $this->assertNotContains('gfm_emphasis', $dw);
    }

    /**
     * acquireSubParser() hands out a Parser instance.
     */
    public function testAcquireSubParserReturnsParser(): void
    {
        $parser = $this->registry->acquireSubParser();
        $this->assertInstanceOf(Parser::class, $parser);
        $this->registry->releaseSubParser();
    }

    /**
     * A released sub-parser is handed out again on the next acquire.
     */
    public function testAcquireReleaseAcquireReturnsSameInstance(): void
    {
        // Sequential acquire/release pairs on the same key reuse the
        // pool slot — the second acquire gets the same instance because
        // it is no longer in use.
        $first = $this->registry->acquireSubParser();
        $this->registry->releaseSubParser();
        $second = $this->registry->acquireSubParser();
        $this->registry->releaseSubParser();
        $this->assertSame($first, $second);
    }

    /**
     * Two overlapping acquires must not share one parser instance.
     */
    public function testNestedAcquireReturnsDifferentInstance(): void
    {
        // While one parser is checked out for a given exclusion key, a
        // second acquire on the same key must hand back a different
        // instance — the pool grows on demand to support re-entrancy.
        $outer = $this->registry->acquireSubParser();
        $inner = $this->registry->acquireSubParser();
        try {
            $this->assertNotSame($outer, $inner);
        } finally {
            $this->registry->releaseSubParser();
            $this->registry->releaseSubParser();
        }
    }

    /**
     * withSubParser() must let the callback's exception through and still
     * give the parser back to the pool.
     */
    public function testWithSubParserReleasesEvenOnException(): void
    {
        $propagated = false;
        try {
            $this->registry->withSubParser([], [], static function () {
                throw new \RuntimeException('boom');
            });
        } catch (\RuntimeException) {
            // expected
            $propagated = true;
        }
        // Without this check a withSubParser() that silently swallowed the
        // exception would go unnoticed.
        $this->assertTrue($propagated, 'exception thrown by the callback must reach the caller');

        // After the throw, a fresh acquire on the same key must reuse
        // the pool slot — proving the release ran in the finally clause.
        // NOTE(review): if a leaked slot merely makes the pool grow, this
        // reuse check may pass either way — presumably capturing the parser
        // inside the callback would give a stronger proof; confirm against
        // withSubParser()'s callback signature.
        $first = $this->registry->acquireSubParser([], []);
        $this->registry->releaseSubParser([], []);
        $second = $this->registry->acquireSubParser([], []);
        $this->registry->releaseSubParser([], []);
        $this->assertSame($first, $second);
    }

    /**
     * With no arguments the sub-parser must not recognise headers.
     */
    public function testAcquireSubParserExcludesBaseonlyByDefault(): void
    {
        $registry = new ModeRegistry('md');

        $parser = $registry->acquireSubParser();
        try {
            $parser->parse("# A header\n");
            // gfm_header would emit `header` and `section_open`; both absent here
            $names = array_column($parser->getHandler()->calls, 0);
            $this->assertNotContains('header', $names);
            $this->assertNotContains('section_open', $names);
        } finally {
            $registry->releaseSubParser();
        }
    }

    /**
     * Categories passed as exclusions are left out of the sub-parser.
     */
    public function testAcquireSubParserHonoursCustomExclusions(): void
    {
        $registry = new ModeRegistry('md');

        // With FORMATTING also excluded, gfm_emphasis is gone and `*foo*` stays literal
        $excludes = [
            ModeRegistry::CATEGORY_BASEONLY,
            ModeRegistry::CATEGORY_FORMATTING,
        ];
        $parser = $registry->acquireSubParser($excludes);
        try {
            $parser->parse("*foo*\n");
            $names = array_column($parser->getHandler()->calls, 0);
            $this->assertNotContains('emphasis_open', $names);
        } finally {
            $registry->releaseSubParser($excludes);
        }
    }

    /**
     * Each registry owns its own sub-parser pool.
     */
    public function testSubParserPoolIsPerRegistry(): void
    {
        $first = $this->registry->acquireSubParser();
        $this->registry->releaseSubParser();
        $other = new ModeRegistry('dw');
        $second = $other->acquireSubParser();
        $other->releaseSubParser();
        $this->assertNotSame($first, $second);
    }

    /**
     * Acquiring a sub-parser must leave the lexer references on the main
     * parser's mode objects untouched.
     */
    public function testAcquireSubParserDoesNotClobberMainParserModes(): void
    {
        // Wire the main parser up the way real callers do: addMode() attaches
        // the main parser's lexer to each mode. The sub-parser must then clone
        // these modes so its own addMode() does not overwrite those references
        // and break the main parse.
        $main = $this->registry->getModes();
        $mainParser = new Parser(new Handler($this->registry), $this->registry);
        foreach ($main as $m) {
            $mainParser->addMode($m['mode'], $m['obj']);
        }

        // Remember which lexer each main mode points at before the sub-parser exists.
        $mainLexers = [];
        foreach ($main as $m) {
            $this->assertNotNull(
                $m['obj']->getLexer(),
                "precondition: main mode '{$m['mode']}' must have a Lexer attached"
            );
            $mainLexers[$m['mode']] = $m['obj']->getLexer();
        }

        $this->registry->acquireSubParser();
        $this->registry->releaseSubParser();

        foreach ($main as $m) {
            $this->assertSame(
                $mainLexers[$m['mode']],
                $m['obj']->getLexer(),
                "sub-parser must not clobber main mode '{$m['mode']}'->Lexer"
            );
        }
    }

    /**
     * Verifies that each mode is loaded in the expected combinations of
     * `$conf['syntax']`. One data set per (mode, syntax) pair.
     *
     * Add new mode-gating rules to {@see provideModeLoadingCases} — each
     * entry lists the four syntax settings and whether the mode should be
     * loaded there.
     *
     * @dataProvider provideModeLoadingCases
     *
     * @param string $mode       parser mode name to look for
     * @param string $syntax     syntax setting the registry is built with
     * @param bool   $shouldLoad whether the mode is expected in the mode list
     */
    public function testModeLoadingBySyntax(string $mode, string $syntax, bool $shouldLoad): void
    {
        $modeNames = array_column((new ModeRegistry($syntax))->getModes(), 'mode');

        if ($shouldLoad) {
            $this->assertContains($mode, $modeNames, "$mode must load in '$syntax'");
        } else {
            $this->assertNotContains($mode, $modeNames, "$mode must NOT load in '$syntax'");
        }
    }

    /**
     * Data provider for {@see testModeLoadingBySyntax}.
     *
     * Declares, per parser mode, whether it should be loaded in each of the
     * four `$conf['syntax']` settings (`dw`, `md`, `dw+md`, `md+dw`).
     * Entries are expanded into one data set per (mode, syntax) pair so
     * PHPUnit reports failures with a specific label.
     *
     * Five gating categories are represented:
     *
     * - **Always**: loaded unconditionally (no syntax-specific counterpart
     *   or conflict). Covers core formatting, paragraphs, and data-driven
     *   modes (smileys, acronyms, entities).
     * - **DW-always**: loaded whenever DokuWiki is part of the syntax. Used
     *   for features that have a Markdown counterpart but no delimiter
     *   conflict (e.g. `//italic//` for emphasis).
     * - **DW-preferred**: loaded only when DokuWiki is the primary syntax.
     *   Used when the mode conflicts with a Markdown mode in MD-preferred
     *   settings (e.g. `__` clashes with GFM strong; `listblock` gives way
     *   to `gfm_listblock`).
     * - **MD-always**: mirror — loaded whenever Markdown is part of the
     *   syntax. Used when the delimiter has no DokuWiki counterpart (e.g.
     *   `*` for emphasis).
     * - **MD-preferred**: mirror — loaded only when Markdown is primary.
     *   Used when the mode conflicts with a DokuWiki mode in DW-preferred
     *   settings (e.g. `_`, `__`, `___` clash with Underline;
     *   `gfm_listblock` gives way to `listblock`).
     *
     * Add a new line to the `$rules` table to register additional mode-
     * gating rules.
     *
     * @return array<string, array{0: string, 1: string, 2: bool}> map from
     *     test-case label to [mode name, syntax setting, should-load]
     */
    public static function provideModeLoadingCases(): array
    {
        $rules = [
            // Always-loaded (unconditional — no syntax-specific counterpart)
            'strong'                         => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'subscript'                      => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'superscript'                    => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'footnote'                       => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'eol'                            => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'preformatted'                   => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_quote'                      => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_hr'                         => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'externallink'                   => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'emaillink'                      => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'windowssharelink'               => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'notoc'                          => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'nocache'                        => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'rss'                            => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'smiley'                         => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'acronym'                        => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'entity'                         => ['dw' => true,  'md' => true,  'dw+md' => true,  'md+dw' => true ],
            // DW-always (features with MD counterparts but no delimiter clash)
            'emphasis'                       => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'deleted'                        => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'code'                           => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'header'                         => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'linebreak'                      => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'internallink'                   => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'media'                          => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'table'                          => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'monospace'                      => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'unformatted'                    => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            'file'                           => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => true ],
            // MD-always (`*` / `~~` have no conflicting DW counterpart)
            'gfm_emphasis'                   => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_emphasis_strong'            => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_deleted'                    => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_backtick_single'            => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_backtick_double'            => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_header'                     => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_link'                       => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_media'                      => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_code'                       => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_file'                       => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_table'                      => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_escape'                     => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            'gfm_linebreak'                  => ['dw' => false, 'md' => true,  'dw+md' => true,  'md+dw' => true ],
            // MD-preferred (`_`, `__`, `___` clash with Underline in DW)
            'gfm_emphasis_underscore'        => ['dw' => false, 'md' => true,  'dw+md' => false, 'md+dw' => true ],
            'gfm_strong_underscore'          => ['dw' => false, 'md' => true,  'dw+md' => false, 'md+dw' => true ],
            'gfm_emphasis_strong_underscore' => ['dw' => false, 'md' => true,  'dw+md' => false, 'md+dw' => true ],
            // MD-preferred (list blocks: only the primary dialect's list mode loads)
            'gfm_listblock'                  => ['dw' => false, 'md' => true,  'dw+md' => false, 'md+dw' => true ],
            // DW-preferred (Underline's `__` clashes with GFM strong)
            'underline'                      => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => false],
            // DW-preferred (list blocks: only the primary dialect's list mode loads)
            'listblock'                      => ['dw' => true,  'md' => false, 'dw+md' => true,  'md+dw' => false],
        ];

        // Flatten the table into one labelled data set per (mode, syntax) pair.
        $cases = [];
        foreach ($rules as $mode => $bySyntax) {
            foreach ($bySyntax as $syntax => $shouldLoad) {
                $cases["$mode in $syntax"] = [$mode, $syntax, $shouldLoad];
            }
        }
        return $cases;
    }
}
529