xref: /dokuwiki/inc/Parsing/ModeRegistry.php (revision 3e6baeff313fa406e4d4b5dd2e5ab85ec7d7816d)
1<?php
2
3namespace dokuwiki\Parsing;
4
5use dokuwiki\Extension\PluginInterface;
6use dokuwiki\Extension\SyntaxPlugin;
7use dokuwiki\Parsing\ParserMode\Acronym;
8use dokuwiki\Parsing\ParserMode\ModeInterface;
9use dokuwiki\Parsing\ParserMode\Camelcaselink;
10use dokuwiki\Parsing\ParserMode\Entity;
11use dokuwiki\Parsing\ParserMode\Smiley;
12use dokuwiki\Parsing\Handler;
13use dokuwiki\Parsing\Parser;
14
/**
 * Central registry for parser mode categories and mode instantiation.
 *
 * The underlying category data is kept in the global $PARSER_MODES array
 * because third-party plugins read and write it directly at runtime (e.g. to
 * register their mode in a category). All category-related methods in this
 * class operate on that global so changes are visible to both old and new code.
 *
 * In addition, the registry owns:
 *  - the lazily built, priority-sorted list of instantiated parser modes,
 *  - a pool of reusable sub-parsers keyed by their exclusion set,
 *  - the list of modes that handle their own line endings.
 */
class ModeRegistry
{
    // Category constants (preserving the historical 'substition' typo)
    public const CATEGORY_CONTAINER   = 'container';
    public const CATEGORY_BASEONLY    = 'baseonly';
    public const CATEGORY_FORMATTING  = 'formatting';
    public const CATEGORY_SUBSTITION  = 'substition';
    public const CATEGORY_PROTECTED   = 'protected';
    public const CATEGORY_DISABLED    = 'disabled';
    public const CATEGORY_PARAGRAPHS  = 'paragraphs';

    /** @var array{sort: int, mode: string, obj: ModeInterface}[]|null Cached mode list, null when not built yet */
    protected ?array $modes = null;

    /** @var array<string, array{parsers: Parser[], inUse: int}> Pool of sub-parsers per exclusion-set identifier. */
    protected array $subParsers = [];

    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
    protected array $blockEolModes = [];

    protected static ?self $instance = null;

    /**
     * Get the singleton instance of the ModeRegistry.
     *
     * The instance is created on first use; creating it (re)initializes the
     * global $PARSER_MODES array via the constructor.
     *
     * @return self
     */
    public static function getInstance(): self
    {
        if (!self::$instance instanceof self) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Reset the singleton instance.
     *
     * This is mainly useful for testing to force re-initialization. The next
     * call to getInstance() builds a fresh registry.
     *
     * @return void
     */
    public static function reset(): void
    {
        self::$instance = null;
    }

    /**
     * Constructor. Initializes the global $PARSER_MODES array with the default mode categories.
     */
    protected function __construct()
    {
        global $PARSER_MODES;
        $PARSER_MODES = [
            self::CATEGORY_CONTAINER  => ['listblock', 'table', 'gfm_listblock', 'gfm_table', 'gfm_quote', 'gfm_hr'],
            self::CATEGORY_BASEONLY   => ['header', 'gfm_header'],
            self::CATEGORY_FORMATTING => [
                'strong', 'emphasis', 'underline', 'monospace',
                'subscript', 'superscript', 'deleted', 'footnote',
                'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
                'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
                'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
            ],
            self::CATEGORY_SUBSTITION => [
                'acronym', 'smiley', 'wordblock', 'entity',
                'camelcaselink', 'internallink', 'media', 'externallink',
                'linebreak', 'emaillink', 'windowssharelink', 'filelink',
                'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
                'gfm_link', 'gfm_media', 'gfm_escape',
            ],
            self::CATEGORY_PROTECTED  => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
            self::CATEGORY_DISABLED   => ['unformatted'],
            self::CATEGORY_PARAGRAPHS => ['eol'],
        ];
    }

    /**
     * Get all mode names in the given categories.
     *
     * Unknown categories are silently ignored. A mode that appears in more
     * than one requested category is returned once, at its first position.
     *
     * @param string[] $categories One or more CATEGORY_* constants
     * @return string[] Unique, sequentially indexed list of mode names
     */
    public function getModesForCategories(array $categories): array
    {
        global $PARSER_MODES;
        $modes = [];
        foreach ($categories as $cat) {
            if (isset($PARSER_MODES[$cat])) {
                $modes = array_merge($modes, $PARSER_MODES[$cat]);
            }
        }
        // array_unique() preserves keys and would leave gaps where duplicates
        // were dropped; reindex so callers really get a list.
        return array_values(array_unique($modes));
    }

    /**
     * Get the raw categories array.
     *
     * @return array<string, string[]> Category name => list of mode names
     */
    public function getCategories(): array
    {
        global $PARSER_MODES;
        return $PARSER_MODES;
    }

    /**
     * Register a mode in a category.
     *
     * Registering the same mode twice in one category is a no-op for the
     * category list. The cached mode list is invalidated in either case so
     * the next getModes() call rebuilds it.
     *
     * @param string $category One of the CATEGORY_* constants
     * @param string $modeName The mode name to register
     * @return void
     */
    public function registerMode(string $category, string $modeName): void
    {
        global $PARSER_MODES;
        if (!in_array($modeName, $PARSER_MODES[$category] ?? [], true)) {
            $PARSER_MODES[$category][] = $modeName;
        }
        $this->modes = null; // invalidate cached mode list
    }

    /**
     * Register a mode that handles its own line endings.
     * Modes registered here will be skipped by Eol's connectTo().
     *
     * Repeated registration of the same mode is ignored.
     *
     * @param string $mode The mode name
     * @return void
     */
    public function registerBlockEolMode(string $mode): void
    {
        if (!in_array($mode, $this->blockEolModes, true)) {
            $this->blockEolModes[] = $mode;
        }
    }

    /**
     * Get all modes that handle their own line endings.
     *
     * @return string[]
     */
    public function getBlockEolModes(): array
    {
        return $this->blockEolModes;
    }

    /**
     * Whether DokuWiki is the preferred syntax (`dokuwiki` or `dw+md`).
     *
     * Modes that have to choose between DW-flavored and MD-flavored
     * behavior at runtime read this flag. Compare with isMdPreferred()
     * — exactly one of the two is true for any valid `$conf['syntax']`
     * setting.
     */
    public function isDwPreferred(): bool
    {
        return in_array($this->syntaxSetting(), ['dokuwiki', 'dw+md'], true);
    }

    /**
     * Whether Markdown is the preferred syntax (`markdown` or `md+dw`).
     */
    public function isMdPreferred(): bool
    {
        return in_array($this->syntaxSetting(), ['markdown', 'md+dw'], true);
    }

    /**
     * Read the configured syntax flavor from `$conf['syntax']`.
     *
     * Single access point for the setting so that all checks in this class
     * compare the same value the same (strict) way. A missing or non-string
     * setting yields an empty string, which matches none of the known
     * flavors, instead of raising an "undefined array key" warning.
     *
     * @return string The raw setting, or '' when it is not set / not a string
     */
    protected function syntaxSetting(): string
    {
        global $conf;
        $syntax = $conf['syntax'] ?? '';
        return is_string($syntax) ? $syntax : '';
    }

    /**
     * Get all parser modes, fully instantiated and sorted by priority.
     *
     * This includes syntax plugins, built-in modes, formatting modes, and
     * data-driven modes (smileys, acronyms, entities). Results are cached
     * unless running in a test environment (DOKU_UNITTEST defined), in which
     * case the list is rebuilt on every call.
     *
     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => ModeInterface]
     */
    public function getModes(): array
    {
        if ($this->modes !== null && !defined('DOKU_UNITTEST')) {
            return $this->modes;
        }

        $syntax = $this->syntaxSetting();
        $loadDw = in_array($syntax, ['dokuwiki', 'dw+md', 'md+dw'], true);
        $loadMd = in_array($syntax, ['markdown', 'dw+md', 'md+dw'], true);

        $this->modes = [];
        $this->loadPluginModes();
        $this->loadAlwaysModes();
        if ($loadDw) {
            $this->loadDokuWikiModes();
        }
        if ($loadMd) {
            $this->loadMarkdownModes();
        }
        $this->loadDataModes();

        usort($this->modes, self::sortModes(...));
        return $this->modes;
    }

    //region Sub-parser pool

    /**
     * Acquire a sub-parser for the given exclusion set.
     *
     * The registry maintains a pool of sub-parsers per exclusion key.
     * Each acquire returns the next free instance from that pool;
     * releaseSubParser must be called (with the same exclusion set)
     * once the caller is done. If all instances in a pool are already
     * checked out — re-entrancy on the same key — a fresh instance is
     * built and appended to the pool.
     *
     * Use this primitive when the caller wants to hold the parser
     * across multiple parse() calls (e.g. iterating over list items).
     * For single-shot use, prefer {@see withSubParser} so release is
     * automatic.
     *
     * The returned Parser is shared infrastructure: callers must call
     * `$parser->getHandler()->reset()` before each parse() to avoid
     * inheriting state from a previous use.
     *
     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
     * @return Parser A parser that is checked out until the matching release
     */
    public function acquireSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): Parser {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        $entry = $this->subParsers[$key] ?? ['parsers' => [], 'inUse' => 0];

        // Every pooled instance is checked out: grow the pool by one.
        if ($entry['inUse'] >= count($entry['parsers'])) {
            $entry['parsers'][] = $this->buildSubParser($excludeCategories, $excludeModes);
        }
        // 'inUse' doubles as the index of the next free instance.
        $parser = $entry['parsers'][$entry['inUse']];
        $entry['inUse']++;
        $this->subParsers[$key] = $entry;
        return $parser;
    }

    /**
     * Release a previously-acquired sub-parser back to its pool.
     *
     * Should be paired with a prior {@see acquireSubParser} call for
     * the same exclusion set. Callers must release in LIFO order with
     * respect to other acquires on the same key — the implementation
     * does not enforce LIFO, but out-of-order release would silently
     * hand the same parser to two callers, so the caller is responsible
     * for the discipline. Wrapping each acquire/release pair in a
     * single try/finally (or using {@see withSubParser}) makes the
     * ordering correct by construction.
     *
     * Throws if no acquire is outstanding for the given key — that
     * indicates an acquire/release imbalance bug in the caller.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @return void
     * @throws \RuntimeException on release without a matching acquire
     */
    public function releaseSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): void {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        if (!isset($this->subParsers[$key]) || $this->subParsers[$key]['inUse'] <= 0) {
            throw new \RuntimeException(
                "releaseSubParser called without matching acquireSubParser for key '$key'"
            );
        }
        $this->subParsers[$key]['inUse']--;
    }

    /**
     * Run a callback with an exclusively-held sub-parser.
     *
     * Convenience wrapper around acquire/release. The parser is checked
     * out for the duration of the callback, then released even if the
     * callback throws. Preferred shape for single-shot sub-parses
     * (one parse() call per acquire); use the explicit pair for cases
     * where the parser is held across a loop or other longer scope.
     *
     * @template T
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @param callable(Parser): T $fn
     * @return T Whatever the callback returns
     */
    public function withSubParser(
        array $excludeCategories,
        array $excludeModes,
        callable $fn
    ) {
        $parser = $this->acquireSubParser($excludeCategories, $excludeModes);
        try {
            return $fn($parser);
        } finally {
            $this->releaseSubParser($excludeCategories, $excludeModes);
        }
    }

    /**
     * Build a fresh Parser preconfigured with every active mode except
     * the ones excluded.
     *
     * Mode objects are cloned before being attached so that
     * Parser::addMode()'s assignment to $Lexer does not clobber the
     * main parser's mode references.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @return Parser
     */
    protected function buildSubParser(
        array $excludeCategories,
        array $excludeModes
    ): Parser {
        $categories = $this->getCategories();
        $excluded = $excludeModes;
        foreach ($excludeCategories as $cat) {
            $excluded = array_merge($excluded, $categories[$cat] ?? []);
        }

        $parser = new Parser(new Handler());
        foreach ($this->getModes() as $m) {
            if (in_array($m['mode'], $excluded, true)) {
                continue;
            }
            // Mode objects expose a single $Lexer slot which Parser::addMode()
            // overwrites at registration time. The objects in $this->modes are
            // already attached to the main parser's lexer; reusing them here
            // would clobber that reference and break the main parse. Clone so
            // the sub-parser gets its own copy with its own $Lexer slot.
            $parser->addMode($m['mode'], clone $m['obj']);
        }
        return $parser;
    }

    /**
     * Build the cache key used to identify a sub-parser exclusion set.
     *
     * The key is independent of the order and multiplicity of the given
     * names: the resulting parser only depends on the *set* of excluded
     * modes, so `['a', 'b']` and `['b', 'a']` must share one pool. This also
     * keeps an acquire/release pair matched when the two calls happen to
     * list the same exclusions in a different order.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @return string
     */
    protected function subParserKey(array $excludeCategories, array $excludeModes): string
    {
        $normalize = static function (array $names): string {
            $names = array_unique($names);
            sort($names, SORT_STRING);
            return implode(',', $names);
        };

        return $normalize($excludeCategories) . '|' . $normalize($excludeModes);
    }

    //endregion

    //region Mode loading

    /**
     * Load syntax plugin modes and register them in their categories.
     *
     * The mode list may be rebuilt several times during one request (after
     * registerMode() invalidation, or on every call under DOKU_UNITTEST), so
     * a plugin is only added to its category in the global $PARSER_MODES if
     * it is not listed there yet.
     */
    protected function loadPluginModes(): void
    {
        global $PARSER_MODES;

        foreach (plugin_list('syntax') as $p) {
            $obj = plugin_load('syntax', $p);
            if (!$obj instanceof PluginInterface) {
                continue;
            }

            $modeName = "plugin_$p";
            $type = $obj->getType();
            if (!in_array($modeName, $PARSER_MODES[$type] ?? [], true)) {
                $PARSER_MODES[$type][] = $modeName;
            }

            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $modeName,
                'obj'  => $obj,
            ];
        }
    }

    /**
     * Load modes that have no equivalent in the other syntax.
     * These are always active regardless of the syntax setting.
     *
     * The typography-related modes ('quotes', 'multiplyentity') are only
     * added when `$conf['typography']` is truthy.
     */
    protected function loadAlwaysModes(): void
    {
        global $conf;

        $modes = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'gfm_quote', 'gfm_hr', 'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
        ];

        if ($conf['typography']) {
            $modes[] = 'quotes';
            $modes[] = 'multiplyentity';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load DokuWiki-specific modes for features that also exist in Markdown.
     * Skipped when syntax is 'markdown'.
     */
    protected function loadDokuWikiModes(): void
    {
        $modes = [
            'emphasis', 'deleted', 'code', 'header',
            'linebreak', 'internallink', 'media', 'table',
            'monospace', 'unformatted', 'file',
        ];

        // Underline only loads when DokuWiki is preferred. In MD-preferred
        // modes, `__` means strong (via gfm_strong_underscore) and loading
        // Underline here would conflict.
        //
        // Listblock only loads when DokuWiki is preferred. In MD-preferred
        // modes, GfmListblock owns the `-`/`*`/`+` markers and zero-indent
        // top-level items, which conflicts with DokuWiki's required-2-space-
        // indent list model.
        if ($this->isDwPreferred()) {
            $modes[] = 'underline';
            $modes[] = 'listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load Markdown-specific modes for features that also exist in DokuWiki.
     * Skipped when syntax is 'dokuwiki'.
     */
    protected function loadMarkdownModes(): void
    {
        $modes = [
            'gfm_escape',
            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
            'gfm_backtick_single', 'gfm_backtick_double',
            'gfm_header', 'gfm_link', 'gfm_media',
            'gfm_code', 'gfm_file', 'gfm_table',
        ];

        // Underscore-based emphasis and strong only load when Markdown is
        // preferred. In DW-preferred modes, `__` means underline and loading
        // these would conflict.
        //
        // GfmListblock only loads when Markdown is preferred. In DW-preferred
        // modes, the DokuWiki Listblock owns the `-`/`*` markers (with the
        // 2-space indent rule); the two list models cannot co-exist.
        if ($this->isMdPreferred()) {
            $modes[] = 'gfm_emphasis_underscore';
            $modes[] = 'gfm_strong_underscore';
            $modes[] = 'gfm_emphasis_strong_underscore';
            $modes[] = 'gfm_listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load data-driven modes that require constructor arguments
     * (smileys, acronyms, entities) and optional config-gated modes.
     *
     * Camelcaselink is only loaded when `$conf['camelcase']` is non-empty.
     */
    protected function loadDataModes(): void
    {
        global $conf;

        $dataModes = [
            'smiley'  => new Smiley(array_keys(getSmileys())),
            'acronym' => new Acronym(array_keys(getAcronyms())),
            'entity'  => new Entity(array_keys(getEntities())),
        ];
        if (!empty($conf['camelcase'])) {
            $dataModes['camelcaselink'] = new Camelcaselink();
        }

        foreach ($dataModes as $name => $obj) {
            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => $name, 'obj' => $obj];
        }
    }

    /**
     * Instantiate mode classes by name and add them to the mode list.
     *
     * Mode names are split on `_` and each segment is PascalCased to form the
     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
     * `internallink` → `Internallink`, `strong` → `Strong`). The class is
     * looked up in the `dokuwiki\Parsing\ParserMode` namespace and constructed
     * without arguments.
     *
     * @param string[] $modeNames
     */
    protected function instantiateModes(array $modeNames): void
    {
        foreach ($modeNames as $mode) {
            $class = implode('', array_map(ucfirst(...), explode('_', $mode))); // snake_case to PascalCase
            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
            $obj = new $class();
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj'  => $obj,
            ];
        }
    }

    //endregion

    /**
     * Callback function for usort: orders mode entries by ascending 'sort' value.
     *
     * @param array $a Mode entry with a 'sort' key
     * @param array $b Mode entry with a 'sort' key
     * @return int Negative, zero or positive as $a sorts before, equal to or after $b
     */
    public static function sortModes(array $a, array $b): int
    {
        return $a['sort'] <=> $b['sort'];
    }
}
528