xref: /dokuwiki/inc/Parsing/ModeRegistry.php (revision d20858669cbb910f566e0b7d1ba9da293d1b794e)
1<?php
2
3namespace dokuwiki\Parsing;
4
5use dokuwiki\Extension\PluginInterface;
6use dokuwiki\Extension\SyntaxPlugin;
7use dokuwiki\Parsing\ParserMode\Acronym;
8use dokuwiki\Parsing\ParserMode\ModeInterface;
9use dokuwiki\Parsing\ParserMode\Camelcaselink;
10use dokuwiki\Parsing\ParserMode\Entity;
11use dokuwiki\Parsing\ParserMode\Smiley;
12use dokuwiki\Parsing\Handler;
13use dokuwiki\Parsing\Parser;
14
15/**
16 * Central registry for parser mode categories and mode instantiation.
17 *
18 * The underlying data is kept in the global $PARSER_MODES array because
19 * third-party plugins read and write it directly at runtime (e.g. to register
20 * their mode in a category). All methods in this class operate on that global
21 * so changes are visible to both old and new code.
22 */
23class ModeRegistry
24{
25    // Category constants (preserving the historical 'substition' typo)
26    public const CATEGORY_CONTAINER   = 'container';
27    public const CATEGORY_BASEONLY    = 'baseonly';
28    public const CATEGORY_FORMATTING  = 'formatting';
29    public const CATEGORY_SUBSTITION  = 'substition';
30    public const CATEGORY_PROTECTED   = 'protected';
31    public const CATEGORY_DISABLED    = 'disabled';
32    public const CATEGORY_PARAGRAPHS  = 'paragraphs';
33
34    /** @var array{sort: int, mode: string, obj: ModeInterface}[]|null */
35    protected ?array $modes = null;
36
37    /** @var array<string, array{parsers: Parser[], inUse: int}> Pool of sub-parsers per exclusion-set identifier. */
38    protected array $subParsers = [];
39
40    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
41    protected array $blockEolModes = [];
42
43    protected static ?self $instance = null;
44
45    /**
46     * Get the singleton instance of the ModeRegistry.
47     *
48     * @return self
49     */
50    public static function getInstance(): self
51    {
52        if (!self::$instance instanceof self) {
53            self::$instance = new self();
54        }
55        return self::$instance;
56    }
57
58    /**
59     * Reset the singleton instance.
60     *
61     * This is mainly useful for testing to force re-initialization.
62     *
63     * @return void
64     */
65    public static function reset(): void
66    {
67        self::$instance = null;
68    }
69
70    /**
71     * Constructor. Initializes the global $PARSER_MODES array with the default mode categories.
72     */
73    protected function __construct()
74    {
75        global $PARSER_MODES;
76        $PARSER_MODES = [
77            self::CATEGORY_CONTAINER  => ['listblock', 'table', 'gfm_listblock', 'gfm_table', 'gfm_quote', 'gfm_hr'],
78            self::CATEGORY_BASEONLY   => ['header', 'gfm_header'],
79            self::CATEGORY_FORMATTING => [
80                'strong', 'emphasis', 'underline', 'monospace',
81                'subscript', 'superscript', 'deleted', 'footnote',
82                'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
83                'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
84                'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
85            ],
86            self::CATEGORY_SUBSTITION => [
87                'acronym', 'smiley', 'wordblock', 'entity',
88                'camelcaselink', 'internallink', 'media', 'externallink',
89                'linebreak', 'emaillink', 'windowssharelink', 'filelink',
90                'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
91                'gfm_link', 'gfm_media', 'gfm_escape', 'gfm_linebreak',
92                'gfm_html_entity',
93            ],
94            self::CATEGORY_PROTECTED  => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
95            self::CATEGORY_DISABLED   => ['unformatted'],
96            self::CATEGORY_PARAGRAPHS => ['eol'],
97        ];
98    }
99
100    /**
101     * Get all mode names in the given categories.
102     *
103     * @param string[] $categories One or more CATEGORY_* constants
104     * @return string[] Unique list of mode names
105     */
106    public function getModesForCategories(array $categories): array
107    {
108        global $PARSER_MODES;
109        $modes = [];
110        foreach ($categories as $cat) {
111            if (isset($PARSER_MODES[$cat])) {
112                $modes = array_merge($modes, $PARSER_MODES[$cat]);
113            }
114        }
115        return array_unique($modes);
116    }
117
118    /**
119     * Get the raw categories array.
120     *
121     * @return array<string, string[]> Category name => list of mode names
122     */
123    public function getCategories(): array
124    {
125        global $PARSER_MODES;
126        return $PARSER_MODES;
127    }
128
129    /**
130     * Register a mode in a category.
131     *
132     * @param string $category One of the CATEGORY_* constants
133     * @param string $modeName The mode name to register
134     * @return void
135     */
136    public function registerMode(string $category, string $modeName): void
137    {
138        global $PARSER_MODES;
139        $PARSER_MODES[$category][] = $modeName;
140        $this->modes = null; // invalidate cached mode list
141    }
142
143    /**
144     * Register a mode that handles its own line endings.
145     * Modes registered here will be skipped by Eol's connectTo().
146     *
147     * @param string $mode The mode name
148     * @return void
149     */
150    public function registerBlockEolMode(string $mode): void
151    {
152        $this->blockEolModes[] = $mode;
153    }
154
155    /**
156     * Get all modes that handle their own line endings.
157     *
158     * @return string[]
159     */
160    public function getBlockEolModes(): array
161    {
162        return $this->blockEolModes;
163    }
164
165    /**
166     * Whether DokuWiki is the preferred syntax (`dw` or `dw+md`).
167     *
168     * Modes that have to choose between DW-flavored and MD-flavored
169     * behavior at runtime read this flag. Compare with isMdPreferred()
170     * — exactly one of the two is true for any valid `$conf['syntax']`
171     * setting.
172     */
173    public function isDwPreferred(): bool
174    {
175        global $conf;
176        return in_array($conf['syntax'], ['dw', 'dw+md'], true);
177    }
178
179    /**
180     * Whether Markdown is the preferred syntax (`md` or `md+dw`).
181     */
182    public function isMdPreferred(): bool
183    {
184        global $conf;
185        return in_array($conf['syntax'], ['md', 'md+dw'], true);
186    }
187
188    /**
189     * Get all parser modes, fully instantiated and sorted by priority.
190     *
191     * This includes syntax plugins, built-in modes, formatting modes, and
192     * data-driven modes (smileys, acronyms, entities). Results are cached
193     * unless running in a test environment.
194     *
195     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => ModeInterface]
196     */
197    public function getModes(): array
198    {
199        global $conf;
200
201        if ($this->modes !== null && !defined('DOKU_UNITTEST')) {
202            return $this->modes;
203        }
204
205        $this->modes = [];
206        $loadDw = in_array($conf['syntax'], ['dw', 'dw+md', 'md+dw']);
207        $loadMd = in_array($conf['syntax'], ['md', 'dw+md', 'md+dw']);
208
209        $this->loadPluginModes();
210        $this->loadAlwaysModes();
211        if ($loadDw) $this->loadDokuWikiModes();
212        if ($loadMd) $this->loadMarkdownModes();
213        $this->loadDataModes();
214
215        usort($this->modes, self::sortModes(...));
216        return $this->modes;
217    }
218
219    //region Sub-parser pool
220
221    /**
222     * Acquire a sub-parser for the given exclusion set.
223     *
224     * The registry maintains a pool of sub-parsers per exclusion key.
225     * Each acquire returns the next free instance from that pool;
226     * releaseSubParser must be called (with the same exclusion set)
227     * once the caller is done. If all instances in a pool are already
228     * checked out — re-entrancy on the same key — a fresh instance is
229     * built and appended to the pool. Real-world nesting for any one
230     * mode tops out at a handful of levels, so pool growth is bounded.
231     *
232     * Use this primitive when the caller wants to hold the parser
233     * across multiple parse() calls (e.g. iterating over list items).
234     * For single-shot use, prefer {@see withSubParser} so release is
235     * automatic.
236     *
237     * The returned Parser is shared infrastructure: callers must call
238     * `$parser->getHandler()->reset()` before each parse() to avoid
239     * inheriting state from a previous use.
240     *
241     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
242     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
243     */
244    public function acquireSubParser(
245        array $excludeCategories = [self::CATEGORY_BASEONLY],
246        array $excludeModes = []
247    ): Parser {
248        $key = $this->subParserKey($excludeCategories, $excludeModes);
249        $entry = $this->subParsers[$key] ?? ['parsers' => [], 'inUse' => 0];
250
251        if ($entry['inUse'] >= count($entry['parsers'])) {
252            $entry['parsers'][] = $this->buildSubParser($excludeCategories, $excludeModes);
253        }
254        $parser = $entry['parsers'][$entry['inUse']];
255        $entry['inUse']++;
256        $this->subParsers[$key] = $entry;
257        return $parser;
258    }
259
260    /**
261     * Release a previously-acquired sub-parser back to its pool.
262     *
263     * Should be paired with a prior {@see acquireSubParser} call for
264     * the same exclusion set. Callers must release in LIFO order with
265     * respect to other acquires on the same key — the implementation
266     * does not enforce LIFO, but out-of-order release would silently
267     * hand the same parser to two callers, so the caller is responsible
268     * for the discipline. Wrapping each acquire/release pair in a
269     * single try/finally (or using {@see withSubParser}) makes the
270     * ordering correct by construction.
271     *
272     * Throws if no acquire is outstanding for the given key — that
273     * indicates an acquire/release imbalance bug in the caller.
274     *
275     * @param string[] $excludeCategories
276     * @param string[] $excludeModes
277     * @throws \RuntimeException on release without a matching acquire
278     */
279    public function releaseSubParser(
280        array $excludeCategories = [self::CATEGORY_BASEONLY],
281        array $excludeModes = []
282    ): void {
283        $key = $this->subParserKey($excludeCategories, $excludeModes);
284        if (!isset($this->subParsers[$key]) || $this->subParsers[$key]['inUse'] <= 0) {
285            throw new \RuntimeException(
286                "releaseSubParser called without matching acquireSubParser for key '$key'"
287            );
288        }
289        $this->subParsers[$key]['inUse']--;
290    }
291
292    /**
293     * Run a callback with an exclusively-held sub-parser.
294     *
295     * Convenience wrapper around acquire/release. The parser is checked
296     * out for the duration of the callback, then released even if the
297     * callback throws. Preferred shape for single-shot sub-parses
298     * (one parse() call per acquire); use the explicit pair for cases
299     * where the parser is held across a loop or other longer scope.
300     *
301     * @template T
302     * @param string[] $excludeCategories
303     * @param string[] $excludeModes
304     * @param callable(Parser): T $fn
305     * @return T
306     */
307    public function withSubParser(
308        array $excludeCategories,
309        array $excludeModes,
310        callable $fn
311    ) {
312        $parser = $this->acquireSubParser($excludeCategories, $excludeModes);
313        try {
314            return $fn($parser);
315        } finally {
316            $this->releaseSubParser($excludeCategories, $excludeModes);
317        }
318    }
319
320    /**
321     * Build a fresh Parser preconfigured with every active mode except
322     * the ones excluded.
323     *
324     * Mode objects are cloned before being attached so that
325     * Parser::addMode()'s assignment to $Lexer does not clobber the
326     * main parser's mode references.
327     *
328     * @param string[] $excludeCategories
329     * @param string[] $excludeModes
330     */
331    protected function buildSubParser(
332        array $excludeCategories,
333        array $excludeModes
334    ): Parser {
335        $categories = $this->getCategories();
336        $excluded = $excludeModes;
337        foreach ($excludeCategories as $cat) {
338            $excluded = array_merge($excluded, $categories[$cat] ?? []);
339        }
340
341        $parser = new Parser(new Handler());
342        foreach ($this->getModes() as $m) {
343            if (in_array($m['mode'], $excluded, true)) continue;
344            // Mode objects expose a single $Lexer slot which Parser::addMode()
345            // overwrites at registration time. The objects in $this->modes are
346            // already attached to the main parser's lexer; reusing them here
347            // would clobber that reference and break the main parse. Clone so
348            // the sub-parser gets its own copy with its own $Lexer slot.
349            $parser->addMode($m['mode'], clone $m['obj']);
350        }
351        return $parser;
352    }
353
354    /**
355     * Build the cache key used to identify a sub-parser exclusion set.
356     */
357    protected function subParserKey(array $excludeCategories, array $excludeModes): string
358    {
359        return implode(',', $excludeCategories) . '|' . implode(',', $excludeModes);
360    }
361
362    //endregion
363
364    //region Mode loading
365
366    /**
367     * Load syntax plugin modes and register them in their categories.
368     */
369    protected function loadPluginModes(): void
370    {
371        global $PARSER_MODES;
372
373        $plugins = plugin_list('syntax');
374        foreach ($plugins as $p) {
375            $obj = plugin_load('syntax', $p);
376            if (!$obj instanceof PluginInterface) continue;
377            $PARSER_MODES[$obj->getType()][] = "plugin_$p";
378            $this->modes[] = [
379                'sort' => $obj->getSort(),
380                'mode' => "plugin_$p",
381                'obj'  => $obj,
382            ];
383            unset($obj);
384        }
385    }
386
387    /**
388     * Load modes that have no equivalent in the other syntax.
389     * These are always active regardless of the syntax setting.
390     */
391    protected function loadAlwaysModes(): void
392    {
393        global $conf;
394
395        $modes = [
396            'strong', 'subscript', 'superscript',
397            'footnote', 'eol', 'preformatted',
398            'gfm_quote', 'gfm_hr', 'externallink', 'emaillink', 'windowssharelink',
399            'notoc', 'nocache', 'rss',
400        ];
401
402        if ($conf['typography']) {
403            $modes[] = 'quotes';
404            $modes[] = 'multiplyentity';
405        }
406
407        $this->instantiateModes($modes);
408    }
409
410    /**
411     * Load DokuWiki-specific modes for features that also exist in Markdown.
412     * Skipped when syntax is 'md'.
413     */
414    protected function loadDokuWikiModes(): void
415    {
416        $modes = [
417            'emphasis', 'deleted', 'code', 'header',
418            'linebreak', 'internallink', 'media', 'table',
419            'monospace', 'unformatted', 'file',
420        ];
421
422        // Underline only loads when DokuWiki is preferred. In MD-preferred
423        // modes, `__` means strong (via gfm_strong_underscore) and loading
424        // Underline here would conflict.
425        //
426        // Listblock only loads when DokuWiki is preferred. In MD-preferred
427        // modes, GfmListblock owns the `-`/`*`/`+` markers and zero-indent
428        // top-level items, which conflicts with DokuWiki's required-2-space-
429        // indent list model.
430        if ($this->isDwPreferred()) {
431            $modes[] = 'underline';
432            $modes[] = 'listblock';
433        }
434
435        $this->instantiateModes($modes);
436    }
437
438    /**
439     * Load Markdown-specific modes for features that also exist in DokuWiki.
440     * Skipped when syntax is 'dw'.
441     */
442    protected function loadMarkdownModes(): void
443    {
444        $modes = [
445            'gfm_escape', 'gfm_linebreak', 'gfm_html_entity',
446            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
447            'gfm_backtick_single', 'gfm_backtick_double',
448            'gfm_header', 'gfm_link', 'gfm_media',
449            'gfm_code', 'gfm_file', 'gfm_table',
450        ];
451
452        // Underscore-based emphasis and strong only load when Markdown is
453        // preferred. In DW-preferred modes, `__` means underline and loading
454        // these would conflict.
455        //
456        // GfmListblock only loads when Markdown is preferred. In DW-preferred
457        // modes, the DokuWiki Listblock owns the `-`/`*` markers (with the
458        // 2-space indent rule); the two list models cannot co-exist.
459        if ($this->isMdPreferred()) {
460            $modes[] = 'gfm_emphasis_underscore';
461            $modes[] = 'gfm_strong_underscore';
462            $modes[] = 'gfm_emphasis_strong_underscore';
463            $modes[] = 'gfm_listblock';
464        }
465
466        $this->instantiateModes($modes);
467    }
468
469    /**
470     * Load data-driven modes that require constructor arguments
471     * (smileys, acronyms, entities) and optional config-gated modes.
472     */
473    protected function loadDataModes(): void
474    {
475        global $conf;
476
477        $obj = new Smiley(array_keys(getSmileys()));
478        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj];
479
480        $obj = new Acronym(array_keys(getAcronyms()));
481        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj];
482
483        $obj = new Entity(array_keys(getEntities()));
484        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj];
485
486        if (!empty($conf['camelcase'])) {
487            $obj = new Camelcaselink();
488            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj];
489        }
490    }
491
492    /**
493     * Instantiate mode classes by name and add them to the mode list.
494     *
495     * Mode names are split on `_` and each segment is PascalCased to form the
496     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
497     * `internallink` → `Internallink`, `strong` → `Strong`).
498     *
499     * @param string[] $modeNames
500     */
501    protected function instantiateModes(array $modeNames): void
502    {
503        foreach ($modeNames as $mode) {
504            $class = implode('', array_map('ucfirst', explode('_', $mode))); // snake_case to PascalCase
505            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
506            $obj = new $class();
507            $this->modes[] = [
508                'sort' => $obj->getSort(),
509                'mode' => $mode,
510                'obj'  => $obj,
511            ];
512        }
513    }
514
515    //endregion
516
517    /**
518     * Callback function for usort
519     *
520     * @param array $a
521     * @param array $b
522     * @return int
523     */
524    public static function sortModes(array $a, array $b): int
525    {
526        return $a['sort'] <=> $b['sort'];
527    }
528}
529