xref: /dokuwiki/inc/Parsing/ModeRegistry.php (revision 75364f13219a5af44f52c564ea0a62df64c3a17f)
1<?php
2
3namespace dokuwiki\Parsing;
4
5use dokuwiki\Extension\PluginInterface;
6use dokuwiki\Extension\SyntaxPlugin;
7use dokuwiki\Parsing\ParserMode\Acronym;
8use dokuwiki\Parsing\ParserMode\AbstractMode;
9use dokuwiki\Parsing\ParserMode\Camelcaselink;
10use dokuwiki\Parsing\ParserMode\Entity;
11use dokuwiki\Parsing\ParserMode\Smiley;
12use dokuwiki\Parsing\Handler;
13use dokuwiki\Parsing\Parser;
14
/**
 * The set of parser modes for a single parse, plus the mode taxonomy.
 *
 * A ModeRegistry is built once per parse (see p_get_instructions) and
 * carries the parse-specific state: the active syntax flavour, the
 * block-EOL bookkeeping, and the sub-parser pool. It is a short-lived
 * value, not a singleton — two parses in the same request (e.g. a plugin
 * rendering bundled DW text inside an otherwise-Markdown page) get two
 * independent registries.
 *
 * Three distinct concepts meet here; keep them apart:
 *
 *   1. The user's configured syntax PREFERENCE is a setting. Its source
 *      of truth is $conf['syntax']. Read it only in UI code (editor
 *      toolbar, admin settings, syntax-preference plugins) — never from
 *      inside the parser. $conf['syntax'] enters the parser exactly once,
 *      at the top-level entry point, as this registry's constructor
 *      argument.
 *
 *   2. The active parse's syntax is a PARAMETER of this registry
 *      (getSyntax / isDwPreferred / isMdPreferred). Every mode descends from
 *      AbstractMode, which Parser::addMode() injects this registry into, so a
 *      mode reads it via $this->registry; a plugin handle()/render() reads
 *      $handler->getModeRegistry(). No code inside inc/Parsing/ reads
 *      $conf['syntax'] directly.
 *
 *   3. The mode TAXONOMY — which mode names belong to which category — is
 *      owned by this registry instance ($this->categories), seeded from the
 *      immutable DEFAULT_CATEGORIES and extended with plugin_* entries during
 *      loadPluginModes(). Core reads it through the instance accessors
 *      (getModesForCategories / getCategories). The legacy global
 *      $PARSER_MODES is kept only as a deprecated mirror, published during
 *      loadPluginModes() for third-party plugins that read the array directly
 *      and for the bundled info plugin — no core code reads it.
 */
class ModeRegistry
{
    // Category constants (preserving the historical 'substition' typo)
    public const CATEGORY_CONTAINER    = 'container';
    public const CATEGORY_BASEONLY     = 'baseonly';
    public const CATEGORY_FORMATTING   = 'formatting';
    public const CATEGORY_SUBSTITUTION = 'substition';
    public const CATEGORY_PROTECTED    = 'protected';
    public const CATEGORY_DISABLED     = 'disabled';
    public const CATEGORY_PARAGRAPHS   = 'paragraphs';

    /**
     * The built-in mode taxonomy: category => list of mode names.
     *
     * Immutable defaults. Each registry starts from a copy of this in
     * $this->categories; loadPluginModes() then merges plugin_* entries into
     * that copy. Being a const, it is never mutated and so needs no resetting
     * between parses or tests.
     */
    protected const DEFAULT_CATEGORIES = [
        self::CATEGORY_CONTAINER  => ['listblock', 'table', 'gfm_listblock', 'gfm_table', 'gfm_quote', 'gfm_hr'],
        self::CATEGORY_BASEONLY   => ['header', 'gfm_header'],
        self::CATEGORY_FORMATTING => [
            'strong', 'emphasis', 'underline', 'monospace',
            'subscript', 'superscript', 'deleted', 'footnote',
            'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
            'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
            'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
        ],
        self::CATEGORY_SUBSTITUTION => [
            'acronym', 'smiley', 'wordblock', 'entity',
            'camelcaselink', 'internallink', 'media', 'externallink',
            'linebreak', 'emaillink', 'windowssharelink', 'filelink',
            'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
            'gfm_link', 'gfm_media', 'gfm_escape', 'gfm_linebreak',
            'gfm_html_entity',
        ],
        self::CATEGORY_PROTECTED  => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
        self::CATEGORY_DISABLED   => ['unformatted'],
        self::CATEGORY_PARAGRAPHS => ['eol'],
    ];

    /** @var array{sort: int, mode: string, obj: AbstractMode}[]|null memoised mode list; null until built */
    protected ?array $modes = null;

    /** @var array<string, array{parsers: Parser[], inUse: int}> Pool of sub-parsers per exclusion-set identifier. */
    protected array $subParsers = [];

    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
    protected array $blockEolModes = [];

    /** @var string the syntax flavour this parse runs under (dw, md, dw+md, md+dw) */
    protected string $syntax;

    /** @var array<string, string[]> this parse's mode taxonomy (defaults + plugin modes) */
    protected array $categories;

    /**
     * @param string $syntax the syntax flavour for this parse: one of
     *     'dw', 'md', 'dw+md', 'md+dw'. This is the active-parse parameter,
     *     not the user preference — see the class docblock.
     */
    public function __construct(string $syntax)
    {
        $this->syntax = $syntax;
        $this->categories = self::DEFAULT_CATEGORIES;
    }

    /**
     * The syntax flavour of this parse.
     *
     * @return string one of 'dw', 'md', 'dw+md', 'md+dw'
     */
    public function getSyntax(): string
    {
        return $this->syntax;
    }

    /**
     * Get all mode names in the given categories of this parse's taxonomy.
     *
     * Unknown category names are ignored. The result is a proper list
     * (sequential 0..n-1 keys) in first-seen order with duplicates removed.
     *
     * @param string[] $categories One or more CATEGORY_* constants
     * @return string[] Unique list of mode names
     */
    public function getModesForCategories(array $categories): array
    {
        $lists = [];
        foreach ($categories as $cat) {
            if (isset($this->categories[$cat])) {
                $lists[] = $this->categories[$cat];
            }
        }

        // array_unique() keeps the original keys, which would leave gaps in
        // the result; reindex so callers really get a list.
        return array_values(array_unique(array_merge(...$lists)));
    }

    /**
     * Get this parse's raw category map.
     *
     * @return array<string, string[]> Category name => list of mode names
     */
    public function getCategories(): array
    {
        return $this->categories;
    }

    /**
     * Register a mode in a category of this parse's taxonomy.
     *
     * Registering the same mode twice in one category is a no-op. A category
     * that does not exist yet is created.
     *
     * @param string $category One of the CATEGORY_* constants
     * @param string $modeName The mode name to register
     * @return void
     */
    public function registerMode(string $category, string $modeName): void
    {
        if (in_array($modeName, $this->categories[$category] ?? [], true)) {
            return;
        }
        $this->categories[$category][] = $modeName;
    }

    /**
     * Register a mode that handles its own line endings.
     * Modes registered here will be skipped by Eol's connectTo().
     *
     * Registering the same mode more than once is a no-op, so the list
     * returned by getBlockEolModes() never contains duplicates.
     *
     * @param string $mode The mode name
     * @return void
     */
    public function registerBlockEolMode(string $mode): void
    {
        if (in_array($mode, $this->blockEolModes, true)) {
            return;
        }
        $this->blockEolModes[] = $mode;
    }

    /**
     * Get all modes that handle their own line endings.
     *
     * @return string[]
     */
    public function getBlockEolModes(): array
    {
        return $this->blockEolModes;
    }

    /**
     * Whether DokuWiki is the preferred syntax (`dw` or `dw+md`).
     *
     * Modes that have to choose between DW-flavored and MD-flavored
     * behavior at runtime read this flag. Compare with isMdPreferred()
     * — exactly one of the two is true for any valid `$conf['syntax']`
     * setting.
     */
    public function isDwPreferred(): bool
    {
        return in_array($this->syntax, ['dw', 'dw+md'], true);
    }

    /**
     * Whether Markdown is the preferred syntax (`md` or `md+dw`).
     */
    public function isMdPreferred(): bool
    {
        return in_array($this->syntax, ['md', 'md+dw'], true);
    }

    /**
     * Get all parser modes, fully instantiated and sorted by priority.
     *
     * This includes syntax plugins, built-in modes, formatting modes, and
     * data-driven modes (smileys, acronyms, entities). Built once per
     * registry and memoised for that registry's (short) lifetime.
     *
     * If building the list fails part-way, the memo and the category map are
     * rolled back to their state before the call and the error is rethrown,
     * so a later call never returns a partial, unsorted list.
     *
     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => AbstractMode]
     * @throws \Throwable whatever a mode or plugin constructor throws while loading
     */
    public function getModes(): array
    {
        if ($this->modes !== null) {
            return $this->modes;
        }

        $loadDw = in_array($this->syntax, ['dw', 'dw+md', 'md+dw'], true);
        $loadMd = in_array($this->syntax, ['md', 'dw+md', 'md+dw'], true);

        // loadPluginModes() appends plugin_* names to the taxonomy; remember
        // the current map so a failed build can be undone completely.
        $categoriesBefore = $this->categories;
        $this->modes = [];

        try {
            $this->loadPluginModes();
            $this->loadAlwaysModes();
            if ($loadDw) {
                $this->loadDokuWikiModes();
            }
            if ($loadMd) {
                $this->loadMarkdownModes();
            }
            $this->loadDataModes();

            usort($this->modes, self::sortModes(...));
        } catch (\Throwable $e) {
            $this->modes = null;
            $this->categories = $categoriesBefore;
            throw $e;
        }

        return $this->modes;
    }

    //region Sub-parser pool

    /**
     * Acquire a sub-parser for the given exclusion set.
     *
     * The registry maintains a pool of sub-parsers per exclusion key.
     * Each acquire returns the next free instance from that pool;
     * releaseSubParser must be called (with the same exclusion set)
     * once the caller is done. If all instances in a pool are already
     * checked out — re-entrancy on the same key — a fresh instance is
     * built and appended to the pool. Real-world nesting for any one
     * mode tops out at a handful of levels, so pool growth is bounded.
     *
     * Use this primitive when the caller wants to hold the parser
     * across multiple parse() calls (e.g. iterating over list items).
     * For single-shot use, prefer {@see withSubParser} so release is
     * automatic.
     *
     * The returned Parser is shared infrastructure: callers must call
     * `$parser->getHandler()->reset()` before each parse() to avoid
     * inheriting state from a previous use.
     *
     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
     */
    public function acquireSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): Parser {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        $entry = $this->subParsers[$key] ?? ['parsers' => [], 'inUse' => 0];

        // Every pooled instance is checked out: grow the pool by one. The
        // entry is only written back after the build succeeded.
        if ($entry['inUse'] >= count($entry['parsers'])) {
            $entry['parsers'][] = $this->buildSubParser($excludeCategories, $excludeModes);
        }

        // Instances are handed out in index order; 'inUse' doubles as the
        // index of the next free one.
        $parser = $entry['parsers'][$entry['inUse']];
        $entry['inUse']++;
        $this->subParsers[$key] = $entry;

        return $parser;
    }

    /**
     * Release a previously-acquired sub-parser back to its pool.
     *
     * Should be paired with a prior {@see acquireSubParser} call for
     * the same exclusion set. Callers must release in LIFO order with
     * respect to other acquires on the same key — the implementation
     * does not enforce LIFO, but out-of-order release would silently
     * hand the same parser to two callers, so the caller is responsible
     * for the discipline. Wrapping each acquire/release pair in a
     * single try/finally (or using {@see withSubParser}) makes the
     * ordering correct by construction.
     *
     * Throws if no acquire is outstanding for the given key — that
     * indicates an acquire/release imbalance bug in the caller.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @throws \RuntimeException on release without a matching acquire
     */
    public function releaseSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): void {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        if (!isset($this->subParsers[$key]) || $this->subParsers[$key]['inUse'] <= 0) {
            throw new \RuntimeException(
                "releaseSubParser called without matching acquireSubParser for key '$key'"
            );
        }
        $this->subParsers[$key]['inUse']--;
    }

    /**
     * Run a callback with an exclusively-held sub-parser.
     *
     * Convenience wrapper around acquire/release. The parser is checked
     * out for the duration of the callback, then released even if the
     * callback throws. Preferred shape for single-shot sub-parses
     * (one parse() call per acquire); use the explicit pair for cases
     * where the parser is held across a loop or other longer scope.
     *
     * @template T
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @param callable(Parser): T $fn
     * @return T
     */
    public function withSubParser(
        array $excludeCategories,
        array $excludeModes,
        callable $fn
    ) {
        $parser = $this->acquireSubParser($excludeCategories, $excludeModes);
        try {
            return $fn($parser);
        } finally {
            $this->releaseSubParser($excludeCategories, $excludeModes);
        }
    }

    /**
     * Build a fresh Parser preconfigured with every active mode except
     * the ones excluded.
     *
     * Mode objects are cloned before being attached so that
     * Parser::addMode() pointing each mode at the sub-parser's lexer does not
     * clobber the main parser's mode references.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     */
    protected function buildSubParser(
        array $excludeCategories,
        array $excludeModes
    ): Parser {
        // Explicitly excluded names plus every mode of the excluded categories.
        $excluded = array_merge($excludeModes, $this->getModesForCategories($excludeCategories));

        $parser = new Parser(new Handler($this), $this);
        foreach ($this->getModes() as $m) {
            if (in_array($m['mode'], $excluded, true)) {
                continue;
            }
            // Mode objects expose a single $Lexer slot which Parser::addMode()
            // overwrites at registration time. The objects in $this->modes are
            // already attached to the main parser's lexer; reusing them here
            // would clobber that reference and break the main parse. Clone so
            // the sub-parser gets its own copy with its own $Lexer slot.
            $parser->addMode($m['mode'], clone $m['obj']);
        }
        return $parser;
    }

    /**
     * Build the cache key used to identify a sub-parser exclusion set.
     *
     * The key is order-sensitive: the same names in a different order yield a
     * different key (and therefore a different pool).
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     */
    protected function subParserKey(array $excludeCategories, array $excludeModes): string
    {
        return implode(',', $excludeCategories) . '|' . implode(',', $excludeModes);
    }

    //endregion

    //region Mode loading

    /**
     * Load syntax plugin modes and register them in their categories.
     *
     * Each plugin is registered as mode "plugin_<name>" in the category
     * reported by its getType(), both in this registry's taxonomy and in the
     * deprecated global $PARSER_MODES mirror.
     */
    protected function loadPluginModes(): void
    {
        global $PARSER_MODES;

        // Publish this parse's taxonomy into the deprecated global mirror right
        // before plugins load — third-party plugins read $PARSER_MODES directly
        // (often from their constructor) and the info plugin reads it at render.
        // Core never reads the mirror; it reads $this->categories. The mirror is
        // kept in sync incrementally below so a plugin loaded later sees the
        // modes registered by plugins loaded before it (historical behaviour).
        // @deprecated reading $PARSER_MODES directly — use the ModeRegistry API.
        $PARSER_MODES = $this->categories;

        foreach (plugin_list('syntax') as $p) {
            $obj = plugin_load('syntax', $p);
            if (!$obj instanceof PluginInterface) {
                continue;
            }

            $mode = "plugin_$p";
            $type = $obj->getType();

            $this->categories[$type][] = $mode;
            $PARSER_MODES[$type][] = $mode;
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj'  => $obj,
            ];
        }
    }

    /**
     * Load modes that have no equivalent in the other syntax.
     * These are always active regardless of the syntax setting.
     *
     * The typography modes (quotes, multiplyentity) are only added when
     * $conf['typography'] is set and truthy.
     */
    protected function loadAlwaysModes(): void
    {
        global $conf;

        $modes = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'gfm_quote', 'gfm_hr',
            'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
        ];

        // !empty() mirrors the camelcase check in loadDataModes(): same
        // truthiness, but no warning when the setting is missing.
        if (!empty($conf['typography'])) {
            $modes[] = 'quotes';
            $modes[] = 'multiplyentity';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load DokuWiki-specific modes for features that also exist in Markdown.
     * Skipped when syntax is 'md'.
     */
    protected function loadDokuWikiModes(): void
    {
        $modes = [
            'emphasis', 'deleted', 'code', 'header',
            'linebreak', 'internallink', 'media', 'table',
            'monospace', 'unformatted', 'file',
        ];

        // Underline only loads when DokuWiki is preferred. In MD-preferred
        // modes, `__` means strong (via gfm_strong_underscore) and loading
        // Underline here would conflict.
        //
        // Listblock only loads when DokuWiki is preferred. In MD-preferred
        // modes, GfmListblock owns the `-`/`*`/`+` markers and zero-indent
        // top-level items, which conflicts with DokuWiki's required-2-space-
        // indent list model.
        if ($this->isDwPreferred()) {
            $modes[] = 'underline';
            $modes[] = 'listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load Markdown-specific modes for features that also exist in DokuWiki.
     * Skipped when syntax is 'dw'.
     */
    protected function loadMarkdownModes(): void
    {
        $modes = [
            'gfm_escape', 'gfm_linebreak', 'gfm_html_entity',
            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
            'gfm_backtick_single', 'gfm_backtick_double',
            'gfm_header', 'gfm_link', 'gfm_media',
            'gfm_code', 'gfm_file', 'gfm_table',
        ];

        // Underscore-based emphasis and strong only load when Markdown is
        // preferred. In DW-preferred modes, `__` means underline and loading
        // these would conflict.
        //
        // GfmListblock only loads when Markdown is preferred. In DW-preferred
        // modes, the DokuWiki Listblock owns the `-`/`*` markers (with the
        // 2-space indent rule); the two list models cannot co-exist.
        if ($this->isMdPreferred()) {
            $modes[] = 'gfm_emphasis_underscore';
            $modes[] = 'gfm_strong_underscore';
            $modes[] = 'gfm_emphasis_strong_underscore';
            $modes[] = 'gfm_listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load data-driven modes that require constructor arguments
     * (smileys, acronyms, entities) and optional config-gated modes.
     */
    protected function loadDataModes(): void
    {
        global $conf;

        $obj = new Smiley(array_keys(getSmileys()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj];

        $obj = new Acronym(array_keys(getAcronyms()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj];

        $obj = new Entity(array_keys(getEntities()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj];

        if (!empty($conf['camelcase'])) {
            $obj = new Camelcaselink();
            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj];
        }
    }

    /**
     * Instantiate mode classes by name and add them to the mode list.
     *
     * Mode names are split on `_` and each segment is PascalCased to form the
     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
     * `internallink` → `Internallink`, `strong` → `Strong`).
     *
     * @param string[] $modeNames
     */
    protected function instantiateModes(array $modeNames): void
    {
        foreach ($modeNames as $mode) {
            $class = implode('', array_map(ucfirst(...), explode('_', $mode))); // snake_case to PascalCase
            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
            $obj = new $class();
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj'  => $obj,
            ];
        }
    }

    //endregion

    /**
     * Callback function for usort: orders mode entries by ascending 'sort'.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public static function sortModes(array $a, array $b): int
    {
        return $a['sort'] <=> $b['sort'];
    }
}
560