xref: /dokuwiki/inc/Parsing/ModeRegistry.php (revision 9172eccff11a70df61d7aad697f84bbd815147ce)
1<?php
2
3namespace dokuwiki\Parsing;
4
5use dokuwiki\Extension\PluginInterface;
6use dokuwiki\Extension\SyntaxPlugin;
7use dokuwiki\Parsing\ParserMode\Acronym;
8use dokuwiki\Parsing\ParserMode\ModeInterface;
9use dokuwiki\Parsing\ParserMode\Camelcaselink;
10use dokuwiki\Parsing\ParserMode\Entity;
11use dokuwiki\Parsing\ParserMode\Smiley;
12use dokuwiki\Parsing\Handler;
13use dokuwiki\Parsing\Parser;
14
15/**
16 * Central registry for parser mode categories and mode instantiation.
17 *
18 * The underlying data is kept in the global $PARSER_MODES array because
19 * third-party plugins read and write it directly at runtime (e.g. to register
20 * their mode in a category). All methods in this class operate on that global
21 * so changes are visible to both old and new code.
22 */
class ModeRegistry
{
    // Category constants (preserving the historical 'substition' typo)
    public const CATEGORY_CONTAINER   = 'container';
    public const CATEGORY_BASEONLY    = 'baseonly';
    public const CATEGORY_FORMATTING  = 'formatting';
    public const CATEGORY_SUBSTITION  = 'substition';
    public const CATEGORY_PROTECTED   = 'protected';
    public const CATEGORY_DISABLED    = 'disabled';
    public const CATEGORY_PARAGRAPHS  = 'paragraphs';

    /** @var array{sort: int, mode: string, obj: ModeInterface}[]|null Cached, sorted mode list (null = not built) */
    private ?array $modes = null;

    /** @var array<string, Parser> Cached sub-parsers keyed by exclusion-set identifier */
    private array $subParsers = [];

    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
    private array $blockEolModes = [];

    /** @var self|null The shared instance handed out by getInstance() */
    private static ?self $instance = null;

    /**
     * Get the singleton instance of the ModeRegistry.
     *
     * The instance is created on first use; creating it (re)initializes the
     * global $PARSER_MODES array with the default categories.
     *
     * @return self
     */
    public static function getInstance(): self
    {
        return self::$instance ??= new self();
    }

    /**
     * Reset the singleton instance.
     *
     * This is mainly useful for testing to force re-initialization. The next
     * call to getInstance() builds a fresh registry.
     *
     * @return void
     */
    public static function reset(): void
    {
        self::$instance = null;
    }

    /**
     * Constructor. Initializes the global $PARSER_MODES array with the default mode categories.
     *
     * Private so that the registry can only be obtained through getInstance().
     */
    private function __construct()
    {
        global $PARSER_MODES;
        $PARSER_MODES = [
            self::CATEGORY_CONTAINER  => ['listblock', 'table', 'quote', 'hr'],
            self::CATEGORY_BASEONLY   => ['header', 'gfm_header'],
            self::CATEGORY_FORMATTING => [
                'strong', 'emphasis', 'underline', 'monospace',
                'subscript', 'superscript', 'deleted', 'footnote',
                'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
                'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
                'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
            ],
            self::CATEGORY_SUBSTITION => [
                'acronym', 'smiley', 'wordblock', 'entity',
                'camelcaselink', 'internallink', 'media', 'externallink',
                'linebreak', 'emaillink', 'windowssharelink', 'filelink',
                'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
                'gfm_link', 'gfm_media',
            ],
            self::CATEGORY_PROTECTED  => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
            self::CATEGORY_DISABLED   => ['unformatted'],
            self::CATEGORY_PARAGRAPHS => ['eol'],
        ];
    }

    /**
     * Get all mode names in the given categories.
     *
     * Unknown categories are ignored. The result is a proper list: duplicates
     * are removed and the keys are renumbered from 0.
     *
     * @param string[] $categories One or more CATEGORY_* constants
     * @return string[] Unique list of mode names
     */
    public function getModesForCategories(array $categories): array
    {
        global $PARSER_MODES;
        $modes = [];
        foreach ($categories as $cat) {
            if (isset($PARSER_MODES[$cat])) {
                $modes = array_merge($modes, $PARSER_MODES[$cat]);
            }
        }
        // array_unique() preserves keys, so reindex to avoid gaps in the list
        return array_values(array_unique($modes));
    }

    /**
     * Get the raw categories array.
     *
     * This is the current content of the global $PARSER_MODES.
     *
     * @return array<string, string[]> Category name => list of mode names
     */
    public function getCategories(): array
    {
        global $PARSER_MODES;
        return $PARSER_MODES;
    }

    /**
     * Register a mode in a category.
     *
     * Registering a name that is already in the category is a no-op for the
     * category itself. The cached mode list and the cached sub-parsers are
     * dropped either way, because sub-parser exclusion sets are derived from
     * the category contents.
     *
     * @param string $category One of the CATEGORY_* constants
     * @param string $modeName The mode name to register
     * @return void
     */
    public function registerMode(string $category, string $modeName): void
    {
        $this->addToCategory($category, $modeName);
        $this->modes = null;     // invalidate cached mode list
        $this->subParsers = [];  // exclusion sets were built from the old categories
    }

    /**
     * Register a mode that handles its own line endings.
     * Modes registered here will be skipped by Eol's connectTo().
     *
     * Registering the same mode more than once has no additional effect.
     *
     * @param string $mode The mode name
     * @return void
     */
    public function registerBlockEolMode(string $mode): void
    {
        if (!in_array($mode, $this->blockEolModes, true)) {
            $this->blockEolModes[] = $mode;
        }
    }

    /**
     * Get all modes that handle their own line endings.
     *
     * @return string[]
     */
    public function getBlockEolModes(): array
    {
        return $this->blockEolModes;
    }

    /**
     * Get all parser modes, fully instantiated and sorted by priority.
     *
     * This includes syntax plugins, built-in modes, formatting modes, and
     * data-driven modes (smileys, acronyms, entities). Results are cached
     * unless running in a test environment (DOKU_UNITTEST defined), in which
     * case the list is rebuilt on every call.
     *
     * Which built-in modes are loaded depends on $conf['syntax']
     * ('dokuwiki' when unset): 'dokuwiki', 'markdown', 'dw+md' or 'md+dw'.
     *
     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => ModeInterface]
     */
    public function getModes(): array
    {
        global $conf;

        if ($this->modes !== null && !defined('DOKU_UNITTEST')) {
            return $this->modes;
        }

        $this->modes = [];
        $syntax = $conf['syntax'] ?? 'dokuwiki';
        $loadDw = in_array($syntax, ['dokuwiki', 'dw+md', 'md+dw'], true);
        $loadMd = in_array($syntax, ['markdown', 'dw+md', 'md+dw'], true);

        $this->loadPluginModes();
        $this->loadAlwaysModes();
        if ($loadDw) $this->loadDokuWikiModes();
        if ($loadMd) $this->loadMarkdownModes();
        $this->loadDataModes();

        usort($this->modes, self::sortModes(...));
        return $this->modes;
    }

    /**
     * Return a cached Parser preconfigured with every active mode except the
     * ones excluded.
     *
     * Built lazily on first call and reused thereafter. Mode objects are cloned
     * before being attached to the sub-parser so that connectTo()'s assignment
     * to $Lexer does not clobber the main parser's mode references.
     *
     * The returned Parser must not be re-entered: each call should reset the
     * Handler (via $parser->getHandler()->reset()) before invoking parse().
     * Callers that need to sub-parse during their own handle() must ensure
     * they are not already inside a sub-parse on the same exclusion set —
     * one common case is excluding the calling mode itself from the sub-parser
     * to rule out re-entry.
     *
     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
     * @return Parser
     */
    public function getSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): Parser {
        $key = implode(',', $excludeCategories) . '|' . implode(',', $excludeModes);
        if (isset($this->subParsers[$key])) {
            return $this->subParsers[$key];
        }

        // Load the modes BEFORE reading the categories: loading is what adds
        // plugin modes to their categories, so reading the categories first
        // would miss plugin modes when nothing has triggered getModes() yet.
        $modes = $this->getModes();
        $categories = $this->getCategories();

        $excluded = $excludeModes;
        foreach ($excludeCategories as $cat) {
            $excluded = array_merge($excluded, $categories[$cat] ?? []);
        }

        $parser = new Parser(new Handler());
        foreach ($modes as $m) {
            if (in_array($m['mode'], $excluded, true)) continue;
            // Mode objects expose a single $Lexer slot which Parser::addMode()
            // overwrites at registration time. The objects in $this->modes are
            // already attached to the main parser's lexer; reusing them here
            // would clobber that reference and break the main parse. Clone so
            // the sub-parser gets its own copy with its own $Lexer slot.
            $parser->addMode($m['mode'], clone $m['obj']);
        }

        return $this->subParsers[$key] = $parser;
    }

    /**
     * Load syntax plugin modes and register them in their categories.
     *
     * Each loadable syntax plugin is added to the mode list as "plugin_<name>"
     * and to the category returned by its getType(). The category entry is
     * only added once, so rebuilding the mode list does not pile up duplicates
     * in $PARSER_MODES.
     */
    protected function loadPluginModes(): void
    {
        $plugins = plugin_list('syntax');
        foreach ($plugins as $p) {
            $obj = plugin_load('syntax', $p);
            // NOTE(review): getType()/getSort() are called below; presumably every
            // loaded syntax plugin provides them — confirm against PluginInterface.
            if (!$obj instanceof PluginInterface) continue;
            $this->addToCategory((string) $obj->getType(), "plugin_$p");
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => "plugin_$p",
                'obj'  => $obj,
            ];
            unset($obj);
        }
    }

    /**
     * Load modes that have no equivalent in the other syntax.
     * These are always active regardless of the syntax setting.
     *
     * The 'quotes' and 'multiplyentity' modes are only added when
     * $conf['typography'] is set to a non-empty value.
     */
    protected function loadAlwaysModes(): void
    {
        global $conf;

        $modes = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'quote', 'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
        ];

        // !empty() instead of a bare read: no warning when the option is unset
        if (!empty($conf['typography'])) {
            $modes[] = 'quotes';
            $modes[] = 'multiplyentity';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load DokuWiki-specific modes for features that also exist in Markdown.
     * Skipped when syntax is 'markdown'.
     */
    protected function loadDokuWikiModes(): void
    {
        global $conf;
        $syntax = $conf['syntax'] ?? 'dokuwiki';
        $dwPreferred = in_array($syntax, ['dokuwiki', 'dw+md'], true);

        $modes = [
            'emphasis', 'deleted', 'code', 'header', 'hr',
            'linebreak', 'internallink', 'media', 'listblock', 'table',
            'monospace', 'unformatted', 'file',
        ];

        // Underline only loads when DokuWiki is preferred. In MD-preferred
        // modes, `__` means strong (via gfm_strong_underscore) and loading
        // Underline here would conflict.
        if ($dwPreferred) {
            $modes[] = 'underline';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load Markdown-specific modes for features that also exist in DokuWiki.
     * Skipped when syntax is 'dokuwiki'.
     */
    protected function loadMarkdownModes(): void
    {
        global $conf;
        $syntax = $conf['syntax'] ?? 'dokuwiki';
        $mdPreferred = in_array($syntax, ['markdown', 'md+dw'], true);

        $modes = [
            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
            'gfm_backtick_single', 'gfm_backtick_double',
            'gfm_header', 'gfm_link', 'gfm_media',
            'gfm_code', 'gfm_file',
        ];

        // Underscore-based emphasis and strong only load when Markdown is
        // preferred. In DW-preferred modes, `__` means underline and loading
        // these would conflict.
        if ($mdPreferred) {
            $modes[] = 'gfm_emphasis_underscore';
            $modes[] = 'gfm_strong_underscore';
            $modes[] = 'gfm_emphasis_strong_underscore';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load data-driven modes that require constructor arguments
     * (smileys, acronyms, entities) and optional config-gated modes.
     *
     * Camelcaselink is only added when $conf['camelcase'] is non-empty.
     */
    protected function loadDataModes(): void
    {
        global $conf;

        $obj = new Smiley(array_keys(getSmileys()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj];

        $obj = new Acronym(array_keys(getAcronyms()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj];

        $obj = new Entity(array_keys(getEntities()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj];

        if (!empty($conf['camelcase'])) {
            $obj = new Camelcaselink();
            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj];
        }
    }

    /**
     * Instantiate mode classes by name and add them to the mode list.
     *
     * Mode names are split on `_` and each segment is PascalCased to form the
     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
     * `internallink` → `Internallink`, `strong` → `Strong`). The class is
     * looked up in the dokuwiki\Parsing\ParserMode namespace and constructed
     * without arguments.
     *
     * @param string[] $modeNames
     */
    protected function instantiateModes(array $modeNames): void
    {
        foreach ($modeNames as $mode) {
            $class = implode('', array_map('ucfirst', explode('_', $mode))); // snake_case to PascalCase
            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
            $obj = new $class();
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj'  => $obj,
            ];
        }
    }

    /**
     * Callback function for usort
     *
     * Orders mode entries ascending by their 'sort' value.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public static function sortModes(array $a, array $b): int
    {
        return $a['sort'] <=> $b['sort'];
    }

    /**
     * Append a mode name to a category in the global $PARSER_MODES unless it
     * is already listed there.
     *
     * @param string $category Category key in $PARSER_MODES (created if missing)
     * @param string $modeName The mode name to add
     * @return void
     */
    private function addToCategory(string $category, string $modeName): void
    {
        global $PARSER_MODES;
        if (!in_array($modeName, $PARSER_MODES[$category] ?? [], true)) {
            $PARSER_MODES[$category][] = $modeName;
        }
    }
}
399