<?php

namespace dokuwiki\Parsing;

use dokuwiki\Extension\PluginInterface;
use dokuwiki\Extension\SyntaxPlugin;
use dokuwiki\Parsing\ParserMode\Acronym;
use dokuwiki\Parsing\ParserMode\AbstractMode;
use dokuwiki\Parsing\ParserMode\Camelcaselink;
use dokuwiki\Parsing\ParserMode\Entity;
use dokuwiki\Parsing\ParserMode\Smiley;
use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Parser;

/**
 * The set of parser modes for a single parse, plus the mode taxonomy.
 *
 * A ModeRegistry is built once per parse (see p_get_instructions) and
 * carries the parse-specific state: the active syntax flavour, the
 * block-EOL bookkeeping, and the sub-parser pool. It is a short-lived
 * value, not a singleton — two parses in the same request (e.g. a plugin
 * rendering bundled DW text inside an otherwise-Markdown page) get two
 * independent registries.
 *
 * Three distinct concepts meet here; keep them apart:
 *
 *  1. The user's configured syntax PREFERENCE is a setting. Its source
 *     of truth is $conf['syntax']. Read it only in UI code (editor
 *     toolbar, admin settings, syntax-preference plugins) — never from
 *     inside the parser. $conf['syntax'] enters the parser exactly once,
 *     at the top-level entry point, as this registry's constructor
 *     argument.
 *
 *  2. The active parse's syntax is a PARAMETER of this registry
 *     (getSyntax / isDwPreferred / isMdPreferred). Every mode descends from
 *     AbstractMode, which Parser::addMode() injects this registry into, so a
 *     mode reads it via $this->registry; a plugin handle()/render() reads
 *     $handler->getModeRegistry(). No code inside inc/Parsing/ reads
 *     $conf['syntax'] directly.
 *
 *  3. The mode TAXONOMY — which mode names belong to which category — is
 *     owned by this registry instance ($this->categories), seeded from the
 *     immutable DEFAULT_CATEGORIES and extended with plugin_* entries during
 *     loadPluginModes(). Core reads it through the instance accessors
 *     (getModesForCategories / getCategories). The legacy global
 *     $PARSER_MODES is kept only as a deprecated mirror, published during
 *     loadPluginModes() for third-party plugins that read the array directly
 *     and for the bundled info plugin — no core code reads it.
 */
class ModeRegistry
{
    // Category constants (preserving the historical 'substition' typo)
    public const CATEGORY_CONTAINER = 'container';
    public const CATEGORY_BASEONLY = 'baseonly';
    public const CATEGORY_FORMATTING = 'formatting';
    public const CATEGORY_SUBSTITUTION = 'substition';
    public const CATEGORY_PROTECTED = 'protected';
    public const CATEGORY_DISABLED = 'disabled';
    public const CATEGORY_PARAGRAPHS = 'paragraphs';

    /**
     * The built-in mode taxonomy: category => list of mode names.
     *
     * Immutable defaults. Each registry starts from a copy of this in
     * $this->categories; loadPluginModes() then merges plugin_* entries into
     * that copy. Being a const, it is never mutated and so needs no resetting
     * between parses or tests.
     */
    protected const DEFAULT_CATEGORIES = [
        self::CATEGORY_CONTAINER => ['listblock', 'table', 'gfm_listblock', 'gfm_table', 'gfm_quote', 'gfm_hr'],
        self::CATEGORY_BASEONLY => ['header', 'gfm_header'],
        self::CATEGORY_FORMATTING => [
            'strong', 'emphasis', 'underline', 'monospace',
            'subscript', 'superscript', 'deleted', 'footnote',
            'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
            'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
            'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
        ],
        self::CATEGORY_SUBSTITUTION => [
            'acronym', 'smiley', 'wordblock', 'entity',
            'camelcaselink', 'internallink', 'media', 'externallink',
            'linebreak', 'emaillink', 'windowssharelink', 'filelink',
            'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
            'gfm_link', 'gfm_media', 'gfm_escape', 'gfm_linebreak',
            'gfm_html_entity',
        ],
        self::CATEGORY_PROTECTED => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
        self::CATEGORY_DISABLED => ['unformatted'],
        self::CATEGORY_PARAGRAPHS => ['eol'],
    ];

    /** @var array{sort: int, mode: string, obj: AbstractMode}[]|null */
    protected ?array $modes = null;

    /** @var array<string, array{parsers: Parser[], inUse: int}> Pool of sub-parsers per exclusion-set identifier. */
    protected array $subParsers = [];

    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
    protected array $blockEolModes = [];

    /** @var string the syntax flavour this parse runs under (dw, md, dw+md, md+dw) */
    protected string $syntax;

    /** @var array<string, string[]> this parse's mode taxonomy (defaults + plugin modes) */
    protected array $categories;

    /**
     * @param string $syntax the syntax flavour for this parse: one of
     *   'dw', 'md', 'dw+md', 'md+dw'. This is the active-parse parameter,
     *   not the user preference — see the class docblock.
     */
    public function __construct(string $syntax)
    {
        $this->syntax = $syntax;
        $this->categories = self::DEFAULT_CATEGORIES;
    }

    /**
     * The syntax flavour of this parse.
     *
     * @return string one of 'dw', 'md', 'dw+md', 'md+dw'
     */
    public function getSyntax(): string
    {
        return $this->syntax;
    }

    /**
     * Get all mode names in the given categories of this parse's taxonomy.
     *
     * Unknown category names are ignored. The result is a proper list
     * (consecutive integer keys starting at 0) with each mode name appearing
     * once, in first-seen order.
     *
     * @param string[] $categories One or more CATEGORY_* constants
     * @return string[] Unique list of mode names
     */
    public function getModesForCategories(array $categories): array
    {
        $lists = [];
        foreach ($categories as $cat) {
            if (isset($this->categories[$cat])) {
                $lists[] = $this->categories[$cat];
            }
        }

        // array_unique() keeps the key of the first occurrence, which leaves
        // gaps when a name appears in more than one requested category;
        // re-index so callers really get a list.
        return array_values(array_unique(array_merge(...$lists)));
    }

    /**
     * Get this parse's raw category map.
     *
     * @return array<string, string[]> Category name => list of mode names
     */
    public function getCategories(): array
    {
        return $this->categories;
    }

    /**
     * Register a mode in a category of this parse's taxonomy.
     *
     * Registering the same mode twice in the same category is a no-op, so
     * repeated registration cannot grow the taxonomy.
     *
     * @param string $category One of the CATEGORY_* constants
     * @param string $modeName The mode name to register
     * @return void
     */
    public function registerMode(string $category, string $modeName): void
    {
        if (!in_array($modeName, $this->categories[$category] ?? [], true)) {
            $this->categories[$category][] = $modeName;
        }
    }

    /**
     * Register a mode that handles its own line endings.
     * Modes registered here will be skipped by Eol's connectTo().
     *
     * Registering the same mode more than once is a no-op.
     *
     * @param string $mode The mode name
     * @return void
     */
    public function registerBlockEolMode(string $mode): void
    {
        if (!in_array($mode, $this->blockEolModes, true)) {
            $this->blockEolModes[] = $mode;
        }
    }

    /**
     * Get all modes that handle their own line endings.
     *
     * @return string[]
     */
    public function getBlockEolModes(): array
    {
        return $this->blockEolModes;
    }

    /**
     * Whether DokuWiki is the preferred syntax (`dw` or `dw+md`).
     *
     * Modes that have to choose between DW-flavored and MD-flavored
     * behavior at runtime read this flag. Compare with isMdPreferred()
     * — exactly one of the two is true for any valid `$conf['syntax']`
     * setting.
     */
    public function isDwPreferred(): bool
    {
        return in_array($this->syntax, ['dw', 'dw+md'], true);
    }

    /**
     * Whether Markdown is the preferred syntax (`md` or `md+dw`).
     */
    public function isMdPreferred(): bool
    {
        return in_array($this->syntax, ['md', 'md+dw'], true);
    }

    /**
     * Get all parser modes, fully instantiated and sorted by priority.
     *
     * This includes syntax plugins, built-in modes, formatting modes, and
     * data-driven modes (smileys, acronyms, entities). Built once per
     * registry and memoised for that registry's (short) lifetime.
     *
     * If any loader throws, the memo is reset before the exception is
     * re-thrown, so a half-built, unsorted list is never served to later
     * callers.
     *
     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => AbstractMode]
     */
    public function getModes(): array
    {
        if ($this->modes !== null) {
            return $this->modes;
        }

        $loadDw = in_array($this->syntax, ['dw', 'dw+md', 'md+dw'], true);
        $loadMd = in_array($this->syntax, ['md', 'dw+md', 'md+dw'], true);

        $this->modes = [];
        try {
            $this->loadPluginModes();
            $this->loadAlwaysModes();
            if ($loadDw) $this->loadDokuWikiModes();
            if ($loadMd) $this->loadMarkdownModes();
            $this->loadDataModes();

            usort($this->modes, self::sortModes(...));
        } catch (\Throwable $e) {
            // Do not memoise a partial list: forget it and let the caller see
            // the original failure.
            $this->modes = null;
            throw $e;
        }

        return $this->modes;
    }

    //region Sub-parser pool

    /**
     * Acquire a sub-parser for the given exclusion set.
     *
     * The registry maintains a pool of sub-parsers per exclusion key.
     * Each acquire returns the next free instance from that pool;
     * releaseSubParser must be called (with the same exclusion set)
     * once the caller is done. If all instances in a pool are already
     * checked out — re-entrancy on the same key — a fresh instance is
     * built and appended to the pool. Real-world nesting for any one
     * mode tops out at a handful of levels, so pool growth is bounded.
     *
     * Use this primitive when the caller wants to hold the parser
     * across multiple parse() calls (e.g. iterating over list items).
     * For single-shot use, prefer {@see withSubParser} so release is
     * automatic.
     *
     * The returned Parser is shared infrastructure: callers must call
     * `$parser->getHandler()->reset()` before each parse() to avoid
     * inheriting state from a previous use.
     *
     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
     */
    public function acquireSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): Parser {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        $entry = $this->subParsers[$key] ?? ['parsers' => [], 'inUse' => 0];

        // Every pooled parser for this key is checked out: grow the pool.
        if ($entry['inUse'] >= count($entry['parsers'])) {
            $entry['parsers'][] = $this->buildSubParser($excludeCategories, $excludeModes);
        }
        $parser = $entry['parsers'][$entry['inUse']];
        $entry['inUse']++;
        $this->subParsers[$key] = $entry;
        return $parser;
    }

    /**
     * Release a previously-acquired sub-parser back to its pool.
     *
     * Should be paired with a prior {@see acquireSubParser} call for
     * the same exclusion set. Callers must release in LIFO order with
     * respect to other acquires on the same key — the implementation
     * does not enforce LIFO, but out-of-order release would silently
     * hand the same parser to two callers, so the caller is responsible
     * for the discipline. Wrapping each acquire/release pair in a
     * single try/finally (or using {@see withSubParser}) makes the
     * ordering correct by construction.
     *
     * Throws if no acquire is outstanding for the given key — that
     * indicates an acquire/release imbalance bug in the caller.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @throws \RuntimeException on release without a matching acquire
     */
    public function releaseSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): void {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        if (!isset($this->subParsers[$key]) || $this->subParsers[$key]['inUse'] <= 0) {
            throw new \RuntimeException(
                "releaseSubParser called without matching acquireSubParser for key '$key'"
            );
        }
        $this->subParsers[$key]['inUse']--;
    }

    /**
     * Run a callback with an exclusively-held sub-parser.
     *
     * Convenience wrapper around acquire/release. The parser is checked
     * out for the duration of the callback, then released even if the
     * callback throws. Preferred shape for single-shot sub-parses
     * (one parse() call per acquire); use the explicit pair for cases
     * where the parser is held across a loop or other longer scope.
     *
     * @template T
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @param callable(Parser): T $fn
     * @return T
     */
    public function withSubParser(
        array $excludeCategories,
        array $excludeModes,
        callable $fn
    ) {
        $parser = $this->acquireSubParser($excludeCategories, $excludeModes);
        try {
            return $fn($parser);
        } finally {
            $this->releaseSubParser($excludeCategories, $excludeModes);
        }
    }

    /**
     * Build a fresh Parser preconfigured with every active mode except
     * the ones excluded.
     *
     * Mode objects are cloned before being attached so that
     * Parser::addMode() pointing each mode at the sub-parser's lexer does not
     * clobber the main parser's mode references.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     */
    protected function buildSubParser(
        array $excludeCategories,
        array $excludeModes
    ): Parser {
        $categories = $this->getCategories();
        $excluded = $excludeModes;
        foreach ($excludeCategories as $cat) {
            $excluded = array_merge($excluded, $categories[$cat] ?? []);
        }

        $parser = new Parser(new Handler($this), $this);
        foreach ($this->getModes() as $m) {
            if (in_array($m['mode'], $excluded, true)) continue;
            // Mode objects expose a single $Lexer slot which Parser::addMode()
            // overwrites at registration time. The objects in $this->modes are
            // already attached to the main parser's lexer; reusing them here
            // would clobber that reference and break the main parse. Clone so
            // the sub-parser gets its own copy with its own $Lexer slot.
            $parser->addMode($m['mode'], clone $m['obj']);
        }
        return $parser;
    }

    /**
     * Build the cache key used to identify a sub-parser exclusion set.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     */
    protected function subParserKey(array $excludeCategories, array $excludeModes): string
    {
        return implode(',', $excludeCategories) . '|' . implode(',', $excludeModes);
    }

    //endregion

    //region Mode loading

    /**
     * Load syntax plugin modes and register them in their categories.
     *
     * Category registration is duplicate-free, so running this again after a
     * failed getModes() attempt does not list a plugin mode twice.
     */
    protected function loadPluginModes(): void
    {
        global $PARSER_MODES;

        // Publish this parse's taxonomy into the deprecated global mirror right
        // before plugins load — third-party plugins read $PARSER_MODES directly
        // (often from their constructor) and the info plugin reads it at render.
        // Core never reads the mirror; it reads $this->categories. The mirror is
        // kept in sync incrementally below so a plugin loaded later sees the
        // modes registered by plugins loaded before it (historical behaviour).
        // @deprecated reading $PARSER_MODES directly — use the ModeRegistry API.
        $PARSER_MODES = $this->categories;

        $plugins = plugin_list('syntax');
        foreach ($plugins as $p) {
            $obj = plugin_load('syntax', $p);
            if (!$obj instanceof PluginInterface) continue;

            $type = $obj->getType();
            $mode = "plugin_$p";

            if (!in_array($mode, $this->categories[$type] ?? [], true)) {
                $this->categories[$type][] = $mode;
            }
            // Append to the mirror rather than overwriting the category, so
            // anything a legacy plugin wrote into $PARSER_MODES survives.
            if (!in_array($mode, $PARSER_MODES[$type] ?? [], true)) {
                $PARSER_MODES[$type][] = $mode;
            }

            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj' => $obj,
            ];
        }
    }

    /**
     * Load modes that have no equivalent in the other syntax.
     * These are always active regardless of the syntax setting.
     */
    protected function loadAlwaysModes(): void
    {
        global $conf;

        $modes = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'gfm_quote', 'gfm_hr',
            'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
        ];

        // !empty() rather than a bare read: same result for any set value, but
        // no undefined-key warning when the setting is absent (mirrors the
        // camelcase check in loadDataModes()).
        if (!empty($conf['typography'])) {
            $modes[] = 'quotes';
            $modes[] = 'multiplyentity';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load DokuWiki-specific modes for features that also exist in Markdown.
     * Skipped when syntax is 'md'.
     */
    protected function loadDokuWikiModes(): void
    {
        $modes = [
            'emphasis', 'deleted', 'code', 'header',
            'linebreak', 'internallink', 'media', 'table',
            'monospace', 'unformatted', 'file',
        ];

        // Underline only loads when DokuWiki is preferred. In MD-preferred
        // modes, `__` means strong (via gfm_strong_underscore) and loading
        // Underline here would conflict.
        //
        // Listblock only loads when DokuWiki is preferred. In MD-preferred
        // modes, GfmListblock owns the `-`/`*`/`+` markers and zero-indent
        // top-level items, which conflicts with DokuWiki's required-2-space-
        // indent list model.
        if ($this->isDwPreferred()) {
            $modes[] = 'underline';
            $modes[] = 'listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load Markdown-specific modes for features that also exist in DokuWiki.
     * Skipped when syntax is 'dw'.
     */
    protected function loadMarkdownModes(): void
    {
        $modes = [
            'gfm_escape', 'gfm_linebreak', 'gfm_html_entity',
            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
            'gfm_backtick_single', 'gfm_backtick_double',
            'gfm_header', 'gfm_link', 'gfm_media',
            'gfm_code', 'gfm_file', 'gfm_table',
        ];

        // Underscore-based emphasis and strong only load when Markdown is
        // preferred. In DW-preferred modes, `__` means underline and loading
        // these would conflict.
        //
        // GfmListblock only loads when Markdown is preferred. In DW-preferred
        // modes, the DokuWiki Listblock owns the `-`/`*` markers (with the
        // 2-space indent rule); the two list models cannot co-exist.
        if ($this->isMdPreferred()) {
            $modes[] = 'gfm_emphasis_underscore';
            $modes[] = 'gfm_strong_underscore';
            $modes[] = 'gfm_emphasis_strong_underscore';
            $modes[] = 'gfm_listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load data-driven modes that require constructor arguments
     * (smileys, acronyms, entities) and optional config-gated modes.
     */
    protected function loadDataModes(): void
    {
        global $conf;

        $obj = new Smiley(array_keys(getSmileys()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj];

        $obj = new Acronym(array_keys(getAcronyms()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj];

        $obj = new Entity(array_keys(getEntities()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj];

        if (!empty($conf['camelcase'])) {
            $obj = new Camelcaselink();
            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj];
        }
    }

    /**
     * Instantiate mode classes by name and add them to the mode list.
     *
     * Mode names are split on `_` and each segment is PascalCased to form the
     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
     * `internallink` → `Internallink`, `strong` → `Strong`).
     *
     * @param string[] $modeNames
     */
    protected function instantiateModes(array $modeNames): void
    {
        foreach ($modeNames as $mode) {
            $class = implode('', array_map(ucfirst(...), explode('_', $mode))); // snake_case to PascalCase
            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
            $obj = new $class();
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj' => $obj,
            ];
        }
    }

    //endregion

    /**
     * Callback function for usort
     *
     * @param array $a
     * @param array $b
     * @return int negative, zero or positive as $a sorts before, with or after $b
     */
    public static function sortModes(array $a, array $b): int
    {
        return $a['sort'] <=> $b['sort'];
    }
}