<?php

namespace dokuwiki\Parsing;

use dokuwiki\Extension\PluginInterface;
use dokuwiki\Extension\SyntaxPlugin;
use dokuwiki\Parsing\ParserMode\Acronym;
use dokuwiki\Parsing\ParserMode\ModeInterface;
use dokuwiki\Parsing\ParserMode\Camelcaselink;
use dokuwiki\Parsing\ParserMode\Entity;
use dokuwiki\Parsing\ParserMode\Smiley;
use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Parser;

/**
 * Central registry for parser mode categories and mode instantiation.
 *
 * The underlying data is kept in the global $PARSER_MODES array because
 * third-party plugins read and write it directly at runtime (e.g. to register
 * their mode in a category). All methods in this class operate on that global
 * so changes are visible to both old and new code.
 */
class ModeRegistry
{
    // Category constants (preserving the historical 'substition' typo)
    public const CATEGORY_CONTAINER = 'container';
    public const CATEGORY_BASEONLY = 'baseonly';
    public const CATEGORY_FORMATTING = 'formatting';
    public const CATEGORY_SUBSTITION = 'substition';
    public const CATEGORY_PROTECTED = 'protected';
    public const CATEGORY_DISABLED = 'disabled';
    public const CATEGORY_PARAGRAPHS = 'paragraphs';

    /** @var array{sort: int, mode: string, obj: ModeInterface}[]|null Cached mode list, null when not yet built */
    protected ?array $modes = null;

    /** @var array<string, array{parsers: Parser[], inUse: int}> Pool of sub-parsers per exclusion-set identifier. */
    protected array $subParsers = [];

    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
    protected array $blockEolModes = [];

    protected static ?self $instance = null;

    /**
     * Get the singleton instance of the ModeRegistry.
     *
     * @return self
     */
    public static function getInstance(): self
    {
        if (!self::$instance instanceof self) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Reset the singleton instance.
     *
     * This is mainly useful for testing to force re-initialization.
     * The next getInstance() call runs the constructor again, which
     * re-initializes the global $PARSER_MODES array to its defaults.
     *
     * @return void
     */
    public static function reset(): void
    {
        self::$instance = null;
    }

    /**
     * Constructor. Initializes the global $PARSER_MODES array with the default mode categories.
     *
     * Note that this overwrites whatever $PARSER_MODES held before.
     */
    protected function __construct()
    {
        global $PARSER_MODES;
        $PARSER_MODES = [
            self::CATEGORY_CONTAINER => ['listblock', 'table', 'gfm_listblock', 'gfm_table', 'gfm_quote', 'gfm_hr'],
            self::CATEGORY_BASEONLY => ['header', 'gfm_header'],
            self::CATEGORY_FORMATTING => [
                'strong', 'emphasis', 'underline', 'monospace',
                'subscript', 'superscript', 'deleted', 'footnote',
                'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
                'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
                'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
            ],
            self::CATEGORY_SUBSTITION => [
                'acronym', 'smiley', 'wordblock', 'entity',
                'camelcaselink', 'internallink', 'media', 'externallink',
                'linebreak', 'emaillink', 'windowssharelink', 'filelink',
                'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
                'gfm_link', 'gfm_media', 'gfm_escape', 'gfm_linebreak',
                'gfm_html_entity',
            ],
            self::CATEGORY_PROTECTED => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
            self::CATEGORY_DISABLED => ['unformatted'],
            self::CATEGORY_PARAGRAPHS => ['eol'],
        ];
    }

    /**
     * Get all mode names in the given categories.
     *
     * Unknown categories are silently ignored. Modes appear in the order of
     * the requested categories; a mode present in several categories is
     * reported once, at its first position.
     *
     * @param string[] $categories One or more CATEGORY_* constants
     * @return string[] Unique, sequentially indexed list of mode names
     */
    public function getModesForCategories(array $categories): array
    {
        global $PARSER_MODES;
        $modes = [];
        foreach ($categories as $cat) {
            if (isset($PARSER_MODES[$cat])) {
                $modes = array_merge($modes, $PARSER_MODES[$cat]);
            }
        }
        // array_unique() keeps the original keys; reindex so callers really get a list
        return array_values(array_unique($modes));
    }

    /**
     * Get the raw categories array.
     *
     * @return array<string, string[]> Category name => list of mode names
     */
    public function getCategories(): array
    {
        global $PARSER_MODES;
        return $PARSER_MODES;
    }

    /**
     * Register a mode in a category.
     *
     * Registering a mode that is already listed in the category does not
     * add a second entry. The cached mode list is invalidated in either case.
     *
     * @param string $category One of the CATEGORY_* constants
     * @param string $modeName The mode name to register
     * @return void
     */
    public function registerMode(string $category, string $modeName): void
    {
        global $PARSER_MODES;
        if (!in_array($modeName, $PARSER_MODES[$category] ?? [], true)) {
            $PARSER_MODES[$category][] = $modeName;
        }
        $this->modes = null; // invalidate cached mode list
    }

    /**
     * Register a mode that handles its own line endings.
     * Modes registered here will be skipped by Eol's connectTo().
     *
     * @param string $mode The mode name
     * @return void
     */
    public function registerBlockEolMode(string $mode): void
    {
        $this->blockEolModes[] = $mode;
    }

    /**
     * Get all modes that handle their own line endings.
     *
     * @return string[]
     */
    public function getBlockEolModes(): array
    {
        return $this->blockEolModes;
    }

    /**
     * Whether DokuWiki is the preferred syntax (`dw` or `dw+md`).
     *
     * Modes that have to choose between DW-flavored and MD-flavored
     * behavior at runtime read this flag. Compare with isMdPreferred()
     * — exactly one of the two is true for any valid `$conf['syntax']`
     * setting.
     */
    public function isDwPreferred(): bool
    {
        global $conf;
        return in_array($conf['syntax'], ['dw', 'dw+md'], true);
    }

    /**
     * Whether Markdown is the preferred syntax (`md` or `md+dw`).
     */
    public function isMdPreferred(): bool
    {
        global $conf;
        return in_array($conf['syntax'], ['md', 'md+dw'], true);
    }

    /**
     * Get all parser modes, fully instantiated and sorted by priority.
     *
     * This includes syntax plugins, built-in modes, formatting modes, and
     * data-driven modes (smileys, acronyms, entities). Results are cached
     * unless running in a test environment (DOKU_UNITTEST defined).
     *
     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => ModeInterface]
     */
    public function getModes(): array
    {
        global $conf;

        if ($this->modes !== null && !defined('DOKU_UNITTEST')) {
            return $this->modes;
        }

        $this->modes = [];
        $syntax = $conf['syntax'];
        // Both mixed settings load both families; strict comparison matches
        // isDwPreferred()/isMdPreferred().
        $loadDw = in_array($syntax, ['dw', 'dw+md', 'md+dw'], true);
        $loadMd = in_array($syntax, ['md', 'dw+md', 'md+dw'], true);

        $this->loadPluginModes();
        $this->loadAlwaysModes();
        if ($loadDw) $this->loadDokuWikiModes();
        if ($loadMd) $this->loadMarkdownModes();
        $this->loadDataModes();

        usort($this->modes, self::sortModes(...));
        return $this->modes;
    }

    //region Sub-parser pool

    /**
     * Acquire a sub-parser for the given exclusion set.
     *
     * The registry maintains a pool of sub-parsers per exclusion key.
     * Each acquire returns the next free instance from that pool;
     * releaseSubParser must be called (with the same exclusion set)
     * once the caller is done. If all instances in a pool are already
     * checked out — re-entrancy on the same key — a fresh instance is
     * built and appended to the pool. Real-world nesting for any one
     * mode tops out at a handful of levels, so pool growth is bounded.
     *
     * Use this primitive when the caller wants to hold the parser
     * across multiple parse() calls (e.g. iterating over list items).
     * For single-shot use, prefer {@see withSubParser} so release is
     * automatic.
     *
     * The returned Parser is shared infrastructure: callers must call
     * `$parser->getHandler()->reset()` before each parse() to avoid
     * inheriting state from a previous use.
     *
     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
     * @return Parser
     */
    public function acquireSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): Parser {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        $entry = $this->subParsers[$key] ?? ['parsers' => [], 'inUse' => 0];

        // 'inUse' doubles as the index of the next free parser; grow the
        // pool when every existing instance is checked out.
        if ($entry['inUse'] >= count($entry['parsers'])) {
            $entry['parsers'][] = $this->buildSubParser($excludeCategories, $excludeModes);
        }
        $parser = $entry['parsers'][$entry['inUse']];
        $entry['inUse']++;
        $this->subParsers[$key] = $entry;
        return $parser;
    }

    /**
     * Release a previously-acquired sub-parser back to its pool.
     *
     * Should be paired with a prior {@see acquireSubParser} call for
     * the same exclusion set. Callers must release in LIFO order with
     * respect to other acquires on the same key — the implementation
     * does not enforce LIFO, but out-of-order release would silently
     * hand the same parser to two callers, so the caller is responsible
     * for the discipline. Wrapping each acquire/release pair in a
     * single try/finally (or using {@see withSubParser}) makes the
     * ordering correct by construction.
     *
     * Throws if no acquire is outstanding for the given key — that
     * indicates an acquire/release imbalance bug in the caller.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @return void
     * @throws \RuntimeException on release without a matching acquire
     */
    public function releaseSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): void {
        $key = $this->subParserKey($excludeCategories, $excludeModes);
        if (!isset($this->subParsers[$key]) || $this->subParsers[$key]['inUse'] <= 0) {
            throw new \RuntimeException(
                "releaseSubParser called without matching acquireSubParser for key '$key'"
            );
        }
        $this->subParsers[$key]['inUse']--;
    }

    /**
     * Run a callback with an exclusively-held sub-parser.
     *
     * Convenience wrapper around acquire/release. The parser is checked
     * out for the duration of the callback, then released even if the
     * callback throws. Preferred shape for single-shot sub-parses
     * (one parse() call per acquire); use the explicit pair for cases
     * where the parser is held across a loop or other longer scope.
     *
     * @template T
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @param callable(Parser): T $fn
     * @return T
     */
    public function withSubParser(
        array $excludeCategories,
        array $excludeModes,
        callable $fn
    ) {
        $parser = $this->acquireSubParser($excludeCategories, $excludeModes);
        try {
            return $fn($parser);
        } finally {
            $this->releaseSubParser($excludeCategories, $excludeModes);
        }
    }

    /**
     * Build a fresh Parser preconfigured with every active mode except
     * the ones excluded.
     *
     * Mode objects are cloned before being attached so that
     * Parser::addMode()'s assignment to $Lexer does not clobber the
     * main parser's mode references.
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @return Parser
     */
    protected function buildSubParser(
        array $excludeCategories,
        array $excludeModes
    ): Parser {
        $categories = $this->getCategories();
        $excluded = $excludeModes;
        foreach ($excludeCategories as $cat) {
            $excluded = array_merge($excluded, $categories[$cat] ?? []);
        }

        $parser = new Parser(new Handler());
        foreach ($this->getModes() as $m) {
            if (in_array($m['mode'], $excluded, true)) continue;
            // Mode objects expose a single $Lexer slot which Parser::addMode()
            // overwrites at registration time. The objects in $this->modes are
            // already attached to the main parser's lexer; reusing them here
            // would clobber that reference and break the main parse. Clone so
            // the sub-parser gets its own copy with its own $Lexer slot.
            $parser->addMode($m['mode'], clone $m['obj']);
        }
        return $parser;
    }

    /**
     * Build the cache key used to identify a sub-parser exclusion set.
     *
     * The key is order-sensitive: the same names in a different order
     * yield a different key (and therefore a different pool).
     *
     * @param string[] $excludeCategories
     * @param string[] $excludeModes
     * @return string
     */
    protected function subParserKey(array $excludeCategories, array $excludeModes): string
    {
        return implode(',', $excludeCategories) . '|' . implode(',', $excludeModes);
    }

    //endregion

    //region Mode loading

    /**
     * Load syntax plugin modes and register them in their categories.
     *
     * This runs on every uncached getModes() call, so the category
     * registration is guarded against adding the same plugin mode twice.
     */
    protected function loadPluginModes(): void
    {
        global $PARSER_MODES;

        foreach (plugin_list('syntax') as $p) {
            $obj = plugin_load('syntax', $p);
            if (!$obj instanceof PluginInterface) continue;

            $name = "plugin_$p";
            $type = $obj->getType();
            if (!in_array($name, $PARSER_MODES[$type] ?? [], true)) {
                $PARSER_MODES[$type][] = $name;
            }

            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $name,
                'obj' => $obj,
            ];
        }
    }

    /**
     * Load modes that have no equivalent in the other syntax.
     * These are always active regardless of the syntax setting.
     */
    protected function loadAlwaysModes(): void
    {
        global $conf;

        $modes = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'gfm_quote', 'gfm_hr',
            'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
        ];

        // Typography modes are config-gated; a missing setting counts as off.
        if (!empty($conf['typography'])) {
            $modes[] = 'quotes';
            $modes[] = 'multiplyentity';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load DokuWiki-specific modes for features that also exist in Markdown.
     * Skipped when syntax is 'md'.
     */
    protected function loadDokuWikiModes(): void
    {
        $modes = [
            'emphasis', 'deleted', 'code', 'header',
            'linebreak', 'internallink', 'media', 'table',
            'monospace', 'unformatted', 'file',
        ];

        // Underline only loads when DokuWiki is preferred. In MD-preferred
        // modes, `__` means strong (via gfm_strong_underscore) and loading
        // Underline here would conflict.
        //
        // Listblock only loads when DokuWiki is preferred. In MD-preferred
        // modes, GfmListblock owns the `-`/`*`/`+` markers and zero-indent
        // top-level items, which conflicts with DokuWiki's required-2-space-
        // indent list model.
        if ($this->isDwPreferred()) {
            $modes[] = 'underline';
            $modes[] = 'listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load Markdown-specific modes for features that also exist in DokuWiki.
     * Skipped when syntax is 'dw'.
     */
    protected function loadMarkdownModes(): void
    {
        $modes = [
            'gfm_escape', 'gfm_linebreak', 'gfm_html_entity',
            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
            'gfm_backtick_single', 'gfm_backtick_double',
            'gfm_header', 'gfm_link', 'gfm_media',
            'gfm_code', 'gfm_file', 'gfm_table',
        ];

        // Underscore-based emphasis and strong only load when Markdown is
        // preferred. In DW-preferred modes, `__` means underline and loading
        // these would conflict.
        //
        // GfmListblock only loads when Markdown is preferred. In DW-preferred
        // modes, the DokuWiki Listblock owns the `-`/`*` markers (with the
        // 2-space indent rule); the two list models cannot co-exist.
        if ($this->isMdPreferred()) {
            $modes[] = 'gfm_emphasis_underscore';
            $modes[] = 'gfm_strong_underscore';
            $modes[] = 'gfm_emphasis_strong_underscore';
            $modes[] = 'gfm_listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load data-driven modes that require constructor arguments
     * (smileys, acronyms, entities) and optional config-gated modes.
     */
    protected function loadDataModes(): void
    {
        global $conf;

        $obj = new Smiley(array_keys(getSmileys()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj];

        $obj = new Acronym(array_keys(getAcronyms()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj];

        $obj = new Entity(array_keys(getEntities()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj];

        if (!empty($conf['camelcase'])) {
            $obj = new Camelcaselink();
            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj];
        }
    }

    /**
     * Instantiate mode classes by name and add them to the mode list.
     *
     * Mode names are split on `_` and each segment is PascalCased to form the
     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
     * `internallink` → `Internallink`, `strong` → `Strong`).
     *
     * @param string[] $modeNames
     */
    protected function instantiateModes(array $modeNames): void
    {
        foreach ($modeNames as $mode) {
            $class = implode('', array_map('ucfirst', explode('_', $mode))); // snake_case to PascalCase
            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
            $obj = new $class();
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj' => $obj,
            ];
        }
    }

    //endregion

    /**
     * Callback function for usort: orders mode entries by ascending 'sort' value.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public static function sortModes(array $a, array $b): int
    {
        return $a['sort'] <=> $b['sort'];
    }
}