<?php

namespace dokuwiki\Parsing;

use dokuwiki\Extension\PluginInterface;
use dokuwiki\Extension\SyntaxPlugin;
use dokuwiki\Parsing\ParserMode\Acronym;
use dokuwiki\Parsing\ParserMode\ModeInterface;
use dokuwiki\Parsing\ParserMode\Camelcaselink;
use dokuwiki\Parsing\ParserMode\Entity;
use dokuwiki\Parsing\ParserMode\Smiley;
use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Parser;

/**
 * Central registry for parser mode categories and mode instantiation.
 *
 * The underlying data is kept in the global $PARSER_MODES array because
 * third-party plugins read and write it directly at runtime (e.g. to register
 * their mode in a category). All methods in this class operate on that global
 * so changes are visible to both old and new code.
 */
class ModeRegistry
{
    // Category constants (preserving the historical 'substition' typo)
    public const CATEGORY_CONTAINER = 'container';
    public const CATEGORY_BASEONLY = 'baseonly';
    public const CATEGORY_FORMATTING = 'formatting';
    public const CATEGORY_SUBSTITION = 'substition';
    public const CATEGORY_PROTECTED = 'protected';
    public const CATEGORY_DISABLED = 'disabled';
    public const CATEGORY_PARAGRAPHS = 'paragraphs';

    /** @var array{sort: int, mode: string, obj: ModeInterface}[]|null */
    private ?array $modes = null;

    /** @var array<string, Parser> Cached sub-parsers keyed by exclusion-set identifier */
    private array $subParsers = [];

    /** @var string[] Modes that handle their own line endings (skip EOL connection) */
    private array $blockEolModes = [];

    private static ?self $instance = null;

    /**
     * Get the singleton instance of the ModeRegistry.
     *
     * @return self
     */
    public static function getInstance(): self
    {
        if (!self::$instance instanceof self) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Reset the singleton instance.
     *
     * This is mainly useful for testing to force re-initialization. The next
     * getInstance() call runs the constructor again, which overwrites the
     * global $PARSER_MODES array with the default categories.
     *
     * @return void
     */
    public static function reset(): void
    {
        self::$instance = null;
    }

    /**
     * Constructor. Initializes the global $PARSER_MODES array with the default mode categories.
     */
    private function __construct()
    {
        global $PARSER_MODES;
        $PARSER_MODES = [
            self::CATEGORY_CONTAINER => ['listblock', 'table', 'quote', 'hr', 'gfm_listblock', 'gfm_table'],
            self::CATEGORY_BASEONLY => ['header', 'gfm_header'],
            self::CATEGORY_FORMATTING => [
                'strong', 'emphasis', 'underline', 'monospace',
                'subscript', 'superscript', 'deleted', 'footnote',
                'gfm_emphasis', 'gfm_emphasis_underscore', 'gfm_strong_underscore',
                'gfm_emphasis_strong', 'gfm_emphasis_strong_underscore',
                'gfm_deleted', 'gfm_backtick_single', 'gfm_backtick_double',
            ],
            self::CATEGORY_SUBSTITION => [
                'acronym', 'smiley', 'wordblock', 'entity',
                'camelcaselink', 'internallink', 'media', 'externallink',
                'linebreak', 'emaillink', 'windowssharelink', 'filelink',
                'notoc', 'nocache', 'multiplyentity', 'quotes', 'rss',
                'gfm_link', 'gfm_media',
            ],
            self::CATEGORY_PROTECTED => ['preformatted', 'code', 'file', 'gfm_code', 'gfm_file'],
            self::CATEGORY_DISABLED => ['unformatted'],
            self::CATEGORY_PARAGRAPHS => ['eol'],
        ];
    }

    /**
     * Get all mode names in the given categories.
     *
     * Unknown categories are silently ignored. The result is re-indexed so it
     * is a proper list (array_unique() alone would leave gaps in the keys).
     *
     * @param string[] $categories One or more CATEGORY_* constants
     * @return string[] Unique list of mode names
     */
    public function getModesForCategories(array $categories): array
    {
        global $PARSER_MODES;

        $lists = [];
        foreach ($categories as $cat) {
            if (isset($PARSER_MODES[$cat])) {
                $lists[] = $PARSER_MODES[$cat];
            }
        }

        // merge once instead of re-copying the accumulator on every iteration
        return array_values(array_unique(array_merge([], ...$lists)));
    }

    /**
     * Get the raw categories array.
     *
     * @return array<string, string[]> Category name => list of mode names
     */
    public function getCategories(): array
    {
        global $PARSER_MODES;
        return $PARSER_MODES;
    }

    /**
     * Register a mode in a category.
     *
     * Registering the same mode twice in one category is a no-op for the
     * category list. Cached mode lists and cached sub-parsers are always
     * dropped, because both are derived from the category data.
     *
     * @param string $category One of the CATEGORY_* constants
     * @param string $modeName The mode name to register
     * @return void
     */
    public function registerMode(string $category, string $modeName): void
    {
        global $PARSER_MODES;

        if (!in_array($modeName, $PARSER_MODES[$category] ?? [], true)) {
            $PARSER_MODES[$category][] = $modeName;
        }

        $this->modes = null; // invalidate cached mode list
        $this->subParsers = []; // exclusion sets are computed from categories, so these are stale too
    }

    /**
     * Register a mode that handles its own line endings.
     * Modes registered here will be skipped by Eol's connectTo().
     *
     * Repeated registration of the same mode is ignored.
     *
     * @param string $mode The mode name
     * @return void
     */
    public function registerBlockEolMode(string $mode): void
    {
        if (!in_array($mode, $this->blockEolModes, true)) {
            $this->blockEolModes[] = $mode;
        }
    }

    /**
     * Get all modes that handle their own line endings.
     *
     * @return string[]
     */
    public function getBlockEolModes(): array
    {
        return $this->blockEolModes;
    }

    /**
     * Get all parser modes, fully instantiated and sorted by priority.
     *
     * This includes syntax plugins, built-in modes, formatting modes, and
     * data-driven modes (smileys, acronyms, entities). Results are cached
     * unless running in a test environment (DOKU_UNITTEST defined).
     *
     * Which built-in modes are loaded depends on $conf['syntax']
     * ('dokuwiki' when unset).
     *
     * @return array[] Each entry is ['sort' => int, 'mode' => string, 'obj' => ModeInterface]
     */
    public function getModes(): array
    {
        global $conf;

        if ($this->modes !== null && !defined('DOKU_UNITTEST')) {
            return $this->modes;
        }

        $this->modes = [];
        $syntax = $conf['syntax'] ?? 'dokuwiki';
        $loadDw = in_array($syntax, ['dokuwiki', 'dw+md', 'md+dw'], true);
        $loadMd = in_array($syntax, ['markdown', 'dw+md', 'md+dw'], true);

        $this->loadPluginModes();
        $this->loadAlwaysModes();
        if ($loadDw) $this->loadDokuWikiModes();
        if ($loadMd) $this->loadMarkdownModes();
        $this->loadDataModes();

        usort($this->modes, self::sortModes(...));
        return $this->modes;
    }

    /**
     * Return a cached Parser preconfigured with every active mode except the
     * ones excluded.
     *
     * Built lazily on first call and reused thereafter. Mode objects are cloned
     * before being attached to the sub-parser so that connectTo()'s assignment
     * to $Lexer does not clobber the main parser's mode references.
     *
     * The returned Parser must not be re-entered: each call should reset the
     * Handler (via $parser->getHandler()->reset()) before invoking parse().
     * Callers that need to sub-parse during their own handle() must ensure
     * they are not already inside a sub-parse on the same exclusion set —
     * one common case is excluding the calling mode itself from the sub-parser
     * to rule out re-entry.
     *
     * @param string[] $excludeCategories CATEGORY_* constants whose modes should be excluded
     * @param string[] $excludeModes specific mode names to exclude in addition to category-based exclusions
     * @return Parser
     */
    public function getSubParser(
        array $excludeCategories = [self::CATEGORY_BASEONLY],
        array $excludeModes = []
    ): Parser {
        // Normalize so that the same exclusion set always maps to the same
        // cache entry, regardless of argument order or repeated entries.
        $keyCategories = array_unique($excludeCategories);
        $keyModes = array_unique($excludeModes);
        sort($keyCategories);
        sort($keyModes);
        $key = implode(',', $keyCategories) . '|' . implode(',', $keyModes);
        if (isset($this->subParsers[$key])) return $this->subParsers[$key];

        $categories = $this->getCategories();
        $excluded = $excludeModes;
        foreach ($excludeCategories as $cat) {
            $excluded = array_merge($excluded, $categories[$cat] ?? []);
        }

        $parser = new Parser(new Handler());
        foreach ($this->getModes() as $m) {
            if (in_array($m['mode'], $excluded, true)) continue;
            // Mode objects expose a single $Lexer slot which Parser::addMode()
            // overwrites at registration time. The objects in $this->modes are
            // already attached to the main parser's lexer; reusing them here
            // would clobber that reference and break the main parse. Clone so
            // the sub-parser gets its own copy with its own $Lexer slot.
            $parser->addMode($m['mode'], clone $m['obj']);
        }

        return $this->subParsers[$key] = $parser;
    }

    /**
     * Load syntax plugin modes and register them in their categories.
     *
     * The mode list is rebuilt more than once per request in some situations
     * (test environment, cache invalidation), so a plugin mode is only added
     * to its category in $PARSER_MODES if it is not already listed there.
     */
    protected function loadPluginModes(): void
    {
        global $PARSER_MODES;

        $plugins = plugin_list('syntax');
        foreach ($plugins as $p) {
            $obj = plugin_load('syntax', $p);
            if (!$obj instanceof PluginInterface) continue;

            $modeName = "plugin_$p";
            $type = $obj->getType();
            if (!in_array($modeName, $PARSER_MODES[$type] ?? [], true)) {
                $PARSER_MODES[$type][] = $modeName;
            }

            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $modeName,
                'obj' => $obj,
            ];
            unset($obj);
        }
    }

    /**
     * Load modes that have no equivalent in the other syntax.
     * These are always active regardless of the syntax setting.
     */
    protected function loadAlwaysModes(): void
    {
        global $conf;

        $modes = [
            'strong', 'subscript', 'superscript',
            'footnote', 'eol', 'preformatted',
            'quote', 'externallink', 'emaillink', 'windowssharelink',
            'notoc', 'nocache', 'rss',
        ];

        // !empty() rather than a bare read: same truthiness, but no warning
        // when the setting is missing from $conf.
        if (!empty($conf['typography'])) {
            $modes[] = 'quotes';
            $modes[] = 'multiplyentity';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load DokuWiki-specific modes for features that also exist in Markdown.
     * Skipped when syntax is 'markdown'.
     */
    protected function loadDokuWikiModes(): void
    {
        global $conf;
        $syntax = $conf['syntax'] ?? 'dokuwiki';
        $dwPreferred = in_array($syntax, ['dokuwiki', 'dw+md'], true);

        $modes = [
            'emphasis', 'deleted', 'code', 'header', 'hr',
            'linebreak', 'internallink', 'media', 'table',
            'monospace', 'unformatted', 'file',
        ];

        // Underline only loads when DokuWiki is preferred. In MD-preferred
        // modes, `__` means strong (via gfm_strong_underscore) and loading
        // Underline here would conflict.
        //
        // Listblock only loads when DokuWiki is preferred. In MD-preferred
        // modes, GfmListblock owns the `-`/`*`/`+` markers and zero-indent
        // top-level items, which conflicts with DokuWiki's required-2-space-
        // indent list model.
        if ($dwPreferred) {
            $modes[] = 'underline';
            $modes[] = 'listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load Markdown-specific modes for features that also exist in DokuWiki.
     * Skipped when syntax is 'dokuwiki'.
     */
    protected function loadMarkdownModes(): void
    {
        global $conf;
        $syntax = $conf['syntax'] ?? 'dokuwiki';
        $mdPreferred = in_array($syntax, ['markdown', 'md+dw'], true);

        $modes = [
            'gfm_emphasis', 'gfm_emphasis_strong', 'gfm_deleted',
            'gfm_backtick_single', 'gfm_backtick_double',
            'gfm_header', 'gfm_link', 'gfm_media',
            'gfm_code', 'gfm_file', 'gfm_table',
        ];

        // Underscore-based emphasis and strong only load when Markdown is
        // preferred. In DW-preferred modes, `__` means underline and loading
        // these would conflict.
        //
        // GfmListblock only loads when Markdown is preferred. In DW-preferred
        // modes, the DokuWiki Listblock owns the `-`/`*` markers (with the
        // 2-space indent rule); the two list models cannot co-exist.
        if ($mdPreferred) {
            $modes[] = 'gfm_emphasis_underscore';
            $modes[] = 'gfm_strong_underscore';
            $modes[] = 'gfm_emphasis_strong_underscore';
            $modes[] = 'gfm_listblock';
        }

        $this->instantiateModes($modes);
    }

    /**
     * Load data-driven modes that require constructor arguments
     * (smileys, acronyms, entities) and optional config-gated modes.
     */
    protected function loadDataModes(): void
    {
        global $conf;

        $obj = new Smiley(array_keys(getSmileys()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj];

        $obj = new Acronym(array_keys(getAcronyms()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj];

        $obj = new Entity(array_keys(getEntities()));
        $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj];

        if (!empty($conf['camelcase'])) {
            $obj = new Camelcaselink();
            $this->modes[] = ['sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj];
        }
    }

    /**
     * Instantiate mode classes by name and add them to the mode list.
     *
     * Mode names are split on `_` and each segment is PascalCased to form the
     * class name (e.g. `gfm_emphasis_underscore` → `GfmEmphasisUnderscore`,
     * `internallink` → `Internallink`, `strong` → `Strong`).
     *
     * @param string[] $modeNames
     */
    protected function instantiateModes(array $modeNames): void
    {
        foreach ($modeNames as $mode) {
            $class = implode('', array_map('ucfirst', explode('_', $mode))); // snake_case to PascalCase
            $class = 'dokuwiki\\Parsing\\ParserMode\\' . $class; // prepend namespace
            $obj = new $class();
            $this->modes[] = [
                'sort' => $obj->getSort(),
                'mode' => $mode,
                'obj' => $obj,
            ];
        }
    }

    /**
     * Callback function for usort
     *
     * Orders two mode entries ascending by their 'sort' value.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public static function sortModes(array $a, array $b): int
    {
        return $a['sort'] <=> $b['sort'];
    }
}