<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Parser mode for GitHub Flavored Markdown (GFM) tables.
 *
 * The design follows DokuWiki's native Table mode: a lexer state with an
 * entry and an exit pattern, inline modes nested inside it (see
 * allowedCategories()), and a small post-processing rewriter
 * (Handler\GfmTable) that converts the flat token stream into the
 * canonical DokuWiki table call sequence.
 *
 * Per spec, cells hold inline content only ("Block-level elements cannot
 * be inserted in a table"), so the nested categories are the same ones DW
 * Table allows: FORMATTING, SUBSTITUTION, PROTECTED, DISABLED.
 *
 * Entering the mode: the entry pattern consumes nothing but the leading
 * newline. A single zero-width lookahead then checks the table shape — a
 * header line with at least one pipe, followed by a delimiter row whose
 * cells are exactly `:?-+:?`. Paragraphs that merely contain pipes fail
 * that check and never enter the mode.
 *
 * Inside the mode, the patterns recognise:
 *  - `\|` as a cell separator. A `(?<!\\)` lookbehind leaves a backslash-
 *    prefixed pipe in the raw input; this mode only cares about splitting
 *    cells. Turning `\|` into a literal `|` happens downstream: GfmEscape
 *    consumes `\|` in normal cell text, and Handler\GfmTable's
 *    unescapePipes() applies the tables-extension rewrite inside code
 *    spans, where standard §6.1 escapes don't fire.
 *  - `\n` followed by a character that is neither a newline nor `>` as a
 *    row separator;
 *  - any other `\n` as the exit (blank line, blockquote start, EOF).
 *
 * The sort value is 55, one below DW Table's 60. In `dw+md` and `md+dw`
 * (where both modes load) the lookahead-validated GFM entry is therefore
 * tried first; when it finds no valid delimiter row, DW Table at sort 60
 * takes over for `\n|` rows.
 */
class GfmTable extends AbstractMode
{
    /**
     * Categories of modes that may be nested inside a table cell.
     *
     * Only inline categories are listed, because GFM table cells parse
     * only inline content.
     *
     * @inheritdoc
     */
    protected function allowedCategories(): array
    {
        return [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITUTION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];
    }

    /**
     * Sort position of this mode; 55 places it just before DW Table (60).
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 55;
    }

    /**
     * Registers `gfm_table` with the mode registry as a block EOL mode.
     *
     * NOTE(review): what that registration implies for other modes is
     * defined by ModeRegistry, which is not visible from this file.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        $this->registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Adds the entry pattern, which validates the delimiter row up front.
     *
     * The pattern consumes a single `\n`. Everything else sits in a
     * zero-width lookahead that requires:
     *  - a header line containing at least one `|`, and
     *  - a delimiter row made of `:?-+:?` cells separated by `|`,
     *    terminated by a newline or the end of input.
     *
     * Skipping this validation would let any paragraph containing a pipe
     * open a table; with it, such text stays an ordinary paragraph.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Building blocks of the delimiter row.
        $gap = '[ \t]*';                 // optional spaces/tabs
        $cell = ':?-+:?';                // dashes with optional alignment colons
        $edgePipe = $gap . '\|?' . $gap; // optional leading or trailing pipe

        $delimiterRow = $edgePipe . $cell . '(?:' . $gap . '\|' . $gap . $cell . ')*' . $edgePipe;

        // Header line: anything on one line, as long as it holds a pipe.
        $headerLine = '[^\n]*\|[^\n]*';

        $entryPattern = '\n(?=' . $headerLine . '\n' . $delimiterRow . '(?:\n|$)' . ')';

        $this->Lexer->addEntryPattern($entryPattern, $mode, 'gfm_table');
    }

    /**
     * Adds the in-table patterns: cell separator, row separator and exit.
     *
     * @inheritdoc
     */
    public function postConnect()
    {
        // Cell separator: a pipe that is not preceded by a backslash, so
        // an escaped pipe (`\|`) never splits a cell. Converting `\|` to a
        // literal `|` in the cell content is not done here; GfmEscape
        // consumes `\|` in normal text and
        // Handler\GfmTable::unescapePipes() applies the tables-extension
        // rewrite inside code spans. This pattern only has to produce
        // cells of the right shape.
        //
        // Edge case: for `\\|` (an escaped backslash followed by a real
        // separator pipe) the lookbehind sees the second `\` and declines
        // to split, which is technically wrong at this level. GfmEscape
        // consumes `\\` first, though, which leaves a clean `|` at the
        // separator position.
        $this->Lexer->addPattern('(?<!\\\\)\|', 'gfm_table');

        // Row separator: a newline whose next character is neither a
        // newline nor `>`. Ruling out `>` lets a blockquote end the table
        // (spec 201); demanding a non-newline character rules out blank
        // lines and end-of-input.
        $this->Lexer->addPattern('\n(?=[^\n>])', 'gfm_table');

        // Every remaining newline (blank line, blockquote start, EOF)
        // leaves the mode.
        $this->Lexer->addExitPattern('\n', 'gfm_table');
    }

    /**
     * Translates lexer events of this mode into `gfm_table_*` calls.
     *
     * On entry a GfmTableRewriter is installed as the handler's call
     * writer, wrapping the previous one; on exit the rewriter's process()
     * result is installed as the call writer again.
     *
     * @inheritdoc
     *
     * @param string $match the text matched by the lexer
     * @param int $state one of the DOKU_LEXER_* state constants
     * @param int $pos position of the match in the input
     * @param Handler $handler handler that receives the calls
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                $outerWriter = $handler->getCallWriter();
                $handler->setCallWriter(new GfmTableRewriter($outerWriter));
                // The entry pattern consumed one `\n`, so the table body
                // starts one character after $pos; gfm_table_start
                // carries that body position.
                $handler->addCall('gfm_table_start', [$pos + 1], $pos);
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_MATCHED:
                // A match containing a newline is a row separator and
                // starts a new row first; a bare `|` stays in the current
                // row. Either way a new cell is opened.
                if (str_contains($match, "\n")) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_UNMATCHED:
                // Plain text between separators is passed on as cdata.
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);
                /** @var GfmTableRewriter $rewriter */
                $rewriter = $handler->getCallWriter();
                $handler->setCallWriter($rewriter->process());
                break;
        }

        return true;
    }
}