<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
use dokuwiki\Parsing\ModeRegistry;

/**
 * GFM table block.
 *
 * Architecturally mirrors DokuWiki's native Table mode: an entry/exit
 * lexer state with inline modes nested via `allowedModes`, plus a small
 * post-processing rewriter (Handler\GfmTable) that turns the flat token
 * stream into the canonical DokuWiki table call sequence.
 *
 * Cells are inline-only per spec ("Block-level elements cannot be inserted
 * in a table"). Allowed nested categories therefore mirror DW Table:
 * FORMATTING, SUBSTITION, PROTECTED, DISABLED.
 *
 * Entry-pattern strategy: a single zero-width lookahead asserts the table
 * shape (header line containing a pipe, followed by a delimiter row whose
 * cells are exactly `:?-+:?`). Only the leading newline is consumed; the
 * lookahead validates the rest. Non-tables — paragraphs that happen to
 * contain pipes — never enter the mode.
 *
 * The internal patterns recognise:
 *  - `\|` as a cell separator, with a `(?<!\\)` lookbehind so a backslash-
 *    prefixed pipe is left as raw input — the cell-splitting concern. The
 *    unescape (turning `\|` into a literal `|`) is handled downstream:
 *    GfmEscape consumes `\|` in normal cell text, and Handler\GfmTable's
 *    unescapePipes() applies the tables-extension rewrite inside code
 *    spans, where standard §6.1 escapes don't fire.
 *  - `\n` followed by a non-newline, non-`>` character as a row separator;
 *  - any other `\n` exits the mode (blank line, blockquote start, EOF).
 *
 * Sort 55 — one below DW Table's 60 — so that in `dw+md` and `md+dw` (where
 * both modes load) the GFM lookahead-validated entry tries first; if it
 * does not see a valid delimiter row, DW Table at sort 60 takes over for
 * `\n|` rows.
 */
class GfmTable extends AbstractMode
{
    /**
     * GFM table cells parse only inline content.
     *
     * Populates `allowedModes` with every mode the registry reports for the
     * FORMATTING, SUBSTITION, PROTECTED and DISABLED categories.
     */
    public function __construct()
    {
        $this->allowedModes = ModeRegistry::getInstance()->getModesForCategories([
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ]);
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 55;
    }

    /**
     * Registers `gfm_table` with the registry as a block EOL mode before
     * any patterns are connected.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        ModeRegistry::getInstance()->registerBlockEolMode('gfm_table');
    }

    /**
     * Entry pattern with lookahead-validated delimiter row.
     *
     * Consumes only `\n`; the zero-width lookahead asserts:
     *  - a header line containing at least one `|`, and
     *  - a delimiter row of `:?-+:?` cells separated by `|`.
     *
     * Without that validation, any paragraph containing a pipe would
     * trigger the table mode. With it, non-tables flow through as plain
     * paragraphs.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Delimiter row: optional leading pipe, one or more `:?-+:?` cells
        // joined by pipes, optional trailing pipe; spaces/tabs allowed around
        // each piece.
        $delim =
            '[ \t]*\|?[ \t]*:?-+:?' .
            '(?:[ \t]*\|[ \t]*:?-+:?)*' .
            '[ \t]*\|?[ \t]*';
        $entry =
            '\n(?=' .
            '[^\n]*\|[^\n]*' .   // header line containing a pipe
            '\n' . $delim .
            '(?:\n|$)' .         // delimiter row must end at a newline or end of input
            ')';
        $this->Lexer->addEntryPattern($entry, $mode, 'gfm_table');
    }

    /** @inheritdoc */
    public function postConnect()
    {
        // Cell separator. The `(?<!\\)` lookbehind keeps `\|` from being
        // treated as a separator so backslash-escaped pipes don't split
        // cells. The unescape — turning `\|` into a literal `|` in cell
        // content — is handled downstream: GfmEscape consumes `\|` in
        // normal text, and Handler\GfmTable::unescapePipes() applies the
        // tables-extension rewrite inside code spans. We just need the
        // cells to come out the right shape. Edge: `\\|` (escaped
        // backslash, then a real separator pipe) is technically wrong
        // here — the lookbehind sees the second `\` and refuses to split
        // — but GfmEscape consumes `\\` first, leaving a clean `|` at
        // separator position.
        $this->Lexer->addPattern('(?<!\\\\)\|', 'gfm_table');
        // Row separator: a newline followed by a non-newline, non-`>` char.
        // Excluding `>` lets a blockquote terminate the table (spec 201);
        // requiring a non-newline excludes blank lines and end-of-input.
        $this->Lexer->addPattern('\n(?=[^\n>])', 'gfm_table');
        // Any other newline (blank line, blockquote start, EOF) exits.
        $this->Lexer->addExitPattern('\n', 'gfm_table');
    }

    /**
     * Translates lexer events for the table state into flat `gfm_table_*`
     * and `cdata` calls, which the GfmTableRewriter installed on entry
     * post-processes on exit.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                // Wrap the current call writer so every call emitted while
                // inside the table goes through the rewriter.
                $handler->setCallWriter(new GfmTableRewriter($handler->getCallWriter()));
                // table_start carries the body position (skip the consumed `\n`).
                $handler->addCall('gfm_table_start', [$pos + 1], $pos);
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_MATCHED:
                if (str_contains($match, "\n")) {
                    // Row separator: also opens the first cell of the new row.
                    $handler->addCall('gfm_table_row', [], $pos);
                    $handler->addCall('gfm_table_cell', [], $pos);
                } else {
                    // Bare `|` — cell separator within the current row.
                    $handler->addCall('gfm_table_cell', [], $pos);
                }
                break;

            case DOKU_LEXER_UNMATCHED:
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);
                // Let the rewriter process the collected calls, then install
                // the writer that process() hands back.
                /** @var GfmTableRewriter $reWriter */
                $reWriter = $handler->getCallWriter();
                $handler->setCallWriter($reWriter->process());
                break;
        }
        return true;
    }
}