<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Parser mode for GFM (GitHub Flavored Markdown) tables.
 *
 * The design follows DokuWiki's native Table mode: the lexer enters a
 * dedicated state, inline modes are nested through `allowedModes`, and a
 * post-processing rewriter (Handler\GfmTable) converts the flat call
 * stream produced here into the canonical DokuWiki table call sequence.
 *
 * Per spec, table cells hold inline content only ("Block-level elements
 * cannot be inserted in a table"), so the nested categories are the same
 * as for the DW Table mode: FORMATTING, SUBSTITION, PROTECTED, DISABLED.
 *
 * How the mode is entered: the entry pattern consumes nothing but the
 * leading newline. A zero-width lookahead then checks the table shape —
 * a header line with at least one pipe, followed by a delimiter row made
 * solely of `:?-+:?` cells. Ordinary paragraphs that merely contain a
 * pipe fail that check and never reach this mode.
 *
 * Patterns active inside the mode:
 *  - `\|` splits cells. A `(?<!\\)` lookbehind leaves a backslash-prefixed
 *    pipe alone, so it stays part of the cell text. Turning `\|` into a
 *    literal `|` is GfmEscape's job, not this mode's; until that mode
 *    exists the two-character sequence `\|` remains in the cell content.
 *  - `\n` followed by a character that is neither a newline nor `>`
 *    starts the next row.
 *  - every other `\n` (blank line, blockquote start, EOF) leaves the mode.
 *
 * The sort value is 55, one less than DW Table's 60. When both modes are
 * loaded (`dw+md` and `md+dw`) the lookahead-validated GFM entry is tried
 * first; if no valid delimiter row follows, DW Table at sort 60 picks up
 * the `\n|` rows instead.
 */
class GfmTable extends AbstractMode
{
    /**
     * Restrict nested modes to the inline categories allowed in a cell.
     */
    public function __construct()
    {
        $inlineCategories = [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];

        $registry = ModeRegistry::getInstance();
        $this->allowedModes = $registry->getModesForCategories($inlineCategories);
    }

    /**
     * Sort order of this mode (55, i.e. just ahead of DW Table's 60).
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 55;
    }

    /**
     * Register `gfm_table` with the mode registry as a block EOL mode.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        $registry = ModeRegistry::getInstance();
        $registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Install the entry pattern, guarded by a lookahead on the table shape.
     *
     * The pattern itself matches a single `\n`. Everything after it is
     * only asserted, not consumed:
     *  - a header line that contains at least one `|`;
     *  - a delimiter row whose cells are `:?-+:?`, separated by `|`, with
     *    optional outer pipes and optional spaces/tabs around each cell;
     *  - a newline or end of input right after the delimiter row.
     *
     * This guard is what keeps a paragraph with a stray pipe from being
     * parsed as a table; such text continues as a normal paragraph.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Delimiter row, assembled from: first cell, further cells, tail.
        $firstCell = '[ \t]*\|?[ \t]*:?-+:?';
        $moreCells = '(?:[ \t]*\|[ \t]*:?-+:?)*';
        $rowTail = '[ \t]*\|?[ \t]*';
        $delimiterRow = $firstCell . $moreCells . $rowTail;

        // Header line: any single line with a pipe somewhere in it.
        $headerLine = '[^\n]*\|[^\n]*';
        // The delimiter row must be followed by a newline or end of input.
        $lineEnd = '(?:\n|$)';

        $entryPattern = '\n(?=' . $headerLine . '\n' . $delimiterRow . $lineEnd . ')';

        $this->Lexer->addEntryPattern($entryPattern, $mode, 'gfm_table');
    }

    /**
     * Install the patterns that apply while inside a table.
     *
     * @inheritdoc
     */
    public function postConnect()
    {
        $tableMode = 'gfm_table';

        // Cell separator: a pipe that is not directly preceded by a
        // backslash, so an escaped pipe never splits a cell. Converting
        // `\|` into a plain `|` within the cell text is left to GfmEscape;
        // here only the cell boundaries matter. Known edge: for `\\|`
        // (escaped backslash followed by a genuine separator) the
        // lookbehind sees the second backslash and does not split, which
        // is strictly speaking wrong. GfmEscape is expected to resolve
        // that by consuming `\\` first, leaving a bare `|` behind.
        $this->Lexer->addPattern('(?<!\\\\)\|', $tableMode);

        // Row separator: newline followed by something other than a
        // newline or `>`. Ruling out `>` allows a blockquote to end the
        // table (spec 201); demanding a following non-newline character
        // rules out blank lines and end of input.
        $this->Lexer->addPattern('\n(?=[^\n>])', $tableMode);

        // Remaining newlines (blank line, blockquote start, EOF) end the
        // table.
        $this->Lexer->addExitPattern('\n', $tableMode);
    }

    /**
     * Translate lexer events for this mode into handler calls.
     *
     * On entry a GfmTableRewriter is put in front of the handler's current
     * call writer; on exit its process() result is installed as the call
     * writer again.
     *
     * @param string $match text matched by the lexer
     * @param int $state one of the DOKU_LEXER_* state constants
     * @param int $pos position of the match in the input
     * @param Handler $handler handler that receives the calls
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                $rewriter = new GfmTableRewriter($handler->getCallWriter());
                $handler->setCallWriter($rewriter);
                // The start call records where the table body begins,
                // i.e. one past the `\n` the entry pattern consumed.
                $handler->addCall('gfm_table_start', [$pos + 1], $pos);
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_MATCHED:
                // A match containing a newline is a row separator and
                // begins a new row; a bare `|` is not and stays in the
                // current row.
                if (str_contains($match, "\n")) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                // Either kind of separator opens a new cell.
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_UNMATCHED:
                // Plain cell text is passed through as character data.
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);
                /** @var GfmTableRewriter $rewriter */
                $rewriter = $handler->getCallWriter();
                $restoredWriter = $rewriter->process();
                $handler->setCallWriter($restoredWriter);
                break;
        }

        return true;
    }
}