<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Parser mode for GitHub Flavored Markdown (GFM) tables.
 *
 * The design follows DokuWiki's native Table mode: the lexer enters a
 * dedicated state, inline modes are nested inside it through
 * `allowedModes`, and a small post-processing rewriter (Handler\GfmTable)
 * converts the flat token stream into the canonical DokuWiki table call
 * sequence once the state is left.
 *
 * The spec states that "Block-level elements cannot be inserted in a
 * table", so cells hold inline content only. The nested categories are
 * therefore the same ones DW Table allows: FORMATTING, SUBSTITUTION,
 * PROTECTED and DISABLED.
 *
 * Entering the mode: the entry pattern consumes nothing but the leading
 * newline. A zero-width lookahead then checks the table shape — a header
 * line that contains a pipe, followed by a delimiter row in which every
 * cell is exactly `:?-+:?`. Paragraphs that merely happen to contain a
 * pipe fail that check and never enter the mode.
 *
 * Inside the mode the following patterns are recognised:
 *  - `\|` separates cells. A `(?<!\\)` lookbehind leaves a backslash-
 *    prefixed pipe untouched in the raw input; this pattern only cares
 *    about cell splitting. Turning `\|` into a literal `|` happens
 *    downstream: GfmEscape consumes `\|` in normal cell text, and
 *    Handler\GfmTable's unescapePipes() applies the tables-extension
 *    rewrite inside code spans, where the standard §6.1 escapes do not
 *    fire.
 *  - `\n` followed by a character that is neither a newline nor `>`
 *    separates rows.
 *  - Every other `\n` leaves the mode (blank line, blockquote start, EOF).
 *
 * The sort value is 55, just below DW Table's 60. In `dw+md` and `md+dw`
 * (where both modes are loaded) the lookahead-validated GFM entry is
 * therefore tried first; when no valid delimiter row follows, DW Table at
 * sort 60 picks up the `\n|` rows instead.
 */
class GfmTable extends AbstractMode
{
    /**
     * Only inline content may appear inside a GFM table cell.
     *
     * @inheritdoc
     */
    protected function allowedCategories(): array
    {
        $inlineCategories = [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITUTION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];

        return $inlineCategories;
    }

    /** @inheritdoc */
    public function getSort()
    {
        // Ahead of DW Table (60) so the validated GFM entry is tried first.
        return 55;
    }

    /** @inheritdoc */
    public function preConnect()
    {
        $this->registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Registers the entry pattern, whose lookahead validates the table shape.
     *
     * The pattern itself consumes a single `\n`. Its zero-width lookahead
     * requires:
     *  - a header line with at least one `|`, and
     *  - a delimiter row made of `:?-+:?` cells separated by `|`.
     *
     * This validation is what keeps ordinary paragraphs containing a pipe
     * from being swallowed by the table mode; they continue to be parsed
     * as plain paragraphs.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Building blocks of the delimiter row.
        $blanks = '[ \t]*';
        $dashes = ':?-+:?';

        // Optional leading pipe, first cell, any number of further
        // pipe-separated cells, optional trailing pipe.
        $delimiterRow =
            $blanks . '\|?' . $blanks . $dashes .
            '(?:' . $blanks . '\|' . $blanks . $dashes . ')*' .
            $blanks . '\|?' . $blanks;

        // A header line is any single line containing a pipe.
        $headerLine = '[^\n]*\|[^\n]*';

        // The delimiter row must be terminated by a newline or end of input.
        $lookahead = $headerLine . '\n' . $delimiterRow . '(?:\n|$)';

        $this->Lexer->addEntryPattern('\n(?=' . $lookahead . ')', $mode, 'gfm_table');
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $state = 'gfm_table';

        // Cell separator. Thanks to the `(?<!\\)` lookbehind a `\|` is not
        // taken as a separator, so backslash-escaped pipes never split a
        // cell. Converting `\|` into a literal `|` inside the cell content
        // is done downstream: GfmEscape consumes `\|` in normal text, and
        // Handler\GfmTable::unescapePipes() applies the tables-extension
        // rewrite inside code spans. All that matters here is that the
        // cells end up with the right shape.
        //
        // Edge case: `\\|` (an escaped backslash followed by a genuine
        // separator pipe) is technically handled wrongly by this pattern,
        // because the lookbehind sees the second `\` and refuses to split.
        // GfmEscape consumes the `\\` first, though, which leaves a clean
        // `|` at the separator position.
        $this->Lexer->addPattern('(?<!\\\\)\|', $state);

        // Row separator: a newline followed by a character that is neither
        // a newline nor `>`. Excluding `>` lets a blockquote terminate the
        // table (spec 201); demanding a non-newline rules out blank lines
        // and end of input.
        $this->Lexer->addPattern('\n(?=[^\n>])', $state);

        // Whatever newline is left (blank line, blockquote start, EOF)
        // ends the table.
        $this->Lexer->addExitPattern('\n', $state);
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                // Divert all following calls into the table rewriter.
                $outerWriter = $handler->getCallWriter();
                $handler->setCallWriter(new GfmTableRewriter($outerWriter));
                // The start call carries the body position, i.e. the
                // position just after the consumed `\n`.
                $handler->addCall('gfm_table_start', [$pos + 1], $pos);
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_MATCHED:
                // A match containing a newline is a row separator and
                // starts a new row first; a bare `|` only separates cells.
                if (str_contains($match, "\n")) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                // Either way a new cell begins here.
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_UNMATCHED:
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);
                // Let the rewriter post-process the collected calls and
                // install the writer it hands back.
                /** @var GfmTableRewriter $tableWriter */
                $tableWriter = $handler->getCallWriter();
                $handler->setCallWriter($tableWriter->process());
                break;
        }

        return true;
    }
}