<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Parser mode for GFM (GitHub Flavored Markdown) tables.
 *
 * Structured like DokuWiki's native Table mode: the lexer enters and leaves
 * a dedicated state, inline modes are nested through `allowedModes`, and a
 * small post-processing rewriter (Handler\GfmTable) converts the flat token
 * stream into the canonical DokuWiki table call sequence.
 *
 * Per the spec ("Block-level elements cannot be inserted in a table") cells
 * hold inline content only, so the nested categories are the same ones the
 * DW Table mode allows: FORMATTING, SUBSTITION, PROTECTED, DISABLED.
 *
 * Entering the mode: only the leading newline is consumed. A single
 * zero-width lookahead checks the table shape — a header line containing a
 * pipe, followed by a delimiter row whose cells are exactly `:?-+:?`.
 * Paragraphs that merely happen to contain pipes never enter the mode.
 *
 * Inside the mode:
 *   - `|` separates cells unless a backslash precedes it (`(?<!\\)`
 *     lookbehind). A backslash-prefixed pipe is left as raw input; only the
 *     cell splitting is decided here. Turning `\|` into a literal `|` is
 *     done downstream: GfmEscape consumes `\|` in normal cell text, and
 *     Handler\GfmTable's unescapePipes() applies the tables-extension
 *     rewrite inside code spans, where standard §6.1 escapes don't fire.
 *   - `\n` followed by a character that is neither a newline nor `>`
 *     starts the next row.
 *   - Any other `\n` (blank line, blockquote start, EOF) leaves the mode.
 *
 * Sort order is 55, ahead of DW Table's 60. In `dw+md` and `md+dw` (where
 * both modes load) the lookahead-validated GFM entry is therefore tried
 * first; when it sees no valid delimiter row, DW Table at sort 60 takes
 * over for `\n|` rows.
 */
class GfmTable extends AbstractMode
{
    /**
     * Restrict nested modes to the inline categories, since GFM table cells
     * carry inline content only.
     */
    public function __construct()
    {
        $registry = ModeRegistry::getInstance();
        $inlineCategories = [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];

        $this->allowedModes = $registry->getModesForCategories($inlineCategories);
    }

    /** @inheritdoc */
    public function getSort()
    {
        // Must stay lower than DW Table's 60 so this mode is tried first.
        return 55;
    }

    /** @inheritdoc */
    public function preConnect()
    {
        $registry = ModeRegistry::getInstance();
        $registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Register the entry pattern, whose lookahead validates the delimiter row.
     *
     * The pattern itself consumes just `\n`. Its zero-width lookahead
     * requires:
     *   - a header line with at least one `|`, and
     *   - a delimiter row made of `:?-+:?` cells separated by `|`.
     *
     * Without this check every paragraph containing a pipe would start a
     * table; with it, such text passes through as a plain paragraph.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Delimiter row: optional outer pipes around one or more `:?-+:?` cells.
        $delimiterRow =
            '[ \t]*\|?[ \t]*:?-+:?' .
            '(?:[ \t]*\|[ \t]*:?-+:?)*' .
            '[ \t]*\|?[ \t]*';

        // Header line: anything on one line, as long as it contains a pipe.
        $headerLine = '[^\n]*\|[^\n]*';

        // Header, newline, delimiter row, then a newline or the end anchor.
        $tableShape = $headerLine . '\n' . $delimiterRow . '(?:\n|$)';

        $this->Lexer->addEntryPattern('\n(?=' . $tableShape . ')', $mode, 'gfm_table');
    }

    /** @inheritdoc */
    public function postConnect()
    {
        // Cell separator. Thanks to the `(?<!\\)` lookbehind a `\|` is not
        // taken as a separator, so backslash-escaped pipes do not split
        // cells. Converting `\|` to a literal `|` in the cell content is
        // left to later stages: GfmEscape consumes `\|` in normal text and
        // Handler\GfmTable::unescapePipes() applies the tables-extension
        // rewrite inside code spans. All that matters here is that the
        // cells end up with the right shape.
        //
        // Edge case: `\\|` (an escaped backslash followed by a real
        // separator pipe) is technically handled wrongly by this pattern —
        // the lookbehind sees the second `\` and refuses to split — but
        // GfmEscape consumes `\\` first, leaving a clean `|` at the
        // separator position.
        $this->Lexer->addPattern('(?<!\\\\)\|', 'gfm_table');

        // Row separator: a newline followed by a character that is neither
        // a newline nor `>`. Ruling out `>` lets a blockquote end the table
        // (spec 201); demanding a non-newline rules out blank lines and
        // end-of-input.
        $this->Lexer->addPattern('\n(?=[^\n>])', 'gfm_table');

        // Every remaining newline (blank line, blockquote start, EOF)
        // leaves the mode.
        $this->Lexer->addExitPattern('\n', 'gfm_table');
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                // Wrap the current call writer so the table calls can be
                // rewritten once the table is complete.
                $outerWriter = $handler->getCallWriter();
                $handler->setCallWriter(new GfmTableRewriter($outerWriter));

                // The start call records the body position, i.e. the
                // position just past the consumed `\n`.
                $bodyStart = $pos + 1;
                $handler->addCall('gfm_table_start', [$bodyStart], $pos);
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_MATCHED:
                // A match containing a newline is a row separator and
                // starts a new row; a bare `|` only separates cells within
                // the current row.
                if (str_contains($match, "\n")) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                // Either way a new cell opens: the first cell of the new
                // row, or the next cell of the current one.
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_UNMATCHED:
                // Plain cell text.
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);

                // Run the rewriter and install whatever writer process()
                // hands back.
                /** @var GfmTableRewriter $tableRewriter */
                $tableRewriter = $handler->getCallWriter();
                $handler->setCallWriter($tableRewriter->process());
                break;
        }

        return true;
    }
}