<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Parser mode for GitHub Flavored Markdown (GFM) tables.
 *
 * Structure
 * ---------
 * The mode follows the same architecture as DokuWiki's native Table mode:
 * it is an entry/exit lexer state, inline modes are nested through
 * `allowedModes`, and a post-processing rewriter (Handler\GfmTable) converts
 * the flat token stream emitted here into the canonical DokuWiki table call
 * sequence.
 *
 * Cell content
 * ------------
 * The GFM spec states that "Block-level elements cannot be inserted in a
 * table", so cells are inline-only. The nested categories are the same ones
 * DW Table allows: FORMATTING, SUBSTITION, PROTECTED, DISABLED.
 *
 * Entering the mode
 * -----------------
 * The entry pattern consumes nothing but the leading newline. Everything
 * else is a zero-width lookahead that checks the table shape: a header line
 * with a pipe in it, immediately followed by a delimiter row whose cells are
 * exactly `:?-+:?`. A paragraph that merely happens to contain a pipe fails
 * that check and never enters the mode.
 *
 * Inside the mode
 * ---------------
 *  - `|` not preceded by a backslash (`(?<!\\)` lookbehind) separates cells.
 *    A backslash-prefixed pipe stays in the raw input; this mode only cares
 *    about splitting cells. Turning `\|` into a literal `|` is left to
 *    GfmEscape. Until that mode exists, `\|` remains in the cell content as
 *    the literal two-character sequence.
 *  - `\n` followed by a character that is neither a newline nor `>`
 *    separates rows.
 *  - Every other `\n` (blank line, blockquote start, EOF) leaves the mode.
 *
 * Ordering
 * --------
 * The sort value is 55, one below DW Table's 60. When both modes are loaded
 * (`dw+md` and `md+dw`) the lookahead-validated GFM entry is therefore tried
 * first; if no valid delimiter row follows, DW Table at sort 60 handles the
 * `\n|` rows instead.
 */
class GfmTable extends AbstractMode
{
    /**
     * Restrict nested modes to the inline categories permitted in a cell.
     */
    public function __construct()
    {
        $inlineCategories = [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];

        $registry = ModeRegistry::getInstance();
        $this->allowedModes = $registry->getModesForCategories($inlineCategories);
    }

    /**
     * Sort position of this mode (55, one below DW Table's 60).
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 55;
    }

    /**
     * Register `gfm_table` with the mode registry as a block EOL mode.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        $registry = ModeRegistry::getInstance();
        $registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Install the entry pattern into the given parent mode.
     *
     * The pattern consumes a single `\n`. The remainder is a zero-width
     * lookahead that requires:
     *  - a header line that contains at least one `|`, and
     *  - a delimiter row made of `:?-+:?` cells separated by `|`, with
     *    optional leading/trailing pipes and horizontal whitespace,
     *    terminated by a newline or end of input.
     *
     * This validation is what keeps ordinary paragraphs with a pipe in them
     * from being swallowed by the table mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Building blocks of the delimiter row.
        $blank = '[ \t]*';   // optional spaces/tabs
        $dashes = ':?-+:?';  // one delimiter cell with optional alignment colons
        $edgePipe = '\|?';   // optional pipe at either edge of the row

        $delimiterRow =
            $blank . $edgePipe . $blank . $dashes .
            '(?:' . $blank . '\|' . $blank . $dashes . ')*' .
            $blank . $edgePipe . $blank;

        $headerRow = '[^\n]*\|[^\n]*'; // a single line with a pipe somewhere in it
        $lineEnd = '(?:\n|$)';

        $entryPattern = '\n(?=' . $headerRow . '\n' . $delimiterRow . $lineEnd . ')';

        $this->Lexer->addEntryPattern($entryPattern, $mode, 'gfm_table');
    }

    /**
     * Install the in-table patterns: cell separator, row separator, exit.
     *
     * The registration order below is the same as it has always been and
     * should be kept: the row separator is added before the bare-newline
     * exit pattern.
     *
     * @inheritdoc
     */
    public function postConnect()
    {
        $state = 'gfm_table';

        // Cell separator: a pipe that is not directly preceded by a
        // backslash, so `\|` does not split a cell. Unescaping `\|` to `|`
        // is not done here; it is left to GfmEscape. Known edge: for `\\|`
        // (escaped backslash followed by a real separator) the lookbehind
        // sees the second backslash and does not split. The expectation is
        // that GfmEscape consumes `\\` first, leaving a plain `|` in
        // separator position.
        $this->Lexer->addPattern('(?<!\\\\)\|', $state);

        // Row separator: newline followed by something that is neither a
        // newline nor `>`. The `>` exclusion lets a blockquote end the
        // table (spec 201); the non-newline requirement rules out blank
        // lines and end of input.
        $this->Lexer->addPattern('\n(?=[^\n>])', $state);

        // Every remaining newline (blank line, blockquote start, EOF)
        // terminates the table.
        $this->Lexer->addExitPattern('\n', $state);
    }

    /**
     * Translate lexer events for this mode into handler calls.
     *
     * While the table is open, calls are routed through a GfmTableRewriter
     * that wraps the handler's previous call writer; on exit the rewriter's
     * process() result is installed as the call writer again.
     *
     * @param string $match text matched by the lexer
     * @param int $state one of the DOKU_LEXER_* state constants
     * @param int $pos position of the match in the source
     * @param Handler $handler handler receiving the calls
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                $handler->setCallWriter(new GfmTableRewriter($handler->getCallWriter()));

                // Opening sequence: table, first row, first cell. The start
                // call carries the body position, i.e. one past the `\n`
                // that the entry pattern consumed.
                $opening = [
                    'gfm_table_start' => [$pos + 1],
                    'gfm_table_row' => [],
                    'gfm_table_cell' => [],
                ];
                foreach ($opening as $call => $args) {
                    $handler->addCall($call, $args, $pos);
                }
                break;

            case DOKU_LEXER_MATCHED:
                // A match containing a newline is a row separator and starts
                // a new row first; a bare `|` only starts the next cell.
                if (str_contains($match, "\n")) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                // Both separators open a cell.
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_UNMATCHED:
                // Plain text inside a cell.
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);
                /** @var GfmTableRewriter $tableWriter */
                $tableWriter = $handler->getCallWriter();
                $handler->setCallWriter($tableWriter->process());
                break;
        }

        return true;
    }
}