xref: /dokuwiki/inc/Parsing/ParserMode/GfmTable.php (revision 3dabe4e0a0d70b79a7aced8ac8a36d4b37a61024)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
7use dokuwiki\Parsing\ModeRegistry;
8
9/**
10 * GFM table block.
11 *
12 * Architecturally mirrors DokuWiki's native Table mode: an entry/exit
13 * lexer state with inline modes nested via `allowedModes`, plus a small
14 * post-processing rewriter (Handler\GfmTable) that turns the flat token
15 * stream into the canonical DokuWiki table call sequence.
16 *
17 * Cells are inline-only per spec ("Block-level elements cannot be inserted
18 * in a table"). Allowed nested categories therefore mirror DW Table:
19 * FORMATTING, SUBSTITION, PROTECTED, DISABLED.
20 *
21 * Entry-pattern strategy: a single zero-width lookahead asserts the table
22 * shape (header line containing a pipe, followed by a delimiter row whose
23 * cells are exactly `:?-+:?`). Only the leading newline is consumed; the
24 * lookahead validates the rest. Non-tables — paragraphs that happen to
25 * contain pipes — never enter the mode.
26 *
27 * The internal patterns recognise:
28 *   - `\|` as a cell separator, with a `(?<!\\)` lookbehind so a backslash-
29 *     prefixed pipe is left as raw input — the cell-splitting concern. The
30 *     unescape itself (turning `\|` into a literal `|`) is GfmEscape's
31 *     concern, not this mode's; until that mode lands, `\|` survives in
32 *     cell content as the literal two-char sequence.
33 *   - `\n` followed by a non-newline, non-`>` character as a row separator;
34 *   - any other `\n` exits the mode (blank line, blockquote start, EOF).
35 *
36 * Sort 55 — one below DW Table's 60 — so that in `dw+md` and `md+dw` (where
37 * both modes load) the GFM lookahead-validated entry tries first; if it
38 * does not see a valid delimiter row, DW Table at sort 60 takes over for
39 * `\n|` rows.
40 */
class GfmTable extends AbstractMode
{
    /**
     * Restrict cell content to inline modes.
     *
     * Only modes from the FORMATTING, SUBSTITION, PROTECTED and DISABLED
     * categories may nest inside a GFM table.
     */
    public function __construct()
    {
        $inlineCategories = [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];
        $registry = ModeRegistry::getInstance();
        $this->allowedModes = $registry->getModesForCategories($inlineCategories);
    }

    /**
     * Sort weight of this mode.
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 55;
    }

    /**
     * Announce `gfm_table` to the mode registry as a block-EOL mode.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        $registry = ModeRegistry::getInstance();
        $registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Register the entry pattern on the given parent mode.
     *
     * The pattern itself matches a single `\n`. Everything else lives in a
     * zero-width lookahead that requires:
     *   - a header line with at least one `|` in it, and
     *   - a following delimiter row made of `:?-+:?` cells separated by
     *     `|`, with optional leading/trailing pipe and blanks, terminated
     *     by a newline or end of line.
     *
     * Text that merely contains a pipe but lacks such a delimiter row does
     * not match, so it never enters table mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Building blocks of the delimiter row.
        $blank = '[ \t]*';
        $dashCell = ':?-+:?';
        $edgePipe = $blank . '\|?' . $blank;    // optional outer pipe

        $delimiterRow =
            $edgePipe . $dashCell .
            '(?:' . $blank . '\|' . $blank . $dashCell . ')*' .
            $edgePipe;

        // Any single line that has a pipe somewhere in it.
        $headerLine = '[^\n]*\|[^\n]*';

        $lookahead = '(?=' . $headerLine . '\n' . $delimiterRow . '(?:\n|$)' . ')';

        $this->Lexer->addEntryPattern('\n' . $lookahead, $mode, 'gfm_table');
    }

    /**
     * Register the patterns that are active while inside the table.
     *
     * Registration order is significant: the row separator has to be added
     * before the catch-all exit newline.
     *
     * @inheritdoc
     */
    public function postConnect()
    {
        $lexer = $this->Lexer;

        // A pipe that is not preceded by a backslash separates two cells.
        // An escaped pipe (`\|`) is therefore left inside the cell text;
        // converting it to a literal `|` is GfmEscape's job, not ours.
        // Known edge: for `\\|` (escaped backslash + real separator) the
        // lookbehind sees the second backslash and does not split. This is
        // expected to resolve itself once GfmEscape consumes `\\` first,
        // leaving a plain `|` in separator position.
        $lexer->addPattern('(?<!\\\\)\|', 'gfm_table');

        // A newline starts a new row when the next character is neither a
        // newline nor `>`. The `>` exclusion lets a blockquote end the
        // table (spec 201); the non-newline requirement rules out blank
        // lines and end-of-input.
        $lexer->addPattern('\n(?=[^\n>])', 'gfm_table');

        // Every remaining newline (blank line, blockquote start, EOF)
        // leaves the mode.
        $lexer->addExitPattern('\n', 'gfm_table');
    }

    /**
     * Translate lexer events into the flat `gfm_table_*` call stream.
     *
     * On entry the handler's call writer is wrapped in a GfmTableRewriter;
     * on exit the rewriter's `process()` result is installed as the call
     * writer again.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Loose `==` is intentional: it is what a `switch` would compare with.
        if ($state == DOKU_LEXER_ENTER) {
            $outerWriter = $handler->getCallWriter();
            $handler->setCallWriter(new GfmTableRewriter($outerWriter));

            // The start call carries the body position, i.e. the position
            // just after the `\n` consumed by the entry pattern.
            $bodyPos = $pos + 1;
            $handler->addCall('gfm_table_start', [$bodyPos], $pos);
            $handler->addCall('gfm_table_row', [], $pos);
            $handler->addCall('gfm_table_cell', [], $pos);
        } elseif ($state == DOKU_LEXER_MATCHED) {
            // A match containing a newline is a row separator and opens a
            // new row first; a bare `|` only separates cells. Either way a
            // new cell begins afterwards.
            if (str_contains($match, "\n")) {
                $handler->addCall('gfm_table_row', [], $pos);
            }
            $handler->addCall('gfm_table_cell', [], $pos);
        } elseif ($state == DOKU_LEXER_UNMATCHED) {
            $handler->addCall('cdata', [$match], $pos);
        } elseif ($state == DOKU_LEXER_EXIT) {
            $handler->addCall('gfm_table_end', [], $pos);

            /** @var GfmTableRewriter $tableWriter */
            $tableWriter = $handler->getCallWriter();
            $handler->setCallWriter($tableWriter->process());
        }

        return true;
    }
}
154