xref: /dokuwiki/inc/Parsing/ParserMode/GfmTable.php (revision 884caed926ca0aa0af6ce3f34ae3aa7317a3361a)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
7use dokuwiki\Parsing\ModeRegistry;
8
9/**
10 * GFM table block.
11 *
12 * Architecturally mirrors DokuWiki's native Table mode: an entry/exit
13 * lexer state with inline modes nested via `allowedModes`, plus a small
14 * post-processing rewriter (Handler\GfmTable) that turns the flat token
15 * stream into the canonical DokuWiki table call sequence.
16 *
17 * Cells are inline-only per spec ("Block-level elements cannot be inserted
18 * in a table"). Allowed nested categories therefore mirror DW Table:
19 * FORMATTING, SUBSTITUTION, PROTECTED, DISABLED.
20 *
21 * Entry-pattern strategy: a single zero-width lookahead asserts the table
22 * shape (header line containing a pipe, followed by a delimiter row whose
23 * cells are exactly `:?-+:?`). Only the leading newline is consumed; the
24 * lookahead validates the rest. Non-tables — paragraphs that happen to
25 * contain pipes — never enter the mode.
26 *
27 * The internal patterns recognise:
28 *   - `\|` as a cell separator, with a `(?<!\\)` lookbehind so a backslash-
29 *     prefixed pipe is left as raw input — the cell-splitting concern. The
30 *     unescape (turning `\|` into a literal `|`) is handled downstream:
31 *     GfmEscape consumes `\|` in normal cell text, and Handler\GfmTable's
32 *     unescapePipes() applies the tables-extension rewrite inside code
33 *     spans, where standard §6.1 escapes don't fire.
34 *   - `\n` followed by a non-newline, non-`>` character as a row separator;
35 *   - any other `\n` exits the mode (blank line, blockquote start, EOF).
36 *
37 * Sort 55 — one below DW Table's 60 — so that in `dw+md` and `md+dw` (where
38 * both modes load) the GFM lookahead-validated entry tries first; if it
39 * does not see a valid delimiter row, DW Table at sort 60 takes over for
40 * `\n|` rows.
41 */
class GfmTable extends AbstractMode
{
    /**
     * Mode categories that may be nested inside a table cell.
     *
     * Only inline categories are returned; block-level content is not
     * allowed inside GFM table cells.
     *
     * @inheritdoc
     */
    protected function allowedCategories(): array
    {
        $inline = [];
        $inline[] = ModeRegistry::CATEGORY_FORMATTING;
        $inline[] = ModeRegistry::CATEGORY_SUBSTITUTION;
        $inline[] = ModeRegistry::CATEGORY_PROTECTED;
        $inline[] = ModeRegistry::CATEGORY_DISABLED;

        return $inline;
    }

    /**
     * Sort weight of this mode (55, i.e. ahead of DW Table's 60).
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 55;
    }

    /**
     * Registers `gfm_table` with the mode registry as a block EOL mode.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        $this->registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Installs the entry pattern for the table mode.
     *
     * The pattern consumes a single `\n`. Everything after it is checked by
     * a zero-width lookahead and therefore stays in the input:
     *   - one header line that contains at least one `|`;
     *   - a newline followed by a delimiter row made of `:?-+:?` cells,
     *     separated by `|`, with optional leading/trailing `|` and optional
     *     spaces or tabs around each piece;
     *   - a newline or end of line right after the delimiter row.
     *
     * Text that merely contains a pipe but lacks the delimiter row does not
     * satisfy the lookahead, so it never enters this mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // One delimiter cell: optional padding, optional alignment colons
        // around a run of dashes.
        $dashCell = '[ \t]*:?-+:?';

        // Full delimiter row: optional leading pipe, first cell, any number
        // of `|`-separated cells, optional trailing pipe and padding.
        $delimiterRow = '[ \t]*\|?' . $dashCell
            . '(?:[ \t]*\|' . $dashCell . ')*'
            . '[ \t]*\|?[ \t]*';

        // Header line: anything on one line as long as a pipe is present.
        $headerLine = '[^\n]*\|[^\n]*';

        $lookahead = $headerLine . '\n' . $delimiterRow . '(?:\n|$)';

        $this->Lexer->addEntryPattern('\n(?=' . $lookahead . ')', $mode, 'gfm_table');
    }

    /**
     * Installs the patterns that are active while inside the table mode.
     *
     * Registration order is kept as: cell separator, row separator, exit.
     *
     * @inheritdoc
     */
    public function postConnect()
    {
        $innerPatterns = [
            // Cell separator: a pipe that is not directly preceded by a
            // backslash, so `\|` does not split a cell. Turning `\|` into a
            // literal `|` is not done here; it is left to downstream
            // handling (GfmEscape for normal cell text and
            // Handler\GfmTable::unescapePipes() for code spans).
            // Known edge: in `\\|` the lookbehind sees the second backslash
            // and refuses to split, although the pipe is a real separator.
            // This relies on GfmEscape consuming `\\` first, which leaves a
            // plain `|` at the separator position.
            '(?<!\\\\)\|',
            // Row separator: a newline whose next character is neither a
            // newline nor `>`. Rejecting `>` lets a blockquote end the table
            // (spec 201); rejecting a newline (and requiring any character
            // at all) keeps blank lines and end-of-input from starting a row.
            '\n(?=[^\n>])',
        ];
        foreach ($innerPatterns as $innerPattern) {
            $this->Lexer->addPattern($innerPattern, 'gfm_table');
        }

        // Every remaining newline (blank line, blockquote start, EOF)
        // leaves the mode.
        $this->Lexer->addExitPattern('\n', 'gfm_table');
    }

    /**
     * Translates lexer events of the table mode into handler calls.
     *
     * - ENTER: wraps the current call writer in a GfmTableRewriter and emits
     *   `gfm_table_start`, then the first `gfm_table_row` and
     *   `gfm_table_cell`.
     * - MATCHED: a match containing a newline starts a new row and its first
     *   cell; any other match (a bare `|`) starts the next cell only.
     * - UNMATCHED: the text is passed on as `cdata`.
     * - EXIT: emits `gfm_table_end`, then installs whatever the rewriter's
     *   process() returns as the handler's call writer.
     *
     * Any other state is ignored. The method always returns true.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                $outerWriter = $handler->getCallWriter();
                $handler->setCallWriter(new GfmTableRewriter($outerWriter));
                // The entry match is the consumed `\n`, so the table body
                // begins one position later.
                $bodyStart = $pos + 1;
                $handler->addCall('gfm_table_start', [$bodyStart], $pos);
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                return true;

            case DOKU_LEXER_MATCHED:
                // A row separator opens a row first; both separator kinds
                // then open a cell.
                if (str_contains($match, "\n")) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                $handler->addCall('gfm_table_cell', [], $pos);
                return true;

            case DOKU_LEXER_UNMATCHED:
                $handler->addCall('cdata', [$match], $pos);
                return true;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);
                /** @var GfmTableRewriter $tableWriter */
                $tableWriter = $handler->getCallWriter();
                $handler->setCallWriter($tableWriter->process());
                return true;
        }

        return true;
    }
}
159