xref: /dokuwiki/inc/Parsing/ParserMode/GfmTable.php (revision 95f694202286c1add4c442936a5caa38db0dd603)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\Handler\GfmTable as GfmTableRewriter;
7use dokuwiki\Parsing\ModeRegistry;
8
9/**
10 * GFM table block.
11 *
12 * Architecturally mirrors DokuWiki's native Table mode: an entry/exit
13 * lexer state with inline modes nested via `allowedModes`, plus a small
14 * post-processing rewriter (Handler\GfmTable) that turns the flat token
15 * stream into the canonical DokuWiki table call sequence.
16 *
17 * Cells are inline-only per spec ("Block-level elements cannot be inserted
18 * in a table"). Allowed nested categories therefore mirror DW Table:
19 * FORMATTING, SUBSTITION, PROTECTED, DISABLED.
20 *
21 * Entry-pattern strategy: a single zero-width lookahead asserts the table
22 * shape (header line containing a pipe, followed by a delimiter row whose
23 * cells are exactly `:?-+:?`). Only the leading newline is consumed; the
24 * lookahead validates the rest. Non-tables — paragraphs that happen to
25 * contain pipes — never enter the mode.
26 *
27 * The internal patterns recognise:
28 *   - `\|` as a cell separator, with a `(?<!\\)` lookbehind so a backslash-
29 *     prefixed pipe is left as raw input — the cell-splitting concern. The
30 *     unescape (turning `\|` into a literal `|`) is handled downstream:
31 *     GfmEscape consumes `\|` in normal cell text, and Handler\GfmTable's
32 *     unescapePipes() applies the tables-extension rewrite inside code
33 *     spans, where standard §6.1 escapes don't fire.
34 *   - `\n` followed by a non-newline, non-`>` character as a row separator;
35 *   - any other `\n` exits the mode (blank line, blockquote start, EOF).
36 *
37 * Sort 55 — one below DW Table's 60 — so that in `dw+md` and `md+dw` (where
38 * both modes load) the GFM lookahead-validated entry tries first; if it
39 * does not see a valid delimiter row, DW Table at sort 60 takes over for
40 * `\n|` rows.
41 */
class GfmTable extends AbstractMode
{
    /**
     * Limit nested parsing inside cells to the inline mode categories.
     *
     * The allowed modes are whatever the registry reports for the
     * FORMATTING, SUBSTITION, PROTECTED and DISABLED categories.
     */
    public function __construct()
    {
        $inlineCategories = [
            ModeRegistry::CATEGORY_FORMATTING,
            ModeRegistry::CATEGORY_SUBSTITION,
            ModeRegistry::CATEGORY_PROTECTED,
            ModeRegistry::CATEGORY_DISABLED,
        ];

        $registry = ModeRegistry::getInstance();
        $this->allowedModes = $registry->getModesForCategories($inlineCategories);
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 55;
    }

    /** @inheritdoc */
    public function preConnect()
    {
        $registry = ModeRegistry::getInstance();
        $registry->registerBlockEolMode('gfm_table');
    }

    /**
     * Register the entry pattern for the `gfm_table` lexer state.
     *
     * The pattern itself consumes a single `\n`. Everything after it sits
     * in a zero-width lookahead that requires:
     *   - a first line with at least one `|` in it, then
     *   - a second line made of `:?-+:?` cells, optionally separated and
     *     optionally framed by `|`, with only spaces/tabs as padding, then
     *   - a newline or the end of the input.
     *
     * Text that merely contains a pipe but lacks such a second line does
     * not match, so it never enters this mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Second line of the table: first cell, any number of further
        // `|`-separated cells, then optional trailing pipe and padding.
        $delimiterRow = '[ \t]*\|?[ \t]*:?-+:?'
            . '(?:[ \t]*\|[ \t]*:?-+:?)*'
            . '[ \t]*\|?[ \t]*';

        // What must follow the consumed newline for the mode to start:
        // a line with a pipe in it, the delimiter row, and a line end.
        $tableShape = '[^\n]*\|[^\n]*'
            . '\n' . $delimiterRow
            . '(?:\n|$)';

        $entryPattern = '\n(?=' . $tableShape . ')';

        $this->Lexer->addEntryPattern($entryPattern, $mode, 'gfm_table');
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $lexer = $this->Lexer;

        // 1) Cell separator: a pipe that is not directly preceded by a
        //    backslash. An escaped pipe (`\|`) therefore stays inside the
        //    cell text instead of splitting it; this mode does not rewrite
        //    it to a plain `|` itself. Known edge: in `\\|` the lookbehind
        //    sees the second backslash and does not split, even though
        //    that pipe is not actually escaped.
        $lexer->addPattern('(?<!\\\\)\|', 'gfm_table');

        // 2) Row separator: a newline whose next character is neither a
        //    newline nor `>`. Blank lines, a following `>` line and the end
        //    of the input consequently do not count as a new row.
        $lexer->addPattern('\n(?=[^\n>])', 'gfm_table');

        // 3) Every remaining newline leaves the table state.
        $lexer->addExitPattern('\n', 'gfm_table');
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        switch ($state) {
            case DOKU_LEXER_ENTER:
                // Wrap the current call writer in the table rewriter so
                // all calls emitted while in this state pass through it.
                $outerWriter = $handler->getCallWriter();
                $handler->setCallWriter(new GfmTableRewriter($outerWriter));

                // The entry match is the single leading newline, so the
                // table body begins one byte after $pos.
                $bodyStart = $pos + 1;
                $handler->addCall('gfm_table_start', [$bodyStart], $pos);

                // The first row and its first cell open right away.
                $handler->addCall('gfm_table_row', [], $pos);
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_MATCHED:
                // A match containing a newline is a row separator and opens
                // a new row; a bare `|` does not. Either way a new cell
                // starts afterwards.
                $startsNewRow = str_contains($match, "\n");
                if ($startsNewRow) {
                    $handler->addCall('gfm_table_row', [], $pos);
                }
                $handler->addCall('gfm_table_cell', [], $pos);
                break;

            case DOKU_LEXER_UNMATCHED:
                // Plain cell text is passed on unchanged.
                $handler->addCall('cdata', [$match], $pos);
                break;

            case DOKU_LEXER_EXIT:
                $handler->addCall('gfm_table_end', [], $pos);

                // Let the rewriter finish its work; whatever process()
                // returns becomes the handler's call writer again
                // (presumably the writer wrapped on entry).
                /** @var GfmTableRewriter $tableWriter */
                $tableWriter = $handler->getCallWriter();
                $restoredWriter = $tableWriter->process();
                $handler->setCallWriter($restoredWriter);
                break;
        }

        return true;
    }
}
157