xref: /dokuwiki/inc/Parsing/ParserMode/GfmHeader.php (revision 4b31eadfd0dd82e519dd953a4cb0ad079114879d)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6
7/**
8 * GFM ATX heading: 1-6 leading `#` characters, a mandatory space (or end of
9 * line for an empty heading), and optional body; emits the same
10 * header / section_open / section_close instructions as DokuWiki's Header
11 * so renderers and TOC treat it identically.
12 *
13 * Setext headings (=== / --- underlines) are deliberately not supported —
14 * they collide with DokuWiki's horizontal rule and heading delimiters.
15 *
16 * Leading indentation is also not supported: GFM allows 0-3 spaces before
17 * the opener, but DokuWiki uses 2-space indent for Preformatted blocks
18 * and that collision isn't worth untangling for a tolerance feature. The
19 * opener must sit at column 0.
20 */
class GfmHeader extends AbstractMode
{
    /**
     * @inheritdoc
     *
     * This mode always reports the fixed sort value 50.
     */
    public function getSort()
    {
        return 50;
    }

    /**
     * @inheritdoc
     *
     * Registers one special pattern that matches a complete ATX heading
     * line and hands it to the `gfm_header` mode.
     */
    public function connectTo($mode)
    {
        // The pattern, piece by piece:
        //
        //   (?<=\n)            Anchors to the start of a line (the Parser
        //                      prepends a newline to the input). Because this
        //                      is a lookbehind the newline is not consumed,
        //                      so the reported position is that of the first
        //                      `#`. If the newline were consumed, the
        //                      section-edit start would land on the blank
        //                      line above the heading and that line would be
        //                      eaten on save.
        //   #{1,6}(?!#)        One to six hashes. The negative lookahead
        //                      refuses a seventh, so `####### foo` is left
        //                      alone as paragraph text.
        //   (?:[ \t][^\n]*)?   Optional body, which has to begin with a space
        //                      or a tab. A hash glued to a letter
        //                      (`#hashtag`) therefore has no body, and the
        //                      end-of-line check below then only succeeds
        //                      when the line consists of hashes alone.
        //   (?=\n)             The match has to reach the end of the line.
        $headingLine = '(?<=\n)#{1,6}(?!#)(?:[ \t][^\n]*)?(?=\n)';

        $this->Lexer->addSpecialPattern($headingLine, $mode, 'gfm_header');
    }

    /**
     * @inheritdoc
     *
     * Converts one matched heading line into handler calls: closes the
     * currently open section (if the handler reports one), emits the
     * header itself, opens a new section at the heading's level and marks
     * the section status as open.
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // The length of the leading hash run is the heading level; the
        // remainder of the line, trimmed, is the raw heading body.
        $hashCount = strspn($match, '#');
        $heading = $this->stripClosingSequence(trim(substr($match, $hashCount)));

        $sectionIsOpen = $handler->getStatus('section');
        if ($sectionIsOpen) {
            $handler->addCall('section_close', [], $pos);
        }

        $handler->addCall('header', [$heading, $hashCount, $pos], $pos);
        $handler->addCall('section_open', [$hashCount], $pos);
        $handler->setStatus('section', true);

        return true;
    }

    /**
     * Drop an optional closing `#` run from an already trimmed heading body.
     *
     * @param string $body heading text without the opener and without
     *                     surrounding whitespace
     * @return string the title with any closing sequence removed
     */
    private function stripClosingSequence($body)
    {
        // A body made of hashes only is a closer without any title text.
        if (preg_match('/^#+$/', $body)) {
            return '';
        }

        // The closer only counts when whitespace separates it from the
        // text; in `# foo#` the hash touches the body and stays content.
        if (preg_match('/^(.*?)[ \t]+#+$/', $body, $parts)) {
            return rtrim($parts[1]);
        }

        return $body;
    }
}
80