xref: /dokuwiki/inc/Parsing/ParserMode/GfmHeader.php (revision 4b31eadfd0dd82e519dd953a4cb0ad079114879d)
18719732dSAndreas Gohr<?php
28719732dSAndreas Gohr
38719732dSAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
48719732dSAndreas Gohr
58719732dSAndreas Gohruse dokuwiki\Parsing\Handler;
68719732dSAndreas Gohr
/**
 * Parser mode for GFM (GitHub Flavored Markdown) ATX headings.
 *
 * A heading is a run of one to six `#` characters at the very start of a
 * line, followed either by a space/tab plus the heading text, or by nothing
 * at all (an empty heading). The handler emits header / section_open /
 * section_close calls, mirroring DokuWiki's own Header mode so that
 * renderers and the TOC handle both syntaxes the same way.
 *
 * Deliberately out of scope:
 *
 *  - Setext headings (`===` / `---` underlines): they clash with DokuWiki's
 *    horizontal rule and heading delimiters.
 *  - Indented openers: GFM tolerates up to three leading spaces, but
 *    DokuWiki's Preformatted blocks are triggered by a 2-space indent and
 *    resolving that overlap is not worth it for a tolerance feature. The
 *    first `#` therefore has to sit at column 0.
 */
class GfmHeader extends AbstractMode
{
    /** @inheritdoc */
    public function getSort()
    {
        return 50;
    }

    /**
     * Registers the ATX heading pattern as a special pattern on the lexer.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // The pattern, piece by piece:
        //
        //   (?<=\n)           Anchors to the start of a line (the Parser
        //                     prepends a newline to the input). It is a
        //                     lookbehind on purpose: the newline stays out
        //                     of the match, so the reported position is that
        //                     of the first `#`. If the newline were consumed,
        //                     the section-edit range would begin on the blank
        //                     line above the heading and that line would be
        //                     swallowed on save.
        //   #{1,6}(?!#)       One to six hashes. The negative lookahead
        //                     refuses a seventh, leaving `####### foo` as
        //                     ordinary paragraph text.
        //   (?:[ \t][^\n]*)?  Optional body, which must begin with a space or
        //                     tab. `#hashtag` has a letter directly after the
        //                     hash, so no body is matched for it.
        //   (?=\n)            The match has to reach the end of the line.
        //                     Together with the optional body this means a
        //                     line qualifies only if it is bare hashes or
        //                     hashes followed by whitespace-led text.
        $atxHeading = '(?<=\n)#{1,6}(?!#)(?:[ \t][^\n]*)?(?=\n)';

        $this->Lexer->addSpecialPattern($atxHeading, $mode, 'gfm_header');
    }

    /**
     * Converts a matched heading line into handler calls.
     *
     * The heading level is the number of leading `#` characters; the rest of
     * the line, trimmed and with any closing `#` sequence removed, is the
     * title. If a section is currently open it is closed first, then the
     * header and a new section_open are added and the handler's 'section'
     * status is set.
     *
     * NOTE(review): the entry pattern permits six hashes, so the level passed
     * on can be 6 — confirm that renderers and the TOC accept a sixth level.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $depth = strspn($match, '#');
        $heading = $this->stripClosingSequence(trim(substr($match, $depth)));

        // A heading always starts a new section, so end the running one.
        $sectionIsOpen = $handler->getStatus('section');
        if ($sectionIsOpen) {
            $handler->addCall('section_close', [], $pos);
        }

        $handler->addCall('header', [$heading, $depth, $pos], $pos);
        $handler->addCall('section_open', [$depth], $pos);
        $handler->setStatus('section', true);

        return true;
    }

    /**
     * Removes an optional closing run of `#` characters from a heading body.
     *
     * The closing run only counts when whitespace separates it from the
     * text; a `#` glued to the text (`# foo#`) remains part of the title.
     * A body made up of nothing but `#` characters is treated as a closing
     * run without any title.
     *
     * @param string $body heading text with surrounding whitespace already trimmed
     * @return string the title without its closing sequence
     */
    private function stripClosingSequence($body)
    {
        // Nothing but hashes: the whole body is the closer.
        if (preg_match('/^#+$/', $body)) {
            return '';
        }

        // Text, then whitespace, then hashes up to the end of the line.
        if (preg_match('/^(.*?)[ \t]+#+$/', $body, $parts)) {
            return rtrim($parts[1]);
        }

        return $body;
    }
}
80