xref: /dokuwiki/_test/tests/Parsing/Markdown/GfmSpecTest.php (revision 72b2703b4f922ca520e7cb2e7765a252175f30d3)
1*72b2703bSAndreas Gohr<?php
2*72b2703bSAndreas Gohr
3*72b2703bSAndreas Gohrnamespace dokuwiki\test\Parsing\Markdown;
4*72b2703bSAndreas Gohr
5*72b2703bSAndreas Gohruse dokuwiki\Parsing\ModeRegistry;
6*72b2703bSAndreas Gohr
/**
 * Roundtrip tests driven by GFM's spec.txt.
 *
 * Each example in gfm-spec/spec.txt becomes one data-provider case. The
 * markdown input is run through DokuWiki's full pipeline (parser + XHTML
 * renderer) and the result is compared to the expected HTML from the spec,
 * tolerating whitespace differences around block-level tags.
 *
 * Most examples are expected to FAIL until the relevant GFM parser modes
 * are implemented — they are the branch's living TODO list for GFM parity.
 * Do not mark such failures as incomplete or skipped.
 *
 * `gfm-spec/skip.php` lists examples that are deliberately out of scope
 * for DokuWiki (e.g. CommonMark flanking-delimiter edge cases). Those are
 * reported as skipped, together with the reason given in that file.
 */
class GfmSpecTest extends \DokuWikiTest
{
    /** Directory (with trailing slash) that holds spec.txt and skip.php. */
    private const FIXTURE_DIR = __DIR__ . '/gfm-spec/';

    /**
     * Yield one data set per example read from spec.txt.
     *
     * Every data set is keyed by "#<number> <section>" and carries the
     * markdown input, the expected HTML, and the skip reason looked up in
     * skip.php by example number (null when the example is not listed).
     *
     * @return iterable<string, array{0: string, 1: string, 2: ?string}>
     */
    public static function specProvider(): iterable
    {
        $reader = new SpecReader(self::FIXTURE_DIR . 'spec.txt');
        $skip   = require self::FIXTURE_DIR . 'skip.php';

        foreach ($reader->examples() as $ex) {
            $reason = $skip[$ex['number']] ?? null;
            $label  = sprintf('#%d %s', $ex['number'], $ex['section']);
            yield $label => [$ex['markdown'], $ex['html'], $reason];
        }
    }

    /**
     * Render one spec example and compare it with the spec's reference HTML.
     *
     * @dataProvider specProvider
     *
     * @param string      $md         markdown input of the example
     * @param string      $expected   reference HTML of the example
     * @param string|null $skipReason non-null when the example is listed in skip.php
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if ($skipReason !== null) {
            $this->markTestSkipped($skipReason);
        }
        $actual = $this->renderMarkdown($md);
        $this->assertHtmlEquals($expected, $actual);
    }

    /**
     * Drop the mode registry state built for this test, then run the
     * parent's cleanup.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Render markdown text through DokuWiki's full parser + XHTML renderer
     * pipeline under the `markdown` syntax setting.
     *
     * The global `$conf['syntax']` value is overridden only for the duration
     * of the call and put back afterwards, so the override cannot leak into
     * code that runs later in the same process.
     *
     * @param string $text markdown source
     * @return string whatever p_render() produces for the 'xhtml' mode
     */
    private function renderMarkdown(string $text): string
    {
        global $conf;

        $hadSyntax      = isset($conf['syntax']);
        $previousSyntax = $conf['syntax'] ?? null;

        $conf['syntax'] = 'markdown';
        // reset so the registry is rebuilt under the setting chosen above
        ModeRegistry::reset();

        try {
            $instructions = p_get_instructions($text);
            $info = [];
            return p_render('xhtml', $instructions, $info);
        } finally {
            if ($hadSyntax) {
                $conf['syntax'] = $previousSyntax;
            } else {
                unset($conf['syntax']);
            }
        }
    }

    /**
     * Assert two HTML strings are equivalent after whitespace normalization.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, ul/ol/li, table/tr/td,
     * blockquote, pre, hr). Whitespace around **inline** tags (em, strong,
     * a, code, span, img, br, etc.) is preserved, because `<em>x</em> y`
     * and `<em>x</em>y` render differently.
     *
     * @param string $expected reference HTML
     * @param string $actual   rendered HTML
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        // both operands are strings, so the strict comparison is the precise one
        $this->assertSame(
            $this->normalizeHtml($expected),
            $this->normalizeHtml($actual)
        );
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * NOTE(review): whitespace directly inside `<pre ...>` is stripped as
     * well, because `pre` is in the block list — confirm that is intended.
     *
     * @param string $html markup to normalize
     * @return string normalized markup, trimmed at both ends
     * @throws \RuntimeException when PCRE fails, instead of silently
     *                           comparing an empty string
     */
    private function normalizeHtml(string $html): string
    {
        $block = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

        // Patterns are applied in order: first opening block tags (including
        // attributes), then closing block tags.
        $normalized = preg_replace(
            [
                '#\s*<(' . $block . ')((?:\s[^>]*)?)>\s*#',
                '#\s*</(' . $block . ')>\s*#',
            ],
            [
                '<$1$2>',
                '</$1>',
            ],
            $html
        );

        if ($normalized === null) {
            // preg_replace() reports failure with null; trim(null) would turn
            // that into '' and could make two broken inputs compare equal.
            throw new \RuntimeException(
                'HTML normalization failed, PCRE error code ' . preg_last_error()
            );
        }

        return trim($normalized);
    }
}
105