xref: /dokuwiki/_test/tests/Parsing/Markdown/GfmSpecTest.php (revision 8719732d06ab7306149725c7c5ea71deb8ff0382)
1<?php
2
3namespace dokuwiki\test\Parsing\Markdown;
4
5use dokuwiki\Parsing\ModeRegistry;
6
7/**
8 * Roundtrip tests driven by GFM's spec.txt.
9 *
10 * Each example in gfm-spec/spec.txt becomes one data-provider case. The
11 * markdown input is run through DokuWiki's full pipeline (parser + XHTML
12 * renderer) and the result is compared to the expected HTML from the spec,
13 * tolerating whitespace differences around block-level tags.
14 *
15 * Most examples are expected to FAIL until the relevant GFM parser modes
16 * are implemented — they are the branch's living TODO list for GFM parity.
17 * Do not mark such failures incomplete or skipped.
18 *
19 * `gfm-spec/skip.php` lists examples that are deliberately out of scope
20 * for DokuWiki (e.g. CommonMark flanking-delimiter edge cases). Those are
21 * reported as skipped with a reason.
22 */
class GfmSpecTest extends \DokuWikiTest
{
    /** Directory holding spec.txt and skip.php. */
    private const FIXTURE_DIR = __DIR__ . '/gfm-spec/';

    /**
     * Provide one data set per example found in spec.txt.
     *
     * Each data set is `[markdown, expected html, skip reason|null]` and is
     * keyed by a label built from the example number and its section, so a
     * failure can be traced back to the spec.
     *
     * @return iterable<string, array{0: string, 1: string, 2: ?string}>
     */
    public static function specProvider(): iterable
    {
        $reader = new SpecReader(self::FIXTURE_DIR . 'spec.txt');
        // skip.php returns a map of example number => reason for skipping
        $skip   = require self::FIXTURE_DIR . 'skip.php';

        foreach ($reader->examples() as $ex) {
            $reason = $skip[$ex['number']] ?? null;
            $label  = sprintf('#%d %s', $ex['number'], $ex['section']);
            yield $label => [$ex['markdown'], $ex['html'], $reason];
        }
    }

    /**
     * Render one spec example and compare it against the reference HTML.
     *
     * @dataProvider specProvider
     *
     * @param string      $md         markdown input of the example
     * @param string      $expected   reference HTML of the example
     * @param string|null $skipReason non-null when the example is listed in skip.php
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if ($skipReason !== null) {
            // markTestSkipped() throws, so nothing below runs for skipped examples
            $this->markTestSkipped($skipReason);
        }
        $actual = $this->renderMarkdown($md);
        $this->assertHtmlEquals($expected, $actual);
    }

    /**
     * Reset the mode registry after each test, then run the parent teardown.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Render markdown text through DokuWiki's full parser + XHTML renderer
     * pipeline under the `markdown` syntax setting.
     *
     * @param string $text markdown source
     * @return string rendered XHTML
     */
    private function renderMarkdown(string $text): string
    {
        global $conf;
        $conf['syntax'] = 'markdown';
        // reset the registry after switching the syntax setting, before parsing
        ModeRegistry::reset();

        $instructions = p_get_instructions($text);
        $info = [];
        return p_render('xhtml', $instructions, $info);
    }

    /**
     * Assert two HTML strings are equivalent after whitespace normalization.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, ul/ol/li, table/tr/td,
     * blockquote, pre, hr). Whitespace around **inline** tags (em, strong,
     * a, code, span, img, br, etc.) is preserved, because `<em>x</em> y`
     * and `<em>x</em>y` render differently.
     *
     * @param string $expected reference HTML from the spec
     * @param string $actual   HTML produced by the renderer
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        // Both operands are strings: assertSame is the identity check and
        // cannot be affected by loose comparison rules.
        $this->assertSame(
            $this->normalizeHtml($expected),
            $this->normalizeHtml($actual)
        );
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * Additionally drops DokuWiki-specific heading decoration that carries no
     * semantic meaning for GFM-conformance checks:
     *
     * - `<div class="levelN">` section wrappers together with their own
     *   matching `</div>`. Any other `<div>`/`</div>` is kept.
     * - HTML comments (`<!-- EDIT... -->`) inserted by the section-edit
     *   machinery after each heading.
     * - `class="..."` / `id="..."` attributes on h1-h6 (section-edit anchor
     *   and header-id generation; fine to ignore, the spec output has none).
     *
     * @param string $html HTML to normalize
     * @return string normalized HTML
     */
    private function normalizeHtml(string $html): string
    {
        $block = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

        // Neither the wrappers nor the comments are semantically part of the
        // heading and GFM reference output never contains them.
        $html = $this->stripSectionWrappers($html);
        $html = preg_replace('#<!--[^<]*?-->#', '', $html);

        // Strip sectionedit/id decoration from headings.
        $html = preg_replace('#<(h[1-6])(?:\s+(?:class|id)="[^"]*")+\s*>#', '<$1>', $html);

        // Whitespace before/after an opening block tag (including attributes)
        $html = preg_replace('#\s*<(' . $block . ')((?:\s[^>]*)?)>\s*#', '<$1$2>', $html);
        // Whitespace before/after a closing block tag
        $html = preg_replace('#\s*</(' . $block . ')>\s*#', '</$1>', $html);

        return trim($html);
    }

    /**
     * Remove `<div class="levelN">` wrappers and only the `</div>` tags that
     * close them.
     *
     * Removing every `</div>` would also eat the closing tag of unrelated
     * divs (for example raw HTML blocks in the spec) while leaving their
     * opening tag in place, which hides a missing or surplus `</div>` in the
     * rendered output. Open divs are therefore tracked on a stack so each
     * closing tag is attributed to the div it actually closes.
     *
     * @param string $html HTML to process
     * @return string HTML without section wrappers
     */
    private function stripSectionWrappers(string $html): string
    {
        // One entry per currently open div: true = section wrapper, false = other div
        $open = [];

        $result = preg_replace_callback(
            '#(<div class="level[1-6]">)\s*|(<div(?=[\s/>])[^>]*>)|\s*</div>\s*#',
            static function (array $m) use (&$open): string {
                if (($m[1] ?? '') !== '') {
                    // section wrapper: drop it along with the whitespace that follows
                    $open[] = true;
                    return '';
                }
                if (($m[2] ?? '') !== '') {
                    // any other div stays untouched
                    $open[] = false;
                    return $m[0];
                }
                // Closing tag: drop it only when it closes a section wrapper.
                // A stray `</div>` (empty stack) is kept as-is.
                return array_pop($open) === true ? '' : $m[0];
            },
            $html
        );

        // preg_replace_callback() returns null on a PCRE error; fall back to the input
        return $result ?? $html;
    }
}
124