Parsing/Markdown/GfmSpecTest.php

<?php

namespace dokuwiki\test\Parsing\Markdown;

use dokuwiki\Parsing\ModeRegistry;

/**
 * Roundtrip tests driven by GFM's spec.txt.
 *
 * Each example in gfm-spec/spec.txt becomes one data-provider case. The
 * markdown input is run through DokuWiki's full pipeline (parser + XHTML
 * renderer) and the result is compared to the expected HTML from the spec,
 * tolerating whitespace differences around block-level tags.
 *
 * Most examples are expected to FAIL until the relevant GFM parser modes
 * are implemented — they are the branch's living TODO list for GFM parity.
 * Do not mark such failures incomplete or skipped.
 *
 * `gfm-spec/skip.php` lists examples that are deliberately out of scope
 * for DokuWiki (e.g. CommonMark flanking-delimiter edge cases). Those are
 * reported as skipped with a reason.
 */
class GfmSpecTest extends \DokuWikiTest
{
    /** Directory containing spec.txt and skip.php. */
    private const FIXTURE_DIR = __DIR__ . '/gfm-spec/';

    /**
     * Regex alternation of the tag names normalizeHtml() treats as
     * block-level. Anything not listed here is left untouched.
     */
    private const BLOCK_TAGS = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

    /**
     * Yield one named data set per example read from spec.txt.
     *
     * The data set key is "#<number> <section>". The third element is the
     * skip reason looked up by example number in skip.php, or null when the
     * example is not listed there.
     *
     * @return iterable<string, array{0: string, 1: string, 2: ?string}>
     */
    public static function specProvider(): iterable
    {
        $reader = new SpecReader(self::FIXTURE_DIR . 'spec.txt');
        $skip   = require self::FIXTURE_DIR . 'skip.php';

        foreach ($reader->examples() as $ex) {
            $reason = $skip[$ex['number']] ?? null;
            $label  = sprintf('#%d %s', $ex['number'], $ex['section']);
            yield $label => [$ex['markdown'], $ex['html'], $reason];
        }
    }

    /**
     * Render one spec example and compare it to the spec's reference HTML.
     *
     * @dataProvider specProvider
     *
     * @param string      $md         markdown input of the example
     * @param string      $expected   reference HTML of the example
     * @param string|null $skipReason non-null when the example is listed in skip.php
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if ($skipReason !== null) {
            // markTestSkipped() throws, so nothing below runs for skipped examples
            $this->markTestSkipped($skipReason);
        }
        $actual = $this->renderMarkdown($md);
        $this->assertHtmlEquals($expected, $actual);
    }

    /**
     * Reset the mode registry after each test, then run the parent teardown.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Render markdown text through DokuWiki's full parser + XHTML renderer
     * pipeline under the `markdown` syntax setting.
     *
     * @param string $text markdown source
     * @return string rendered XHTML
     */
    private function renderMarkdown(string $text): string
    {
        global $conf;
        $conf['syntax'] = 'markdown';
        // Reset after switching the syntax setting and before parsing.
        // NOTE(review): presumably the registry caches modes per syntax — confirm.
        ModeRegistry::reset();

        $instructions = p_get_instructions($text);
        // p_render() receives $info as its third argument, so it must be a variable
        $info = [];
        return p_render('xhtml', $instructions, $info);
    }

    /**
     * Assert two HTML strings are equivalent after whitespace normalization.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, ul/ol/li, table/tr/td,
     * blockquote, pre, hr). Whitespace around **inline** tags (em, strong,
     * a, code, span, img, br, etc.) is preserved, because `<em>x</em> y`
     * and `<em>x</em>y` render differently.
     *
     * @param string $expected reference HTML from the spec
     * @param string $actual   HTML produced by the renderer
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        // Both operands are strings, so a strict comparison is the intent.
        $this->assertSame(
            $this->normalizeHtml($expected),
            $this->normalizeHtml($actual),
            'Rendered HTML differs from the spec after whitespace normalization'
        );
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * @param string $html HTML to normalize
     * @return string normalized HTML with leading/trailing whitespace trimmed
     * @throws \RuntimeException when the PCRE engine reports an error
     */
    private function normalizeHtml(string $html): string
    {
        // Applied in order: opening tags (including attributes) first,
        // closing tags second.
        $patterns = [
            '#\s*<(' . self::BLOCK_TAGS . ')((?:\s[^>]*)?)>\s*#',
            '#\s*</(' . self::BLOCK_TAGS . ')>\s*#',
        ];
        $replacements = ['<$1$2>', '</$1>'];

        $normalized = preg_replace($patterns, $replacements, $html);
        if (!is_string($normalized)) {
            // preg_replace() returns null on failure. Letting that fall through
            // to trim() would normalize the input to '' and could make the
            // expected and actual sides compare equal by accident.
            throw new \RuntimeException(
                'PCRE error ' . preg_last_error() . ' while normalizing HTML'
            );
        }

        return trim($normalized);
    }
}