<?php

namespace dokuwiki\test\Parsing\Markdown;

use dokuwiki\Parsing\ModeRegistry;

/**
 * Roundtrip tests driven by GFM's spec.txt.
 *
 * Each example in gfm-spec/spec.txt becomes one data-provider case. The
 * markdown input is run through DokuWiki's full pipeline (parser + XHTML
 * renderer) and the result is compared to the expected HTML from the spec,
 * tolerating whitespace differences around block-level tags.
 *
 * Most examples are expected to FAIL until the relevant GFM parser modes
 * are implemented — they are the branch's living TODO list for GFM parity.
 * Do not mark such failures incomplete or skipped.
 *
 * `gfm-spec/skip.php` lists examples that are deliberately out of scope
 * for DokuWiki (e.g. CommonMark flanking-delimiter edge cases). Those are
 * reported as skipped with a reason.
 */
class GfmSpecTest extends \DokuWikiTest
{
    /** Directory (with trailing slash) holding spec.txt and skip.php. */
    private const FIXTURE_DIR = __DIR__ . '/gfm-spec/';

    /**
     * Data provider: yields one named case per example read from spec.txt.
     *
     * The case label is "#<number> <section>". The third element is the skip
     * reason looked up in skip.php by example number, or null when the
     * example is not listed there.
     *
     * @return iterable<string, array{0: string, 1: string, 2: ?string}>
     */
    public static function specProvider(): iterable
    {
        $reader = new SpecReader(self::FIXTURE_DIR . 'spec.txt');
        $skip = require self::FIXTURE_DIR . 'skip.php';

        foreach ($reader->examples() as $ex) {
            $reason = $skip[$ex['number']] ?? null;
            $label = sprintf('#%d %s', $ex['number'], $ex['section']);
            yield $label => [$ex['markdown'], $ex['html'], $reason];
        }
    }

    /**
     * Render one spec example and compare it with the spec's reference HTML.
     *
     * @dataProvider specProvider
     *
     * @param string $md markdown input of the example
     * @param string $expected reference HTML of the example
     * @param string|null $skipReason non-null marks the example as skipped with this reason
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if ($skipReason !== null) {
            $this->markTestSkipped($skipReason);
        }
        $actual = $this->renderMarkdown($md);
        $this->assertHtmlEquals($expected, $actual);
    }

    /**
     * Reset the mode registry after every test, then run the parent teardown.
     *
     * NOTE(review): presumably this keeps modes registered under the `md`
     * syntax setting from leaking into later tests — confirm against
     * ModeRegistry.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Render markdown text through DokuWiki's full parser pipeline under
     * the `md` syntax setting, using {@see SpecCompatRenderer} —
     * an XHTML renderer subclass that emits the minimal link/media HTML
     * shape the GFM spec expects. Production rendering is unchanged;
     * this override exists so spec output can be compared byte-for-byte.
     *
     * @param string $text markdown source
     * @return string the renderer's accumulated document
     */
    private function renderMarkdown(string $text): string
    {
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();

        $instructions = p_get_instructions($text);

        $renderer = new SpecCompatRenderer();
        $renderer->reset();
        $renderer->smileys = getSmileys();
        $renderer->entities = getEntities();
        $renderer->acronyms = getAcronyms();
        $renderer->interwiki = getInterwiki();

        foreach ($instructions as $instruction) {
            // Instructions the renderer has no method for are silently ignored.
            if (method_exists($renderer, $instruction[0])) {
                call_user_func_array([$renderer, $instruction[0]], $instruction[1] ?: []);
            }
        }
        return $renderer->doc;
    }

    /**
     * Assert two HTML strings are equivalent after whitespace normalization.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, ul/ol/li, table/tr/td,
     * blockquote, pre, hr). Whitespace around **inline** tags (em, strong,
     * a, code, span, img, br, etc.) is preserved, because `<em>x</em> y`
     * and `<em>x</em>y` render differently.
     *
     * @param string $expected reference HTML from the spec
     * @param string $actual HTML produced by the renderer
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        // Both sides are strings, so compare strictly.
        $this->assertSame(
            $this->normalizeHtml($expected),
            $this->normalizeHtml($actual)
        );
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * Additionally drops DokuWiki-specific heading decoration that carries no
     * semantic meaning for GFM-conformance checks:
     *
     *  - `<div class="levelN">` / matching `</div>` section wrappers the
     *    renderer emits after every header call. Closers of any other div
     *    are kept, so an unbalanced `<div>` in the output is still detected.
     *  - `class="..."` / `id="..."` attributes on h1-h6 (section-edit anchor
     *    and header-id generation; fine to ignore, the spec output has none).
     *
     * @param string $html raw HTML (either expected or actual)
     * @return string normalized HTML, trimmed
     */
    private function normalizeHtml(string $html): string
    {
        $block = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

        // Drop DokuWiki's `<div class="levelN">` section wrappers and the
        // HTML comments (`<!-- EDIT... -->`) its section-edit machinery
        // inserts after each heading. Neither is semantically part of the
        // heading and GFM reference output never contains them.
        $html = $this->stripSectionWrappers($html);
        $html = preg_replace('#<!--[^<]*?-->#', '', $html);

        // Strip sectionedit/id decoration from headings.
        $html = preg_replace('#<(h[1-6])(?:\s+(?:class|id)="[^"]*")+\s*>#', '<$1>', $html);

        // Whitespace before/after an opening block tag (including attributes)
        $html = preg_replace('#\s*<(' . $block . ')((?:\s[^>]*)?)>\s*#', '<$1$2>', $html);
        // Whitespace before/after a closing block tag
        $html = preg_replace('#\s*</(' . $block . ')>\s*#', '</$1>', $html);

        return trim($html);
    }

    /**
     * Remove `<div class="levelN">` openers together with the `</div>` that
     * closes each of them, leaving every other div tag in place.
     *
     * Div tags are tracked on a stack so that a `</div>` is only removed when
     * the div it closes is a level wrapper. A `</div>` with no open div, or
     * one closing an ordinary div, is returned unchanged.
     *
     * @param string $html raw HTML
     * @return string HTML without level wrappers
     * @throws \RuntimeException when the regular expression engine reports an error
     */
    private function stripSectionWrappers(string $html): string
    {
        // One entry per currently open div: true = level wrapper, false = any other div.
        $openDivs = [];

        $result = preg_replace_callback(
            '#(<div(?:\s[^>]*)?>)\s*|\s*</div>\s*#',
            static function (array $match) use (&$openDivs): string {
                $opener = $match[1] ?? '';

                if ($opener !== '') {
                    $isWrapper = preg_match('#^<div class="level[1-6]">\z#', $opener) === 1;
                    $openDivs[] = $isWrapper;
                    // Wrapper openers vanish along with the whitespace that follows them.
                    return $isWrapper ? '' : $match[0];
                }

                // Closing tag: array_pop() yields null on an empty stack, i.e. "not a wrapper".
                $closesWrapper = array_pop($openDivs) === true;
                return $closesWrapper ? '' : $match[0];
            },
            $html
        );

        if ($result === null) {
            throw new \RuntimeException('PCRE error ' . preg_last_error() . ' while stripping section wrappers');
        }

        return $result;
    }
}