<?php

namespace dokuwiki\test\Parsing\Markdown;

use dokuwiki\Parsing\ModeRegistry;

/**
 * Roundtrip tests driven by GFM's spec.txt.
 *
 * Each example in gfm-spec/spec.txt becomes one data-provider case. The
 * markdown input is run through DokuWiki's full pipeline (parser + XHTML
 * renderer) and the result is compared to the expected HTML from the spec,
 * tolerating whitespace differences around block-level tags.
 *
 * `gfm-spec/skip.php` lists examples that are deliberately out of scope
 * for DokuWiki (e.g. CommonMark flanking-delimiter edge cases). Those are
 * reported as skipped with a reason.
 */
class GfmSpecTest extends \DokuWikiTest
{
    /** Directory holding spec.txt and skip.php, with trailing slash. */
    private const FIXTURE_DIR = __DIR__ . '/gfm-spec/';

    /** The arrow character (U+2192) the spec prints in place of a literal tab. */
    private const TAB_MARKER = "\u{2192}";

    /**
     * Yield one named data set per spec example.
     *
     * Each data set is `[markdown, expected html, skip reason or null]`,
     * keyed by a `#<number> <section>` label. The skip reason is looked up
     * by example number in the array returned from skip.php.
     *
     * @return iterable<string, array{0: string, 1: string, 2: ?string}>
     */
    public static function specProvider(): iterable
    {
        $reader = new SpecReader(self::FIXTURE_DIR . 'spec.txt');
        $skip = require self::FIXTURE_DIR . 'skip.php';

        // Spec convention (spec.txt §"About this document"): the `→`
        // character in examples represents a literal tab. Restore the
        // tab in both input and expected output so the corpus exercises
        // real tab-handling behavior, not arrow-character handling.
        $tabs = [self::TAB_MARKER => "\t"];

        foreach ($reader->examples() as $ex) {
            $reason = $skip[$ex['number']] ?? null;
            $label = sprintf('#%d %s', $ex['number'], $ex['section']);
            $md = strtr($ex['markdown'], $tabs);
            $html = strtr($ex['html'], $tabs);
            yield $label => [$md, $html, $reason];
        }
    }

    /**
     * Render one spec example and compare it with the reference HTML.
     *
     * @dataProvider specProvider
     *
     * @param string $md markdown input of the example
     * @param string $expected reference HTML from the spec
     * @param string|null $skipReason non-null marks the example as skipped
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if ($skipReason !== null) {
            // markTestSkipped() aborts the test, nothing below runs.
            $this->markTestSkipped($skipReason);
        }
        $actual = $this->renderMarkdown($md);
        $this->assertHtmlEquals($expected, $actual);
    }

    /**
     * Reset the mode registry after each test, then run the parent teardown.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Render markdown text through DokuWiki's full parser pipeline under
     * the `md` syntax setting, using {@see SpecCompatRenderer} —
     * an XHTML renderer subclass that emits the minimal link/media HTML
     * shape the GFM spec expects. Production rendering is unchanged;
     * this override exists so spec output can be compared byte-for-byte.
     *
     * Typography is forced off for the spec run: $conf[typography] = 0
     * keeps the Quotes and MultiplyEntity modes (curly quote pairing,
     * apostrophe to numeric entity) out of the mode list. Both are
     * correct for production wiki prose but diverge byte-for-byte from
     * spec output. SpecCompatRenderer additionally neutralizes the
     * Entity-table substitutions (--, ---, ->, (c), ...) at render time;
     * see SpecCompatRenderer::entity().
     *
     * The renderer's acronym table is left empty so the parser-emitted
     * acronym() calls (e.g. for "FTP") fall through to literal text
     * instead of wrapping in <abbr>, which the spec output never has.
     *
     * @param string $text markdown source
     * @return string the renderer's accumulated document
     */
    private function renderMarkdown(string $text): string
    {
        global $conf;
        $conf['syntax'] = 'md';
        $conf['typography'] = 0;
        // Reset the registry so the settings above apply to this run.
        ModeRegistry::reset();

        $instructions = p_get_instructions($text);

        $renderer = new SpecCompatRenderer();
        $renderer->reset();
        $renderer->smileys = getSmileys();
        $renderer->entities = getEntities();
        $renderer->acronyms = [];
        $renderer->interwiki = getInterwiki();

        // Each instruction is [method name, argument list]; instructions
        // naming a method the renderer does not have are skipped.
        foreach ($instructions as $instruction) {
            if (method_exists($renderer, $instruction[0])) {
                call_user_func_array([$renderer, $instruction[0]], $instruction[1] ?: []);
            }
        }
        return $renderer->doc;
    }

    /**
     * Assert two HTML strings are identical after whitespace normalization.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, hr, ul/ol/li,
     * blockquote, pre, table/thead/tbody/tfoot/tr/th/td). Whitespace around
     * **inline** tags (em, strong, a, code, span, img, br, etc.) is
     * preserved, because `<em>x</em> y` and `<em>x</em>y` render differently.
     *
     * @param string $expected reference HTML
     * @param string $actual rendered HTML
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        // assertSame keeps the comparison strict (type and bytes), matching
        // the byte-for-byte intent of the spec run.
        $this->assertSame(
            $this->normalizeHtml($expected),
            $this->normalizeHtml($actual)
        );
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * Additionally drops DokuWiki-specific decoration that carries no
     * semantic meaning for GFM-conformance checks:
     *
     * - `<div class="levelN">` section wrapper openers, and every closing
     *   `</div>` (closers are not paired with their opener, so a `</div>`
     *   belonging to any other div is removed as well).
     * - HTML comments that contain no `<` (e.g. `<!-- EDIT... -->`).
     * - `class="..."` / `id="..."` attributes on h1-h6 (section-edit anchor
     *   and header-id generation; fine to ignore, the spec output has none).
     *
     * The rules are applied in the order listed in the method body.
     *
     * @param string $html HTML to normalize
     * @return string normalized HTML, trimmed at both ends
     * @throws \RuntimeException if PCRE fails while applying a rule
     */
    private function normalizeHtml(string $html): string
    {
        $block = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

        $rules = [
            // Drop DokuWiki's `<div class="levelN">` section wrappers and the
            // HTML comments (`<!-- EDIT... -->`) its section-edit machinery
            // inserts after each heading. Neither is semantically part of the
            // heading and GFM reference output never contains them.
            ['#<div class="level[1-6]">\s*#', ''],
            ['#\s*</div>\s*#', ''],
            ['#<!--[^<]*?-->#', ''],
            // Strip sectionedit/id decoration from headings.
            ['#<(h[1-6])(?:\s+(?:class|id)="[^"]*")+\s*>#', '<$1>'],
            // Whitespace before/after an opening block tag (including attributes)
            ['#\s*<(' . $block . ')((?:\s[^>]*)?)>\s*#', '<$1$2>'],
            // Whitespace before/after a closing block tag
            ['#\s*</(' . $block . ')>\s*#', '</$1>'],
        ];

        foreach ($rules as [$pattern, $replacement]) {
            $html = $this->replaceOrFail($pattern, $replacement, $html);
        }

        return trim($html);
    }

    /**
     * Apply one regex replacement, failing loudly on a PCRE error.
     *
     * preg_replace() returns null when PCRE fails. Left unchecked, that
     * null would turn into an empty string on both sides of the comparison
     * and let a broken normalization pass as a match.
     *
     * @param string $pattern PCRE pattern including delimiters
     * @param string $replacement replacement string
     * @param string $subject text to operate on
     * @return string the subject with all matches replaced
     * @throws \RuntimeException if preg_replace() reports an error
     */
    private function replaceOrFail(string $pattern, string $replacement, string $subject): string
    {
        $result = preg_replace($pattern, $replacement, $subject);
        if ($result === null) {
            throw new \RuntimeException(
                sprintf('PCRE error %d while applying %s', preg_last_error(), $pattern)
            );
        }
        return $result;
    }
}