<?php

namespace dokuwiki\test\Parsing\Markdown;

/**
 * Roundtrip tests driven by GFM's spec.txt.
 *
 * Each example in gfm-spec/spec.txt becomes one data-provider case. The
 * markdown input is run through DokuWiki's full pipeline (parser + XHTML
 * renderer) and the result is compared to the expected HTML from the spec,
 * tolerating whitespace differences around block-level tags.
 *
 * `gfm-spec/skip.php` lists examples that are deliberately out of scope
 * for DokuWiki (e.g. CommonMark flanking-delimiter edge cases). Those are
 * reported as skipped with a reason.
 */
class GfmSpecTest extends \DokuWikiTest
{
    /** Directory holding spec.txt and skip.php, with trailing slash. */
    private const FIXTURE_DIR = __DIR__ . '/gfm-spec/';

    /**
     * Data provider: yields one named dataset per example in spec.txt.
     *
     * Each dataset is `[markdown, expected HTML, skip reason or null]`,
     * keyed by a "#<number> <section>" label. The skip reason is looked up
     * by example number in the array returned from skip.php.
     *
     * @return iterable
     */
    public static function specProvider(): iterable
    {
        $reader = new SpecReader(self::FIXTURE_DIR . 'spec.txt');
        $skip = require self::FIXTURE_DIR . 'skip.php';

        // Spec convention (spec.txt §"About this document"): the `→`
        // character in examples represents a literal tab. Restore the
        // tab in both input and expected output so the corpus exercises
        // real tab-handling behavior, not arrow-character handling.
        $arrowToTab = ["\u{2192}" => "\t"];

        foreach ($reader->examples() as $ex) {
            $label = sprintf('#%d %s', $ex['number'], $ex['section']);

            yield $label => [
                strtr($ex['markdown'], $arrowToTab),
                strtr($ex['html'], $arrowToTab),
                $skip[$ex['number']] ?? null,
            ];
        }
    }

    /**
     * Render one spec example and compare it to the reference HTML.
     *
     * @dataProvider specProvider
     *
     * @param string $md markdown input of the example
     * @param string $expected reference HTML from the spec
     * @param string|null $skipReason non-null marks the example as skipped
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if ($skipReason !== null) {
            $this->markTestSkipped($skipReason);
        }

        $actual = $this->renderMarkdown($md);
        $this->assertHtmlEquals($expected, $actual);
    }

    /**
     * Render markdown text through DokuWiki's full parser pipeline under
     * the `md` syntax setting, using {@see SpecCompatRenderer} —
     * an XHTML renderer subclass that emits the minimal link/media HTML
     * shape the GFM spec expects. Production rendering is unchanged;
     * this override exists so spec output can be compared byte-for-byte.
     *
     * Typography is forced off for the spec run: $conf[typography] = 0
     * keeps the Quotes and MultiplyEntity modes (curly quote pairing,
     * apostrophe to numeric entity) out of the mode list. Both are
     * correct for production wiki prose but diverge byte-for-byte from
     * spec output. SpecCompatRenderer additionally neutralizes the
     * Entity-table substitutions (--, ---, ->, (c), ...) at render time;
     * see SpecCompatRenderer::entity().
     *
     * The renderer's acronym table is left empty so the parser-emitted
     * acronym() calls (e.g. for "FTP") fall through to literal text
     * instead of wrapping in <abbr>, which the spec output never has.
     *
     * Note: this writes to the global $conf and does not restore it.
     *
     * @param string $text markdown source
     * @return string the renderer's accumulated output document
     */
    private function renderMarkdown(string $text): string
    {
        global $conf;
        $conf['syntax'] = 'md';
        $conf['typography'] = 0;

        $instructions = p_get_instructions($text);

        $renderer = new SpecCompatRenderer();
        $renderer->reset();
        $renderer->smileys = getSmileys();
        $renderer->entities = getEntities();
        $renderer->acronyms = [];
        $renderer->interwiki = getInterwiki();

        // Each instruction is [method name, argument list]; instructions the
        // renderer has no method for are skipped.
        foreach ($instructions as $instruction) {
            if (method_exists($renderer, $instruction[0])) {
                call_user_func_array([$renderer, $instruction[0]], $instruction[1] ?: []);
            }
        }

        return $renderer->doc;
    }

    /**
     * Assert two HTML strings are equivalent after whitespace normalization.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, ul/ol/li, table/tr/td,
     * blockquote, pre, hr). Whitespace around **inline** tags (em, strong,
     * a, code, span, img, br, etc.) is preserved, because `<em>x</em> y`
     * and `<em>x</em>y` render differently.
     *
     * The normalized strings are compared with assertSame(), i.e. strict
     * string identity, so no loose scalar comparison can mask a difference.
     *
     * @param string $expected reference HTML
     * @param string $actual rendered HTML
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        $this->assertSame(
            $this->normalizeHtml($expected),
            $this->normalizeHtml($actual)
        );
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * Additionally drops DokuWiki-specific heading decoration that carries no
     * semantic meaning for GFM-conformance checks:
     *
     * - `<div class="levelN">` / matching `</div>` section wrappers the
     *   renderer emits after every header call.
     * - `class="..."` / `id="..."` attributes on h1-h6 (section-edit anchor
     *   and header-id generation; fine to ignore, the spec output has none).
     *
     * @param string $html HTML to normalize
     * @return string normalized HTML, trimmed at both ends
     * @throws \RuntimeException if a replacement step fails inside PCRE
     */
    private function normalizeHtml(string $html): string
    {
        $block = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

        // [pattern, replacement] pairs, applied strictly in this order; every
        // step operates on the output of the previous one.
        $steps = [
            // Drop DokuWiki's `<div class="levelN">` section wrappers and the
            // HTML comments (`<!-- EDIT... -->`) its section-edit machinery
            // inserts after each heading. Neither is semantically part of the
            // heading and GFM reference output never contains them.
            ['#<div class="level[1-6]">\s*#', ''],
            // This removes every closing `</div>`, not only the ones that
            // belong to a level wrapper; it is applied to both sides equally.
            ['#\s*</div>\s*#', ''],
            ['#<!--[^<]*?-->#', ''],

            // Strip sectionedit/id decoration from headings.
            ['#<(h[1-6])(?:\s+(?:class|id)="[^"]*")+\s*>#', '<$1>'],

            // Whitespace before/after an opening block tag (including attributes)
            ['#\s*<(' . $block . ')((?:\s[^>]*)?)>\s*#', '<$1$2>'],
            // Whitespace before/after a closing block tag
            ['#\s*</(' . $block . ')>\s*#', '</$1>'],
        ];

        foreach ($steps as [$pattern, $replacement]) {
            $result = preg_replace($pattern, $replacement, $html);

            // preg_replace() returns null on a PCRE error. Continuing would
            // turn the document into an empty string and could let two
            // unrelated documents compare equal, so fail loudly instead.
            if ($result === null) {
                throw new \RuntimeException(sprintf(
                    'preg_replace failed (PCRE error code %d) for pattern %s',
                    preg_last_error(),
                    $pattern
                ));
            }

            $html = $result;
        }

        return trim($html);
    }
}