examples() as $ex) {
            $reason = $skip[$ex['number']] ?? null;
            $label = sprintf('#%d %s', $ex['number'], $ex['section']);
            // Swap the U+2192 arrow placeholder for a real tab character.
            $md = strtr($ex['markdown'], ["\u{2192}" => "\t"]);
            $html = strtr($ex['html'], ["\u{2192}" => "\t"]);
            yield $label => [$md, $html, $reason];
        }
    }

    /**
     * Render one spec example and compare it with the reference HTML.
     *
     * A non-null skip reason marks the example as skipped before any
     * rendering takes place.
     *
     * @dataProvider specProvider
     *
     * @param string      $md         markdown input of the example
     * @param string      $expected   reference HTML of the example
     * @param string|null $skipReason reason to skip, or null to run the example
     */
    public function testExample(string $md, string $expected, ?string $skipReason): void
    {
        if (null !== $skipReason) {
            $this->markTestSkipped($skipReason);
        }

        $this->assertHtmlEquals($expected, $this->renderMarkdown($md));
    }

    /**
     * Reset the mode registry, then run the parent teardown.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Run markdown text through DokuWiki's full parser pipeline with the
     * `md` syntax setting and render it with {@see SpecCompatRenderer} —
     * an XHTML renderer subclass that emits the minimal link/media HTML
     * shape the GFM spec expects. Production rendering is unchanged;
     * the override exists only so spec output can be compared byte-for-byte.
     *
     * Typography is switched off for the spec run: $conf[typography] = 0
     * keeps the Quotes and MultiplyEntity modes (curly quote pairing,
     * apostrophe to numeric entity) out of the mode list. Both are
     * correct for production wiki prose but diverge byte-for-byte from
     * spec output. SpecCompatRenderer additionally neutralizes the
     * Entity-table substitutions (--, ---, ->, (c), ...) at render time;
     * see SpecCompatRenderer::entity().
     *
     * The renderer's acronym table is left empty so the parser-emitted
     * acronym() calls (e.g. for "FTP") fall through to literal text
     * instead of wrapping in `<abbr>`, which the spec output never has.
*/
    private function renderMarkdown(string $text): string
    {
        global $conf;

        // Apply the spec-run settings, then reset the mode registry.
        $conf['syntax'] = 'md';
        $conf['typography'] = 0;
        ModeRegistry::reset();

        $instructions = p_get_instructions($text);

        $renderer = new SpecCompatRenderer();
        $renderer->reset();
        $renderer->smileys = getSmileys();
        $renderer->entities = getEntities();
        // Deliberately empty: no acronym table is handed to the renderer.
        $renderer->acronyms = [];
        $renderer->interwiki = getInterwiki();

        // Replay every instruction the renderer has a method for; skip the rest.
        foreach ($instructions as $call) {
            $handler = $call[0];
            if (!method_exists($renderer, $handler)) {
                continue;
            }
            call_user_func_array([$renderer, $handler], $call[1] ?: []);
        }

        return $renderer->doc;
    }

    /**
     * Assert that two HTML strings match once both have been normalized.
     *
     * DokuWiki's XHTML renderer emits extra whitespace around block tags
     * that the spec's reference HTML omits. The comparator strips whitespace
     * only around **block-level** tags (p, div, h1-h6, ul/ol/li, table/tr/td,
     * blockquote, pre, hr). Whitespace around **inline** tags (em, strong,
     * a, code, span, img, br, etc.) is preserved, because
     * `<em>x</em> <em>y</em>` and `<em>x</em><em>y</em>` render differently.
     *
     * @param string $expected reference HTML
     * @param string $actual   rendered HTML
     */
    private function assertHtmlEquals(string $expected, string $actual): void
    {
        $want = $this->normalizeHtml($expected);
        $got = $this->normalizeHtml($actual);

        $this->assertEquals($want, $got);
    }

    /**
     * Strip whitespace adjacent to block-level tags; leave inline tags alone.
     *
     * Additionally drops DokuWiki-specific heading decoration that carries no
     * semantic meaning for GFM-conformance checks:
     *
     * - `
<div class="levelN">` / matching `</div>` section wrappers the
     *   renderer emits after every header call.
     * - `class="..."` / `id="..."` attributes on h1-h6 (section-edit anchor
     *   and header-id generation; fine to ignore, the spec output has none).
     *
     * NOTE(review): the tag text inside the wrapper, HTML-comment and
     * closing-block-tag patterns (and the closing-tag replacement) was missing
     * from the reviewed copy, which made the last substitution delete every
     * whitespace run. It has been restored from this description and from
     * DokuWiki's section markup (`<div class="levelN">` ... `</div>`);
     * confirm the exact patterns against the repository version.
     *
     * @param string $html HTML fragment to normalize
     * @return string the fragment with block-adjacent whitespace and the
     *                DokuWiki heading decoration removed, trimmed at both ends
     */
    private function normalizeHtml(string $html): string
    {
        $block = 'p|div|h[1-6]|hr|ul|ol|li|blockquote|pre|table|thead|tbody|tfoot|tr|th|td';

        // Drop DokuWiki's `<div class="levelN">` section wrappers and the
        // HTML comments (e.g. `<!-- EDIT... -->`) its section-edit machinery
        // inserts after each heading. Neither is semantically part of the
        // heading and GFM reference output never contains them.
        $html = preg_replace('#<div class="level\d+">\s*#', '', $html);
        $html = preg_replace('#\s*</div>\s*#', '', $html);
        $html = preg_replace('#<!--.*?-->#', '', $html);

        // Strip sectionedit/id decoration from headings.
        $html = preg_replace('#<(h[1-6])(?:\s+(?:class|id)="[^"]*")+\s*>#', '<$1>', $html);

        // Whitespace before/after an opening block tag (including attributes)
        $html = preg_replace('#\s*<(' . $block . ')((?:\s[^>]*)?)>\s*#', '<$1$2>', $html);

        // Whitespace before/after a closing block tag; the tag itself is
        // re-emitted so only the surrounding whitespace disappears.
        $html = preg_replace('#\s*</(' . $block . ')>\s*#', '</$1>', $html);

        return trim($html);
    }
}