<?php

namespace dokuwiki\test\Parsing\Markdown;

use Doku_Renderer_xhtml;

/**
 * XHTML renderer tuned to emit the minimal HTML shape GFM's spec.txt uses.
 *
 * DokuWiki's production XHTML renderer wraps internal media in details
 * links pointing at `/lib/exe/fetch.php?media=...` / `/lib/exe/detail.php?media=...`,
 * rewrites internal link hrefs to `/doku.php?id=...`, and adds wiki-specific
 * classes and attributes. All of this is correct for live wiki pages but
 * diverges byte-for-byte from GFM's bare `<img src="...">` and
 * `<a href="...">...</a>`.
 *
 * This renderer is used only by {@see GfmSpecTest} so the spec roundtrip
 * can compare against byte-level spec HTML. Production rendering is
 * unchanged. Methods not overridden here fall through to the XHTML
 * renderer (paragraphs, emphasis, code spans, etc.) — those render
 * the same shape the spec expects.
 *
 * Note: title attributes on links/images are discarded at handle time
 * (no DW instruction slot), so spec examples that expect `title="..."`
 * still don't pass and stay in `skip.php`.
 */
class SpecCompatRenderer extends Doku_Renderer_xhtml
{
    /**
     * Open a table. All arguments are accepted for signature compatibility
     * and ignored.
     */
    public function table_open($maxcols = null, $numrows = null, $pos = null, $classes = null)
    {
        // Production DW wraps `<table>` in `<div class="table"><table class="inline">`;
        // the spec expects bare `<table>`.
        $this->doc .= "<table>\n";
    }

    /**
     * Close a table. `$pos` is ignored.
     */
    public function table_close($pos = null)
    {
        // Drop the matching `</div>` from the production wrapper.
        $this->doc .= "</table>";
    }

    /**
     * Open a table row. `$classes` is ignored.
     */
    public function tablerow_open($classes = null)
    {
        // Strip DW's `class="rowN"` row counter — spec rows have no class.
        $this->doc .= "<tr>\n";
    }

    /**
     * Open a header cell. Only `$align` is used; spans and classes are ignored.
     */
    public function tableheader_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        // Production DW emits alignment as `class="...align"`; the spec uses
        // an `align="..."` attribute. Drop the `class="colN"` counter too.
        $this->doc .= '<th' . $this->alignAttr($align) . '>';
    }

    /**
     * Open a data cell. Only `$align` is used; spans and classes are ignored.
     */
    public function tablecell_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        $this->doc .= '<td' . $this->alignAttr($align) . '>';
    }

    /**
     * Build the ` align="..."` attribute for a table cell.
     *
     * @param string|null $align alignment keyword, or null for none
     * @return string the attribute with a leading space, or '' when $align is null
     */
    private function alignAttr(?string $align): string
    {
        if ($align === null) {
            return '';
        }
        // Escaped like every other attribute value this class emits.
        return ' align="' . hsc($align) . '"';
    }

    /**
     * Render internal media as a bare `<img>`.
     *
     * Only `$src`, `$title`, `$width`, `$height` and `$return` are used.
     *
     * @param bool $return return the HTML instead of appending it to the document
     * @return string|null the HTML when $return is true, otherwise null
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render external media as a bare `<img>`.
     *
     * Only `$src`, `$title`, `$width`, `$height` and `$return` are used.
     *
     * @param bool $return return the HTML instead of appending it to the document
     * @return string|null the HTML when $return is true, otherwise null
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render an internal link as a bare `<a>` whose href is the page id.
     * `$search` and `$linktype` are ignored.
     *
     * @param bool $returnonly return the HTML instead of appending it to the document
     * @return string|null the HTML when $returnonly is true, otherwise null
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {
        return $this->specEmit($this->specLink($id, $name), $returnonly);
    }

    /**
     * Render an external link as a bare `<a>`.
     *
     * @param bool $returnonly return the HTML instead of appending it to the document
     * @return string|null the HTML when $returnonly is true, otherwise null
     */
    public function externallink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render an interwiki link. `$wikiName` and `$wikiUri` are ignored.
     *
     * @param bool $returnonly return the HTML instead of appending it to the document
     * @return string|null the HTML when $returnonly is true, otherwise null
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri, $returnonly = false)
    {
        // Spec has no interwiki expectations; emit the raw `wp>Page` form as
        // href so the mode is still visible but obviously non-standard.
        return $this->specEmit($this->specLink($match, $name), $returnonly);
    }

    /**
     * Render an e-mail link; the address is the label when no name is given.
     *
     * @param bool $returnonly return the HTML instead of appending it to the document
     * @return string|null the HTML when $returnonly is true, otherwise null
     */
    public function emaillink($address, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('mailto:' . $address, $name ?? $address), $returnonly);
    }

    /**
     * Render a same-page anchor link; the hash is the label when no name is given.
     *
     * @param bool $returnonly return the HTML instead of appending it to the document
     * @return string|null the HTML when $returnonly is true, otherwise null
     */
    public function locallink($hash, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('#' . $hash, $name ?? $hash), $returnonly);
    }

    /**
     * Render a Windows share link as a bare `<a>`.
     *
     * @param bool $returnonly return the HTML instead of appending it to the document
     * @return string|null the HTML when $returnonly is true, otherwise null
     */
    public function windowssharelink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render a code block. `$filename` and `$options` are ignored.
     */
    public function code($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render a hard line break.
     */
    public function linebreak()
    {
        // Production DW emits `<br/>` (no space); the spec expects the
        // XHTML-classic `<br />` (space before the slash).
        $this->doc .= '<br />' . DOKU_LF;
    }

    /**
     * Render an entity match literally instead of substituting a glyph.
     */
    public function entity($entity)
    {
        // The Entity mode rewrites --, ---, ->, (c), ... and other prose
        // abbreviations into typographic glyphs via conf/entities.conf.
        // Correct for live wiki pages, diverges byte-for-byte from the
        // GFM spec corpus which expects those bytes preserved literally.
        // Emit the original match with HTML escaping only, no substitution.
        $this->doc .= $this->_xmlEntities((string) $entity);
    }

    /**
     * Escape body text with the spec's narrower quoting policy.
     *
     * @param mixed $string text to escape (cast to string)
     * @return string the text with `&`, `<`, `>` and `"` escaped; `'` is left as is
     */
    public function _xmlEntities($string)
    {
        // Production hsc() escapes both `"` and `'` (ENT_QUOTES) so cdata
        // is safe to splice into any HTML attribute as well as body text.
        // CommonMark / GFM spec output uses a narrower body-text policy:
        // `"` is escaped to `&quot;` (e.g. example #323) but `'` is left
        // literal (e.g. example #670). ENT_COMPAT matches that exactly.
        // Attribute values rendered by SpecCompatRenderer (href, src, alt)
        // still go through hsc() in specLink / specImg, which escapes both.
        return htmlspecialchars(
            (string) $string,
            ENT_COMPAT | ENT_SUBSTITUTE | ENT_HTML401,
            'UTF-8'
        );
    }

    /**
     * Open a blockquote.
     */
    public function quote_open()
    {
        // Production DW wraps blockquote content in `<div class="no">`;
        // the spec expects bare `<blockquote>...</blockquote>`.
        $this->doc .= "<blockquote>\n";
    }

    /**
     * Close a blockquote.
     */
    public function quote_close()
    {
        $this->doc .= "</blockquote>\n";
    }

    /**
     * Open an unordered list. `$classes` is ignored.
     */
    public function listu_open($classes = null)
    {
        $this->doc .= "<ul>\n";
    }

    /**
     * Close an unordered list.
     */
    public function listu_close()
    {
        $this->doc .= "</ul>\n";
    }

    /**
     * Open an ordered list. `$classes` is ignored.
     */
    public function listo_open($classes = null)
    {
        $this->doc .= "<ol>\n";
    }

    /**
     * Open an ordered list with an explicit start number.
     *
     * @param int|string $start first item number (cast to int); 1 yields a plain `<ol>`
     */
    public function listo_open_start($start = 1)
    {
        $start = (int) $start;
        if ($start === 1) {
            $this->listo_open();
            return;
        }
        $this->doc .= '<ol start="' . $start . "\">\n";
    }

    /**
     * Close an ordered list.
     */
    public function listo_close()
    {
        $this->doc .= "</ol>\n";
    }

    /**
     * Open a list item. `$level` and `$node` are ignored.
     */
    public function listitem_open($level, $node = false)
    {
        $this->doc .= '<li>';
    }

    /**
     * Close a list item.
     */
    public function listitem_close()
    {
        $this->doc .= "</li>\n";
    }

    /**
     * Intentionally emits nothing.
     */
    public function listcontent_open()
    {
        // GFM has no per-item content wrapper - tight items put text directly
        // inside <li>, loose items wrap it in <p>. The handler emits/strips
        // p_open / p_close to drive that distinction; the wrapper itself
        // produces no output here.
    }

    /**
     * Intentionally emits nothing; counterpart of listcontent_open().
     */
    public function listcontent_close()
    {
    }

    /**
     * Render a file block exactly like a code block. `$filename` and
     * `$options` are ignored.
     */
    public function file($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render preformatted text as a language-less code block.
     */
    public function preformatted($text)
    {
        // The Preformatted CallWriter rewriter collapses start/content/
        // newline/end into one `preformatted` call. GFM expects the body
        // to end with a newline (spec example 104); DW's internal text
        // loses it to `trim()`, so we re-append here.
        $this->doc .= $this->specCode($text . "\n", null);
    }

    /**
     * GFM shape: <pre><code class="language-xxx">...</code></pre>. The
     * production DW renderer emits <pre class="code"> with no inner
     * <code>, which diverges byte-for-byte.
     *
     * @param mixed $text     code body (cast to string, escaped with hsc())
     * @param mixed $language language tag; null or '' omits the class attribute
     * @return string the HTML fragment
     */
    private function specCode($text, $language): string
    {
        $classAttr = '';
        if ($language !== null && $language !== '') {
            $classAttr = ' class="language-' . hsc((string) $language) . '"';
        }
        return '<pre><code' . $classAttr . '>' . hsc((string) $text) . '</code></pre>';
    }

    /**
     * Build a bare `<img ... />` tag.
     *
     * @param mixed $src    image source (escaped, not URL-encoded)
     * @param mixed $alt    alt text; null becomes an empty `alt=""`
     * @param mixed $width  width, cast to int; attribute omitted when null
     * @param mixed $height height, cast to int; attribute omitted when null
     * @return string the HTML fragment
     */
    private function specImg($src, $alt, $width, $height): string
    {
        $out = '<img src="' . hsc((string) $src) . '"';
        $out .= ' alt="' . hsc((string) $alt) . '"';
        if ($width !== null) {
            $out .= ' width="' . (int) $width . '"';
        }
        if ($height !== null) {
            $out .= ' height="' . (int) $height . '"';
        }
        $out .= ' />';
        return $out;
    }

    /**
     * Emit a bare <a href="...">label</a>. If the label is a media
     * descriptor array (the shape Media::parseMedia() returns, passed by
     * Internallink / GfmLink when the label is `{{img}}` or a Markdown image),
     * render the <img> inside the <a>.
     *
     * @param mixed $href  link target (cast to string, percent-encoded, then escaped)
     * @param mixed $label link text, media descriptor array, or null/'' to reuse the href
     * @return string the HTML fragment
     */
    private function specLink($href, $label): string
    {
        $href = $this->specEncodeUrl((string) $href);
        if (is_array($label) && isset($label['type'])) {
            $img = $this->specImg(
                $label['src'],
                $label['title'] ?? null,
                $label['width'] ?? null,
                $label['height'] ?? null
            );
            return '<a href="' . hsc($href) . '">' . $img . '</a>';
        }
        // An empty label falls back to the already-encoded href.
        $text = ($label === null || $label === '') ? $href : $label;
        return '<a href="' . hsc($href) . '">' . hsc((string) $text) . '</a>';
    }

    /**
     * Percent-encode characters not in CommonMark's URL-safe set,
     * preserving existing %XX sequences. Matches what cmark-gfm's
     * reference renderer does for the spec corpus: UTF-8 bytes and
     * non-URL-safe ASCII (e.g. `\`, space) become %XX; alphanumerics,
     * RFC 3986 unreserved/reserved, and `%` itself pass through.
     *
     * @param string $url raw URL
     * @return string the encoded URL, or $url unchanged if PCRE reports an error
     */
    private function specEncodeUrl(string $url): string
    {
        // No `u` modifier on purpose: the pattern matches single bytes, so
        // each byte of a multi-byte UTF-8 sequence becomes its own %XX.
        $encoded = preg_replace_callback(
            "/[^A-Za-z0-9\\-._~:\\/?#\\[\\]@!$&'()*+,;=%]/",
            static fn(array $m): string => '%' . strtoupper(bin2hex($m[0])),
            $url
        );
        // preg_replace_callback() yields null on a PCRE error; keep the
        // declared string return type honest in that case.
        return $encoded ?? $url;
    }

    /**
     * Append rendered HTML to the document, or hand it back to the caller.
     *
     * Shared by the media and link overrides so that their `$return` /
     * `$returnonly` flag is honoured rather than silently ignored.
     *
     * @param string $html       rendered fragment
     * @param mixed  $returnOnly truthy to return the fragment without touching the document
     * @return string|null the fragment when $returnOnly is truthy, otherwise null
     */
    private function specEmit(string $html, $returnOnly): ?string
    {
        if ($returnOnly) {
            return $html;
        }
        $this->doc .= $html;
        return null;
    }
}