<?php

namespace dokuwiki\test\Parsing\Markdown;

use Doku_Renderer_xhtml;

/**
 * XHTML renderer tuned to emit the minimal HTML shape GFM's spec.txt uses.
 *
 * DokuWiki's production XHTML renderer wraps internal media in details
 * links pointing at `/lib/exe/fetch.php?media=...` / `/lib/exe/detail.php?media=...`,
 * rewrites internal link hrefs to `/doku.php?id=...`, and adds wiki-specific
 * classes and attributes. All of this is correct for live wiki pages but
 * diverges byte-for-byte from GFM's bare `<img src="...">` and
 * `<a href="...">...</a>`.
 *
 * This renderer is used only by {@see GfmSpecTest} so the spec roundtrip
 * can compare against byte-level spec HTML. Production rendering is
 * unchanged. Methods not overridden here fall through to the XHTML
 * renderer (paragraphs, emphasis, code spans, lists, etc.) — those render
 * the same shape the spec expects.
 *
 * Link and media overrides honour the `$returnonly` / `$return` flags of
 * the methods they replace: when the flag is truthy the markup is returned
 * and `$this->doc` is left untouched.
 *
 * Note: title attributes on links/images are discarded at handle time
 * (no DW instruction slot), so spec examples that expect `title="..."`
 * still don't pass and stay in `skip.php`.
 */
class SpecCompatRenderer extends Doku_Renderer_xhtml
{
    /**
     * Open a table as a bare `<table>`.
     *
     * Production DW wraps `<table>` in `<div class="table"><table class="inline">`;
     * the spec expects bare `<table>`. All arguments are ignored.
     */
    public function table_open($maxcols = null, $numrows = null, $pos = null, $classes = null)
    {
        $this->doc .= "<table>\n";
    }

    /**
     * Close a table. Drops the matching `</div>` from the production wrapper.
     */
    public function table_close($pos = null)
    {
        $this->doc .= "</table>";
    }

    /**
     * Open a table row without DW's `class="rowN"` row counter — spec rows
     * have no class.
     */
    public function tablerow_open($classes = null)
    {
        $this->doc .= "<tr>\n";
    }

    /**
     * Open a header cell.
     *
     * Production DW emits alignment as `class="...align"`; the spec uses
     * an `align="..."` attribute. The `class="colN"` counter is dropped too.
     * `$colspan`, `$rowspan` and `$classes` are ignored.
     */
    public function tableheader_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        $this->doc .= '<th' . $this->alignAttr($align) . '>';
    }

    /**
     * Open a data cell; same attribute policy as {@see tableheader_open()}.
     */
    public function tablecell_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        $this->doc .= '<td' . $this->alignAttr($align) . '>';
    }

    /**
     * Build the ` align="..."` attribute for a table cell.
     *
     * @param string|null $align alignment keyword, or null/'' for none
     * @return string attribute with a leading space, or '' when no alignment is set
     */
    private function alignAttr(?string $align): string
    {
        if ($align === null || $align === '') {
            return '';
        }
        // Escape defensively: the value lands inside a double-quoted attribute.
        return ' align="' . hsc($align) . '"';
    }

    /**
     * Render internal media as a bare `<img>`.
     *
     * `$align`, `$cache` and `$linking` are ignored.
     *
     * @return string|null the markup when `$return` is truthy, otherwise null
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render external media as a bare `<img>`.
     *
     * `$align`, `$cache` and `$linking` are ignored.
     *
     * @return string|null the markup when `$return` is truthy, otherwise null
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render an internal link as a bare `<a>` with the page id as href.
     *
     * `$search` and `$linktype` are ignored.
     *
     * @return string|null the markup when `$returnonly` is truthy, otherwise null
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {
        return $this->specEmit($this->specLink($id, $name), $returnonly);
    }

    /**
     * Render an external link as a bare `<a>`.
     *
     * @return string|null the markup when `$returnonly` is truthy, otherwise null
     */
    public function externallink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render an interwiki link.
     *
     * Spec has no interwiki expectations; the raw `wp>Page` form is handed
     * to specLink() as href so the mode is still visible but obviously
     * non-standard. `$wikiName` and `$wikiUri` are ignored.
     *
     * @return string|null the markup when `$returnonly` is truthy, otherwise null
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri, $returnonly = false)
    {
        return $this->specEmit($this->specLink($match, $name), $returnonly);
    }

    /**
     * Render an e-mail link with a `mailto:` href; the label defaults to
     * the bare address.
     *
     * @return string|null the markup when `$returnonly` is truthy, otherwise null
     */
    public function emaillink($address, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('mailto:' . $address, $name ?? $address), $returnonly);
    }

    /**
     * Render an in-page link with a `#hash` href; the label defaults to
     * the hash without the leading `#`.
     *
     * @return string|null the markup when `$returnonly` is truthy, otherwise null
     */
    public function locallink($hash, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('#' . $hash, $name ?? $hash), $returnonly);
    }

    /**
     * Render a Windows share link as a bare `<a>`.
     *
     * @return string|null the markup when `$returnonly` is truthy, otherwise null
     */
    public function windowssharelink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render a code block in GFM shape. `$filename` and `$options` are ignored.
     */
    public function code($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Emit a hard line break.
     *
     * Production DW emits `<br/>` (no space); the spec expects the
     * XHTML-classic `<br />` (space before the slash).
     */
    public function linebreak()
    {
        $this->doc .= '<br />' . DOKU_LF;
    }

    /**
     * Emit the original entity match unchanged (escaped as body text).
     *
     * The Entity mode rewrites --, ---, ->, (c), ... and other prose
     * abbreviations into typographic glyphs via conf/entities.conf.
     * Correct for live wiki pages, diverges byte-for-byte from the
     * GFM spec corpus which expects those bytes preserved literally.
     */
    public function entity($entity)
    {
        $this->doc .= $this->_xmlEntities((string) $entity);
    }

    /**
     * Escape body text with the spec's quoting policy.
     *
     * Production hsc() escapes both `"` and `'` (ENT_QUOTES) so cdata
     * is safe to splice into any HTML attribute as well as body text.
     * CommonMark / GFM spec output uses a narrower body-text policy:
     * `"` is escaped to `&quot;` (e.g. example #323) but `'` is left
     * literal (e.g. example #670). ENT_COMPAT matches that exactly.
     * Attribute values rendered by SpecCompatRenderer (href, src, alt)
     * still go through hsc() in specLink / specImg, which escapes both.
     *
     * @param mixed $string value to escape; cast to string first
     * @return string escaped text
     */
    public function _xmlEntities($string)
    {
        return htmlspecialchars(
            (string) $string,
            ENT_COMPAT | ENT_SUBSTITUTE | ENT_HTML401,
            'UTF-8'
        );
    }

    /**
     * Open a blockquote.
     *
     * Production DW wraps blockquote content in `<div class="no">`;
     * the spec expects bare `<blockquote>...</blockquote>`.
     */
    public function quote_open()
    {
        $this->doc .= "<blockquote>\n";
    }

    /** Close a blockquote. */
    public function quote_close()
    {
        $this->doc .= "</blockquote>\n";
    }

    /** Open an unordered list without classes. */
    public function listu_open($classes = null)
    {
        $this->doc .= "<ul>\n";
    }

    /** Close an unordered list. */
    public function listu_close()
    {
        $this->doc .= "</ul>\n";
    }

    /** Open an ordered list without classes. */
    public function listo_open($classes = null)
    {
        $this->doc .= "<ol>\n";
    }

    /**
     * Open an ordered list with an explicit start number.
     *
     * A start of 1 is the HTML default, so it is emitted as a plain `<ol>`.
     *
     * @param int|string $start first item number; cast to int
     */
    public function listo_open_start($start = 1)
    {
        $start = (int) $start;
        if ($start === 1) {
            $this->listo_open();
            return;
        }
        $this->doc .= '<ol start="' . $start . "\">\n";
    }

    /** Close an ordered list. */
    public function listo_close()
    {
        $this->doc .= "</ol>\n";
    }

    /** Open a list item as a bare `<li>`; `$level` and `$node` are ignored. */
    public function listitem_open($level, $node = false)
    {
        $this->doc .= '<li>';
    }

    /** Close a list item. */
    public function listitem_close()
    {
        $this->doc .= "</li>\n";
    }

    /**
     * Intentionally emits nothing.
     *
     * GFM has no per-item content wrapper - tight items put text directly
     * inside <li>, loose items wrap it in <p>. The handler emits/strips
     * p_open / p_close to drive that distinction; the wrapper itself
     * produces no output here.
     */
    public function listcontent_open()
    {
    }

    /** Intentionally emits nothing; see {@see listcontent_open()}. */
    public function listcontent_close()
    {
    }

    /**
     * Render a file block in the same GFM shape as {@see code()}.
     * `$filename` and `$options` are ignored.
     */
    public function file($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render a preformatted block.
     *
     * The Preformatted CallWriter rewriter collapses start/content/
     * newline/end into one `preformatted` call. GFM expects the body
     * to end with a newline (spec example 104); DW's internal text
     * loses it to `trim()`, so we re-append here.
     */
    public function preformatted($text)
    {
        $this->doc .= $this->specCode($text . "\n", null);
    }

    /**
     * Append markup to the document, or hand it back to the caller.
     *
     * Mirrors the `$returnonly` / `$return` contract of the link and media
     * methods this class overrides.
     *
     * @param string $html finished markup
     * @param mixed $returnOnly truthy to return the markup instead of appending it
     * @return string|null the markup when `$returnOnly` is truthy, otherwise null
     */
    private function specEmit(string $html, $returnOnly)
    {
        if ($returnOnly) {
            return $html;
        }
        $this->doc .= $html;
        return null;
    }

    /**
     * GFM shape: <pre><code class="language-xxx">...</code></pre>. The
     * production DW renderer emits <pre class="code"> with no inner
     * <code>, which diverges byte-for-byte.
     *
     * @param mixed $text code body; cast to string and escaped with hsc()
     * @param mixed $language language tag; no class attribute when null or ''
     * @return string the complete `<pre><code>` element
     */
    private function specCode($text, $language): string
    {
        $classAttr = '';
        if ($language !== null && $language !== '') {
            $classAttr = ' class="language-' . hsc((string) $language) . '"';
        }
        return '<pre><code' . $classAttr . '>' . hsc((string) $text) . '</code></pre>';
    }

    /**
     * Build a bare `<img src="..." alt="..." />`.
     *
     * @param mixed $src image source; cast to string and escaped
     * @param mixed $alt alt text; null renders as `alt=""`
     * @param mixed $width emitted as an integer attribute unless null
     * @param mixed $height emitted as an integer attribute unless null
     * @return string the `<img>` element
     */
    private function specImg($src, $alt, $width, $height): string
    {
        $out = '<img src="' . hsc((string) $src) . '"';
        $out .= ' alt="' . hsc((string) $alt) . '"';
        if ($width !== null) {
            $out .= ' width="' . (int) $width . '"';
        }
        if ($height !== null) {
            $out .= ' height="' . (int) $height . '"';
        }
        $out .= ' />';
        return $out;
    }

    /**
     * Emit a bare <a href="...">label</a>. If the label is a media
     * descriptor array (the shape Media::parseMedia() returns, passed by
     * Internallink / GfmLink when the label is itself an image, e.g.
     * `{{img}}`), render the <img> inside the <a>.
     *
     * When no label is given, the visible text is the URL as written —
     * not its percent-encoded form, which is only used for the href.
     *
     * @param mixed $href link target; cast to string
     * @param string|array|null $label link text, media descriptor, or null/'' for "use the URL"
     * @return string the `<a>` element
     */
    private function specLink($href, $label): string
    {
        $rawHref = (string) $href;
        $hrefAttr = hsc($this->specEncodeUrl($rawHref));

        if (is_array($label) && isset($label['type'])) {
            // Descriptor keys other than `type` may be absent; default them
            // instead of raising undefined-index warnings.
            $img = $this->specImg(
                $label['src'] ?? '',
                $label['title'] ?? null,
                $label['width'] ?? null,
                $label['height'] ?? null
            );
            return '<a href="' . $hrefAttr . '">' . $img . '</a>';
        }

        $text = ($label === null || $label === '') ? $rawHref : $label;
        return '<a href="' . $hrefAttr . '">' . hsc((string) $text) . '</a>';
    }

    /**
     * Percent-encode characters not in CommonMark's URL-safe set,
     * preserving existing %XX sequences. Matches what cmark-gfm's
     * reference renderer does for the spec corpus: UTF-8 bytes and
     * non-URL-safe ASCII (e.g. `\`, space) become %XX; alphanumerics,
     * RFC 3986 unreserved/reserved, and `%` itself pass through.
     *
     * The pattern has no `u` modifier, so matching is byte-wise and each
     * byte of a multi-byte character is encoded separately.
     *
     * @param string $url URL as written in the source
     * @return string encoded URL; the input unchanged if the regex engine fails
     */
    private function specEncodeUrl(string $url): string
    {
        $encoded = preg_replace_callback(
            "/[^A-Za-z0-9\\-._~:\\/?#\\[\\]@!$&'()*+,;=%]/",
            static fn($m) => '%' . strtoupper(bin2hex($m[0])),
            $url
        );
        // preg_replace_callback() returns null on a PCRE error.
        return $encoded ?? $url;
    }
}