xref: /dokuwiki/_test/tests/Parsing/Markdown/SpecCompatRenderer.php (revision 2e43b79909f3bc04928779d886f68c1242b5d436)
1<?php
2
3namespace dokuwiki\test\Parsing\Markdown;
4
5use Doku_Renderer_xhtml;
6
7/**
8 * XHTML renderer tuned to emit the minimal HTML shape GFM's spec.txt uses.
9 *
10 * DokuWiki's production XHTML renderer wraps internal media in details
11 * links pointing at `/lib/exe/fetch.php?media=...` / `/lib/exe/detail.php?media=...`,
12 * rewrites internal link hrefs to `/doku.php?id=...`, and adds wiki-specific
13 * classes and attributes. All of this is correct for live wiki pages but
14 * diverges byte-for-byte from GFM's bare `<img src="...">` and
15 * `<a href="...">...</a>`.
16 *
17 * This renderer is used only by {@see GfmSpecTest} so the spec roundtrip
18 * can compare against byte-level spec HTML. Production rendering is
19 * unchanged. Methods not overridden here fall through to the XHTML
20 * renderer (paragraphs, emphasis, code spans, lists, etc.) — those render
21 * the same shape the spec expects.
22 *
23 * Note: title attributes on links/images are discarded at handle time
24 * (no DW instruction slot), so spec examples that expect `title="..."`
25 * still don't pass and stay in `skip.php`.
26 */
class SpecCompatRenderer extends Doku_Renderer_xhtml
{
    /**
     * Open a table as a bare `<table>`.
     *
     * All arguments are accepted for signature compatibility and ignored.
     */
    public function table_open($maxcols = null, $numrows = null, $pos = null, $classes = null)
    {
        // Production DW wraps `<table>` in `<div class="table"><table class="inline">`;
        // the spec expects bare `<table>`.
        $this->doc .= "<table>\n";
    }

    /**
     * Close the table opened by {@see table_open()}.
     */
    public function table_close($pos = null)
    {
        // Drop the matching `</div>` from the production wrapper.
        $this->doc .= "</table>";
    }

    /**
     * Open a table row without any class attribute.
     */
    public function tablerow_open($classes = null)
    {
        // Strip DW's `class="rowN"` row counter — spec rows have no class.
        $this->doc .= "<tr>\n";
    }

    /**
     * Open a header cell; only the alignment is rendered, colspan, rowspan
     * and classes are ignored.
     */
    public function tableheader_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        // Production DW emits alignment as `class="...align"`; the spec uses
        // an `align="..."` attribute. Drop the `class="colN"` counter too.
        $this->doc .= '<th' . $this->alignAttr($align) . '>';
    }

    /**
     * Open a data cell; only the alignment is rendered, colspan, rowspan
     * and classes are ignored.
     */
    public function tablecell_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        $this->doc .= '<td' . $this->alignAttr($align) . '>';
    }

    /**
     * Build the ` align="..."` attribute for a table cell.
     *
     * @param string|null $align alignment keyword, or null for none
     * @return string the attribute with a leading space, or '' when there
     *                is no alignment to express
     */
    private function alignAttr(?string $align): string
    {
        if ($align === null || $align === '') return '';
        // Escaped like every other attribute value this renderer writes.
        return ' align="' . hsc($align) . '"';
    }

    /**
     * Render internal media as a bare `<img>`.
     *
     * Only $src, $title (used as alt text), $width and $height influence
     * the markup.
     *
     * @return string|null the markup when $return is set, otherwise null
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render external media as a bare `<img>`.
     *
     * Only $src, $title (used as alt text), $width and $height influence
     * the markup.
     *
     * @return string|null the markup when $return is set, otherwise null
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render an internal link with the page id used verbatim as href.
     *
     * @return string|null the markup when $returnonly is set, otherwise null
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {
        return $this->specEmit($this->specLink($id, $name), $returnonly);
    }

    /**
     * Render an external link as a bare anchor.
     *
     * @return string|null the markup when $returnonly is set, otherwise null
     */
    public function externallink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render an interwiki link using the raw match as href.
     *
     * @return string|null the markup when $returnonly is set, otherwise null
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri, $returnonly = false)
    {
        // Spec has no interwiki expectations; emit the raw `wp>Page` form as
        // href so the mode is still visible but obviously non-standard.
        return $this->specEmit($this->specLink($match, $name), $returnonly);
    }

    /**
     * Render an e-mail link; the label defaults to the bare address.
     *
     * @return string|null the markup when $returnonly is set, otherwise null
     */
    public function emaillink($address, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('mailto:' . $address, $name ?? $address), $returnonly);
    }

    /**
     * Render a same-page link; the label defaults to the bare fragment.
     *
     * @return string|null the markup when $returnonly is set, otherwise null
     */
    public function locallink($hash, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('#' . $hash, $name ?? $hash), $returnonly);
    }

    /**
     * Render a Windows share link as a bare anchor.
     *
     * @return string|null the markup when $returnonly is set, otherwise null
     */
    public function windowssharelink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render a code block; $filename and $options are ignored.
     */
    public function code($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render a hard line break.
     */
    public function linebreak()
    {
        // Production DW emits `<br/>` (no space); the spec expects the
        // XHTML-classic `<br />` (space before the slash).
        $this->doc .= '<br />' . DOKU_LF;
    }

    /**
     * Render an entity match as its original source text.
     */
    public function entity($entity)
    {
        // The Entity mode rewrites --, ---, ->, (c), ... and other prose
        // abbreviations into typographic glyphs via conf/entities.conf.
        // Correct for live wiki pages, diverges byte-for-byte from the
        // GFM spec corpus which expects those bytes preserved literally.
        // Emit the original match (HTML-escaped) instead of a glyph.
        $this->doc .= $this->_xmlEntities((string) $entity);
    }

    /**
     * Escape body text the way the spec output does.
     *
     * @param mixed $string value to escape; cast to string first
     * @return string escaped text
     */
    public function _xmlEntities($string)
    {
        // Production hsc() escapes both `"` and `'` (ENT_QUOTES) so cdata
        // is safe to splice into any HTML attribute as well as body text.
        // CommonMark / GFM spec output uses a narrower body-text policy:
        // `"` is escaped to `&quot;` (e.g. example #323) but `'` is left
        // literal (e.g. example #670). ENT_COMPAT matches that exactly.
        // Attribute values rendered by SpecCompatRenderer (href, src, alt)
        // still go through hsc() in specLink / specImg, which escapes both.
        return htmlspecialchars(
            (string) $string,
            ENT_COMPAT | ENT_SUBSTITUTE | ENT_HTML401,
            'UTF-8'
        );
    }

    /**
     * Open a block quote.
     */
    public function quote_open()
    {
        // Production DW wraps blockquote content in `<div class="no">`;
        // the spec expects bare `<blockquote>...</blockquote>`.
        $this->doc .= "<blockquote>\n";
    }

    /**
     * Close a block quote.
     */
    public function quote_close()
    {
        $this->doc .= "</blockquote>\n";
    }

    /**
     * Open an unordered list; $classes is ignored.
     */
    public function listu_open($classes = null)
    {
        $this->doc .= "<ul>\n";
    }

    /**
     * Close an unordered list.
     */
    public function listu_close()
    {
        $this->doc .= "</ul>\n";
    }

    /**
     * Open an ordered list; $classes is ignored.
     */
    public function listo_open($classes = null)
    {
        $this->doc .= "<ol>\n";
    }

    /**
     * Open an ordered list that starts at the given number.
     *
     * A start of 1 produces a plain `<ol>`; any other value adds a
     * `start` attribute.
     *
     * @param int|string $start first item number; cast to int
     */
    public function listo_open_start($start = 1)
    {
        $start = (int) $start;
        if ($start === 1) {
            $this->listo_open();
            return;
        }
        $this->doc .= '<ol start="' . $start . "\">\n";
    }

    /**
     * Close an ordered list.
     */
    public function listo_close()
    {
        $this->doc .= "</ol>\n";
    }

    /**
     * Open a list item; $level and $node are ignored.
     */
    public function listitem_open($level, $node = false)
    {
        $this->doc .= '<li>';
    }

    /**
     * Close a list item.
     */
    public function listitem_close()
    {
        $this->doc .= "</li>\n";
    }

    /**
     * Open the content wrapper of a list item; intentionally emits nothing.
     */
    public function listcontent_open()
    {
        // GFM has no per-item content wrapper - tight items put text directly
        // inside <li>, loose items wrap it in <p>. The handler emits/strips
        // p_open / p_close to drive that distinction; the wrapper itself
        // produces no output here.
    }

    /**
     * Close the content wrapper of a list item; intentionally emits nothing.
     */
    public function listcontent_close()
    {
    }

    /**
     * Render a file block exactly like a code block; $filename and
     * $options are ignored.
     */
    public function file($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render preformatted text as a language-less code block.
     */
    public function preformatted($text)
    {
        // The Preformatted CallWriter rewriter collapses start/content/
        // newline/end into one `preformatted` call. GFM expects the body
        // to end with a newline (spec example 104); DW's internal text
        // loses it to `trim()`, so we re-append here.
        $this->doc .= $this->specCode($text . "\n", null);
    }

    /**
     * Append finished markup to the document, or hand it back instead.
     *
     * Keeps the return-only contract of the inherited link/media
     * signatures: callers passing a truthy flag get the markup as a
     * string and the document is left untouched.
     *
     * @param string $html       finished markup
     * @param mixed  $returnonly truthy to return the markup instead of appending
     * @return string|null the markup when $returnonly is truthy, otherwise null
     */
    private function specEmit(string $html, $returnonly)
    {
        if ($returnonly) return $html;
        $this->doc .= $html;
        return null;
    }

    /**
     * GFM shape: <pre><code class="language-xxx">...</code></pre>. The
     * production DW renderer emits <pre class="code"> with no inner
     * <code>, which diverges byte-for-byte.
     *
     * @param mixed $text     code body; cast to string
     * @param mixed $language language name; null or '' omits the class
     * @return string the complete `<pre><code>` element
     */
    private function specCode($text, $language): string
    {
        $classAttr = '';
        if ($language !== null && $language !== '') {
            $classAttr = ' class="language-' . hsc((string) $language) . '"';
        }
        // The code body is element content, not an attribute value, so it
        // follows the body-text escaping policy of _xmlEntities().
        return '<pre><code' . $classAttr . '>' . $this->_xmlEntities((string) $text) . '</code></pre>';
    }

    /**
     * Build a bare `<img ... />` element.
     *
     * @param mixed $src    image source; cast to string
     * @param mixed $alt    alternative text; null yields an empty alt
     * @param mixed $width  width, cast to int; null omits the attribute
     * @param mixed $height height, cast to int; null omits the attribute
     * @return string the complete element
     */
    private function specImg($src, $alt, $width, $height): string
    {
        $out = '<img src="' . hsc((string) $src) . '"';
        $out .= ' alt="' . hsc((string) $alt) . '"';
        if ($width !== null)  $out .= ' width="' . (int) $width . '"';
        if ($height !== null) $out .= ' height="' . (int) $height . '"';
        $out .= ' />';
        return $out;
    }

    /**
     * Emit a bare <a href="...">label</a>. If the label is a media
     * descriptor array (the shape Media::parseMedia() returns, passed by
     * Internallink / GfmLink when the label is `{{img}}` / `![alt](img)`),
     * render the <img> inside the <a>.
     *
     * A null, empty or otherwise unusable label falls back to the
     * percent-encoded href as link text.
     *
     * @param mixed $href  link target; cast to string and percent-encoded
     * @param mixed $label link text, media descriptor array, or null
     * @return string the complete anchor element
     */
    private function specLink($href, $label): string
    {
        $href = $this->specEncodeUrl((string) $href);
        $open = '<a href="' . hsc($href) . '">';

        if (is_array($label)) {
            if (isset($label['type'])) {
                // Guard every key so an incomplete descriptor cannot
                // raise undefined-index notices.
                $img = $this->specImg(
                    $label['src'] ?? '',
                    $label['title'] ?? null,
                    $label['width'] ?? null,
                    $label['height'] ?? null
                );
                return $open . $img . '</a>';
            }
            // An array that is not a media descriptor has no usable text.
            $label = null;
        }

        $text = ($label === null || $label === '') ? $href : $label;
        // Link text is element content, so it follows the body-text
        // escaping policy of _xmlEntities() rather than hsc().
        return $open . $this->_xmlEntities((string) $text) . '</a>';
    }

    /**
     * Percent-encode characters not in cmark-gfm's URL-safe set,
     * preserving existing %XX sequences. cmark-gfm's HREF_SAFE table
     * (houdini_href_e.c) excludes square brackets, backslash, caret,
     * backtick, and braces from the safe set even though RFC 3986
     * lists `[]` as reserved gen-delims; matching that table is what
     * lets the spec corpus round-trip byte-for-byte.
     *
     * The pattern has no `u` modifier, so it matches single bytes and
     * each unsafe byte becomes one upper-case %XX triplet.
     *
     * @param string $url URL to encode
     * @return string encoded URL; the input unchanged if PCRE fails
     */
    private function specEncodeUrl(string $url): string
    {
        $encoded = preg_replace_callback(
            "/[^A-Za-z0-9\\-._~:\\/?#@!$&'()*+,;=%]/",
            static fn($m) => '%' . strtoupper(bin2hex($m[0])),
            $url
        );
        // preg_replace_callback() yields null on a PCRE error, which
        // would violate the declared string return type.
        return $encoded ?? $url;
    }
}
303