xref: /dokuwiki/_test/tests/Parsing/Markdown/SpecCompatRenderer.php (revision 6359e7fdf570c2d76e6de8eb24d1cf4c5eb296ae)
13440a8c0SAndreas Gohr<?php
23440a8c0SAndreas Gohr
33440a8c0SAndreas Gohrnamespace dokuwiki\test\Parsing\Markdown;
43440a8c0SAndreas Gohr
53440a8c0SAndreas Gohruse Doku_Renderer_xhtml;
63440a8c0SAndreas Gohr
/**
 * XHTML renderer tuned to emit the minimal HTML shape GFM's spec.txt uses.
 *
 * DokuWiki's production XHTML renderer wraps internal media in details
 * links pointing at `/lib/exe/fetch.php?media=...` / `/lib/exe/detail.php?media=...`,
 * rewrites internal link hrefs to `/doku.php?id=...`, and adds wiki-specific
 * classes and attributes. All of this is correct for live wiki pages but
 * diverges byte-for-byte from GFM's bare `<img src="...">` and
 * `<a href="...">...</a>`.
 *
 * This renderer is used only by {@see GfmSpecTest} so the spec roundtrip
 * can compare against byte-level spec HTML. Production rendering is
 * unchanged. Methods not overridden here fall through to the XHTML
 * renderer (paragraphs, emphasis, code spans, etc.) — those render the
 * same shape the spec expects.
 *
 * The link and media overrides honour their `$returnonly` / `$return`
 * flag: when it is truthy the generated HTML is returned instead of being
 * appended to the document.
 *
 * Note: title attributes on links/images are discarded at handle time
 * (no DW instruction slot), so spec examples that expect `title="..."`
 * still don't pass and stay in `skip.php`.
 */
class SpecCompatRenderer extends Doku_Renderer_xhtml
{
    /**
     * Open a table as a bare `<table>`.
     *
     * Production DW wraps `<table>` in `<div class="table"><table class="inline">`;
     * the spec expects bare `<table>`. All arguments are ignored.
     */
    public function table_open($maxcols = null, $numrows = null, $pos = null, $classes = null)
    {
        $this->doc .= "<table>\n";
    }

    /**
     * Close a table without the production wrapper's matching `</div>`.
     */
    public function table_close($pos = null)
    {
        $this->doc .= "</table>";
    }

    /**
     * Open a table row without DW's `class="rowN"` row counter — spec
     * rows have no class.
     */
    public function tablerow_open($classes = null)
    {
        $this->doc .= "<tr>\n";
    }

    /**
     * Open a header cell.
     *
     * Production DW emits alignment as `class="...align"`; the spec uses
     * an `align="..."` attribute. The `class="colN"` counter is dropped
     * too. Only `$align` is used.
     */
    public function tableheader_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        $this->doc .= '<th' . $this->alignAttr($align) . '>';
    }

    /**
     * Open a data cell; same attribute handling as {@see tableheader_open()}.
     */
    public function tablecell_open($colspan = 1, $align = null, $rowspan = 1, $classes = null)
    {
        $this->doc .= '<td' . $this->alignAttr($align) . '>';
    }

    /**
     * Build the ` align="..."` attribute for a table cell.
     *
     * @param string|null $align alignment keyword, or null for no attribute
     * @return string the attribute with a leading space, or '' when $align is null
     */
    private function alignAttr(?string $align): string
    {
        if ($align === null) {
            return '';
        }
        // Escaped like every other attribute value written by this class.
        return ' align="' . hsc($align) . '"';
    }

    /**
     * Render internal media as a bare `<img>`.
     *
     * Only `$src`, `$title` (used as alt text), `$width`, `$height` and
     * `$return` are used.
     *
     * @return string|null the HTML when $return is truthy, otherwise null
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render external media as a bare `<img>`; same handling as
     * {@see internalmedia()}.
     *
     * @return string|null the HTML when $return is truthy, otherwise null
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null,
        $return = false
    ) {
        return $this->specEmit($this->specImg($src, $title, $width, $height), $return);
    }

    /**
     * Render an internal link with the page id used verbatim as href.
     *
     * @return string|null the HTML when $returnonly is truthy, otherwise null
     */
    public function internallink($id, $name = null, $search = null, $returnonly = false, $linktype = 'content')
    {
        return $this->specEmit($this->specLink($id, $name), $returnonly);
    }

    /**
     * Render an external link as a bare `<a>`.
     *
     * @return string|null the HTML when $returnonly is truthy, otherwise null
     */
    public function externallink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render an interwiki link.
     *
     * Spec has no interwiki expectations; emit the raw `wp>Page` form as
     * href so the mode is still visible but obviously non-standard.
     *
     * @return string|null the HTML when $returnonly is truthy, otherwise null
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri, $returnonly = false)
    {
        return $this->specEmit($this->specLink($match, $name), $returnonly);
    }

    /**
     * Render an e-mail link; the address is the label when no name is given.
     *
     * @return string|null the HTML when $returnonly is truthy, otherwise null
     */
    public function emaillink($address, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('mailto:' . $address, $name ?? $address), $returnonly);
    }

    /**
     * Render a same-page link; the hash is the label when no name is given.
     *
     * @return string|null the HTML when $returnonly is truthy, otherwise null
     */
    public function locallink($hash, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink('#' . $hash, $name ?? $hash), $returnonly);
    }

    /**
     * Render a Windows share link as a bare `<a>`.
     *
     * @return string|null the HTML when $returnonly is truthy, otherwise null
     */
    public function windowssharelink($url, $name = null, $returnonly = false)
    {
        return $this->specEmit($this->specLink($url, $name), $returnonly);
    }

    /**
     * Render a code block in GFM shape; `$filename` and `$options` are ignored.
     */
    public function code($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render a hard line break.
     *
     * Production DW emits `<br/>` (no space); the spec expects the
     * XHTML-classic `<br />` (space before the slash).
     */
    public function linebreak()
    {
        $this->doc .= '<br />' . DOKU_LF;
    }

    /**
     * Open a blockquote.
     *
     * Production DW wraps blockquote content in `<div class="no">`;
     * the spec expects bare `<blockquote>...</blockquote>`.
     */
    public function quote_open()
    {
        $this->doc .= "<blockquote>\n";
    }

    /**
     * Close a blockquote.
     */
    public function quote_close()
    {
        $this->doc .= "</blockquote>\n";
    }

    /**
     * Open an unordered list as a bare `<ul>`; `$classes` is ignored.
     */
    public function listu_open($classes = null)
    {
        $this->doc .= "<ul>\n";
    }

    /**
     * Close an unordered list.
     */
    public function listu_close()
    {
        $this->doc .= "</ul>\n";
    }

    /**
     * Open an ordered list as a bare `<ol>`; `$classes` is ignored.
     */
    public function listo_open($classes = null)
    {
        $this->doc .= "<ol>\n";
    }

    /**
     * Open an ordered list that starts at a given number.
     *
     * A start of 1 produces a plain `<ol>`; any other value adds a
     * `start="N"` attribute.
     *
     * @param int|string $start first item number, cast to int
     */
    public function listo_open_start($start = 1)
    {
        $start = (int) $start;
        if ($start === 1) {
            $this->listo_open();
            return;
        }
        $this->doc .= '<ol start="' . $start . "\">\n";
    }

    /**
     * Close an ordered list.
     */
    public function listo_close()
    {
        $this->doc .= "</ol>\n";
    }

    /**
     * Open a list item as a bare `<li>`; both arguments are ignored.
     */
    public function listitem_open($level, $node = false)
    {
        $this->doc .= '<li>';
    }

    /**
     * Close a list item.
     */
    public function listitem_close()
    {
        $this->doc .= "</li>\n";
    }

    /**
     * Intentionally produces no output.
     *
     * GFM has no per-item content wrapper - tight items put text directly
     * inside <li>, loose items wrap it in <p>. The handler emits/strips
     * p_open / p_close to drive that distinction.
     */
    public function listcontent_open()
    {
    }

    /**
     * Intentionally produces no output; see {@see listcontent_open()}.
     */
    public function listcontent_close()
    {
    }

    /**
     * Render a file block exactly like {@see code()}.
     */
    public function file($text, $language = null, $filename = null, $options = null)
    {
        $this->doc .= $this->specCode($text, $language);
    }

    /**
     * Render preformatted text as a language-less code block.
     *
     * The Preformatted CallWriter rewriter collapses start/content/
     * newline/end into one `preformatted` call. GFM expects the body
     * to end with a newline (spec example 104); DW's internal text
     * loses it to `trim()`, so we re-append here.
     */
    public function preformatted($text)
    {
        $this->doc .= $this->specCode($text . "\n", null);
    }

    /**
     * Append HTML to the document, or hand it back to the caller.
     *
     * @param string $html       the rendered fragment
     * @param mixed  $returnOnly truthy to return the fragment instead of appending it
     * @return string|null the fragment when $returnOnly is truthy, otherwise null
     */
    private function specEmit(string $html, $returnOnly)
    {
        if ($returnOnly) {
            return $html;
        }
        $this->doc .= $html;
        return null;
    }

    /**
     * GFM shape: <pre><code class="language-xxx">...</code></pre>. The
     * production DW renderer emits <pre class="code"> with no inner
     * <code>, which diverges byte-for-byte.
     *
     * @param mixed $text     code body, cast to string and HTML-escaped
     * @param mixed $language language name; null or '' omits the class attribute
     * @return string the rendered block, without a trailing newline
     */
    private function specCode($text, $language): string
    {
        $classAttr = '';
        if ($language !== null && $language !== '') {
            $classAttr = ' class="language-' . hsc((string) $language) . '"';
        }
        return '<pre><code' . $classAttr . '>' . hsc((string) $text) . '</code></pre>';
    }

    /**
     * Emit a bare `<img src="..." alt="..." />`.
     *
     * @param mixed $src    image source, cast to string and HTML-escaped
     * @param mixed $alt    alt text; null yields `alt=""`
     * @param mixed $width  width in pixels, cast to int; null omits the attribute
     * @param mixed $height height in pixels, cast to int; null omits the attribute
     * @return string the rendered tag
     */
    private function specImg($src, $alt, $width, $height): string
    {
        $out = '<img src="' . hsc((string) $src) . '"';
        $out .= ' alt="' . hsc((string) $alt) . '"';
        if ($width !== null) {
            $out .= ' width="' . (int) $width . '"';
        }
        if ($height !== null) {
            $out .= ' height="' . (int) $height . '"';
        }
        $out .= ' />';
        return $out;
    }

    /**
     * Emit a bare <a href="...">label</a>. If the label is a media
     * descriptor array (the shape Media::parseMedia() returns, passed by
     * Internallink / GfmLink when the label is `{{img}}` / `![alt](img)`),
     * render the <img> inside the <a>.
     *
     * The href attribute is percent-encoded and then HTML-escaped. When no
     * label is given the visible text falls back to the URL as written,
     * not to its percent-encoded form.
     *
     * @param mixed $href  link target, cast to string
     * @param mixed $label link text, media descriptor array, or null/'' for none
     * @return string the rendered anchor
     */
    private function specLink($href, $label): string
    {
        $rawHref = (string) $href;
        $open = '<a href="' . hsc($this->specEncodeUrl($rawHref)) . '">';

        if (is_array($label) && isset($label['type'])) {
            $inner = $this->specImg(
                $label['src'] ?? '',
                $label['title'] ?? null,
                $label['width'] ?? null,
                $label['height'] ?? null
            );
            return $open . $inner . '</a>';
        }

        // Fall back to the raw URL so the label is not shown with %XX escapes.
        $text = ($label === null || $label === '') ? $rawHref : $label;
        return $open . hsc((string) $text) . '</a>';
    }

    /**
     * Percent-encode characters outside the URL-safe set, preserving
     * existing %XX sequences: alphanumerics, RFC 3986 unreserved/reserved
     * characters and `%` itself pass through; everything else (UTF-8
     * bytes, `\`, space, ...) becomes %XX. Intended to mirror what
     * cmark-gfm's reference renderer does for the spec corpus.
     *
     * The pattern deliberately has no `u` modifier: it matches single
     * bytes, so each byte of a multi-byte character gets its own %XX.
     *
     * NOTE(review): `[` and `]` pass through here; cmark's own href
     * escaping appears to encode them (%5B / %5D) — confirm against the
     * spec examples before relying on bracketed URLs.
     *
     * @param string $url the URL as written in the source
     * @return string the encoded URL, or $url unchanged if the regex engine fails
     */
    private function specEncodeUrl(string $url): string
    {
        $encoded = preg_replace_callback(
            "/[^A-Za-z0-9\\-._~:\\/?#\\[\\]@!$&'()*+,;=%]/",
            static fn($m) => '%' . strtoupper(bin2hex($m[0])),
            $url
        );
        // preg_replace_callback() returns null on a PCRE error.
        return $encoded ?? $url;
    }
}
276