<?php

/**
 * GFM spec examples that GfmSpecTest should skip, keyed by example number
 * (as numbered in spec.txt / the rendered spec).
 *
 * Add entries here ONLY for behavior DokuWiki has explicitly decided not to
 * implement — not for features that are merely pending. Unimplemented
 * features should show as real failures so they remain visible TODOs on
 * the branch.
 *
 * Each value is a short human-readable reason that will appear in phpunit's
 * skip output.
 */

return [
    // --------------------------------------------------------------------
    // Fenced code blocks (GfmCode / GfmFile) — deliberate simplifications
    // versus strict GFM. All of these are consequences of lexer constraints
    // (no regex backreferences) or the deliberate column-0-only policy.
    // --------------------------------------------------------------------
    94  => 'fenced code: closing fence must be ≥ opening length — DokuWiki'
         . ' accepts any 3+ run as a closer (no regex backreferences for'
         . ' length pairing). Deliberate relaxation.',
    95  => 'fenced code (tilde variant): closing fence must be ≥ opening'
         . ' length — see example 94.',
    96  => 'fenced code: unclosed fence — DokuWiki convention requires a'
         . ' closer (matches DW <code> tag), so unclosed fences stay'
         . ' literal rather than consuming to EOF. GFM spec rule depends'
         . ' on CommonMark\'s two-pass block parser, which our single-pass'
         . ' lexer cannot implement fully anyway (see example 98).',
    97  => 'fenced code: unclosed fence with intervening short run — stays'
         . ' literal, see example 96.',
    101 => 'fenced code: opener indented 1 space — DokuWiki requires'
         . ' column-0 fences. Indent tolerance + per-line body dedent out'
         . ' of scope.',
    102 => 'fenced code: opener indented 2 spaces — see example 101.',
    103 => 'fenced code: opener indented 3 spaces — see example 101.',
    105 => 'fenced code: closer indented 2 spaces — column-0-only policy,'
         . ' see example 101.',
    106 => 'fenced code: indented opener with less-indented closer —'
         . ' column-0-only policy, see example 101.',
    107 => 'fenced code: 4-space-indented closer — with column-0-only'
         . ' policy there is no valid closer, so the fence stays literal'
         . ' (see example 96).',
    108 => 'fenced code: `` `` is not a valid fence; GFM falls back to an'
         . ' inline code span of length 3. Inline spans with n≥3 not'
         . ' implemented (GfmBacktickSingle/Double cover only n=1, n=2).',
    109 => 'fenced code: malformed closer `~~~ ~~` (space-broken run) —'
         . ' with no valid closer the fence stays literal (see example 96).',
    111 => 'fenced code interrupting Setext heading (`foo\n---`): Setext'
         . ' headings are deliberately not supported (SPEC.md Limits).',
    115 => 'fenced code: `` `` backtick-fence-with-backticks-in-info-string'
         . ' is invalid; GFM falls back to n=3 inline span — inline spans'
         . ' with n≥3 not implemented. See example 108.',

    // --------------------------------------------------------------------
    // Code-span edge cases that collide with project-wide decisions
    // (no raw HTML, no GFM angle-bracket autolinks, typography on by
    // default) or with the single-pass lexer's limits.
    // --------------------------------------------------------------------
    351 => 'code span vs. emphasis: cross-positional precedence would require'
         . ' a pre-scan pass — the single-pass lexer matches leftmost-first'
         . ' and cannot reject an earlier emphasis opener because a later'
         . ' backtick span would consume its closer',
    353 => 'code span: the trailing `"` outside the span is converted to a'
         . ' curly quote by DokuWiki typography, diverging from the spec HTML',
    354 => 'raw HTML tag pass-through: DokuWiki does not render raw HTML by'
         . ' default; `<html>` blocks are the opt-in',
    356 => 'GFM angle-bracket autolink `<http://…>`: not implemented — we'
         . ' rely on DokuWiki\'s existing bare-URL detection, which does not'
         . ' parse `<URL>` form',

    // --------------------------------------------------------------------
    // CommonMark §6.2 flanking-delimiter analysis — deliberately not
    // implemented. DokuWiki's regex lexer uses leftmost-match and cannot
    // apply CommonMark's left/right-flanking rules that distinguish
    // word-chars, whitespace, and punctuation for `*`/`_` delimiters, or
    // the "multiple-of-3" rule for overlapping runs. These examples all
    // rely on that machinery.
    // --------------------------------------------------------------------

    // Unicode whitespace in flanking context. Our `\s` is ASCII-only
    // because the lexer doesn't set the PCRE `u` flag.
    363 => 'Unicode whitespace (U+00A0) flanking — requires u-flag-aware regex',

    // Punctuation-adjacent flanking for `*` / `_` / `**` / `__`
    362 => 'flanking: punctuation-adjacent `*` (left-flanking vs. right-flanking)',
    368 => 'flanking: punctuation-adjacent `_`',
    372 => 'flanking: intraword `_` with punctuation inside',
    377 => 'flanking: `*` followed by `(` requires punctuation-aware flanking',
    378 => 'flanking: nested `*(*foo*)*` requires flanking + balanced-pair analysis',
    382 => 'flanking: nested `_(_foo_)_` requires flanking + balanced-pair analysis',
    389 => 'flanking: punctuation-adjacent `**`',
    394 => 'flanking: punctuation-adjacent `__`',
    401 => 'flanking: `**` followed by `(`',
    404 => 'flanking: nested `*bar*` inside `**foo ... foo**` with punctuation',
    407 => 'flanking: `__` followed by `(`',
    470 => 'flanking: nested `*_foo_*` requires balanced-pair analysis',
    472 => 'flanking: nested `_*foo*_` requires balanced-pair analysis',

    // Intraword `__` strong (even multibyte) — flanking rule for `_` requires
    // examining whether the delimiter run is word-boundary-flanking, which our
    // simple lookbehind/lookahead approximation doesn't fully match.
    395 => 'flanking: intraword `__` (`foo__bar__`) — left-flanking vs right-flanking',
    396 => 'flanking: intraword `__` across digits (`5__6__78`)',
    397 => 'flanking: intraword `__` with Cyrillic',
    398 => 'flanking: `__foo, __bar__, baz__` — flanking + balanced pairing',
    409 => 'flanking: `__foo__bar` — intraword close',
    410 => 'flanking: intraword `__` with Cyrillic (leading)',
    411 => 'flanking: `__foo__bar__baz__` — multiple `__` pairs with flanking',
    412 => 'flanking: `__(bar)__.` — punctuation-adjacent',

    // Overlapping / multiple-of-3 rule for runs
    416 => 'CommonMark rule 9 (overlapping same-delimiter `_foo _bar_ baz_`)',
    417 => 'CommonMark overlapping `_` / `__` with flanking',
    418 => 'CommonMark overlapping `*foo *bar**` — multiple-of-3 rule',
    419 => 'CommonMark nested `*foo **bar** baz*` — balanced-pair analysis',
    421 => 'CommonMark overlapping `*foo**bar*` — multiple-of-3',
    422 => 'CommonMark nested `***foo** bar*` — triple-delimiter analysis',
    423 => 'CommonMark nested `*foo **bar***` — triple-delimiter analysis',
    424 => 'CommonMark nested `*foo**bar***` — triple-delimiter analysis',
    425 => 'CommonMark triple `foo***bar***baz` — triple-delimiter analysis',
    426 => 'CommonMark long delimiter runs `foo******bar*********baz`',
    427 => 'CommonMark deeply nested `*foo **bar *baz* bim** bop*`',
    434 => 'CommonMark overlapping `__foo __bar__ baz__` — multiple-of-3',
    435 => 'CommonMark `____foo__ bar__` — leading long delimiter run',
    436 => 'CommonMark `**foo **bar****` — trailing long delimiter run',
    439 => 'CommonMark nested `***foo* bar**` — triple-delimiter',
    440 => 'CommonMark nested `**foo *bar***` — triple-delimiter',
    441 => 'CommonMark deeply nested `**foo *bar **baz** bim* bop**`',

    // `__foo_` / `_foo__` — mixing `_` and `__` requires flanking to decide
    // which delimiter pairs open/close.
    463 => 'flanking: `__foo_` — mixed `_`/`__` pairing',
    464 => 'flanking: `_foo__` — mixed `_`/`__` pairing',
    465 => 'flanking: `___foo__` — delimiter-run length analysis',
    466 => 'flanking: `____foo_` — delimiter-run length analysis',
    467 => 'flanking: `__foo___` — delimiter-run length analysis',
    468 => 'flanking: `_foo____` — delimiter-run length analysis',

    // Long delimiter runs require excess-drop logic (2 outer chars dropped
    // from each side). Stack-based pairing needed — out of scope.
    // NOTE(review): the spec HTML for these examples appears to nest one
    // <strong> per `**`/`__` pair rather than dropping delimiters — confirm
    // that the "excess-drop" wording in the reasons below is what is meant.
    473 => 'CommonMark `****foo****` — excess-drop (4+4 → strong only)',
    474 => 'CommonMark `____foo____` — excess-drop (4+4 → strong only)',
    475 => 'CommonMark `******foo******` — excess-drop (6+6 → strong only)',
    477 => 'CommonMark `_____foo_____` — excess-drop (5+5 → em+strong, 2 dropped each side)',

    // Overlapping / crossing delimiters
    478 => 'CommonMark `*foo _bar* baz_` — overlapping different delimiters',
    479 => 'CommonMark `*foo __bar *baz bim__ bam*` — crossing delimiters',
    480 => 'CommonMark `**foo **bar baz**` — overlapping same delimiter',

    // --------------------------------------------------------------------
    // Inline link `[text](url)` — features GfmLink deliberately does not
    // implement. Either rarely-used syntax paid for with disproportionate
    // regex complexity, or single-pass-lexer limits that can't be worked
    // around inside one mode.
    // --------------------------------------------------------------------

    // GFM link title attribute (`"title"` / `'title'` / `(title)` after
    // the URL). Parses cleanly but is discarded: DokuWiki's link handler
    // instructions have no title-attribute slot, and plumbing one through
    // every renderer is out of scope for GfmLink.
    493 => 'link title attribute: GfmLink parses but discards — DokuWiki link instructions have no title slot',
    513 => 'link title attribute (three quoting styles): discarded by GfmLink',
    515 => 'link title separated by non-breaking space: title slot not supported',
    516 => 'link title with nested balanced quotes: Markdown.pl quirk, not supported',
    517 => 'link title with different quote type for inner quotes: title slot not supported',
    518 => 'multi-line link title: title slot not supported',

    // Pointy-bracket link destinations `<...>`. Rarely used; regex cost
    // and interaction with raw-HTML detection outweigh the benefit.
    496 => 'pointy-bracket link destination `<>`: not supported',
    498 => 'pointy-bracket destination with spaces `<...>`: not supported',
    500 => 'pointy-bracket destination with newline: not supported',
    501 => 'pointy-bracket destination containing `)`: not supported',
    502 => 'pointy-bracket destination with trailing backslash: not supported',
    503 => 'malformed pointy-bracket destinations: renderer output differs',
    507 => 'pointy-bracket destination wrapping unbalanced parens: not supported',

    // Balanced-parens inside URL destinations.
    505 => 'balanced-parens in URL destination: not supported (regex single-level)',

    // Other URL-level edges.
    495 => 'empty URL destination `[link]()`: pattern requires non-empty URL',
    510 => 'backslash in URL destination: URL-encoding differs from spec',
    511 => 'HTML entity / percent-encoding in URL: renderer normalization differs',
    512 => 'link destination that parses as a title: edge case not supported',

    // Inherent single-pass-lexer limits for link text containing nested
    // structures. These cannot be resolved inside one mode.
    522 => 'nested bracket forms inner link, outer falls back to literal',
    526 => 'nested links: inner is a link, outer falls back to literal',
    527 => 'nested links inside emphasis: not supported',
    529 => 'link text grouping vs. emphasis: leftmost-match cannot override',
    530 => 'emphasis/bracket crossing: leftmost-match cannot override',
    532 => 'raw HTML inside link text: project-wide "no raw HTML" limit',
    533 => 'code span inside link text: requires pre-scan pass (see #351)',
    534 => 'autolink inside link text: raw `<URL>` autolinks not supported (see #356)',

    // Reference links (`[text][id]`, `[text][]`, `[foo]` with matching
    // `[foo]: url` definition). Not implemented: resolving forward
    // references would require a two-pass parse, but DokuWiki's lexer is
    // single-pass. Inline links `[text](url)` are the only supported
    // form.
    535 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    536 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    537 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    538 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    539 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    540 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    541 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    542 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    543 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    544 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    545 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    546 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    547 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    548 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    549 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    550 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    551 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    552 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    553 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    557 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    558 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    560 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    561 => 'collapsed reference link: forward-reference definitions not supported (single-pass lexer)',
    562 => 'collapsed reference link: forward-reference definitions not supported (single-pass lexer)',
    563 => 'collapsed reference link: forward-reference definitions not supported (single-pass lexer)',
    564 => 'collapsed reference link: forward-reference definitions not supported (single-pass lexer)',
    565 => 'shortcut reference link: forward-reference definitions not supported (single-pass lexer)',
    566 => 'shortcut reference link: forward-reference definitions not supported (single-pass lexer)',
    567 => 'shortcut reference link: forward-reference definitions not supported (single-pass lexer)',
    568 => 'shortcut reference link: forward-reference definitions not supported (single-pass lexer)',
    569 => 'shortcut reference link: forward-reference definitions not supported (single-pass lexer)',
    570 => 'shortcut reference link: forward-reference definitions not supported (single-pass lexer)',
    571 => 'shortcut reference link with escape: forward-reference definitions not supported (single-pass lexer)',
    572 => 'shortcut reference link with emphasis: forward-reference definitions not supported (single-pass lexer)',
    573 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    574 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    575 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    576 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    577 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    578 => 'reference link: forward-reference definitions not supported (single-pass lexer)',
    579 => 'reference link: forward-reference definitions not supported (single-pass lexer)',

    // --------------------------------------------------------------------
    // Inline image `![alt](url)`. The XHTML renderer's default media
    // rendering diverges from GFM's bare <img> (it wraps in a details <a>
    // with fetch.php/detail.php proxy URLs) — GfmSpecTest uses
    // SpecCompatRenderer to emit spec-shape bare <img>, so only the
    // parser-level or feature-level gaps remain as skips: title attribute
    // (no DW slot), reference images, pointy-bracket destinations, nested
    // brackets, and escape-dependent cases.
    // --------------------------------------------------------------------

    580 => 'image with title attribute: GfmMedia discards titles (no DW slot)',
    581 => 'reference-style image: forward-reference definitions not supported (single-pass lexer)',
    582 => 'nested image-in-image `![foo ![bar](x)](y)`: alt class forbids brackets;'
         . ' leftmost-match cannot reorder — outer falls back to literal (see #526)',
    583 => 'link-in-image alt `![foo [bar](x)](y)`: alt class forbids brackets;'
         . ' leftmost-match cannot reorder — outer falls back to literal (see #526)',
    584 => 'collapsed reference-style image: forward-reference definitions not supported',
    585 => 'full reference-style image: forward-reference definitions not supported',
    587 => 'image with title attribute: title discarded (no DW slot)',
    588 => 'pointy-bracket image destination `![alt](<url>)`: not supported (see GfmLink #496)',
    590 => 'reference-style image: forward-reference definitions not supported',
    591 => 'reference-style image (case-insensitive label): forward-reference definitions not supported',
    592 => 'collapsed reference-style image `![foo][]`: forward-reference definitions not supported',
    593 => 'collapsed reference-style image with emphasis in label: forward-reference definitions not supported',
    594 => 'collapsed reference-style image (case-insensitive): forward-reference definitions not supported',
    595 => 'reference-style image with intervening whitespace: forward-reference definitions not supported',
    596 => 'shortcut reference-style image `![foo]`: forward-reference definitions not supported',
    597 => 'shortcut reference-style image with emphasis: forward-reference definitions not supported',
    598 => 'image with unescaped nested brackets `![[foo]]`: literal-fallback behavior not supported',
    599 => 'shortcut reference-style image (case-insensitive): forward-reference definitions not supported',

    // NOTE(review): the two reasons below are labelled "pending", but this
    // file's header reserves skips for behavior DokuWiki has explicitly
    // decided not to implement. Remove these entries once GfmEscape lands,
    // or confirm they are a deliberate exception to that policy.
    600 => 'escape in image syntax `!\[foo]`: depends on GfmEscape (pending)',
    601 => 'backslash-escape of `!` before link: depends on GfmEscape (pending)',

    // --------------------------------------------------------------------
    // ATX heading collisions with DokuWiki-specific behavior. Examples 39
    // and 40 are skipped for the indented-code-block interplay described
    // in their reasons rather than for heading syntax itself.
    // --------------------------------------------------------------------
    38 => 'ATX heading with leading spaces: GFM tolerates 0-3 spaces of'
        . ' indent before the opener; we require the `#` at column 0.'
        . ' Indent tolerance collides with DokuWiki\'s 2-space-indent'
        . ' preformatted block and isn\'t worth untangling',
    39 => 'indented code block: DokuWiki uses 2-space indent for'
        . ' preformatted; GFM 4-space indented code blocks are not'
        . ' implemented',
    40 => 'indented code block: 4-space indent after a paragraph is a'
        . ' continuation in GFM but preformatted in DokuWiki — not'
        . ' implemented',
    41 => 'ATX heading with leading spaces: second heading is indented'
        . ' by 2 spaces; we require the `#` at column 0',
    49 => 'empty ATX heading: DokuWiki\'s XHTML renderer deliberately'
        . ' skips blank headings (blank() guard in Doku_Renderer_xhtml::header)',
];