xref: /dokuwiki/_test/tests/Parsing/Markdown/gfm-spec/skip.php (revision 8ed75a23932353c18b43f67323808e9a662f532a)
<?php

/**
 * GFM spec examples that GfmSpecTest should skip, keyed by example number
 * (as numbered in spec.txt / the rendered spec).
 *
 * Add entries here ONLY for behavior DokuWiki has explicitly decided not to
 * implement — not for features that are merely pending. Unimplemented
 * features should show as real failures so they remain visible TODOs on
 * the branch.
 *
 * Each value is a short human-readable reason that will appear in phpunit's
 * skip output.
 *
 * The file evaluates to an `array<int, string>` (example number => reason).
 * Entries are grouped by the reason category, not sorted numerically, so the
 * order of keys within the array is not significant for lookups.
 */

return [
    // --------------------------------------------------------------------
    // Code-span edge cases that collide with project-wide decisions
    // (no raw HTML, no GFM angle-bracket autolinks, typography on by
    // default) or with the single-pass lexer's limits.
    // --------------------------------------------------------------------
    351 => 'code span vs. emphasis: cross-positional precedence would require'
         . ' a pre-scan pass — the single-pass lexer matches leftmost-first'
         . ' and cannot reject an earlier emphasis opener because a later'
         . ' backtick span would consume its closer',
    353 => 'code span: the trailing `"` outside the span is converted to a'
         . ' curly quote by DokuWiki typography, diverging from the spec HTML',
    354 => 'raw HTML tag pass-through: DokuWiki does not render raw HTML by'
         . ' default; `<html>` blocks are the opt-in',
    356 => 'GFM angle-bracket autolink `<http://…>`: not implemented — we'
         . ' rely on DokuWiki\'s existing bare-URL detection, which does not'
         . ' parse `<URL>` form',

    // --------------------------------------------------------------------
    // CommonMark §6.2 flanking-delimiter analysis — deliberately not
    // implemented. DokuWiki's regex lexer uses leftmost-match and cannot
    // apply CommonMark's left/right-flanking rules that distinguish
    // word-chars, whitespace, and punctuation for `*`/`_` delimiters, or
    // the "multiple-of-3" rule for overlapping runs. These examples all
    // rely on that machinery.
    // --------------------------------------------------------------------

    // Unicode whitespace in flanking context. Our `\s` is ASCII-only
    // because the lexer doesn't set the PCRE `u` flag.
    // (Listed ahead of 362 on purpose: it has its own root cause.)
    363 => 'Unicode whitespace (U+00A0) flanking — requires u-flag-aware regex',

    // Punctuation-adjacent flanking for `*` / `_` / `**` / `__`
    362 => 'flanking: punctuation-adjacent `*` (left-flanking vs. right-flanking)',
    368 => 'flanking: punctuation-adjacent `_`',
    372 => 'flanking: intraword `_` with punctuation inside',
    377 => 'flanking: `*` followed by `(` requires punctuation-aware flanking',
    378 => 'flanking: nested `*(*foo*)*` requires flanking + balanced-pair analysis',
    382 => 'flanking: nested `_(_foo_)_` requires flanking + balanced-pair analysis',
    389 => 'flanking: punctuation-adjacent `**`',
    394 => 'flanking: punctuation-adjacent `__`',
    401 => 'flanking: `**` followed by `(`',
    404 => 'flanking: nested `*bar*` inside `**foo ... foo**` with punctuation',
    407 => 'flanking: `__` followed by `(`',
    470 => 'flanking: nested `*_foo_*` requires balanced-pair analysis',
    472 => 'flanking: nested `_*foo*_` requires balanced-pair analysis',

    // Intraword `__` strong (even multibyte) — flanking rule for `_` requires
    // examining whether the delimiter run is word-boundary-flanking, which our
    // simple lookbehind/lookahead approximation doesn't fully match.
    395 => 'flanking: intraword `__` (`foo__bar__`) — left-flanking vs right-flanking',
    396 => 'flanking: intraword `__` across digits (`5__6__78`)',
    397 => 'flanking: intraword `__` with Cyrillic',
    398 => 'flanking: `__foo, __bar__, baz__` — flanking + balanced pairing',
    409 => 'flanking: `__foo__bar` — intraword close',
    410 => 'flanking: intraword `__` with Cyrillic (leading)',
    411 => 'flanking: `__foo__bar__baz__` — multiple `__` pairs with flanking',
    412 => 'flanking: `__(bar)__.` — punctuation-adjacent',

    // Overlapping / multiple-of-3 rule for runs
    416 => 'CommonMark rule 9 (overlapping same-delimiter `_foo _bar_ baz_`)',
    417 => 'CommonMark overlapping `_` / `__` with flanking',
    418 => 'CommonMark overlapping `*foo *bar**` — multiple-of-3 rule',
    419 => 'CommonMark nested `*foo **bar** baz*` — balanced-pair analysis',
    421 => 'CommonMark overlapping `*foo**bar*` — multiple-of-3',
    422 => 'CommonMark nested `***foo** bar*` — triple-delimiter analysis',
    423 => 'CommonMark nested `*foo **bar***` — triple-delimiter analysis',
    424 => 'CommonMark nested `*foo**bar***` — triple-delimiter analysis',
    425 => 'CommonMark triple `foo***bar***baz` — triple-delimiter analysis',
    426 => 'CommonMark long delimiter runs `foo******bar*********baz`',
    427 => 'CommonMark deeply nested `*foo **bar *baz* bim** bop*`',
    434 => 'CommonMark overlapping `__foo __bar__ baz__` — multiple-of-3',
    435 => 'CommonMark `____foo__ bar__` — leading long delimiter run',
    436 => 'CommonMark `**foo **bar****` — trailing long delimiter run',
    439 => 'CommonMark nested `***foo* bar**` — triple-delimiter',
    440 => 'CommonMark nested `**foo *bar***` — triple-delimiter',
    441 => 'CommonMark deeply nested `**foo *bar **baz** bim* bop**`',

    // `__foo_` / `_foo__` — mixing `_` and `__` requires flanking to decide
    // which delimiter pairs open/close.
    463 => 'flanking: `__foo_` — mixed `_`/`__` pairing',
    464 => 'flanking: `_foo__` — mixed `_`/`__` pairing',
    465 => 'flanking: `___foo__` — delimiter-run length analysis',
    466 => 'flanking: `____foo_` — delimiter-run length analysis',
    467 => 'flanking: `__foo___` — delimiter-run length analysis',
    468 => 'flanking: `_foo____` — delimiter-run length analysis',

    // Long delimiter runs require excess-drop logic (2 outer chars dropped
    // from each side). Stack-based pairing needed — out of scope.
    473 => 'CommonMark `****foo****` — excess-drop (4+4 → strong only)',
    474 => 'CommonMark `____foo____` — excess-drop (4+4 → strong only)',
    475 => 'CommonMark `******foo******` — excess-drop (6+6 → strong only)',
    477 => 'CommonMark `_____foo_____` — excess-drop (5+5 → em+strong, 2 dropped each side)',

    // Overlapping / crossing delimiters
    478 => 'CommonMark `*foo _bar* baz_` — overlapping different delimiters',
    479 => 'CommonMark `*foo __bar *baz bim__ bam*` — crossing delimiters',
    480 => 'CommonMark `**foo **bar baz**` — overlapping same delimiter',
];