xref: /dokuwiki/_test/tests/Parsing/ParserMode/ExternallinkTest.php (revision 47a02a102092be9e1e6f1ddaf158bdfffdb13d4f)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ParserMode\Externallink;
6use dokuwiki\Parsing\ParserMode\Internallink;
7
/**
 * Tests for the {@see Externallink} parser mode.
 *
 * Covers the classic DokuWiki autolink behavior (bare URLs, www./ftp. shortcuts, IPv4/IPv6,
 * scheme allow-listing), the Markdown angle-bracket autolink form (CommonMark §6.5), and the
 * GFM autolink extension trim step (paren balancing, trailing entity-ref decoding).
 *
 * @group parser_links
 */
class ExternallinkTest extends ParserTestBase
{
    /**
     * Select the 'md' syntax for every test; individual tests may switch it again.
     */
    public function setUp(): void
    {
        parent::setUp();
        $this->setSyntax('md');
    }

    // ----- basic bare-URL autolink -----

    /**
     * A bare http:// URL between words yields an externallink call with a null title.
     */
    public function testSimple(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo http://www.google.com Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['http://www.google.com', null]],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * An all-uppercase URL is linked too, and its case is passed through unchanged.
     */
    public function testCase(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo HTTP://WWW.GOOGLE.COM Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['HTTP://WWW.GOOGLE.COM', null]],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A URL whose host is a dotted IPv4 address is linked.
     */
    public function testIPv4(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo http://123.123.3.21/foo Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['http://123.123.3.21/foo', null]],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A URL whose host is a bracketed IPv6 literal is linked.
     */
    public function testIPv6(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo http://[3ffe:2a00:100:7031::1]/foo Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['http://[3ffe:2a00:100:7031::1]/foo', null]],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * Run a list of URLs through three source forms: bare, [[url]] and [[url|title]].
     *
     * All three forms are expected to produce the same externallink call; only the
     * [[url|title]] form carries a non-null title.
     */
    public function testMulti(): void
    {
        // The fixture is rebuilt by hand for every combination below, so first undo the
        // setUp() that already ran for this test method.
        // NOTE(review): presumably tearDown()/setUp() reset $this->P and $this->H; confirm in ParserTestBase.
        $this->tearDown();

        $links = [
            'http://www.google.com',
            'HTTP://WWW.GOOGLE.COM',
            'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html',
            'http://[1080:0:0:0:8:800:200C:417A]/index.html',
            'http://[3ffe:2a00:100:7031::1]',
            'http://[1080::8:800:200C:417A]/foo',
            'http://[::192.9.5.5]/ipng',
            'http://[::FFFF:129.144.52.38]:80/index.html',
            'http://[2010:836B:4179::836B:4179]',
        ];
        // false = bare URL, null = bracketed link without title, string = bracketed link with title
        $titles = [false, null, 'foo bar'];
        foreach ($links as $link) {
            foreach ($titles as $title) {
                if ($title === false) {
                    $source = $link;
                    $name = null;
                } elseif ($title === null) {
                    $source = "[[$link]]";
                    $name = null;
                } else {
                    $source = "[[$link|$title]]";
                    $name = $title;
                }
                $this->setUp();
                $this->P->addMode('internallink', new Internallink());
                $this->P->addMode('externallink', new Externallink());
                $this->P->parse("Foo $source Bar");
                $calls = [
                    ['document_start', []],
                    ['p_open', []],
                    ['cdata', ["\n" . 'Foo ']],
                    ['externallink', [$link, $name]],
                    ['cdata', [' Bar']],
                    ['p_close', []],
                    ['document_end', []],
                ];
                // $source is passed as the third argument so a failure identifies the offending input
                $this->assertCalls($calls, $this->H->calls, $source);
                $this->tearDown();
            }
        }

        // Leave a live fixture behind for the tearDown() that follows this test method.
        $this->setUp();
    }

    /**
     * A javascript: pseudo-URL must not be linked; the whole sentence stays cdata.
     */
    public function testJavascriptScheme(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo javascript:alert('XSS'); Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nFoo javascript:alert('XSS'); Bar"]],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    // ----- www. / ftp. shortcuts -----

    /**
     * A bare www. host is linked with an http:// target and the original text as title.
     */
    public function testWWWLink(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo www.google.com Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['http://www.google.com', 'www.google.com']],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A www. link at the very start of the text is linked as well.
     *
     * Unlike the other tests this one goes through p_get_instructions() instead of the
     * fixture parser in $this->P.
     */
    public function testWWWLinkStartOfLine(): void
    {
        // Regression test for issue #2399
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['externallink', ['http://www.google.com', 'www.google.com']],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $instructions = p_get_instructions("www.google.com Bar");
        $this->assertCalls($calls, $instructions);
    }

    /**
     * Round brackets surrounding a www. link stay outside the link.
     */
    public function testWWWLinkInRoundBrackets(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo (www.google.com) Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo (']],
            ['externallink', ['http://www.google.com', 'www.google.com']],
            ['cdata', [') Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A www. segment embedded in a filesystem path is not turned into a link.
     */
    public function testWWWLinkInPath(): void
    {
        $this->P->addMode('externallink', new Externallink());
        // See issue #936. Should NOT generate a link!
        $this->P->parse("Foo /home/subdir/www/www.something.de/somedir/ Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo /home/subdir/www/www.something.de/somedir/ Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A www. link separated from a preceding path by a space is linked.
     */
    public function testWWWLinkFollowingPath(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo /home/subdir/www/ www.something.de/somedir/ Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo /home/subdir/www/ ']],
            ['externallink', ['http://www.something.de/somedir/', 'www.something.de/somedir/']],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A bare ftp. host is linked with an ftp:// target and the original text as title.
     */
    public function testFTPLink(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo ftp.sunsite.com Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['ftp://ftp.sunsite.com', 'ftp.sunsite.com']],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * An ftp. segment embedded in a filesystem path is not turned into a link.
     */
    public function testFTPLinkInPath(): void
    {
        $this->P->addMode('externallink', new Externallink());
        // See issue #936. Should NOT generate a link!
        $this->P->parse("Foo /home/subdir/www/ftp.something.de/somedir/ Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo /home/subdir/www/ftp.something.de/somedir/ Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * An ftp. link separated from a preceding path by a space is linked.
     */
    public function testFTPLinkFollowingPath(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo /home/subdir/www/ ftp.something.de/somedir/ Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo /home/subdir/www/ ']],
            ['externallink', ['ftp://ftp.something.de/somedir/', 'ftp.something.de/somedir/']],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    // ----- Markdown angle-bracket autolinks (§6.5) -----

    /**
     * <url> is linked without the brackets, and the URL doubles as the title.
     */
    public function testAngleBracketAutolink(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo <http://www.google.com> Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo ']],
            ['externallink', ['http://www.google.com', 'http://www.google.com']],
            ['cdata', [' Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * Whitespace inside the angle brackets prevents any link from being produced.
     */
    public function testAngleBracketDisqualifiedByInternalWhitespace(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo <http://www.google.com bim> Bar");
        // Internal whitespace disqualifies the autolink. The whole envelope is consumed as cdata so the
        // bare-URL detector cannot pick up http://www.google.com inside and leave dangling brackets.
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nFoo <http://www.google.com bim> Bar"]],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * Whitespace right after "<" (and before ">") prevents any link from being produced.
     */
    public function testAngleBracketDisqualifiedByLeadingWhitespace(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse("Foo < http://www.google.com > Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nFoo < http://www.google.com > Bar"]],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * An angle-bracket autolink with a scheme outside the allow-list stays plain text.
     */
    public function testAngleBracketUnregisteredScheme(): void
    {
        $this->P->addMode('externallink', new Externallink());
        // mailto is not in the default conf/scheme.conf allow-list, so no per-scheme angle pattern is built
        // for it. The brackets fall through to cdata, matching DokuWiki's bare-URL scheme policy.
        $this->P->parse("Foo <mailto:foo@example.com> Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nFoo <mailto:foo@example.com> Bar"]],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * With the 'dw' syntax the brackets are literal text and the URL inside is a bare-URL link.
     */
    public function testAngleBracketInactiveInDwMode(): void
    {
        $this->setSyntax('dw');
        $this->P->addMode('externallink', new Externallink());
        // In DW-only syntax, angle-bracket processing is intentionally not active. The bare-URL pattern still
        // picks up the URL inside and the angle brackets fall through as literal text — matches the pre-merge
        // behavior of DokuWiki's Externallink mode.
        $this->P->parse("Foo <http://www.google.com> Bar");
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\n" . 'Foo <']],
            ['externallink', ['http://www.google.com', null]],
            ['cdata', ['> Bar']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    // ----- GFM autolink extension: paren balancing -----

    /**
     * A balanced "(foo)" at the end of the URL remains part of the link.
     */
    public function testBalancedParensInUrl(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See www.example.com/path(foo) end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://www.example.com/path(foo)', 'www.example.com/path(foo)']],
            ['cdata', [' end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * The ")" closing a parenthesis opened before the URL is left outside the link.
     */
    public function testTrailingUnbalancedParenExcluded(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See (www.example.com/path(foo)) end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee ("]],
            ['externallink', ['http://www.example.com/path(foo)', 'www.example.com/path(foo)']],
            ['cdata', [') end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * Several surplus trailing ")" are all removed from the link.
     */
    public function testMultipleTrailingParensTrimmedUntilBalanced(): void
    {
        $this->P->addMode('externallink', new Externallink());
        // Inner `(foo)` is balanced and stays inside the URL; the two unbalanced trailing `)` are peeled off.
        $this->P->parse('See www.example.com/path(foo))) end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://www.example.com/path(foo)', 'www.example.com/path(foo)']],
            ['cdata', [')) end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * Unbalanced parens that are not at the end of the URL are kept inside the link.
     */
    public function testParenInsideUrlNoTrailing(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See www.example.com/search?q=(business))+ok end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', [
                'http://www.example.com/search?q=(business))+ok',
                'www.example.com/search?q=(business))+ok',
            ]],
            ['cdata', [' end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    // ----- GFM autolink extension: trailing entity references -----

    /**
     * A trailing named entity is cut from the link and shows up decoded in the following cdata.
     */
    public function testTrailingValidEntityDecodedToUnicode(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See http://example.com/&copy; end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://example.com/', null]],
            ['cdata', ['© end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A trailing entity-like token with an unknown name is cut from the link but kept verbatim.
     */
    public function testTrailingUnknownEntityRoundTripsLiterally(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See http://example.com/&hl; end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://example.com/', null]],
            ['cdata', ['&hl; end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A trailing numeric entity is cut from the link and shows up decoded in the following cdata.
     */
    public function testTrailingNumericEntityDecoded(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See http://example.com/&#65; end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://example.com/', null]],
            ['cdata', ['A end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * An entity followed by more URL characters is not trailing and stays inside the link.
     */
    public function testNonTrailingEntityStaysInsideUrl(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See http://example.com/&copy;more end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://example.com/&copy;more', null]],
            ['cdata', [' end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * A surplus ")" followed by a trailing entity: both are removed from the link.
     */
    public function testMixtureParenThenEntityPeelsBoth(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See (http://example.com/path)&copy; end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee ("]],
            ['externallink', ['http://example.com/path', null]],
            ['cdata', [')© end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }

    /**
     * Alternating surplus ")" and entities at the end are all removed from the link.
     */
    public function testMixtureMultipleEntitiesAndParens(): void
    {
        $this->P->addMode('externallink', new Externallink());
        $this->P->parse('See http://example.com/)&copy;)&hl; end');
        $calls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', ["\nSee "]],
            ['externallink', ['http://example.com/', null]],
            ['cdata', [')©)&hl; end']],
            ['p_close', []],
            ['document_end', []],
        ];
        $this->assertCalls($calls, $this->H->calls);
    }
}
504