xref: /dokuwiki/_test/tests/Parsing/ParserMode/ExternallinkTest.php (revision 8a34b0d87864546b9e35ee6a4621d30bf4cd6475)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ModeRegistry;
6use dokuwiki\Parsing\ParserMode\Externallink;
7use dokuwiki\Parsing\ParserMode\Internallink;
8
9/**
10 * Tests for the {@see Externallink} parser mode.
11 *
12 * Covers the classic DokuWiki autolink behavior (bare URLs, www./ftp. shortcuts, IPv4/IPv6,
13 * scheme allow-listing), the Markdown angle-bracket autolink form (CommonMark §6.5), and the
14 * GFM autolink extension trim step (paren balancing, trailing entity-ref decoding).
15 *
16 * @group parser_links
17 */
18class ExternallinkTest extends ParserTestBase
19{
20    public function setUp(): void
21    {
22        parent::setUp();
23        global $conf;
24        $conf['syntax'] = 'md';
25        ModeRegistry::reset();
26    }
27
28    public function tearDown(): void
29    {
30        ModeRegistry::reset();
31        parent::tearDown();
32    }
33
34    // ----- basic bare-URL autolink -----
35
36    function testSimple() {
37        $this->P->addMode('externallink', new Externallink());
38        $this->P->parse("Foo http://www.google.com Bar");
39        $calls = [
40            ['document_start', []],
41            ['p_open', []],
42            ['cdata', ["\n" . 'Foo ']],
43            ['externallink', ['http://www.google.com', null]],
44            ['cdata', [' Bar']],
45            ['p_close', []],
46            ['document_end', []],
47        ];
48        $this->assertCalls($calls, $this->H->calls);
49    }
50
51    function testCase() {
52        $this->P->addMode('externallink', new Externallink());
53        $this->P->parse("Foo HTTP://WWW.GOOGLE.COM Bar");
54        $calls = [
55            ['document_start', []],
56            ['p_open', []],
57            ['cdata', ["\n" . 'Foo ']],
58            ['externallink', ['HTTP://WWW.GOOGLE.COM', null]],
59            ['cdata', [' Bar']],
60            ['p_close', []],
61            ['document_end', []],
62        ];
63        $this->assertCalls($calls, $this->H->calls);
64    }
65
66    function testIPv4() {
67        $this->P->addMode('externallink', new Externallink());
68        $this->P->parse("Foo http://123.123.3.21/foo Bar");
69        $calls = [
70            ['document_start', []],
71            ['p_open', []],
72            ['cdata', ["\n" . 'Foo ']],
73            ['externallink', ['http://123.123.3.21/foo', null]],
74            ['cdata', [' Bar']],
75            ['p_close', []],
76            ['document_end', []],
77        ];
78        $this->assertCalls($calls, $this->H->calls);
79    }
80
81    function testIPv6() {
82        $this->P->addMode('externallink', new Externallink());
83        $this->P->parse("Foo http://[3ffe:2a00:100:7031::1]/foo Bar");
84        $calls = [
85            ['document_start', []],
86            ['p_open', []],
87            ['cdata', ["\n" . 'Foo ']],
88            ['externallink', ['http://[3ffe:2a00:100:7031::1]/foo', null]],
89            ['cdata', [' Bar']],
90            ['p_close', []],
91            ['document_end', []],
92        ];
93        $this->assertCalls($calls, $this->H->calls);
94    }
95
96    function testMulti() {
97        $this->teardown();
98
99        $links = [
100            'http://www.google.com',
101            'HTTP://WWW.GOOGLE.COM',
102            'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html',
103            'http://[1080:0:0:0:8:800:200C:417A]/index.html',
104            'http://[3ffe:2a00:100:7031::1]',
105            'http://[1080::8:800:200C:417A]/foo',
106            'http://[::192.9.5.5]/ipng',
107            'http://[::FFFF:129.144.52.38]:80/index.html',
108            'http://[2010:836B:4179::836B:4179]',
109        ];
110        $titles = [false, null, 'foo bar'];
111        foreach ($links as $link) {
112            foreach ($titles as $title) {
113                if ($title === false) {
114                    $source = $link;
115                    $name = null;
116                } elseif ($title === null) {
117                    $source = "[[$link]]";
118                    $name = null;
119                } else {
120                    $source = "[[$link|$title]]";
121                    $name = $title;
122                }
123                $this->setup();
124                $this->P->addMode('internallink', new Internallink());
125                $this->P->addMode('externallink', new Externallink());
126                $this->P->parse("Foo $source Bar");
127                $calls = [
128                    ['document_start', []],
129                    ['p_open', []],
130                    ['cdata', ["\n" . 'Foo ']],
131                    ['externallink', [$link, $name]],
132                    ['cdata', [' Bar']],
133                    ['p_close', []],
134                    ['document_end', []],
135                ];
136                $this->assertCalls($calls, $this->H->calls, $source);
137                $this->teardown();
138            }
139        }
140
141        $this->setup();
142    }
143
144    function testJavascriptScheme() {
145        $this->P->addMode('externallink', new Externallink());
146        $this->P->parse("Foo javascript:alert('XSS'); Bar");
147        $calls = [
148            ['document_start', []],
149            ['p_open', []],
150            ['cdata', ["\nFoo javascript:alert('XSS'); Bar"]],
151            ['p_close', []],
152            ['document_end', []],
153        ];
154        $this->assertCalls($calls, $this->H->calls);
155    }
156
157    // ----- www. / ftp. shortcuts -----
158
159    function testWWWLink() {
160        $this->P->addMode('externallink', new Externallink());
161        $this->P->parse("Foo www.google.com Bar");
162        $calls = [
163            ['document_start', []],
164            ['p_open', []],
165            ['cdata', ["\n" . 'Foo ']],
166            ['externallink', ['http://www.google.com', 'www.google.com']],
167            ['cdata', [' Bar']],
168            ['p_close', []],
169            ['document_end', []],
170        ];
171        $this->assertCalls($calls, $this->H->calls);
172    }
173
174    function testWWWLinkStartOfLine() {
175        // Regression test for issue #2399
176        $calls = [
177            ['document_start', []],
178            ['p_open', []],
179            ['externallink', ['http://www.google.com', 'www.google.com']],
180            ['cdata', [' Bar']],
181            ['p_close', []],
182            ['document_end', []],
183        ];
184        $instructions = p_get_instructions("www.google.com Bar");
185        $this->assertCalls($calls, $instructions);
186    }
187
188    function testWWWLinkInRoundBrackets() {
189        $this->P->addMode('externallink', new Externallink());
190        $this->P->parse("Foo (www.google.com) Bar");
191        $calls = [
192            ['document_start', []],
193            ['p_open', []],
194            ['cdata', ["\n" . 'Foo (']],
195            ['externallink', ['http://www.google.com', 'www.google.com']],
196            ['cdata', [') Bar']],
197            ['p_close', []],
198            ['document_end', []],
199        ];
200        $this->assertCalls($calls, $this->H->calls);
201    }
202
203    function testWWWLinkInPath() {
204        $this->P->addMode('externallink', new Externallink());
205        // See issue #936. Should NOT generate a link!
206        $this->P->parse("Foo /home/subdir/www/www.something.de/somedir/ Bar");
207        $calls = [
208            ['document_start', []],
209            ['p_open', []],
210            ['cdata', ["\n" . 'Foo /home/subdir/www/www.something.de/somedir/ Bar']],
211            ['p_close', []],
212            ['document_end', []],
213        ];
214        $this->assertCalls($calls, $this->H->calls);
215    }
216
217    function testWWWLinkFollowingPath() {
218        $this->P->addMode('externallink', new Externallink());
219        $this->P->parse("Foo /home/subdir/www/ www.something.de/somedir/ Bar");
220        $calls = [
221            ['document_start', []],
222            ['p_open', []],
223            ['cdata', ["\n" . 'Foo /home/subdir/www/ ']],
224            ['externallink', ['http://www.something.de/somedir/', 'www.something.de/somedir/']],
225            ['cdata', [' Bar']],
226            ['p_close', []],
227            ['document_end', []],
228        ];
229        $this->assertCalls($calls, $this->H->calls);
230    }
231
232    function testFTPLink() {
233        $this->P->addMode('externallink', new Externallink());
234        $this->P->parse("Foo ftp.sunsite.com Bar");
235        $calls = [
236            ['document_start', []],
237            ['p_open', []],
238            ['cdata', ["\n" . 'Foo ']],
239            ['externallink', ['ftp://ftp.sunsite.com', 'ftp.sunsite.com']],
240            ['cdata', [' Bar']],
241            ['p_close', []],
242            ['document_end', []],
243        ];
244        $this->assertCalls($calls, $this->H->calls);
245    }
246
247    function testFTPLinkInPath() {
248        $this->P->addMode('externallink', new Externallink());
249        // See issue #936. Should NOT generate a link!
250        $this->P->parse("Foo /home/subdir/www/ftp.something.de/somedir/ Bar");
251        $calls = [
252            ['document_start', []],
253            ['p_open', []],
254            ['cdata', ["\n" . 'Foo /home/subdir/www/ftp.something.de/somedir/ Bar']],
255            ['p_close', []],
256            ['document_end', []],
257        ];
258        $this->assertCalls($calls, $this->H->calls);
259    }
260
261    function testFTPLinkFollowingPath() {
262        $this->P->addMode('externallink', new Externallink());
263        $this->P->parse("Foo /home/subdir/www/ ftp.something.de/somedir/ Bar");
264        $calls = [
265            ['document_start', []],
266            ['p_open', []],
267            ['cdata', ["\n" . 'Foo /home/subdir/www/ ']],
268            ['externallink', ['ftp://ftp.something.de/somedir/', 'ftp.something.de/somedir/']],
269            ['cdata', [' Bar']],
270            ['p_close', []],
271            ['document_end', []],
272        ];
273        $this->assertCalls($calls, $this->H->calls);
274    }
275
276    // ----- Markdown angle-bracket autolinks (§6.5) -----
277
278    function testAngleBracketAutolink() {
279        $this->P->addMode('externallink', new Externallink());
280        $this->P->parse("Foo <http://www.google.com> Bar");
281        $calls = [
282            ['document_start', []],
283            ['p_open', []],
284            ['cdata', ["\n" . 'Foo ']],
285            ['externallink', ['http://www.google.com', 'http://www.google.com']],
286            ['cdata', [' Bar']],
287            ['p_close', []],
288            ['document_end', []],
289        ];
290        $this->assertCalls($calls, $this->H->calls);
291    }
292
293    function testAngleBracketDisqualifiedByInternalWhitespace() {
294        $this->P->addMode('externallink', new Externallink());
295        $this->P->parse("Foo <http://www.google.com bim> Bar");
296        // Internal whitespace disqualifies the autolink. The whole envelope is consumed as cdata so the
297        // bare-URL detector cannot pick up http://www.google.com inside and leave dangling brackets.
298        $calls = [
299            ['document_start', []],
300            ['p_open', []],
301            ['cdata', ["\nFoo <http://www.google.com bim> Bar"]],
302            ['p_close', []],
303            ['document_end', []],
304        ];
305        $this->assertCalls($calls, $this->H->calls);
306    }
307
308    function testAngleBracketDisqualifiedByLeadingWhitespace() {
309        $this->P->addMode('externallink', new Externallink());
310        $this->P->parse("Foo < http://www.google.com > Bar");
311        $calls = [
312            ['document_start', []],
313            ['p_open', []],
314            ['cdata', ["\nFoo < http://www.google.com > Bar"]],
315            ['p_close', []],
316            ['document_end', []],
317        ];
318        $this->assertCalls($calls, $this->H->calls);
319    }
320
321    function testAngleBracketUnregisteredScheme() {
322        $this->P->addMode('externallink', new Externallink());
323        // mailto is not in the default conf/scheme.conf allow-list, so no per-scheme angle pattern is built
324        // for it. The brackets fall through to cdata, matching DokuWiki's bare-URL scheme policy.
325        $this->P->parse("Foo <mailto:foo@example.com> Bar");
326        $calls = [
327            ['document_start', []],
328            ['p_open', []],
329            ['cdata', ["\nFoo <mailto:foo@example.com> Bar"]],
330            ['p_close', []],
331            ['document_end', []],
332        ];
333        $this->assertCalls($calls, $this->H->calls);
334    }
335
336    function testAngleBracketInactiveInDwMode() {
337        global $conf;
338        $conf['syntax'] = 'dw';
339        $this->P->addMode('externallink', new Externallink());
340        // In DW-only syntax, angle-bracket processing is intentionally not active. The bare-URL pattern still
341        // picks up the URL inside and the angle brackets fall through as literal text — matches the pre-merge
342        // behavior of DokuWiki's Externallink mode.
343        $this->P->parse("Foo <http://www.google.com> Bar");
344        $calls = [
345            ['document_start', []],
346            ['p_open', []],
347            ['cdata', ["\n" . 'Foo <']],
348            ['externallink', ['http://www.google.com', null]],
349            ['cdata', ['> Bar']],
350            ['p_close', []],
351            ['document_end', []],
352        ];
353        $this->assertCalls($calls, $this->H->calls);
354    }
355
356    // ----- GFM autolink extension: paren balancing -----
357
358    function testBalancedParensInUrl() {
359        $this->P->addMode('externallink', new Externallink());
360        $this->P->parse('See www.example.com/path(foo) end');
361        $calls = [
362            ['document_start', []],
363            ['p_open', []],
364            ['cdata', ["\nSee "]],
365            ['externallink', ['http://www.example.com/path(foo)', 'www.example.com/path(foo)']],
366            ['cdata', [' end']],
367            ['p_close', []],
368            ['document_end', []],
369        ];
370        $this->assertCalls($calls, $this->H->calls);
371    }
372
373    function testTrailingUnbalancedParenExcluded() {
374        $this->P->addMode('externallink', new Externallink());
375        $this->P->parse('See (www.example.com/path(foo)) end');
376        $calls = [
377            ['document_start', []],
378            ['p_open', []],
379            ['cdata', ["\nSee ("]],
380            ['externallink', ['http://www.example.com/path(foo)', 'www.example.com/path(foo)']],
381            ['cdata', [') end']],
382            ['p_close', []],
383            ['document_end', []],
384        ];
385        $this->assertCalls($calls, $this->H->calls);
386    }
387
388    function testMultipleTrailingParensTrimmedUntilBalanced() {
389        $this->P->addMode('externallink', new Externallink());
390        // Inner `(foo)` is balanced and stays inside the URL; the two unbalanced trailing `)` are peeled off.
391        $this->P->parse('See www.example.com/path(foo))) end');
392        $calls = [
393            ['document_start', []],
394            ['p_open', []],
395            ['cdata', ["\nSee "]],
396            ['externallink', ['http://www.example.com/path(foo)', 'www.example.com/path(foo)']],
397            ['cdata', [')) end']],
398            ['p_close', []],
399            ['document_end', []],
400        ];
401        $this->assertCalls($calls, $this->H->calls);
402    }
403
404    function testParenInsideUrlNoTrailing() {
405        $this->P->addMode('externallink', new Externallink());
406        $this->P->parse('See www.example.com/search?q=(business))+ok end');
407        $calls = [
408            ['document_start', []],
409            ['p_open', []],
410            ['cdata', ["\nSee "]],
411            ['externallink', [
412                'http://www.example.com/search?q=(business))+ok',
413                'www.example.com/search?q=(business))+ok'
414            ]],
415            ['cdata', [' end']],
416            ['p_close', []],
417            ['document_end', []],
418        ];
419        $this->assertCalls($calls, $this->H->calls);
420    }
421
422    // ----- GFM autolink extension: trailing entity references -----
423
424    function testTrailingValidEntityDecodedToUnicode() {
425        $this->P->addMode('externallink', new Externallink());
426        $this->P->parse('See http://example.com/&copy; end');
427        $calls = [
428            ['document_start', []],
429            ['p_open', []],
430            ['cdata', ["\nSee "]],
431            ['externallink', ['http://example.com/', null]],
432            ['cdata', ['© end']],
433            ['p_close', []],
434            ['document_end', []],
435        ];
436        $this->assertCalls($calls, $this->H->calls);
437    }
438
439    function testTrailingUnknownEntityRoundTripsLiterally() {
440        $this->P->addMode('externallink', new Externallink());
441        $this->P->parse('See http://example.com/&hl; end');
442        $calls = [
443            ['document_start', []],
444            ['p_open', []],
445            ['cdata', ["\nSee "]],
446            ['externallink', ['http://example.com/', null]],
447            ['cdata', ['&hl; end']],
448            ['p_close', []],
449            ['document_end', []],
450        ];
451        $this->assertCalls($calls, $this->H->calls);
452    }
453
454    function testTrailingNumericEntityDecoded() {
455        $this->P->addMode('externallink', new Externallink());
456        $this->P->parse('See http://example.com/&#65; end');
457        $calls = [
458            ['document_start', []],
459            ['p_open', []],
460            ['cdata', ["\nSee "]],
461            ['externallink', ['http://example.com/', null]],
462            ['cdata', ['A end']],
463            ['p_close', []],
464            ['document_end', []],
465        ];
466        $this->assertCalls($calls, $this->H->calls);
467    }
468
469    function testNonTrailingEntityStaysInsideUrl() {
470        $this->P->addMode('externallink', new Externallink());
471        $this->P->parse('See http://example.com/&copy;more end');
472        $calls = [
473            ['document_start', []],
474            ['p_open', []],
475            ['cdata', ["\nSee "]],
476            ['externallink', ['http://example.com/&copy;more', null]],
477            ['cdata', [' end']],
478            ['p_close', []],
479            ['document_end', []],
480        ];
481        $this->assertCalls($calls, $this->H->calls);
482    }
483
484    function testMixtureParenThenEntityPeelsBoth() {
485        $this->P->addMode('externallink', new Externallink());
486        $this->P->parse('See (http://example.com/path)&copy; end');
487        $calls = [
488            ['document_start', []],
489            ['p_open', []],
490            ['cdata', ["\nSee ("]],
491            ['externallink', ['http://example.com/path', null]],
492            ['cdata', [')© end']],
493            ['p_close', []],
494            ['document_end', []],
495        ];
496        $this->assertCalls($calls, $this->H->calls);
497    }
498
499    function testMixtureMultipleEntitiesAndParens() {
500        $this->P->addMode('externallink', new Externallink());
501        $this->P->parse('See http://example.com/)&copy;)&hl; end');
502        $calls = [
503            ['document_start', []],
504            ['p_open', []],
505            ['cdata', ["\nSee "]],
506            ['externallink', ['http://example.com/', null]],
507            ['cdata', [')©)&hl; end']],
508            ['p_close', []],
509            ['document_end', []],
510        ];
511        $this->assertCalls($calls, $this->H->calls);
512    }
513}
514