xref: /dokuwiki/inc/Parsing/ParserMode/Externallink.php (revision f9d3b7bd008099dc4c61ce262a02a0ed8bc94254)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\ModeRegistry;
7
/**
 * Parser mode for external links (URLs).
 *
 * This mode is responsible for recognizing and handling external links in the text. It uses regular expressions
 * to identify URLs based on common schemes and patterns, and it can handle both standard URLs and Markdown-style
 * angle-bracket autolinks.
 */
class Externallink extends AbstractMode
{
    /**
     * Short-URL prefixes and the scheme that handle() prepends to a match starting with them.
     *
     * Keys are regex fragments used verbatim both when building the lexer patterns in preConnect() and when
     * classifying a match in handle(), so the two methods always agree on what counts as a short URL.
     * The fragments are matched case-insensitively and must be followed by a literal dot.
     */
    protected const SHORT_URL_PREFIXES = [
        'www?' => 'http',
        'ftp?' => 'ftp',
    ];

    /** @var string[] URL schemes as returned by getSchemes(); filled by preConnect() */
    protected $schemes = [];

    /** @var string[] lexer patterns built by preConnect() and registered by connectTo() */
    protected $patterns = [];

    /** @inheritdoc */
    public function getSort()
    {
        return 330;
    }

    /**
     * Build the list of lexer patterns once.
     *
     * Three groups of patterns are created, in this order:
     *  - one pattern per configured scheme matching "scheme://..." URLs
     *  - one pattern per short-URL prefix (see SHORT_URL_PREFIXES) matching scheme-less "www.host.tld..." style URLs
     *  - when Markdown is preferred, one pattern per scheme matching "<scheme://...>" angle-bracket autolinks
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        // patterns are only built on the first call
        if ($this->patterns !== []) return;

        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-\[\]';
        $punc = '.:?\-;,';
        $host = $ltrs . $punc;
        $any  = $ltrs . $gunk . $punc;

        // A URL ends where optional punctuation is followed by a character that cannot be part of a URL.
        // Combined with the lazy quantifiers below this keeps trailing punctuation out of the match.
        $urlEnd = '(?=[' . $punc . ']*[^' . $any . '])';

        $this->schemes = getSchemes();
        foreach ($this->schemes as $scheme) {
            // scheme names may legally contain regex meta characters ("+", "-", "."), so they have to be quoted
            $this->patterns[] = '\b(?i)' . preg_quote($scheme) . '(?-i)://[' . $any . ']+?' . $urlEnd;
        }

        // scheme-less short URLs; must not directly follow a slash or backslash
        foreach (array_keys(self::SHORT_URL_PREFIXES) as $prefix) {
            $this->patterns[] = '(?<![/\\\\])\b(?i)' . $prefix . '(?-i)\.[' . $host . ']+?\.' .
                                '[' . $host . ']+?[' . $any . ']+?' . $urlEnd;
        }

        // Markdown-only: angle-bracket autolinks per CommonMark §6.5. One per-scheme pattern that captures the whole
        // envelope; handle() decides at match time whether to emit a link or literal cdata based on whether the
        // content contains whitespace (which disqualifies the autolink).
        // Angle brackets with whitespace are therefore a simple way to write a URL without triggering autolinking.
        if (ModeRegistry::getInstance()->isMdPreferred()) {
            foreach ($this->schemes as $scheme) {
                $this->patterns[] = '<[ \t]*(?i)' . preg_quote($scheme) . '(?-i)://[^<>\n]*>';
            }
        }
    }

    /**
     * Register every pattern built by preConnect() as a special pattern of the given mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        foreach ($this->patterns as $pattern) {
            $this->Lexer->addSpecialPattern($pattern, $mode, 'externallink');
        }
    }

    /**
     * Turn a lexer match into an 'externallink' (or literal 'cdata') call on the handler.
     *
     * @param string $match the matched text
     * @param int $state lexer state (not used here)
     * @param int $pos position of the match, passed through to the handler call
     * @param Handler $handler receives the resulting call
     * @return bool always true
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Angle-bracket autolink (Markdown §6.5).
        if (str_starts_with($match, '<') && str_ends_with($match, '>')) {
            if (preg_match('/\s/', $match)) {
                // Disqualified by internal whitespace — render literally
                $handler->addCall('cdata', [$match], $pos);
                return true;
            }
            $url = substr($match, 1, -1);
            // Pass URL as both href and visible label so the rendered text shows the URL exactly as written
            $handler->addCall('externallink', [$url, $url], $pos);
            return true;
        }

        // Add the protocol on simple short URLs and keep the text as written as the link title.
        // The check mirrors the short-URL patterns from preConnect() (case-insensitive prefix followed by a dot),
        // so every short-URL match gets a scheme while full URLs such as "ftps://..." are never touched.
        foreach (self::SHORT_URL_PREFIXES as $prefix => $scheme) {
            if (preg_match('/^' . $prefix . '\./i', $match)) {
                $handler->addCall('externallink', [$scheme . '://' . $match, $match], $pos);
                return true;
            }
        }

        // full URL including its scheme: link it as is, without a separate title
        $handler->addCall('externallink', [$match, null], $pos);
        return true;
    }

    /**
     * Get the lexer patterns built so far.
     *
     * The list is empty until preConnect() has been called.
     *
     * @return array
     */
    public function getPatterns()
    {
        return $this->patterns;
    }
}
108