<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\ModeRegistry;

/**
 * Parser mode for external links (URLs).
 *
 * This mode recognizes external links in the text. It builds one regular expression per configured URL scheme,
 * two more for scheme-less "www." / "ftp." short URLs, and - when Markdown is preferred - one per scheme for
 * Markdown-style angle-bracket autolinks.
 */
class Externallink extends AbstractMode
{
    /** @var string[] URL schemes as returned by getSchemes() */
    protected $schemes = [];

    /** @var string[] lexer patterns built by preConnect() */
    protected $patterns = [];

    /** @inheritdoc */
    public function getSort()
    {
        return 330;
    }

    /**
     * Build the list of lexer patterns once.
     *
     * Pattern order is: one pattern per scheme, the "www." short form, the "ftp." short form and finally
     * (Markdown only) one angle-bracket pattern per scheme.
     *
     * @inheritdoc
     */
    public function preConnect()
    {
        // patterns are built only once per instance
        if ($this->patterns !== []) {
            return;
        }

        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-\[\]';
        $punc = '.:?\-;,';
        $host = $ltrs . $punc;
        $any = $ltrs . $gunk . $punc;

        // Shared tail: consume URL characters lazily and stop before trailing punctuation that is followed
        // by a character which can not be part of a URL.
        $tail = '[' . $any . ']+?(?=[' . $punc . ']*[^' . $any . '])';

        $this->schemes = getSchemes();

        // Scheme names are inserted into regular expressions, so they need to be escaped: URI schemes may
        // contain "+", "-" and "." which would otherwise be read as regex syntax.
        $quoted = [];
        foreach ($this->schemes as $scheme) {
            $quoted[] = preg_quote($scheme, '/');
        }

        foreach ($quoted as $scheme) {
            $this->patterns[] = '\b(?i)' . $scheme . '(?-i)://' . $tail;
        }

        // scheme-less short URLs; handle() adds the protocol for these
        foreach (['www?', 'ftp?'] as $prefix) {
            $this->patterns[] = '(?<![/\\\\])\b(?i)' . $prefix . '(?-i)\.[' . $host . ']+?\.' .
                '[' . $host . ']+?' . $tail;
        }

        // Markdown-only: angle-bracket autolinks per CommonMark §6.5. One per-scheme pattern that captures the
        // whole envelope; handle() decides at match time whether to emit a link or literal cdata based on whether
        // the content contains whitespace (which disqualifies the autolink).
        // Angle brackets with whitespace are thus a simple way to write a URL without triggering autolinking.
        if (ModeRegistry::getInstance()->isMdPreferred()) {
            foreach ($quoted as $scheme) {
                $this->patterns[] = '<[ \t]*(?i)' . $scheme . '(?-i)://[^<>\n]*>';
            }
        }
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        foreach ($this->patterns as $pattern) {
            $this->Lexer->addSpecialPattern($pattern, $mode, 'externallink');
        }
    }

    /**
     * Turn a matched URL into an "externallink" call (or literal "cdata" for a disqualified autolink).
     *
     * @param string $match the text matched by one of the patterns
     * @param int $state the lexer state (unused here)
     * @param int $pos position of the match, passed on to the handler
     * @param Handler $handler receives the resulting call
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Angle-bracket autolink (Markdown §6.5).
        if (str_starts_with($match, '<') && str_ends_with($match, '>')) {
            if (preg_match('/\s/', $match)) {
                // Disqualified by internal whitespace — render literally
                $handler->addCall('cdata', [$match], $pos);
                return true;
            }
            $url = substr($match, 1, -1);
            // Pass URL as both href and visible label so the rendered text shows the URL exactly as written
            $handler->addCall('externallink', [$url, $url], $pos);
            return true;
        }

        $url = $match;
        $title = null;

        // Add the protocol on scheme-less short URLs. The checks mirror the short URL patterns built in
        // preConnect(): they are case-insensitive ("WWW.example.com") and require the dot, so that a scheme
        // which merely starts with "ftp" (e.g. "ftps://") is not prefixed a second time.
        if (preg_match('/^ftp?\./i', $url)) {
            $title = $url;
            $url = 'ftp://' . $url;
        } elseif (preg_match('/^www?\./i', $url)) {
            $title = $url;
            $url = 'http://' . $url;
        }

        $handler->addCall('externallink', [$url, $title], $pos);
        return true;
    }

    /**
     * Get the lexer patterns built by preConnect().
     *
     * @return string[] empty until preConnect() has been called
     */
    public function getPatterns()
    {
        return $this->patterns;
    }
}