<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Escape;
use dokuwiki\Parsing\Helpers\Link;
use dokuwiki\Parsing\Helpers\Media as MediaHelper;

/**
 * Parser mode for GFM inline links: `[text](url)`, optionally carrying a
 * title as in `[text](url "title")`.
 *
 * The label is either plain text (the usual case) or exactly one inline
 * image, `[![alt](imgUrl)](target)` — the Markdown counterpart of DW's
 * `[[target|{{imgUrl}}]]`. For the image form a single link handler call
 * is emitted whose label slot holds a media descriptor array, i.e. the
 * same flow `Internallink` already drives. No new handler instructions
 * are introduced; renderers (xhtml, odt, metadata, …) already know how to
 * render a link whose label is a media descriptor.
 *
 * Like DW's `Internallink`, the mode pairs a permissive outer pattern
 * with parsing at handle time instead of encoding every GFM rule in the
 * lexer pattern.
 *
 * Deliberately not supported (see skip.php for the affected spec examples):
 *
 * - Reference links `[text][id]` / `[text][]` / `[foo]` — the single-pass
 *   lexer cannot resolve forward references to `[foo]: url` definitions.
 * - Pointy-bracket destinations `[link](<foo bar>)` — the simplified
 *   pattern matches them, but handle() then produces an internallink
 *   with a broken src; the spec tests for this stay in skip.php.
 * - Balanced parentheses inside URLs `[link](foo(bar))` — the match is
 *   truncated at the first `)`, producing odd output; also in skip.php.
 * - Title HTML attribute — DokuWiki link handler instructions have no
 *   title-attribute slot, and plumbing one through every renderer just
 *   for this is out of scope. The title parses cleanly but is discarded.
 * - Mixed text + image in the label (`[prefix ![alt](img) suffix](url)`)
 *   — matches DW's policy: Internallink only converts the label to a
 *   media descriptor when it matches `^{{…}}$` exactly.
 */
class GfmLink extends AbstractMode
{
    // Inline-image sub-pattern, shared by the label alternative of the
    // lexer pattern and by the image-as-label detection in handle(). It
    // must stay free of capture groups: the lexer wraps user patterns in
    // a capture of its own, and extra groups would renumber unpredictably.
    private const IMAGE_SUB = '!\[[^\[\]\n]*\]\([^)\n]+\)';

    /** @inheritdoc */
    public function getSort()
    {
        return 300;
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // Overall shape is `[text-or-image](url)`. A text label may not
        // contain brackets or newlines; the alternative branch accepts
        // exactly one inline image. The URL slot is kept permissive
        // (`[^)\n]+`) because handle() separates URL and title after the
        // match, the same way DW Internallink parses inside `[[...]]`.
        $this->Lexer->addSpecialPattern(
            '\[(?!\[)(?:[^\[\]\n]+|' . self::IMAGE_SUB . ')\]\([^)\n]+\)',
            $mode,
            'gfm_link'
        );
    }

    /**
     * Turn one matched `[label](target)` into a link handler call.
     *
     * @param string  $match   the text matched by the pattern from connectTo()
     * @param int     $state   lexer state (not used here)
     * @param int     $pos     position forwarded to the handler call
     * @param Handler $handler receives the resulting link call
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Image-as-label `[![alt](img)](target)`: analogous to
        // Internallink's `^{{…}}$` test, the label becomes a media
        // descriptor only when it is exactly one inline image. Anything
        // else is treated as a plain text label.
        $imageLabelRegex = '/^\[(' . self::IMAGE_SUB . ')\]\(([^)\n]+)\)$/';
        $hasImageLabel = preg_match($imageLabelRegex, $match, $parts) === 1;

        if ($hasImageLabel) {
            $label = $this->parseImageDescriptor($parts[1]);
            $targetUrl = $this->extractUrl($parts[2]);
        } else {
            // A plain text label cannot contain `]`, so the first `](`
            // is the boundary between label and target.
            [$rawLabel, $rawTarget] = $this->splitAtSeparator($match, 1);
            $label = Escape::unescapeBackslashes($rawLabel);
            $targetUrl = $this->extractUrl($rawTarget);
        }

        // Classification runs on the raw URL so that windowssharelink
        // detection still sees the literal `\\host\path` runs — GFM's
        // `\\` → `\` collapse would otherwise destroy the share prefix.
        // NOTE(review): only $instructionArgs[0] is unescaped afterwards;
        // if Link::classify() derives further arguments from the URL they
        // keep their backslashes — confirm against Link::classify().
        [$instruction, $instructionArgs] = Link::classify($targetUrl, $label);
        $isWindowsShare = ($instruction === 'windowssharelink');
        if (!$isWindowsShare) {
            $instructionArgs[0] = Escape::unescapeBackslashes($instructionArgs[0]);
        }

        $handler->addCall($instruction, $instructionArgs, $pos);

        return true;
    }

    /**
     * Split `…label](target)` at its first `](`.
     *
     * @param string $text  text ending in the closing `)` of the target
     * @param int    $start offset at which the label begins (skips `[` or `![`)
     * @return string[] two elements: the raw label and the raw target payload
     */
    private function splitAtSeparator(string $text, int $start): array
    {
        $sep = strpos($text, '](');

        return [
            substr($text, $start, $sep - $start),
            // Skip the two separator characters, drop the trailing `)`.
            substr($text, $sep + 2, -1),
        ];
    }

    /**
     * Pull the URL out of the text between the parentheses.
     *
     * Surrounding whitespace is trimmed and everything from the first
     * space, tab or newline onwards is cut off, so a trailing title is
     * dropped (there is no renderer slot for it).
     */
    private function extractUrl(string $inside): string
    {
        $payload = trim($inside);
        $urlLength = strcspn($payload, " \t\n");

        return substr($payload, 0, $urlLength);
    }

    /**
     * Convert an inline image sub-match `![alt](url)` into a media
     * descriptor array, so the link handler can use it as a media label
     * in the same way as for `[[page|{{img}}]]`.
     *
     * @param string $imageMatch text matched by IMAGE_SUB
     * @return array descriptor with the keys type, src, title, align,
     *               width, height, cache and linking
     */
    private function parseImageDescriptor(string $imageMatch): array
    {
        // Offset 2 skips the leading `![`.
        [$rawAlt, $rawTarget] = $this->splitAtSeparator($imageMatch, 2);
        $altText = Escape::unescapeBackslashes($rawAlt);
        $source = Escape::unescapeBackslashes($this->extractUrl($rawTarget));

        $params = MediaHelper::parseParameters($source);

        $mediaType = 'internalmedia';
        if (media_isexternal($params['src']) || link_isinterwiki($params['src'])) {
            $mediaType = 'externalmedia';
        }

        // An empty alt text is reported as "no title".
        $title = null;
        if ($altText !== '') {
            $title = $altText;
        }

        $descriptor = [
            'type' => $mediaType,
            'src' => $params['src'],
            'title' => $title,
        ];
        foreach (['align', 'width', 'height', 'cache', 'linking'] as $key) {
            $descriptor[$key] = $params[$key];
        }

        return $descriptor;
    }
}