<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers;

/**
 * Parser mode for GFM inline links: [text](url), optionally [text](url "title").
 *
 * The label is either plain text or exactly one inline image
 * `![alt](imgUrl)` — the Markdown counterpart of DW's
 * `[[target|{{imgUrl}}]]`. For the image-as-label form a single link call
 * is emitted whose label slot carries a media descriptor array, so no new
 * handler instruction is introduced; this follows the approach described
 * for `Internallink`, where renderers already handle a media descriptor
 * as link label.
 *
 * Design: like `Internallink`, the lexer pattern is deliberately
 * permissive and the detailed parsing happens in handle(), instead of
 * encoding every GFM rule in the pattern itself.
 *
 * Intentionally unsupported (the affected spec examples live in skip.php):
 *
 * - Reference links [text][id] / [text][] / [foo] — a single-pass lexer
 *   cannot resolve forward references to `[foo]: url` definitions.
 * - Pointy-bracket destinations [link](<foo bar>) — the simplified
 *   pattern matches them, but handle() ends up with a broken src.
 * - Balanced parentheses inside URLs [link](foo(bar)) — the match is cut
 *   at the first `)`, producing odd output.
 * - Title HTML attribute — DokuWiki link instructions have no slot for a
 *   title attribute; the title is parsed and then dropped.
 * - Text mixed with an image in the label
 *   ([prefix ![alt](img) suffix](url)) — same policy as Internallink,
 *   which only converts a label that is `^{{…}}$` exactly.
 */
class GfmLink extends AbstractMode
{
    // Regex fragment for one inline image `![alt](url)`. It is shared by
    // the lexer pattern (as a label alternative) and by the image-as-label
    // check in handle(). It must stay free of capture groups: the lexer
    // wraps user patterns in its own capture, and extra groups would
    // shift the numbering unpredictably.
    private const IMAGE_SUB = '!\[[^\[\]\n]*\]\([^)\n]+\)';

    /** @inheritdoc */
    public function getSort()
    {
        return 300;
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // Shape matched: `[label](payload)`.
        //  - label:   either a run of characters without brackets or
        //             newlines, or exactly one inline image (IMAGE_SUB);
        //             the lookahead rejects a second opening bracket.
        //  - payload: anything up to the first `)` on the same line; the
        //             URL / title split is left to handle().
        $linkPattern = '\[(?!\[)(?:[^\[\]\n]+|' . self::IMAGE_SUB . ')\]\([^)\n]+\)';
        $this->Lexer->addSpecialPattern($linkPattern, $mode, 'gfm_link');
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Anchored form of the image-as-label shape `[![alt](img)](target)`:
        // group 1 is the whole inline image, group 2 the link payload.
        $imageLabelRegex = '/^\[(' . self::IMAGE_SUB . ')\]\(([^)\n]+)\)$/';

        if (preg_match($imageLabelRegex, $match, $parts)) {
            // The label is exactly one inline image: turn it into a
            // media descriptor array.
            $label = $this->parseImageDescriptor($parts[1]);
            $payload = $parts[2];
        } else {
            // A plain text label cannot contain `]`, therefore the first
            // `](` is guaranteed to separate label and payload.
            $split = strpos($match, '](');
            $label = substr($match, 1, $split - 1);
            // Skip the `](` and drop the closing `)`.
            $payload = substr($match, $split + 2, -1);
        }

        $targetUrl = $this->extractUrl($payload);

        [$call, $args] = Helpers::classifyLink($targetUrl, $label);
        $handler->addCall($call, $args, $pos);

        return true;
    }

    /**
     * Reduce the text between the parentheses to the bare URL.
     *
     * Surrounding whitespace is trimmed and everything from the first
     * space, tab or newline onward (i.e. an optional title) is cut off.
     *
     * @param string $inside raw text found between `(` and `)`
     * @return string the first whitespace-delimited token, possibly empty
     */
    private function extractUrl(string $inside): string
    {
        $trimmed = trim($inside);
        $urlLength = strcspn($trimmed, " \t\n");

        return substr($trimmed, 0, $urlLength);
    }

    /**
     * Build a media descriptor array from an inline image `![alt](url)`.
     *
     * The resulting array is meant to have the shape of what
     * Media::parseMedia() returns, so it can serve as a media label the
     * same way `[[page|{{img}}]]` does.
     *
     * @param string $imageMatch the complete inline image, e.g. `![alt](url)`
     * @return array descriptor with the keys type, src, title, align,
     *               width, height, cache and linking
     */
    private function parseImageDescriptor(string $imageMatch): array
    {
        // The alt text cannot contain `]`, so the first `](` ends it.
        $split = strpos($imageMatch, '](');
        // Alt text sits between the leading `![` and the `](`.
        $alt = substr($imageMatch, 2, $split - 2);
        // Image payload without its closing `)`, reduced to the URL.
        $imgUrl = $this->extractUrl(substr($imageMatch, $split + 2, -1));

        $params = Helpers::parseMediaParameters($imgUrl);

        if (media_isexternal($params['src']) || link_isinterwiki($params['src'])) {
            $type = 'externalmedia';
        } else {
            $type = 'internalmedia';
        }

        $descriptor = [
            'type' => $type,
            'src' => $params['src'],
            // An empty alt text is reported as "no title".
            'title' => $alt === '' ? null : $alt,
        ];

        // The remaining settings are taken over unchanged, in this order.
        foreach (['align', 'width', 'height', 'cache', 'linking'] as $key) {
            $descriptor[$key] = $params[$key];
        }

        return $descriptor;
    }
}