<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Link;
use dokuwiki\Parsing\Helpers\Media as MediaHelper;

/**
 * GFM inline link `[text](url)`, optionally carrying a title as in
 * `[text](url "title")`.
 *
 * The label is either plain text (the common case) or exactly one inline
 * image, `[![alt](imgUrl)](target)` — the Markdown counterpart of DW's
 * `[[target|{{imgUrl}}]]`. For the image-as-label form a single link
 * handler call is emitted whose label slot holds a media descriptor array,
 * reusing the flow that `Internallink` already drives. No new handler
 * instructions are introduced; renderers (xhtml, odt, metadata, …) already
 * know how to render a link whose label is a media descriptor.
 *
 * The design mirrors DW's `Internallink`: a permissive outer pattern with
 * the real parsing done at handle time, instead of encoding every GFM rule
 * in the lexer pattern.
 *
 * Deliberately not supported (see skip.php for the affected spec examples):
 *
 * - Reference links `[text][id]` / `[text][]` / `[foo]` — the single-pass
 *   lexer cannot resolve forward references to `[foo]: url` definitions.
 * - Pointy-bracket destinations `[link](<foo bar>)` — the simplified
 *   pattern will happily match, but handle() produces an internallink
 *   with a broken src; spec tests for this stay in skip.php.
 * - Balanced parentheses inside URLs `[link](foo(bar))` — the match is
 *   truncated at the first `)`, producing odd output; also in skip.php.
 * - Title HTML attribute — DokuWiki link handler instructions have no
 *   title-attribute slot, and plumbing one through every renderer just
 *   for this is out of scope. The title parses cleanly but is discarded.
 * - Mixed text + image in the label (`[prefix ![alt](img) suffix](url)`)
 *   — matches DW's policy: Internallink only converts the label to a
 *   media descriptor when it matches `^{{…}}$` exactly.
 */
class GfmLink extends AbstractMode
{
    // Sub-pattern for one inline image `![alt](url)`. It is shared by the
    // label alternative of the lexer pattern and by the image-as-label
    // check in handle(). It must stay free of capture groups: the lexer
    // wraps user patterns in a capture of its own, and extra groups would
    // renumber unpredictably.
    private const IMAGE_SUB = '!\[[^\[\]\n]*\]\([^)\n]+\)';

    /** @inheritdoc */
    public function getSort()
    {
        return 300;
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // Overall shape is `[label](payload)`.
        //  - label: either a run of characters without brackets or
        //    newlines, or exactly one inline image (IMAGE_SUB).
        //  - payload: anything up to the first `)` on the same line.
        //    Splitting it into URL and title is left to handle(), the same
        //    way DW's Internallink parses the inside of `[[...]]`.
        $labelAlternatives = '(?:[^\[\]\n]+|' . self::IMAGE_SUB . ')';
        $pattern = '\[(?!\[)' . $labelAlternatives . '\]\([^)\n]+\)';

        $this->Lexer->addSpecialPattern($pattern, $mode, 'gfm_link');
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Image-as-label form `[![alt](img)](target)`: group 1 is the
        // whole inline image, group 2 the raw payload of the outer link.
        // This parallels Internallink's `^{{…}}$` check — only a label
        // that is exactly one inline image becomes a media descriptor.
        $imageLabelRegex = '/^\[(' . self::IMAGE_SUB . ')\]\(([^)\n]+)\)$/';

        if (preg_match($imageLabelRegex, $match, $parts)) {
            $label = $this->parseImageDescriptor($parts[1]);
            $rawTarget = $parts[2];
        } else {
            // A plain-text label cannot contain `]`, so the first `](`
            // is the boundary between label and payload.
            $boundary = strpos($match, '](');
            $label = substr($match, 1, $boundary - 1);
            // Drop the two separator characters and the closing `)`.
            $rawTarget = substr($match, $boundary + 2, -1);
        }

        [$call, $args] = Link::classify($this->extractUrl($rawTarget), $label);
        $handler->addCall($call, $args, $pos);

        return true;
    }

    /**
     * Reduce the text between the parentheses to the bare URL.
     *
     * Surrounding whitespace is trimmed and only the first token (up to
     * the first space, tab or newline) is kept. Whatever follows — i.e. an
     * optional title — is dropped, because no renderer has a slot for it.
     *
     * @param string $inside Raw text found between `(` and `)`
     * @return string The first whitespace-delimited token, possibly empty
     */
    private function extractUrl(string $inside): string
    {
        $trimmed = trim($inside);
        $tokenLength = strcspn($trimmed, " \t\n");

        return substr($trimmed, 0, $tokenLength);
    }

    /**
     * Turn an inline image sub-match `![alt](url)` into a media descriptor.
     *
     * The resulting array is meant to have the shape Media::parseMedia()
     * returns, so the link handler can treat it as a media label exactly
     * like the `{{img}}` part of `[[page|{{img}}]]`.
     *
     * @param string $imageMatch The complete `![alt](url)` text
     * @return array Descriptor with the keys type, src, title, align,
     *               width, height, cache and linking
     */
    private function parseImageDescriptor(string $imageMatch): array
    {
        $boundary = strpos($imageMatch, '](');
        // Skip the leading `![` to get the alt text.
        $altText = substr($imageMatch, 2, $boundary - 2);
        // Everything after `](` minus the closing `)`.
        $payload = substr($imageMatch, $boundary + 2, -1);

        $params = MediaHelper::parseParameters($this->extractUrl($payload));
        $src = $params['src'];

        if (media_isexternal($src) || link_isinterwiki($src)) {
            $type = 'externalmedia';
        } else {
            $type = 'internalmedia';
        }

        return [
            'type' => $type,
            'src' => $src,
            // An empty alt text is reported as "no title".
            'title' => $altText === '' ? null : $altText,
            'align' => $params['align'],
            'width' => $params['width'],
            'height' => $params['height'],
            'cache' => $params['cache'],
            'linking' => $params['linking'],
        ];
    }
}