<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Link;
use dokuwiki\Parsing\Helpers\Media as MediaHelper;

/**
 * GFM inline link [text](url) with optional title [text](url "title").
 *
 * The link text may be either plain text (the common case) or an inline
 * image `![alt](imgUrl)` — the Markdown equivalent of DW's
 * `[[target|{{imgUrl}}]]`. The image-as-label form emits a single link
 * handler call with a media descriptor array in the label slot, reusing
 * the same flow that `Internallink` already drives. No new handler
 * instructions; renderers (xhtml, odt, metadata, …) already know how to
 * render a link whose label is a media descriptor.
 *
 * Mirrors DW's `Internallink` architecture: a permissive outer pattern
 * plus handle-time parsing, rather than encoding every GFM rule at
 * pattern level.
 *
 * Deliberately not supported (see skip.php for the affected spec examples):
 *
 *  - Reference links [text][id] / [text][] / [foo] — the single-pass
 *    lexer cannot resolve forward references to [foo]: url definitions.
 *  - Pointy-bracket destinations [link](<foo bar>) — the simplified
 *    pattern will happily match, but handle() produces an internallink
 *    with a broken src; spec tests for this stay in skip.php.
 *  - Balanced-parens inside URLs [link](foo(bar)) — matches truncate
 *    at first `)`, producing odd output; also in skip.php.
 *  - Title HTML attribute — DokuWiki link handler instructions have no
 *    title-attribute slot, and plumbing one through every renderer just
 *    for this is out of scope. The title parses cleanly but is discarded.
 *  - Mixed text + image in the label ([prefix ![alt](img) suffix](url))
 *    — matches DW's policy: Internallink only converts the label to a
 *    media descriptor when it matches `^{{…}}$` exactly.
 */
class GfmLink extends AbstractMode
{
    // Image sub-pattern reused for both the label alternative in the main
    // pattern and the image-as-label detector in handle(). No capture
    // groups here — the lexer wraps user patterns in a capture and
    // additional captures would renumber unpredictably.
    private const IMAGE_SUB = '!\[[^\[\]\n]*\]\([^)\n]+\)';

    /** @inheritdoc */
    public function getSort()
    {
        return 300;
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // Outer shape: `[text-or-image](url)`. Text class forbids brackets
        // and newlines; the image alternative explicitly matches one
        // inline image. URL slot is permissive (`[^)\n]+`) — handle() does
        // URL / title splitting post-entry, mirroring how DW Internallink
        // parses inside `[[...]]`.
        $pattern = '\[(?!\[)(?:[^\[\]\n]+|' . self::IMAGE_SUB . ')\]\([^)\n]+\)';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'gfm_link');
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Detect image-as-label `[![alt](imgUrl)](target)`. Parallels
        // Internallink's `^{{…}}$` check — when the label is exactly an
        // inline image, parse it into a media descriptor; otherwise
        // treat the label as plain text.
        if (preg_match('/^\[(' . self::IMAGE_SUB . ')\]\(([^)\n]+)\)$/', $match, $m)) {
            $label = $this->parseImageDescriptor($m[1]);
            $targetUrl = $this->extractUrl($m[2]);
        } else {
            // Plain text label can't contain `]`, so the first `](` is
            // the label/target separator.
            $sep = strpos($match, '](');
            // Skip the leading `[`; stop right before the separator.
            $label = substr($match, 1, $sep - 1);
            // Skip the `](` separator and drop the closing `)`.
            $targetUrl = $this->extractUrl(substr($match, $sep + 2, -1));
        }

        // Link::classify() picks the handler call name and its arguments
        // for the given target; the label is either a string or a media
        // descriptor array.
        [$call, $args] = Link::classify($targetUrl, $label);
        $handler->addCall($call, $args, $pos);
        return true;
    }

    /**
     * Extract the URL from a parenthesized payload: trim surrounding
     * whitespace, then take the first whitespace-delimited token. Any
     * trailing title is discarded (no renderer slot for it).
     *
     * @param string $inside text found between `(` and `)`
     * @return string the URL token; empty when $inside is only whitespace
     */
    private function extractUrl(string $inside): string
    {
        $inside = trim($inside);
        // strcspn() yields the length of the leading run that contains
        // no space, tab or newline — i.e. the first token.
        return substr($inside, 0, strcspn($inside, " \t\n"));
    }

    /**
     * Parse an inline image sub-match `![alt](imgUrl)` into the media
     * descriptor shape Media::parseMedia() returns, so the link handler
     * can treat it as a media label identically to `[[page|{{img}}]]`.
     *
     * @param string $imageMatch text matched by self::IMAGE_SUB
     * @return array media descriptor with the keys type, src, title,
     *               align, width, height, cache and linking
     */
    private function parseImageDescriptor(string $imageMatch): array
    {
        // The alt text can't contain `]`, so the first `](` separates
        // the alt text from the image URL.
        $sep = strpos($imageMatch, '](');
        // Skip the leading `![`; stop right before the separator.
        $alt = substr($imageMatch, 2, $sep - 2);
        $imgUrl = $this->extractUrl(substr($imageMatch, $sep + 2, -1));

        $p = MediaHelper::parseParameters($imgUrl);
        $type = (media_isexternal($p['src']) || link_isinterwiki($p['src']))
            ? 'externalmedia'
            : 'internalmedia';

        return [
            'type' => $type,
            'src' => $p['src'],
            // An empty alt text is reported as "no title".
            'title' => $alt !== '' ? $alt : null,
            'align' => $p['align'],
            'width' => $p['width'],
            'height' => $p['height'],
            'cache' => $p['cache'],
            'linking' => $p['linking'],
        ];
    }
}