<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers;

/**
 * Parser mode for GFM inline links: [text](url), optionally followed by a
 * title as in [text](url "title").
 *
 * The label is either plain text (the usual case) or exactly one inline
 * image, `[![alt](imgUrl)](target)` — the Markdown counterpart of DW's
 * `[[target|{{imgUrl}}]]`. For the image form a single link handler call is
 * emitted whose label slot holds a media descriptor array, i.e. the same
 * flow `Internallink` already uses. No new handler instructions are
 * introduced; renderers (xhtml, odt, metadata, …) already know how to
 * render a link whose label is a media descriptor.
 *
 * Like DW's `Internallink`, the mode combines a permissive outer pattern
 * with parsing at handle time instead of encoding every GFM rule in the
 * lexer pattern.
 *
 * Deliberately not supported (see skip.php for the affected spec examples):
 *
 *  - Reference links [text][id] / [text][] / [foo] — the single-pass lexer
 *    cannot resolve forward references to [foo]: url definitions.
 *  - Pointy-bracket destinations [link](<foo bar>) — the simplified pattern
 *    will happily match, but handle() produces an internallink with a
 *    broken src; spec tests for this stay in skip.php.
 *  - Balanced parens inside URLs [link](foo(bar)) — the match is truncated
 *    at the first `)`, producing odd output; also in skip.php.
 *  - Title HTML attribute — DokuWiki link handler instructions have no
 *    title-attribute slot, and plumbing one through every renderer just for
 *    this is out of scope. The title parses cleanly but is discarded.
 *  - Mixed text + image in the label ([prefix ![alt](imgUrl) suffix](url))
 *    — matches DW's policy: Internallink only converts the label to a media
 *    descriptor when it matches `^{{…}}$` exactly.
 */
class GfmLink extends AbstractMode
{
    /**
     * Regex fragment for one inline image `![alt](imgUrl)`.
     *
     * Shared by the label alternative of the lexer pattern and by the
     * image-as-label check in handle(). It intentionally contains no capture
     * groups: the lexer wraps user patterns in a capture of its own, and
     * extra groups would renumber unpredictably.
     */
    private const IMAGE_SUB = '!\[[^\[\]\n]*\]\([^)\n]+\)';

    /**
     * @inheritdoc
     *
     * @return int the sort value of this mode (300)
     */
    public function getSort()
    {
        return 300;
    }

    /**
     * @inheritdoc
     *
     * Registers the special pattern `[text-or-image](payload)` for $mode.
     *
     * @param string $mode name of the mode this pattern is attached to
     */
    public function connectTo($mode)
    {
        // The label is either bracket-free, newline-free text or exactly one
        // inline image. The `(?!\[)` lookahead refuses a second opening
        // bracket right after the first. The parenthesised payload is kept
        // permissive (`[^)\n]+`); handle() splits URL and title afterwards,
        // the same way DW's Internallink parses inside `[[...]]`.
        $pattern = '\[(?!\[)(?:[^\[\]\n]+|' . self::IMAGE_SUB . ')\]\([^)\n]+\)';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'gfm_link');
    }

    /**
     * @inheritdoc
     *
     * Splits the matched link into label and target, then adds the call
     * chosen by Helpers::classifyLink() to the handler.
     *
     * @param string  $match   the full matched text `[label](payload)`
     * @param int     $state   lexer state (unused here)
     * @param int     $pos     position passed through to the handler call
     * @param Handler $handler handler receiving the call
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Image-as-label form `[![alt](imgUrl)](target)`. This parallels
        // Internallink's `^{{…}}$` check: only a label that is exactly one
        // inline image becomes a media descriptor.
        $hasImageLabel = preg_match('/^\[(' . self::IMAGE_SUB . ')\]\(([^)\n]+)\)$/', $match, $parts) === 1;

        if ($hasImageLabel) {
            [, $imageMarkup, $payload] = $parts;
            $label = $this->parseImageDescriptor($imageMarkup);
        } else {
            // A plain text label cannot contain `]`, so the first `](` is
            // the boundary between label and target.
            $boundary = strpos($match, '](');
            $label = substr($match, 1, $boundary - 1);
            // Skip the `](` and drop the closing `)`.
            $payload = substr($match, $boundary + 2, -1);
        }

        $targetUrl = $this->extractUrl($payload);

        [$instruction, $arguments] = Helpers::classifyLink($targetUrl, $label);
        $handler->addCall($instruction, $arguments, $pos);

        return true;
    }

    /**
     * Pull the URL out of the text found between the parentheses.
     *
     * Surrounding whitespace is trimmed and everything from the first space,
     * tab or newline onwards is dropped, so a trailing title is discarded
     * (there is no renderer slot for it).
     *
     * @param string $inside the raw text between `(` and `)`
     * @return string the first whitespace-delimited token, possibly empty
     */
    private function extractUrl(string $inside): string
    {
        $payload = trim($inside);
        $urlLength = strcspn($payload, " \t\n");

        return substr($payload, 0, $urlLength);
    }

    /**
     * Convert an inline image sub-match `![alt](imgUrl)` into a media
     * descriptor array.
     *
     * The result follows the shape Media::parseMedia() returns, so the link
     * handler can treat it as a media label exactly like `[[page|{{img}}]]`.
     *
     * @param string $imageMatch the image markup, as matched by IMAGE_SUB
     * @return array descriptor with the keys type, src, title, align, width,
     *               height, cache and linking
     */
    private function parseImageDescriptor(string $imageMatch): array
    {
        $boundary = strpos($imageMatch, '](');
        // The alt text sits between the leading `![` and the `](` boundary.
        $altText = substr($imageMatch, 2, $boundary - 2);
        $imgUrl = $this->extractUrl(substr($imageMatch, $boundary + 2, -1));

        $params = Helpers::parseMediaParameters($imgUrl);
        $source = $params['src'];

        if (media_isexternal($source) || link_isinterwiki($source)) {
            $type = 'externalmedia';
        } else {
            $type = 'internalmedia';
        }

        $descriptor = [
            'type' => $type,
            'src' => $source,
            // An empty alt text is reported as "no title".
            'title' => $altText === '' ? null : $altText,
        ];

        // The remaining fields are passed through unchanged, in this order.
        foreach (['align', 'width', 'height', 'cache', 'linking'] as $key) {
            $descriptor[$key] = $params[$key];
        }

        return $descriptor;
    }
}