xref: /dokuwiki/inc/Parsing/ParserMode/GfmLink.php (revision 1e28e406b358f79221c515b2a56520d5dbbfb6c8)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\Helpers\Link;
7use dokuwiki\Parsing\Helpers\Media as MediaHelper;
8
9/**
10 * GFM inline link [text](url) with optional title [text](url "title").
11 *
12 * The link text may be either plain text (the common case) or an inline
13 * image `![alt](imgUrl)` — the Markdown equivalent of DW's
14 * `[[target|{{imgUrl}}]]`. The image-as-label form emits a single link
15 * handler call with a media descriptor array in the label slot, reusing
16 * the same flow that `Internallink` already drives. No new handler
17 * instructions; renderers (xhtml, odt, metadata, …) already know how to
18 * render a link whose label is a media descriptor.
19 *
20 * Mirrors DW's `Internallink` architecture: a permissive outer pattern
21 * plus handle-time parsing, rather than encoding every GFM rule at
22 * pattern level.
23 *
24 * Deliberately not supported (see skip.php for the affected spec examples):
25 *
26 *   - Reference links [text][id] / [text][] / [foo] — the single-pass
27 *     lexer cannot resolve forward references to [foo]: url definitions.
28 *   - Pointy-bracket destinations [link](<foo bar>) — the simplified
29 *     pattern will happily match, but handle() produces an internallink
30 *     with a broken src; spec tests for this stay in skip.php.
31 *   - Balanced-parens inside URLs [link](foo(bar)) — matches truncate
32 *     at first `)`, producing odd output; also in skip.php.
33 *   - Title HTML attribute — DokuWiki link handler instructions have no
34 *     title-attribute slot, and plumbing one through every renderer just
35 *     for this is out of scope. The title parses cleanly but is discarded.
36 *   - Mixed text + image in the label ([prefix ![alt](img) suffix](url))
37 *     — matches DW's policy: Internallink only converts the label to a
38 *     media descriptor when it matches `^{{…}}$` exactly.
39 */
class GfmLink extends AbstractMode
{
    /**
     * Regex fragment matching exactly one inline image `![alt](imgUrl)`.
     *
     * Used twice: as the image alternative of the label in the lexer
     * pattern (connectTo) and inside the image-as-label check (handle).
     * It deliberately has no capture groups of its own — the lexer wraps
     * user patterns in a capture, and extra groups would shift numbering.
     */
    private const IMAGE_SUB = '!\[[^\[\]\n]*\]\([^)\n]+\)';

    /**
     * @inheritdoc
     *
     * This mode always reports the fixed sort value 300.
     */
    public function getSort()
    {
        return 300;
    }

    /**
     * @inheritdoc
     *
     * Registers a single special pattern of the form `[label](payload)`
     * under the mode name `gfm_link`.
     */
    public function connectTo($mode)
    {
        // Pieces of the pattern, left to right:
        //   \[(?!\[)       opening bracket that is not followed by another `[`
        //   [^\[\]\n]+     plain-text label: no brackets, no newline
        //   %s             ...or exactly one inline image (IMAGE_SUB)
        //   \]\([^)\n]+\)  closing bracket plus a permissive payload
        // The payload is only split into URL / title later, in handle().
        $linkPattern = sprintf('\[(?!\[)(?:[^\[\]\n]+|%s)\]\([^)\n]+\)', self::IMAGE_SUB);

        $this->Lexer->addSpecialPattern($linkPattern, $mode, 'gfm_link');
    }

    /**
     * @inheritdoc
     *
     * Splits the matched `[label](payload)` text into a label and a target
     * URL, lets Link::classify() pick the handler call, and records that
     * call at $pos. Always returns true.
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // A label that is exactly one inline image: `[![alt](img)](target)`.
        // Group 1 is the image, group 2 the raw payload of the outer link.
        $imageLabelRegex = sprintf('/^\[(%s)\]\(([^)\n]+)\)$/', self::IMAGE_SUB);
        $groups = [];

        if (preg_match($imageLabelRegex, $match, $groups)) {
            // Image label: hand the link a media descriptor array.
            $linkLabel  = $this->parseImageDescriptor($groups[1]);
            $linkTarget = $this->extractUrl($groups[2]);
        } else {
            // Text label: it cannot contain `]`, hence the first `](`
            // is where the label ends and the payload begins.
            $boundary   = strpos($match, '](');
            $linkLabel  = substr($match, 1, $boundary - 1);
            $payload    = substr($match, $boundary + 2, -1);
            $linkTarget = $this->extractUrl($payload);
        }

        [$instruction, $arguments] = Link::classify($linkTarget, $linkLabel);
        $handler->addCall($instruction, $arguments, $pos);

        return true;
    }

    /**
     * Reduce the text found between the parentheses to just the URL.
     *
     * Surrounding whitespace is trimmed and everything from the first
     * space, tab or newline onwards (i.e. an optional title) is dropped.
     *
     * @param string $inside raw text between `(` and `)`
     * @return string the leading whitespace-free token
     */
    private function extractUrl(string $inside): string
    {
        $payload   = trim($inside);
        $urlLength = strcspn($payload, " \t\n");

        return substr($payload, 0, $urlLength);
    }

    /**
     * Turn an inline image `![alt](imgUrl)` into a media descriptor array.
     *
     * The array carries the keys type, src, title, align, width, height,
     * cache and linking, so that the link handler can use it as a media
     * label in the same way as for `[[page|{{img}}]]`.
     *
     * @param string $imageMatch the image text, starting with `![`
     * @return array media descriptor
     */
    private function parseImageDescriptor(string $imageMatch): array
    {
        // The alt text sits between the leading `![` and the first `](`;
        // the image payload runs from there up to the final `)`.
        $boundary = strpos($imageMatch, '](');
        $altText  = substr($imageMatch, 2, $boundary - 2);
        $imageUrl = $this->extractUrl(substr($imageMatch, $boundary + 2, -1));

        $params = MediaHelper::parseParameters($imageUrl);
        $source = $params['src'];

        if (media_isexternal($source) || link_isinterwiki($source)) {
            $mediaType = 'externalmedia';
        } else {
            $mediaType = 'internalmedia';
        }

        // An empty alt text is reported as "no title".
        if ($altText !== '') {
            $mediaTitle = $altText;
        } else {
            $mediaTitle = null;
        }

        return [
            'type'    => $mediaType,
            'src'     => $source,
            'title'   => $mediaTitle,
            'align'   => $params['align'],
            'width'   => $params['width'],
            'height'  => $params['height'],
            'cache'   => $params['cache'],
            'linking' => $params['linking'],
        ];
    }
}
128