1<?php
2
3declare(strict_types=1);
4
5/*
6 * This file is part of the league/commonmark package.
7 *
8 * (c) Colin O'Dell <colinodell@gmail.com>
9 *
10 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11 *  - (c) John MacFarlane
12 *
13 * For the full copyright and license information, please view the LICENSE
14 * file that was distributed with this source code.
15 */
16
17namespace League\CommonMark\Reference;
18
19use League\CommonMark\Parser\Cursor;
20use League\CommonMark\Util\LinkParserHelper;
21
/**
 * Incremental, line-by-line parser for link reference definitions at the start of a paragraph.
 *
 * Lines are fed one at a time through parse(). The parser is a small state machine (see the state
 * constants below): it consumes as many `[label]: destination "title"` definitions as it can and,
 * as soon as a line cannot be part of a definition, switches to the terminal PARAGRAPH state.
 * Text that did not end up belonging to a committed definition is available from
 * getParagraphContent().
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Looking for and parsing the label, i.e. `foo` within `[foo]`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /**
     * Lines received since the last point at which a definition was known to be valid, joined by "\n".
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $paragraph = '';

    /**
     * Definitions that have been fully parsed and committed so far.
     *
     * @var array<int, ReferenceInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $references = [];

    /**
     * Current state; one of the state constants declared above.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $state = self::START_DEFINITION;

    /**
     * Label of the definition being parsed (null while no definition is in progress).
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $label = null;

    /**
     * Destination of the definition being parsed (null until one has been parsed).
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $destination = null;

    /**
     * Title text accumulated so far for the definition being parsed; may be incomplete.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $title = '';

    /**
     * Character that closes the title being parsed (`"`, `'` or `)`), or null when no title is open.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $titleDelimiter = null;

    /**
     * True once the definition in progress is known to be valid but has not been committed yet.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private bool $referenceValid = false;

    /**
     * Returns the text that is not part of any definition known to be valid so far.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Commits a still-pending valid definition (if any) and returns every definition found.
     *
     * Committing resets the in-progress label/destination/title, so call this only once no more
     * lines will be passed to parse().
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        if ($this->referenceValid) {
            // A completed title is always committed right away by parseTitle(). Whatever title text is
            // still buffered here therefore belongs to a title that was never closed, or that was
            // followed by other text on its line. Those lines are paragraph content, not a title, so
            // the pending definition must be committed without them.
            $this->title = '';
        }

        $this->finishReference();

        return $this->references;
    }

    /**
     * Tells whether at least one definition has been found, without changing the parser state.
     *
     * A definition that is already known to be valid but not yet committed counts as well, so the
     * answer agrees with what getReferences() would return.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [] || $this->referenceValid;
    }

    /**
     * Consumes the next line of the paragraph.
     *
     * The line is always appended to the paragraph content first; the state handlers clear that
     * content again whenever a definition turns out to be valid. Once the PARAGRAPH state has been
     * reached, lines are only accumulated and no longer inspected.
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (! $cursor->isAtEnd()) {
            $result = false;
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
                default:
                    // this should never happen; $result stays false, which ends definition parsing below
                    break;
            }

            if (! $result) {
                // The current handler rejected the input: everything from here on is paragraph text.
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Expects `[` (after optional spaces/tabs) and opens a new, empty label.
     *
     * @return bool False when the line does not start a definition
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCurrentCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // `[` was the last character of the line: the label starts with a line break
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Accumulates label text and, once `]:` is reached, validates the label and moves on to the destination.
     *
     * @return bool False when the input cannot be (part of) a valid label
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCurrentCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label; it must be immediately followed by a colon
        if ($cursor->getCurrentCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'UTF-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Parses the link destination and decides whether the definition is already valid.
     *
     * @return bool False when there is no valid destination, or when it is directly followed by other text
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph      = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Looks for a title opener (`"`, `'` or `(`); without one, the input is retried as a new definition.
     *
     * @return bool Always true; input that is not a title is handed back to START_DEFINITION instead of rejected
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Nothing but whitespace here. Commit a pending definition before going back to
            // START_DEFINITION; otherwise the next definition would overwrite its label and destination.
            $this->finishReference();
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCurrentCharacter()) {
            case '"':
            case "'":
                // quoted titles are closed by the same character that opened them
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
            default:
                // no title delimiter found
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // the opener was the last character of the line: the title starts with a line break
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Accumulates title text; when the closing delimiter ends the line, commits the definition.
     *
     * @return bool False when the title is invalid or is followed by other text on the same line
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop
            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            $endDelimiterFound = true;
            // Chop it off
            $title = \substr($title, 0, -1);
        }

        $this->title .= $title;

        if (! $endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (! $cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }

    /**
     * Commits the definition in progress, if it is valid, and resets the per-definition state.
     *
     * Does nothing while no definition is known to be valid.
     */
    private function finishReference(): void
    {
        if (! $this->referenceValid) {
            return;
        }

        /** @psalm-suppress PossiblyNullArgument -- these can't possibly be null if we're in this state */
        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label          = null;
        $this->referenceValid = false;
        $this->destination    = null;
        $this->title          = '';
        $this->titleDelimiter = null;
    }
}
325