1<?php
2
3declare(strict_types=1);
4
5/*
6 * This file is part of the league/commonmark package.
7 *
8 * (c) Colin O'Dell <colinodell@gmail.com>
9 *
10 * For the full copyright and license information, please view the LICENSE
11 * file that was distributed with this source code.
12 */
13
14namespace League\CommonMark;
15
16use League\CommonMark\Exception\UnexpectedEncodingException;
17
/**
 * Tracks a read position inside a single line of text.
 *
 * Positions are counted in characters (not bytes) and columns are counted
 * with tabs expanded to the next tab stop, so the two can diverge whenever
 * the line contains tabs.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /**
     * Number of columns between two consecutive tab stops
     */
    private const TAB_STOP = 4;

    /**
     * @var string
     *
     * The complete line given to the constructor; it is never modified.
     */
    private $line;

    /**
     * @var int
     *
     * Length of the line in characters (not bytes).
     */
    private $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end.  In this state, any character-returning method MUST return null.
     */
    private $currentPosition = 0;

    /**
     * @var int
     *
     * Column of the current position, with tabs expanded to tab stops.
     */
    private $column = 0;

    /**
     * @var int
     *
     * Columns between the current column and the next non-space character;
     * refreshed by getNextNonSpacePosition().
     */
    private $indent = 0;

    /**
     * @var int
     *
     * Position the cursor had before the most recent advance.
     */
    private $previousPosition = 0;

    /**
     * @var int|null
     *
     * Memoized result of getNextNonSpacePosition(); null means "not computed".
     */
    private $nextNonSpaceCache;

    /**
     * @var bool
     *
     * True when a column-wise advance stopped part-way through a tab. The
     * current position still points at that tab, and getRemainder() replaces
     * it with spaces covering the columns not consumed yet.
     */
    private $partiallyConsumedTab = false;

    /**
     * @var bool
     */
    private $lineContainsTabs;

    /**
     * @var bool
     *
     * True when the character count differs from the byte count.
     */
    private $isMultibyte;

    /**
     * @var array<int, string>
     *
     * Characters already extracted from a multibyte line, keyed by position.
     */
    private $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     *
     * @throws UnexpectedEncodingException if the line is not valid UTF-8
     */
    public function __construct(string $line)
    {
        if (!\mb_check_encoding($line, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $this->line = $line;
        $this->length = \mb_strlen($line, 'UTF-8') ?: 0;
        $this->isMultibyte = $this->length !== \strlen($line);
        $this->lineContainsTabs = false !== \strpos($line, "\t");
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * The result is cached until the cursor moves. As a side effect the
     * indent (in columns) up to that character is recalculated.
     *
     * @return int
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $position = $this->currentPosition;
        $column = $this->column;

        while (($character = $this->getCharacter($position)) !== null) {
            if ($character === ' ') {
                $column++;
            } elseif ($character === "\t") {
                $column += $this->columnsToNextTabStop($column);
            } else {
                break;
            }

            $position++;
        }

        $this->indent = $column - $this->column;

        // Running off the end of the line is reported as the line length
        return $this->nextNonSpaceCache = ($character === null) ? $this->length : $position;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     *
     * @return string|null The character, or null if only spaces/tabs remain
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        return $this->getCharacter($this->getNextNonSpacePosition());
    }

    /**
     * Calculates the current indent (number of columns between the current
     * position and the next non-space character)
     *
     * @return int
     */
    public function getIndent(): int
    {
        if ($this->nextNonSpaceCache === null) {
            // The indent is computed as a side effect of this call
            $this->getNextNonSpacePosition();
        }

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     *
     * @return bool
     */
    public function isIndented(): bool
    {
        return $this->getIndent() >= self::INDENT_LEVEL;
    }

    /**
     * Returns the character at the given position
     *
     * @param int|null $index Character position; defaults to the current position
     *
     * @return string|null The character, or null if the position is out of bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        if ($index === null) {
            $index = $this->currentPosition;
        }

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        // Single-byte lines can be indexed directly
        if (!$this->isMultibyte) {
            return $this->line[$index];
        }

        if (!isset($this->charCache[$index])) {
            $this->charCache[$index] = \mb_substr($this->line, $index, 1, 'UTF-8');
        }

        return $this->charCache[$index];
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     *
     * @param int $offset Distance, in characters, from the current position
     *
     * @return string|null
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     *
     * @return bool
     */
    public function isBlank(): bool
    {
        return $this->nextNonSpaceCache === $this->length || $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards by one character
     *
     * @return void
     */
    public function advance()
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * When advancing by columns, a tab that is wider than the remaining
     * amount is only partially consumed: the column moves but the position
     * stays on the tab.
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     *
     * @return void
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false)
    {
        $this->previousPosition = $this->currentPosition;

        if ($characters === 0) {
            return;
        }

        $this->nextNonSpaceCache = null;

        // Only look at the upcoming characters when the line has tabs at all
        $nextFewChars = null;
        if ($this->lineContainsTabs) {
            $nextFewChars = $this->isMultibyte ?
                \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8') :
                \substr($this->line, $this->currentPosition, $characters);
        }

        // Optimization to avoid tab handling logic if we have no tabs
        if ($nextFewChars === null || false === \strpos($nextFewChars, "\t")) {
            $length = \min($characters, $this->length - $this->currentPosition);
            $this->partiallyConsumedTab = false;
            $this->currentPosition += $length;
            $this->column += $length;

            return;
        }

        if ($characters === 1) {
            // The single upcoming character is known to be the tab itself
            $asArray = [$nextFewChars];
        } elseif ($this->isMultibyte) {
            /** @var string[] $asArray */
            $asArray = \preg_split('//u', $nextFewChars, -1, \PREG_SPLIT_NO_EMPTY);
        } else {
            $asArray = \str_split($nextFewChars);
        }

        foreach ($asArray as $c) {
            if ($c === "\t") {
                $charsToTab = $this->columnsToNextTabStop($this->column);
                if ($advanceByColumns) {
                    // Stop inside the tab when it is wider than what is left
                    $this->partiallyConsumedTab = $charsToTab > $characters;
                    $charsToAdvance = $charsToTab > $characters ? $characters : $charsToTab;
                    $this->column += $charsToAdvance;
                    $this->currentPosition += $this->partiallyConsumedTab ? 0 : 1;
                    $characters -= $charsToAdvance;
                } else {
                    $this->partiallyConsumedTab = false;
                    $this->column += $charsToTab;
                    $this->currentPosition++;
                    $characters--;
                }
            } else {
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advances the cursor by a single space or tab, if present
     *
     * A tab is advanced by one column only, so it may end up partially
     * consumed.
     *
     * @return bool Whether a space or tab was found at the current position
     */
    public function advanceBySpaceOrTab(): bool
    {
        $character = $this->getCharacter();

        if ($character === ' ' || $character === "\t") {
            $this->advanceBy(1, true);

            return true;
        }

        return false;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $newPosition = $this->getNextNonSpacePosition();
        $this->advanceBy($newPosition - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches);

        // Only spaces and a newline can match, so bytes and characters agree
        $this->advanceWithinRemainder(\strlen($matches[0]));

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * Note that only the position is moved; the column is not recalculated.
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $this->previousPosition = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        $this->currentPosition = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns everything from the current position to the end of the line
     *
     * If a tab is partially consumed, it is replaced by as many spaces as
     * there are columns left until the next tab stop.
     *
     * @return string
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $prefix = '';
        $position = $this->currentPosition;
        if ($this->partiallyConsumedTab) {
            // Skip the tab itself and stand in spaces for its unconsumed columns
            $position++;
            $prefix = \str_repeat(' ', $this->columnsToNextTabStop($this->column));
        }

        $subString = $this->isMultibyte ?
            \mb_substr($this->line, $position, null, 'UTF-8') :
            \substr($this->line, $position);

        return $prefix . $subString;
    }

    /**
     * Returns the complete line this cursor was created with
     *
     * @return string
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * Whether the cursor has reached (or passed) the end of the line
     *
     * @return bool
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     *
     * The expression is applied to getRemainder(), so a partially consumed
     * tab is seen as spaces.
     *
     * @param string $regex
     *
     * @return string|null The matched text, or null if nothing matched
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();

        if (!\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // $matches[0][0] contains the matched text
        // $matches[0][1] contains the index of that match

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset = \mb_strlen(\substr($subject, 0, $matches[0][1]), 'UTF-8');
            $matchLength = \mb_strlen($matches[0][0], 'UTF-8');
        } else {
            $offset = $matches[0][1];
            $matchLength = \strlen($matches[0][0]);
        }

        $this->advanceWithinRemainder($offset + $matchLength);

        return $matches[0][0];
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     *
     * @return array<mixed>
     */
    public function saveState()
    {
        return [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     *
     * @param array<mixed> $state
     *
     * @return void
     */
    public function restoreState($state)
    {
        // Order must mirror the array built by saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state;
    }

    /**
     * Returns the current position, in characters
     *
     * @return int
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * Returns the text between the previous and the current position
     *
     * @return string
     */
    public function getPreviousText(): string
    {
        return \mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'UTF-8');
    }

    /**
     * Returns part of the line, addressed in characters
     *
     * @param int      $start  Position of the first character
     * @param int|null $length Number of characters, or null for "until the end"
     *
     * @return string
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        $subString = $length !== null ?
            \substr($this->line, $start, $length) :
            \substr($this->line, $start);

        // Before PHP 8.0 substr() returns false instead of '' for an
        // out-of-range start, which would violate the declared return type
        return (string) $subString;
    }

    /**
     * Returns the current column (tabs expanded to tab stops)
     *
     * @return int
     */
    public function getColumn(): int
    {
        return $this->column;
    }

    /**
     * Number of columns from the given column up to the next tab stop
     *
     * @param int $column
     *
     * @return int A value between 1 and TAB_STOP
     */
    private function columnsToNextTabStop(int $column): int
    {
        return self::TAB_STOP - ($column % self::TAB_STOP);
    }

    /**
     * Advances by a length that was measured on the string from getRemainder()
     *
     * getRemainder() replaces a partially consumed tab with spaces. Those
     * spaces are columns of a single character, so they are consumed by
     * column; only what is left is consumed as real characters. Without this
     * distinction the cursor would skip real characters after the tab.
     *
     * @param int $characters Number of remainder characters to consume
     *
     * @return void
     */
    private function advanceWithinRemainder(int $characters): void
    {
        $origin = $this->currentPosition;

        if ($this->partiallyConsumedTab && $characters > 0) {
            $virtualSpaces = \min($characters, $this->columnsToNextTabStop($this->column));
            $this->advanceBy($virtualSpaces, true);
            $characters -= $virtualSpaces;
        }

        if ($characters > 0) {
            $this->advanceBy($characters);
        }

        // Report the whole move as one step, even if it took two advances
        $this->previousPosition = $origin;
    }
}
503