1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Yaml;
13
14use Symfony\Component\Yaml\Exception\ParseException;
15use Symfony\Component\Yaml\Tag\TaggedValue;
16
17/**
18 * Parser parses YAML strings to convert them to PHP arrays.
19 *
20 * @author Fabien Potencier <fabien@symfony.com>
21 *
22 * @final
23 */
24class Parser
25{
    // Regex fragment capturing a tag (a "!" followed by tag characters) in the named group "tag".
    public const TAG_PATTERN = '(?P<tag>![\w!.\/:-]+)';
    // Regex fragment for a block scalar header: a "|" or ">" separator, optional sign/digit modifiers, optional trailing comment.
    public const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?';
    // Complete regex splitting "&name rest" into the named groups "ref" and "value".
    public const REFERENCE_PATTERN = '#^&(?P<ref>[^ ]++) *+(?P<value>.*)#u';

    // Path set by parseFile() for the duration of a parse and passed to ParseException; null otherwise.
    private $filename;
    // Line offset added to the current line index by getRealCurrentLineNb(); set by parseBlock() on nested parsers.
    private $offset = 0;
    // Number of lines in the input currently handled by doParse().
    private $numberOfParsedLines = 0;
    // Line count of the first input seen by doParse(); copied to nested parsers by parseBlock().
    private $totalNumberOfLines;
    // The input split on "\n".
    private $lines = [];
    // Zero-based index of the current line in $lines; -1 before the first line has been read.
    private $currentLineNb = -1;
    // Content of the line at $currentLineNb.
    private $currentLine = '';
    // Values registered under a reference name; nested parsers receive this array by reference.
    private $refs = [];
    // Line numbers that getRealCurrentLineNb() counts in addition to the offset.
    private $skippedLineNumbers = [];
    // Skipped line numbers of the current input; parseBlock() hands those at or after the block offset to the nested parser.
    private $locallySkippedLineNumbers = [];
    // Names of references whose value is still being parsed; consulted to report circular references.
    private $refsBeingParsed = [];
41
42    /**
43     * Parses a YAML file into a PHP value.
44     *
45     * @param string $filename The path to the YAML file to be parsed
46     * @param int    $flags    A bit field of Yaml::PARSE_* constants to customize the YAML parser behavior
47     *
48     * @return mixed
49     *
50     * @throws ParseException If the file could not be read or the YAML is not valid
51     */
52    public function parseFile(string $filename, int $flags = 0)
53    {
54        if (!is_file($filename)) {
55            throw new ParseException(sprintf('File "%s" does not exist.', $filename));
56        }
57
58        if (!is_readable($filename)) {
59            throw new ParseException(sprintf('File "%s" cannot be read.', $filename));
60        }
61
62        $this->filename = $filename;
63
64        try {
65            return $this->parse(file_get_contents($filename), $flags);
66        } finally {
67            $this->filename = null;
68        }
69    }
70
71    /**
72     * Parses a YAML string to a PHP value.
73     *
74     * @param string $value A YAML string
75     * @param int    $flags A bit field of Yaml::PARSE_* constants to customize the YAML parser behavior
76     *
77     * @return mixed
78     *
79     * @throws ParseException If the YAML is not valid
80     */
81    public function parse(string $value, int $flags = 0)
82    {
83        if (false === preg_match('//u', $value)) {
84            throw new ParseException('The YAML value does not appear to be valid UTF-8.', -1, null, $this->filename);
85        }
86
87        $this->refs = [];
88
89        $mbEncoding = null;
90
91        if (2 /* MB_OVERLOAD_STRING */ & (int) \ini_get('mbstring.func_overload')) {
92            $mbEncoding = mb_internal_encoding();
93            mb_internal_encoding('UTF-8');
94        }
95
96        try {
97            $data = $this->doParse($value, $flags);
98        } finally {
99            if (null !== $mbEncoding) {
100                mb_internal_encoding($mbEncoding);
101            }
102            $this->refsBeingParsed = [];
103            $this->offset = 0;
104            $this->lines = [];
105            $this->currentLine = '';
106            $this->numberOfParsedLines = 0;
107            $this->refs = [];
108            $this->skippedLineNumbers = [];
109            $this->locallySkippedLineNumbers = [];
110            $this->totalNumberOfLines = null;
111        }
112
113        return $data;
114    }
115
116    private function doParse(string $value, int $flags)
117    {
118        $this->currentLineNb = -1;
119        $this->currentLine = '';
120        $value = $this->cleanup($value);
121        $this->lines = explode("\n", $value);
122        $this->numberOfParsedLines = \count($this->lines);
123        $this->locallySkippedLineNumbers = [];
124
125        if (null === $this->totalNumberOfLines) {
126            $this->totalNumberOfLines = $this->numberOfParsedLines;
127        }
128
129        if (!$this->moveToNextLine()) {
130            return null;
131        }
132
133        $data = [];
134        $context = null;
135        $allowOverwrite = false;
136
137        while ($this->isCurrentLineEmpty()) {
138            if (!$this->moveToNextLine()) {
139                return null;
140            }
141        }
142
143        // Resolves the tag and returns if end of the document
144        if (null !== ($tag = $this->getLineTag($this->currentLine, $flags, false)) && !$this->moveToNextLine()) {
145            return new TaggedValue($tag, '');
146        }
147
148        do {
149            if ($this->isCurrentLineEmpty()) {
150                continue;
151            }
152
153            // tab?
154            if ("\t" === $this->currentLine[0]) {
155                throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
156            }
157
158            Inline::initialize($flags, $this->getRealCurrentLineNb(), $this->filename);
159
160            $isRef = $mergeNode = false;
161            if ('-' === $this->currentLine[0] && self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
162                if ($context && 'mapping' == $context) {
163                    throw new ParseException('You cannot define a sequence item when in a mapping.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
164                }
165                $context = 'sequence';
166
167                if (isset($values['value']) && '&' === $values['value'][0] && self::preg_match(self::REFERENCE_PATTERN, $values['value'], $matches)) {
168                    $isRef = $matches['ref'];
169                    $this->refsBeingParsed[] = $isRef;
170                    $values['value'] = $matches['value'];
171                }
172
173                if (isset($values['value'][1]) && '?' === $values['value'][0] && ' ' === $values['value'][1]) {
174                    throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
175                }
176
177                // array
178                if (isset($values['value']) && 0 === strpos(ltrim($values['value'], ' '), '-')) {
179                    // Inline first child
180                    $currentLineNumber = $this->getRealCurrentLineNb();
181
182                    $sequenceIndentation = \strlen($values['leadspaces']) + 1;
183                    $sequenceYaml = substr($this->currentLine, $sequenceIndentation);
184                    $sequenceYaml .= "\n".$this->getNextEmbedBlock($sequenceIndentation, true);
185
186                    $data[] = $this->parseBlock($currentLineNumber, rtrim($sequenceYaml), $flags);
187                } elseif (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
188                    $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags);
189                } elseif (null !== $subTag = $this->getLineTag(ltrim($values['value'], ' '), $flags)) {
190                    $data[] = new TaggedValue(
191                        $subTag,
192                        $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $flags)
193                    );
194                } else {
195                    if (
196                        isset($values['leadspaces'])
197                        && (
198                            '!' === $values['value'][0]
199                            || self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->trimTag($values['value']), $matches)
200                        )
201                    ) {
202                        // this is a compact notation element, add to next block and parse
203                        $block = $values['value'];
204                        if ($this->isNextLineIndented()) {
205                            $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + \strlen($values['leadspaces']) + 1);
206                        }
207
208                        $data[] = $this->parseBlock($this->getRealCurrentLineNb(), $block, $flags);
209                    } else {
210                        $data[] = $this->parseValue($values['value'], $flags, $context);
211                    }
212                }
213                if ($isRef) {
214                    $this->refs[$isRef] = end($data);
215                    array_pop($this->refsBeingParsed);
216                }
217            } elseif (
218                self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(( |\t)++(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
219                && (false === strpos($values['key'], ' #') || \in_array($values['key'][0], ['"', "'"]))
220            ) {
221                if ($context && 'sequence' == $context) {
222                    throw new ParseException('You cannot define a mapping item when in a sequence.', $this->currentLineNb + 1, $this->currentLine, $this->filename);
223                }
224                $context = 'mapping';
225
226                try {
227                    $key = Inline::parseScalar($values['key']);
228                } catch (ParseException $e) {
229                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
230                    $e->setSnippet($this->currentLine);
231
232                    throw $e;
233                }
234
235                if (!\is_string($key) && !\is_int($key)) {
236                    throw new ParseException((is_numeric($key) ? 'Numeric' : 'Non-string').' keys are not supported. Quote your evaluable mapping keys instead.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
237                }
238
239                // Convert float keys to strings, to avoid being converted to integers by PHP
240                if (\is_float($key)) {
241                    $key = (string) $key;
242                }
243
244                if ('<<' === $key && (!isset($values['value']) || '&' !== $values['value'][0] || !self::preg_match('#^&(?P<ref>[^ ]+)#u', $values['value'], $refMatches))) {
245                    $mergeNode = true;
246                    $allowOverwrite = true;
247                    if (isset($values['value'][0]) && '*' === $values['value'][0]) {
248                        $refName = substr(rtrim($values['value']), 1);
249                        if (!\array_key_exists($refName, $this->refs)) {
250                            if (false !== $pos = array_search($refName, $this->refsBeingParsed, true)) {
251                                throw new ParseException(sprintf('Circular reference [%s] detected for reference "%s".', implode(', ', array_merge(\array_slice($this->refsBeingParsed, $pos), [$refName])), $refName), $this->currentLineNb + 1, $this->currentLine, $this->filename);
252                            }
253
254                            throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
255                        }
256
257                        $refValue = $this->refs[$refName];
258
259                        if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $refValue instanceof \stdClass) {
260                            $refValue = (array) $refValue;
261                        }
262
263                        if (!\is_array($refValue)) {
264                            throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
265                        }
266
267                        $data += $refValue; // array union
268                    } else {
269                        if (isset($values['value']) && '' !== $values['value']) {
270                            $value = $values['value'];
271                        } else {
272                            $value = $this->getNextEmbedBlock();
273                        }
274                        $parsed = $this->parseBlock($this->getRealCurrentLineNb() + 1, $value, $flags);
275
276                        if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsed instanceof \stdClass) {
277                            $parsed = (array) $parsed;
278                        }
279
280                        if (!\is_array($parsed)) {
281                            throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
282                        }
283
284                        if (isset($parsed[0])) {
285                            // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes
286                            // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier
287                            // in the sequence override keys specified in later mapping nodes.
288                            foreach ($parsed as $parsedItem) {
289                                if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsedItem instanceof \stdClass) {
290                                    $parsedItem = (array) $parsedItem;
291                                }
292
293                                if (!\is_array($parsedItem)) {
294                                    throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem, $this->filename);
295                                }
296
297                                $data += $parsedItem; // array union
298                            }
299                        } else {
300                            // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the
301                            // current mapping, unless the key already exists in it.
302                            $data += $parsed; // array union
303                        }
304                    }
305                } elseif ('<<' !== $key && isset($values['value']) && '&' === $values['value'][0] && self::preg_match(self::REFERENCE_PATTERN, $values['value'], $matches)) {
306                    $isRef = $matches['ref'];
307                    $this->refsBeingParsed[] = $isRef;
308                    $values['value'] = $matches['value'];
309                }
310
311                $subTag = null;
312                if ($mergeNode) {
313                    // Merge keys
314                } elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
315                    // hash
316                    // if next line is less indented or equal, then it means that the current value is null
317                    if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
318                        // Spec: Keys MUST be unique; first one wins.
319                        // But overwriting is allowed when a merge node is used in current block.
320                        if ($allowOverwrite || !isset($data[$key])) {
321                            if (null !== $subTag) {
322                                $data[$key] = new TaggedValue($subTag, '');
323                            } else {
324                                $data[$key] = null;
325                            }
326                        } else {
327                            throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine);
328                        }
329                    } else {
330                        // remember the parsed line number here in case we need it to provide some contexts in error messages below
331                        $realCurrentLineNbKey = $this->getRealCurrentLineNb();
332                        $value = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(), $flags);
333                        if ('<<' === $key) {
334                            $this->refs[$refMatches['ref']] = $value;
335
336                            if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $value instanceof \stdClass) {
337                                $value = (array) $value;
338                            }
339
340                            $data += $value;
341                        } elseif ($allowOverwrite || !isset($data[$key])) {
342                            // Spec: Keys MUST be unique; first one wins.
343                            // But overwriting is allowed when a merge node is used in current block.
344                            if (null !== $subTag) {
345                                $data[$key] = new TaggedValue($subTag, $value);
346                            } else {
347                                $data[$key] = $value;
348                            }
349                        } else {
350                            throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $realCurrentLineNbKey + 1, $this->currentLine);
351                        }
352                    }
353                } else {
354                    $value = $this->parseValue(rtrim($values['value']), $flags, $context);
355                    // Spec: Keys MUST be unique; first one wins.
356                    // But overwriting is allowed when a merge node is used in current block.
357                    if ($allowOverwrite || !isset($data[$key])) {
358                        $data[$key] = $value;
359                    } else {
360                        throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine);
361                    }
362                }
363                if ($isRef) {
364                    $this->refs[$isRef] = $data[$key];
365                    array_pop($this->refsBeingParsed);
366                }
367            } elseif ('"' === $this->currentLine[0] || "'" === $this->currentLine[0]) {
368                if (null !== $context) {
369                    throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
370                }
371
372                try {
373                    return Inline::parse($this->lexInlineQuotedString(), $flags, $this->refs);
374                } catch (ParseException $e) {
375                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
376                    $e->setSnippet($this->currentLine);
377
378                    throw $e;
379                }
380            } elseif ('{' === $this->currentLine[0]) {
381                if (null !== $context) {
382                    throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
383                }
384
385                try {
386                    $parsedMapping = Inline::parse($this->lexInlineMapping(), $flags, $this->refs);
387
388                    while ($this->moveToNextLine()) {
389                        if (!$this->isCurrentLineEmpty()) {
390                            throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
391                        }
392                    }
393
394                    return $parsedMapping;
395                } catch (ParseException $e) {
396                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
397                    $e->setSnippet($this->currentLine);
398
399                    throw $e;
400                }
401            } elseif ('[' === $this->currentLine[0]) {
402                if (null !== $context) {
403                    throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
404                }
405
406                try {
407                    $parsedSequence = Inline::parse($this->lexInlineSequence(), $flags, $this->refs);
408
409                    while ($this->moveToNextLine()) {
410                        if (!$this->isCurrentLineEmpty()) {
411                            throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
412                        }
413                    }
414
415                    return $parsedSequence;
416                } catch (ParseException $e) {
417                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
418                    $e->setSnippet($this->currentLine);
419
420                    throw $e;
421                }
422            } else {
423                // multiple documents are not supported
424                if ('---' === $this->currentLine) {
425                    throw new ParseException('Multiple documents are not supported.', $this->currentLineNb + 1, $this->currentLine, $this->filename);
426                }
427
428                if ($deprecatedUsage = (isset($this->currentLine[1]) && '?' === $this->currentLine[0] && ' ' === $this->currentLine[1])) {
429                    throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
430                }
431
432                // 1-liner optionally followed by newline(s)
433                if (\is_string($value) && $this->lines[0] === trim($value)) {
434                    try {
435                        $value = Inline::parse($this->lines[0], $flags, $this->refs);
436                    } catch (ParseException $e) {
437                        $e->setParsedLine($this->getRealCurrentLineNb() + 1);
438                        $e->setSnippet($this->currentLine);
439
440                        throw $e;
441                    }
442
443                    return $value;
444                }
445
446                // try to parse the value as a multi-line string as a last resort
447                if (0 === $this->currentLineNb) {
448                    $previousLineWasNewline = false;
449                    $previousLineWasTerminatedWithBackslash = false;
450                    $value = '';
451
452                    foreach ($this->lines as $line) {
453                        $trimmedLine = trim($line);
454                        if ('#' === ($trimmedLine[0] ?? '')) {
455                            continue;
456                        }
457                        // If the indentation is not consistent at offset 0, it is to be considered as a ParseError
458                        if (0 === $this->offset && !$deprecatedUsage && isset($line[0]) && ' ' === $line[0]) {
459                            throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
460                        }
461
462                        if (false !== strpos($line, ': ')) {
463                            throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
464                        }
465
466                        if ('' === $trimmedLine) {
467                            $value .= "\n";
468                        } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
469                            $value .= ' ';
470                        }
471
472                        if ('' !== $trimmedLine && '\\' === substr($line, -1)) {
473                            $value .= ltrim(substr($line, 0, -1));
474                        } elseif ('' !== $trimmedLine) {
475                            $value .= $trimmedLine;
476                        }
477
478                        if ('' === $trimmedLine) {
479                            $previousLineWasNewline = true;
480                            $previousLineWasTerminatedWithBackslash = false;
481                        } elseif ('\\' === substr($line, -1)) {
482                            $previousLineWasNewline = false;
483                            $previousLineWasTerminatedWithBackslash = true;
484                        } else {
485                            $previousLineWasNewline = false;
486                            $previousLineWasTerminatedWithBackslash = false;
487                        }
488                    }
489
490                    try {
491                        return Inline::parse(trim($value));
492                    } catch (ParseException $e) {
493                        // fall-through to the ParseException thrown below
494                    }
495                }
496
497                throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
498            }
499        } while ($this->moveToNextLine());
500
501        if (null !== $tag) {
502            $data = new TaggedValue($tag, $data);
503        }
504
505        if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && 'mapping' === $context && !\is_object($data)) {
506            $object = new \stdClass();
507
508            foreach ($data as $key => $value) {
509                $object->$key = $value;
510            }
511
512            $data = $object;
513        }
514
515        return empty($data) ? null : $data;
516    }
517
518    private function parseBlock(int $offset, string $yaml, int $flags)
519    {
520        $skippedLineNumbers = $this->skippedLineNumbers;
521
522        foreach ($this->locallySkippedLineNumbers as $lineNumber) {
523            if ($lineNumber < $offset) {
524                continue;
525            }
526
527            $skippedLineNumbers[] = $lineNumber;
528        }
529
530        $parser = new self();
531        $parser->offset = $offset;
532        $parser->totalNumberOfLines = $this->totalNumberOfLines;
533        $parser->skippedLineNumbers = $skippedLineNumbers;
534        $parser->refs = &$this->refs;
535        $parser->refsBeingParsed = $this->refsBeingParsed;
536
537        return $parser->doParse($yaml, $flags);
538    }
539
540    /**
541     * Returns the current line number (takes the offset into account).
542     *
543     * @internal
544     */
545    public function getRealCurrentLineNb(): int
546    {
547        $realCurrentLineNumber = $this->currentLineNb + $this->offset;
548
549        foreach ($this->skippedLineNumbers as $skippedLineNumber) {
550            if ($skippedLineNumber > $realCurrentLineNumber) {
551                break;
552            }
553
554            ++$realCurrentLineNumber;
555        }
556
557        return $realCurrentLineNumber;
558    }
559
560    /**
561     * Returns the current line indentation.
562     */
563    private function getCurrentLineIndentation(): int
564    {
565        if (' ' !== ($this->currentLine[0] ?? '')) {
566            return 0;
567        }
568
569        return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' '));
570    }
571
572    /**
573     * Returns the next embed block of YAML.
574     *
575     * @param int|null $indentation The indent level at which the block is to be read, or null for default
576     * @param bool     $inSequence  True if the enclosing data structure is a sequence
577     *
578     * @throws ParseException When indentation problem are detected
579     */
580    private function getNextEmbedBlock(int $indentation = null, bool $inSequence = false): string
581    {
582        $oldLineIndentation = $this->getCurrentLineIndentation();
583
584        if (!$this->moveToNextLine()) {
585            return '';
586        }
587
588        if (null === $indentation) {
589            $newIndent = null;
590            $movements = 0;
591
592            do {
593                $EOF = false;
594
595                // empty and comment-like lines do not influence the indentation depth
596                if ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
597                    $EOF = !$this->moveToNextLine();
598
599                    if (!$EOF) {
600                        ++$movements;
601                    }
602                } else {
603                    $newIndent = $this->getCurrentLineIndentation();
604                }
605            } while (!$EOF && null === $newIndent);
606
607            for ($i = 0; $i < $movements; ++$i) {
608                $this->moveToPreviousLine();
609            }
610
611            $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem();
612
613            if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) {
614                throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
615            }
616        } else {
617            $newIndent = $indentation;
618        }
619
620        $data = [];
621
622        if ($this->getCurrentLineIndentation() >= $newIndent) {
623            $data[] = substr($this->currentLine, $newIndent ?? 0);
624        } elseif ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
625            $data[] = $this->currentLine;
626        } else {
627            $this->moveToPreviousLine();
628
629            return '';
630        }
631
632        if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) {
633            // the previous line contained a dash but no item content, this line is a sequence item with the same indentation
634            // and therefore no nested list or mapping
635            $this->moveToPreviousLine();
636
637            return '';
638        }
639
640        $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();
641        $isItComment = $this->isCurrentLineComment();
642
643        while ($this->moveToNextLine()) {
644            if ($isItComment && !$isItUnindentedCollection) {
645                $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();
646                $isItComment = $this->isCurrentLineComment();
647            }
648
649            $indent = $this->getCurrentLineIndentation();
650
651            if ($isItUnindentedCollection && !$this->isCurrentLineEmpty() && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) {
652                $this->moveToPreviousLine();
653                break;
654            }
655
656            if ($this->isCurrentLineBlank()) {
657                $data[] = substr($this->currentLine, $newIndent);
658                continue;
659            }
660
661            if ($indent >= $newIndent) {
662                $data[] = substr($this->currentLine, $newIndent);
663            } elseif ($this->isCurrentLineComment()) {
664                $data[] = $this->currentLine;
665            } elseif (0 == $indent) {
666                $this->moveToPreviousLine();
667
668                break;
669            } else {
670                throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
671            }
672        }
673
674        return implode("\n", $data);
675    }
676
677    private function hasMoreLines(): bool
678    {
679        return (\count($this->lines) - 1) > $this->currentLineNb;
680    }
681
682    /**
683     * Moves the parser to the next line.
684     */
685    private function moveToNextLine(): bool
686    {
687        if ($this->currentLineNb >= $this->numberOfParsedLines - 1) {
688            return false;
689        }
690
691        $this->currentLine = $this->lines[++$this->currentLineNb];
692
693        return true;
694    }
695
696    /**
697     * Moves the parser to the previous line.
698     */
699    private function moveToPreviousLine(): bool
700    {
701        if ($this->currentLineNb < 1) {
702            return false;
703        }
704
705        $this->currentLine = $this->lines[--$this->currentLineNb];
706
707        return true;
708    }
709
710    /**
711     * Parses a YAML value.
712     *
713     * @param string $value   A YAML value
714     * @param int    $flags   A bit field of Yaml::PARSE_* constants to customize the YAML parser behavior
715     * @param string $context The parser context (either sequence or mapping)
716     *
717     * @return mixed
718     *
719     * @throws ParseException When reference does not exist
720     */
721    private function parseValue(string $value, int $flags, string $context)
722    {
723        if (0 === strpos($value, '*')) {
724            if (false !== $pos = strpos($value, '#')) {
725                $value = substr($value, 1, $pos - 2);
726            } else {
727                $value = substr($value, 1);
728            }
729
730            if (!\array_key_exists($value, $this->refs)) {
731                if (false !== $pos = array_search($value, $this->refsBeingParsed, true)) {
732                    throw new ParseException(sprintf('Circular reference [%s] detected for reference "%s".', implode(', ', array_merge(\array_slice($this->refsBeingParsed, $pos), [$value])), $value), $this->currentLineNb + 1, $this->currentLine, $this->filename);
733                }
734
735                throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->currentLineNb + 1, $this->currentLine, $this->filename);
736            }
737
738            return $this->refs[$value];
739        }
740
741        if (\in_array($value[0], ['!', '|', '>'], true) && self::preg_match('/^(?:'.self::TAG_PATTERN.' +)?'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
742            $modifiers = $matches['modifiers'] ?? '';
743
744            $data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));
745
746            if ('' !== $matches['tag'] && '!' !== $matches['tag']) {
747                if ('!!binary' === $matches['tag']) {
748                    return Inline::evaluateBinaryScalar($data);
749                }
750
751                return new TaggedValue(substr($matches['tag'], 1), $data);
752            }
753
754            return $data;
755        }
756
757        try {
758            if ('' !== $value && '{' === $value[0]) {
759                $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value));
760
761                return Inline::parse($this->lexInlineMapping($cursor), $flags, $this->refs);
762            } elseif ('' !== $value && '[' === $value[0]) {
763                $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value));
764
765                return Inline::parse($this->lexInlineSequence($cursor), $flags, $this->refs);
766            }
767
768            switch ($value[0] ?? '') {
769                case '"':
770                case "'":
771                    $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value));
772                    $parsedValue = Inline::parse($this->lexInlineQuotedString($cursor), $flags, $this->refs);
773
774                    if (isset($this->currentLine[$cursor]) && preg_replace('/\s*(#.*)?$/A', '', substr($this->currentLine, $cursor))) {
775                        throw new ParseException(sprintf('Unexpected characters near "%s".', substr($this->currentLine, $cursor)));
776                    }
777
778                    return $parsedValue;
779                default:
780                    $lines = [];
781
782                    while ($this->moveToNextLine()) {
783                        // unquoted strings end before the first unindented line
784                        if (0 === $this->getCurrentLineIndentation()) {
785                            $this->moveToPreviousLine();
786
787                            break;
788                        }
789
790                        $lines[] = trim($this->currentLine);
791                    }
792
793                    for ($i = 0, $linesCount = \count($lines), $previousLineBlank = false; $i < $linesCount; ++$i) {
794                        if ('' === $lines[$i]) {
795                            $value .= "\n";
796                            $previousLineBlank = true;
797                        } elseif ($previousLineBlank) {
798                            $value .= $lines[$i];
799                            $previousLineBlank = false;
800                        } else {
801                            $value .= ' '.$lines[$i];
802                            $previousLineBlank = false;
803                        }
804                    }
805
806                    Inline::$parsedLineNumber = $this->getRealCurrentLineNb();
807
808                    $parsedValue = Inline::parse($value, $flags, $this->refs);
809
810                    if ('mapping' === $context && \is_string($parsedValue) && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && false !== strpos($parsedValue, ': ')) {
811                        throw new ParseException('A colon cannot be used in an unquoted mapping value.', $this->getRealCurrentLineNb() + 1, $value, $this->filename);
812                    }
813
814                    return $parsedValue;
815            }
816        } catch (ParseException $e) {
817            $e->setParsedLine($this->getRealCurrentLineNb() + 1);
818            $e->setSnippet($this->currentLine);
819
820            throw $e;
821        }
822    }
823
824    /**
825     * Parses a block scalar.
826     *
827     * @param string $style       The style indicator that was used to begin this block scalar (| or >)
828     * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
829     * @param int    $indentation The indentation indicator that was used to begin this block scalar
830     */
831    private function parseBlockScalar(string $style, string $chomping = '', int $indentation = 0): string
832    {
833        $notEOF = $this->moveToNextLine();
834        if (!$notEOF) {
835            return '';
836        }
837
838        $isCurrentLineBlank = $this->isCurrentLineBlank();
839        $blockLines = [];
840
841        // leading blank lines are consumed before determining indentation
842        while ($notEOF && $isCurrentLineBlank) {
843            // newline only if not EOF
844            if ($notEOF = $this->moveToNextLine()) {
845                $blockLines[] = '';
846                $isCurrentLineBlank = $this->isCurrentLineBlank();
847            }
848        }
849
850        // determine indentation if not specified
851        if (0 === $indentation) {
852            $currentLineLength = \strlen($this->currentLine);
853
854            for ($i = 0; $i < $currentLineLength && ' ' === $this->currentLine[$i]; ++$i) {
855                ++$indentation;
856            }
857        }
858
859        if ($indentation > 0) {
860            $pattern = sprintf('/^ {%d}(.*)$/', $indentation);
861
862            while (
863                $notEOF && (
864                    $isCurrentLineBlank ||
865                    self::preg_match($pattern, $this->currentLine, $matches)
866                )
867            ) {
868                if ($isCurrentLineBlank && \strlen($this->currentLine) > $indentation) {
869                    $blockLines[] = substr($this->currentLine, $indentation);
870                } elseif ($isCurrentLineBlank) {
871                    $blockLines[] = '';
872                } else {
873                    $blockLines[] = $matches[1];
874                }
875
876                // newline only if not EOF
877                if ($notEOF = $this->moveToNextLine()) {
878                    $isCurrentLineBlank = $this->isCurrentLineBlank();
879                }
880            }
881        } elseif ($notEOF) {
882            $blockLines[] = '';
883        }
884
885        if ($notEOF) {
886            $blockLines[] = '';
887            $this->moveToPreviousLine();
888        } elseif (!$notEOF && !$this->isCurrentLineLastLineInDocument()) {
889            $blockLines[] = '';
890        }
891
892        // folded style
893        if ('>' === $style) {
894            $text = '';
895            $previousLineIndented = false;
896            $previousLineBlank = false;
897
898            for ($i = 0, $blockLinesCount = \count($blockLines); $i < $blockLinesCount; ++$i) {
899                if ('' === $blockLines[$i]) {
900                    $text .= "\n";
901                    $previousLineIndented = false;
902                    $previousLineBlank = true;
903                } elseif (' ' === $blockLines[$i][0]) {
904                    $text .= "\n".$blockLines[$i];
905                    $previousLineIndented = true;
906                    $previousLineBlank = false;
907                } elseif ($previousLineIndented) {
908                    $text .= "\n".$blockLines[$i];
909                    $previousLineIndented = false;
910                    $previousLineBlank = false;
911                } elseif ($previousLineBlank || 0 === $i) {
912                    $text .= $blockLines[$i];
913                    $previousLineIndented = false;
914                    $previousLineBlank = false;
915                } else {
916                    $text .= ' '.$blockLines[$i];
917                    $previousLineIndented = false;
918                    $previousLineBlank = false;
919                }
920            }
921        } else {
922            $text = implode("\n", $blockLines);
923        }
924
925        // deal with trailing newlines
926        if ('' === $chomping) {
927            $text = preg_replace('/\n+$/', "\n", $text);
928        } elseif ('-' === $chomping) {
929            $text = preg_replace('/\n+$/', '', $text);
930        }
931
932        return $text;
933    }
934
935    /**
936     * Returns true if the next line is indented.
937     */
938    private function isNextLineIndented(): bool
939    {
940        $currentIndentation = $this->getCurrentLineIndentation();
941        $movements = 0;
942
943        do {
944            $EOF = !$this->moveToNextLine();
945
946            if (!$EOF) {
947                ++$movements;
948            }
949        } while (!$EOF && ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()));
950
951        if ($EOF) {
952            return false;
953        }
954
955        $ret = $this->getCurrentLineIndentation() > $currentIndentation;
956
957        for ($i = 0; $i < $movements; ++$i) {
958            $this->moveToPreviousLine();
959        }
960
961        return $ret;
962    }
963
964    /**
965     * Returns true if the current line is blank or if it is a comment line.
966     */
967    private function isCurrentLineEmpty(): bool
968    {
969        return $this->isCurrentLineBlank() || $this->isCurrentLineComment();
970    }
971
972    /**
973     * Returns true if the current line is blank.
974     */
975    private function isCurrentLineBlank(): bool
976    {
977        return '' === $this->currentLine || '' === trim($this->currentLine, ' ');
978    }
979
980    /**
981     * Returns true if the current line is a comment line.
982     */
983    private function isCurrentLineComment(): bool
984    {
985        // checking explicitly the first char of the trim is faster than loops or strpos
986        $ltrimmedLine = '' !== $this->currentLine && ' ' === $this->currentLine[0] ? ltrim($this->currentLine, ' ') : $this->currentLine;
987
988        return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
989    }
990
991    private function isCurrentLineLastLineInDocument(): bool
992    {
993        return ($this->offset + $this->currentLineNb) >= ($this->totalNumberOfLines - 1);
994    }
995
996    /**
997     * Cleanups a YAML string to be parsed.
998     *
999     * @param string $value The input YAML string
1000     */
1001    private function cleanup(string $value): string
1002    {
1003        $value = str_replace(["\r\n", "\r"], "\n", $value);
1004
1005        // strip YAML header
1006        $count = 0;
1007        $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $count);
1008        $this->offset += $count;
1009
1010        // remove leading comments
1011        $trimmedValue = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $count);
1012        if (1 === $count) {
1013            // items have been removed, update the offset
1014            $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
1015            $value = $trimmedValue;
1016        }
1017
1018        // remove start of the document marker (---)
1019        $trimmedValue = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $count);
1020        if (1 === $count) {
1021            // items have been removed, update the offset
1022            $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
1023            $value = $trimmedValue;
1024
1025            // remove end of the document marker (...)
1026            $value = preg_replace('#\.\.\.\s*$#', '', $value);
1027        }
1028
1029        return $value;
1030    }
1031
1032    /**
1033     * Returns true if the next line starts unindented collection.
1034     */
1035    private function isNextLineUnIndentedCollection(): bool
1036    {
1037        $currentIndentation = $this->getCurrentLineIndentation();
1038        $movements = 0;
1039
1040        do {
1041            $EOF = !$this->moveToNextLine();
1042
1043            if (!$EOF) {
1044                ++$movements;
1045            }
1046        } while (!$EOF && ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()));
1047
1048        if ($EOF) {
1049            return false;
1050        }
1051
1052        $ret = $this->getCurrentLineIndentation() === $currentIndentation && $this->isStringUnIndentedCollectionItem();
1053
1054        for ($i = 0; $i < $movements; ++$i) {
1055            $this->moveToPreviousLine();
1056        }
1057
1058        return $ret;
1059    }
1060
1061    /**
1062     * Returns true if the string is un-indented collection item.
1063     */
1064    private function isStringUnIndentedCollectionItem(): bool
1065    {
1066        return '-' === rtrim($this->currentLine) || 0 === strpos($this->currentLine, '- ');
1067    }
1068
1069    /**
1070     * A local wrapper for "preg_match" which will throw a ParseException if there
1071     * is an internal error in the PCRE engine.
1072     *
1073     * This avoids us needing to check for "false" every time PCRE is used
1074     * in the YAML engine
1075     *
1076     * @throws ParseException on a PCRE internal error
1077     *
1078     * @see preg_last_error()
1079     *
1080     * @internal
1081     */
1082    public static function preg_match(string $pattern, string $subject, array &$matches = null, int $flags = 0, int $offset = 0): int
1083    {
1084        if (false === $ret = preg_match($pattern, $subject, $matches, $flags, $offset)) {
1085            switch (preg_last_error()) {
1086                case \PREG_INTERNAL_ERROR:
1087                    $error = 'Internal PCRE error.';
1088                    break;
1089                case \PREG_BACKTRACK_LIMIT_ERROR:
1090                    $error = 'pcre.backtrack_limit reached.';
1091                    break;
1092                case \PREG_RECURSION_LIMIT_ERROR:
1093                    $error = 'pcre.recursion_limit reached.';
1094                    break;
1095                case \PREG_BAD_UTF8_ERROR:
1096                    $error = 'Malformed UTF-8 data.';
1097                    break;
1098                case \PREG_BAD_UTF8_OFFSET_ERROR:
1099                    $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
1100                    break;
1101                default:
1102                    $error = 'Error.';
1103            }
1104
1105            throw new ParseException($error);
1106        }
1107
1108        return $ret;
1109    }
1110
1111    /**
1112     * Trim the tag on top of the value.
1113     *
1114     * Prevent values such as "!foo {quz: bar}" to be considered as
1115     * a mapping block.
1116     */
1117    private function trimTag(string $value): string
1118    {
1119        if ('!' === $value[0]) {
1120            return ltrim(substr($value, 1, strcspn($value, " \r\n", 1)), ' ');
1121        }
1122
1123        return $value;
1124    }
1125
1126    private function getLineTag(string $value, int $flags, bool $nextLineCheck = true): ?string
1127    {
1128        if ('' === $value || '!' !== $value[0] || 1 !== self::preg_match('/^'.self::TAG_PATTERN.' *( +#.*)?$/', $value, $matches)) {
1129            return null;
1130        }
1131
1132        if ($nextLineCheck && !$this->isNextLineIndented()) {
1133            return null;
1134        }
1135
1136        $tag = substr($matches['tag'], 1);
1137
1138        // Built-in tags
1139        if ($tag && '!' === $tag[0]) {
1140            throw new ParseException(sprintf('The built-in tag "!%s" is not implemented.', $tag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
1141        }
1142
1143        if (Yaml::PARSE_CUSTOM_TAGS & $flags) {
1144            return $tag;
1145        }
1146
1147        throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
1148    }
1149
    /**
     * Lexes a quoted string that starts at the given position of the current
     * line and that may continue on the following lines.
     *
     * The character found at $cursor is taken as the quotation mark. Line
     * breaks inside the string are folded: a blank line yields "\n", other
     * lines are joined with a single space unless the previous line was blank
     * or ended with a backslash.
     *
     * @param int $cursor Position of the opening quotation mark in the current line;
     *                    on return, the position right after the closing one
     *
     * @return string The string on a single line, surrounding quotation marks included
     *
     * @throws ParseException When the lines run out before the closing quotation mark is found
     */
    private function lexInlineQuotedString(int &$cursor = 0): string
    {
        $quotation = $this->currentLine[$cursor];
        $value = $quotation;
        ++$cursor;

        $previousLineWasNewline = true;
        $previousLineWasTerminatedWithBackslash = false;
        $lineNumber = 0;

        do {
            // on continuation lines, the leading spaces are not part of the string
            if (++$lineNumber > 1) {
                $cursor += strspn($this->currentLine, ' ', $cursor);
            }

            if ($this->isCurrentLineBlank()) {
                $value .= "\n";
            } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
                // the line break between two content lines is folded into a space
                $value .= ' ';
            }

            for (; \strlen($this->currentLine) > $cursor; ++$cursor) {
                switch ($this->currentLine[$cursor]) {
                    case '\\':
                        if ("'" === $quotation) {
                            // the backslash is copied as is in single-quoted strings
                            $value .= '\\';
                        } elseif (isset($this->currentLine[++$cursor])) {
                            // copy the backslash together with the next character so that
                            // an escaped quotation mark does not end the string
                            $value .= '\\'.$this->currentLine[$cursor];
                        }

                        break;
                    case $quotation:
                        ++$cursor;

                        // a doubled single quote is an escaped quote, not the end of the string
                        if ("'" === $quotation && isset($this->currentLine[$cursor]) && "'" === $this->currentLine[$cursor]) {
                            $value .= "''";
                            break;
                        }

                        return $value.$quotation;
                    default:
                        $value .= $this->currentLine[$cursor];
                }
            }

            // remember how this line ended in order to fold the next one properly
            if ($this->isCurrentLineBlank()) {
                $previousLineWasNewline = true;
                $previousLineWasTerminatedWithBackslash = false;
            } elseif ('\\' === $this->currentLine[-1]) {
                $previousLineWasNewline = false;
                $previousLineWasTerminatedWithBackslash = true;
            } else {
                $previousLineWasNewline = false;
                $previousLineWasTerminatedWithBackslash = false;
            }

            // restart at the beginning of the next line, if there is one
            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        throw new ParseException('Malformed inline YAML string.');
    }
1213
1214    private function lexUnquotedString(int &$cursor): string
1215    {
1216        $offset = $cursor;
1217        $cursor += strcspn($this->currentLine, '[]{},: ', $cursor);
1218
1219        if ($cursor === $offset) {
1220            throw new ParseException('Malformed unquoted YAML string.');
1221        }
1222
1223        return substr($this->currentLine, $offset, $cursor - $offset);
1224    }
1225
1226    private function lexInlineMapping(int &$cursor = 0): string
1227    {
1228        return $this->lexInlineStructure($cursor, '}');
1229    }
1230
1231    private function lexInlineSequence(int &$cursor = 0): string
1232    {
1233        return $this->lexInlineStructure($cursor, ']');
1234    }
1235
    /**
     * Lexes an inline structure (mapping or sequence) that starts at the given
     * position of the current line and that may span several lines.
     *
     * The structure is rebuilt on a single line: nested structures and quoted
     * strings are lexed recursively, every run of whitespace (line breaks
     * included) found after a token is reduced to one space and comments
     * are dropped.
     *
     * @param int    $cursor     Position of the opening character in the current line;
     *                           on return, the position right after the closing one
     * @param string $closingTag The character that ends the structure
     *
     * @return string The structure on a single line, opening and closing characters included
     *
     * @throws ParseException When the lines run out before the closing character is found
     */
    private function lexInlineStructure(int &$cursor, string $closingTag): string
    {
        // the opening character
        $value = $this->currentLine[$cursor];
        ++$cursor;

        do {
            $this->consumeWhitespaces($cursor);

            while (isset($this->currentLine[$cursor])) {
                switch ($this->currentLine[$cursor]) {
                    case '"':
                    case "'":
                        $value .= $this->lexInlineQuotedString($cursor);
                        break;
                    case ':':
                    case ',':
                        // separators are copied verbatim
                        $value .= $this->currentLine[$cursor];
                        ++$cursor;
                        break;
                    case '{':
                        $value .= $this->lexInlineMapping($cursor);
                        break;
                    case '[':
                        $value .= $this->lexInlineSequence($cursor);
                        break;
                    case $closingTag:
                        $value .= $this->currentLine[$cursor];
                        ++$cursor;

                        return $value;
                    case '#':
                        // a comment: skip the remainder of the line (leaves both the switch and the while)
                        break 2;
                    default:
                        $value .= $this->lexUnquotedString($cursor);
                }

                // whitespace following a token is reduced to one space
                if ($this->consumeWhitespaces($cursor)) {
                    $value .= ' ';
                }
            }

            // restart at the beginning of the next line, if there is one
            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        throw new ParseException('Malformed inline YAML string.');
    }
1284
1285    private function consumeWhitespaces(int &$cursor): bool
1286    {
1287        $whitespacesConsumed = 0;
1288
1289        do {
1290            $whitespaceOnlyTokenLength = strspn($this->currentLine, ' ', $cursor);
1291            $whitespacesConsumed += $whitespaceOnlyTokenLength;
1292            $cursor += $whitespaceOnlyTokenLength;
1293
1294            if (isset($this->currentLine[$cursor])) {
1295                return 0 < $whitespacesConsumed;
1296            }
1297
1298            if ($this->hasMoreLines()) {
1299                $cursor = 0;
1300            }
1301        } while ($this->moveToNextLine());
1302
1303        return 0 < $whitespacesConsumed;
1304    }
1305}
1306