1<?php 2 3/* 4 * This file is part of the Symfony package. 5 * 6 * (c) Fabien Potencier <fabien@symfony.com> 7 * 8 * For the full copyright and license information, please view the LICENSE 9 * file that was distributed with this source code. 10 */ 11 12namespace Symfony\Component\Yaml; 13 14use Symfony\Component\Yaml\Exception\ParseException; 15use Symfony\Component\Yaml\Tag\TaggedValue; 16 17/** 18 * Parser parses YAML strings to convert them to PHP arrays. 19 * 20 * @author Fabien Potencier <fabien@symfony.com> 21 * 22 * @final 23 */ 24class Parser 25{ 26 public const TAG_PATTERN = '(?P<tag>![\w!.\/:-]+)'; 27 public const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?'; 28 public const REFERENCE_PATTERN = '#^&(?P<ref>[^ ]++) *+(?P<value>.*)#u'; 29 30 private $filename; 31 private $offset = 0; 32 private $totalNumberOfLines; 33 private $lines = []; 34 private $currentLineNb = -1; 35 private $currentLine = ''; 36 private $refs = []; 37 private $skippedLineNumbers = []; 38 private $locallySkippedLineNumbers = []; 39 private $refsBeingParsed = []; 40 41 /** 42 * Parses a YAML file into a PHP value. 
43 * 44 * @param string $filename The path to the YAML file to be parsed 45 * @param int $flags A bit field of PARSE_* constants to customize the YAML parser behavior 46 * 47 * @return mixed The YAML converted to a PHP value 48 * 49 * @throws ParseException If the file could not be read or the YAML is not valid 50 */ 51 public function parseFile(string $filename, int $flags = 0) 52 { 53 if (!is_file($filename)) { 54 throw new ParseException(sprintf('File "%s" does not exist.', $filename)); 55 } 56 57 if (!is_readable($filename)) { 58 throw new ParseException(sprintf('File "%s" cannot be read.', $filename)); 59 } 60 61 $this->filename = $filename; 62 63 try { 64 return $this->parse(file_get_contents($filename), $flags); 65 } finally { 66 $this->filename = null; 67 } 68 } 69 70 /** 71 * Parses a YAML string to a PHP value. 72 * 73 * @param string $value A YAML string 74 * @param int $flags A bit field of PARSE_* constants to customize the YAML parser behavior 75 * 76 * @return mixed A PHP value 77 * 78 * @throws ParseException If the YAML is not valid 79 */ 80 public function parse(string $value, int $flags = 0) 81 { 82 if (false === preg_match('//u', $value)) { 83 throw new ParseException('The YAML value does not appear to be valid UTF-8.', -1, null, $this->filename); 84 } 85 86 $this->refs = []; 87 88 $mbEncoding = null; 89 90 if (2 /* MB_OVERLOAD_STRING */ & (int) \ini_get('mbstring.func_overload')) { 91 $mbEncoding = mb_internal_encoding(); 92 mb_internal_encoding('UTF-8'); 93 } 94 95 try { 96 $data = $this->doParse($value, $flags); 97 } finally { 98 if (null !== $mbEncoding) { 99 mb_internal_encoding($mbEncoding); 100 } 101 $this->refsBeingParsed = []; 102 $this->offset = 0; 103 $this->lines = []; 104 $this->currentLine = ''; 105 $this->refs = []; 106 $this->skippedLineNumbers = []; 107 $this->locallySkippedLineNumbers = []; 108 $this->totalNumberOfLines = null; 109 } 110 111 return $data; 112 } 113 114 private function doParse(string $value, int $flags) 115 { 
116 $this->currentLineNb = -1; 117 $this->currentLine = ''; 118 $value = $this->cleanup($value); 119 $this->lines = explode("\n", $value); 120 $this->locallySkippedLineNumbers = []; 121 122 if (null === $this->totalNumberOfLines) { 123 $this->totalNumberOfLines = \count($this->lines); 124 } 125 126 if (!$this->moveToNextLine()) { 127 return null; 128 } 129 130 $data = []; 131 $context = null; 132 $allowOverwrite = false; 133 134 while ($this->isCurrentLineEmpty()) { 135 if (!$this->moveToNextLine()) { 136 return null; 137 } 138 } 139 140 // Resolves the tag and returns if end of the document 141 if (null !== ($tag = $this->getLineTag($this->currentLine, $flags, false)) && !$this->moveToNextLine()) { 142 return new TaggedValue($tag, ''); 143 } 144 145 do { 146 if ($this->isCurrentLineEmpty()) { 147 continue; 148 } 149 150 // tab? 151 if ("\t" === $this->currentLine[0]) { 152 throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 153 } 154 155 Inline::initialize($flags, $this->getRealCurrentLineNb(), $this->filename); 156 157 $isRef = $mergeNode = false; 158 if ('-' === $this->currentLine[0] && self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) { 159 if ($context && 'mapping' == $context) { 160 throw new ParseException('You cannot define a sequence item when in a mapping.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 161 } 162 $context = 'sequence'; 163 164 if (isset($values['value']) && '&' === $values['value'][0] && self::preg_match(self::REFERENCE_PATTERN, $values['value'], $matches)) { 165 $isRef = $matches['ref']; 166 $this->refsBeingParsed[] = $isRef; 167 $values['value'] = $matches['value']; 168 } 169 170 if (isset($values['value'][1]) && '?' 
=== $values['value'][0] && ' ' === $values['value'][1]) { 171 throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine); 172 } 173 174 // array 175 if (isset($values['value']) && 0 === strpos(ltrim($values['value'], ' '), '-')) { 176 // Inline first child 177 $currentLineNumber = $this->getRealCurrentLineNb(); 178 179 $sequenceIndentation = \strlen($values['leadspaces']) + 1; 180 $sequenceYaml = substr($this->currentLine, $sequenceIndentation); 181 $sequenceYaml .= "\n".$this->getNextEmbedBlock($sequenceIndentation, true); 182 183 $data[] = $this->parseBlock($currentLineNumber, rtrim($sequenceYaml), $flags); 184 } elseif (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) { 185 $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags); 186 } elseif (null !== $subTag = $this->getLineTag(ltrim($values['value'], ' '), $flags)) { 187 $data[] = new TaggedValue( 188 $subTag, 189 $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $flags) 190 ); 191 } else { 192 if ( 193 isset($values['leadspaces']) 194 && ( 195 '!' === $values['value'][0] 196 || self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) 
*\:(\s+(?P<value>.+?))?\s*$#u', $this->trimTag($values['value']), $matches) 197 ) 198 ) { 199 // this is a compact notation element, add to next block and parse 200 $block = $values['value']; 201 if ($this->isNextLineIndented()) { 202 $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + \strlen($values['leadspaces']) + 1); 203 } 204 205 $data[] = $this->parseBlock($this->getRealCurrentLineNb(), $block, $flags); 206 } else { 207 $data[] = $this->parseValue($values['value'], $flags, $context); 208 } 209 } 210 if ($isRef) { 211 $this->refs[$isRef] = end($data); 212 array_pop($this->refsBeingParsed); 213 } 214 } elseif ( 215 self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(( |\t)++(?P<value>.+))?$#u', rtrim($this->currentLine), $values) 216 && (false === strpos($values['key'], ' #') || \in_array($values['key'][0], ['"', "'"])) 217 ) { 218 if ($context && 'sequence' == $context) { 219 throw new ParseException('You cannot define a mapping item when in a sequence.', $this->currentLineNb + 1, $this->currentLine, $this->filename); 220 } 221 $context = 'mapping'; 222 223 try { 224 $key = Inline::parseScalar($values['key']); 225 } catch (ParseException $e) { 226 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 227 $e->setSnippet($this->currentLine); 228 229 throw $e; 230 } 231 232 if (!\is_string($key) && !\is_int($key)) { 233 throw new ParseException((is_numeric($key) ? 'Numeric' : 'Non-string').' keys are not supported. 
Quote your evaluable mapping keys instead.', $this->getRealCurrentLineNb() + 1, $this->currentLine); 234 } 235 236 // Convert float keys to strings, to avoid being converted to integers by PHP 237 if (\is_float($key)) { 238 $key = (string) $key; 239 } 240 241 if ('<<' === $key && (!isset($values['value']) || '&' !== $values['value'][0] || !self::preg_match('#^&(?P<ref>[^ ]+)#u', $values['value'], $refMatches))) { 242 $mergeNode = true; 243 $allowOverwrite = true; 244 if (isset($values['value'][0]) && '*' === $values['value'][0]) { 245 $refName = substr(rtrim($values['value']), 1); 246 if (!\array_key_exists($refName, $this->refs)) { 247 if (false !== $pos = array_search($refName, $this->refsBeingParsed, true)) { 248 throw new ParseException(sprintf('Circular reference [%s] detected for reference "%s".', implode(', ', array_merge(\array_slice($this->refsBeingParsed, $pos), [$refName])), $refName), $this->currentLineNb + 1, $this->currentLine, $this->filename); 249 } 250 251 throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 252 } 253 254 $refValue = $this->refs[$refName]; 255 256 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $refValue instanceof \stdClass) { 257 $refValue = (array) $refValue; 258 } 259 260 if (!\is_array($refValue)) { 261 throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 262 } 263 264 $data += $refValue; // array union 265 } else { 266 if (isset($values['value']) && '' !== $values['value']) { 267 $value = $values['value']; 268 } else { 269 $value = $this->getNextEmbedBlock(); 270 } 271 $parsed = $this->parseBlock($this->getRealCurrentLineNb() + 1, $value, $flags); 272 273 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsed instanceof \stdClass) { 274 $parsed = (array) $parsed; 275 } 276 277 if (!\is_array($parsed)) { 278 throw new 
ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 279 } 280 281 if (isset($parsed[0])) { 282 // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes 283 // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier 284 // in the sequence override keys specified in later mapping nodes. 285 foreach ($parsed as $parsedItem) { 286 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsedItem instanceof \stdClass) { 287 $parsedItem = (array) $parsedItem; 288 } 289 290 if (!\is_array($parsedItem)) { 291 throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem, $this->filename); 292 } 293 294 $data += $parsedItem; // array union 295 } 296 } else { 297 // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the 298 // current mapping, unless the key already exists in it. 299 $data += $parsed; // array union 300 } 301 } 302 } elseif ('<<' !== $key && isset($values['value']) && '&' === $values['value'][0] && self::preg_match(self::REFERENCE_PATTERN, $values['value'], $matches)) { 303 $isRef = $matches['ref']; 304 $this->refsBeingParsed[] = $isRef; 305 $values['value'] = $matches['value']; 306 } 307 308 $subTag = null; 309 if ($mergeNode) { 310 // Merge keys 311 } elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) { 312 // hash 313 // if next line is less indented or equal, then it means that the current value is null 314 if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) { 315 // Spec: Keys MUST be unique; first one wins. 316 // But overwriting is allowed when a merge node is used in current block. 
317 if ($allowOverwrite || !isset($data[$key])) { 318 if (null !== $subTag) { 319 $data[$key] = new TaggedValue($subTag, ''); 320 } else { 321 $data[$key] = null; 322 } 323 } else { 324 throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine); 325 } 326 } else { 327 // remember the parsed line number here in case we need it to provide some contexts in error messages below 328 $realCurrentLineNbKey = $this->getRealCurrentLineNb(); 329 $value = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(), $flags); 330 if ('<<' === $key) { 331 $this->refs[$refMatches['ref']] = $value; 332 333 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $value instanceof \stdClass) { 334 $value = (array) $value; 335 } 336 337 $data += $value; 338 } elseif ($allowOverwrite || !isset($data[$key])) { 339 // Spec: Keys MUST be unique; first one wins. 340 // But overwriting is allowed when a merge node is used in current block. 341 if (null !== $subTag) { 342 $data[$key] = new TaggedValue($subTag, $value); 343 } else { 344 $data[$key] = $value; 345 } 346 } else { 347 throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $realCurrentLineNbKey + 1, $this->currentLine); 348 } 349 } 350 } else { 351 $value = $this->parseValue(rtrim($values['value']), $flags, $context); 352 // Spec: Keys MUST be unique; first one wins. 353 // But overwriting is allowed when a merge node is used in current block. 
354 if ($allowOverwrite || !isset($data[$key])) { 355 $data[$key] = $value; 356 } else { 357 throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine); 358 } 359 } 360 if ($isRef) { 361 $this->refs[$isRef] = $data[$key]; 362 array_pop($this->refsBeingParsed); 363 } 364 } elseif ('"' === $this->currentLine[0] || "'" === $this->currentLine[0]) { 365 if (null !== $context) { 366 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 367 } 368 369 try { 370 return Inline::parse($this->lexInlineQuotedString(), $flags, $this->refs); 371 } catch (ParseException $e) { 372 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 373 $e->setSnippet($this->currentLine); 374 375 throw $e; 376 } 377 } elseif ('{' === $this->currentLine[0]) { 378 if (null !== $context) { 379 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 380 } 381 382 try { 383 $parsedMapping = Inline::parse($this->lexInlineMapping(), $flags, $this->refs); 384 385 while ($this->moveToNextLine()) { 386 if (!$this->isCurrentLineEmpty()) { 387 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 388 } 389 } 390 391 return $parsedMapping; 392 } catch (ParseException $e) { 393 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 394 $e->setSnippet($this->currentLine); 395 396 throw $e; 397 } 398 } elseif ('[' === $this->currentLine[0]) { 399 if (null !== $context) { 400 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 401 } 402 403 try { 404 $parsedSequence = Inline::parse($this->lexInlineSequence(), $flags, $this->refs); 405 406 while ($this->moveToNextLine()) { 407 if (!$this->isCurrentLineEmpty()) { 408 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, 
$this->currentLine, $this->filename); 409 } 410 } 411 412 return $parsedSequence; 413 } catch (ParseException $e) { 414 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 415 $e->setSnippet($this->currentLine); 416 417 throw $e; 418 } 419 } else { 420 // multiple documents are not supported 421 if ('---' === $this->currentLine) { 422 throw new ParseException('Multiple documents are not supported.', $this->currentLineNb + 1, $this->currentLine, $this->filename); 423 } 424 425 if ($deprecatedUsage = (isset($this->currentLine[1]) && '?' === $this->currentLine[0] && ' ' === $this->currentLine[1])) { 426 throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine); 427 } 428 429 // 1-liner optionally followed by newline(s) 430 if (\is_string($value) && $this->lines[0] === trim($value)) { 431 try { 432 $value = Inline::parse($this->lines[0], $flags, $this->refs); 433 } catch (ParseException $e) { 434 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 435 $e->setSnippet($this->currentLine); 436 437 throw $e; 438 } 439 440 return $value; 441 } 442 443 // try to parse the value as a multi-line string as a last resort 444 if (0 === $this->currentLineNb) { 445 $previousLineWasNewline = false; 446 $previousLineWasTerminatedWithBackslash = false; 447 $value = ''; 448 449 foreach ($this->lines as $line) { 450 if ('' !== ltrim($line) && '#' === ltrim($line)[0]) { 451 continue; 452 } 453 // If the indentation is not consistent at offset 0, it is to be considered as a ParseError 454 if (0 === $this->offset && !$deprecatedUsage && isset($line[0]) && ' ' === $line[0]) { 455 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 456 } 457 458 if (false !== strpos($line, ': ')) { 459 @trigger_error('Support for mapping keys in multi-line blocks is deprecated since Symfony 4.3 and will throw a ParseException in 5.0.', \E_USER_DEPRECATED); 460 } 461 462 if ('' 
=== trim($line)) { 463 $value .= "\n"; 464 } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) { 465 $value .= ' '; 466 } 467 468 if ('' !== trim($line) && '\\' === substr($line, -1)) { 469 $value .= ltrim(substr($line, 0, -1)); 470 } elseif ('' !== trim($line)) { 471 $value .= trim($line); 472 } 473 474 if ('' === trim($line)) { 475 $previousLineWasNewline = true; 476 $previousLineWasTerminatedWithBackslash = false; 477 } elseif ('\\' === substr($line, -1)) { 478 $previousLineWasNewline = false; 479 $previousLineWasTerminatedWithBackslash = true; 480 } else { 481 $previousLineWasNewline = false; 482 $previousLineWasTerminatedWithBackslash = false; 483 } 484 } 485 486 try { 487 return Inline::parse(trim($value)); 488 } catch (ParseException $e) { 489 // fall-through to the ParseException thrown below 490 } 491 } 492 493 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 494 } 495 } while ($this->moveToNextLine()); 496 497 if (null !== $tag) { 498 $data = new TaggedValue($tag, $data); 499 } 500 501 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) { 502 $object = new \stdClass(); 503 504 foreach ($data as $key => $value) { 505 $object->$key = $value; 506 } 507 508 $data = $object; 509 } 510 511 return empty($data) ? 
null : $data; 512 } 513 514 private function parseBlock(int $offset, string $yaml, int $flags) 515 { 516 $skippedLineNumbers = $this->skippedLineNumbers; 517 518 foreach ($this->locallySkippedLineNumbers as $lineNumber) { 519 if ($lineNumber < $offset) { 520 continue; 521 } 522 523 $skippedLineNumbers[] = $lineNumber; 524 } 525 526 $parser = new self(); 527 $parser->offset = $offset; 528 $parser->totalNumberOfLines = $this->totalNumberOfLines; 529 $parser->skippedLineNumbers = $skippedLineNumbers; 530 $parser->refs = &$this->refs; 531 $parser->refsBeingParsed = $this->refsBeingParsed; 532 533 return $parser->doParse($yaml, $flags); 534 } 535 536 /** 537 * Returns the current line number (takes the offset into account). 538 * 539 * @internal 540 * 541 * @return int The current line number 542 */ 543 public function getRealCurrentLineNb(): int 544 { 545 $realCurrentLineNumber = $this->currentLineNb + $this->offset; 546 547 foreach ($this->skippedLineNumbers as $skippedLineNumber) { 548 if ($skippedLineNumber > $realCurrentLineNumber) { 549 break; 550 } 551 552 ++$realCurrentLineNumber; 553 } 554 555 return $realCurrentLineNumber; 556 } 557 558 /** 559 * Returns the current line indentation. 560 * 561 * @return int The current line indentation 562 */ 563 private function getCurrentLineIndentation(): int 564 { 565 return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' ')); 566 } 567 568 /** 569 * Returns the next embed block of YAML. 
570 * 571 * @param int|null $indentation The indent level at which the block is to be read, or null for default 572 * @param bool $inSequence True if the enclosing data structure is a sequence 573 * 574 * @return string A YAML string 575 * 576 * @throws ParseException When indentation problem are detected 577 */ 578 private function getNextEmbedBlock(int $indentation = null, bool $inSequence = false): string 579 { 580 $oldLineIndentation = $this->getCurrentLineIndentation(); 581 582 if (!$this->moveToNextLine()) { 583 return ''; 584 } 585 586 if (null === $indentation) { 587 $newIndent = null; 588 $movements = 0; 589 590 do { 591 $EOF = false; 592 593 // empty and comment-like lines do not influence the indentation depth 594 if ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) { 595 $EOF = !$this->moveToNextLine(); 596 597 if (!$EOF) { 598 ++$movements; 599 } 600 } else { 601 $newIndent = $this->getCurrentLineIndentation(); 602 } 603 } while (!$EOF && null === $newIndent); 604 605 for ($i = 0; $i < $movements; ++$i) { 606 $this->moveToPreviousLine(); 607 } 608 609 $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem(); 610 611 if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) { 612 throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 613 } 614 } else { 615 $newIndent = $indentation; 616 } 617 618 $data = []; 619 620 if ($this->getCurrentLineIndentation() >= $newIndent) { 621 $data[] = substr($this->currentLine, $newIndent ?? 
0); 622 } elseif ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) { 623 $data[] = $this->currentLine; 624 } else { 625 $this->moveToPreviousLine(); 626 627 return ''; 628 } 629 630 if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) { 631 // the previous line contained a dash but no item content, this line is a sequence item with the same indentation 632 // and therefore no nested list or mapping 633 $this->moveToPreviousLine(); 634 635 return ''; 636 } 637 638 $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem(); 639 $isItComment = $this->isCurrentLineComment(); 640 641 while ($this->moveToNextLine()) { 642 if ($isItComment && !$isItUnindentedCollection) { 643 $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem(); 644 $isItComment = $this->isCurrentLineComment(); 645 } 646 647 $indent = $this->getCurrentLineIndentation(); 648 649 if ($isItUnindentedCollection && !$this->isCurrentLineEmpty() && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) { 650 $this->moveToPreviousLine(); 651 break; 652 } 653 654 if ($this->isCurrentLineBlank()) { 655 $data[] = substr($this->currentLine, $newIndent); 656 continue; 657 } 658 659 if ($indent >= $newIndent) { 660 $data[] = substr($this->currentLine, $newIndent); 661 } elseif ($this->isCurrentLineComment()) { 662 $data[] = $this->currentLine; 663 } elseif (0 == $indent) { 664 $this->moveToPreviousLine(); 665 666 break; 667 } else { 668 throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 669 } 670 } 671 672 return implode("\n", $data); 673 } 674 675 private function hasMoreLines(): bool 676 { 677 return (\count($this->lines) - 1) > $this->currentLineNb; 678 } 679 680 /** 681 * Moves the parser to the next line. 
682 */ 683 private function moveToNextLine(): bool 684 { 685 if ($this->currentLineNb >= \count($this->lines) - 1) { 686 return false; 687 } 688 689 $this->currentLine = $this->lines[++$this->currentLineNb]; 690 691 return true; 692 } 693 694 /** 695 * Moves the parser to the previous line. 696 */ 697 private function moveToPreviousLine(): bool 698 { 699 if ($this->currentLineNb < 1) { 700 return false; 701 } 702 703 $this->currentLine = $this->lines[--$this->currentLineNb]; 704 705 return true; 706 } 707 708 /** 709 * Parses a YAML value. 710 * 711 * @param string $value A YAML value 712 * @param int $flags A bit field of PARSE_* constants to customize the YAML parser behavior 713 * @param string $context The parser context (either sequence or mapping) 714 * 715 * @return mixed A PHP value 716 * 717 * @throws ParseException When reference does not exist 718 */ 719 private function parseValue(string $value, int $flags, string $context) 720 { 721 if (0 === strpos($value, '*')) { 722 if (false !== $pos = strpos($value, '#')) { 723 $value = substr($value, 1, $pos - 2); 724 } else { 725 $value = substr($value, 1); 726 } 727 728 if (!\array_key_exists($value, $this->refs)) { 729 if (false !== $pos = array_search($value, $this->refsBeingParsed, true)) { 730 throw new ParseException(sprintf('Circular reference [%s] detected for reference "%s".', implode(', ', array_merge(\array_slice($this->refsBeingParsed, $pos), [$value])), $value), $this->currentLineNb + 1, $this->currentLine, $this->filename); 731 } 732 733 throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->currentLineNb + 1, $this->currentLine, $this->filename); 734 } 735 736 return $this->refs[$value]; 737 } 738 739 if (\in_array($value[0], ['!', '|', '>'], true) && self::preg_match('/^(?:'.self::TAG_PATTERN.' +)?'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { 740 $modifiers = $matches['modifiers'] ?? 
''; 741 742 $data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers)); 743 744 if ('' !== $matches['tag'] && '!' !== $matches['tag']) { 745 if ('!!binary' === $matches['tag']) { 746 return Inline::evaluateBinaryScalar($data); 747 } 748 749 return new TaggedValue(substr($matches['tag'], 1), $data); 750 } 751 752 return $data; 753 } 754 755 try { 756 if ('' !== $value && '{' === $value[0]) { 757 $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value)); 758 759 return Inline::parse($this->lexInlineMapping($cursor), $flags, $this->refs); 760 } elseif ('' !== $value && '[' === $value[0]) { 761 $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value)); 762 763 return Inline::parse($this->lexInlineSequence($cursor), $flags, $this->refs); 764 } 765 766 switch ($value[0] ?? '') { 767 case '"': 768 case "'": 769 $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value)); 770 $parsedValue = Inline::parse($this->lexInlineQuotedString($cursor), $flags, $this->refs); 771 772 if (isset($this->currentLine[$cursor]) && preg_replace('/\s*(#.*)?$/A', '', substr($this->currentLine, $cursor))) { 773 throw new ParseException(sprintf('Unexpected characters near "%s".', substr($this->currentLine, $cursor))); 774 } 775 776 return $parsedValue; 777 default: 778 $lines = []; 779 780 while ($this->moveToNextLine()) { 781 // unquoted strings end before the first unindented line 782 if (0 === $this->getCurrentLineIndentation()) { 783 $this->moveToPreviousLine(); 784 785 break; 786 } 787 788 $lines[] = trim($this->currentLine); 789 } 790 791 for ($i = 0, $linesCount = \count($lines), $previousLineBlank = false; $i < $linesCount; ++$i) { 792 if ('' === $lines[$i]) { 793 $value .= "\n"; 794 $previousLineBlank = true; 795 } elseif ($previousLineBlank) { 796 $value .= $lines[$i]; 797 $previousLineBlank = false; 798 } else { 799 $value .= ' '.$lines[$i]; 800 $previousLineBlank = false; 801 } 802 } 803 804 
Inline::$parsedLineNumber = $this->getRealCurrentLineNb(); 805 806 $parsedValue = Inline::parse($value, $flags, $this->refs); 807 808 if ('mapping' === $context && \is_string($parsedValue) && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && false !== strpos($parsedValue, ': ')) { 809 throw new ParseException('A colon cannot be used in an unquoted mapping value.', $this->getRealCurrentLineNb() + 1, $value, $this->filename); 810 } 811 812 return $parsedValue; 813 } 814 } catch (ParseException $e) { 815 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 816 $e->setSnippet($this->currentLine); 817 818 throw $e; 819 } 820 } 821 822 /** 823 * Parses a block scalar. 824 * 825 * @param string $style The style indicator that was used to begin this block scalar (| or >) 826 * @param string $chomping The chomping indicator that was used to begin this block scalar (+ or -) 827 * @param int $indentation The indentation indicator that was used to begin this block scalar 828 */ 829 private function parseBlockScalar(string $style, string $chomping = '', int $indentation = 0): string 830 { 831 $notEOF = $this->moveToNextLine(); 832 if (!$notEOF) { 833 return ''; 834 } 835 836 $isCurrentLineBlank = $this->isCurrentLineBlank(); 837 $blockLines = []; 838 839 // leading blank lines are consumed before determining indentation 840 while ($notEOF && $isCurrentLineBlank) { 841 // newline only if not EOF 842 if ($notEOF = $this->moveToNextLine()) { 843 $blockLines[] = ''; 844 $isCurrentLineBlank = $this->isCurrentLineBlank(); 845 } 846 } 847 848 // determine indentation if not specified 849 if (0 === $indentation) { 850 $currentLineLength = \strlen($this->currentLine); 851 852 for ($i = 0; $i < $currentLineLength && ' ' === $this->currentLine[$i]; ++$i) { 853 ++$indentation; 854 } 855 } 856 857 if ($indentation > 0) { 858 $pattern = sprintf('/^ {%d}(.*)$/', $indentation); 859 860 while ( 861 $notEOF && ( 862 $isCurrentLineBlank || 
863 self::preg_match($pattern, $this->currentLine, $matches) 864 ) 865 ) { 866 if ($isCurrentLineBlank && \strlen($this->currentLine) > $indentation) { 867 $blockLines[] = substr($this->currentLine, $indentation); 868 } elseif ($isCurrentLineBlank) { 869 $blockLines[] = ''; 870 } else { 871 $blockLines[] = $matches[1]; 872 } 873 874 // newline only if not EOF 875 if ($notEOF = $this->moveToNextLine()) { 876 $isCurrentLineBlank = $this->isCurrentLineBlank(); 877 } 878 } 879 } elseif ($notEOF) { 880 $blockLines[] = ''; 881 } 882 883 if ($notEOF) { 884 $blockLines[] = ''; 885 $this->moveToPreviousLine(); 886 } elseif (!$notEOF && !$this->isCurrentLineLastLineInDocument()) { 887 $blockLines[] = ''; 888 } 889 890 // folded style 891 if ('>' === $style) { 892 $text = ''; 893 $previousLineIndented = false; 894 $previousLineBlank = false; 895 896 for ($i = 0, $blockLinesCount = \count($blockLines); $i < $blockLinesCount; ++$i) { 897 if ('' === $blockLines[$i]) { 898 $text .= "\n"; 899 $previousLineIndented = false; 900 $previousLineBlank = true; 901 } elseif (' ' === $blockLines[$i][0]) { 902 $text .= "\n".$blockLines[$i]; 903 $previousLineIndented = true; 904 $previousLineBlank = false; 905 } elseif ($previousLineIndented) { 906 $text .= "\n".$blockLines[$i]; 907 $previousLineIndented = false; 908 $previousLineBlank = false; 909 } elseif ($previousLineBlank || 0 === $i) { 910 $text .= $blockLines[$i]; 911 $previousLineIndented = false; 912 $previousLineBlank = false; 913 } else { 914 $text .= ' '.$blockLines[$i]; 915 $previousLineIndented = false; 916 $previousLineBlank = false; 917 } 918 } 919 } else { 920 $text = implode("\n", $blockLines); 921 } 922 923 // deal with trailing newlines 924 if ('' === $chomping) { 925 $text = preg_replace('/\n+$/', "\n", $text); 926 } elseif ('-' === $chomping) { 927 $text = preg_replace('/\n+$/', '', $text); 928 } 929 930 return $text; 931 } 932 933 /** 934 * Returns true if the next line is indented. 
935 * 936 * @return bool Returns true if the next line is indented, false otherwise 937 */ 938 private function isNextLineIndented(): bool 939 { 940 $currentIndentation = $this->getCurrentLineIndentation(); 941 $movements = 0; 942 943 do { 944 $EOF = !$this->moveToNextLine(); 945 946 if (!$EOF) { 947 ++$movements; 948 } 949 } while (!$EOF && ($this->isCurrentLineEmpty() || $this->isCurrentLineComment())); 950 951 if ($EOF) { 952 return false; 953 } 954 955 $ret = $this->getCurrentLineIndentation() > $currentIndentation; 956 957 for ($i = 0; $i < $movements; ++$i) { 958 $this->moveToPreviousLine(); 959 } 960 961 return $ret; 962 } 963 964 /** 965 * Returns true if the current line is blank or if it is a comment line. 966 * 967 * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise 968 */ 969 private function isCurrentLineEmpty(): bool 970 { 971 return $this->isCurrentLineBlank() || $this->isCurrentLineComment(); 972 } 973 974 /** 975 * Returns true if the current line is blank. 976 * 977 * @return bool Returns true if the current line is blank, false otherwise 978 */ 979 private function isCurrentLineBlank(): bool 980 { 981 return '' == trim($this->currentLine, ' '); 982 } 983 984 /** 985 * Returns true if the current line is a comment line. 986 * 987 * @return bool Returns true if the current line is a comment line, false otherwise 988 */ 989 private function isCurrentLineComment(): bool 990 { 991 // checking explicitly the first char of the trim is faster than loops or strpos 992 $ltrimmedLine = ltrim($this->currentLine, ' '); 993 994 return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0]; 995 } 996 997 private function isCurrentLineLastLineInDocument(): bool 998 { 999 return ($this->offset + $this->currentLineNb) >= ($this->totalNumberOfLines - 1); 1000 } 1001 1002 /** 1003 * Cleanups a YAML string to be parsed. 
*
     * Line endings are normalized to "\n", then a leading "%YAML" directive,
     * the comment lines opening the document and the "---" start marker are
     * removed. The parser offset is increased by the number of lines that
     * were removed. The "..." end marker is only removed when a start marker
     * was found.
     *
     * @param string $value The input YAML string
     *
     * @return string A cleaned up YAML string
     */
    private function cleanup(string $value): string
    {
        $yaml = str_replace(["\r\n", "\r"], "\n", $value);

        // the %YAML directive sits on a single line, so the number of
        // replacements is also the number of removed lines
        $removed = 0;
        $yaml = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $yaml, -1, $removed);
        $this->offset += $removed;

        // comment lines opening the document
        $withoutComments = preg_replace('#^(\#.*?\n)+#s', '', $yaml, -1, $removed);
        if (1 === $removed) {
            // several lines may have been removed at once, count them
            $this->offset += substr_count($yaml, "\n") - substr_count($withoutComments, "\n");
            $yaml = $withoutComments;
        }

        // start of the document marker (---)
        $withoutMarker = preg_replace('#^\-\-\-.*?\n#s', '', $yaml, -1, $removed);
        if (1 !== $removed) {
            return $yaml;
        }

        $this->offset += substr_count($yaml, "\n") - substr_count($withoutMarker, "\n");

        // end of the document marker (...)
        return preg_replace('#\.\.\.\s*$#', '', $withoutMarker);
    }

    /**
     * Returns true if the next line starts unindented collection.
*
     * Blank lines and comment lines are skipped while looking for the next
     * line. That line must have the same indentation as the line the parser
     * is currently on and must be a collection item. The parser is always
     * moved back to the line it started from, including when the end of the
     * input is hit.
     *
     * @return bool Returns true if the next line starts unindented collection, false otherwise
     */
    private function isNextLineUnIndentedCollection(): bool
    {
        $currentIndentation = $this->getCurrentLineIndentation();
        $movements = 0;

        // isCurrentLineEmpty() already covers comment lines
        do {
            $EOF = !$this->moveToNextLine();

            if (!$EOF) {
                ++$movements;
            }
        } while (!$EOF && $this->isCurrentLineEmpty());

        // reaching EOF means there is no following line to inspect
        $ret = !$EOF
            && $this->getCurrentLineIndentation() === $currentIndentation
            && $this->isStringUnIndentedCollectionItem();

        // undo every successful move, even on EOF, so that callers never
        // end up on one of the skipped blank/comment lines
        for ($i = 0; $i < $movements; ++$i) {
            $this->moveToPreviousLine();
        }

        return $ret;
    }

    /**
     * Returns true if the string is un-indented collection item.
     *
     * The current line qualifies when it is a lone "-" (trailing whitespace
     * ignored) or when it starts with "- ".
     *
     * @return bool Returns true if the string is un-indented collection item, false otherwise
     */
    private function isStringUnIndentedCollectionItem(): bool
    {
        return '-' === rtrim($this->currentLine) || 0 === strpos($this->currentLine, '- ');
    }

    /**
     * A local wrapper for "preg_match" which will throw a ParseException if there
     * is an internal error in the PCRE engine.
*
     * This avoids us needing to check for "false" every time PCRE is used
     * in the YAML engine
     *
     * The arguments are forwarded to the native preg_match() unchanged.
     *
     * @return int The result of the native preg_match() call
     *
     * @throws ParseException on a PCRE internal error
     *
     * @see preg_last_error()
     *
     * @internal
     */
    public static function preg_match(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int
    {
        if (false === $ret = preg_match($pattern, $subject, $matches, $flags, $offset)) {
            switch (preg_last_error()) {
                case \PREG_INTERNAL_ERROR:
                    $error = 'Internal PCRE error.';
                    break;
                case \PREG_BACKTRACK_LIMIT_ERROR:
                    $error = 'pcre.backtrack_limit reached.';
                    break;
                case \PREG_RECURSION_LIMIT_ERROR:
                    $error = 'pcre.recursion_limit reached.';
                    break;
                case \PREG_BAD_UTF8_ERROR:
                    $error = 'Malformed UTF-8 data.';
                    break;
                case \PREG_BAD_UTF8_OFFSET_ERROR:
                    $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
                    break;
                default:
                    $error = 'Error.';
            }

            throw new ParseException($error);
        }

        return $ret;
    }

    /**
     * Trim the tag on top of the value.
     *
     * Prevent values such as "!foo {quz: bar}" to be considered as
     * a mapping block.
     *
     * When the value starts with "!", what is returned is the text between
     * that "!" and the first space, "\r" or "\n", left-trimmed of spaces.
     * Any other value, including the empty string, is returned unchanged.
     */
    private function trimTag(string $value): string
    {
        // guard against the empty string: reading offset 0 would raise an
        // "Uninitialized string offset" warning
        if ('' !== $value && '!' === $value[0]) {
            return ltrim(substr($value, 1, strcspn($value, " \r\n", 1)), ' ');
        }

        return $value;
    }

    /**
     * Returns the tag carried by a line that holds nothing but a tag.
     *
     * @param string $value         The line content to inspect
     * @param int    $flags         A bit field of PARSE_* constants
     * @param bool   $nextLineCheck Whether the next line must be indented for the tag to be accepted
     *
     * @return string|null The tag without its leading "!", or null when the value is not a lone tag
     *                     (optionally followed by a comment) or when the next line check fails
     *
     * @throws ParseException When the tag is a built-in ("!!") tag or when custom tags are not enabled
     */
    private function getLineTag(string $value, int $flags, bool $nextLineCheck = true): ?string
    {
        if ('' === $value || '!' !== $value[0] || 1 !== self::preg_match('/^'.self::TAG_PATTERN.' *( +#.*)?$/', $value, $matches)) {
            return null;
        }

        if ($nextLineCheck && !$this->isNextLineIndented()) {
            return null;
        }

        $tag = substr($matches['tag'], 1);

        // Built-in tags
        if ($tag && '!' === $tag[0]) {
            throw new ParseException(sprintf('The built-in tag "!%s" is not implemented.', $tag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
        }

        if (Yaml::PARSE_CUSTOM_TAGS & $flags) {
            return $tag;
        }

        throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
    }

    /**
     * Reads a quoted string that starts at the cursor and may span several lines.
     *
     * Blank lines inside the string contribute a "\n". A non-blank line is
     * joined to the previous one with a single space, unless the previous
     * line was blank or ended with a backslash. Leading spaces of
     * continuation lines are skipped.
     *
     * @param int $cursor Position of the opening quote in the current line; moved past the closing quote on success
     *
     * @return string The string, opening and closing quotes included
     *
     * @throws ParseException When the input ends before the closing quote
     */
    private function lexInlineQuotedString(int &$cursor = 0): string
    {
        $quotation = $this->currentLine[$cursor];
        $value = $quotation;
        ++$cursor;

        // starts as true so that no joining space is added on the first line
        $previousLineWasNewline = true;
        $previousLineWasTerminatedWithBackslash = false;
        $lineNumber = 0;

        do {
            // skip the indentation of continuation lines
            if (++$lineNumber > 1) {
                $cursor += strspn($this->currentLine, ' ', $cursor);
            }

            if ($this->isCurrentLineBlank()) {
                $value .= "\n";
            } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
                $value .= ' ';
            }

            for (; \strlen($this->currentLine) > $cursor; ++$cursor) {
                switch ($this->currentLine[$cursor]) {
                    case '\\':
                        if ("'" === $quotation) {
                            // the backslash is a plain character in single quoted strings
                            $value .= '\\';
                        } elseif (isset($this->currentLine[++$cursor])) {
                            // keep the escape sequence as is
                            $value .= '\\'.$this->currentLine[$cursor];
                        }

                        break;
                    case $quotation:
                        ++$cursor;

                        // a doubled single quote is an escaped quote, not the end of the string
                        if ("'" === $quotation && isset($this->currentLine[$cursor]) && "'" === $this->currentLine[$cursor]) {
                            $value .= "''";
                            break;
                        }

                        return $value.$quotation;
                    default:
                        $value .= $this->currentLine[$cursor];
                }
            }

            if ($this->isCurrentLineBlank()) {
                $previousLineWasNewline = true;
                $previousLineWasTerminatedWithBackslash = false;
            } elseif ('\\' === $this->currentLine[-1]) {
                $previousLineWasNewline = false;
                $previousLineWasTerminatedWithBackslash = true;
            } else {
                $previousLineWasNewline = false;
                $previousLineWasTerminatedWithBackslash = false;
            }

            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        throw new ParseException('Malformed inline YAML string.');
    }

    /**
     * Reads an unquoted string from the cursor up to the next "[", "]", "{",
     * "}", ",", ":" or space of the current line (or up to its end).
     *
     * @param int $cursor Start position in the current line; moved past the string
     *
     * @throws ParseException When there is nothing to read at the cursor
     */
    private function lexUnquotedString(int &$cursor): string
    {
        $offset = $cursor;
        $cursor += strcspn($this->currentLine, '[]{},: ', $cursor);

        if ($cursor === $offset) {
            throw new ParseException('Malformed unquoted YAML string.');
        }

        return substr($this->currentLine, $offset, $cursor - $offset);
    }

    /**
     * Reads an inline mapping, i.e. an inline structure closed by "}".
     */
    private function lexInlineMapping(int &$cursor = 0): string
    {
        return $this->lexInlineStructure($cursor, '}');
    }

    /**
     * Reads an inline sequence, i.e. an inline structure closed by "]".
     */
    private function lexInlineSequence(int &$cursor = 0): string
    {
        return $this->lexInlineStructure($cursor, ']');
    }

    /**
     * Reads an inline structure that may span several lines and returns it
     * as a single string.
     *
     * Each run of whitespace following a token is replaced by one space.
     * A "#" found where a token is expected ends the processing of the
     * current line.
     *
     * @param int    $cursor     Position of the opening character in the current line; moved past the closing one on success
     * @param string $closingTag The character that closes the structure
     *
     * @throws ParseException When the input ends before the closing character
     */
    private function lexInlineStructure(int &$cursor, string $closingTag): string
    {
        $value = $this->currentLine[$cursor];
        ++$cursor;

        do {
            $this->consumeWhitespaces($cursor);

            while (isset($this->currentLine[$cursor])) {
                switch ($this->currentLine[$cursor]) {
                    case '"':
                    case "'":
                        $value .= $this->lexInlineQuotedString($cursor);
                        break;
                    case ':':
                    case ',':
                        $value .= $this->currentLine[$cursor];
                        ++$cursor;
                        break;
                    case '{':
                        $value .= $this->lexInlineMapping($cursor);
                        break;
                    case '[':
                        $value .= $this->lexInlineSequence($cursor);
                        break;
                    case $closingTag:
                        $value .= $this->currentLine[$cursor];
                        ++$cursor;

                        return $value;
                    case '#':
                        // leave the inner loop: nothing more is read from this line
                        break 2;
                    default:
                        $value .= $this->lexUnquotedString($cursor);
                }

                if ($this->consumeWhitespaces($cursor)) {
                    $value .= ' ';
                }
            }

            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        throw new ParseException('Malformed inline YAML string.');
    }
/**
     * Skips the spaces found at the cursor, moving on to the following
     * lines for as long as the end of the current line is reached.
     *
     * Only space characters are counted: going through line breaks alone
     * does not make this method return true.
     *
     * @param int $cursor Position in the current line; left on the first character that is not a space
     *
     * @return bool Whether at least one space has been skipped
     */
    private function consumeWhitespaces(int &$cursor): bool
    {
        $skipped = 0;

        for (;;) {
            $run = strspn($this->currentLine, ' ', $cursor);
            $skipped += $run;
            $cursor += $run;

            // something other than a space is left on this line
            if (isset($this->currentLine[$cursor])) {
                break;
            }

            if ($this->hasMoreLines()) {
                $cursor = 0;
            }

            if (!$this->moveToNextLine()) {
                break;
            }
        }

        return $skipped > 0;
    }
}