1<?php 2 3/* 4 * This file is part of the Symfony package. 5 * 6 * (c) Fabien Potencier <fabien@symfony.com> 7 * 8 * For the full copyright and license information, please view the LICENSE 9 * file that was distributed with this source code. 10 */ 11 12namespace Symfony\Component\Yaml; 13 14use Symfony\Component\Yaml\Exception\ParseException; 15use Symfony\Component\Yaml\Tag\TaggedValue; 16 17/** 18 * Parser parses YAML strings to convert them to PHP arrays. 19 * 20 * @author Fabien Potencier <fabien@symfony.com> 21 * 22 * @final 23 */ 24class Parser 25{ 26 public const TAG_PATTERN = '(?P<tag>![\w!.\/:-]+)'; 27 public const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?'; 28 public const REFERENCE_PATTERN = '#^&(?P<ref>[^ ]++) *+(?P<value>.*)#u'; 29 30 private $filename; 31 private $offset = 0; 32 private $numberOfParsedLines = 0; 33 private $totalNumberOfLines; 34 private $lines = []; 35 private $currentLineNb = -1; 36 private $currentLine = ''; 37 private $refs = []; 38 private $skippedLineNumbers = []; 39 private $locallySkippedLineNumbers = []; 40 private $refsBeingParsed = []; 41 42 /** 43 * Parses a YAML file into a PHP value. 
44 * 45 * @param string $filename The path to the YAML file to be parsed 46 * @param int $flags A bit field of Yaml::PARSE_* constants to customize the YAML parser behavior 47 * 48 * @return mixed 49 * 50 * @throws ParseException If the file could not be read or the YAML is not valid 51 */ 52 public function parseFile(string $filename, int $flags = 0) 53 { 54 if (!is_file($filename)) { 55 throw new ParseException(sprintf('File "%s" does not exist.', $filename)); 56 } 57 58 if (!is_readable($filename)) { 59 throw new ParseException(sprintf('File "%s" cannot be read.', $filename)); 60 } 61 62 $this->filename = $filename; 63 64 try { 65 return $this->parse(file_get_contents($filename), $flags); 66 } finally { 67 $this->filename = null; 68 } 69 } 70 71 /** 72 * Parses a YAML string to a PHP value. 73 * 74 * @param string $value A YAML string 75 * @param int $flags A bit field of Yaml::PARSE_* constants to customize the YAML parser behavior 76 * 77 * @return mixed 78 * 79 * @throws ParseException If the YAML is not valid 80 */ 81 public function parse(string $value, int $flags = 0) 82 { 83 if (false === preg_match('//u', $value)) { 84 throw new ParseException('The YAML value does not appear to be valid UTF-8.', -1, null, $this->filename); 85 } 86 87 $this->refs = []; 88 89 $mbEncoding = null; 90 91 if (2 /* MB_OVERLOAD_STRING */ & (int) \ini_get('mbstring.func_overload')) { 92 $mbEncoding = mb_internal_encoding(); 93 mb_internal_encoding('UTF-8'); 94 } 95 96 try { 97 $data = $this->doParse($value, $flags); 98 } finally { 99 if (null !== $mbEncoding) { 100 mb_internal_encoding($mbEncoding); 101 } 102 $this->refsBeingParsed = []; 103 $this->offset = 0; 104 $this->lines = []; 105 $this->currentLine = ''; 106 $this->numberOfParsedLines = 0; 107 $this->refs = []; 108 $this->skippedLineNumbers = []; 109 $this->locallySkippedLineNumbers = []; 110 $this->totalNumberOfLines = null; 111 } 112 113 return $data; 114 } 115 116 private function doParse(string $value, int $flags) 117 
{ 118 $this->currentLineNb = -1; 119 $this->currentLine = ''; 120 $value = $this->cleanup($value); 121 $this->lines = explode("\n", $value); 122 $this->numberOfParsedLines = \count($this->lines); 123 $this->locallySkippedLineNumbers = []; 124 125 if (null === $this->totalNumberOfLines) { 126 $this->totalNumberOfLines = $this->numberOfParsedLines; 127 } 128 129 if (!$this->moveToNextLine()) { 130 return null; 131 } 132 133 $data = []; 134 $context = null; 135 $allowOverwrite = false; 136 137 while ($this->isCurrentLineEmpty()) { 138 if (!$this->moveToNextLine()) { 139 return null; 140 } 141 } 142 143 // Resolves the tag and returns if end of the document 144 if (null !== ($tag = $this->getLineTag($this->currentLine, $flags, false)) && !$this->moveToNextLine()) { 145 return new TaggedValue($tag, ''); 146 } 147 148 do { 149 if ($this->isCurrentLineEmpty()) { 150 continue; 151 } 152 153 // tab? 154 if ("\t" === $this->currentLine[0]) { 155 throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 156 } 157 158 Inline::initialize($flags, $this->getRealCurrentLineNb(), $this->filename); 159 160 $isRef = $mergeNode = false; 161 if ('-' === $this->currentLine[0] && self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) { 162 if ($context && 'mapping' == $context) { 163 throw new ParseException('You cannot define a sequence item when in a mapping.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 164 } 165 $context = 'sequence'; 166 167 if (isset($values['value']) && '&' === $values['value'][0] && self::preg_match(self::REFERENCE_PATTERN, $values['value'], $matches)) { 168 $isRef = $matches['ref']; 169 $this->refsBeingParsed[] = $isRef; 170 $values['value'] = $matches['value']; 171 } 172 173 if (isset($values['value'][1]) && '?' 
=== $values['value'][0] && ' ' === $values['value'][1]) { 174 throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine); 175 } 176 177 // array 178 if (isset($values['value']) && 0 === strpos(ltrim($values['value'], ' '), '-')) { 179 // Inline first child 180 $currentLineNumber = $this->getRealCurrentLineNb(); 181 182 $sequenceIndentation = \strlen($values['leadspaces']) + 1; 183 $sequenceYaml = substr($this->currentLine, $sequenceIndentation); 184 $sequenceYaml .= "\n".$this->getNextEmbedBlock($sequenceIndentation, true); 185 186 $data[] = $this->parseBlock($currentLineNumber, rtrim($sequenceYaml), $flags); 187 } elseif (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) { 188 $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags); 189 } elseif (null !== $subTag = $this->getLineTag(ltrim($values['value'], ' '), $flags)) { 190 $data[] = new TaggedValue( 191 $subTag, 192 $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $flags) 193 ); 194 } else { 195 if ( 196 isset($values['leadspaces']) 197 && ( 198 '!' === $values['value'][0] 199 || self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) 
*\:(\s+(?P<value>.+?))?\s*$#u', $this->trimTag($values['value']), $matches) 200 ) 201 ) { 202 // this is a compact notation element, add to next block and parse 203 $block = $values['value']; 204 if ($this->isNextLineIndented()) { 205 $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + \strlen($values['leadspaces']) + 1); 206 } 207 208 $data[] = $this->parseBlock($this->getRealCurrentLineNb(), $block, $flags); 209 } else { 210 $data[] = $this->parseValue($values['value'], $flags, $context); 211 } 212 } 213 if ($isRef) { 214 $this->refs[$isRef] = end($data); 215 array_pop($this->refsBeingParsed); 216 } 217 } elseif ( 218 self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(( |\t)++(?P<value>.+))?$#u', rtrim($this->currentLine), $values) 219 && (false === strpos($values['key'], ' #') || \in_array($values['key'][0], ['"', "'"])) 220 ) { 221 if ($context && 'sequence' == $context) { 222 throw new ParseException('You cannot define a mapping item when in a sequence.', $this->currentLineNb + 1, $this->currentLine, $this->filename); 223 } 224 $context = 'mapping'; 225 226 try { 227 $key = Inline::parseScalar($values['key']); 228 } catch (ParseException $e) { 229 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 230 $e->setSnippet($this->currentLine); 231 232 throw $e; 233 } 234 235 if (!\is_string($key) && !\is_int($key)) { 236 throw new ParseException((is_numeric($key) ? 'Numeric' : 'Non-string').' keys are not supported. 
Quote your evaluable mapping keys instead.', $this->getRealCurrentLineNb() + 1, $this->currentLine); 237 } 238 239 // Convert float keys to strings, to avoid being converted to integers by PHP 240 if (\is_float($key)) { 241 $key = (string) $key; 242 } 243 244 if ('<<' === $key && (!isset($values['value']) || '&' !== $values['value'][0] || !self::preg_match('#^&(?P<ref>[^ ]+)#u', $values['value'], $refMatches))) { 245 $mergeNode = true; 246 $allowOverwrite = true; 247 if (isset($values['value'][0]) && '*' === $values['value'][0]) { 248 $refName = substr(rtrim($values['value']), 1); 249 if (!\array_key_exists($refName, $this->refs)) { 250 if (false !== $pos = array_search($refName, $this->refsBeingParsed, true)) { 251 throw new ParseException(sprintf('Circular reference [%s] detected for reference "%s".', implode(', ', array_merge(\array_slice($this->refsBeingParsed, $pos), [$refName])), $refName), $this->currentLineNb + 1, $this->currentLine, $this->filename); 252 } 253 254 throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 255 } 256 257 $refValue = $this->refs[$refName]; 258 259 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $refValue instanceof \stdClass) { 260 $refValue = (array) $refValue; 261 } 262 263 if (!\is_array($refValue)) { 264 throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 265 } 266 267 $data += $refValue; // array union 268 } else { 269 if (isset($values['value']) && '' !== $values['value']) { 270 $value = $values['value']; 271 } else { 272 $value = $this->getNextEmbedBlock(); 273 } 274 $parsed = $this->parseBlock($this->getRealCurrentLineNb() + 1, $value, $flags); 275 276 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsed instanceof \stdClass) { 277 $parsed = (array) $parsed; 278 } 279 280 if (!\is_array($parsed)) { 281 throw new 
ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 282 } 283 284 if (isset($parsed[0])) { 285 // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes 286 // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier 287 // in the sequence override keys specified in later mapping nodes. 288 foreach ($parsed as $parsedItem) { 289 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsedItem instanceof \stdClass) { 290 $parsedItem = (array) $parsedItem; 291 } 292 293 if (!\is_array($parsedItem)) { 294 throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem, $this->filename); 295 } 296 297 $data += $parsedItem; // array union 298 } 299 } else { 300 // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the 301 // current mapping, unless the key already exists in it. 302 $data += $parsed; // array union 303 } 304 } 305 } elseif ('<<' !== $key && isset($values['value']) && '&' === $values['value'][0] && self::preg_match(self::REFERENCE_PATTERN, $values['value'], $matches)) { 306 $isRef = $matches['ref']; 307 $this->refsBeingParsed[] = $isRef; 308 $values['value'] = $matches['value']; 309 } 310 311 $subTag = null; 312 if ($mergeNode) { 313 // Merge keys 314 } elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) { 315 // hash 316 // if next line is less indented or equal, then it means that the current value is null 317 if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) { 318 // Spec: Keys MUST be unique; first one wins. 319 // But overwriting is allowed when a merge node is used in current block. 
320 if ($allowOverwrite || !isset($data[$key])) { 321 if (null !== $subTag) { 322 $data[$key] = new TaggedValue($subTag, ''); 323 } else { 324 $data[$key] = null; 325 } 326 } else { 327 throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine); 328 } 329 } else { 330 // remember the parsed line number here in case we need it to provide some contexts in error messages below 331 $realCurrentLineNbKey = $this->getRealCurrentLineNb(); 332 $value = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(), $flags); 333 if ('<<' === $key) { 334 $this->refs[$refMatches['ref']] = $value; 335 336 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $value instanceof \stdClass) { 337 $value = (array) $value; 338 } 339 340 $data += $value; 341 } elseif ($allowOverwrite || !isset($data[$key])) { 342 // Spec: Keys MUST be unique; first one wins. 343 // But overwriting is allowed when a merge node is used in current block. 344 if (null !== $subTag) { 345 $data[$key] = new TaggedValue($subTag, $value); 346 } else { 347 $data[$key] = $value; 348 } 349 } else { 350 throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $realCurrentLineNbKey + 1, $this->currentLine); 351 } 352 } 353 } else { 354 $value = $this->parseValue(rtrim($values['value']), $flags, $context); 355 // Spec: Keys MUST be unique; first one wins. 356 // But overwriting is allowed when a merge node is used in current block. 
357 if ($allowOverwrite || !isset($data[$key])) { 358 $data[$key] = $value; 359 } else { 360 throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine); 361 } 362 } 363 if ($isRef) { 364 $this->refs[$isRef] = $data[$key]; 365 array_pop($this->refsBeingParsed); 366 } 367 } elseif ('"' === $this->currentLine[0] || "'" === $this->currentLine[0]) { 368 if (null !== $context) { 369 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 370 } 371 372 try { 373 return Inline::parse($this->lexInlineQuotedString(), $flags, $this->refs); 374 } catch (ParseException $e) { 375 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 376 $e->setSnippet($this->currentLine); 377 378 throw $e; 379 } 380 } elseif ('{' === $this->currentLine[0]) { 381 if (null !== $context) { 382 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 383 } 384 385 try { 386 $parsedMapping = Inline::parse($this->lexInlineMapping(), $flags, $this->refs); 387 388 while ($this->moveToNextLine()) { 389 if (!$this->isCurrentLineEmpty()) { 390 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 391 } 392 } 393 394 return $parsedMapping; 395 } catch (ParseException $e) { 396 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 397 $e->setSnippet($this->currentLine); 398 399 throw $e; 400 } 401 } elseif ('[' === $this->currentLine[0]) { 402 if (null !== $context) { 403 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 404 } 405 406 try { 407 $parsedSequence = Inline::parse($this->lexInlineSequence(), $flags, $this->refs); 408 409 while ($this->moveToNextLine()) { 410 if (!$this->isCurrentLineEmpty()) { 411 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, 
$this->currentLine, $this->filename); 412 } 413 } 414 415 return $parsedSequence; 416 } catch (ParseException $e) { 417 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 418 $e->setSnippet($this->currentLine); 419 420 throw $e; 421 } 422 } else { 423 // multiple documents are not supported 424 if ('---' === $this->currentLine) { 425 throw new ParseException('Multiple documents are not supported.', $this->currentLineNb + 1, $this->currentLine, $this->filename); 426 } 427 428 if ($deprecatedUsage = (isset($this->currentLine[1]) && '?' === $this->currentLine[0] && ' ' === $this->currentLine[1])) { 429 throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine); 430 } 431 432 // 1-liner optionally followed by newline(s) 433 if (\is_string($value) && $this->lines[0] === trim($value)) { 434 try { 435 $value = Inline::parse($this->lines[0], $flags, $this->refs); 436 } catch (ParseException $e) { 437 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 438 $e->setSnippet($this->currentLine); 439 440 throw $e; 441 } 442 443 return $value; 444 } 445 446 // try to parse the value as a multi-line string as a last resort 447 if (0 === $this->currentLineNb) { 448 $previousLineWasNewline = false; 449 $previousLineWasTerminatedWithBackslash = false; 450 $value = ''; 451 452 foreach ($this->lines as $line) { 453 $trimmedLine = trim($line); 454 if ('#' === ($trimmedLine[0] ?? 
'')) { 455 continue; 456 } 457 // If the indentation is not consistent at offset 0, it is to be considered as a ParseError 458 if (0 === $this->offset && !$deprecatedUsage && isset($line[0]) && ' ' === $line[0]) { 459 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 460 } 461 462 if (false !== strpos($line, ': ')) { 463 throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 464 } 465 466 if ('' === $trimmedLine) { 467 $value .= "\n"; 468 } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) { 469 $value .= ' '; 470 } 471 472 if ('' !== $trimmedLine && '\\' === substr($line, -1)) { 473 $value .= ltrim(substr($line, 0, -1)); 474 } elseif ('' !== $trimmedLine) { 475 $value .= $trimmedLine; 476 } 477 478 if ('' === $trimmedLine) { 479 $previousLineWasNewline = true; 480 $previousLineWasTerminatedWithBackslash = false; 481 } elseif ('\\' === substr($line, -1)) { 482 $previousLineWasNewline = false; 483 $previousLineWasTerminatedWithBackslash = true; 484 } else { 485 $previousLineWasNewline = false; 486 $previousLineWasTerminatedWithBackslash = false; 487 } 488 } 489 490 try { 491 return Inline::parse(trim($value)); 492 } catch (ParseException $e) { 493 // fall-through to the ParseException thrown below 494 } 495 } 496 497 throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 498 } 499 } while ($this->moveToNextLine()); 500 501 if (null !== $tag) { 502 $data = new TaggedValue($tag, $data); 503 } 504 505 if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && 'mapping' === $context && !\is_object($data)) { 506 $object = new \stdClass(); 507 508 foreach ($data as $key => $value) { 509 $object->$key = $value; 510 } 511 512 $data = $object; 513 } 514 515 return empty($data) ? 
null : $data; 516 } 517 518 private function parseBlock(int $offset, string $yaml, int $flags) 519 { 520 $skippedLineNumbers = $this->skippedLineNumbers; 521 522 foreach ($this->locallySkippedLineNumbers as $lineNumber) { 523 if ($lineNumber < $offset) { 524 continue; 525 } 526 527 $skippedLineNumbers[] = $lineNumber; 528 } 529 530 $parser = new self(); 531 $parser->offset = $offset; 532 $parser->totalNumberOfLines = $this->totalNumberOfLines; 533 $parser->skippedLineNumbers = $skippedLineNumbers; 534 $parser->refs = &$this->refs; 535 $parser->refsBeingParsed = $this->refsBeingParsed; 536 537 return $parser->doParse($yaml, $flags); 538 } 539 540 /** 541 * Returns the current line number (takes the offset into account). 542 * 543 * @internal 544 */ 545 public function getRealCurrentLineNb(): int 546 { 547 $realCurrentLineNumber = $this->currentLineNb + $this->offset; 548 549 foreach ($this->skippedLineNumbers as $skippedLineNumber) { 550 if ($skippedLineNumber > $realCurrentLineNumber) { 551 break; 552 } 553 554 ++$realCurrentLineNumber; 555 } 556 557 return $realCurrentLineNumber; 558 } 559 560 /** 561 * Returns the current line indentation. 562 */ 563 private function getCurrentLineIndentation(): int 564 { 565 if (' ' !== ($this->currentLine[0] ?? '')) { 566 return 0; 567 } 568 569 return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' ')); 570 } 571 572 /** 573 * Returns the next embed block of YAML. 
574 * 575 * @param int|null $indentation The indent level at which the block is to be read, or null for default 576 * @param bool $inSequence True if the enclosing data structure is a sequence 577 * 578 * @throws ParseException When indentation problem are detected 579 */ 580 private function getNextEmbedBlock(int $indentation = null, bool $inSequence = false): string 581 { 582 $oldLineIndentation = $this->getCurrentLineIndentation(); 583 584 if (!$this->moveToNextLine()) { 585 return ''; 586 } 587 588 if (null === $indentation) { 589 $newIndent = null; 590 $movements = 0; 591 592 do { 593 $EOF = false; 594 595 // empty and comment-like lines do not influence the indentation depth 596 if ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) { 597 $EOF = !$this->moveToNextLine(); 598 599 if (!$EOF) { 600 ++$movements; 601 } 602 } else { 603 $newIndent = $this->getCurrentLineIndentation(); 604 } 605 } while (!$EOF && null === $newIndent); 606 607 for ($i = 0; $i < $movements; ++$i) { 608 $this->moveToPreviousLine(); 609 } 610 611 $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem(); 612 613 if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) { 614 throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 615 } 616 } else { 617 $newIndent = $indentation; 618 } 619 620 $data = []; 621 622 if ($this->getCurrentLineIndentation() >= $newIndent) { 623 $data[] = substr($this->currentLine, $newIndent ?? 
0); 624 } elseif ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) { 625 $data[] = $this->currentLine; 626 } else { 627 $this->moveToPreviousLine(); 628 629 return ''; 630 } 631 632 if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) { 633 // the previous line contained a dash but no item content, this line is a sequence item with the same indentation 634 // and therefore no nested list or mapping 635 $this->moveToPreviousLine(); 636 637 return ''; 638 } 639 640 $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem(); 641 $isItComment = $this->isCurrentLineComment(); 642 643 while ($this->moveToNextLine()) { 644 if ($isItComment && !$isItUnindentedCollection) { 645 $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem(); 646 $isItComment = $this->isCurrentLineComment(); 647 } 648 649 $indent = $this->getCurrentLineIndentation(); 650 651 if ($isItUnindentedCollection && !$this->isCurrentLineEmpty() && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) { 652 $this->moveToPreviousLine(); 653 break; 654 } 655 656 if ($this->isCurrentLineBlank()) { 657 $data[] = substr($this->currentLine, $newIndent); 658 continue; 659 } 660 661 if ($indent >= $newIndent) { 662 $data[] = substr($this->currentLine, $newIndent); 663 } elseif ($this->isCurrentLineComment()) { 664 $data[] = $this->currentLine; 665 } elseif (0 == $indent) { 666 $this->moveToPreviousLine(); 667 668 break; 669 } else { 670 throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename); 671 } 672 } 673 674 return implode("\n", $data); 675 } 676 677 private function hasMoreLines(): bool 678 { 679 return (\count($this->lines) - 1) > $this->currentLineNb; 680 } 681 682 /** 683 * Moves the parser to the next line. 
684 */ 685 private function moveToNextLine(): bool 686 { 687 if ($this->currentLineNb >= $this->numberOfParsedLines - 1) { 688 return false; 689 } 690 691 $this->currentLine = $this->lines[++$this->currentLineNb]; 692 693 return true; 694 } 695 696 /** 697 * Moves the parser to the previous line. 698 */ 699 private function moveToPreviousLine(): bool 700 { 701 if ($this->currentLineNb < 1) { 702 return false; 703 } 704 705 $this->currentLine = $this->lines[--$this->currentLineNb]; 706 707 return true; 708 } 709 710 /** 711 * Parses a YAML value. 712 * 713 * @param string $value A YAML value 714 * @param int $flags A bit field of Yaml::PARSE_* constants to customize the YAML parser behavior 715 * @param string $context The parser context (either sequence or mapping) 716 * 717 * @return mixed 718 * 719 * @throws ParseException When reference does not exist 720 */ 721 private function parseValue(string $value, int $flags, string $context) 722 { 723 if (0 === strpos($value, '*')) { 724 if (false !== $pos = strpos($value, '#')) { 725 $value = substr($value, 1, $pos - 2); 726 } else { 727 $value = substr($value, 1); 728 } 729 730 if (!\array_key_exists($value, $this->refs)) { 731 if (false !== $pos = array_search($value, $this->refsBeingParsed, true)) { 732 throw new ParseException(sprintf('Circular reference [%s] detected for reference "%s".', implode(', ', array_merge(\array_slice($this->refsBeingParsed, $pos), [$value])), $value), $this->currentLineNb + 1, $this->currentLine, $this->filename); 733 } 734 735 throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->currentLineNb + 1, $this->currentLine, $this->filename); 736 } 737 738 return $this->refs[$value]; 739 } 740 741 if (\in_array($value[0], ['!', '|', '>'], true) && self::preg_match('/^(?:'.self::TAG_PATTERN.' +)?'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { 742 $modifiers = $matches['modifiers'] ?? 
''; 743 744 $data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers)); 745 746 if ('' !== $matches['tag'] && '!' !== $matches['tag']) { 747 if ('!!binary' === $matches['tag']) { 748 return Inline::evaluateBinaryScalar($data); 749 } 750 751 return new TaggedValue(substr($matches['tag'], 1), $data); 752 } 753 754 return $data; 755 } 756 757 try { 758 if ('' !== $value && '{' === $value[0]) { 759 $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value)); 760 761 return Inline::parse($this->lexInlineMapping($cursor), $flags, $this->refs); 762 } elseif ('' !== $value && '[' === $value[0]) { 763 $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value)); 764 765 return Inline::parse($this->lexInlineSequence($cursor), $flags, $this->refs); 766 } 767 768 switch ($value[0] ?? '') { 769 case '"': 770 case "'": 771 $cursor = \strlen(rtrim($this->currentLine)) - \strlen(rtrim($value)); 772 $parsedValue = Inline::parse($this->lexInlineQuotedString($cursor), $flags, $this->refs); 773 774 if (isset($this->currentLine[$cursor]) && preg_replace('/\s*(#.*)?$/A', '', substr($this->currentLine, $cursor))) { 775 throw new ParseException(sprintf('Unexpected characters near "%s".', substr($this->currentLine, $cursor))); 776 } 777 778 return $parsedValue; 779 default: 780 $lines = []; 781 782 while ($this->moveToNextLine()) { 783 // unquoted strings end before the first unindented line 784 if (0 === $this->getCurrentLineIndentation()) { 785 $this->moveToPreviousLine(); 786 787 break; 788 } 789 790 $lines[] = trim($this->currentLine); 791 } 792 793 for ($i = 0, $linesCount = \count($lines), $previousLineBlank = false; $i < $linesCount; ++$i) { 794 if ('' === $lines[$i]) { 795 $value .= "\n"; 796 $previousLineBlank = true; 797 } elseif ($previousLineBlank) { 798 $value .= $lines[$i]; 799 $previousLineBlank = false; 800 } else { 801 $value .= ' '.$lines[$i]; 802 $previousLineBlank = false; 803 } 804 } 805 806 
Inline::$parsedLineNumber = $this->getRealCurrentLineNb(); 807 808 $parsedValue = Inline::parse($value, $flags, $this->refs); 809 810 if ('mapping' === $context && \is_string($parsedValue) && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && false !== strpos($parsedValue, ': ')) { 811 throw new ParseException('A colon cannot be used in an unquoted mapping value.', $this->getRealCurrentLineNb() + 1, $value, $this->filename); 812 } 813 814 return $parsedValue; 815 } 816 } catch (ParseException $e) { 817 $e->setParsedLine($this->getRealCurrentLineNb() + 1); 818 $e->setSnippet($this->currentLine); 819 820 throw $e; 821 } 822 } 823 824 /** 825 * Parses a block scalar. 826 * 827 * @param string $style The style indicator that was used to begin this block scalar (| or >) 828 * @param string $chomping The chomping indicator that was used to begin this block scalar (+ or -) 829 * @param int $indentation The indentation indicator that was used to begin this block scalar 830 */ 831 private function parseBlockScalar(string $style, string $chomping = '', int $indentation = 0): string 832 { 833 $notEOF = $this->moveToNextLine(); 834 if (!$notEOF) { 835 return ''; 836 } 837 838 $isCurrentLineBlank = $this->isCurrentLineBlank(); 839 $blockLines = []; 840 841 // leading blank lines are consumed before determining indentation 842 while ($notEOF && $isCurrentLineBlank) { 843 // newline only if not EOF 844 if ($notEOF = $this->moveToNextLine()) { 845 $blockLines[] = ''; 846 $isCurrentLineBlank = $this->isCurrentLineBlank(); 847 } 848 } 849 850 // determine indentation if not specified 851 if (0 === $indentation) { 852 $currentLineLength = \strlen($this->currentLine); 853 854 for ($i = 0; $i < $currentLineLength && ' ' === $this->currentLine[$i]; ++$i) { 855 ++$indentation; 856 } 857 } 858 859 if ($indentation > 0) { 860 $pattern = sprintf('/^ {%d}(.*)$/', $indentation); 861 862 while ( 863 $notEOF && ( 864 $isCurrentLineBlank || 
865 self::preg_match($pattern, $this->currentLine, $matches) 866 ) 867 ) { 868 if ($isCurrentLineBlank && \strlen($this->currentLine) > $indentation) { 869 $blockLines[] = substr($this->currentLine, $indentation); 870 } elseif ($isCurrentLineBlank) { 871 $blockLines[] = ''; 872 } else { 873 $blockLines[] = $matches[1]; 874 } 875 876 // newline only if not EOF 877 if ($notEOF = $this->moveToNextLine()) { 878 $isCurrentLineBlank = $this->isCurrentLineBlank(); 879 } 880 } 881 } elseif ($notEOF) { 882 $blockLines[] = ''; 883 } 884 885 if ($notEOF) { 886 $blockLines[] = ''; 887 $this->moveToPreviousLine(); 888 } elseif (!$notEOF && !$this->isCurrentLineLastLineInDocument()) { 889 $blockLines[] = ''; 890 } 891 892 // folded style 893 if ('>' === $style) { 894 $text = ''; 895 $previousLineIndented = false; 896 $previousLineBlank = false; 897 898 for ($i = 0, $blockLinesCount = \count($blockLines); $i < $blockLinesCount; ++$i) { 899 if ('' === $blockLines[$i]) { 900 $text .= "\n"; 901 $previousLineIndented = false; 902 $previousLineBlank = true; 903 } elseif (' ' === $blockLines[$i][0]) { 904 $text .= "\n".$blockLines[$i]; 905 $previousLineIndented = true; 906 $previousLineBlank = false; 907 } elseif ($previousLineIndented) { 908 $text .= "\n".$blockLines[$i]; 909 $previousLineIndented = false; 910 $previousLineBlank = false; 911 } elseif ($previousLineBlank || 0 === $i) { 912 $text .= $blockLines[$i]; 913 $previousLineIndented = false; 914 $previousLineBlank = false; 915 } else { 916 $text .= ' '.$blockLines[$i]; 917 $previousLineIndented = false; 918 $previousLineBlank = false; 919 } 920 } 921 } else { 922 $text = implode("\n", $blockLines); 923 } 924 925 // deal with trailing newlines 926 if ('' === $chomping) { 927 $text = preg_replace('/\n+$/', "\n", $text); 928 } elseif ('-' === $chomping) { 929 $text = preg_replace('/\n+$/', '', $text); 930 } 931 932 return $text; 933 } 934 935 /** 936 * Returns true if the next line is indented. 
937 */ 938 private function isNextLineIndented(): bool 939 { 940 $currentIndentation = $this->getCurrentLineIndentation(); 941 $movements = 0; 942 943 do { 944 $EOF = !$this->moveToNextLine(); 945 946 if (!$EOF) { 947 ++$movements; 948 } 949 } while (!$EOF && ($this->isCurrentLineEmpty() || $this->isCurrentLineComment())); 950 951 if ($EOF) { 952 return false; 953 } 954 955 $ret = $this->getCurrentLineIndentation() > $currentIndentation; 956 957 for ($i = 0; $i < $movements; ++$i) { 958 $this->moveToPreviousLine(); 959 } 960 961 return $ret; 962 } 963 964 /** 965 * Returns true if the current line is blank or if it is a comment line. 966 */ 967 private function isCurrentLineEmpty(): bool 968 { 969 return $this->isCurrentLineBlank() || $this->isCurrentLineComment(); 970 } 971 972 /** 973 * Returns true if the current line is blank. 974 */ 975 private function isCurrentLineBlank(): bool 976 { 977 return '' === $this->currentLine || '' === trim($this->currentLine, ' '); 978 } 979 980 /** 981 * Returns true if the current line is a comment line. 982 */ 983 private function isCurrentLineComment(): bool 984 { 985 // checking explicitly the first char of the trim is faster than loops or strpos 986 $ltrimmedLine = '' !== $this->currentLine && ' ' === $this->currentLine[0] ? ltrim($this->currentLine, ' ') : $this->currentLine; 987 988 return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0]; 989 } 990 991 private function isCurrentLineLastLineInDocument(): bool 992 { 993 return ($this->offset + $this->currentLineNb) >= ($this->totalNumberOfLines - 1); 994 } 995 996 /** 997 * Cleanups a YAML string to be parsed. 
*
     * Line endings are normalized to "\n". A leading %YAML directive line,
     * leading comment lines and the "---" start of document marker are then
     * removed, and $this->offset is increased by the number of lines dropped.
     * The "..." end of document marker is only removed when a "---" marker
     * was found.
     *
     * @param string $value The input YAML string
     */
    private function cleanup(string $value): string
    {
        $value = str_replace(["\r\n", "\r"], "\n", $value);

        // strip YAML header
        $count = 0;
        $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $count);
        $this->offset += $count;

        // remove leading comments
        $trimmedValue = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $count);
        if (1 === $count) {
            // items have been removed, update the offset
            $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
            $value = $trimmedValue;
        }

        // remove start of the document marker (---)
        $trimmedValue = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $count);
        if (1 === $count) {
            // items have been removed, update the offset
            $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
            $value = $trimmedValue;

            // remove end of the document marker (...)
            $value = preg_replace('#\.\.\.\s*$#', '', $value);
        }

        return $value;
    }

    /**
     * Returns true if the next line starts unindented collection.
     *
     * Blank lines and comment lines are skipped while looking ahead. Every
     * successful moveToNextLine() is undone with moveToPreviousLine() before
     * returning, on all paths (including when the end of the input is reached).
     */
    private function isNextLineUnIndentedCollection(): bool
    {
        $currentIndentation = $this->getCurrentLineIndentation();
        $movements = 0;

        // isCurrentLineEmpty() is true for blank lines as well as comment lines
        do {
            $EOF = !$this->moveToNextLine();

            if (!$EOF) {
                ++$movements;
            }
        } while (!$EOF && $this->isCurrentLineEmpty());

        // at EOF there is no next line that could start a collection
        $ret = !$EOF
            && $this->getCurrentLineIndentation() === $currentIndentation
            && $this->isStringUnIndentedCollectionItem();

        // this is a look-ahead only: rewind, even when EOF was hit after
        // skipping trailing blank/comment lines
        for ($i = 0; $i < $movements; ++$i) {
            $this->moveToPreviousLine();
        }

        return $ret;
    }

    /**
     * Returns true if the string is un-indented collection item.
*
     * The current line qualifies when it is a lone "-" (trailing whitespace
     * ignored) or when it starts with "- ".
     */
    private function isStringUnIndentedCollectionItem(): bool
    {
        return '-' === rtrim($this->currentLine) || 0 === strpos($this->currentLine, '- ');
    }

    /**
     * A local wrapper for "preg_match" which will throw a ParseException if there
     * is an internal error in the PCRE engine.
     *
     * This avoids us needing to check for "false" every time PCRE is used
     * in the YAML engine
     *
     * @return int The result of the native preg_match() call (never false)
     *
     * @throws ParseException on a PCRE internal error
     *
     * @see preg_last_error()
     *
     * @internal
     */
    public static function preg_match(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int
    {
        if (false === $ret = preg_match($pattern, $subject, $matches, $flags, $offset)) {
            switch (preg_last_error()) {
                case \PREG_INTERNAL_ERROR:
                    $error = 'Internal PCRE error.';
                    break;
                case \PREG_BACKTRACK_LIMIT_ERROR:
                    $error = 'pcre.backtrack_limit reached.';
                    break;
                case \PREG_RECURSION_LIMIT_ERROR:
                    $error = 'pcre.recursion_limit reached.';
                    break;
                case \PREG_BAD_UTF8_ERROR:
                    $error = 'Malformed UTF-8 data.';
                    break;
                case \PREG_BAD_UTF8_OFFSET_ERROR:
                    $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
                    break;
                default:
                    $error = 'Error.';
            }

            throw new ParseException($error);
        }

        return $ret;
    }

    /**
     * Trim the tag on top of the value.
     *
     * Prevent values such as "!foo {quz: bar}" to be considered as
     * a mapping block.
     *
     * When the value starts with "!", only the characters between the "!" and
     * the first space, CR or LF are returned (with leading spaces removed).
     * Any other value, including the empty string, is returned unchanged.
     */
    private function trimTag(string $value): string
    {
        // guard against the empty string: reading offset 0 would raise a warning
        if ('' !== $value && '!' === $value[0]) {
            return ltrim(substr($value, 1, strcspn($value, " \r\n", 1)), ' ');
        }

        return $value;
    }

    /**
     * Returns the tag carried by a line that consists of a tag only.
     *
     * The value must match TAG_PATTERN, optionally followed by spaces and a
     * comment. When $nextLineCheck is true, the tag is only taken into account
     * if isNextLineIndented() reports an indented next line.
     *
     * @param string $value         The line content to inspect
     * @param int    $flags         A bit field of Yaml::PARSE_* constants
     * @param bool   $nextLineCheck Whether the next line must be indented
     *
     * @return string|null The tag without its leading "!", or null when the value is not a lone tag
     *
     * @throws ParseException If the tag is a built-in ("!!") tag or if Yaml::PARSE_CUSTOM_TAGS is not set
     */
    private function getLineTag(string $value, int $flags, bool $nextLineCheck = true): ?string
    {
        if ('' === $value || '!' !== $value[0] || 1 !== self::preg_match('/^'.self::TAG_PATTERN.' *( +#.*)?$/', $value, $matches)) {
            return null;
        }

        if ($nextLineCheck && !$this->isNextLineIndented()) {
            return null;
        }

        $tag = substr($matches['tag'], 1);

        // Built-in tags
        if ($tag && '!' === $tag[0]) {
            throw new ParseException(sprintf('The built-in tag "!%s" is not implemented.', $tag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
        }

        if (Yaml::PARSE_CUSTOM_TAGS & $flags) {
            return $tag;
        }

        throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
    }

    /**
     * Lexes a quoted string that starts at $cursor on the current line and may
     * continue on the following lines.
     *
     * The returned string includes the opening and closing quotes. On return,
     * $cursor points right after the closing quote.
     *
     * @throws ParseException If the input ends before the closing quote is found
     */
    private function lexInlineQuotedString(int &$cursor = 0): string
    {
        $quotation = $this->currentLine[$cursor];
        $value = $quotation;
        ++$cursor;

        $previousLineWasNewline = true;
        $previousLineWasTerminatedWithBackslash = false;
        $lineNumber = 0;

        do {
            // leading spaces of continuation lines are not part of the string
            if (++$lineNumber > 1) {
                $cursor += strspn($this->currentLine, ' ', $cursor);
            }

            // a blank line yields a newline; otherwise consecutive lines are
            // joined with a single space, unless the previous line was blank
            // or ended with a backslash
            if ($this->isCurrentLineBlank()) {
                $value .= "\n";
            } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
                $value .= ' ';
            }

            for (; \strlen($this->currentLine) > $cursor; ++$cursor) {
                switch ($this->currentLine[$cursor]) {
                    case '\\':
                        if ("'" === $quotation) {
                            $value .= '\\';
                        } elseif (isset($this->currentLine[++$cursor])) {
                            // keep the escape sequence as is, so that an escaped
                            // quote does not terminate the string
                            $value .= '\\'.$this->currentLine[$cursor];
                        }

                        break;
                    case $quotation:
                        ++$cursor;

                        // two consecutive single quotes inside a single-quoted string
                        // are kept as is and do not close the string
                        if ("'" === $quotation && isset($this->currentLine[$cursor]) && "'" === $this->currentLine[$cursor]) {
                            $value .= "''";
                            break;
                        }

                        return $value.$quotation;
                    default:
                        $value .= $this->currentLine[$cursor];
                }
            }

            if ($this->isCurrentLineBlank()) {
                $previousLineWasNewline = true;
                $previousLineWasTerminatedWithBackslash = false;
            } elseif ('\\' === $this->currentLine[-1]) {
                $previousLineWasNewline = false;
                $previousLineWasTerminatedWithBackslash = true;
            } else {
                $previousLineWasNewline = false;
                $previousLineWasTerminatedWithBackslash = false;
            }

            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        throw new ParseException('Malformed inline YAML string.');
    }

    /**
     * Lexes an unquoted string starting at $cursor on the current line.
     *
     * The string ends before the first of "[", "]", "{", "}", ",", ":" or a
     * space, or at the end of the line. $cursor is moved to that position.
     *
     * @throws ParseException If no character could be consumed
     */
    private function lexUnquotedString(int &$cursor): string
    {
        $offset = $cursor;
        $cursor += strcspn($this->currentLine, '[]{},: ', $cursor);

        if ($cursor === $offset) {
            throw new ParseException('Malformed unquoted YAML string.');
        }

        return substr($this->currentLine, $offset, $cursor - $offset);
    }

    /**
     * Lexes an inline mapping: an inline structure closed by "}".
     */
    private function lexInlineMapping(int &$cursor = 0): string
    {
        return $this->lexInlineStructure($cursor, '}');
    }

    /**
     * Lexes an inline sequence: an inline structure closed by "]".
     */
    private function lexInlineSequence(int &$cursor = 0): string
    {
        return $this->lexInlineStructure($cursor, ']');
    }

    /**
     * Lexes an inline structure whose opening character is at $cursor on the
     * current line and which may continue on the following lines.
     *
     * The returned string starts with the opening character and ends with
     * $closingTag. On return, $cursor points right after the closing character.
     *
     * @param string $closingTag The character that closes the structure
     *
     * @throws ParseException If the input ends before the closing character is found
     */
    private function lexInlineStructure(int &$cursor, string $closingTag): string
    {
        $value = $this->currentLine[$cursor];
        ++$cursor;

        do {
            $this->consumeWhitespaces($cursor);

            while (isset($this->currentLine[$cursor])) {
                switch ($this->currentLine[$cursor]) {
                    case '"':
                    case "'":
                        $value .= $this->lexInlineQuotedString($cursor);
                        break;
                    case ':':
                    case ',':
                        $value .= $this->currentLine[$cursor];
                        ++$cursor;
                        break;
                    case '{':
                        $value .= $this->lexInlineMapping($cursor);
                        break;
                    case '[':
                        $value .= $this->lexInlineSequence($cursor);
                        break;
                    case $closingTag:
                        $value .= $this->currentLine[$cursor];
                        ++$cursor;

                        return $value;
                    case '#':
                        // the rest of the line is ignored; carry on with the next line
                        break 2;
                    default:
                        $value .= $this->lexUnquotedString($cursor);
                }

                // any run of spaces between two tokens is collapsed to one space
                if ($this->consumeWhitespaces($cursor)) {
                    $value .= ' ';
                }
            }

            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        throw new ParseException('Malformed inline YAML string.');
    }

    /**
     * Advances $cursor past space characters, moving on to the following
     * lines for as long as the end of the current line is reached.
     *
     * @return bool Whether at least one space character has been consumed
     */
    private function consumeWhitespaces(int &$cursor): bool
    {
        $whitespacesConsumed = 0;

        do {
            $whitespaceOnlyTokenLength = strspn($this->currentLine, ' ', $cursor);
            $whitespacesConsumed += $whitespaceOnlyTokenLength;
            $cursor += $whitespaceOnlyTokenLength;

            // a non-space character is available on the current line: stop here
            if (isset($this->currentLine[$cursor])) {
                return 0 < $whitespacesConsumed;
            }

            if ($this->hasMoreLines()) {
                $cursor = 0;
            }
        } while ($this->moveToNextLine());

        return 0 < $whitespacesConsumed;
    }
}