1<?php 2 3/** 4 * Hoa 5 * 6 * 7 * @license 8 * 9 * New BSD License 10 * 11 * Copyright © 2007-2017, Hoa community. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions are met: 15 * * Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * * Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * * Neither the name of the Hoa nor the names of its contributors may be 21 * used to endorse or promote products derived from this software without 22 * specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE 28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37namespace Hoa\Compiler\Llk; 38 39use Hoa\Compiler; 40use Hoa\Iterator; 41 42/** 43 * Class \Hoa\Compiler\Llk\Parser. 44 * 45 * LL(k) parser. 46 * 47 * @copyright Copyright © 2007-2017 Hoa community 48 * @license New BSD License 49 */ 50class Parser 51{ 52 /** 53 * List of pragmas. 54 * 55 * @var array 56 */ 57 protected $_pragmas = null; 58 59 /** 60 * List of skipped tokens. 61 * 62 * @var array 63 */ 64 protected $_skip = null; 65 66 /** 67 * Associative array (token name => token regex), to be defined in 68 * precedence order. 69 * 70 * @var array 71 */ 72 protected $_tokens = null; 73 74 /** 75 * Rules, to be defined as associative array, name => Rule object. 76 * 77 * @var array 78 */ 79 protected $_rules = null; 80 81 /** 82 * Lexer iterator. 83 * 84 * @var \Hoa\Iterator\Lookahead 85 */ 86 protected $_tokenSequence = null; 87 88 /** 89 * Possible token causing an error. 90 * 91 * @var array 92 */ 93 protected $_errorToken = null; 94 95 /** 96 * Trace of activated rules. 97 * 98 * @var array 99 */ 100 protected $_trace = []; 101 102 /** 103 * Stack of todo list. 104 * 105 * @var array 106 */ 107 protected $_todo = null; 108 109 /** 110 * AST. 111 * 112 * @var \Hoa\Compiler\Llk\TreeNode 113 */ 114 protected $_tree = null; 115 116 /** 117 * Current depth while building the trace. 118 * 119 * @var int 120 */ 121 protected $_depth = -1; 122 123 124 125 /** 126 * Construct the parser. 127 * 128 * @param array $tokens Tokens. 129 * @param array $rules Rules. 130 * @param array $pragmas Pragmas. 131 */ 132 public function __construct( 133 array $tokens = [], 134 array $rules = [], 135 array $pragmas = [] 136 ) { 137 $this->_tokens = $tokens; 138 $this->_rules = $rules; 139 $this->_pragmas = $pragmas; 140 141 return; 142 } 143 144 /** 145 * Parse :-). 146 * 147 * @param string $text Text to parse. 148 * @param string $rule The axiom, i.e. root rule. 149 * @param bool $tree Whether build tree or not. 150 * @return mixed 151 * @throws \Hoa\Compiler\Exception\UnexpectedToken 152 */ 153 public function parse($text, $rule = null, $tree = true) 154 { 155 $k = 1024; 156 157 if (isset($this->_pragmas['parser.lookahead'])) { 158 $k = max(0, intval($this->_pragmas['parser.lookahead'])); 159 } 160 161 $lexer = new Lexer($this->_pragmas); 162 $this->_tokenSequence = new Iterator\Buffer( 163 $lexer->lexMe($text, $this->_tokens), 164 $k 165 ); 166 $this->_tokenSequence->rewind(); 167 168 $this->_errorToken = null; 169 $this->_trace = []; 170 $this->_todo = []; 171 172 if (false === array_key_exists($rule, $this->_rules)) { 173 $rule = $this->getRootRule(); 174 } 175 176 $closeRule = new Rule\Ekzit($rule, 0); 177 $openRule = new Rule\Entry($rule, 0, [$closeRule]); 178 $this->_todo = [$closeRule, $openRule]; 179 180 do { 181 $out = $this->unfold(); 182 183 if (null !== $out && 184 'EOF' === $this->_tokenSequence->current()['token']) { 185 break; 186 } 187 188 if (false === $this->backtrack()) { 189 $token = $this->_errorToken; 190 191 if (null === $this->_errorToken) { 192 $token = $this->_tokenSequence->current(); 193 } 194 195 $offset = $token['offset']; 196 $line = 1; 197 $column = 1; 198 199 if (!empty($text)) { 200 if (0 === $offset) { 201 $leftnl = 0; 202 } else { 203 $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0; 204 } 205 206 $rightnl = strpos($text, "\n", $offset); 207 $line = substr_count($text, "\n", 0, $leftnl + 1) + 1; 208 $column = $offset - $leftnl + (0 === $leftnl); 209 210 if (false !== $rightnl) { 211 $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n"); 212 } 213 } 214 215 throw new Compiler\Exception\UnexpectedToken( 216 'Unexpected token "%s" (%s) at line %d and column %d:' . 217 "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑', 218 0, 219 [ 220 $token['value'], 221 $token['token'], 222 $line, 223 $column, 224 $text 225 ], 226 $line, 227 $column 228 ); 229 } 230 } while (true); 231 232 if (false === $tree) { 233 return true; 234 } 235 236 $tree = $this->_buildTree(); 237 238 if (!($tree instanceof TreeNode)) { 239 throw new Compiler\Exception( 240 'Parsing error: cannot build AST, the trace is corrupted.', 241 1 242 ); 243 } 244 245 return $this->_tree = $tree; 246 } 247 248 /** 249 * Unfold trace. 250 * 251 * @return mixed 252 */ 253 protected function unfold() 254 { 255 while (0 < count($this->_todo)) { 256 $rule = array_pop($this->_todo); 257 258 if ($rule instanceof Rule\Ekzit) { 259 $rule->setDepth($this->_depth); 260 $this->_trace[] = $rule; 261 262 if (false === $rule->isTransitional()) { 263 --$this->_depth; 264 } 265 } else { 266 $ruleName = $rule->getRule(); 267 $next = $rule->getData(); 268 $zeRule = $this->_rules[$ruleName]; 269 $out = $this->_parse($zeRule, $next); 270 271 if (false === $out && false === $this->backtrack()) { 272 return null; 273 } 274 } 275 } 276 277 return true; 278 } 279 280 /** 281 * Parse current rule. 282 * 283 * @param \Hoa\Compiler\Llk\Rule $zeRule Current rule. 284 * @param int $next Next rule index. 285 * @return bool 286 */ 287 protected function _parse(Rule $zeRule, $next) 288 { 289 if ($zeRule instanceof Rule\Token) { 290 $name = $this->_tokenSequence->current()['token']; 291 292 if ($zeRule->getTokenName() !== $name) { 293 return false; 294 } 295 296 $value = $this->_tokenSequence->current()['value']; 297 298 if (0 <= $unification = $zeRule->getUnificationIndex()) { 299 for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) { 300 $trace = $this->_trace[$i]; 301 302 if ($trace instanceof Rule\Entry) { 303 if (false === $trace->isTransitional()) { 304 if ($trace->getDepth() <= $this->_depth) { 305 break; 306 } 307 308 --$skip; 309 } 310 } elseif ($trace instanceof Rule\Ekzit && 311 false === $trace->isTransitional()) { 312 $skip += $trace->getDepth() > $this->_depth; 313 } 314 315 if (0 < $skip) { 316 continue; 317 } 318 319 if ($trace instanceof Rule\Token && 320 $unification === $trace->getUnificationIndex() && 321 $value !== $trace->getValue()) { 322 return false; 323 } 324 } 325 } 326 327 $namespace = $this->_tokenSequence->current()['namespace']; 328 $zzeRule = clone $zeRule; 329 $zzeRule->setValue($value); 330 $zzeRule->setNamespace($namespace); 331 332 if (isset($this->_tokens[$namespace][$name])) { 333 $zzeRule->setRepresentation($this->_tokens[$namespace][$name]); 334 } else { 335 foreach ($this->_tokens[$namespace] as $_name => $regex) { 336 if (false === $pos = strpos($_name, ':')) { 337 continue; 338 } 339 340 $_name = substr($_name, 0, $pos); 341 342 if ($_name === $name) { 343 break; 344 } 345 } 346 347 $zzeRule->setRepresentation($regex); 348 } 349 350 array_pop($this->_todo); 351 $this->_trace[] = $zzeRule; 352 $this->_tokenSequence->next(); 353 $this->_errorToken = $this->_tokenSequence->current(); 354 355 return true; 356 } elseif ($zeRule instanceof Rule\Concatenation) { 357 if (false === $zeRule->isTransitional()) { 358 ++$this->_depth; 359 } 360 361 $this->_trace[] = new Rule\Entry( 362 $zeRule->getName(), 363 0, 364 null, 365 $this->_depth 366 ); 367 $children = $zeRule->getChildren(); 368 369 for ($i = count($children) - 1; $i >= 0; --$i) { 370 $nextRule = $children[$i]; 371 $this->_todo[] = new Rule\Ekzit($nextRule, 0); 372 $this->_todo[] = new Rule\Entry($nextRule, 0); 373 } 374 375 return true; 376 } elseif ($zeRule instanceof Rule\Choice) { 377 $children = $zeRule->getChildren(); 378 379 if ($next >= count($children)) { 380 return false; 381 } 382 383 if (false === $zeRule->isTransitional()) { 384 ++$this->_depth; 385 } 386 387 $this->_trace[] = new Rule\Entry( 388 $zeRule->getName(), 389 $next, 390 $this->_todo, 391 $this->_depth 392 ); 393 $nextRule = $children[$next]; 394 $this->_todo[] = new Rule\Ekzit($nextRule, 0); 395 $this->_todo[] = new Rule\Entry($nextRule, 0); 396 397 return true; 398 } elseif ($zeRule instanceof Rule\Repetition) { 399 $nextRule = $zeRule->getChildren(); 400 401 if (0 === $next) { 402 $name = $zeRule->getName(); 403 $min = $zeRule->getMin(); 404 405 if (false === $zeRule->isTransitional()) { 406 ++$this->_depth; 407 } 408 409 $this->_trace[] = new Rule\Entry( 410 $name, 411 $min, 412 null, 413 $this->_depth 414 ); 415 array_pop($this->_todo); 416 $this->_todo[] = new Rule\Ekzit( 417 $name, 418 $min, 419 $this->_todo 420 ); 421 422 for ($i = 0; $i < $min; ++$i) { 423 $this->_todo[] = new Rule\Ekzit($nextRule, 0); 424 $this->_todo[] = new Rule\Entry($nextRule, 0); 425 } 426 427 return true; 428 } else { 429 $max = $zeRule->getMax(); 430 431 if (-1 != $max && $next > $max) { 432 return false; 433 } 434 435 $this->_todo[] = new Rule\Ekzit( 436 $zeRule->getName(), 437 $next, 438 $this->_todo 439 ); 440 $this->_todo[] = new Rule\Ekzit($nextRule, 0); 441 $this->_todo[] = new Rule\Entry($nextRule, 0); 442 443 return true; 444 } 445 } 446 447 return false; 448 } 449 450 /** 451 * Backtrack the trace. 452 * 453 * @return bool 454 */ 455 protected function backtrack() 456 { 457 $found = false; 458 459 do { 460 $last = array_pop($this->_trace); 461 462 if ($last instanceof Rule\Entry) { 463 $zeRule = $this->_rules[$last->getRule()]; 464 $found = $zeRule instanceof Rule\Choice; 465 } elseif ($last instanceof Rule\Ekzit) { 466 $zeRule = $this->_rules[$last->getRule()]; 467 $found = $zeRule instanceof Rule\Repetition; 468 } elseif ($last instanceof Rule\Token) { 469 $this->_tokenSequence->previous(); 470 471 if (false === $this->_tokenSequence->valid()) { 472 return false; 473 } 474 } 475 } while (0 < count($this->_trace) && false === $found); 476 477 if (false === $found) { 478 return false; 479 } 480 481 $rule = $last->getRule(); 482 $next = $last->getData() + 1; 483 $this->_depth = $last->getDepth(); 484 $this->_todo = $last->getTodo(); 485 $this->_todo[] = new Rule\Entry($rule, $next); 486 487 return true; 488 } 489 490 /** 491 * Build AST from trace. 492 * Walk through the trace iteratively and recursively. 493 * 494 * @param int $i Current trace index. 495 * @param array &$children Collected children. 496 * @return \Hoa\Compiler\Llk\TreeNode 497 */ 498 protected function _buildTree($i = 0, &$children = []) 499 { 500 $max = count($this->_trace); 501 502 while ($i < $max) { 503 $trace = $this->_trace[$i]; 504 505 if ($trace instanceof Rule\Entry) { 506 $ruleName = $trace->getRule(); 507 $rule = $this->_rules[$ruleName]; 508 $isRule = false === $trace->isTransitional(); 509 $nextTrace = $this->_trace[$i + 1]; 510 $id = $rule->getNodeId(); 511 512 // Optimization: Skip empty trace sequence. 513 if ($nextTrace instanceof Rule\Ekzit && 514 $ruleName == $nextTrace->getRule()) { 515 $i += 2; 516 517 continue; 518 } 519 520 if (true === $isRule) { 521 $children[] = $ruleName; 522 } 523 524 if (null !== $id) { 525 $children[] = [ 526 'id' => $id, 527 'options' => $rule->getNodeOptions() 528 ]; 529 } 530 531 $i = $this->_buildTree($i + 1, $children); 532 533 if (false === $isRule) { 534 continue; 535 } 536 537 $handle = []; 538 $cId = null; 539 $cOptions = []; 540 541 do { 542 $pop = array_pop($children); 543 544 if (true === is_object($pop)) { 545 $handle[] = $pop; 546 } elseif (true === is_array($pop) && null === $cId) { 547 $cId = $pop['id']; 548 $cOptions = $pop['options']; 549 } elseif ($ruleName == $pop) { 550 break; 551 } 552 } while (null !== $pop); 553 554 if (null === $cId) { 555 $cId = $rule->getDefaultId(); 556 $cOptions = $rule->getDefaultOptions(); 557 } 558 559 if (null === $cId) { 560 for ($j = count($handle) - 1; $j >= 0; --$j) { 561 $children[] = $handle[$j]; 562 } 563 564 continue; 565 } 566 567 if (true === in_array('M', $cOptions) && 568 true === $this->mergeTree($children, $handle, $cId)) { 569 continue; 570 } 571 572 if (true === in_array('m', $cOptions) && 573 true === $this->mergeTree($children, $handle, $cId, true)) { 574 continue; 575 } 576 577 $cTree = new TreeNode($id ?: $cId); 578 579 foreach ($handle as $child) { 580 $child->setParent($cTree); 581 $cTree->prependChild($child); 582 } 583 584 $children[] = $cTree; 585 } elseif ($trace instanceof Rule\Ekzit) { 586 return $i + 1; 587 } else { 588 if (false === $trace->isKept()) { 589 ++$i; 590 591 continue; 592 } 593 594 $child = new TreeNode('token', [ 595 'token' => $trace->getTokenName(), 596 'value' => $trace->getValue(), 597 'namespace' => $trace->getNamespace(), 598 ]); 599 $children[] = $child; 600 ++$i; 601 } 602 } 603 604 return $children[0]; 605 } 606 607 /** 608 * Try to merge directly children into an existing node. 609 * 610 * @param array &$children Current children being gathering. 611 * @param array &$handle Children of the new node. 612 * @param string $cId Node ID. 613 * @param bool $recursive Whether we should merge recursively or 614 * not. 615 * @return bool 616 */ 617 protected function mergeTree( 618 &$children, 619 &$handle, 620 $cId, 621 $recursive = false 622 ) { 623 end($children); 624 $last = current($children); 625 626 if (!is_object($last)) { 627 return false; 628 } 629 630 if ($cId !== $last->getId()) { 631 return false; 632 } 633 634 if (true === $recursive) { 635 foreach ($handle as $child) { 636 $this->mergeTreeRecursive($last, $child); 637 } 638 639 return true; 640 } 641 642 foreach ($handle as $child) { 643 $last->appendChild($child); 644 $child->setParent($last); 645 } 646 647 return true; 648 } 649 650 /** 651 * Merge recursively. 652 * Please, see self::mergeTree() to know the context. 653 * 654 * @param \Hoa\Compiler\Llk\TreeNode $node Node that receives. 655 * @param \Hoa\Compiler\Llk\TreeNode $newNode Node to merge. 656 * @return void 657 */ 658 protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode) 659 { 660 $nNId = $newNode->getId(); 661 662 if ('token' === $nNId) { 663 $node->appendChild($newNode); 664 $newNode->setParent($node); 665 666 return; 667 } 668 669 $children = $node->getChildren(); 670 end($children); 671 $last = current($children); 672 673 if ($last->getId() !== $nNId) { 674 $node->appendChild($newNode); 675 $newNode->setParent($node); 676 677 return; 678 } 679 680 foreach ($newNode->getChildren() as $child) { 681 $this->mergeTreeRecursive($last, $child); 682 } 683 684 return; 685 } 686 687 /** 688 * Get AST. 689 * 690 * @return \Hoa\Compiler\Llk\TreeNode 691 */ 692 public function getTree() 693 { 694 return $this->_tree; 695 } 696 697 /** 698 * Get trace. 699 * 700 * @return array 701 */ 702 public function getTrace() 703 { 704 return $this->_trace; 705 } 706 707 /** 708 * Get pragmas. 709 * 710 * @return array 711 */ 712 public function getPragmas() 713 { 714 return $this->_pragmas; 715 } 716 717 /** 718 * Get tokens. 719 * 720 * @return array 721 */ 722 public function getTokens() 723 { 724 return $this->_tokens; 725 } 726 727 /** 728 * Get the lexer iterator. 729 * 730 * @return \Hoa\Iterator\Buffer 731 */ 732 public function getTokenSequence() 733 { 734 return $this->_tokenSequence; 735 } 736 737 /** 738 * Get rule by name. 739 * 740 * @param string $name Rule name. 741 * @return \Hoa\Compiler\Llk\Rule 742 */ 743 public function getRule($name) 744 { 745 if (!isset($this->_rules[$name])) { 746 return null; 747 } 748 749 return $this->_rules[$name]; 750 } 751 752 /** 753 * Get rules. 754 * 755 * @return array 756 */ 757 public function getRules() 758 { 759 return $this->_rules; 760 } 761 762 /** 763 * Get root rule. 764 * 765 * @return string 766 */ 767 public function getRootRule() 768 { 769 foreach ($this->_rules as $rule => $_) { 770 if (!is_int($rule)) { 771 break; 772 } 773 } 774 775 return $rule; 776 } 777} 778