1<?php 2 3namespace AST; 4use \InvalidArgumentException; 5use \LogicException; 6use \RuntimeException; 7 8require_once "tokenizer.php"; 9require_once "exceptions.php"; 10 11class ParseConfig { 12 public $EXPR_DEPTH_LIMIT = 50; 13} 14 15function parse_config() { 16 static $config = null; 17 if ($config == null) { 18 $config = new ParseConfig(); 19 } 20 return $config; 21} 22 23abstract class Fixing { 24 const None = 0; 25 const Prefix = 1; 26 const Postfix = 2; 27 const Infix = 3; 28 const Wrap = 4; 29} 30 31class ElementInstance { 32 private $_definition = null; 33 private $_args = null; 34 35 public function __construct($definition, $args) { 36 $this->_definition = $definition; 37 $this->_args = $args; 38 } 39 40 public function definition() { return $this->_definition; } 41 public function args() { return $this->_args; } 42 43 public function getStringValue() { 44 $pieces = array(); 45 foreach ($this->args() as $arg) { 46 if ($arg instanceof ElementInstance) { 47 $pieces[] = $arg->getStringValue(); 48 } else { 49 $pieces[] = $arg->match(); 50 } 51 } 52 return implode('', $pieces); 53 } 54 55 public function getRepresentation() { 56 static $stack_depth = 0; 57 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 58 throw new RuntimeException('Depth limit exceeded.'); 59 } 60 try { 61 if ($this->definition() === null) { 62 $this->ensureWellFormed(false); 63 // The expression is guaranteed to have 0 or 1 arguments because it's well formed. 64 if (count($this->args()) == 0) { 65 return ''; 66 } 67 if ($this->args()[0] instanceof ElementInstance) { 68 return $this->args()[0]->getRepresentation(); 69 } else { 70 return $this->args()[0]->match(); 71 } 72 } else { 73 return $this->definition()->getRepresentation($this); 74 } 75 } finally { 76 --$stack_depth; 77 } 78 } 79 80 public function isExpanded($recursive=true) { 81 static $stack_depth = 0; 82 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 83 throw new RuntimeException('Depth limit exceeded.'); 84 } 85 try { 86 if ($this->definition() !== null && $this->definition()->fixing() == Fixing::None) { 87 return true; 88 } 89 foreach ($this->args() as $arg) { 90 if ($arg instanceof TokenInstance || ($recursive && !$arg->isExpanded($recursive))) { 91 return false; 92 } 93 } 94 } finally { 95 --$stack_depth; 96 } 97 return true; 98 } 99 100 public function expand($elmDef, $recursive=true) { 101 static $stack_depth = 0; 102 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 103 throw new RuntimeException('Depth limit exceeded.'); 104 } 105 try { 106 if ($this->isExpanded($recursive)) { 107 return; 108 } 109 $elmDef->spliceInstancesIn($this->_args); 110 if ($recursive) { 111 foreach ($this->args() as $arg) { 112 if ($arg instanceof ElementInstance && !$arg->isExpanded($recursive)) { 113 $arg->expand($elmDef); 114 } 115 } 116 } 117 } finally { 118 --$stack_depth; 119 } 120 } 121 122 public function findUnexpandedToken($recursive=true) { 123 static $stack_depth = 0; 124 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 125 throw new RuntimeException('Depth limit exceeded.'); 126 } 127 try { 128 if ($this->isExpanded($recursive)) { 129 return null; 130 } 131 foreach ($this->args() as $arg) { 132 if ($arg instanceof TokenInstance) { 133 return $arg; 134 } elseif ($recursive) { 135 $tok = $arg->findUnexpandedToken($recursive); 136 if ($tok !== null) { 137 return $tok; 138 } 139 } 140 } 141 throw LogicException('A fully expanded element instance has no stray token!'); 142 } finally { 143 --$stack_depth; 144 } 145 } 146 147 public function printTree($indentStr='', $isLastChild=false) { 148 static $stack_depth = 0; 149 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 150 throw new RuntimeException('Depth limit exceeded.'); 151 } 152 try { 153 if ($this->definition() !== null) { 154 echo $indentStr . '+-' . $this->definition()->name() . "\n"; 155 } 156 $extraIndentStr = ($isLastChild ? ' ' : '| '); 157 $argIdx = 0; 158 foreach ($this->args() as $arg) { 159 ++$argIdx; 160 if ($arg instanceof TokenInstance) { 161 echo $indentStr . $extraIndentStr . '+-' . $arg . "\n"; 162 } else { 163 $arg->printTree($indentStr . $extraIndentStr, $argIdx == count($this->args())); 164 } 165 } 166 } finally { 167 --$stack_depth; 168 } 169 } 170 171 public function evaluateArgs() { 172 $values = array(); 173 foreach ($this->args() as $arg) { 174 if ($arg instanceof ElementInstance) { 175 $values[] = $arg->evaluate(); 176 } else { 177 $values[] = $arg; 178 } 179 } 180 return $values; 181 } 182 183 public function ensureWellFormed($recursive=true) { 184 static $stack_depth = 0; 185 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 186 throw new RuntimeException('Depth limit exceeded.'); 187 } 188 try { 189 if ($this->definition() === null) { 190 if (count($this->args()) > 1) { 191 throw new MalformedExpressionException($this, 'An expression that has more than a single root is not well defined.'); 192 } 193 } else { 194 $this->definition()->ensureWellFormed($this); 195 } 196 if ($recursive) { 197 foreach ($this->args() as $arg) { 198 if ($arg instanceof ElementInstance) { 199 $arg->ensureWellFormed($recursive); 200 } 201 } 202 } 203 } finally { 204 --$stack_depth; 205 } 206 } 207 208 public function evaluate() { 209 static $stack_depth = 0; 210 if (++$stack_depth > parse_config()->EXPR_DEPTH_LIMIT) { 211 throw new RuntimeException('Depth limit exceeded.'); 212 } 213 try { 214 if ($this->definition() === null) { 215 $this->ensureWellFormed(false); 216 // The expression is guaranteed to have 0 or 1 arguments because it's well formed. 217 if (count($this->args()) == 0) { 218 return null; 219 } 220 return $this->evaluateArgs()[0]; 221 } else { 222 return $this->definition()->evaluate($this); 223 } 224 } finally { 225 --$stack_depth; 226 } 227 } 228} 229 230class ElementDefinition { 231 private $_name = null; 232 private $_arity = null; 233 private $_nested = null; 234 private $_fixing = null; 235 private $_priority = null; 236 private $_tokenDefs = null; 237 238 public function arity() { return $this->_arity; } 239 public function nested() { return $this->_nested; } 240 public function fixing() { return $this->_fixing; } 241 public function priority() { return $this->_priority; } 242 public function tokenDefs() { return $this->_tokenDefs; } 243 public function name() { return $this->_name; } 244 245 public function __construct($name, $fixing, $tokenDefs, $priority, $arity=null, $nested=null) { 246 if ($tokenDefs instanceof TokenDefinition) { 247 $tokenDefs = array($tokenDefs); 248 } 249 if (!is_array($tokenDefs)) { 250 throw new InvalidArgumentException('tokenDefs can only be a TokenDefinition or an array of them.'); 251 } 252 if ($arity === null) { 253 switch ($fixing) { 254 case Fixing::None: 255 $arity = 0; 256 break; 257 case Fixing::Prefix: 258 case Fixing::Postfix: 259 $arity = 1; 260 break; 261 case Fixing::Infix: 262 $arity = -1; 263 break; 264 case Fixing::Wrap: 265 break; 266 default: 267 throw new InvalidArgumentException('Invalid fixing.'); 268 break; 269 } 270 } 271 $this->_name = $name; 272 $this->_tokenDefs = $tokenDefs; 273 $this->_arity = $arity; 274 $this->_nested = $nested; 275 $this->_fixing = $fixing; 276 $this->_priority = $priority; 277 278 switch ($this->fixing()) { 279 case Fixing::None: 280 if ($this->arity() != 0) { 281 throw new LogicException('An element with no fixing must be 0-ary.'); 282 } 283 if (count($this->tokenDefs()) != 1) { 284 throw new LogicException('An element with no fixing must have exactly 1 token.'); 285 } 286 break; 287 case Fixing::Prefix: 288 if ($this->arity() == 0) { 289 throw new LogicException('An prefix element must be n-ary with n > 0.'); 290 } 291 if (count($this->tokenDefs()) != 1 && count($this->tokenDefs()) != $this->arity()) { 292 throw new LogicException('A n-ary prefix operator must have either 1 or n tokens.'); 293 } 294 break; 295 case Fixing::Postfix: 296 if ($this->arity() == 0) { 297 throw new LogicException('An postfix element must be n-ary with n > 0.'); 298 } 299 if (count($this->tokenDefs()) != 1 && count($this->tokenDefs()) != $this->arity()) { 300 throw new LogicException('A n-ary postfix operator must have either 1 or n tokens.'); 301 } 302 break; 303 case Fixing::Infix: 304 if ($this->arity() == 0 || $this->arity() == 1) { 305 throw new LogicException('An infix element must be n-ary with n > 1.'); 306 } 307 if (count($this->tokenDefs()) != 1 && count($this->tokenDefs()) != $this->arity() - 1) { 308 throw new LogicException('A n-ary infix operator must have either 1 or n-1 tokens.'); 309 } 310 break; 311 case Fixing::Wrap: 312 if ($this->arity() !== null) { 313 throw new LogicException('Arity does not apply to a wrapping element.'); 314 } 315 if (count($this->tokenDefs()) != 2) { 316 throw new LogicException('Wrapping operators are identified by exactly two tokens.'); 317 } 318 break; 319 default: 320 throw new InvalidArgumentException('Invalid fixing specified.'); 321 break; 322 } 323 324 if ($this->fixing() == Fixing::Wrap) { 325 if ($this->nested() === null) { 326 throw new LogicException('You must specify whether a wrapping operator is nested.'); 327 } 328 } else { 329 if ($this->nested() !== null) { 330 throw new LogicException('Nested applies only to wrapping operators.'); 331 } 332 } 333 } 334 335 private static function _getLongestAlternateChain($args, $position, $tokDef, $stopAt=-1) { 336 $nFound = 0; 337 for ($lastFound = $position; $lastFound < count($args); $lastFound += 2) { 338 if ($args[$lastFound]->definition() == $tokDef) { 339 if ($stopAt >= 0 && $nFound >= $stopAt) { 340 break; 341 } 342 ++$nFound; 343 } else { 344 break; 345 } 346 } 347 return $nFound; 348 } 349 350 private static function _isMatchingAlternateChain($args, $position, $tokDefs) { 351 $tokDefIdx = 0; 352 for ($lastFound = $position; $lastFound < count($args) && $tokDefIdx < count($tokDefs); $lastFound += 2) { 353 if ($args[$lastFound]->definition() == $tokDefs[$tokDefIdx]) { 354 ++$tokDefIdx; 355 } else { 356 return false; 357 } 358 } 359 return ($tokDefIdx == count($tokDefs)); 360 } 361 362 private static function _getWrappedSequence($args, $position, $tokDefs, $nested) { 363 if (count($tokDefs) != 2) { 364 throw new LogicException('Wrapping operators must have exactly 2 tokens.'); 365 } 366 list($openTokDef, $closeTokDef) = $tokDefs; 367 if ($args[$position]->definition() != $openTokDef) { 368 return 0; 369 } 370 $nest = 1; 371 for ($i = $position + 1; $i < count($args); ++$i) { 372 if ($args[$i]->definition() == $closeTokDef) { 373 if (--$nest <= 0) { 374 return $i - $position + 1; 375 } 376 } else if ($nested && $args[$i]->definition() == $openTokDef) { 377 ++$nest; 378 } 379 } 380 return 1; // Which means unmatched sequence 381 } 382 383 private static function _extractAlternateChain($args, $position, $length) { 384 $retval = array(); 385 for ($i = $position; $i < count($args) && count($retval) < $length; $i += 2) { 386 $retval[] = $args[$i]; 387 } 388 return $retval; 389 } 390 391 private static function _splicePrefix(&$args, $firstTokPosition, $chainLength, $definition) { 392 if ($firstTokPosition < 0) { 393 throw new InvalidArgumentException('Attempt to _splicePrefix with a negative offset.'); 394 } 395 $elmArgs = self::_extractAlternateChain($args, $firstTokPosition + 1, $chainLength); 396 $elmInst = new ElementInstance($definition, $elmArgs); 397 if ($firstTokPosition + $chainLength * 2 > count($args)) { 398 throw new NotEnoughArgumentsException($definition); 399 } 400 array_splice($args, $firstTokPosition, $chainLength * 2, array($elmInst)); 401 return $firstTokPosition; 402 } 403 404 private static function _splicePostfix(&$args, $firstTokPosition, $chainLength, $definition) { 405 if ($firstTokPosition < 0) { 406 throw new InvalidArgumentException('Attempt to _splicePostfix with a negative offset.'); 407 } 408 $elmArgs = self::_extractAlternateChain($args, $firstTokPosition - 1, $chainLength); 409 $elmInst = new ElementInstance($definition, $elmArgs); 410 if ($firstTokPosition == 0) { 411 throw new NotEnoughArgumentsException($definition); 412 } elseif ($firstTokPosition + $chainLength * 2 - 1 > count($args)) { 413 throw new NotEnoughArgumentsException($definition); 414 } 415 array_splice($args, $firstTokPosition - 1, $chainLength * 2, array($elmInst)); 416 return $firstTokPosition - 1; 417 } 418 419 private static function _spliceInfix(&$args, $firstTokPosition, $chainLength, $definition) { 420 if ($firstTokPosition < 0) { 421 throw new InvalidArgumentException('Attempt to _spliceInfix with a negative offset.'); 422 } 423 $elmArgs = self::_extractAlternateChain($args, $firstTokPosition - 1, $chainLength + 1); 424 $elmInst = new ElementInstance($definition, $elmArgs); 425 if ($firstTokPosition == 0) { 426 throw new NotEnoughArgumentsException($definition); 427 } elseif ($firstTokPosition + $chainLength * 2 > count($args)) { 428 throw new NotEnoughArgumentsException($definition); 429 } 430 array_splice($args, $firstTokPosition - 1, $chainLength * 2 + 1, array($elmInst)); 431 return $firstTokPosition - 1; 432 } 433 434 private static function _spliceWrap(&$args, $firstTokPosition, $sequenceLength, $definition) { 435 if ($firstTokPosition < 0) { 436 throw new InvalidArgumentException('Attempt to _spliceWrap with a negative offset.'); 437 } 438 if ($sequenceLength < 2) { 439 throw new LogicException('A wrapping sequence must consist of at least the two wrapping tokens.'); 440 } 441 if ($firstTokPosition + $sequenceLength > count($args)) { 442 throw new LogicException('You requested to cut a sequence longer than the number of tokens.'); 443 } 444 $elmArgs = array_slice($args, $firstTokPosition + 1, $sequenceLength - 2); 445 $elmInst = new ElementInstance($definition, $elmArgs); 446 array_splice($args, $firstTokPosition, $sequenceLength, array($elmInst)); 447 return $firstTokPosition; 448 } 449 450 private static function _spliceNone(&$args, $firstTokPosition, $definition) { 451 if ($firstTokPosition < 0) { 452 throw new InvalidArgumentException('Attempt to _spliceNone with a negative offset.'); 453 } 454 if ($firstTokPosition + 1 > count($args)) { 455 throw new LogicException('You requested to cut a token at the end of the tokens array.'); 456 } 457 array_splice($args, $firstTokPosition, 1, array(new ElementInstance($definition, array($args[$firstTokPosition])))); 458 return $firstTokPosition; 459 } 460 461 public function trySpliceAt(&$args, &$position) { 462 switch ($this->fixing()) { 463 case Fixing::None: 464 if ($args[$position]->definition() == $this->tokenDefs()[0]) { 465 $position = self::_spliceNone($args, $position, $this); 466 return true; 467 } 468 break; 469 case Fixing::Prefix: 470 if ($this->arity() < 0) { 471 $chainLength = self::_getLongestAlternateChain($args, $position, $this->tokenDefs()[0]); 472 if ($chainLength > 0) { 473 $position = self::_splicePrefix($args, $position, $chainLength, $this); 474 return true; 475 } 476 } else if (count($this->tokenDefs()) == 1) { 477 $chainLength = self::_getLongestAlternateChain($args, $position, $this->tokenDefs()[0], $this->arity()); 478 if ($chainLength == $this->arity()) { 479 $position = self::_splicePrefix($args, $position, $chainLength, $this); 480 return true; 481 } 482 } else if (self::_isMatchingAlternateChain($args, $position, $this->tokenDefs())) { 483 $position = self::_splicePrefix($args, $position, $this->arity(), $this); 484 return true; 485 } 486 break; 487 case Fixing::Postfix: 488 if ($this->arity() < 0) { 489 $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0]); 490 if ($chainLength > 0) { 491 $position = self::_splicePostfix($args, $position + 1, $chainLength, $this); 492 return true; 493 } 494 } else if (count($this->tokenDefs()) == 1) { 495 $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0], $this->arity()); 496 if ($chainLength == $this->arity()) { 497 $position = self::_splicePostfix($args, $position + 1, $chainLength, $this); 498 return true; 499 } 500 } else if (self::_isMatchingAlternateChain($args, $position + 1, $this->tokenDefs())) { 501 $position = self::_splicePostfix($args, $position + 1, $this->arity(), $this); 502 return true; 503 } 504 break; 505 case Fixing::Infix: 506 if ($this->arity() < 0) { 507 $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0]); 508 if ($chainLength > 0) { 509 $position = self::_spliceInfix($args, $position + 1, $chainLength, $this); 510 return true; 511 } 512 } else if (count($this->tokenDefs()) == 1) { 513 $chainLength = self::_getLongestAlternateChain($args, $position + 1, $this->tokenDefs()[0], $this->arity()); 514 if ($chainLength == $this->arity() - 1) { 515 $position = self::_spliceInfix($args, $position + 1, $chainLength, $this); 516 return true; 517 } 518 } else if (self::_isMatchingAlternateChain($args, $position + 1, $this->tokenDefs())) { 519 $position = self::_spliceInfix($args, $position + 1, $this->arity() - 1, $this); 520 return true; 521 } 522 break; 523 case Fixing::Wrap: 524 $sequenceLength = self::_getWrappedSequence($args, $position, $this->tokenDefs(), $this->nested()); 525 if ($sequenceLength >= 2) { 526 $position = self::_spliceWrap($args, $position, $sequenceLength, $this); 527 return true; 528 } elseif ($sequenceLength == 1) { 529 throw new UnmatchedWrapperException($this, $args[$position]); 530 } 531 break; 532 } 533 } 534 535 public function spliceInstancesIn(&$args) { 536 $somethingHappened = false; 537 for ($i = 0; $i < count($args); ++$i) { 538 if ($this->trySpliceAt($args, $i)) { 539 $somethingHappened = true; 540 } 541 } 542 return $somethingHappened; 543 } 544 545 public function _evaluateWellFormed($elmInstance) { 546 return null; 547 } 548 549 public function evaluate($elmInstance) { 550 $this->ensureWellFormed($elmInstance); 551 return $this->_evaluateWellFormed($elmInstance); 552 } 553 554 public function getTokenDefRepr($idx) { 555 if ($idx < count($this->tokenDefs())) { 556 return $this->tokenDefs()[$idx]->representation(); 557 } elseif (count($this->tokenDefs()) == 1) { 558 return $this->tokenDefs()[0]->representation(); 559 } 560 return ''; 561 } 562 563 public function getRepresentation($elmInstance) { 564 $this->ensureWellFormed($elmInstance); 565 $argsRepr = array(); 566 foreach ($elmInstance->args() as $arg) { 567 if ($arg instanceof TokenInstance) { 568 $argsRepr[] = $arg->match(); 569 } else { 570 $argsRepr[] = $arg->getRepresentation(); 571 } 572 } 573 $pieces = array(); 574 if ($this->fixing() == Fixing::Prefix || $this->fixing() == Fixing::Wrap) { 575 $pieces[] = $this->getTokenDefRepr(0); 576 } 577 $argIdx = ($this->fixing() == Fixing::Postfix || $this->fixing() == Fixing::Infix ? -1 : 0); 578 foreach ($argsRepr as $argRepr) { 579 $pieces[] = $argRepr; 580 ++$argIdx; 581 if ($this->fixing() == Fixing::None && $this->fixing() == Fixing::Wrap) { 582 continue; 583 } 584 if ($argIdx >= count($argsRepr) || ($this->fixing() == Fixing::Infix && $argIdx >= count($argsRepr) - 1)) { 585 continue; 586 } 587 $pieces[] = $this->getTokenDefRepr($argIdx); 588 } 589 if ($this->fixing() == Fixing::Wrap) { 590 $pieces[] = $this->getTokenDefRepr(1); 591 } 592 return implode('', $pieces); 593 } 594 595 static public function extractUsedTokens($elmDefs) { 596 $retval = array(); 597 foreach ($elmDefs as $elmDef) { 598 $retval = array_merge($retval, $elmDef->tokenDefs()); 599 } 600 return $retval; 601 } 602 603 public function ensureWellFormed($elmInstance) { 604 if ($elmInstance->definition() != $this) { 605 throw new LogicException('This instance is not an instance of the given definition.'); 606 } 607 $args = $elmInstance->args(); 608 // Check arity 609 switch ($this->fixing()) { 610 case Fixing::None: 611 if (count($args) != 1) { 612 throw new MalformedExpressionException($elmInstance, 'Expected exactly one token.'); 613 } else if (!($args[0] instanceof TokenInstance)) { 614 throw new MalformedExpressionException($elmInstance, 'Expected a token instance.'); 615 } else if ($args[0]->definition() != $this->tokenDefs()[0]) { 616 throw new MalformedExpressionException($elmInstance, 'Expected a token of type ' . $this->tokenDefs()[0]->name()); 617 } 618 break; 619 case Fixing::Prefix: 620 case Fixing::Postfix: 621 case Fixing::Infix: 622 if ($this->arity() < 0) { // Any number of tokens, > 0 623 if (count($args) < 1 && $this->fixing() == Fixing::Infix) { 624 throw new MalformedExpressionException($elmInstance, 'Expected at least two arguments.'); 625 } elseif (count($args) == 0) { 626 throw new MalformedExpressionException($elmInstance, 'Expected at least one argument.'); 627 } 628 } elseif (count($args) != $this->arity()) { 629 throw new MalformedExpressionException($elmInstance, 'Expected exactly ' . $this->arity() . ' arguments.'); 630 } 631 break; 632 } 633 } 634} 635 636function parse(array $tokInsts, array $elmDefs) { 637 usort($elmDefs, function ($a, $b) { return $a->priority() - $b->priority(); }); 638 $root = new ElementInstance(null, $tokInsts); 639 foreach ($elmDefs as $elmDef) { 640 $root->expand($elmDef); 641 } 642 if (!$root->isExpanded()) { 643 throw new StrayTokenException($root->findUnexpandedToken()); 644 } 645 return $root; 646} 647 648?>