1<?php 2 3/** 4 * Hoa 5 * 6 * 7 * @license 8 * 9 * New BSD License 10 * 11 * Copyright © 2007-2017, Hoa community. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions are met: 15 * * Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * * Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * * Neither the name of the Hoa nor the names of its contributors may be 21 * used to endorse or promote products derived from this software without 22 * specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE 28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37namespace Hoa\Compiler\Llk\Rule; 38 39use Hoa\Compiler; 40use Hoa\Iterator; 41 42/** 43 * Class \Hoa\Compiler\Llk\Rule\Analyzer. 44 * 45 * Analyze rules and transform them into atomic rules operations. 46 * 47 * @copyright Copyright © 2007-2017 Hoa community 48 * @license New BSD License 49 */ 50class Analyzer 51{ 52 /** 53 * PP lexemes. 54 */ 55 protected static $_ppLexemes = [ 56 'default' => [ 57 'skip' => '\s', 58 'or' => '\|', 59 'zero_or_one' => '\?', 60 'one_or_more' => '\+', 61 'zero_or_more' => '\*', 62 'n_to_m' => '\{[0-9]+,[0-9]+\}', 63 'zero_to_m' => '\{,[0-9]+\}', 64 'n_or_more' => '\{[0-9]+,\}', 65 'exactly_n' => '\{[0-9]+\}', 66 'skipped' => '::[a-zA-Z_][a-zA-Z0-9_]*(\[\d+\])?::', 67 'kept' => '<[a-zA-Z_][a-zA-Z0-9_]*(\[\d+\])?>', 68 'named' => '[a-zA-Z_][a-zA-Z0-9_]*\(\)', 69 'node' => '#[a-zA-Z_][a-zA-Z0-9_]*(:[mM])?', 70 'capturing_' => '\(', 71 '_capturing' => '\)' 72 ] 73 ]; 74 75 /** 76 * Lexer iterator. 77 * 78 * @var \Hoa\Iterator\Lookahead 79 */ 80 protected $_lexer = null; 81 82 /** 83 * Tokens representing rules. 84 * 85 * @var array 86 */ 87 protected $_tokens = null; 88 89 /** 90 * Rules. 91 * 92 * @var array 93 */ 94 protected $_rules = null; 95 96 /** 97 * Rule name being analyzed. 98 * 99 * @var string 100 */ 101 private $_ruleName = null; 102 103 /** 104 * Parsed rules. 105 * 106 * @var array 107 */ 108 protected $_parsedRules = null; 109 110 /** 111 * Counter to auto-name transitional rules. 112 * 113 * @var int 114 */ 115 protected $_transitionalRuleCounter = 0; 116 117 118 119 /** 120 * Constructor. 121 * 122 * @param array $tokens Tokens. 123 */ 124 public function __construct(array $tokens) 125 { 126 $this->_tokens = $tokens; 127 128 return; 129 } 130 131 /** 132 * Build the analyzer of the rules (does not analyze the rules). 133 * 134 * @param array $rules Rule to be analyzed. 135 * @return void 136 * @throws \Hoa\Compiler\Exception 137 */ 138 public function analyzeRules(array $rules) 139 { 140 if (empty($rules)) { 141 throw new Compiler\Exception\Rule('No rules specified!', 0); 142 } 143 144 $this->_parsedRules = []; 145 $this->_rules = $rules; 146 $lexer = new Compiler\Llk\Lexer(); 147 148 foreach ($rules as $key => $value) { 149 $this->_lexer = new Iterator\Lookahead($lexer->lexMe($value, static::$_ppLexemes)); 150 $this->_lexer->rewind(); 151 152 $this->_ruleName = $key; 153 $nodeId = null; 154 155 if ('#' === $key[0]) { 156 $nodeId = $key; 157 $key = substr($key, 1); 158 } 159 160 $pNodeId = $nodeId; 161 $rule = $this->rule($pNodeId); 162 163 if (null === $rule) { 164 throw new Compiler\Exception( 165 'Error while parsing rule %s.', 166 1, 167 $key 168 ); 169 } 170 171 $zeRule = $this->_parsedRules[$rule]; 172 $zeRule->setName($key); 173 $zeRule->setPPRepresentation($value); 174 175 if (null !== $nodeId) { 176 $zeRule->setDefaultId($nodeId); 177 } 178 179 unset($this->_parsedRules[$rule]); 180 $this->_parsedRules[$key] = $zeRule; 181 } 182 183 return $this->_parsedRules; 184 } 185 186 /** 187 * Implementation of “rule”. 188 * 189 * @return mixed 190 */ 191 protected function rule(&$pNodeId) 192 { 193 return $this->choice($pNodeId); 194 } 195 196 /** 197 * Implementation of “choice”. 198 * 199 * @return mixed 200 */ 201 protected function choice(&$pNodeId) 202 { 203 $children = []; 204 205 // concatenation() … 206 $nNodeId = $pNodeId; 207 $rule = $this->concatenation($nNodeId); 208 209 if (null === $rule) { 210 return null; 211 } 212 213 if (null !== $nNodeId) { 214 $this->_parsedRules[$rule]->setNodeId($nNodeId); 215 } 216 217 $children[] = $rule; 218 $others = false; 219 220 // … ( ::or:: concatenation() )* 221 while ('or' === $this->_lexer->current()['token']) { 222 $this->_lexer->next(); 223 $others = true; 224 $nNodeId = $pNodeId; 225 $rule = $this->concatenation($nNodeId); 226 227 if (null === $rule) { 228 return null; 229 } 230 231 if (null !== $nNodeId) { 232 $this->_parsedRules[$rule]->setNodeId($nNodeId); 233 } 234 235 $children[] = $rule; 236 } 237 238 $pNodeId = null; 239 240 if (false === $others) { 241 return $rule; 242 } 243 244 $name = $this->_transitionalRuleCounter++; 245 $this->_parsedRules[$name] = new Choice($name, $children); 246 247 return $name; 248 } 249 250 /** 251 * Implementation of “concatenation”. 252 * 253 * @return mixed 254 */ 255 protected function concatenation(&$pNodeId) 256 { 257 $children = []; 258 259 // repetition() … 260 $rule = $this->repetition($pNodeId); 261 262 if (null === $rule) { 263 return null; 264 } 265 266 $children[] = $rule; 267 $others = false; 268 269 // … repetition()* 270 while (null !== $r1 = $this->repetition($pNodeId)) { 271 $children[] = $r1; 272 $others = true; 273 } 274 275 if (false === $others && null === $pNodeId) { 276 return $rule; 277 } 278 279 $name = $this->_transitionalRuleCounter++; 280 $this->_parsedRules[$name] = new Concatenation( 281 $name, 282 $children, 283 null 284 ); 285 286 return $name; 287 } 288 289 /** 290 * Implementation of “repetition”. 291 * 292 * @return mixed 293 * @throws \Hoa\Compiler\Exception 294 */ 295 protected function repetition(&$pNodeId) 296 { 297 // simple() … 298 $children = $this->simple($pNodeId); 299 300 if (null === $children) { 301 return null; 302 } 303 304 // … quantifier()? 305 switch ($this->_lexer->current()['token']) { 306 case 'zero_or_one': 307 $min = 0; 308 $max = 1; 309 $this->_lexer->next(); 310 311 break; 312 313 case 'one_or_more': 314 $min = 1; 315 $max = -1; 316 $this->_lexer->next(); 317 318 break; 319 320 case 'zero_or_more': 321 $min = 0; 322 $max = -1; 323 $this->_lexer->next(); 324 325 break; 326 327 case 'n_to_m': 328 $handle = trim($this->_lexer->current()['value'], '{}'); 329 $nm = explode(',', $handle); 330 $min = (int) trim($nm[0]); 331 $max = (int) trim($nm[1]); 332 $this->_lexer->next(); 333 334 break; 335 336 case 'zero_to_m': 337 $min = 0; 338 $max = (int) trim($this->_lexer->current()['value'], '{,}'); 339 $this->_lexer->next(); 340 341 break; 342 343 case 'n_or_more': 344 $min = (int) trim($this->_lexer->current()['value'], '{,}'); 345 $max = -1; 346 $this->_lexer->next(); 347 348 break; 349 350 case 'exactly_n': 351 $handle = trim($this->_lexer->current()['value'], '{}'); 352 $min = (int) $handle; 353 $max = $min; 354 $this->_lexer->next(); 355 356 break; 357 } 358 359 // … <node>? 360 if ('node' === $this->_lexer->current()['token']) { 361 $pNodeId = $this->_lexer->current()['value']; 362 $this->_lexer->next(); 363 } 364 365 if (!isset($min)) { 366 return $children; 367 } 368 369 if (-1 != $max && $max < $min) { 370 throw new Compiler\Exception( 371 'Upper bound %d must be greater or ' . 372 'equal to lower bound %d in rule %s.', 373 2, 374 [$max, $min, $this->_ruleName] 375 ); 376 } 377 378 $name = $this->_transitionalRuleCounter++; 379 $this->_parsedRules[$name] = new Repetition( 380 $name, 381 $min, 382 $max, 383 $children, 384 null 385 ); 386 387 return $name; 388 } 389 390 /** 391 * Implementation of “simple”. 392 * 393 * @return mixed 394 * @throws \Hoa\Compiler\Exception 395 * @throws \Hoa\Compiler\Exception\Rule 396 */ 397 protected function simple(&$pNodeId) 398 { 399 if ('capturing_' === $this->_lexer->current()['token']) { 400 $this->_lexer->next(); 401 $rule = $this->choice($pNodeId); 402 403 if (null === $rule) { 404 return null; 405 } 406 407 if ('_capturing' != $this->_lexer->current()['token']) { 408 return null; 409 } 410 411 $this->_lexer->next(); 412 413 return $rule; 414 } 415 416 if ('skipped' === $this->_lexer->current()['token']) { 417 $tokenName = trim($this->_lexer->current()['value'], ':'); 418 419 if (']' === substr($tokenName, -1)) { 420 $uId = (int) substr($tokenName, strpos($tokenName, '[') + 1, -1); 421 $tokenName = substr($tokenName, 0, strpos($tokenName, '[')); 422 } else { 423 $uId = -1; 424 } 425 426 $exists = false; 427 428 foreach ($this->_tokens as $namespace => $tokens) { 429 foreach ($tokens as $token => $value) { 430 if ($token === $tokenName || 431 substr($token, 0, strpos($token, ':')) === $tokenName) { 432 $exists = true; 433 434 break 2; 435 } 436 } 437 } 438 439 if (false == $exists) { 440 throw new Compiler\Exception( 441 'Token ::%s:: does not exist in rule %s.', 442 3, 443 [$tokenName, $this->_ruleName] 444 ); 445 } 446 447 $name = $this->_transitionalRuleCounter++; 448 $this->_parsedRules[$name] = new Token( 449 $name, 450 $tokenName, 451 null, 452 $uId 453 ); 454 $this->_lexer->next(); 455 456 return $name; 457 } 458 459 if ('kept' === $this->_lexer->current()['token']) { 460 $tokenName = trim($this->_lexer->current()['value'], '<>'); 461 462 if (']' === substr($tokenName, -1)) { 463 $uId = (int) substr($tokenName, strpos($tokenName, '[') + 1, -1); 464 $tokenName = substr($tokenName, 0, strpos($tokenName, '[')); 465 } else { 466 $uId = -1; 467 } 468 469 $exists = false; 470 471 foreach ($this->_tokens as $namespace => $tokens) { 472 foreach ($tokens as $token => $value) { 473 if ($token === $tokenName 474 || substr($token, 0, strpos($token, ':')) === $tokenName) { 475 $exists = true; 476 477 break 2; 478 } 479 } 480 } 481 482 if (false == $exists) { 483 throw new Compiler\Exception( 484 'Token <%s> does not exist in rule %s.', 485 4, 486 [$tokenName, $this->_ruleName] 487 ); 488 } 489 490 $name = $this->_transitionalRuleCounter++; 491 $token = new Token( 492 $name, 493 $tokenName, 494 null, 495 $uId, 496 true 497 ); 498 $this->_parsedRules[$name] = $token; 499 $this->_lexer->next(); 500 501 return $name; 502 } 503 504 if ('named' === $this->_lexer->current()['token']) { 505 $tokenName = rtrim($this->_lexer->current()['value'], '()'); 506 507 if (false === array_key_exists($tokenName, $this->_rules) && 508 false === array_key_exists('#' . $tokenName, $this->_rules)) { 509 throw new Compiler\Exception\Rule( 510 'Cannot call rule %s() in rule %s because it does not exist.', 511 5, 512 [$tokenName, $this->_ruleName] 513 ); 514 } 515 516 if (0 === $this->_lexer->key() && 517 'EOF' === $this->_lexer->getNext()['token']) { 518 $name = $this->_transitionalRuleCounter++; 519 $this->_parsedRules[$name] = new Concatenation( 520 $name, 521 [$tokenName], 522 null 523 ); 524 } else { 525 $name = $tokenName; 526 } 527 528 $this->_lexer->next(); 529 530 return $name; 531 } 532 533 return null; 534 } 535} 536