<?php /** @noinspection PhpUnused */

/**
 * A visitor that compiles the AST into an XPath expression
 *
 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
 * @copyright Copyright 2010-2014 PhpCss Team
 */

namespace PhpCss\Ast\Visitor {

  use InvalidArgumentException;
  use LogicException;
  use PhpCss\Ast;
  use PhpCss\Exception;
  use Transliterator;

  /**
   * A visitor that compiles the AST into an XPath expression.
   *
   * The expression is collected in an internal string buffer while the AST is
   * visited and can be fetched by casting the visitor to string.
   */
  class Xpath extends Overload {

    /**
     * use explicit namespaces only, no defined namespace means no namespaces. This option and
     * OPTION_DEFAULT_NAMESPACE can not be used at the same time.
     */
    public const OPTION_EXPLICIT_NAMESPACES = 1;

    /**
     * use a default namespace, no defined namespace means both no and the default namespace.
     * This option and OPTION_EXPLICIT_NAMESPACES can not be used at the same time.
     *
     * If not changed 'html' is used as the additional prefix for elements.
     *
     * Example: foo -> *[(self::foo or self::html:foo)]
     *
     */
    public const OPTION_DEFAULT_NAMESPACE = 16;

    /**
     * start expressions in document context
     * (both constants share the same value, they are aliases)
     */
    public const OPTION_USE_DOCUMENT_CONTEXT = 2;
    public const OPTION_USE_CONTEXT_DOCUMENT = 2;

    /**
     * start expressions in descendant-or-self context
     */
    public const OPTION_USE_CONTEXT_SELF = 32;
    /**
     * limit expressions to self context
     */
    public const OPTION_USE_CONTEXT_SELF_LIMIT = 64;

    /**
     * lowercase the element names (not the namespace prefixes)
     */
    public const OPTION_LOWERCASE_ELEMENTS = 4;
    /**
     * use xml:id and xml:lang not just id or lang
     */
    public const OPTION_XML_ATTRIBUTES = 8;

    private const STATUS_DEFAULT = 0;
    private const STATUS_ELEMENT = 1;
    private const STATUS_CONDITION = 2;
    private const STATUS_COMBINATOR = 3;
    private const STATUS_PSEUDOCLASS = 4;

    /**
     * The XPath expression collected so far
     * @var string
     */
    private $_buffer = '';

    /**
     * Current visitor status (position in expression)
     * @var integer
     */
    private $_status = self::STATUS_DEFAULT;

    /**
     * Visitor mode
     * @var integer
     */
    private $_options = 0;

    /**
     * The default namespace prefix used for elements with no namespace prefix if OPTION_DEFAULT_NAMESPACE is
     * active.
     */
    private const DEFAULT_NAMESPACE_PREFIX = 'html';

    /**
     * @var string
     */
    private $_defaultNamespacePrefix = self::DEFAULT_NAMESPACE_PREFIX;

    /**
     * store expressions for use in visitor methods, the actual expression can depend on
     * the visitor methods called before.
     *
     * @var array
     */
    private $_expressions = [];

    /**
     * store an expression for the current element (type selector)
     * @var string
     */
    private $_element = '*';

    /**
     * Create visitor and store mode options
     *
     * @param integer $options
     * @param string $defaultPrefix
     * @throws InvalidArgumentException
     */
    public function __construct($options = 0, $defaultPrefix = self::DEFAULT_NAMESPACE_PREFIX) {
      $this->setOptions($options, $defaultPrefix);
    }

    /**
     * Validate and store the options.
     *
     * The NEW option value is validated (not the one stored by a previous call)
     * and nothing is stored if the validation fails.
     *
     * @param int $options
     * @param string $defaultPrefix
     * @throws InvalidArgumentException
     */
    public function setOptions(
      int $options = 0, string $defaultPrefix = self::DEFAULT_NAMESPACE_PREFIX
    ): void {
      $exclusive = self::OPTION_EXPLICIT_NAMESPACES | self::OPTION_DEFAULT_NAMESPACE;
      if (($options & $exclusive) === $exclusive) {
        throw new InvalidArgumentException(
          'Options OPTION_EXPLICIT_NAMESPACES and OPTION_DEFAULT_NAMESPACE can not be set at the same time.'
        );
      }
      $prefix = trim($defaultPrefix);
      if ($prefix === '') {
        throw new InvalidArgumentException(
          'The default namespace prefix "'.$defaultPrefix.'" is not valid.'
        );
      }
      $this->_options = $options;
      $this->_defaultNamespacePrefix = $prefix;
    }

    /**
     * Clear the visitor object to visit another selector group.
     *
     * Resets the buffer, the status and the element/expression state, so
     * nothing from a previously compiled selector leaks into the next one.
     */
    public function clear(): void {
      $this->_buffer = '';
      $this->_status = self::STATUS_DEFAULT;
      $this->_expressions = [];
      $this->_element = '*';
    }

    /**
     * Add a string to the buffer
     *
     * @param string $string
     */
    private function add(string $string): void {
      $this->_buffer .= $string;
    }

    /**
     * Get/Set the current visiting status
     *
     * @param null|int $status new status, NULL keeps the current one
     * @return int the (new) current status
     */
    private function status($status = NULL): int {
      if (isset($status)) {
        $this->_status = $status;
      }
      return $this->_status;
    }

    /**
     * Read the status of an option
     *
     * @param int $option one of the OPTION_* constants
     * @return bool TRUE if all bits of $option are set in the stored options
     */
    public function hasOption($option): bool {
      return ($this->_options & $option) === $option;
    }

    /**
     * Return the collected selector string
     */
    public function __toString() {
      return $this->_buffer;
    }

    /**
     * Remember the expression for the current element. The value is only
     * stored at the start of a step (default or combinator status).
     *
     * @param string $element
     */
    private function setElement($element): void {
      switch ($this->status()) {
      case self::STATUS_DEFAULT :
      case self::STATUS_COMBINATOR :
        $this->_element = $element;
        break;
      }
    }

    /**
     * prepare buffer to add a condition to the xpath expression
     *
     * Depending on the status this opens a new predicate, continues the
     * current one with " and " or (inside a pseudo class) adds the condition
     * without any prefix.
     *
     * @param string $condition
     */
    private function addCondition($condition): void {
      if (!empty($condition)) {
        switch ($this->status()) {
        case self::STATUS_DEFAULT :
        case self::STATUS_COMBINATOR :
          $this->setElement('*');
          $this->add('*[');
          break;
        case self::STATUS_PSEUDOCLASS :
          // the pseudo class already opened the expression, keep the status
          $this->add($condition);
          return;
        case self::STATUS_ELEMENT :
          $this->add('[');
          break;
        case self::STATUS_CONDITION :
          $this->add(' and ');
          break;
        }
        $this->status(self::STATUS_CONDITION);
        $this->add($condition);
      }
    }

    /**
     * end condition if in condition status
     */
    private function endConditions(): void {
      if ($this->status() === self::STATUS_CONDITION) {
        $this->add(']');
      }
      $this->status(self::STATUS_DEFAULT);
    }

    /**
     * Quote literal if needed
     *
     * Double quotes are the default delimiter. A literal containing double
     * quotes is delimited by single quotes. If it contains both, it is split
     * at the double quotes and put together again using concat().
     *
     * @param string $literal
     * @return string
     */
    private function quoteLiteral(string $literal): string {
      $hasDoubleQuote = FALSE !== strpos($literal, '"');
      if ($hasDoubleQuote) {
        $hasSingleQuote = FALSE !== strpos($literal, "'");
        if ($hasSingleQuote) {
          $result = '';
          $parts = explode('"', $literal);
          foreach ($parts as $part) {
            $result .= ", '\"'";
            if ("" !== $part) {
              $result .= ', "'.$part.'"';
            }
          }
          // strip the leading `, '"', ` (7 characters) added before the first part
          return 'concat('.substr($result, 7).')';
        }
        return "'".$literal."'";
      }
      return '"'.$literal.'"';
    }

    /**
     * Validate the buffer before visiting a Ast\Selector\Group.
     * If the buffer already contains data, throw an exception.
     *
     * @param Ast\Selector\Group $group
     * @return boolean
     * @throws LogicException
     */
    public function visitEnterSelectorSequenceGroup(Ast\Selector\Group $group): bool {
      if (!empty($this->_buffer)) {
        throw new LogicException(
          sprintf(
            'Visitor buffer already contains data, can not visit "%s"',
            get_class($group)
          )
        );
      }
      return TRUE;
    }

    /**
     * If there is already data in the buffer, add a separator before starting the next.
     * After that the context axis selected by the options is added.
     *
     * @param Ast\Selector\Sequence $sequence
     * @return boolean
     */
    public function visitEnterSelectorSequence(Ast\Selector\Sequence $sequence): bool {
      switch ($this->status()) {
      case self::STATUS_DEFAULT :
        if (!empty($this->_buffer)) {
          $this->add('|');
        }
        // a sequence that consists only of a combinator gets no context prefix here
        if (empty($sequence->simples) && NULL !== $sequence->combinator) {
          return TRUE;
        }
        if ($this->hasOption(self::OPTION_USE_CONTEXT_DOCUMENT)) {
          $this->add('//');
        } elseif ($this->hasOption(self::OPTION_USE_CONTEXT_SELF_LIMIT)) {
          $this->add('self::');
        } elseif ($this->hasOption(self::OPTION_USE_CONTEXT_SELF)) {
          $this->add('descendant-or-self::');
        } else {
          $this->add('.//');
        }
        break;
      case self::STATUS_CONDITION :
        $this->endConditions();
        $this->add('//');
        break;
      }
      return TRUE;
    }

    /**
     * If the visitor is in the condition status, close it.
     *
     * @return boolean
     */
    public function visitLeaveSelectorSequence(): bool {
      $this->endConditions();
      return TRUE;
    }

    /**
     * Output the universal type (* or xmlns|*) selector to the buffer
     *
     * @param Ast\Selector\Simple\Universal $universal
     */
    public function visitSelectorSimpleUniversal(Ast\Selector\Simple\Universal $universal): void {
      // cast to string so a missing (NULL) prefix is treated like an empty one
      if ($universal->namespacePrefix !== '*' && trim((string)$universal->namespacePrefix) !== '') {
        $element = $universal->namespacePrefix.':*';
      } else {
        $element = '*';
      }
      $this->setElement($element);
      $this->add($element);
      $this->status(self::STATUS_ELEMENT);
    }

    /**
     * Output the type (element name) selector to the buffer
     *
     * Depending on the namespace prefix and the options the element is
     * added as a name test or as a condition on any element.
     *
     * @param Ast\Selector\Simple\Type $type
     */
    public function visitSelectorSimpleType(Ast\Selector\Simple\Type $type): void {
      if ($this->hasOption(self::OPTION_LOWERCASE_ELEMENTS)) {
        $elementName = $this->strToLower($type->elementName);
      } else {
        $elementName = $type->elementName;
      }
      if ('' === $type->namespacePrefix && $this->hasOption(self::OPTION_EXPLICIT_NAMESPACES)) {
        $this->add($elementName);
        $this->setElement($elementName);
        $this->status(self::STATUS_ELEMENT);
      } else {
        $isEmptyPrefix = !isset($type->namespacePrefix) || $type->namespacePrefix === '';
        if (!$isEmptyPrefix && $type->namespacePrefix !== '*') {
          $this->add($type->namespacePrefix.':'.$elementName);
          $this->setElement($type->namespacePrefix.':'.$elementName);
          $this->status(self::STATUS_ELEMENT);
        } elseif ($isEmptyPrefix && $this->hasOption(self::OPTION_DEFAULT_NAMESPACE)) {
          $condition = '(self::'.$elementName.' or self::'.$this->_defaultNamespacePrefix.':'.$elementName.')';
          if ($this->status() !== self::STATUS_PSEUDOCLASS) {
            $this->setElement('*['.$condition.']');
            $this->add('*');
            $this->status(self::STATUS_ELEMENT);
          }
          $this->addCondition($condition);
        } else {
          $condition = 'local-name() = '.$this->quoteLiteral($elementName);
          if ($this->status() !== self::STATUS_PSEUDOCLASS) {
            $this->setElement('*['.$condition.']');
            $this->add('*');
            $this->status(self::STATUS_ELEMENT);
          }
          $this->addCondition($condition);
        }
      }
    }

    /**
     * Output the id selector to the buffer
     *
     * @param Ast\Selector\Simple\Id $id
     */
    public function visitSelectorSimpleId(Ast\Selector\Simple\Id $id): void {
      $this->addCondition(
        sprintf(
          '@%1$s = %2$s',
          $this->hasOption(self::OPTION_XML_ATTRIBUTES) ? 'xml:id' : 'id',
          $this->quoteLiteral($id->id)
        )
      );
    }


    /**
     * Output the class selector to the buffer
     *
     * The class name is quoted like any other literal, so quotes inside of
     * it can not break the generated expression.
     *
     * @param Ast\Selector\Simple\ClassName $class
     */
    public function visitSelectorSimpleClassName(Ast\Selector\Simple\ClassName $class): void {
      $this->addCondition(
        sprintf(
          'contains(concat(" ", normalize-space(@class), " "), %s)',
          $this->quoteLiteral(' '.$class->className.' ')
        )
      );
    }

    /**
     * Output an attribute selector as a condition, the expression depends on
     * the match type of the attribute selector.
     *
     * @param Ast\Selector\Simple\Attribute $attribute
     */
    public function visitSelectorSimpleAttribute(
      Ast\Selector\Simple\Attribute $attribute
    ): void {
      switch ($attribute->match) {
      case Ast\Selector\Simple\Attribute::MATCH_PREFIX :
        $condition = sprintf(
          'starts-with(@%s, %s)',
          $attribute->name,
          $this->quoteLiteral($attribute->literal->value)
        );
        break;
      case Ast\Selector\Simple\Attribute::MATCH_SUFFIX :
        // XPath substring() is 1-based, the last N characters start at
        // string-length - (N - 1). N has to be counted in characters.
        $condition = sprintf(
          'substring(@%1$s, string-length(@%1$s) - %2$s) = %3$s',
          $attribute->name,
          $this->strLength((string)$attribute->literal->value) - 1,
          $this->quoteLiteral($attribute->literal->value)
        );
        break;
      case Ast\Selector\Simple\Attribute::MATCH_SUBSTRING :
        $condition = sprintf(
          'contains(@%s, %s)',
          $attribute->name,
          $this->quoteLiteral($attribute->literal->value)
        );
        break;
      case Ast\Selector\Simple\Attribute::MATCH_EQUALS :
        $condition = '@'.$attribute->name.' = '.$this->quoteLiteral($attribute->literal->value);
        break;
      case Ast\Selector\Simple\Attribute::MATCH_INCLUDES :
        $condition = sprintf(
          'contains(concat(" ", normalize-space(@%s), " "), %s)',
          $attribute->name,
          $this->quoteLiteral(' '.trim($attribute->literal->value).' ')
        );
        break;
      case Ast\Selector\Simple\Attribute::MATCH_DASHMATCH :
        $condition = sprintf(
          '(@%1$s = %2$s or substring-before(@%1$s, "-") = %2$s)',
          $attribute->name,
          $this->quoteLiteral($attribute->literal->value)
        );
        break;
      case Ast\Selector\Simple\Attribute::MATCH_EXISTS :
      default :
        $condition = '@'.$attribute->name;
        break;
      }
      if (!empty($condition)) {
        $this->addCondition($condition);
      }
    }

    /**
     * Child combinator: close open conditions and add a child step separator.
     */
    public function visitSelectorCombinatorChild(): void {
      $this->endConditions();
      if ($this->_buffer !== '') {
        $this->add('/');
      }
      $this->status(self::STATUS_COMBINATOR);
    }

    /**
     * Descendant combinator: close open conditions and add a descendant step
     * separator, relative to the context node if the buffer is still empty.
     */
    public function visitSelectorCombinatorDescendant(): void {
      $this->endConditions();
      if ($this->_buffer !== '') {
        $this->add('//');
      } else {
        $this->add('.//');
      }
      $this->status(self::STATUS_COMBINATOR);
    }

    /**
     * Follower combinator: close open conditions and add the following-sibling axis.
     */
    public function visitSelectorCombinatorFollower(): void {
      $this->endConditions();
      if ($this->_buffer !== '') {
        $this->add('/');
      }
      $this->add('following-sibling::');
      $this->status(self::STATUS_COMBINATOR);
    }

    /**
     * Next combinator: close open conditions and select the first following sibling.
     */
    public function visitSelectorCombinatorNext(): void {
      $this->endConditions();
      if ($this->_buffer !== '') {
        $this->add('/');
      }
      $this->add('following-sibling::*[1]/self::');
      $this->status(self::STATUS_COMBINATOR);
    }

    /**
     * Output a pseudo class without parameters as a condition.
     *
     * @param Ast\Selector\Simple\PseudoClass $pseudoClass
     * @throws Exception\NotConvertibleException if the pseudo class is not supported
     */
    public function visitSelectorSimplePseudoClass(Ast\Selector\Simple\PseudoClass $pseudoClass): void {
      switch ($pseudoClass->name) {
      case 'root' :
        $condition = '(. = //*)';
        break;
      case 'empty' :
        $condition = '(count(*|text()) = 0)';
        break;
      case 'enabled' :
        $condition = 'not(@disabled)';
        break;
      case 'disabled' :
      case 'checked' :
        $condition = '@'.$pseudoClass->name;
        break;
      case 'first-child' :
        $condition = 'position() = 1';
        break;
      case 'last-child' :
        $condition = 'position() = last()';
        break;
      case 'first-of-type' :
        $condition = '(count(preceding-sibling::'.$this->_element.') = 0)';
        break;
      case 'last-of-type' :
        $condition = '(count(following-sibling::'.$this->_element.') = 0)';
        break;
      case 'only-child' :
        $condition = '(count(parent::*/*|parent::*/text()) = 1)';
        break;
      case 'only-of-type' :
        $condition = '(count(parent::*/'.$this->_element.') = 1)';
        break;
      case 'odd' :
        // close the current predicate, the position test goes into a new one
        if ($this->status() === self::STATUS_CONDITION) {
          $this->add(']');
          $this->status(self::STATUS_ELEMENT);
        }
        $condition = 'position() mod 2 = 0';
        break;
      case 'even' :
        // close the current predicate, the position test goes into a new one
        if ($this->status() === self::STATUS_CONDITION) {
          $this->add(']');
          $this->status(self::STATUS_ELEMENT);
        }
        $condition = 'position() mod 2 = 1';
        break;
      default :
        throw new Exception\NotConvertibleException('pseudoclass '.$pseudoClass->name, 'Xpath');
      }
      $this->addCondition($condition);
    }

    /**
     * Start a pseudo class with a parameter.
     *
     * @param Ast\Selector\Simple\PseudoClass $pseudoClass
     * @return bool TRUE for the pseudo classes that open an expression here
     *   (closed in visitLeaveSelectorSimplePseudoClass()), FALSE otherwise
     */
    public function visitEnterSelectorSimplePseudoClass(Ast\Selector\Simple\PseudoClass $pseudoClass): bool {
      switch ($pseudoClass->name) {
      case 'not' :
        $this->addCondition('not(');
        $this->status(self::STATUS_PSEUDOCLASS);
        return TRUE;
      case 'has' :
        $this->addCondition('(');
        $this->status(self::STATUS_DEFAULT);
        return TRUE;
      case 'contains':
        if (
          ($parameter = $pseudoClass->parameter) &&
          ($parameter instanceof Ast\Value\Number || $parameter instanceof Ast\Value\Literal)
        ) {
          $this->addCondition('contains(., '.$this->quoteLiteral($parameter->value));
          $this->status(self::STATUS_PSEUDOCLASS);
        }
        return TRUE;
      case 'gt' :
      case 'lt' :
        if (
          ($parameter = $pseudoClass->parameter) &&
          ($parameter instanceof Ast\Value\Number || $parameter instanceof Ast\Value\Literal)
        ) {
          // close the current predicate, the position test goes into a new one
          if ($this->status() === self::STATUS_CONDITION) {
            $this->add(']');
          }
          $this->status(self::STATUS_ELEMENT);
          $operator = $pseudoClass->name === 'gt' ? '>' : '<';
          // The index is zero based, position() starts at 1. A negative index
          // counts from the end: -1 is the last node, -2 is last() - 1, ...
          $condition = $parameter->value < 0
            ? 'last() - '.abs($parameter->value + 1)
            : $parameter->value + 1;
          $this->addCondition(
            'position() '.$operator.' '.$condition
          );
        }
        break;
      case 'nth-child' :
        $this->addCondition('(');
        $this->status(self::STATUS_PSEUDOCLASS);
        $this->_expressions['position'] = 'position()';
        $this->_expressions['count'] = 'last()';
        return TRUE;
      case 'nth-last-child' :
        $this->addCondition('(');
        $this->status(self::STATUS_PSEUDOCLASS);
        $this->_expressions['position'] = '(last() - position() + 1)';
        // count() without an argument is not valid XPath, last() is the context size
        $this->_expressions['count'] = 'last()';
        return TRUE;
      case 'nth-of-type' :
        $this->addCondition('(');
        $this->status(self::STATUS_PSEUDOCLASS);
        $this->_expressions['position'] = '(count(preceding-sibling::'.$this->_element.') + 1)';
        $this->_expressions['count'] = 'count(parent::*/'.$this->_element.')';
        return TRUE;
      case 'nth-last-of-type' :
        $this->addCondition('(');
        $this->status(self::STATUS_PSEUDOCLASS);
        $this->_expressions['position'] = '(count(following-sibling::'.$this->_element.') + 1)';
        $this->_expressions['count'] = 'count(parent::*/'.$this->_element.')';
        return TRUE;
      }
      return FALSE;
    }

    /**
     * Close the expression opened by visitEnterSelectorSimplePseudoClass()
     * and continue in condition status.
     */
    public function visitLeaveSelectorSimplePseudoClass(): void {
      $this->endConditions();
      $this->add(')');
      $this->status(self::STATUS_CONDITION);
    }

    /**
     * Output a position value (repeat * n + add) using the position and count
     * expressions stored by the surrounding nth-* pseudo class.
     *
     * @param Ast\Value\Position $position
     */
    public function visitValuePosition(
      Ast\Value\Position $position
    ): void {
      $repeat = $position->repeat;
      $add = $position->add;
      $expressionPosition = empty($this->_expressions['position'])
        ? 'position()' : $this->_expressions['position'];
      $expressionCount = empty($this->_expressions['count'])
        ? 'last()' : $this->_expressions['count'];
      if ($repeat === 0) {
        $condition = $expressionPosition.' = '.$add;
      } else {
        // ">=" because (x mod repeat) can never be equal to repeat itself,
        // the offset has to be reduced to the remainder in that case, too
        if ($add >= $repeat) {
          $balance = $add - (floor($add / $repeat) * $repeat);
          $start = $add;
        } elseif ($add < 0) {
          if (abs($add) > $repeat) {
            $balance = $add - (floor($add / $repeat) * $repeat);
            $start = $add;
          } else {
            $balance = $repeat + $add;
            $start = 1;
          }
        } else {
          $balance = $add;
          $start = 1;
        }
        $condition = sprintf('(%s mod %d) = %d', $expressionPosition, $repeat, $balance);
        // the range restriction is an additional condition, join it with "and"
        if ($start > 1) {
          $condition .= sprintf(' and %s >= %d', $expressionPosition, $start);
        } elseif ($start < 0) {
          $condition .= sprintf(' and %s <= %s - %d', $expressionPosition, $expressionCount, abs($start));
        }
      }
      $this->add($condition);
    }

    /**
     * Pseudo elements can not be converted into XPath.
     *
     * @param Ast\Selector\Simple\PseudoElement $pseudoElement
     * @throws Exception\NotConvertibleException
     */
    public function visitSelectorSimplePseudoElement(Ast\Selector\Simple\PseudoElement $pseudoElement): void {
      throw new Exception\NotConvertibleException('pseudoelement '.$pseudoElement->name, 'Xpath');
    }

    /**
     * Output a language value as a condition on the language attribute of
     * the nearest ancestor-or-self element that has one.
     *
     * @param Ast\Value\Language $language
     */
    public function visitValueLanguage(
      Ast\Value\Language $language
    ): void {
      $this->addCondition(
        sprintf(
          '(ancestor-or-self::*[@%2$s][1]/@%2$s = %1$s or'.
          ' substring-before(ancestor-or-self::*[@%2$s][1]/@%2$s, "-") = %1$s)',
          $this->quoteLiteral($language->language),
          $this->hasOption(self::OPTION_XML_ATTRIBUTES) ? 'xml:lang' : 'lang'
        )
      );
    }

    /**
     * Use unicode aware strtolower if available
     *
     * @param string $string
     * @return string
     */
    private function strToLower(string $string): string {
      if (is_callable('mb_strtolower')) {
        return mb_strtolower($string, 'utf-8');
      }
      if (class_exists('Transliterator', FALSE)) {
        $transliterator = Transliterator::create('Any-Lower');
        if ($transliterator) {
          return $transliterator->transliterate($string);
        }
      }
      return strtolower($string);
    }

    /**
     * Use unicode aware strlen if available, XPath string functions count
     * characters, not bytes.
     *
     * @param string $string
     * @return int
     */
    private function strLength(string $string): int {
      if (is_callable('mb_strlen')) {
        return mb_strlen($string, 'utf-8');
      }
      return strlen($string);
    }
  }
}