1<?php /** @noinspection PhpUnused */
2
3/**
 * A visitor that compiles the AST into an XPath expression
5 *
6 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
7 * @copyright Copyright 2010-2014 PhpCss Team
8 */
9
10namespace PhpCss\Ast\Visitor {
11
12  use InvalidArgumentException;
13  use LogicException;
14  use PhpCss\Ast;
15  use PhpCss\Exception;
16  use Transliterator;
17
18  /**
   * A visitor that compiles the AST into an XPath expression
20   */
21  class Xpath extends Overload {
22
23    /**
24     * use explicit namespaces only, no defined namespace means no namespaces. This option and
25     * OPTION_DEFAULT_NAMESPACE can not be used at the same time.
26     */
27    public const OPTION_EXPLICIT_NAMESPACES = 1;
28
29    /**
30     * use a default namespace, no defined namespace means both no and the default namespace.
31     * This option and OPTION_EXPLICIT_NAMESPACES can not be used at the same time.
32     *
33     * If not changed 'html' is used as the additional prefix for elements.
34     *
35     * Example: foo -> *[(self::foo or self::html:foo)]
36     *
37     */
38    public const OPTION_DEFAULT_NAMESPACE = 16;
39
40    /**
41     * start expressions in document context
42     */
43    public const OPTION_USE_DOCUMENT_CONTEXT = 2;
44    public const OPTION_USE_CONTEXT_DOCUMENT = 2;
45
46    /**
47     * start expressions in descendant-or-self context
48     */
49    public const OPTION_USE_CONTEXT_SELF = 32;
50    /**
51     * limit expressions to self context
52     */
53    public const OPTION_USE_CONTEXT_SELF_LIMIT = 64;
54
55    /**
56     * lowercase the element names (not the namespace prefixes)
57     */
58    public const OPTION_LOWERCASE_ELEMENTS = 4;
59    /**
60     * use xml:id and xml:lang not just id or lang
61     */
62    public const OPTION_XML_ATTRIBUTES = 8;
63
64    private const STATUS_DEFAULT = 0;
65    private const STATUS_ELEMENT = 1;
66    private const STATUS_CONDITION = 2;
67    private const STATUS_COMBINATOR = 3;
68    private const STATUS_PSEUDOCLASS = 4;
69
70    private $_buffer = '';
71
72    /**
73     * Current visitor status (position in expression)
74     * @var integer
75     */
76    private $_status = self::STATUS_DEFAULT;
77
78    /**
79     * Visitor mode
80     * @var integer
81     */
82    private $_options = 0;
83
84    /**
85     * The default namespace prefix used for elements with no namespace prefix if OPTION_DEFAULT_NAMESPACE is
86     * active.
87     */
88    private const DEFAULT_NAMESPACE_PREFIX = 'html';
89
90    /**
91     * @var string
92     */
93    private $_defaultNamespacePrefix = self::DEFAULT_NAMESPACE_PREFIX;
94
95    /**
96     * store expressions for use in visitor methods, the actual expression can depend on
97     * the visitor methods called before.
98     *
99     * @var array
100     */
101    private $_expressions = [];
102
103    /**
104     * store an expression for the current element (type selector)
105     * @var string
106     */
107    private $_element = '*';
108
    /**
     * Create the visitor and store the mode options.
     *
     * Both arguments are passed on unchanged to setOptions().
     *
     * @param integer $options bit mask built from the OPTION_* constants
     * @param string $defaultPrefix value stored as the default namespace prefix
     */
    public function __construct($options = 0, $defaultPrefix = self::DEFAULT_NAMESPACE_PREFIX) {
      $this->setOptions($options, $defaultPrefix);
    }
118
119    /**
120     * Validate and store the options.
121     *
122     * @param int $options
123     * @param string $defaultPrefix
124     * @throws InvalidArgumentException
125     */
126    public function setOptions(
127      int $options = 0, string $defaultPrefix = self::DEFAULT_NAMESPACE_PREFIX
128    ): void {
129      if (
130        $this->hasOption(self::OPTION_EXPLICIT_NAMESPACES) &&
131        $this->hasOption(self::OPTION_DEFAULT_NAMESPACE)
132      ) {
133        throw new InvalidArgumentException(
134          'Options OPTION_EXPLICIT_NAMESPACES and OPTION_DEFAULT_NAMESPACE can not be set at the same time.'
135        );
136      }
137      if (trim($defaultPrefix) === '') {
138        throw new InvalidArgumentException(
139          'The default namespace prefix "'.$defaultPrefix.'" is not valid.'
140        );
141      }
142      $this->_options = $options;
143      $this->_defaultNamespacePrefix = trim($defaultPrefix);
144    }
145
146    /**
147     * Clear the visitor object to visit another selector group
148     */
149    public function clear(): void {
150      $this->_buffer = '';
151      $this->_status = self::STATUS_DEFAULT;
152    }
153
    /**
     * Append a string to the end of the output buffer.
     *
     * @param string $string
     */
    private function add(string $string): void {
      $this->_buffer .= $string;
    }
162
163    /**
164     * Get/Set the current visiting status
165     *
166     * @param null|int $status
167     * @return int
168     */
169    private function status($status = NULL): int {
170      if (isset($status)) {
171        $this->_status = $status;
172      }
173      return $this->_status;
174    }
175
176    /**
177     * Read the status of an option
178     *
179     * @param $option
180     * @return bool
181     */
182    public function hasOption($option): bool {
183      return ($this->_options & $option) === $option;
184    }
185
    /**
     * Return the content of the buffer, the expression collected so far.
     *
     * @return string
     */
    public function __toString() {
      return $this->_buffer;
    }
192
193    private function setElement($element): void {
194      switch ($this->status()) {
195      case self::STATUS_DEFAULT :
196      case self::STATUS_COMBINATOR :
197        $this->_element = $element;
198        break;
199      }
200    }
201
202    /**
203     * prepare buffer to add a condition to the xpath expression
204     */
205    private function addCondition($condition): void {
206      if (!empty($condition)) {
207        switch ($this->status()) {
208        case self::STATUS_DEFAULT :
209        case self::STATUS_COMBINATOR :
210          $this->setElement('*');
211          $this->add('*[');
212          break;
213        case self::STATUS_PSEUDOCLASS :
214          $this->add($condition);
215          return;
216        case self::STATUS_ELEMENT :
217          $this->add('[');
218          break;
219        case self::STATUS_CONDITION :
220          $this->add(' and ');
221          break;
222        }
223        $this->status(self::STATUS_CONDITION);
224        $this->add($condition);
225      }
226    }
227
228    /**
229     * end condition if in condition status
230     */
231    private function endConditions(): void {
232      if ($this->status() === self::STATUS_CONDITION) {
233        $this->add(']');
234      }
235      $this->status(self::STATUS_DEFAULT);
236    }
237
238    /**
239     * Quote literal if needed
240     *
241     * @param string $literal
242     * @return string
243     */
244    private function quoteLiteral(string $literal): string {
245      $hasDoubleQuote = FALSE !== strpos($literal, '"');
246      if ($hasDoubleQuote) {
247        $hasSingleQuote = FALSE !== strpos($literal, "'");
248        if ($hasSingleQuote) {
249          $result = '';
250          $parts = explode('"', $literal);
251          foreach ($parts as $part) {
252            $result .= ", '\"'";
253            if ("" !== $part) {
254              $result .= ', "'.$part.'"';
255            }
256          }
257          return 'concat('.substr($result, 7).')';
258        }
259        return "'".$literal."'";
260      }
261      return '"'.$literal.'"';
262    }
263
264    /**
265     * Validate the buffer before visiting a Ast\Selector\Group.
266     * If the buffer already contains data, throw an exception.
267     *
268     * @param Ast\Selector\Group $group
269     * @return boolean
270     * @throws LogicException
271     */
272    public function visitEnterSelectorSequenceGroup(Ast\Selector\Group $group): bool {
273      if (!empty($this->_buffer)) {
274        throw new LogicException(
275          sprintf(
276            'Visitor buffer already contains data, can not visit "%s"',
277            get_class($group)
278          )
279        );
280      }
281      return TRUE;
282    }
283
284    /**
285     * If here is already data in the buffer, add a separator before starting the next.
286     *
287     * @param Ast\Selector\Sequence $sequence
288     * @return boolean
289     */
290    public function visitEnterSelectorSequence(Ast\Selector\Sequence $sequence): bool {
291      switch ($this->status()) {
292      case self::STATUS_DEFAULT :
293        if (!empty($this->_buffer)) {
294          $this->add('|');
295        }
296        if (empty($sequence->simples) && NULL !== $sequence->combinator) {
297          return TRUE;
298        }
299        if ($this->hasOption(self::OPTION_USE_CONTEXT_DOCUMENT)) {
300          $this->add('//');
301        } elseif ($this->hasOption(self::OPTION_USE_CONTEXT_SELF_LIMIT)) {
302          $this->add('self::');
303        } elseif ($this->hasOption(self::OPTION_USE_CONTEXT_SELF)) {
304          $this->add('descendant-or-self::');
305        } else {
306          $this->add('.//');
307        }
308        break;
309      case self::STATUS_CONDITION :
310        $this->endConditions();
311        $this->add('//');
312        break;
313      }
314      return TRUE;
315    }
316
    /**
     * End the selector sequence: close an open predicate and reset the status to
     * default (both done by endConditions()).
     *
     * @return boolean always TRUE
     */
    public function visitLeaveSelectorSequence(): bool {
      $this->endConditions();
      return TRUE;
    }
326
327    /**
328     * Output the universal type (* or xmlns|*) selector to the buffer
329     *
330     * @param Ast\Selector\Simple\Universal $universal
331     */
332    public function visitSelectorSimpleUniversal(Ast\Selector\Simple\Universal $universal): void {
333      if ($universal->namespacePrefix !== '*' && trim($universal->namespacePrefix) !== '') {
334        $element = $universal->namespacePrefix.':*';
335      } else {
336        $element = '*';
337      }
338      $this->setElement($element);
339      $this->add($element);
340      $this->status(self::STATUS_ELEMENT);
341    }
342
343    /**
344     * Output the type (element name) selector to the buffer
345     *
346     * @param Ast\Selector\Simple\Type $type
347     */
348    public function visitSelectorSimpleType(Ast\Selector\Simple\Type $type): void {
349      if ($this->hasOption(self::OPTION_LOWERCASE_ELEMENTS)) {
350        $elementName = $this->strToLower($type->elementName);
351      } else {
352        $elementName = $type->elementName;
353      }
354      if ('' === $type->namespacePrefix && $this->hasOption(self::OPTION_EXPLICIT_NAMESPACES)) {
355        $this->add($elementName);
356        $this->setElement($elementName);
357        $this->status(self::STATUS_ELEMENT);
358      } else {
359        $isEmptyPrefix = !isset($type->namespacePrefix) || $type->namespacePrefix === '';
360        if (!$isEmptyPrefix && $type->namespacePrefix !== '*') {
361          $this->add($type->namespacePrefix.':'.$elementName);
362          $this->setElement($type->namespacePrefix.':'.$elementName);
363          $this->status(self::STATUS_ELEMENT);
364        } elseif ($isEmptyPrefix && $this->hasOption(self::OPTION_DEFAULT_NAMESPACE)) {
365          $condition = '(self::'.$elementName.' or self::'.$this->_defaultNamespacePrefix.':'.$elementName.')';
366          if ($this->status() !== self::STATUS_PSEUDOCLASS) {
367            $this->setElement('*['.$condition.']');
368            $this->add('*');
369            $this->status(self::STATUS_ELEMENT);
370          }
371          $this->addCondition($condition);
372        } else {
373          $condition = 'local-name() = '.$this->quoteLiteral($elementName);
374          if ($this->status() !== self::STATUS_PSEUDOCLASS) {
375            $this->setElement('*['.$condition.']');
376            $this->add('*');
377            $this->status(self::STATUS_ELEMENT);
378          }
379          $this->addCondition($condition);
380        }
381      }
382    }
383
384    /**
385     * Output the class selector to the buffer
386     *
387     * @param Ast\Selector\Simple\Id $id
388     */
389    public function visitSelectorSimpleId(Ast\Selector\Simple\Id $id): void {
390      $this->addCondition(
391        sprintf(
392          '@%1$s = %2$s',
393          $this->hasOption(self::OPTION_XML_ATTRIBUTES) ? 'xml:id' : 'id',
394          $this->quoteLiteral($id->id)
395        )
396      );
397    }
398
399
400    /**
401     * Output the class selector to the buffer
402     *
403     * @param Ast\Selector\Simple\ClassName $class
404     */
405    public function visitSelectorSimpleClassName(Ast\Selector\Simple\ClassName $class): void {
406      $this->addCondition(
407        sprintf(
408          'contains(concat(" ", normalize-space(@class), " "), " %s ")',
409          $class->className
410        )
411      );
412    }
413
414    public function visitSelectorSimpleAttribute(
415      Ast\Selector\Simple\Attribute $attribute
416    ): void {
417      switch ($attribute->match) {
418      case Ast\Selector\Simple\Attribute::MATCH_PREFIX :
419        $condition = sprintf(
420          'starts-with(@%s, %s)',
421          $attribute->name,
422          $this->quoteLiteral($attribute->literal->value)
423        );
424        break;
425      case Ast\Selector\Simple\Attribute::MATCH_SUFFIX :
426        $condition = sprintf(
427          'substring(@%1$s, string-length(@%1$s) - %2$s) = %3$s',
428          $attribute->name,
429          strlen($attribute->literal->value),
430          $this->quoteLiteral($attribute->literal->value)
431        );
432        break;
433      case Ast\Selector\Simple\Attribute::MATCH_SUBSTRING :
434        $condition = sprintf(
435          'contains(@%s, %s)',
436          $attribute->name,
437          $this->quoteLiteral($attribute->literal->value)
438        );
439        break;
440      case Ast\Selector\Simple\Attribute::MATCH_EQUALS :
441        $condition = '@'.$attribute->name.' = '.$this->quoteLiteral($attribute->literal->value);
442        break;
443      case Ast\Selector\Simple\Attribute::MATCH_INCLUDES :
444        $condition = sprintf(
445          'contains(concat(" ", normalize-space(@%s), " "), %s)',
446          $attribute->name,
447          $this->quoteLiteral(' '.trim($attribute->literal->value).' ')
448        );
449        break;
450      case Ast\Selector\Simple\Attribute::MATCH_DASHMATCH :
451        $condition = sprintf(
452          '(@%1$s = %2$s or substring-before(@%1$s, "-") = %2$s)',
453          $attribute->name,
454          $this->quoteLiteral($attribute->literal->value)
455        );
456        break;
457      case Ast\Selector\Simple\Attribute::MATCH_EXISTS :
458      default :
459        $condition = '@'.$attribute->name;
460        break;
461      }
462      if (!empty($condition)) {
463        $this->addCondition($condition);
464      }
465    }
466
467    public function visitSelectorCombinatorChild(): void {
468      $this->endConditions();
469      if ($this->_buffer !== '') {
470        $this->add('/');
471      }
472      $this->status(self::STATUS_COMBINATOR);
473    }
474
475    public function visitSelectorCombinatorDescendant(): void {
476      $this->endConditions();
477      if ($this->_buffer !== '') {
478        $this->add('//');
479      } else {
480        $this->add('.//');
481      }
482      $this->status(self::STATUS_COMBINATOR);
483    }
484
485    public function visitSelectorCombinatorFollower(): void {
486      $this->endConditions();
487      if ($this->_buffer !== '') {
488        $this->add('/');
489      }
490      $this->add('following-sibling::');
491      $this->status(self::STATUS_COMBINATOR);
492    }
493
494    public function visitSelectorCombinatorNext(): void {
495      $this->endConditions();
496      if ($this->_buffer !== '') {
497        $this->add('/');
498      }
499      $this->add('following-sibling::*[1]/self::');
500      $this->status(self::STATUS_COMBINATOR);
501    }
502
503    /**
504     * @throws Exception\NotConvertibleException
505     */
506    public function visitSelectorSimplePseudoClass(Ast\Selector\Simple\PseudoClass $pseudoClass): void {
507      switch ($pseudoClass->name) {
508      case 'root' :
509        $condition = '(. = //*)';
510        break;
511      case 'empty' :
512        $condition = '(count(*|text()) = 0)';
513        break;
514      case 'enabled' :
515        $condition = 'not(@disabled)';
516        break;
517      case 'disabled' :
518      case 'checked' :
519        $condition = '@'.$pseudoClass->name;
520        break;
521      case 'first-child' :
522        $condition = 'position() = 1';
523        break;
524      case 'last-child' :
525        $condition = 'position() = last()';
526        break;
527      case 'first-of-type' :
528        $condition = '(count(preceding-sibling::'.$this->_element.') = 0)';
529        break;
530      case 'last-of-type' :
531        $condition = '(count(following-sibling::'.$this->_element.') = 0)';
532        break;
533      case 'only-child' :
534        $condition = '(count(parent::*/*|parent::*/text()) = 1)';
535        break;
536      case 'only-of-type' :
537        $condition = '(count(parent::*/'.$this->_element.') = 1)';
538        break;
539      case 'odd' :
540        if ($this->status() === self::STATUS_CONDITION) {
541          $this->add(']');
542          $this->status(self::STATUS_ELEMENT);
543        }
544        $condition = 'position() mod 2 = 0';
545        break;
546      case 'even' :
547        if ($this->status() === self::STATUS_CONDITION) {
548          $this->add(']');
549          $this->status(self::STATUS_ELEMENT);
550        }
551        $condition = 'position() mod 2 = 1';
552        break;
553      default :
554        throw new Exception\NotConvertibleException('pseudoclass '.$pseudoClass->name, 'Xpath');
555      }
556      $this->addCondition($condition);
557    }
558
559    public function visitEnterSelectorSimplePseudoClass(Ast\Selector\Simple\PseudoClass $pseudoClass): bool {
560      switch ($pseudoClass->name) {
561      case 'not' :
562        $this->addCondition('not(');
563        $this->status(self::STATUS_PSEUDOCLASS);
564        return TRUE;
565      case 'has' :
566        $this->addCondition('(');
567        $this->status(self::STATUS_DEFAULT);
568        return TRUE;
569      case 'contains':
570        if (
571          ($parameter = $pseudoClass->parameter) &&
572          ($parameter instanceof Ast\Value\Number || $parameter instanceof Ast\Value\Literal)
573        ) {
574          $this->addCondition('contains(., '.$this->quoteLiteral($parameter->value));
575          $this->status(self::STATUS_PSEUDOCLASS);
576        }
577        return TRUE;
578      case 'gt' :
579      case 'lt' :
580        if (
581          ($parameter = $pseudoClass->parameter) &&
582          ($parameter instanceof Ast\Value\Number || $parameter instanceof Ast\Value\Literal)
583        ) {
584          if ($this->status() === self::STATUS_CONDITION) {
585            $this->add(']');
586          }
587          $this->status(self::STATUS_ELEMENT);
588          $operator = $pseudoClass->name === 'gt' ? '>' : '<';
589          $condition = $parameter->value < 0
590            ? 'last() - '.abs($parameter->value - 1)
591            : $parameter->value + 1;
592          $this->addCondition(
593            'position() '.$operator.' '.$condition
594          );
595        }
596        break;
597      case 'nth-child' :
598        $this->addCondition('(');
599        $this->status(self::STATUS_PSEUDOCLASS);
600        $this->_expressions['position'] = 'position()';
601        $this->_expressions['count'] = 'last()';
602        return TRUE;
603      case 'nth-last-child' :
604        $this->addCondition('(');
605        $this->status(self::STATUS_PSEUDOCLASS);
606        $this->_expressions['position'] = '(last() - position() + 1)';
607        $this->_expressions['count'] = 'count()';
608        return TRUE;
609      case 'nth-of-type' :
610        $this->addCondition('(');
611        $this->status(self::STATUS_PSEUDOCLASS);
612        $this->_expressions['position'] = '(count(preceding-sibling::'.$this->_element.') + 1)';
613        $this->_expressions['count'] = 'count(parent::*/'.$this->_element.')';
614        return TRUE;
615      case 'nth-last-of-type' :
616        $this->addCondition('(');
617        $this->status(self::STATUS_PSEUDOCLASS);
618        $this->_expressions['position'] = '(count(following-sibling::'.$this->_element.') + 1)';
619        $this->_expressions['count'] = 'count(parent::*/'.$this->_element.')';
620        return TRUE;
621      }
622      return FALSE;
623    }
624
625    public function visitLeaveSelectorSimplePseudoClass(): void {
626      $this->endConditions();
627      $this->add(')');
628      $this->status(self::STATUS_CONDITION);
629    }
630
631    public function visitValuePosition(
632      Ast\Value\Position $position
633    ): void {
634      $repeat = $position->repeat;
635      $add = $position->add;
636      $expressionPosition = empty($this->_expressions['position'])
637        ? 'position()' : $this->_expressions['position'];
638      $expressionCount = empty($this->_expressions['count'])
639        ? 'last()' : $this->_expressions['count'];
640      if ($repeat === 0) {
641        $condition = $expressionPosition.' = '.$add;
642      } else {
643        if ($add > $repeat) {
644          $balance = $add - (floor($add / $repeat) * $repeat);
645          $start = $add;
646        } elseif ($add < 0) {
647          if (abs($add) > $repeat) {
648            $balance = $add - (floor($add / $repeat) * $repeat);
649            $start = $add;
650          } else {
651            $balance = $repeat + $add;
652            $start = 1;
653          }
654        } else {
655          $balance = $add;
656          $start = 1;
657        }
658        $condition = sprintf('(%s mod %d) = %d', $expressionPosition, $repeat, $balance);
659        if ($start > 1) {
660          $condition .= sprintf(' %s >= %d', $expressionPosition, $start);
661        } elseif ($start < 0) {
662          $condition .= sprintf(' %s <= %s - %d', $expressionPosition, $expressionCount, abs($start));
663        }
664      }
665      $this->add($condition);
666    }
667
668    /**
669     * @throws Exception\NotConvertibleException
670     */
671    public function visitSelectorSimplePseudoElement(Ast\Selector\Simple\PseudoElement $pseudoElement): void {
672      throw new Exception\NotConvertibleException('pseudoelement '.$pseudoElement->name, 'Xpath');
673    }
674
675    public function visitValueLanguage(
676      Ast\Value\Language $language
677    ): void {
678      $this->addCondition(
679        sprintf(
680          '(ancestor-or-self::*[@%2$s][1]/@%2$s = %1$s or'.
681          ' substring-before(ancestor-or-self::*[@%2$s][1]/@%2$s, "-") = %1$s)',
682          $this->quoteLiteral($language->language),
683          $this->hasOption(self::OPTION_XML_ATTRIBUTES) ? 'xml:lang' : 'lang'
684        )
685      );
686    }
687
688    /**
689     * Use unicode aware strtolower if available
690     *
691     * @param string $string
692     * @return string
693     */
694    private function strToLower(string $string): string {
695      if (is_callable('mb_strtolower')) {
696        return mb_strtolower($string, 'utf-8');
697      }
698      if (class_exists('Transliterator', FALSE)) {
699        $transliterator = Transliterator::create('Any-Lower');
700        if ($transliterator) {
701          return $transliterator->transliterate($string);
702        }
703      }
704      return strtolower($string);
705    }
706  }
707}
708