1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\CssSelector\Parser;
13
14use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
15use Symfony\Component\CssSelector\Node;
16use Symfony\Component\CssSelector\Parser\Tokenizer\Tokenizer;
17
18/**
19 * CSS selector parser.
20 *
21 * This component is a port of the Python cssselect library,
22 * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
23 *
24 * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
25 *
26 * @internal
27 */
class Parser implements ParserInterface
{
    /**
     * Tokenizer used to turn the selector source into a token stream.
     *
     * @var Tokenizer
     */
    private $tokenizer;

    /**
     * @param Tokenizer|null $tokenizer Tokenizer to use; a new Tokenizer is created when null is given
     */
    public function __construct(?Tokenizer $tokenizer = null)
    {
        $this->tokenizer = $tokenizer ?? new Tokenizer();
    }

    /**
     * {@inheritdoc}
     */
    public function parse(string $source): array
    {
        $reader = new Reader($source);
        $stream = $this->tokenizer->tokenize($reader);

        return $this->parseSelectorList($stream);
    }

    /**
     * Parses the arguments for ":nth-child()" and friends.
     *
     * The token values are concatenated and interpreted as an "an+b"
     * expression; the result is the pair [a, b]. The keywords "odd" and
     * "even" map to [2, 1] and [2, 0]; an expression without "n" maps to
     * [0, b].
     *
     * @param Token[] $tokens
     *
     * @return int[] The [a, b] pair
     *
     * @throws SyntaxErrorException When a token is a string, or when a part of the expression is not numeric
     */
    public static function parseSeries(array $tokens): array
    {
        foreach ($tokens as $token) {
            if ($token->isString()) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }
        }

        $joined = trim(implode('', array_map(static function (Token $token) {
            return $token->getValue();
        }, $tokens)));

        $int = static function ($string) {
            if (!is_numeric($string)) {
                throw SyntaxErrorException::stringAsFunctionArgument();
            }

            return (int) $string;
        };

        if ('odd' === $joined) {
            return [2, 1];
        }

        if ('even' === $joined) {
            return [2, 0];
        }

        if ('n' === $joined) {
            return [1, 0];
        }

        if (!str_contains($joined, 'n')) {
            return [0, $int($joined)];
        }

        // Split on the first "n" only: anything after a second "n" stays in
        // the offset part and is rejected by the numeric check instead of
        // being silently dropped.
        [$step, $offset] = explode('n', $joined, 2);

        // Compare against the empty string explicitly: "0" is falsy in PHP,
        // and "0n+5" must yield a step of 0, not the default of 1.
        if ('' === $step) {
            $a = 1;
        } elseif ('-' === $step || '+' === $step) {
            // A bare sign means a step of -1 or +1.
            $a = $int($step.'1');
        } else {
            $a = $int($step);
        }

        return [$a, '' === $offset ? 0 : $int($offset)];
    }

    /**
     * Parses a comma-separated list of selectors.
     *
     * @return Node\SelectorNode[]
     *
     * @throws SyntaxErrorException
     */
    private function parseSelectorList(TokenStream $stream): array
    {
        $stream->skipWhitespace();
        $selectors = [];

        while (true) {
            $selectors[] = $this->parseSelectorNode($stream);

            if (!$stream->getPeek()->isDelimiter([','])) {
                break;
            }

            // Consume the "," and any whitespace before the next selector.
            $stream->getNext();
            $stream->skipWhitespace();
        }

        return $selectors;
    }

    /**
     * Parses one selector: simple selectors chained by combinators.
     *
     * Parsing stops at the end of the stream or at a "," delimiter. When two
     * simple selectors are separated by whitespace only, the combinator is a
     * single space.
     *
     * @throws SyntaxErrorException When a pseudo-element is followed by further selector parts
     */
    private function parseSelectorNode(TokenStream $stream): Node\SelectorNode
    {
        [$result, $pseudoElement] = $this->parseSimpleSelector($stream);

        while (true) {
            $stream->skipWhitespace();
            $peek = $stream->getPeek();

            if ($peek->isFileEnd() || $peek->isDelimiter([','])) {
                break;
            }

            // More tokens follow, so a pseudo-element found earlier is not last.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isDelimiter(['+', '>', '~'])) {
                $combinator = $stream->getNext()->getValue();
                $stream->skipWhitespace();
            } else {
                $combinator = ' ';
            }

            [$nextSelector, $pseudoElement] = $this->parseSimpleSelector($stream);
            $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
        }

        return new Node\SelectorNode($result, $pseudoElement);
    }

    /**
     * Parses next simple node (hash, class, pseudo, negation).
     *
     * Starts with an optional element node and then wraps it with every
     * hash, class, attribute, pseudo-class, function or negation that
     * follows, until whitespace, the end of the stream, a combinator, a ","
     * or (inside a negation) a ")" is reached.
     *
     * @param bool $insideNegation Whether the selector is the argument of ":not()"
     *
     * @return array A two-element array: the parsed node, then the pseudo-element identifier or null
     *
     * @throws SyntaxErrorException
     */
    private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = false): array
    {
        $stream->skipWhitespace();

        // Remember how many tokens were consumed so far, to detect an empty selector below.
        $selectorStart = \count($stream->getUsed());
        $result = $this->parseElementNode($stream);
        $pseudoElement = null;

        while (true) {
            $peek = $stream->getPeek();
            if ($peek->isWhitespace()
                || $peek->isFileEnd()
                || $peek->isDelimiter([',', '+', '>', '~'])
                || ($insideNegation && $peek->isDelimiter([')']))
            ) {
                break;
            }

            // Anything after a pseudo-element within the same simple selector is an error.
            if (null !== $pseudoElement) {
                throw SyntaxErrorException::pseudoElementFound($pseudoElement, 'not at the end of a selector');
            }

            if ($peek->isHash()) {
                $result = new Node\HashNode($result, $stream->getNext()->getValue());
            } elseif ($peek->isDelimiter(['.'])) {
                $stream->getNext();
                $result = new Node\ClassNode($result, $stream->getNextIdentifier());
            } elseif ($peek->isDelimiter(['['])) {
                $stream->getNext();
                $result = $this->parseAttributeNode($result, $stream);
            } elseif ($peek->isDelimiter([':'])) {
                $stream->getNext();

                // "::name" is a pseudo-element.
                if ($stream->getPeek()->isDelimiter([':'])) {
                    $stream->getNext();
                    $pseudoElement = $stream->getNextIdentifier();

                    continue;
                }

                $identifier = $stream->getNextIdentifier();
                if (\in_array(strtolower($identifier), ['first-line', 'first-letter', 'before', 'after'], true)) {
                    // Special case: CSS 2.1 pseudo-elements can have a single ':'.
                    // Any new pseudo-element must have two.
                    $pseudoElement = $identifier;

                    continue;
                }

                // No "(" follows: plain pseudo-class without arguments.
                if (!$stream->getPeek()->isDelimiter(['('])) {
                    $result = new Node\PseudoNode($result, $identifier);

                    continue;
                }

                // Consume the "(" and any whitespace before the arguments.
                $stream->getNext();
                $stream->skipWhitespace();

                if ('not' === strtolower($identifier)) {
                    if ($insideNegation) {
                        throw SyntaxErrorException::nestedNot();
                    }

                    [$argument, $argumentPseudoElement] = $this->parseSimpleSelector($stream, true);
                    $next = $stream->getNext();

                    if (null !== $argumentPseudoElement) {
                        throw SyntaxErrorException::pseudoElementFound($argumentPseudoElement, 'inside ::not()');
                    }

                    if (!$next->isDelimiter([')'])) {
                        throw SyntaxErrorException::unexpectedToken('")"', $next);
                    }

                    $result = new Node\NegationNode($result, $argument);
                } else {
                    // Functional pseudo-class: collect argument tokens up to the closing ")".
                    $arguments = [];
                    $next = null;

                    while (true) {
                        $stream->skipWhitespace();
                        $next = $stream->getNext();

                        if ($next->isIdentifier()
                            || $next->isString()
                            || $next->isNumber()
                            || $next->isDelimiter(['+', '-'])
                        ) {
                            $arguments[] = $next;
                        } elseif ($next->isDelimiter([')'])) {
                            break;
                        } else {
                            throw SyntaxErrorException::unexpectedToken('an argument', $next);
                        }
                    }

                    if ([] === $arguments) {
                        throw SyntaxErrorException::unexpectedToken('at least one argument', $next);
                    }

                    $result = new Node\FunctionNode($result, $identifier, $arguments);
                }
            } else {
                throw SyntaxErrorException::unexpectedToken('selector', $peek);
            }
        }

        // No token was consumed at all: there is no selector at this position.
        if (\count($stream->getUsed()) === $selectorStart) {
            throw SyntaxErrorException::unexpectedToken('selector', $stream->getPeek());
        }

        return [$result, $pseudoElement];
    }

    /**
     * Parses an optional element (type) selector, with an optional namespace.
     *
     * Handles "element", "*", "namespace|element" and "*|element". When the
     * next token is neither an identifier nor "*", nothing is consumed and
     * an element node with a null namespace and a null element is returned.
     */
    private function parseElementNode(TokenStream $stream): Node\ElementNode
    {
        $peek = $stream->getPeek();

        if (!$peek->isIdentifier() && !$peek->isDelimiter(['*'])) {
            return new Node\ElementNode(null, null);
        }

        // Provisionally treat the first token as a namespace; "*" is stored as null.
        if ($peek->isIdentifier()) {
            $namespace = $stream->getNext()->getValue();
        } else {
            $stream->getNext();
            $namespace = null;
        }

        if ($stream->getPeek()->isDelimiter(['|'])) {
            $stream->getNext();
            $element = $stream->getNextIdentifierOrStar();
        } else {
            // No "|" follows: the first token was the element name, not a namespace.
            $element = $namespace;
            $namespace = null;
        }

        return new Node\ElementNode($namespace, $element);
    }

    /**
     * Parses an attribute selector; the opening "[" must already be consumed.
     *
     * Supports "[attr]", "[ns|attr]" and the operators "=", "^=", "$=",
     * "*=", "~=", "|=" and "!=" followed by an identifier, string or number
     * value. "[attr]" alone produces the "exists" operator with a null value.
     *
     * @param Node\NodeInterface $selector The node the attribute test applies to
     *
     * @throws SyntaxErrorException On an unexpected operator, value or missing "]"
     */
    private function parseAttributeNode(Node\NodeInterface $selector, TokenStream $stream): Node\AttributeNode
    {
        $stream->skipWhitespace();
        $attribute = $stream->getNextIdentifierOrStar();

        // A null name (presumably what getNextIdentifierOrStar() returns for "*")
        // is only accepted when a "|" follows.
        if (null === $attribute && !$stream->getPeek()->isDelimiter(['|'])) {
            throw SyntaxErrorException::unexpectedToken('"|"', $stream->getPeek());
        }

        if ($stream->getPeek()->isDelimiter(['|'])) {
            $stream->getNext();

            if ($stream->getPeek()->isDelimiter(['='])) {
                // "|" directly followed by "=" is the "|=" operator, not a namespace separator.
                $namespace = null;
                $stream->getNext();
                $operator = '|=';
            } else {
                $namespace = $attribute;
                $attribute = $stream->getNextIdentifier();
                $operator = null;
            }
        } else {
            $namespace = $operator = null;
        }

        if (null === $operator) {
            $stream->skipWhitespace();
            $next = $stream->getNext();

            if ($next->isDelimiter([']'])) {
                return new Node\AttributeNode($selector, $namespace, $attribute, 'exists', null);
            }

            if ($next->isDelimiter(['='])) {
                $operator = '=';
            } elseif ($next->isDelimiter(['^', '$', '*', '~', '|', '!'])
                && $stream->getPeek()->isDelimiter(['='])
            ) {
                $operator = $next->getValue().'=';
                $stream->getNext();
            } else {
                throw SyntaxErrorException::unexpectedToken('operator', $next);
            }
        }

        $stream->skipWhitespace();
        $value = $stream->getNext();

        if ($value->isNumber()) {
            // A numeric value is converted to a string token so it passes the check below.
            $value = new Token(Token::TYPE_STRING, (string) $value->getValue(), $value->getPosition());
        }

        if (!($value->isIdentifier() || $value->isString())) {
            throw SyntaxErrorException::unexpectedToken('string or identifier', $value);
        }

        $stream->skipWhitespace();
        $next = $stream->getNext();

        if (!$next->isDelimiter([']'])) {
            throw SyntaxErrorException::unexpectedToken('"]"', $next);
        }

        return new Node\AttributeNode($selector, $namespace, $attribute, $operator, $value->getValue());
    }
}
354