1<?php
2
3/**
4 * This file is part of the FreeDSx LDAP package.
5 *
6 * (c) Chad Sikorra <Chad.Sikorra@gmail.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace FreeDSx\Ldap\Search;
13
14use FreeDSx\Ldap\Exception\FilterParseException;
15use FreeDSx\Ldap\Search\Filter\AndFilter;
16use FreeDSx\Ldap\Search\Filter\FilterInterface;
17use FreeDSx\Ldap\Search\Filter\MatchingRuleFilter;
18use FreeDSx\Ldap\Search\Filter\OrFilter;
19use FreeDSx\Ldap\Search\Filter\SubstringFilter;
20
21/**
22 * Parses LDAP filter strings. RFC 4515.
23 *
24 * @author Chad Sikorra <Chad.Sikorra@gmail.com>
25 */
26class FilterParser
27{
28    protected const MATCHING_RULE = '/^([a-zA-Z0-9\.]+)?(\:dn)?(\:([a-zA-Z0-9\.]+))?$/';
29
30    /**
31     * @var string
32     */
33    protected $filter = '';
34
35    /**
36     * @var int
37     */
38    protected $length = 0;
39
40    /**
41     * @var int
42     */
43    protected $depth = 0;
44
45    /**
46     * @var null|array
47     * @psalm-var null|array<0|positive-int, array{startAt?: int, endAt: null|int}>
48     */
49    protected $containers;
50
51    /**
52     * @param string $filter
53     */
54    public function __construct(string $filter)
55    {
56        $this->filter = $filter;
57        $this->length = strlen($filter);
58    }
59
60    /**
61     * @param string $filter
62     * @return FilterInterface
63     * @psalm-return AndFilter|Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\NotFilter|Filter\PresentFilter|MatchingRuleFilter|OrFilter|SubstringFilter
64     * @throws FilterParseException
65     */
66    public static function parse(string $filter): FilterInterface
67    {
68        if ($filter === '') {
69            throw new FilterParseException('The filter cannot be empty.');
70        }
71
72        return (new self($filter))->parseFilterString(0, true)[1];
73    }
74
75    /**
76     * @param int $startAt
77     * @param bool $isRoot
78     * @return array
79     * @psalm-return array{0: int|null, 1: AndFilter|Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\NotFilter|Filter\PresentFilter|MatchingRuleFilter|OrFilter|SubstringFilter}
80     * @throws FilterParseException
81     */
82    protected function parseFilterString(int $startAt, bool $isRoot = false): array
83    {
84        if ($this->isAtFilterContainer($startAt)) {
85            [$endsAt, $filter] = $this->parseFilterContainer($startAt, $isRoot);
86        } else {
87            [$endsAt, $filter] = $this->parseComparisonFilter($startAt, $isRoot);
88        }
89        if ($isRoot && $endsAt !== $this->length) {
90            throw new FilterParseException(sprintf(
91                'Unexpected value at end of the filter: %s',
92                substr($this->filter, $endsAt)
93            ));
94        }
95
96        return [$endsAt, $filter];
97    }
98
99    /**
100     * @param int $pos
101     * @return bool
102     */
103    protected function isAtFilterContainer(int $pos): bool
104    {
105        if (!$this->startsWith(FilterInterface::PAREN_LEFT, $pos)) {
106            return false;
107        }
108
109        foreach (FilterInterface::OPERATORS as $compOp) {
110            if ($this->startsWith($compOp, $pos + 1)) {
111                return true;
112            }
113        }
114
115        return false;
116    }
117
118    /**
119     * Parse a filter container. Always returns the ending position followed by the filter container.
120     *
121     * @param int $startAt
122     * @param bool $isRoot
123     * @return array
124     * @psalm-return array{0: int, 1: AndFilter|Filter\NotFilter|OrFilter}
125     * @throws FilterParseException
126     */
127    protected function parseFilterContainer(int $startAt, bool $isRoot): array
128    {
129        if ($this->containers === null) {
130            $this->parseContainerDepths();
131        }
132        $this->depth += $isRoot ? 0 : 1;
133        if (!isset($this->containers[$this->depth])) {
134            throw new FilterParseException(sprintf(
135                'The container at position %s is unrecognized. Perhaps there\'s an unmatched "(".',
136                $startAt
137            ));
138        }
139        $endAt = $this->containers[$this->depth]['endAt'] ?? 0;
140        $operator = substr($this->filter, $startAt + 1, 1);
141
142        if ($operator === FilterInterface::OPERATOR_NOT) {
143            return [$endAt, $this->getNotFilterObject($startAt, $endAt)];
144        }
145
146        $startAt += 2;
147        $filter = $operator === FilterInterface::OPERATOR_AND ? new AndFilter() : new OrFilter();
148        while ($endAt !== ($startAt + 1)) {
149            [$startAt, $childFilter] = $this->parseFilterString($startAt ?? 0);
150            $filter->add($childFilter);
151        }
152
153        return [$endAt, $filter];
154    }
155
156    /**
157     * Parse a comparison operator. Always returns the ending position followed by the filter.
158     *
159     * @param int $startAt
160     * @param bool $isRoot
161     * @return array
162     * @psalm-return array{0: int, 1: Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\PresentFilter|MatchingRuleFilter|SubstringFilter}
163     * @throws FilterParseException
164     */
165    protected function parseComparisonFilter(int $startAt, bool $isRoot = false): array
166    {
167        $parenthesis = $this->validateComparisonFilter($startAt, $isRoot);
168        $endAt = !$parenthesis && $isRoot ? $this->length : $this->nextClosingParenthesis($startAt) + 1;
169
170        $attributeEndsAfter = null;
171        $filterType = null;
172        $valueStartsAt = null;
173        for ($i = $startAt; $i < $endAt; $i++) {
174            foreach (FilterInterface::FILTERS as $op) {
175                if ($this->filter[$i] === $op) {
176                    $filterType = $op;
177                } elseif (($i + 1) < $endAt && $this->filter[$i] . $this->filter[$i + 1] === $op) {
178                    $filterType = $op;
179                }
180                if ($filterType !== null) {
181                    break;
182                }
183            }
184            if ($filterType !== null) {
185                $attributeEndsAfter = $i - $startAt;
186                $valueStartsAt = $i + strlen($filterType);
187                break;
188            }
189        }
190        $this->validateParsedFilter($filterType, $startAt, $valueStartsAt, $endAt);
191
192        $attribute = substr(
193            $this->filter,
194            $startAt + ($parenthesis ? 1 : 0),
195            (int) $attributeEndsAfter - ($parenthesis ? 1 : 0)
196        );
197        $value = substr(
198            $this->filter,
199            (int) $valueStartsAt,
200            $endAt - (int) $valueStartsAt - ($parenthesis ? 1 : 0)
201        );
202
203        if ($attribute === '') {
204            throw new FilterParseException(sprintf(
205                'The attribute is missing in the filter near position %s.',
206                $startAt
207            ));
208        }
209
210        return [$endAt, $this->getComparisonFilterObject((string) $filterType, $attribute, $value)];
211    }
212
213    /**
214     * Validates some initial filter logic and determines if the filter is wrapped in parenthesis (validly).
215     *
216     * @param int $startAt
217     * @param bool $isRoot
218     * @return bool
219     * @throws FilterParseException
220     */
221    protected function validateComparisonFilter(int $startAt, bool $isRoot): bool
222    {
223        $parenthesis = true;
224
225        # A filter without an opening parenthesis is only valid if it is the root. And it cannot have a closing parenthesis.
226        if ($isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) {
227            $parenthesis = false;
228            $pos = false;
229            try {
230                $pos = $this->nextClosingParenthesis($startAt);
231            } catch (FilterParseException $e) {
232            }
233            if ($pos !== false) {
234                throw new FilterParseException(sprintf('The ")" at char %s has no matching parenthesis', $pos));
235            }
236            # If this is not a root filter, it must start with an opening parenthesis.
237        } elseif (!$isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) {
238            throw new FilterParseException(sprintf(
239                'The character "%s" at position %s was unexpected. Expected "(".',
240                $this->filter[$startAt],
241                $startAt
242            ));
243        }
244
245        return $parenthesis;
246    }
247
248    /**
249     * Validate some basic aspects of the filter after it is parsed.
250     *
251     * @param string|null $filterType
252     * @param int|null $startsAt
253     * @param int|null $startValue
254     * @param int $endAt
255     * @throws FilterParseException
256     */
257    protected function validateParsedFilter(?string $filterType, ?int $startsAt, ?int $startValue, $endAt): void
258    {
259        if ($filterType === null) {
260            throw new FilterParseException(sprintf(
261                'Expected one of "%s" in the filter after position %s, but received none.',
262                implode(', ', FilterInterface::FILTERS),
263                $startsAt
264            ));
265        }
266        if ($startValue === null || $startValue === $endAt - 1) {
267            throw new FilterParseException(sprintf(
268                'Expected a value after "%s" at position %s, but got none.',
269                $filterType,
270                $startValue
271            ));
272        }
273    }
274
275    /**
276     * @param string $operator
277     * @param string $attribute
278     * @param string $value
279     * @return FilterInterface
280     * @psalm-return Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\PresentFilter|MatchingRuleFilter|SubstringFilter
281     * @throws FilterParseException
282     */
283    protected function getComparisonFilterObject(string $operator, string $attribute, string $value): FilterInterface
284    {
285        if ($operator === FilterInterface::FILTER_LTE) {
286            return Filters::lessThanOrEqual($attribute, $this->unescapeValue($value));
287        } elseif ($operator === FilterInterface::FILTER_GTE) {
288            return Filters::greaterThanOrEqual($attribute, $this->unescapeValue($value));
289        } elseif ($operator === FilterInterface::FILTER_APPROX) {
290            return Filters::approximate($attribute, $this->unescapeValue($value));
291        } elseif ($operator === FilterInterface::FILTER_EXT) {
292            return $this->getMatchingRuleFilterObject($attribute, $this->unescapeValue($value));
293        }
294
295        if ($value === '*') {
296            return Filters::present($attribute);
297        }
298
299        if (preg_match('/\*/', $value) !== 0) {
300            return $this->getSubstringFilterObject($attribute, $value);
301        } else {
302            return Filters::equal($attribute, $this->unescapeValue($value));
303        }
304    }
305
306    /**
307     * @param string $attribute
308     * @param string $value
309     * @return MatchingRuleFilter
310     * @throws FilterParseException
311     */
312    protected function getMatchingRuleFilterObject(string $attribute, string $value): MatchingRuleFilter
313    {
314        if (preg_match(self::MATCHING_RULE, $attribute, $matches) === 0) {
315            throw new FilterParseException(sprintf('The matching rule is not valid: %s', $attribute));
316        }
317        $matchRuleObj = new MatchingRuleFilter(null, null, $value);
318
319        $matchingRule = $matches[4] ?? '';
320        $attrName = $matches[1] ?? '';
321        $useDnAttr = isset($matches[2]);
322
323        # RFC 4511, 4.5.1.7.7: If the matchingRule field is absent, the type field MUST be present [..]
324        if ($matchingRule === '' && $attrName === '') {
325            throw new FilterParseException(sprintf(
326                'If the matching rule is absent the attribute type must be present, but it is not: %s',
327                $attribute
328            ));
329        }
330
331        if ($attrName !== '') {
332            $matchRuleObj->setAttribute($attrName);
333        }
334        if ($matchingRule !== '') {
335            $matchRuleObj->setMatchingRule($matchingRule);
336        }
337        $matchRuleObj->setUseDnAttributes($useDnAttr);
338
339        return $matchRuleObj;
340    }
341
342    /**
343     * @param string $attribute
344     * @param string $value
345     * @return SubstringFilter
346     * @throws FilterParseException
347     */
348    protected function getSubstringFilterObject(string $attribute, string $value): SubstringFilter
349    {
350        $filter = new SubstringFilter($attribute);
351        $substrings = preg_split('/\*/', $value, -1, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_NO_EMPTY);
352        if (!is_array($substrings)) {
353            throw new FilterParseException(sprintf(
354                'Unable to parse the substring filter for attribute %s.',
355                $attribute
356            ));
357        }
358
359        $contains = [];
360        foreach ($substrings as $substring) {
361            $substringValue = $this->unescapeValue($substring[0]);
362            $substringType = (int) $substring[1];
363
364            if ($substringType === 0) {
365                $filter->setStartsWith($substring[0]);
366            } elseif (($substringType + strlen($substringValue)) === strlen($value)) {
367                $filter->setEndsWith($substring[0]);
368            } else {
369                $contains[] = $substringValue;
370            }
371        }
372        $filter->setContains(...$contains);
373
374        return $filter;
375    }
376
377    /**
378     * @param int $startAt
379     * @param int $endAt
380     * @return FilterInterface
381     * @psalm-return Filter\NotFilter
382     * @throws FilterParseException
383     */
384    protected function getNotFilterObject(int $startAt, int $endAt): FilterInterface
385    {
386        if ($this->isAtFilterContainer($startAt + 2)) {
387            throw new FilterParseException(sprintf(
388                'The "not" filter at position %s cannot contain multiple filters.',
389                $startAt
390            ));
391        }
392        $info = $this->parseComparisonFilter($startAt + 2);
393        if (($info[0] + 1) !== $endAt) {
394            throw new FilterParseException(sprintf(
395                'The value after the "not" filter value was unexpected: %s',
396                substr($this->filter, $info[0] + 1, $endAt - ((int) $info[0] + 1))
397            ));
398        }
399
400        return Filters::not($info[1]);
401    }
402
403    /**
404     * @param string $char
405     * @param int $pos
406     * @return bool
407     */
408    protected function startsWith(string $char, int $pos): bool
409    {
410        return isset($this->filter[$pos]) && $this->filter[$pos] === $char;
411    }
412
413    /**
414     * This seems like a potential minefield. But the general idea is to loop through the complete filter to get the
415     * start and end positions of each container along the way. The container index marks the depth, with the lowest (0)
416     * being the furthest out and the higher numbers being closer inside. We skip positions inside the for loop when a
417     * comparison filter is encountered to only capture beginning and end parenthesis of containers.
418     *
419     * Each container encountered has its end point marked by detecting the closing parenthesis then we loop through known
420     * detected containers starting at the highest level that have no endpoints marked yet and working our way down.
421     *
422     * @throws FilterParseException
423     */
424    protected function parseContainerDepths(): void
425    {
426        $this->containers = $this->containers ?? [];
427
428        $child = null;
429        for ($i = 0; $i < $this->length; $i++) {
430            # Detect an unescaped left parenthesis
431            if ($this->filter[$i] === FilterInterface::PAREN_LEFT) {
432                [$i, $child] = $this->parseContainerStart((int)$i, $child);
433            # We have reached a closing parenthesis of a container, work backwards from those defined to set the ending.
434            } elseif ($this->filter[$i] === FilterInterface::PAREN_RIGHT) {
435                $this->parseContainerEnd((int)$i);
436            }
437        }
438
439        if ($this->containers !== null) {
440            foreach ($this->containers as $info) {
441                if ($info['endAt'] === null) {
442                    throw new FilterParseException('The filter has an unmatched "(".');
443                }
444            }
445        }
446    }
447
448    /**
449     * @param int $startAt
450     * @return int
451     * @throws FilterParseException
452     */
453    protected function nextClosingParenthesis(int $startAt)
454    {
455        for ($i = $startAt; $i < $this->length; $i++) {
456            # Look for the char, but ignore it if it is escaped
457            if ($this->filter[$i] === FilterInterface::PAREN_RIGHT) {
458                return $i;
459            }
460        }
461
462        throw new FilterParseException(sprintf(
463            'Expected a matching ")" after position %s, but none was found',
464            $startAt
465        ));
466    }
467
468    /**
469     * @param string $value
470     * @return string
471     */
472    protected function unescapeValue(string $value)
473    {
474        return (string) preg_replace_callback('/\\\\([0-9A-Fa-f]{2})/', function ($matches) {
475            return hex2bin($matches[1]);
476        }, $value);
477    }
478
479    /**
480     * @psalm-param 0|positive-int $child
481     * @throws FilterParseException
482     */
483    protected function parseContainerStart(int $i, ?int $child): array
484    {
485        # Is the parenthesis followed by an (ie. |, &, !) operator? If so it can contain children ...
486        if (isset($this->filter[$i + 1]) && in_array($this->filter[$i + 1], FilterInterface::OPERATORS, true)) {
487            $child = $child === null ? 0 : $child + 1;
488            $this->containers[$child] = ['startAt' => $i, 'endAt' => null];
489
490            $i += 2;
491            # Container inside the container ...
492            if ($this->isAtFilterContainer($i)) {
493                $i--;
494            # Comparison filter inside the container...
495            } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_LEFT) {
496                $i = $this->nextClosingParenthesis($i);
497            # An empty container is not allowed...
498            } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_RIGHT) {
499                throw new FilterParseException(sprintf(
500                    'The filter container near position %s is empty.',
501                    $i
502                ));
503            # Any other conditions possible? This shouldn't happen unless the filter is malformed..
504            } else {
505                throw new FilterParseException(sprintf(
506                    'Unexpected value after "%s" at position %s: %s',
507                    $this->filter[$i - 1] ?? '',
508                    $i + 1,
509                    $this->filter[$i + 1] ?? ''
510                ));
511            }
512            # If there is no operator this is a standard comparison filter, just find the next closing parenthesis
513        } else {
514            $i = $this->nextClosingParenthesis($i + 1);
515        }
516
517        return [$i, $child];
518    }
519
520    /**
521     * @throws FilterParseException
522     */
523    protected function parseContainerEnd(int $i): void
524    {
525        $matchFound = false;
526        foreach (array_reverse((array) $this->containers, true) as $ci => $info) {
527            if ($info['endAt'] === null) {
528                $this->containers[$ci]['endAt'] = $i + 1;
529                $matchFound = true;
530                break;
531            }
532        }
533        if (!$matchFound) {
534            throw new FilterParseException(sprintf(
535                'The closing ")" at position %s has no matching parenthesis',
536                $i
537            ));
538        }
539    }
540}
541