1<?php
2/**
3 * This file is part of the FreeDSx LDAP package.
4 *
5 * (c) Chad Sikorra <Chad.Sikorra@gmail.com>
6 *
7 * For the full copyright and license information, please view the LICENSE
8 * file that was distributed with this source code.
9 */
10
11namespace FreeDSx\Ldap\Search;
12
13use FreeDSx\Ldap\Exception\FilterParseException;
14use FreeDSx\Ldap\Search\Filter\AndFilter;
15use FreeDSx\Ldap\Search\Filter\FilterInterface;
16use FreeDSx\Ldap\Search\Filter\MatchingRuleFilter;
17use FreeDSx\Ldap\Search\Filter\OrFilter;
18use FreeDSx\Ldap\Search\Filter\SubstringFilter;
19
20/**
21 * Parses LDAP filter strings. RFC 4515.
22 *
23 * @author Chad Sikorra <Chad.Sikorra@gmail.com>
24 */
25class FilterParser
26{
27    protected const MATCHING_RULE = '/^([a-zA-Z0-9\.]+)?(\:dn)?(\:([a-zA-Z0-9\.]+))?$/';
28
29    /**
30     * @var string
31     */
32    protected $filter = '';
33
34    /**
35     * @var int
36     */
37    protected $length = 0;
38
39    /**
40     * @var int
41     */
42    protected $depth = 0;
43
44    /**
45     * @var null|array
46     */
47    protected $containers;
48
49    /**
50     * @param string $filter
51     */
52    public function __construct(string $filter)
53    {
54        $this->filter = $filter;
55        $this->length = strlen($filter);
56    }
57
58    /**
59     * @param string $filter
60     * @return FilterInterface
61     * @throws FilterParseException
62     */
63    public static function parse(string $filter): FilterInterface
64    {
65        if ($filter === '') {
66            throw new FilterParseException('The filter cannot be empty.');
67        }
68
69        return (new self($filter))->parseFilterString(0, true)[1];
70    }
71
72    /**
73     * @param int $startAt
74     * @param bool $isRoot
75     * @return array
76     * @throws FilterParseException
77     */
78    protected function parseFilterString(int $startAt, bool $isRoot = false): array
79    {
80        if ($this->isAtFilterContainer($startAt)) {
81            [$endsAt, $filter] = $this->parseFilterContainer($startAt, $isRoot);
82        } else {
83            [$endsAt, $filter] = $this->parseComparisonFilter($startAt, $isRoot);
84        }
85        if ($isRoot && $endsAt !== $this->length) {
86            throw new FilterParseException(sprintf(
87                'Unexpected value at end of the filter: %s',
88                substr($this->filter, $endsAt)
89            ));
90        }
91
92        return [$endsAt, $filter];
93    }
94
95    /**
96     * @param int $pos
97     * @return bool
98     */
99    protected function isAtFilterContainer(int $pos): bool
100    {
101        if (!$this->startsWith(FilterInterface::PAREN_LEFT, $pos)) {
102            return false;
103        }
104
105        foreach (FilterInterface::OPERATORS as $compOp) {
106            if ($this->startsWith($compOp, $pos + 1)) {
107                return true;
108            }
109        }
110
111        return false;
112    }
113
114    /**
115     * Parse a filter container. Always returns the ending position followed by the filter container.
116     *
117     * @param int $startAt
118     * @param bool $isRoot
119     * @return array
120     * @throws FilterParseException
121     */
122    protected function parseFilterContainer(int $startAt, bool $isRoot): array
123    {
124        if ($this->containers === null) {
125            $this->parseContainerDepths();
126        }
127        $this->depth += $isRoot ? 0 : 1;
128        if (!isset($this->containers[$this->depth])) {
129            throw new FilterParseException(sprintf(
130                'The container at position %s is unrecognized. Perhaps there\'s an unmatched "(".' .
131                $startAt
132            ));
133        }
134        $endAt = $this->containers[$this->depth]['endAt'];
135        $operator = substr($this->filter, $startAt + 1, 1);
136
137        if ($operator === FilterInterface::OPERATOR_NOT) {
138            return [$endAt, $this->getNotFilterObject($startAt, $endAt)];
139        }
140
141        $startAt += 2;
142        $filter = $operator === FilterInterface::OPERATOR_AND ? new AndFilter() : new OrFilter();
143        while ($endAt !== ($startAt + 1)) {
144            [$startAt, $childFilter] = $this->parseFilterString($startAt);
145            $filter->add($childFilter);
146        }
147
148        return [$endAt, $filter];
149    }
150
151    /**
152     * Parse a comparison operator. Always returns the ending position followed by the filter.
153     *
154     * @param int $startAt
155     * @param bool $isRoot
156     * @return array
157     * @throws FilterParseException
158     */
159    protected function parseComparisonFilter(int $startAt, bool $isRoot = false): array
160    {
161        $parenthesis = $this->validateComparisonFilter($startAt, $isRoot);
162        $endAt = !$parenthesis && $isRoot ? $this->length : $this->nextClosingParenthesis($startAt) + 1;
163
164        $attributeEndsAfter = null;
165        $filterType = null;
166        $valueStartsAt = null;
167        for ($i = $startAt; $i < $endAt; $i++) {
168            foreach (FilterInterface::FILTERS as $op) {
169                if ($this->filter[$i] === $op) {
170                    $filterType = $op;
171                } elseif (($i + 1) < $endAt && $this->filter[$i] . $this->filter[$i + 1] === $op) {
172                    $filterType = $op;
173                }
174                if ($filterType !== null) {
175                    break;
176                }
177            }
178            if ($filterType !== null) {
179                $attributeEndsAfter = $i - $startAt;
180                $valueStartsAt = $i + strlen($filterType);
181                break;
182            }
183        }
184        $this->validateParsedFilter($filterType, $startAt, $valueStartsAt, $endAt);
185
186        $attribute = substr(
187            $this->filter,
188            $startAt + ($parenthesis ? 1 : 0),
189            (int) $attributeEndsAfter - ($parenthesis ? 1 : 0)
190        );
191        $value = substr(
192            $this->filter,
193            (int) $valueStartsAt,
194            $endAt - (int) $valueStartsAt - ($parenthesis ? 1 : 0)
195        );
196
197        if ($attribute === '') {
198            throw new FilterParseException(sprintf(
199                'The attribute is missing in the filter near position %s.',
200                $startAt
201            ));
202        }
203
204        return [$endAt, $this->getComparisonFilterObject((string) $filterType, $attribute, $value)];
205    }
206
207    /**
208     * Validates some initial filter logic and determines if the filter is wrapped in parenthesis (validly).
209     *
210     * @param int $startAt
211     * @param bool $isRoot
212     * @return bool
213     * @throws FilterParseException
214     */
215    protected function validateComparisonFilter(int $startAt, bool $isRoot): bool
216    {
217        $parenthesis = true;
218
219        # A filter without an opening parenthesis is only valid if it is the root. And it cannot have a closing parenthesis.
220        if ($isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) {
221            $parenthesis = false;
222            $pos = false;
223            try {
224                $pos = $this->nextClosingParenthesis($startAt);
225            } catch (FilterParseException $e) {
226            }
227            if ($pos !== false) {
228                throw new FilterParseException(sprintf('The ")" at char %s has no matching parenthesis', $pos));
229            }
230            # If this is not a root filter, it must start with an opening parenthesis.
231        } elseif (!$isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) {
232            throw new FilterParseException(sprintf(
233                'The character "%s" at position %s was unexpected. Expected "(".',
234                $this->filter[$startAt],
235                $startAt
236            ));
237        }
238
239        return $parenthesis;
240    }
241
242    /**
243     * Validate some basic aspects of the filter after it is parsed.
244     *
245     * @param string|null $filterType
246     * @param int|null $startsAt
247     * @param int|null $startValue
248     * @param int $endAt
249     * @throws FilterParseException
250     */
251    protected function validateParsedFilter(?string $filterType, ?int $startsAt, ?int $startValue, $endAt): void
252    {
253        if ($filterType === null) {
254            throw new FilterParseException(sprintf(
255                'Expected one of "%s" in the filter after position %s, but received none.',
256                implode(', ', FilterInterface::FILTERS),
257                $startsAt
258            ));
259        }
260        if ($startValue === null || $startValue === $endAt - 1) {
261            throw new FilterParseException(sprintf(
262                'Expected a value after "%s" at position %s, but got none.',
263                $filterType,
264                $startValue
265            ));
266        }
267    }
268
269    /**
270     * @param string $operator
271     * @param string $attribute
272     * @param string $value
273     * @return FilterInterface
274     */
275    protected function getComparisonFilterObject(string $operator, string $attribute, string $value): FilterInterface
276    {
277        if ($operator === FilterInterface::FILTER_LTE) {
278            return Filters::lessThanOrEqual($attribute, $this->unescapeValue($value));
279        } elseif ($operator === FilterInterface::FILTER_GTE) {
280            return Filters::greaterThanOrEqual($attribute, $this->unescapeValue($value));
281        } elseif ($operator === FilterInterface::FILTER_APPROX) {
282            return Filters::approximate($attribute, $this->unescapeValue($value));
283        } elseif ($operator === FilterInterface::FILTER_EXT) {
284            return $this->getMatchingRuleFilterObject($attribute, $this->unescapeValue($value));
285        }
286
287        if ($value === '*') {
288            return Filters::present($attribute);
289        }
290
291        if (preg_match('/\*/', $value) !== 0) {
292            return $this->getSubstringFilterObject($attribute, $value);
293        } else {
294            return Filters::equal($attribute, $this->unescapeValue($value));
295        }
296    }
297
298    /**
299     * @param string $attribute
300     * @param string $value
301     * @return MatchingRuleFilter
302     * @throws FilterParseException
303     */
304    protected function getMatchingRuleFilterObject(string $attribute, string $value): MatchingRuleFilter
305    {
306        if (preg_match(self::MATCHING_RULE, $attribute, $matches) === 0) {
307            throw new FilterParseException(sprintf('The matching rule is not valid: %s', $attribute));
308        }
309        $matchRuleObj = new MatchingRuleFilter(null, null, $value);
310
311        $matchingRule = $matches[4] ?? '';
312        $attrName = $matches[1] ?? '';
313        $useDnAttr = isset($matches[2]);
314
315        # RFC 4511, 4.5.1.7.7: If the matchingRule field is absent, the type field MUST be present [..]
316        if ($matchingRule === '' && $attrName === '') {
317            throw new FilterParseException(sprintf(
318                'If the matching rule is absent the attribute type must be present, but it is not: %s',
319                $attribute
320            ));
321        }
322
323        if ($attrName !== '') {
324            $matchRuleObj->setAttribute($attrName);
325        }
326        if ($matchingRule !== '') {
327            $matchRuleObj->setMatchingRule($matchingRule);
328        }
329        $matchRuleObj->setUseDnAttributes($useDnAttr);
330
331        return $matchRuleObj;
332    }
333
334    /**
335     * @param string $attribute
336     * @param string $value
337     * @return SubstringFilter
338     */
339    protected function getSubstringFilterObject(string $attribute, string $value): SubstringFilter
340    {
341        $filter = new SubstringFilter($attribute);
342        $substrings = preg_split('/\*/', $value, -1, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_NO_EMPTY);
343        if (!is_array($substrings)) {
344            throw new FilterParseException(sprintf(
345                'Unable to parse the substring filter for attribute %s.',
346                $attribute
347            ));
348        }
349
350        $contains = [];
351        foreach ($substrings as $substring) {
352            $substringValue = $this->unescapeValue($substring[0]);
353            $substringType = (int) $substring[1];
354
355            if ($substringType === 0) {
356                $filter->setStartsWith($substring[0]);
357            } elseif (($substringType + strlen($substringValue)) === strlen($value)) {
358                $filter->setEndsWith($substring[0]);
359            } else {
360                $contains[] = $substringValue;
361            }
362        }
363        $filter->setContains(...$contains);
364
365        return $filter;
366    }
367
368    /**
369     * @param int $startAt
370     * @param int $endAt
371     * @return FilterInterface
372     * @throws FilterParseException
373     */
374    protected function getNotFilterObject(int $startAt, int $endAt): FilterInterface
375    {
376        if ($this->isAtFilterContainer($startAt + 2)) {
377            throw new FilterParseException(sprintf(
378                'The "not" filter at position %s cannot contain multiple filters.',
379                $startAt
380            ));
381        }
382        $info = $this->parseComparisonFilter($startAt + 2);
383        if (($info[0] + 1) !== $endAt) {
384            throw new FilterParseException(sprintf(
385                'The value after the "not" filter value was unexpected: %s',
386                substr($this->filter, $info[0] + 1, $endAt - ((int) $info[0] + 1))
387            ));
388        }
389
390        return Filters::not($info[1]);
391    }
392
393    /**
394     * @param string $char
395     * @param int $pos
396     * @return bool
397     */
398    protected function startsWith(string $char, int $pos): bool
399    {
400        return isset($this->filter[$pos]) && $this->filter[$pos] === $char;
401    }
402
403    /**
404     * This seems like a potential minefield. But the general idea is to loop through the complete filter to get the
405     * start and end positions of each container along the way. The container index marks the depth, with the lowest (0)
406     * being the furthest out and the higher numbers being closer inside. We skip positions inside the for loop when a
407     * comparison filter is encountered to only capture beginning and end parenthesis of containers.
408     *
409     * Each container encountered has its end point marked by detecting the closing parenthesis then we loop through known
410     * detected containers starting at the highest level that have no endpoints marked yet and working our way down.
411     *
412     * @throws FilterParseException
413     */
414    protected function parseContainerDepths(): void
415    {
416        $this->containers = $this->containers ?? [];
417
418        $child = null;
419        for ($i = 0; $i < $this->length; $i++) {
420            # Detect an unescaped left parenthesis
421            if ($this->filter[$i] === FilterInterface::PAREN_LEFT) {
422                [$i, $child] = $this->parseContainerStart($i, $child);
423            # We have reached a closing parenthesis of a container, work backwards from those defined to set the ending.
424            } elseif ($this->filter[$i] === FilterInterface::PAREN_RIGHT) {
425                $this->parseContainerEnd($i);
426            }
427        }
428
429        foreach ($this->containers as $info) {
430            if ($info['endAt'] === null) {
431                throw new FilterParseException('The filter has an unmatched "(".');
432            }
433        }
434    }
435
436    /**
437     * @param int $startAt
438     * @return int
439     * @throws FilterParseException
440     */
441    protected function nextClosingParenthesis(int $startAt)
442    {
443        for ($i = $startAt; $i < $this->length; $i++) {
444            # Look for the char, but ignore it if it is escaped
445            if ($this->filter[$i] === FilterInterface::PAREN_RIGHT) {
446                return $i;
447            }
448        }
449
450        throw new FilterParseException(sprintf(
451            'Expected a matching ")" after position %s, but none was found',
452            $startAt
453        ));
454    }
455
456    /**
457     * @param string $value
458     * @return string
459     */
460    protected function unescapeValue(string $value)
461    {
462        return (string) preg_replace_callback('/\\\\([0-9A-Fa-f]{2})/', function ($matches) {
463            return hex2bin($matches[1]);
464        }, $value);
465    }
466
467    /**
468     * @throws FilterParseException
469     */
470    protected function parseContainerStart(int $i, ?int $child): array
471    {
472        # Is the parenthesis followed by an (ie. |, &, !) operator? If so it can contain children ...
473        if (isset($this->filter[$i + 1]) && in_array($this->filter[$i + 1], FilterInterface::OPERATORS, true)) {
474            $child = $child === null ? 0 : $child + 1;
475            $this->containers[$child] = ['startAt' => $i, 'endAt' => null];
476
477            $i += 2;
478            # Container inside the container ...
479            if ($this->isAtFilterContainer($i)) {
480                $i--;
481            # Comparison filter inside the container...
482            } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_LEFT) {
483                $i = $this->nextClosingParenthesis($i);
484            # An empty container is not allowed...
485            } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_RIGHT) {
486                throw new FilterParseException(sprintf(
487                    'The filter container near position %s is empty.',
488                    $i
489                ));
490            # Any other conditions possible? This shouldn't happen unless the filter is malformed..
491            } else {
492                throw new FilterParseException(sprintf(
493                    'Unexpected value after "%s" at position %s: %s',
494                    $this->filter[$i - 1] ?? '',
495                    $i + 1,
496                    $this->filter[$i + 1] ?? ''
497                ));
498            }
499            # If there is no operator this is a standard comparison filter, just find the next closing parenthesis
500        } else {
501            $i = $this->nextClosingParenthesis($i + 1);
502        }
503
504        return [$i, $child];
505    }
506
507    /**
508     * @throws FilterParseException
509     */
510    protected function parseContainerEnd(int $i): void
511    {
512        $matchFound = false;
513        foreach (array_reverse((array) $this->containers, true) as $ci => $info) {
514            if ($info['endAt'] === null) {
515                $this->containers[$ci]['endAt'] = $i + 1;
516                $matchFound = true;
517                break;
518            }
519        }
520        if (!$matchFound) {
521            throw new FilterParseException(sprintf(
522                'The closing ")" at position %s has no matching parenthesis',
523                $i
524            ));
525        }
526    }
527}
528