1<?php 2 3/** 4 * This file is part of the FreeDSx LDAP package. 5 * 6 * (c) Chad Sikorra <Chad.Sikorra@gmail.com> 7 * 8 * For the full copyright and license information, please view the LICENSE 9 * file that was distributed with this source code. 10 */ 11 12namespace FreeDSx\Ldap\Search; 13 14use FreeDSx\Ldap\Exception\FilterParseException; 15use FreeDSx\Ldap\Search\Filter\AndFilter; 16use FreeDSx\Ldap\Search\Filter\FilterInterface; 17use FreeDSx\Ldap\Search\Filter\MatchingRuleFilter; 18use FreeDSx\Ldap\Search\Filter\OrFilter; 19use FreeDSx\Ldap\Search\Filter\SubstringFilter; 20 21/** 22 * Parses LDAP filter strings. RFC 4515. 23 * 24 * @author Chad Sikorra <Chad.Sikorra@gmail.com> 25 */ 26class FilterParser 27{ 28 protected const MATCHING_RULE = '/^([a-zA-Z0-9\.]+)?(\:dn)?(\:([a-zA-Z0-9\.]+))?$/'; 29 30 /** 31 * @var string 32 */ 33 protected $filter = ''; 34 35 /** 36 * @var int 37 */ 38 protected $length = 0; 39 40 /** 41 * @var int 42 */ 43 protected $depth = 0; 44 45 /** 46 * @var null|array 47 * @psalm-var null|array<0|positive-int, array{startAt?: int, endAt: null|int}> 48 */ 49 protected $containers; 50 51 /** 52 * @param string $filter 53 */ 54 public function __construct(string $filter) 55 { 56 $this->filter = $filter; 57 $this->length = strlen($filter); 58 } 59 60 /** 61 * @param string $filter 62 * @return FilterInterface 63 * @psalm-return AndFilter|Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\NotFilter|Filter\PresentFilter|MatchingRuleFilter|OrFilter|SubstringFilter 64 * @throws FilterParseException 65 */ 66 public static function parse(string $filter): FilterInterface 67 { 68 if ($filter === '') { 69 throw new FilterParseException('The filter cannot be empty.'); 70 } 71 72 return (new self($filter))->parseFilterString(0, true)[1]; 73 } 74 75 /** 76 * @param int $startAt 77 * @param bool $isRoot 78 * @return array 79 * @psalm-return array{0: int|null, 1: AndFilter|Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\NotFilter|Filter\PresentFilter|MatchingRuleFilter|OrFilter|SubstringFilter} 80 * @throws FilterParseException 81 */ 82 protected function parseFilterString(int $startAt, bool $isRoot = false): array 83 { 84 if ($this->isAtFilterContainer($startAt)) { 85 [$endsAt, $filter] = $this->parseFilterContainer($startAt, $isRoot); 86 } else { 87 [$endsAt, $filter] = $this->parseComparisonFilter($startAt, $isRoot); 88 } 89 if ($isRoot && $endsAt !== $this->length) { 90 throw new FilterParseException(sprintf( 91 'Unexpected value at end of the filter: %s', 92 substr($this->filter, $endsAt) 93 )); 94 } 95 96 return [$endsAt, $filter]; 97 } 98 99 /** 100 * @param int $pos 101 * @return bool 102 */ 103 protected function isAtFilterContainer(int $pos): bool 104 { 105 if (!$this->startsWith(FilterInterface::PAREN_LEFT, $pos)) { 106 return false; 107 } 108 109 foreach (FilterInterface::OPERATORS as $compOp) { 110 if ($this->startsWith($compOp, $pos + 1)) { 111 return true; 112 } 113 } 114 115 return false; 116 } 117 118 /** 119 * Parse a filter container. Always returns the ending position followed by the filter container. 120 * 121 * @param int $startAt 122 * @param bool $isRoot 123 * @return array 124 * @psalm-return array{0: int, 1: AndFilter|Filter\NotFilter|OrFilter} 125 * @throws FilterParseException 126 */ 127 protected function parseFilterContainer(int $startAt, bool $isRoot): array 128 { 129 if ($this->containers === null) { 130 $this->parseContainerDepths(); 131 } 132 $this->depth += $isRoot ? 0 : 1; 133 if (!isset($this->containers[$this->depth])) { 134 throw new FilterParseException(sprintf( 135 'The container at position %s is unrecognized. Perhaps there\'s an unmatched "(".', 136 $startAt 137 )); 138 } 139 $endAt = $this->containers[$this->depth]['endAt'] ?? 0; 140 $operator = substr($this->filter, $startAt + 1, 1); 141 142 if ($operator === FilterInterface::OPERATOR_NOT) { 143 return [$endAt, $this->getNotFilterObject($startAt, $endAt)]; 144 } 145 146 $startAt += 2; 147 $filter = $operator === FilterInterface::OPERATOR_AND ? new AndFilter() : new OrFilter(); 148 while ($endAt !== ($startAt + 1)) { 149 [$startAt, $childFilter] = $this->parseFilterString($startAt ?? 0); 150 $filter->add($childFilter); 151 } 152 153 return [$endAt, $filter]; 154 } 155 156 /** 157 * Parse a comparison operator. Always returns the ending position followed by the filter. 158 * 159 * @param int $startAt 160 * @param bool $isRoot 161 * @return array 162 * @psalm-return array{0: int, 1: Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\PresentFilter|MatchingRuleFilter|SubstringFilter} 163 * @throws FilterParseException 164 */ 165 protected function parseComparisonFilter(int $startAt, bool $isRoot = false): array 166 { 167 $parenthesis = $this->validateComparisonFilter($startAt, $isRoot); 168 $endAt = !$parenthesis && $isRoot ? $this->length : $this->nextClosingParenthesis($startAt) + 1; 169 170 $attributeEndsAfter = null; 171 $filterType = null; 172 $valueStartsAt = null; 173 for ($i = $startAt; $i < $endAt; $i++) { 174 foreach (FilterInterface::FILTERS as $op) { 175 if ($this->filter[$i] === $op) { 176 $filterType = $op; 177 } elseif (($i + 1) < $endAt && $this->filter[$i] . $this->filter[$i + 1] === $op) { 178 $filterType = $op; 179 } 180 if ($filterType !== null) { 181 break; 182 } 183 } 184 if ($filterType !== null) { 185 $attributeEndsAfter = $i - $startAt; 186 $valueStartsAt = $i + strlen($filterType); 187 break; 188 } 189 } 190 $this->validateParsedFilter($filterType, $startAt, $valueStartsAt, $endAt); 191 192 $attribute = substr( 193 $this->filter, 194 $startAt + ($parenthesis ? 1 : 0), 195 (int) $attributeEndsAfter - ($parenthesis ? 1 : 0) 196 ); 197 $value = substr( 198 $this->filter, 199 (int) $valueStartsAt, 200 $endAt - (int) $valueStartsAt - ($parenthesis ? 1 : 0) 201 ); 202 203 if ($attribute === '') { 204 throw new FilterParseException(sprintf( 205 'The attribute is missing in the filter near position %s.', 206 $startAt 207 )); 208 } 209 210 return [$endAt, $this->getComparisonFilterObject((string) $filterType, $attribute, $value)]; 211 } 212 213 /** 214 * Validates some initial filter logic and determines if the filter is wrapped in parenthesis (validly). 215 * 216 * @param int $startAt 217 * @param bool $isRoot 218 * @return bool 219 * @throws FilterParseException 220 */ 221 protected function validateComparisonFilter(int $startAt, bool $isRoot): bool 222 { 223 $parenthesis = true; 224 225 # A filter without an opening parenthesis is only valid if it is the root. And it cannot have a closing parenthesis. 226 if ($isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) { 227 $parenthesis = false; 228 $pos = false; 229 try { 230 $pos = $this->nextClosingParenthesis($startAt); 231 } catch (FilterParseException $e) { 232 } 233 if ($pos !== false) { 234 throw new FilterParseException(sprintf('The ")" at char %s has no matching parenthesis', $pos)); 235 } 236 # If this is not a root filter, it must start with an opening parenthesis. 237 } elseif (!$isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) { 238 throw new FilterParseException(sprintf( 239 'The character "%s" at position %s was unexpected. Expected "(".', 240 $this->filter[$startAt], 241 $startAt 242 )); 243 } 244 245 return $parenthesis; 246 } 247 248 /** 249 * Validate some basic aspects of the filter after it is parsed. 250 * 251 * @param string|null $filterType 252 * @param int|null $startsAt 253 * @param int|null $startValue 254 * @param int $endAt 255 * @throws FilterParseException 256 */ 257 protected function validateParsedFilter(?string $filterType, ?int $startsAt, ?int $startValue, $endAt): void 258 { 259 if ($filterType === null) { 260 throw new FilterParseException(sprintf( 261 'Expected one of "%s" in the filter after position %s, but received none.', 262 implode(', ', FilterInterface::FILTERS), 263 $startsAt 264 )); 265 } 266 if ($startValue === null || $startValue === $endAt - 1) { 267 throw new FilterParseException(sprintf( 268 'Expected a value after "%s" at position %s, but got none.', 269 $filterType, 270 $startValue 271 )); 272 } 273 } 274 275 /** 276 * @param string $operator 277 * @param string $attribute 278 * @param string $value 279 * @return FilterInterface 280 * @psalm-return Filter\ApproximateFilter|Filter\EqualityFilter|Filter\GreaterThanOrEqualFilter|Filter\LessThanOrEqualFilter|Filter\PresentFilter|MatchingRuleFilter|SubstringFilter 281 * @throws FilterParseException 282 */ 283 protected function getComparisonFilterObject(string $operator, string $attribute, string $value): FilterInterface 284 { 285 if ($operator === FilterInterface::FILTER_LTE) { 286 return Filters::lessThanOrEqual($attribute, $this->unescapeValue($value)); 287 } elseif ($operator === FilterInterface::FILTER_GTE) { 288 return Filters::greaterThanOrEqual($attribute, $this->unescapeValue($value)); 289 } elseif ($operator === FilterInterface::FILTER_APPROX) { 290 return Filters::approximate($attribute, $this->unescapeValue($value)); 291 } elseif ($operator === FilterInterface::FILTER_EXT) { 292 return $this->getMatchingRuleFilterObject($attribute, $this->unescapeValue($value)); 293 } 294 295 if ($value === '*') { 296 return Filters::present($attribute); 297 } 298 299 if (preg_match('/\*/', $value) !== 0) { 300 return $this->getSubstringFilterObject($attribute, $value); 301 } else { 302 return Filters::equal($attribute, $this->unescapeValue($value)); 303 } 304 } 305 306 /** 307 * @param string $attribute 308 * @param string $value 309 * @return MatchingRuleFilter 310 * @throws FilterParseException 311 */ 312 protected function getMatchingRuleFilterObject(string $attribute, string $value): MatchingRuleFilter 313 { 314 if (preg_match(self::MATCHING_RULE, $attribute, $matches) === 0) { 315 throw new FilterParseException(sprintf('The matching rule is not valid: %s', $attribute)); 316 } 317 $matchRuleObj = new MatchingRuleFilter(null, null, $value); 318 319 $matchingRule = $matches[4] ?? ''; 320 $attrName = $matches[1] ?? ''; 321 $useDnAttr = isset($matches[2]); 322 323 # RFC 4511, 4.5.1.7.7: If the matchingRule field is absent, the type field MUST be present [..] 324 if ($matchingRule === '' && $attrName === '') { 325 throw new FilterParseException(sprintf( 326 'If the matching rule is absent the attribute type must be present, but it is not: %s', 327 $attribute 328 )); 329 } 330 331 if ($attrName !== '') { 332 $matchRuleObj->setAttribute($attrName); 333 } 334 if ($matchingRule !== '') { 335 $matchRuleObj->setMatchingRule($matchingRule); 336 } 337 $matchRuleObj->setUseDnAttributes($useDnAttr); 338 339 return $matchRuleObj; 340 } 341 342 /** 343 * @param string $attribute 344 * @param string $value 345 * @return SubstringFilter 346 * @throws FilterParseException 347 */ 348 protected function getSubstringFilterObject(string $attribute, string $value): SubstringFilter 349 { 350 $filter = new SubstringFilter($attribute); 351 $substrings = preg_split('/\*/', $value, -1, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_NO_EMPTY); 352 if (!is_array($substrings)) { 353 throw new FilterParseException(sprintf( 354 'Unable to parse the substring filter for attribute %s.', 355 $attribute 356 )); 357 } 358 359 $contains = []; 360 foreach ($substrings as $substring) { 361 $substringValue = $this->unescapeValue($substring[0]); 362 $substringType = (int) $substring[1]; 363 364 if ($substringType === 0) { 365 $filter->setStartsWith($substring[0]); 366 } elseif (($substringType + strlen($substringValue)) === strlen($value)) { 367 $filter->setEndsWith($substring[0]); 368 } else { 369 $contains[] = $substringValue; 370 } 371 } 372 $filter->setContains(...$contains); 373 374 return $filter; 375 } 376 377 /** 378 * @param int $startAt 379 * @param int $endAt 380 * @return FilterInterface 381 * @psalm-return Filter\NotFilter 382 * @throws FilterParseException 383 */ 384 protected function getNotFilterObject(int $startAt, int $endAt): FilterInterface 385 { 386 if ($this->isAtFilterContainer($startAt + 2)) { 387 throw new FilterParseException(sprintf( 388 'The "not" filter at position %s cannot contain multiple filters.', 389 $startAt 390 )); 391 } 392 $info = $this->parseComparisonFilter($startAt + 2); 393 if (($info[0] + 1) !== $endAt) { 394 throw new FilterParseException(sprintf( 395 'The value after the "not" filter value was unexpected: %s', 396 substr($this->filter, $info[0] + 1, $endAt - ((int) $info[0] + 1)) 397 )); 398 } 399 400 return Filters::not($info[1]); 401 } 402 403 /** 404 * @param string $char 405 * @param int $pos 406 * @return bool 407 */ 408 protected function startsWith(string $char, int $pos): bool 409 { 410 return isset($this->filter[$pos]) && $this->filter[$pos] === $char; 411 } 412 413 /** 414 * This seems like a potential minefield. But the general idea is to loop through the complete filter to get the 415 * start and end positions of each container along the way. The container index marks the depth, with the lowest (0) 416 * being the furthest out and the higher numbers being closer inside. We skip positions inside the for loop when a 417 * comparison filter is encountered to only capture beginning and end parenthesis of containers. 418 * 419 * Each container encountered has its end point marked by detecting the closing parenthesis then we loop through known 420 * detected containers starting at the highest level that have no endpoints marked yet and working our way down. 421 * 422 * @throws FilterParseException 423 */ 424 protected function parseContainerDepths(): void 425 { 426 $this->containers = $this->containers ?? []; 427 428 $child = null; 429 for ($i = 0; $i < $this->length; $i++) { 430 # Detect an unescaped left parenthesis 431 if ($this->filter[$i] === FilterInterface::PAREN_LEFT) { 432 [$i, $child] = $this->parseContainerStart((int)$i, $child); 433 # We have reached a closing parenthesis of a container, work backwards from those defined to set the ending. 434 } elseif ($this->filter[$i] === FilterInterface::PAREN_RIGHT) { 435 $this->parseContainerEnd((int)$i); 436 } 437 } 438 439 if ($this->containers !== null) { 440 foreach ($this->containers as $info) { 441 if ($info['endAt'] === null) { 442 throw new FilterParseException('The filter has an unmatched "(".'); 443 } 444 } 445 } 446 } 447 448 /** 449 * @param int $startAt 450 * @return int 451 * @throws FilterParseException 452 */ 453 protected function nextClosingParenthesis(int $startAt) 454 { 455 for ($i = $startAt; $i < $this->length; $i++) { 456 # Look for the char, but ignore it if it is escaped 457 if ($this->filter[$i] === FilterInterface::PAREN_RIGHT) { 458 return $i; 459 } 460 } 461 462 throw new FilterParseException(sprintf( 463 'Expected a matching ")" after position %s, but none was found', 464 $startAt 465 )); 466 } 467 468 /** 469 * @param string $value 470 * @return string 471 */ 472 protected function unescapeValue(string $value) 473 { 474 return (string) preg_replace_callback('/\\\\([0-9A-Fa-f]{2})/', function ($matches) { 475 return hex2bin($matches[1]); 476 }, $value); 477 } 478 479 /** 480 * @psalm-param 0|positive-int $child 481 * @throws FilterParseException 482 */ 483 protected function parseContainerStart(int $i, ?int $child): array 484 { 485 # Is the parenthesis followed by an (ie. |, &, !) operator? If so it can contain children ... 486 if (isset($this->filter[$i + 1]) && in_array($this->filter[$i + 1], FilterInterface::OPERATORS, true)) { 487 $child = $child === null ? 0 : $child + 1; 488 $this->containers[$child] = ['startAt' => $i, 'endAt' => null]; 489 490 $i += 2; 491 # Container inside the container ... 492 if ($this->isAtFilterContainer($i)) { 493 $i--; 494 # Comparison filter inside the container... 495 } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_LEFT) { 496 $i = $this->nextClosingParenthesis($i); 497 # An empty container is not allowed... 498 } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_RIGHT) { 499 throw new FilterParseException(sprintf( 500 'The filter container near position %s is empty.', 501 $i 502 )); 503 # Any other conditions possible? This shouldn't happen unless the filter is malformed.. 504 } else { 505 throw new FilterParseException(sprintf( 506 'Unexpected value after "%s" at position %s: %s', 507 $this->filter[$i - 1] ?? '', 508 $i + 1, 509 $this->filter[$i + 1] ?? '' 510 )); 511 } 512 # If there is no operator this is a standard comparison filter, just find the next closing parenthesis 513 } else { 514 $i = $this->nextClosingParenthesis($i + 1); 515 } 516 517 return [$i, $child]; 518 } 519 520 /** 521 * @throws FilterParseException 522 */ 523 protected function parseContainerEnd(int $i): void 524 { 525 $matchFound = false; 526 foreach (array_reverse((array) $this->containers, true) as $ci => $info) { 527 if ($info['endAt'] === null) { 528 $this->containers[$ci]['endAt'] = $i + 1; 529 $matchFound = true; 530 break; 531 } 532 } 533 if (!$matchFound) { 534 throw new FilterParseException(sprintf( 535 'The closing ")" at position %s has no matching parenthesis', 536 $i 537 )); 538 } 539 } 540} 541