1<?php 2/** 3 * This file is part of the FreeDSx LDAP package. 4 * 5 * (c) Chad Sikorra <Chad.Sikorra@gmail.com> 6 * 7 * For the full copyright and license information, please view the LICENSE 8 * file that was distributed with this source code. 9 */ 10 11namespace FreeDSx\Ldap\Search; 12 13use FreeDSx\Ldap\Exception\FilterParseException; 14use FreeDSx\Ldap\Search\Filter\AndFilter; 15use FreeDSx\Ldap\Search\Filter\FilterInterface; 16use FreeDSx\Ldap\Search\Filter\MatchingRuleFilter; 17use FreeDSx\Ldap\Search\Filter\OrFilter; 18use FreeDSx\Ldap\Search\Filter\SubstringFilter; 19 20/** 21 * Parses LDAP filter strings. RFC 4515. 22 * 23 * @author Chad Sikorra <Chad.Sikorra@gmail.com> 24 */ 25class FilterParser 26{ 27 protected const MATCHING_RULE = '/^([a-zA-Z0-9\.]+)?(\:dn)?(\:([a-zA-Z0-9\.]+))?$/'; 28 29 /** 30 * @var string 31 */ 32 protected $filter = ''; 33 34 /** 35 * @var int 36 */ 37 protected $length = 0; 38 39 /** 40 * @var int 41 */ 42 protected $depth = 0; 43 44 /** 45 * @var null|array 46 */ 47 protected $containers; 48 49 /** 50 * @param string $filter 51 */ 52 public function __construct(string $filter) 53 { 54 $this->filter = $filter; 55 $this->length = strlen($filter); 56 } 57 58 /** 59 * @param string $filter 60 * @return FilterInterface 61 * @throws FilterParseException 62 */ 63 public static function parse(string $filter): FilterInterface 64 { 65 if ($filter === '') { 66 throw new FilterParseException('The filter cannot be empty.'); 67 } 68 69 return (new self($filter))->parseFilterString(0, true)[1]; 70 } 71 72 /** 73 * @param int $startAt 74 * @param bool $isRoot 75 * @return array 76 * @throws FilterParseException 77 */ 78 protected function parseFilterString(int $startAt, bool $isRoot = false): array 79 { 80 if ($this->isAtFilterContainer($startAt)) { 81 [$endsAt, $filter] = $this->parseFilterContainer($startAt, $isRoot); 82 } else { 83 [$endsAt, $filter] = $this->parseComparisonFilter($startAt, $isRoot); 84 } 85 if ($isRoot && $endsAt !== $this->length) { 86 throw new FilterParseException(sprintf( 87 'Unexpected value at end of the filter: %s', 88 substr($this->filter, $endsAt) 89 )); 90 } 91 92 return [$endsAt, $filter]; 93 } 94 95 /** 96 * @param int $pos 97 * @return bool 98 */ 99 protected function isAtFilterContainer(int $pos): bool 100 { 101 if (!$this->startsWith(FilterInterface::PAREN_LEFT, $pos)) { 102 return false; 103 } 104 105 foreach (FilterInterface::OPERATORS as $compOp) { 106 if ($this->startsWith($compOp, $pos + 1)) { 107 return true; 108 } 109 } 110 111 return false; 112 } 113 114 /** 115 * Parse a filter container. Always returns the ending position followed by the filter container. 116 * 117 * @param int $startAt 118 * @param bool $isRoot 119 * @return array 120 * @throws FilterParseException 121 */ 122 protected function parseFilterContainer(int $startAt, bool $isRoot): array 123 { 124 if ($this->containers === null) { 125 $this->parseContainerDepths(); 126 } 127 $this->depth += $isRoot ? 0 : 1; 128 if (!isset($this->containers[$this->depth])) { 129 throw new FilterParseException(sprintf( 130 'The container at position %s is unrecognized. Perhaps there\'s an unmatched "(".' . 131 $startAt 132 )); 133 } 134 $endAt = $this->containers[$this->depth]['endAt']; 135 $operator = substr($this->filter, $startAt + 1, 1); 136 137 if ($operator === FilterInterface::OPERATOR_NOT) { 138 return [$endAt, $this->getNotFilterObject($startAt, $endAt)]; 139 } 140 141 $startAt += 2; 142 $filter = $operator === FilterInterface::OPERATOR_AND ? new AndFilter() : new OrFilter(); 143 while ($endAt !== ($startAt + 1)) { 144 [$startAt, $childFilter] = $this->parseFilterString($startAt); 145 $filter->add($childFilter); 146 } 147 148 return [$endAt, $filter]; 149 } 150 151 /** 152 * Parse a comparison operator. Always returns the ending position followed by the filter. 153 * 154 * @param int $startAt 155 * @param bool $isRoot 156 * @return array 157 * @throws FilterParseException 158 */ 159 protected function parseComparisonFilter(int $startAt, bool $isRoot = false): array 160 { 161 $parenthesis = $this->validateComparisonFilter($startAt, $isRoot); 162 $endAt = !$parenthesis && $isRoot ? $this->length : $this->nextClosingParenthesis($startAt) + 1; 163 164 $attributeEndsAfter = null; 165 $filterType = null; 166 $valueStartsAt = null; 167 for ($i = $startAt; $i < $endAt; $i++) { 168 foreach (FilterInterface::FILTERS as $op) { 169 if ($this->filter[$i] === $op) { 170 $filterType = $op; 171 } elseif (($i + 1) < $endAt && $this->filter[$i] . $this->filter[$i + 1] === $op) { 172 $filterType = $op; 173 } 174 if ($filterType !== null) { 175 break; 176 } 177 } 178 if ($filterType !== null) { 179 $attributeEndsAfter = $i - $startAt; 180 $valueStartsAt = $i + strlen($filterType); 181 break; 182 } 183 } 184 $this->validateParsedFilter($filterType, $startAt, $valueStartsAt, $endAt); 185 186 $attribute = substr( 187 $this->filter, 188 $startAt + ($parenthesis ? 1 : 0), 189 (int) $attributeEndsAfter - ($parenthesis ? 1 : 0) 190 ); 191 $value = substr( 192 $this->filter, 193 (int) $valueStartsAt, 194 $endAt - (int) $valueStartsAt - ($parenthesis ? 1 : 0) 195 ); 196 197 if ($attribute === '') { 198 throw new FilterParseException(sprintf( 199 'The attribute is missing in the filter near position %s.', 200 $startAt 201 )); 202 } 203 204 return [$endAt, $this->getComparisonFilterObject((string) $filterType, $attribute, $value)]; 205 } 206 207 /** 208 * Validates some initial filter logic and determines if the filter is wrapped in parenthesis (validly). 209 * 210 * @param int $startAt 211 * @param bool $isRoot 212 * @return bool 213 * @throws FilterParseException 214 */ 215 protected function validateComparisonFilter(int $startAt, bool $isRoot): bool 216 { 217 $parenthesis = true; 218 219 # A filter without an opening parenthesis is only valid if it is the root. And it cannot have a closing parenthesis. 220 if ($isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) { 221 $parenthesis = false; 222 $pos = false; 223 try { 224 $pos = $this->nextClosingParenthesis($startAt); 225 } catch (FilterParseException $e) { 226 } 227 if ($pos !== false) { 228 throw new FilterParseException(sprintf('The ")" at char %s has no matching parenthesis', $pos)); 229 } 230 # If this is not a root filter, it must start with an opening parenthesis. 231 } elseif (!$isRoot && !$this->startsWith(FilterInterface::PAREN_LEFT, $startAt)) { 232 throw new FilterParseException(sprintf( 233 'The character "%s" at position %s was unexpected. Expected "(".', 234 $this->filter[$startAt], 235 $startAt 236 )); 237 } 238 239 return $parenthesis; 240 } 241 242 /** 243 * Validate some basic aspects of the filter after it is parsed. 244 * 245 * @param string|null $filterType 246 * @param int|null $startsAt 247 * @param int|null $startValue 248 * @param int $endAt 249 * @throws FilterParseException 250 */ 251 protected function validateParsedFilter(?string $filterType, ?int $startsAt, ?int $startValue, $endAt): void 252 { 253 if ($filterType === null) { 254 throw new FilterParseException(sprintf( 255 'Expected one of "%s" in the filter after position %s, but received none.', 256 implode(', ', FilterInterface::FILTERS), 257 $startsAt 258 )); 259 } 260 if ($startValue === null || $startValue === $endAt - 1) { 261 throw new FilterParseException(sprintf( 262 'Expected a value after "%s" at position %s, but got none.', 263 $filterType, 264 $startValue 265 )); 266 } 267 } 268 269 /** 270 * @param string $operator 271 * @param string $attribute 272 * @param string $value 273 * @return FilterInterface 274 */ 275 protected function getComparisonFilterObject(string $operator, string $attribute, string $value): FilterInterface 276 { 277 if ($operator === FilterInterface::FILTER_LTE) { 278 return Filters::lessThanOrEqual($attribute, $this->unescapeValue($value)); 279 } elseif ($operator === FilterInterface::FILTER_GTE) { 280 return Filters::greaterThanOrEqual($attribute, $this->unescapeValue($value)); 281 } elseif ($operator === FilterInterface::FILTER_APPROX) { 282 return Filters::approximate($attribute, $this->unescapeValue($value)); 283 } elseif ($operator === FilterInterface::FILTER_EXT) { 284 return $this->getMatchingRuleFilterObject($attribute, $this->unescapeValue($value)); 285 } 286 287 if ($value === '*') { 288 return Filters::present($attribute); 289 } 290 291 if (preg_match('/\*/', $value) !== 0) { 292 return $this->getSubstringFilterObject($attribute, $value); 293 } else { 294 return Filters::equal($attribute, $this->unescapeValue($value)); 295 } 296 } 297 298 /** 299 * @param string $attribute 300 * @param string $value 301 * @return MatchingRuleFilter 302 * @throws FilterParseException 303 */ 304 protected function getMatchingRuleFilterObject(string $attribute, string $value): MatchingRuleFilter 305 { 306 if (preg_match(self::MATCHING_RULE, $attribute, $matches) === 0) { 307 throw new FilterParseException(sprintf('The matching rule is not valid: %s', $attribute)); 308 } 309 $matchRuleObj = new MatchingRuleFilter(null, null, $value); 310 311 $matchingRule = $matches[4] ?? ''; 312 $attrName = $matches[1] ?? ''; 313 $useDnAttr = isset($matches[2]); 314 315 # RFC 4511, 4.5.1.7.7: If the matchingRule field is absent, the type field MUST be present [..] 316 if ($matchingRule === '' && $attrName === '') { 317 throw new FilterParseException(sprintf( 318 'If the matching rule is absent the attribute type must be present, but it is not: %s', 319 $attribute 320 )); 321 } 322 323 if ($attrName !== '') { 324 $matchRuleObj->setAttribute($attrName); 325 } 326 if ($matchingRule !== '') { 327 $matchRuleObj->setMatchingRule($matchingRule); 328 } 329 $matchRuleObj->setUseDnAttributes($useDnAttr); 330 331 return $matchRuleObj; 332 } 333 334 /** 335 * @param string $attribute 336 * @param string $value 337 * @return SubstringFilter 338 */ 339 protected function getSubstringFilterObject(string $attribute, string $value): SubstringFilter 340 { 341 $filter = new SubstringFilter($attribute); 342 $substrings = preg_split('/\*/', $value, -1, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_NO_EMPTY); 343 if (!is_array($substrings)) { 344 throw new FilterParseException(sprintf( 345 'Unable to parse the substring filter for attribute %s.', 346 $attribute 347 )); 348 } 349 350 $contains = []; 351 foreach ($substrings as $substring) { 352 $substringValue = $this->unescapeValue($substring[0]); 353 $substringType = (int) $substring[1]; 354 355 if ($substringType === 0) { 356 $filter->setStartsWith($substring[0]); 357 } elseif (($substringType + strlen($substringValue)) === strlen($value)) { 358 $filter->setEndsWith($substring[0]); 359 } else { 360 $contains[] = $substringValue; 361 } 362 } 363 $filter->setContains(...$contains); 364 365 return $filter; 366 } 367 368 /** 369 * @param int $startAt 370 * @param int $endAt 371 * @return FilterInterface 372 * @throws FilterParseException 373 */ 374 protected function getNotFilterObject(int $startAt, int $endAt): FilterInterface 375 { 376 if ($this->isAtFilterContainer($startAt + 2)) { 377 throw new FilterParseException(sprintf( 378 'The "not" filter at position %s cannot contain multiple filters.', 379 $startAt 380 )); 381 } 382 $info = $this->parseComparisonFilter($startAt + 2); 383 if (($info[0] + 1) !== $endAt) { 384 throw new FilterParseException(sprintf( 385 'The value after the "not" filter value was unexpected: %s', 386 substr($this->filter, $info[0] + 1, $endAt - ((int) $info[0] + 1)) 387 )); 388 } 389 390 return Filters::not($info[1]); 391 } 392 393 /** 394 * @param string $char 395 * @param int $pos 396 * @return bool 397 */ 398 protected function startsWith(string $char, int $pos): bool 399 { 400 return isset($this->filter[$pos]) && $this->filter[$pos] === $char; 401 } 402 403 /** 404 * This seems like a potential minefield. But the general idea is to loop through the complete filter to get the 405 * start and end positions of each container along the way. The container index marks the depth, with the lowest (0) 406 * being the furthest out and the higher numbers being closer inside. We skip positions inside the for loop when a 407 * comparison filter is encountered to only capture beginning and end parenthesis of containers. 408 * 409 * Each container encountered has its end point marked by detecting the closing parenthesis then we loop through known 410 * detected containers starting at the highest level that have no endpoints marked yet and working our way down. 411 * 412 * @throws FilterParseException 413 */ 414 protected function parseContainerDepths(): void 415 { 416 $this->containers = $this->containers ?? []; 417 418 $child = null; 419 for ($i = 0; $i < $this->length; $i++) { 420 # Detect an unescaped left parenthesis 421 if ($this->filter[$i] === FilterInterface::PAREN_LEFT) { 422 [$i, $child] = $this->parseContainerStart($i, $child); 423 # We have reached a closing parenthesis of a container, work backwards from those defined to set the ending. 424 } elseif ($this->filter[$i] === FilterInterface::PAREN_RIGHT) { 425 $this->parseContainerEnd($i); 426 } 427 } 428 429 foreach ($this->containers as $info) { 430 if ($info['endAt'] === null) { 431 throw new FilterParseException('The filter has an unmatched "(".'); 432 } 433 } 434 } 435 436 /** 437 * @param int $startAt 438 * @return int 439 * @throws FilterParseException 440 */ 441 protected function nextClosingParenthesis(int $startAt) 442 { 443 for ($i = $startAt; $i < $this->length; $i++) { 444 # Look for the char, but ignore it if it is escaped 445 if ($this->filter[$i] === FilterInterface::PAREN_RIGHT) { 446 return $i; 447 } 448 } 449 450 throw new FilterParseException(sprintf( 451 'Expected a matching ")" after position %s, but none was found', 452 $startAt 453 )); 454 } 455 456 /** 457 * @param string $value 458 * @return string 459 */ 460 protected function unescapeValue(string $value) 461 { 462 return (string) preg_replace_callback('/\\\\([0-9A-Fa-f]{2})/', function ($matches) { 463 return hex2bin($matches[1]); 464 }, $value); 465 } 466 467 /** 468 * @throws FilterParseException 469 */ 470 protected function parseContainerStart(int $i, ?int $child): array 471 { 472 # Is the parenthesis followed by an (ie. |, &, !) operator? If so it can contain children ... 473 if (isset($this->filter[$i + 1]) && in_array($this->filter[$i + 1], FilterInterface::OPERATORS, true)) { 474 $child = $child === null ? 0 : $child + 1; 475 $this->containers[$child] = ['startAt' => $i, 'endAt' => null]; 476 477 $i += 2; 478 # Container inside the container ... 479 if ($this->isAtFilterContainer($i)) { 480 $i--; 481 # Comparison filter inside the container... 482 } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_LEFT) { 483 $i = $this->nextClosingParenthesis($i); 484 # An empty container is not allowed... 485 } elseif (isset($this->filter[$i]) && $this->filter[$i] === FilterInterface::PAREN_RIGHT) { 486 throw new FilterParseException(sprintf( 487 'The filter container near position %s is empty.', 488 $i 489 )); 490 # Any other conditions possible? This shouldn't happen unless the filter is malformed.. 491 } else { 492 throw new FilterParseException(sprintf( 493 'Unexpected value after "%s" at position %s: %s', 494 $this->filter[$i - 1] ?? '', 495 $i + 1, 496 $this->filter[$i + 1] ?? '' 497 )); 498 } 499 # If there is no operator this is a standard comparison filter, just find the next closing parenthesis 500 } else { 501 $i = $this->nextClosingParenthesis($i + 1); 502 } 503 504 return [$i, $child]; 505 } 506 507 /** 508 * @throws FilterParseException 509 */ 510 protected function parseContainerEnd(int $i): void 511 { 512 $matchFound = false; 513 foreach (array_reverse((array) $this->containers, true) as $ci => $info) { 514 if ($info['endAt'] === null) { 515 $this->containers[$ci]['endAt'] = $i + 1; 516 $matchFound = true; 517 break; 518 } 519 } 520 if (!$matchFound) { 521 throw new FilterParseException(sprintf( 522 'The closing ")" at position %s has no matching parenthesis', 523 $i 524 )); 525 } 526 } 527} 528