<?php

/**
 * This file is part of the Nette Framework (https://nette.org)
 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
 */

declare(strict_types=1);

namespace Nette\Utils;

use Nette;


/**
 * Finder allows searching through directory trees using iterator.
 *
 * Finder::findFiles('*.php')
 *     ->size('> 10kB')
 *     ->from('.')
 *     ->exclude('temp');
 *
 * @implements \IteratorAggregate<string, FileInfo>
 */
class Finder implements \IteratorAggregate
{
	use Nette\SmartObject;

	/** @var array<array{string, string}>  list of [mask, mode] pairs, mode is 'file' or 'dir' */
	private array $find = [];

	/** @var string[]  directories to search; recursive ones carry the '/**' suffix */
	private array $in = [];

	/** @var \Closure[]  filters deciding whether an entry is yielded */
	private array $filters = [];

	/** @var \Closure[]  filters deciding whether a directory is descended into */
	private array $descentFilters = [];

	/** @var array<string|self>  extra paths or nested finders yielded after the own results */
	private array $appends = [];
	private bool $childFirst = false;

	/** @var ?callable */
	private $sort;

	/** Maximum depth of traversal; a negative value means no limit. */
	private int $maxDepth = -1;
	private bool $ignoreUnreadableDirs = true;


	/**
	 * Begins search for files and directories matching mask.
	 * @param  string|string[]  $masks  a list of masks, or masks passed as separate arguments
	 */
	public static function find(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir')->addMask($masks, 'file');
	}


	/**
	 * Begins search for files matching mask.
	 * @param  string|string[]  $masks  a list of masks, or masks passed as separate arguments
	 */
	public static function findFiles(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'file');
	}


	/**
	 * Begins search for directories matching mask.
	 * @param  string|string[]  $masks  a list of masks, or masks passed as separate arguments
	 */
	public static function findDirectories(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir');
	}


	/**
	 * Finds files matching the specified masks.
	 * @param  string|string[]  $masks
	 */
	public function files(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'file');
	}


	/**
	 * Finds directories matching the specified masks.
	 * @param  string|string[]  $masks
	 */
	public function directories(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'dir');
	}


	/**
	 * Validates the masks and registers each of them for the given mode ('file' or 'dir').
	 * @param  string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is empty or a file mask ends with a slash
	 */
	private function addMask(array $masks, string $mode): static
	{
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			if ($mode === 'dir') {
				$mask = rtrim($mask, '/'); // trailing slash is tolerated for directory masks only
			}
			if ($mask === '' || ($mode === 'file' && str_ends_with($mask, '/'))) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			if (str_starts_with($mask, '**/')) {
				$mask = substr($mask, 3); // the leading '**/' is dropped before the mask is stored
			}
			$this->find[] = [$mask, $mode];
		}
		return $this;
	}


	/**
	 * Searches in the given directories. Wildcards are allowed.
	 * @param  string|string[]  $paths  a list of paths, or paths passed as separate arguments
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	public function in(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '');
		return $this;
	}


	/**
	 * Searches recursively from the given directories. Wildcards are allowed.
	 * @param  string|string[]  $paths  a list of paths, or paths passed as separate arguments
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	public function from(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '/**');
		return $this;
	}


	/**
	 * Registers search locations; $ext is appended to every path ('/**' marks a recursive search).
	 * @param  string[]  $paths
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	private function addLocation(array $paths, string $ext): void
	{
		foreach ($paths as $path) {
			if ($path === '') {
				throw new Nette\InvalidArgumentException("Invalid directory '$path'");
			}
			$path = rtrim(FileSystem::unixSlashes($path), '/');
			$this->in[] = $path . $ext;
		}
	}


	/**
	 * Lists directory's contents before the directory itself. By default, this is disabled.
	 */
	public function childFirst(bool $state = true): static
	{
		$this->childFirst = $state;
		return $this;
	}


	/**
	 * Ignores unreadable directories. By default, this is enabled.
	 */
	public function ignoreUnreadableDirs(bool $state = true): static
	{
		$this->ignoreUnreadableDirs = $state;
		return $this;
	}


	/**
	 * Set a compare function for sorting directory entries. The function will be called to sort entries from the same directory.
	 * @param  callable(FileInfo, FileInfo): int  $callback
	 */
	public function sortBy(callable $callback): static
	{
		$this->sort = $callback;
		return $this;
	}


	/**
	 * Sorts files in each directory naturally by name.
	 */
	public function sortByName(): static
	{
		// static: the comparator does not need $this, so the finder is not captured by it
		$this->sort = static fn(FileInfo $a, FileInfo $b): int => strnatcmp($a->getBasename(), $b->getBasename());
		return $this;
	}


	/**
	 * Appends the specified paths to the results. When called without an argument,
	 * a new finder is created, appended and returned instead of $this.
	 * @param  string|string[]|null  $paths
	 */
	public function append(string|array|null $paths = null): static
	{
		if ($paths === null) {
			return $this->appends[] = new static;
		}

		$this->appends = array_merge($this->appends, (array) $paths);
		return $this;
	}


	/********************* filtering ****************d*g**/


	/**
	 * Skips entries that match the given masks, relative to the directories defined with the in() or from() methods.
	 * @param  string|string[]  $masks  a list of masks, or masks passed as separate arguments
	 * @throws Nette\InvalidArgumentException  if a mask has an invalid format
	 */
	public function exclude(string|array $masks): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			if (!preg_match('~^/?(\*\*/)?(.+)(/\*\*|/\*|/|)$~D', $mask, $m)) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			$end = $m[3];
			$re = $this->buildPattern($m[2]);
			// a mask with a trailing '/', '/*' or '/**' applies to directories only
			$filter = static fn(FileInfo $file): bool => ($end && !$file->isDir())
				|| !preg_match($re, FileSystem::unixSlashes($file->getRelativePathname()));

			$this->descentFilter($filter);
			if ($end !== '/*') { // for '/*' the directory itself is still yielded, only its content is skipped
				$this->filter($filter);
			}
		}

		return $this;
	}


	/**
	 * Yields only entries which satisfy the given filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function filter(callable $callback): static
	{
		$this->filters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * It descends only to directories that match the specified filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function descentFilter(callable $callback): static
	{
		$this->descentFilters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * Sets the maximum depth of entries. Passing null removes the limit.
	 */
	public function limitDepth(?int $depth): static
	{
		$this->maxDepth = $depth ?? -1;
		return $this;
	}


	/**
	 * Restricts the search by size. $operator accepts "[operator] [size] [unit]" example: >=10kB
	 * Units are decimal (k = 1e3, M = 1e6, G = 1e9). Entries that are not files always pass.
	 * @throws Nette\InvalidArgumentException  if the single-argument predicate has an invalid format
	 */
	public function size(string $operator, ?int $size = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?((?:\d*\.)?\d+)\s*(K|M|G|)B?$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid size predicate format.');
			}

			[, $operator, $size, $unit] = $matches;
			$units = ['' => 1, 'k' => 1e3, 'm' => 1e6, 'g' => 1e9];
			$size *= $units[strtolower($unit)];
			$operator = $operator ?: '='; // a missing operator means equality
		}

		return $this->filter(static fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getSize(), $operator, $size));
	}


	/**
	 * Restricts the search by modified time. $operator accepts "[operator] [date]" example: >1978-01-23
	 * Entries that are not files always pass.
	 * @throws Nette\InvalidArgumentException  if the single-argument predicate has an invalid format
	 */
	public function date(string $operator, string|int|\DateTimeInterface|null $date = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?(.+)$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid date predicate format.');
			}

			[, $operator, $date] = $matches;
			$operator = $operator ?: '='; // a missing operator means equality
		}

		// Compare against an integer Unix timestamp rather than the numeric string that
		// format('U') produces, so that a type-sensitive operator passed in the two-argument
		// form is not defeated by a string/int mismatch (assumes getMTime() returns int, as
		// SplFileInfo does - confirm against FileInfo).
		$date = DateTime::from($date)->getTimestamp();
		return $this->filter(static fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getMTime(), $operator, $date));
	}


	/********************* iterator generator ****************d*g**/


	/**
	 * Returns an array with all found files and directories.
	 * @return list<FileInfo>
	 */
	public function collect(): array
	{
		return iterator_to_array($this->getIterator(), preserve_keys: false);
	}


	/**
	 * Yields the entries found by this finder, followed by the appended paths and finders.
	 * @return \Generator<string, FileInfo>
	 * @throws Nette\InvalidStateException  if a searched directory does not exist
	 */
	public function getIterator(): \Generator
	{
		$plan = $this->buildPlan();
		foreach ($plan as $dir => $searches) {
			yield from $this->traverseDir($dir, $searches);
		}

		foreach ($this->appends as $item) {
			if ($item instanceof self) {
				yield from $item->getIterator();
			} else {
				$item = FileSystem::platformSlashes($item);
				yield $item => new FileInfo($item);
			}
		}
	}


	/**
	 * Walks a single directory, yields matching entries and recurses into subdirectories
	 * for searches flagged as recursive.
	 * @param  array<\stdClass{pattern: string, mode: string, recursive: bool}>  $searches
	 * @param  string[]  $subdirs  names of directories descended through so far
	 * @return \Generator<string, FileInfo>
	 * @throws Nette\InvalidStateException  if the directory does not exist, or is unreadable
	 *     while ignoreUnreadableDirs is disabled
	 */
	private function traverseDir(string $dir, array $searches, array $subdirs = []): \Generator
	{
		if ($this->maxDepth >= 0 && count($subdirs) > $this->maxDepth) {
			return;
		} elseif (!is_dir($dir)) {
			throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($dir, '/\\')));
		}

		try {
			$pathNames = new \FilesystemIterator($dir, \FilesystemIterator::FOLLOW_SYMLINKS | \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME | \FilesystemIterator::UNIX_PATHS);
		} catch (\UnexpectedValueException $e) {
			if ($this->ignoreUnreadableDirs) {
				return;
			} else {
				// keep the original exception attached so the root cause stays traceable
				throw new Nette\InvalidStateException($e->getMessage(), previous: $e);
			}
		}

		$files = $this->convertToFiles($pathNames, implode('/', $subdirs), FileSystem::isAbsolute($dir));

		if ($this->sort) {
			// sorting requires the whole directory listing to be loaded at once
			$files = iterator_to_array($files);
			usort($files, $this->sort);
		}

		foreach ($files as $file) {
			$pathName = $file->getPathname();
			$cache = $subSearch = []; // $cache holds filter results for this entry only

			if ($file->isDir()) {
				foreach ($searches as $search) {
					if ($search->recursive && $this->proveFilters($this->descentFilters, $file, $cache)) {
						$subSearch[] = $search;
					}
				}
			}

			if ($this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}

			$relativePathname = FileSystem::unixSlashes($file->getRelativePathname());
			foreach ($searches as $search) {
				if (
					$file->getType() === $search->mode
					&& preg_match($search->pattern, $relativePathname)
					&& $this->proveFilters($this->filters, $file, $cache)
				) {
					yield $pathName => $file;
					break; // yield every entry at most once even if several searches match
				}
			}

			if (!$this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}
		}
	}


	/**
	 * Wraps path names into FileInfo objects; for non-absolute searches a leading './' or '/' is stripped.
	 * @param  iterable<string>  $pathNames
	 * @return \Generator<int, FileInfo>
	 */
	private function convertToFiles(iterable $pathNames, string $relativePath, bool $absolute): \Generator
	{
		foreach ($pathNames as $pathName) {
			if (!$absolute) {
				$pathName = preg_replace('~\.?/~A', '', $pathName);
			}
			$pathName = FileSystem::platformSlashes($pathName);
			yield new FileInfo($pathName, $relativePath);
		}
	}


	/**
	 * Checks that the file passes all the filters. Results are stored in $cache under the
	 * closure's object id, so a closure is invoked at most once per cache.
	 * @param  \Closure[]  $filters
	 * @param  array<int, mixed>  $cache
	 */
	private function proveFilters(array $filters, FileInfo $file, array &$cache): bool
	{
		foreach ($filters as $filter) {
			$res = &$cache[spl_object_id($filter)];
			$res ??= $filter($file);
			if (!$res) {
				return false;
			}
		}

		return true;
	}


	/**
	 * Builds the map of starting directories to the searches to be performed in them.
	 * @return array<string, array<\stdClass{pattern: string, mode: string, recursive: bool}>>
	 * @throws Nette\InvalidStateException  if an absolute mask is combined with in()/from(),
	 *     or if no starting directory is found for a search
	 */
	private function buildPlan(): array
	{
		$plan = $dirCache = [];
		foreach ($this->find as [$mask, $mode]) {
			$splits = [];
			if (FileSystem::isAbsolute($mask)) {
				if ($this->in) {
					throw new Nette\InvalidStateException("You cannot combine the absolute path in the mask '$mask' and the directory to search '{$this->in[0]}'.");
				}
				$splits[] = self::splitRecursivePart($mask);
			} else {
				foreach ($this->in ?: ['.'] as $in) {
					$in = strtr($in, ['[' => '[[]', ']' => '[]]']); // in path, do not treat [ and ] as a pattern by glob()
					$splits[] = self::splitRecursivePart($in . '/' . $mask);
				}
			}

			foreach ($splits as [$base, $rest, $recursive]) {
				$base = $base === '' ? '.' : $base;
				// glob() is called only when the base contains wildcards; results are cached per base
				$dirs = $dirCache[$base] ??= strpbrk($base, '*?[')
					? glob($base, GLOB_NOSORT | GLOB_ONLYDIR | GLOB_NOESCAPE)
					: [strtr($base, ['[[]' => '[', '[]]' => ']'])]; // unescape [ and ]

				if (!$dirs) {
					throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($base, '/\\')));
				}

				$search = (object) ['pattern' => $this->buildPattern($rest), 'mode' => $mode, 'recursive' => $recursive];
				foreach ($dirs as $dir) {
					$plan[$dir][] = $search;
				}
			}
		}

		return $plan;
	}


	/**
	 * Since glob() does not know ** wildcard, we divide the path into a part for glob and a part for manual traversal.
	 * @return array{string, string, bool}  [base for glob, remaining mask, whether '**' was present]
	 */
	private static function splitRecursivePart(string $path): array
	{
		$a = strrpos($path, '/');
		// only the directory part (up to the last slash) is searched for the first '**' segment
		$parts = preg_split('~(?<=^|/)\*\*($|/)~', substr($path, 0, $a + 1), 2);
		return isset($parts[1])
			? [$parts[0], $parts[1] . substr($path, $a + 1), true]
			: [$parts[0], substr($path, $a + 1), false];
	}


	/**
	 * Converts wildcards to regular expression.
	 */
	private function buildPattern(string $mask): string
	{
		if ($mask === '*') {
			return '##'; // empty pattern matches everything
		} elseif (str_starts_with($mask, './')) {
			$anchor = '^'; // './' ties the mask to the beginning of the relative path
			$mask = substr($mask, 2);
		} else {
			$anchor = '(?:^|/)';
		}

		$pattern = strtr(
			preg_quote($mask, '#'),
			[
				'\*\*/' => '(.+/)?',
				'\*' => '[^/]*',
				'\?' => '[^/]',
				'\[\!' => '[^',
				'\[' => '[',
				'\]' => ']',
				'\-' => '-',
			],
		);
		// matching is case-insensitive on Windows
		return '#' . $anchor . $pattern . '$#D' . (defined('PHP_WINDOWS_VERSION_BUILD') ? 'i' : '');
	}
}