1<?php
2
3/**
4 * This file is part of the Nette Framework (https://nette.org)
5 * Copyright (c) 2004 David Grudl (https://davidgrudl.com)
6 */
7
8declare(strict_types=1);
9
10namespace Nette\Utils;
11
12use Nette;
13
14
/**
 * Finder allows searching through directory trees using iterator.
 *
 * Finder::findFiles('*.php')
 *     ->size('> 10kB')
 *     ->from('.')
 *     ->exclude('temp');
 *
 * @implements \IteratorAggregate<string, FileInfo>
 */
class Finder implements \IteratorAggregate
{
	use Nette\SmartObject;

	/** @var array<array{string, string}>  list of [mask, mode] pairs, mode is 'file' or 'dir' */
	private array $find = [];

	/** @var string[]  directories to search in; entries added via from() end with '/**' */
	private array $in = [];

	/** @var \Closure[]  filters deciding whether an entry is yielded */
	private array $filters = [];

	/** @var \Closure[]  filters deciding whether a directory is descended into */
	private array $descentFilters = [];

	/** @var array<string|self>  extra paths or nested finders yielded after the main search */
	private array $appends = [];
	private bool $childFirst = false;

	/** @var ?\Closure(FileInfo, FileInfo): int */
	private ?\Closure $sort = null;
	private int $maxDepth = -1;
	private bool $ignoreUnreadableDirs = true;


	/**
	 * Begins search for files and directories matching mask.
	 * Masks may be passed as a single array or as separate string arguments.
	 */
	public static function find(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir')->addMask($masks, 'file');
	}


	/**
	 * Begins search for files matching mask.
	 * Masks may be passed as a single array or as separate string arguments.
	 */
	public static function findFiles(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'file');
	}


	/**
	 * Begins search for directories matching mask.
	 * Masks may be passed as a single array or as separate string arguments.
	 */
	public static function findDirectories(string|array $masks = ['*']): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		return (new static)->addMask($masks, 'dir');
	}


	/**
	 * Finds files matching the specified masks.
	 */
	public function files(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'file');
	}


	/**
	 * Finds directories matching the specified masks.
	 */
	public function directories(string|array $masks = ['*']): static
	{
		return $this->addMask((array) $masks, 'dir');
	}


	/**
	 * Normalizes the masks and registers each of them for the given mode ('file' or 'dir').
	 * @param  string[]  $masks
	 * @throws Nette\InvalidArgumentException  if a mask is empty or a file mask ends with a slash
	 */
	private function addMask(array $masks, string $mode): static
	{
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			if ($mode === 'dir') {
				$mask = rtrim($mask, '/'); // a trailing slash is allowed for directory masks
			}
			if ($mask === '' || ($mode === 'file' && str_ends_with($mask, '/'))) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			if (str_starts_with($mask, '**/')) {
				$mask = substr($mask, 3); // the leading '**/' is dropped before the mask is stored
			}
			$this->find[] = [$mask, $mode];
		}
		return $this;
	}


	/**
	 * Searches in the given directories. Wildcards are allowed.
	 */
	public function in(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '');
		return $this;
	}


	/**
	 * Searches recursively from the given directories. Wildcards are allowed.
	 */
	public function from(string|array $paths): static
	{
		$paths = is_array($paths) ? $paths : func_get_args(); // compatibility with variadic
		$this->addLocation($paths, '/**');
		return $this;
	}


	/**
	 * Registers the directories to search in; $ext is appended to each normalized path
	 * ('' for in(), '/**' for from()).
	 * @param  string[]  $paths
	 * @throws Nette\InvalidArgumentException  if a path is an empty string
	 */
	private function addLocation(array $paths, string $ext): void
	{
		foreach ($paths as $path) {
			if ($path === '') {
				throw new Nette\InvalidArgumentException("Invalid directory '$path'");
			}
			$path = rtrim(FileSystem::unixSlashes($path), '/');
			$this->in[] = $path . $ext;
		}
	}


	/**
	 * Lists directory's contents before the directory itself. By default, this is disabled.
	 */
	public function childFirst(bool $state = true): static
	{
		$this->childFirst = $state;
		return $this;
	}


	/**
	 * Ignores unreadable directories. By default, this is enabled.
	 */
	public function ignoreUnreadableDirs(bool $state = true): static
	{
		$this->ignoreUnreadableDirs = $state;
		return $this;
	}


	/**
	 * Set a compare function for sorting directory entries. The function will be called to sort entries from the same directory.
	 * @param  callable(FileInfo, FileInfo): int  $callback
	 */
	public function sortBy(callable $callback): static
	{
		$this->sort = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * Sorts files in each directory naturally by name.
	 */
	public function sortByName(): static
	{
		$this->sort = fn(FileInfo $a, FileInfo $b): int => strnatcmp($a->getBasename(), $b->getBasename());
		return $this;
	}


	/**
	 * Adds the specified paths to the results. When called without an argument, it instead creates
	 * a new finder whose results are appended to this one and returns that new finder.
	 */
	public function append(string|array|null $paths = null): static
	{
		if ($paths === null) {
			return $this->appends[] = new static;
		}

		$this->appends = array_merge($this->appends, (array) $paths);
		return $this;
	}


	/********************* filtering ****************d*g**/


	/**
	 * Skips entries that match the given masks relative to the ones defined with the in() or from() methods.
	 * A mask may end with '/', '/*' or '/**'; such a mask applies to directories only. Matching directories
	 * are never descended into, and with the '/*' suffix the directory itself is still listed.
	 * @throws Nette\InvalidArgumentException  if a mask cannot be parsed
	 */
	public function exclude(string|array $masks): static
	{
		$masks = is_array($masks) ? $masks : func_get_args(); // compatibility with variadic
		foreach ($masks as $mask) {
			$mask = FileSystem::unixSlashes($mask);
			// the name part must be lazy (.+?); a greedy .+ swallows the suffix so that $m[3] is always ''
			if (!preg_match('~^/?(\*\*/)?(.+?)(/\*\*|/\*|/|)$~D', $mask, $m)) {
				throw new Nette\InvalidArgumentException("Invalid mask '$mask'");
			}
			$end = $m[3];
			$re = $this->buildPattern($m[2]);
			// returns true to keep the entry; masks with a suffix never reject non-directories
			$filter = fn(FileInfo $file): bool => ($end !== '' && !$file->isDir())
				|| !preg_match($re, FileSystem::unixSlashes($file->getRelativePathname()));

			$this->descentFilter($filter);
			if ($end !== '/*') {
				$this->filter($filter);
			}
		}

		return $this;
	}


	/**
	 * Yields only entries which satisfy the given filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function filter(callable $callback): static
	{
		$this->filters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * It descends only to directories that match the specified filter.
	 * @param  callable(FileInfo): bool  $callback
	 */
	public function descentFilter(callable $callback): static
	{
		$this->descentFilters[] = \Closure::fromCallable($callback);
		return $this;
	}


	/**
	 * Sets the maximum depth of entries. Null removes the limit.
	 */
	public function limitDepth(?int $depth): static
	{
		$this->maxDepth = $depth ?? -1;
		return $this;
	}


	/**
	 * Restricts the search by size. $operator accepts "[operator] [size] [unit]" example: >=10kB
	 * Alternatively the operator and the size in bytes can be passed as two arguments.
	 * The restriction applies to files only; other entries always pass.
	 * @throws Nette\InvalidArgumentException  if the predicate cannot be parsed
	 */
	public function size(string $operator, ?int $size = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			if (!preg_match('#^(?:([=<>!]=?|<>)\s*)?((?:\d*\.)?\d+)\s*(K|M|G|)B?$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid size predicate format.');
			}

			[, $operator, $size, $unit] = $matches;
			$units = ['' => 1, 'k' => 1e3, 'm' => 1e6, 'g' => 1e9]; // decimal multiples
			$size *= $units[strtolower($unit)];
			$operator = $operator ?: '='; // a missing operator means equality
		}

		return $this->filter(fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getSize(), $operator, $size));
	}


	/**
	 * Restricts the search by modified time. $operator accepts "[operator] [date]" example: >1978-01-23
	 * Alternatively the operator and the date can be passed as two arguments.
	 * The restriction applies to files only; other entries always pass.
	 * @throws Nette\InvalidArgumentException  if the predicate cannot be parsed
	 */
	public function date(string $operator, string|int|\DateTimeInterface|null $date = null): static
	{
		if (func_num_args() === 1) { // in $operator is predicate
			// '<>' has to be tried first, otherwise '<' alone matches and '>' becomes part of the date
			if (!preg_match('#^(?:(<>|[=<>!]=?)\s*)?(.+)$#Di', $operator, $matches)) {
				throw new Nette\InvalidArgumentException('Invalid date predicate format.');
			}

			[, $operator, $date] = $matches;
			$operator = $operator ?: '='; // a missing operator means equality
		}

		// format('U') returns a string; cast it so the modification time is compared with an integer
		$timestamp = (int) DateTime::from($date)->format('U');
		return $this->filter(fn(FileInfo $file): bool => !$file->isFile() || Helpers::compare($file->getMTime(), $operator, $timestamp));
	}


	/********************* iterator generator ****************d*g**/


	/**
	 * Returns an array with all found files and directories.
	 * @return list<FileInfo>
	 */
	public function collect(): array
	{
		return iterator_to_array($this->getIterator(), preserve_keys: false);
	}


	/**
	 * Yields the entries found by the search plan, followed by the appended paths and finders.
	 * Keys are path names.
	 * @return \Generator<string, FileInfo>
	 */
	public function getIterator(): \Generator
	{
		$plan = $this->buildPlan();
		foreach ($plan as $dir => $searches) {
			yield from $this->traverseDir($dir, $searches);
		}

		foreach ($this->appends as $item) {
			if ($item instanceof self) {
				yield from $item->getIterator();
			} else {
				$item = FileSystem::platformSlashes($item);
				yield $item => new FileInfo($item);
			}
		}
	}


	/**
	 * Walks a single directory, yields entries matching any of the searches and recurses into
	 * subdirectories for the recursive searches that pass the descent filters.
	 * @param  array<\stdClass{pattern: string, mode: string, recursive: bool}>  $searches
	 * @param  string[]  $subdirs  names of the directories between the search root and $dir
	 * @return \Generator<string, FileInfo>
	 * @throws Nette\InvalidStateException  if the directory does not exist, or cannot be read and unreadable directories are not ignored
	 */
	private function traverseDir(string $dir, array $searches, array $subdirs = []): \Generator
	{
		if ($this->maxDepth >= 0 && count($subdirs) > $this->maxDepth) {
			return;
		} elseif (!is_dir($dir)) {
			throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($dir, '/\\')));
		}

		try {
			$pathNames = new \FilesystemIterator($dir, \FilesystemIterator::FOLLOW_SYMLINKS | \FilesystemIterator::SKIP_DOTS | \FilesystemIterator::CURRENT_AS_PATHNAME | \FilesystemIterator::UNIX_PATHS);
		} catch (\UnexpectedValueException $e) {
			if ($this->ignoreUnreadableDirs) {
				return;
			} else {
				throw new Nette\InvalidStateException($e->getMessage(), 0, $e); // keep the original cause
			}
		}

		$files = $this->convertToFiles($pathNames, implode('/', $subdirs), FileSystem::isAbsolute($dir));

		if ($this->sort) {
			// sorting needs all entries of the directory at once
			$files = iterator_to_array($files);
			usort($files, $this->sort);
		}

		foreach ($files as $file) {
			$pathName = $file->getPathname();
			$cache = $subSearch = []; // $cache holds filter results for this entry only

			if ($file->isDir()) {
				foreach ($searches as $search) {
					if ($search->recursive && $this->proveFilters($this->descentFilters, $file, $cache)) {
						$subSearch[] = $search;
					}
				}
			}

			if ($this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}

			$relativePathname = FileSystem::unixSlashes($file->getRelativePathname());
			foreach ($searches as $search) {
				if (
					$file->getType() === $search->mode
					&& preg_match($search->pattern, $relativePathname)
					&& $this->proveFilters($this->filters, $file, $cache)
				) {
					yield $pathName => $file;
					break; // yield each entry at most once even if several searches match
				}
			}

			if (!$this->childFirst && $subSearch) {
				yield from $this->traverseDir($pathName, $subSearch, array_merge($subdirs, [$file->getBasename()]));
			}
		}
	}


	/**
	 * Converts path names to FileInfo objects. For a non-absolute search directory
	 * a leading './' or '/' is stripped from each path name first.
	 * @param  iterable<string>  $pathNames
	 * @return \Generator<FileInfo>
	 */
	private function convertToFiles(iterable $pathNames, string $relativePath, bool $absolute): \Generator
	{
		foreach ($pathNames as $pathName) {
			if (!$absolute) {
				$pathName = preg_replace('~\.?/~A', '', $pathName);
			}
			$pathName = FileSystem::platformSlashes($pathName);
			yield new FileInfo($pathName, $relativePath);
		}
	}


	/**
	 * Checks that the file passes all the filters and stops at the first one that fails.
	 * Results are stored in $cache under the closure's object id, so a closure present in both
	 * filter lists is evaluated only once for a given cache.
	 * @param  \Closure[]  $filters
	 * @param  array<int, mixed>  $cache
	 */
	private function proveFilters(array $filters, FileInfo $file, array &$cache): bool
	{
		foreach ($filters as $filter) {
			$res = &$cache[spl_object_id($filter)];
			$res ??= $filter($file);
			if (!$res) {
				return false;
			}
		}

		return true;
	}


	/**
	 * Combines the masks with the search directories and resolves them into a map
	 * of starting directory => searches to perform there.
	 * @return array<string, array<\stdClass{pattern: string, mode: string, recursive: bool}>>
	 * @throws Nette\InvalidStateException  if an absolute mask is combined with in()/from() or a starting directory is not found
	 */
	private function buildPlan(): array
	{
		$plan = $dirCache = [];
		foreach ($this->find as [$mask, $mode]) {
			$splits = [];
			if (FileSystem::isAbsolute($mask)) {
				if ($this->in) {
					throw new Nette\InvalidStateException("You cannot combine the absolute path in the mask '$mask' and the directory to search '{$this->in[0]}'.");
				}
				$splits[] = self::splitRecursivePart($mask);
			} else {
				foreach ($this->in ?: ['.'] as $in) {
					$in = strtr($in, ['[' => '[[]', ']' => '[]]']); // in path, do not treat [ and ] as a pattern by glob()
					$splits[] = self::splitRecursivePart($in . '/' . $mask);
				}
			}

			foreach ($splits as [$base, $rest, $recursive]) {
				$base = $base === '' ? '.' : $base;
				// glob() is needed only when the base contains wildcards; results are cached per base
				$dirs = $dirCache[$base] ??= strpbrk($base, '*?[')
					? glob($base, GLOB_NOSORT | GLOB_ONLYDIR | GLOB_NOESCAPE)
					: [strtr($base, ['[[]' => '[', '[]]' => ']'])]; // unescape [ and ]

				if (!$dirs) {
					throw new Nette\InvalidStateException(sprintf("Directory '%s' does not exist.", rtrim($base, '/\\')));
				}

				$search = (object) ['pattern' => $this->buildPattern($rest), 'mode' => $mode, 'recursive' => $recursive];
				foreach ($dirs as $dir) {
					$plan[$dir][] = $search;
				}
			}
		}

		return $plan;
	}


	/**
	 * Since glob() does not know ** wildcard, we divide the path into a part for glob and a part for manual traversal.
	 * @return array{string, string, bool}  [part for glob, remaining mask, whether a ** wildcard was found]
	 */
	private static function splitRecursivePart(string $path): array
	{
		$slash = strrpos($path, '/');
		$dirLength = $slash === false ? 0 : $slash + 1; // without a slash the whole path is the mask
		$parts = preg_split('~(?<=^|/)\*\*($|/)~', substr($path, 0, $dirLength), 2);
		$name = substr($path, $dirLength);
		return isset($parts[1])
			? [$parts[0], $parts[1] . $name, true]
			: [$parts[0], $name, false];
	}


	/**
	 * Converts wildcards to regular expression.
	 * A mask starting with './' is anchored to the beginning of the relative path, other masks
	 * may start after any slash. On Windows the pattern is case-insensitive.
	 */
	private function buildPattern(string $mask): string
	{
		if ($mask === '*') {
			return '##'; // empty pattern matches everything
		} elseif (str_starts_with($mask, './')) {
			$anchor = '^';
			$mask = substr($mask, 2);
		} else {
			$anchor = '(?:^|/)';
		}

		// quote the mask and then turn the quoted wildcards back into their regexp equivalents
		$pattern = strtr(
			preg_quote($mask, '#'),
			[
				'\*\*/' => '(.+/)?',
				'\*' => '[^/]*',
				'\?' => '[^/]',
				'\[\!' => '[^',
				'\[' => '[',
				'\]' => ']',
				'\-' => '-',
			],
		);
		return '#' . $anchor . $pattern . '$#D' . (defined('PHP_WINDOWS_VERSION_BUILD') ? 'i' : '');
	}
}
511