[],
        'duplicates' => [],
        'sizes' => [],
        'modified_groups' => [],
        'total_files' => 0,
        'total_size' => 0,
    ];

    /**
     * Files seen during the current scan, keyed by the MD5 checksum of their contents.
     *
     * @var array<string, array{ext: string, count: int}>
     */
    private array $hashMap = []; // md5 => [ext, count]

    /**
     * @param string $path Directory whose contents will be scanned recursively.
     *
     * @throws InvalidArgumentException If $path is not an existing directory.
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // Strip trailing separators, but never reduce the filesystem root ("/")
        // to an empty string: an empty path cannot be opened by the directory iterator.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed !== '' ? $trimmed : DIRECTORY_SEPARATOR;
    }

    /**
     * Walks the directory tree and builds per-extension statistics.
     *
     * Every call starts from a clean slate, so calling this method repeatedly
     * yields the same result for an unchanged directory instead of accumulating.
     *
     * The returned array has the same keys as the $stats property; the ones
     * written here are:
     *  - 'extensions':      file count per lower-cased extension ('no_extension' when there is none)
     *  - 'sizes':           summed file size in bytes per extension
     *  - 'modified_groups': file count per extension and age bucket (see getModifiedGroup())
     *  - 'duplicates':      number of redundant copies, attributed to the extension of the
     *                       first file seen with a given checksum
     *  - 'total_files':     number of regular files visited
     *  - 'total_size':      summed size in bytes of all visited files
     *
     * @return array<string, mixed>
     */
    public function collect(): array
    {
        // Reset every counter to its empty value (arrays -> [], scalars -> 0) so
        // that a second call does not double the totals or flag every file as a
        // duplicate of its own earlier sighting.
        foreach ($this->stats as $key => $value) {
            $this->stats[$key] = is_array($value) ? [] : 0;
        }
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );
        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            $this->stats['total_files']++;

            // Compare against '' explicitly: a truthiness check would misfile
            // the legitimate extension "0" (e.g. "backup.0") as 'no_extension'.
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = 'no_extension';
            }

            $path = $fileInfo->getPathname();
            $size = $fileInfo->getSize();
            $mtime = $fileInfo->getMTime();

            // size aggregated per extension
            $this->stats['sizes'][$ext] = ($this->stats['sizes'][$ext] ?? 0) + $size;
            $this->stats['total_size'] += $size;

            // count per extension
            $this->stats['extensions'][$ext] = ($this->stats['extensions'][$ext] ?? 0) + 1;

            // group by modified time
            $group = $this->getModifiedGroup($now - $mtime);
            $this->stats['modified_groups'][$ext][$group] =
                ($this->stats['modified_groups'][$ext][$group] ?? 0) + 1;

            // handle duplicates by checksum
            $md5 = md5_file($path);
            if ($md5 === false) {
                // The file could not be read. Using `false` as an array key would
                // silently become key 0 and make all unreadable files look like
                // duplicates of one another, so leave it out of duplicate tracking.
                continue;
            }

            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // summarize duplicates: each checksum seen N > 1 times contributes N - 1 redundant copies
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $firstExt = $info['ext'];
                $this->stats['duplicates'][$firstExt] =
                    ($this->stats['duplicates'][$firstExt] ?? 0) + ($info['count'] - 1);
            }
        }

        return $this->stats;
    }

    /**
     * Maps a file age to a coarse bucket label.
     *
     * Buckets are checked from youngest to oldest and the first match wins.
     * Negative ages (modification time in the future) fall into '<1d'.
     *
     * @param int $ageSeconds Seconds elapsed since the file was last modified.
     *
     * @return string One of '<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'.
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400; // seconds in one day

        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }
}