<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Class FileStatistics
 *
 * Recursively scans a directory and collects:
 * - number of files per file extension
 * - duplicate files (based on MD5 checksum) per file extension
 * - size of files summed up per extension
 * - number of files per extension grouped by last modified date
 * - total number of files
 * - total size of all files
 *
 * Extensions are compared case-insensitively (lower-cased). Files without an
 * extension are reported under the key 'no_extension'.
 */
class FileStatistics
{
    /** Key used for files that have no extension at all. */
    private const NO_EXTENSION = 'no_extension';

    /** Pristine result structure; every collect() run starts from a copy of it. */
    private const EMPTY_STATS = [
        'extensions' => [],
        'duplicates' => [],
        'sizes' => [],
        'modified_groups' => [],
        'total_files' => 0,
        'total_size' => 0,
    ];

    /** Directory to scan, without trailing directory separator (except for the root). */
    private string $path;

    /** Statistics gathered by the most recent collect() run. */
    private array $stats = self::EMPTY_STATS;

    /** md5 => ['ext' => extension of the first file seen with that hash, 'count' => occurrences] */
    private array $hashMap = [];

    /**
     * @param string $path directory to scan
     * @throws InvalidArgumentException when $path is not an existing directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // Trimming the filesystem root ("/") would leave an empty string, which the
        // directory iterator rejects; keep the original path in that case.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Scan the directory tree and return the gathered statistics.
     *
     * Every call starts from a clean state, so calling it repeatedly returns
     * fresh numbers instead of accumulating over previous runs.
     *
     * Returned keys:
     * - 'extensions':      extension => number of files
     * - 'duplicates':      extension => number of redundant copies (occurrences beyond the
     *                      first of an identical MD5 checksum, attributed to the extension
     *                      of the first file encountered with that checksum)
     * - 'sizes':           extension => summed size in bytes
     * - 'modified_groups': extension => [age bucket => number of files]
     * - 'total_files':     number of files found
     * - 'total_size':      summed size of all files in bytes
     *
     * Files whose checksum cannot be computed (md5_file() returned false) are
     * still counted everywhere else but are left out of duplicate detection.
     *
     * @return array the statistics described above
     */
    public function collect(): array
    {
        // reset state so that repeated calls do not double count
        $this->stats = self::EMPTY_STATS;
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            $this->stats['total_files']++;

            // Explicit empty-string check: the short ternary would also treat the
            // perfectly valid extension "0" (e.g. "log.0") as missing.
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = self::NO_EXTENSION;
            }

            $size = $fileInfo->getSize();
            $mtime = $fileInfo->getMTime();

            // size aggregated per extension
            $this->stats['sizes'][$ext] = ($this->stats['sizes'][$ext] ?? 0) + $size;
            $this->stats['total_size'] += $size;

            // count per extension
            $this->stats['extensions'][$ext] = ($this->stats['extensions'][$ext] ?? 0) + 1;

            // group by modified time
            $group = $this->getModifiedGroup($now - $mtime);
            $this->stats['modified_groups'][$ext][$group] =
                ($this->stats['modified_groups'][$ext][$group] ?? 0) + 1;

            // handle duplicates by checksum
            $md5 = md5_file($fileInfo->getPathname());
            if ($md5 === false) {
                // Unreadable file: without a checksum it must not be lumped together
                // with other unreadable files as if they were identical.
                continue;
            }

            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        $this->summarizeDuplicates();

        return $this->stats;
    }

    /**
     * Fold the checksum map into the per-extension duplicate counters.
     *
     * For every checksum seen more than once, all occurrences except the first
     * are counted as duplicates.
     */
    private function summarizeDuplicates(): void
    {
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->stats['duplicates'][$info['ext']] =
                    ($this->stats['duplicates'][$info['ext']] ?? 0) + ($info['count'] - 1);
            }
        }
    }

    /**
     * Map a file age to a human readable bucket label.
     *
     * Negative ages (modification time in the future) fall into the '<1d' bucket.
     *
     * @param int $ageSeconds age of the file in seconds
     * @return string one of '<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;

        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }
}