<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Class FileStatistics
 *
 * Recursively scans a directory and collects, per (lower-cased) file extension:
 *  - number of files
 *  - summed size of the files in bytes
 *  - number of duplicate files (based on MD5 checksum of the content)
 *  - number of files grouped by age of their last modification
 *
 * Files without an extension are reported under the key 'no_extension'.
 */
class FileStatistics
{
    /** Key used in the result for files that have no extension. */
    private const NO_EXTENSION = 'no_extension';

    /** Age buckets, ordered from youngest to oldest; each is a key in every result row. */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** Directory to scan, without trailing directory separator (unless it is the root). */
    private string $path;

    /** @var array<array-key, array<string, int>> extension => counters */
    private array $result = [];

    /** @var array<string, array{ext: string, count: int}> md5 => first seen extension and number of files */
    private array $hashMap = [];

    /**
     * @param string $path directory to scan
     * @throws InvalidArgumentException when $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // Stripping the separator from a root path such as "/" would leave an empty
        // string, which the directory iterator rejects; keep the original in that case.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Scan the directory and build the statistics.
     *
     * Each call starts from a clean state, so calling it repeatedly re-scans the
     * directory instead of adding to the numbers of the previous call.
     *
     * Every row of the result has the keys 'count', 'size', 'dups' and one key per
     * age bucket ('<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y').
     *
     * A group of files with identical content is counted as (group size - 1)
     * duplicates, attributed to the extension of the first file of the group that
     * was encountered. Files whose checksum cannot be computed are still counted,
     * but take no part in duplicate detection.
     *
     * @return array<array-key, array<string, int>> extension => counters
     */
    public function collect(): array
    {
        // reset state left over from a previous run
        $this->result = [];
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            // Compare against '' explicitly: an extension of "0" is a real extension,
            // but would be treated as missing by a truthiness check.
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = self::NO_EXTENSION;
            }

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $fileInfo->getSize();

            // group by modified time
            $group = $this->getModifiedGroup($now - $fileInfo->getMTime());
            $this->result[$ext][$group]++;

            // handle duplicates by checksum
            $md5 = md5_file($fileInfo->getPathname());
            if ($md5 === false) {
                // Unreadable file: without this check all such files would share
                // one bogus map key and be reported as duplicates of each other.
                continue;
            }

            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // summarize duplicates: every file beyond the first of a group is a duplicate
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->initExtension($info['ext']);
                $this->result[$info['ext']]['dups'] += $info['count'] - 1;
            }
        }

        return $this->result;
    }

    /**
     * Map a file age to the name of its age bucket.
     *
     * Months are approximated as 30 days and a year as 365 days. Negative ages
     * (modification time in the future) end up in the youngest bucket.
     *
     * @param int $ageSeconds seconds since the last modification
     * @return string one of the values in self::BUCKETS
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;

        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Make sure a result row with all counters set to zero exists for the extension.
     *
     * Does nothing when the row already exists.
     *
     * @param string $ext lower-cased extension or the 'no_extension' marker
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ];
        foreach (self::BUCKETS as $bucket) {
            $this->result[$ext][$bucket] = 0;
        }
    }
}