<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Class FileStatistics
 *
 * Recursively scans a directory and collects:
 * - number of files per file extension
 * - duplicate files (based on MD5 checksum) per file extension
 * - size of files summed up per extension
 * - number of files per extension grouped by last modified date
 * - total number of files
 * - total size of all files
 *
 * The per-extension figures are returned by collect(); the two totals are
 * available through getTotalFiles() and getTotalSize() afterwards.
 */
class FileStatistics
{
    /** Number of seconds in one day, the unit used for the age buckets */
    private const SECONDS_PER_DAY = 86400;

    /** Pristine state of the statistics, restored at the start of every scan */
    private const EMPTY_STATS = [
        'extensions' => [],
        'duplicates' => [],
        'sizes' => [],
        'modified_groups' => [],
        'total_files' => 0,
        'total_size' => 0,
    ];

    private string $path;

    /** @var string[] age bucket labels, in the order they appear in each result row */
    private array $buckets = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    private array $stats = self::EMPTY_STATS;

    /** @var array<string, array{ext: string, count: int}> md5 => first seen extension and occurrence count */
    private array $hashMap = [];

    /**
     * @param string $path directory to scan
     * @throws InvalidArgumentException when $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // Stripping the separator from the filesystem root would leave an empty
        // path, which the directory iterator rejects; keep the original then.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Scan the directory tree and return the statistics per extension
     *
     * Every call starts from a clean state, so calling this method repeatedly
     * on the same instance yields a fresh result instead of accumulated counts.
     *
     * Each row holds the keys 'count', 'size', 'dups' and one counter per age
     * bucket ('<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'). Files without
     * an extension are reported under 'no_extension'. Duplicates are detected
     * across the whole tree and attributed to the extension of the first file
     * seen with a given checksum.
     *
     * @return array<int|string, array<string, int>> statistics keyed by lower cased extension
     * @throws \UnexpectedValueException if the directory cannot be opened
     * @throws \RuntimeException if size or modification time of a file cannot be read
     */
    public function collect(): array
    {
        $this->stats = self::EMPTY_STATS;
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            // Compare against '' explicitly: an extension of "0" is falsy but valid
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = 'no_extension';
            }

            $size = $fileInfo->getSize();
            $mtime = $fileInfo->getMTime();

            $this->stats['total_files']++;
            $this->stats['total_size'] += $size;

            // size aggregated per extension
            $this->stats['sizes'][$ext] = ($this->stats['sizes'][$ext] ?? 0) + $size;

            // count per extension
            $this->stats['extensions'][$ext] = ($this->stats['extensions'][$ext] ?? 0) + 1;

            // group by modified time
            $group = $this->getModifiedGroup($now - $mtime);
            $this->stats['modified_groups'][$ext][$group] =
                ($this->stats['modified_groups'][$ext][$group] ?? 0) + 1;

            $this->trackChecksum($fileInfo, $ext);
        }

        // summarize duplicates: every occurrence beyond the first one counts
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->stats['duplicates'][$info['ext']] =
                    ($this->stats['duplicates'][$info['ext']] ?? 0) + ($info['count'] - 1);
            }
        }

        return $this->buildResult();
    }

    /**
     * Total number of files seen by the most recent collect() call
     */
    public function getTotalFiles(): int
    {
        return $this->stats['total_files'];
    }

    /**
     * Total size in bytes of all files seen by the most recent collect() call
     */
    public function getTotalSize(): int
    {
        return $this->stats['total_size'];
    }

    /**
     * Remember the checksum of a file for the duplicate detection
     *
     * Files that cannot be read are left out of the duplicate detection.
     * Without this guard the false returned by md5_file() would be used as an
     * array key and all unreadable files would be reported as duplicates of
     * each other.
     *
     * @param SplFileInfo $fileInfo the file to hash
     * @param string $ext the normalized extension of the file
     */
    private function trackChecksum(SplFileInfo $fileInfo, string $ext): void
    {
        if (!$fileInfo->isReadable()) {
            return;
        }

        $md5 = md5_file($fileInfo->getPathname());
        if ($md5 === false) {
            return;
        }

        if (isset($this->hashMap[$md5])) {
            $this->hashMap[$md5]['count']++;
        } else {
            $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
        }
    }

    /**
     * Map the age of a file to the label of its age bucket
     *
     * Negative ages (modification time in the future) end up in '<1d'.
     *
     * @param int $ageSeconds seconds since the last modification
     * @return string one of the labels in $this->buckets
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = self::SECONDS_PER_DAY;

        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Combine collected sub statistics into a single result array keyed by extension
     *
     * Missing counters are filled with 0 so that every row has the same keys.
     *
     * @return array<int|string, array<string, int>>
     */
    private function buildResult(): array
    {
        $keys = array_unique(
            array_merge(
                array_keys($this->stats['extensions']),
                array_keys($this->stats['sizes']),
                array_keys($this->stats['duplicates']),
                array_keys($this->stats['modified_groups'])
            )
        );

        $result = [];
        foreach ($keys as $key) {
            $result[$key] = [
                'count' => $this->stats['extensions'][$key] ?? 0,
                'size' => $this->stats['sizes'][$key] ?? 0,
                'dups' => $this->stats['duplicates'][$key] ?? 0,
            ];
            foreach ($this->buckets as $bucket) {
                $result[$key][$bucket] = $this->stats['modified_groups'][$key][$bucket] ?? 0;
            }
        }

        return $result;
    }
}