<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Recursively scans a directory and builds cache statistics keyed by extension.
 *
 * For every extension the result holds the file count ('count'), the summed
 * size in bytes ('size'), the number of redundant copies detected by MD5
 * checksum ('dups'), and one counter per age bucket listed in self::BUCKETS.
 */
class FileStatistics
{
    /** Age bucket labels; each one becomes a counter key in every extension entry. */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** Key used for files that have no extension at all. */
    private const NO_EXTENSION = '-';

    /** Number of seconds in one day. */
    private const DAY = 86400;

    /** Directory to scan, without a trailing directory separator (except for the root). */
    private string $path;

    /** @var array<array-key, array<string, int>> Statistics of the current scan, keyed by extension */
    private array $result = [];

    /** @var array<string, array{ext: string, count: int}> md5 => extension of the first file seen and occurrence count */
    private array $hashMap = [];

    /**
     * @param string $path Absolute path to the cache directory
     * @throws InvalidArgumentException when $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // Stripping the separator from the root directory would leave an empty
        // string, which the directory iterator rejects; keep the root as is.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? DIRECTORY_SEPARATOR : $trimmed;
    }

    /**
     * Walk the directory tree and return statistics keyed by extension.
     *
     * Every call starts from scratch, so calling it repeatedly on the same
     * instance yields a fresh scan instead of accumulating earlier results.
     *
     * Extensions are lowercased; files without one are grouped under '-'.
     * Purely numeric extensions show up as integer keys, as PHP normalizes
     * numeric string array keys.
     *
     * @param callable(int, SplFileInfo): mixed|null $cb Optional progress callback, invoked once per
     *                                                   file with a running 1-based counter and the file
     * @return array<array-key, array<string, int>>
     */
    public function collect(?callable $cb = null): array
    {
        // Reset state left over from a previous run; otherwise every file would
        // be counted again and be flagged as a duplicate of its earlier self.
        $this->result = [];
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();
        $counter = 0;
        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            if ($cb !== null) {
                $cb(++$counter, $fileInfo);
            }

            // Compare against '' explicitly: a truthiness test would misfile
            // the perfectly valid extension "0" as "no extension".
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = self::NO_EXTENSION;
            }

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $fileInfo->getSize();

            // group by modified time
            $group = $this->getModifiedGroup($now - $fileInfo->getMTime());
            $this->result[$ext][$group]++;

            // handle duplicates by checksum
            $this->recordChecksum($fileInfo->getPathname(), $ext);
        }

        $this->summarizeDuplicates();

        return $this->result;
    }

    /**
     * Register a file's MD5 checksum for duplicate detection.
     *
     * Files that cannot be read are left out of duplicate detection: using the
     * failure value of md5_file() as an array key would lump all of them
     * together and report them as duplicates of each other.
     *
     * @param string $path Full path of the file to hash
     * @param string $ext Extension key the file was counted under
     */
    private function recordChecksum(string $path, string $ext): void
    {
        $md5 = is_readable($path) ? md5_file($path) : false;
        if ($md5 === false) {
            return;
        }

        if (isset($this->hashMap[$md5])) {
            $this->hashMap[$md5]['count']++;
        } else {
            $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
        }
    }

    /**
     * Add the number of redundant copies per checksum to the 'dups' counters.
     *
     * A checksum seen n times contributes n - 1 duplicates. They are credited
     * to the extension of the first file that was seen with that checksum,
     * even when later copies carry a different extension.
     */
    private function summarizeDuplicates(): void
    {
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $ext = $info['ext'];
                $this->initExtension($ext);
                $this->result[$ext]['dups'] += $info['count'] - 1;
            }
        }
    }

    /**
     * Map file age to a human-friendly bucket label.
     *
     * Negative ages (modification time in the future) fall into '<1d'.
     *
     * @param int $ageSeconds Age in seconds since last modification
     * @return string One of the labels in self::BUCKETS
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        return match (true) {
            $ageSeconds < self::DAY => '<1d',
            $ageSeconds < 7 * self::DAY => '<1w',
            $ageSeconds < 30 * self::DAY => '<1m',
            $ageSeconds < 90 * self::DAY => '<3m',
            $ageSeconds < 180 * self::DAY => '<6m',
            $ageSeconds < 365 * self::DAY => '<1y',
            default => '>1y',
        };
    }

    /**
     * Ensure an extension has all expected keys initialized.
     *
     * Does nothing when the extension already has an entry.
     *
     * @param string $ext Lowercased file extension (or '-' for files without one)
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ];
        foreach (self::BUCKETS as $bucket) {
            $this->result[$ext][$bucket] = 0;
        }
    }
}