<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Recursively scans a directory and builds cache statistics keyed by extension.
 *
 * For every extension the result holds the number of files, their total size
 * in bytes, the number of duplicate files (by MD5 checksum of the content) and
 * one counter per age bucket (see self::BUCKETS).
 */
class FileStatistics
{
    /** Directory to scan, without a trailing directory separator. */
    private string $path;

    /** Age bucket labels, ordered from newest to oldest. */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** Statistics of the most recent collect() run, keyed by extension. */
    private array $result = [];

    /** Content checksums seen during the current run: md5 => ['ext' => string, 'count' => int]. */
    private array $hashMap = [];

    /**
     * @param string $path Absolute path to the cache directory
     *
     * @throws InvalidArgumentException if $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // A path consisting only of separators (e.g. "/") must not be trimmed to an empty string.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Walk the directory tree and return statistics keyed by extension.
     *
     * Each entry has the keys 'count', 'size', 'dups' and one key per age bucket.
     * Files without an extension are collected under 'no_extension'. Duplicates
     * are attributed to the extension of the first file seen with that checksum.
     * Purely numeric extensions end up as integer keys because of PHP's array
     * key casting.
     *
     * The method may be called repeatedly; every call starts from a clean state.
     *
     * @return array<string, array>
     */
    public function collect(): array
    {
        // Start fresh so repeated calls do not accumulate on top of earlier runs.
        $this->result = [];
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            // Compare against '' explicitly: an extension of "0" is falsy but still a real extension.
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = 'no_extension';
            }

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $fileInfo->getSize();

            // group by modified time
            $group = $this->getModifiedGroup($now - $fileInfo->getMTime());
            $this->result[$ext][$group]++;

            // handle duplicates by checksum
            $this->trackChecksum($fileInfo->getPathname(), $ext);
        }

        // summarize duplicates: every file beyond the first with the same checksum counts as one duplicate
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->result[$info['ext']]['dups'] += $info['count'] - 1;
            }
        }

        return $this->result;
    }

    /**
     * Record the content checksum of a file for later duplicate detection.
     *
     * Unreadable files are skipped: md5_file() would return false for each of
     * them, and they would otherwise all be counted as duplicates of each other.
     *
     * @param string $path Full path of the file
     * @param string $ext  Extension the file was counted under
     */
    private function trackChecksum(string $path, string $ext): void
    {
        $md5 = is_readable($path) ? md5_file($path) : false;
        if ($md5 === false) {
            return;
        }

        if (isset($this->hashMap[$md5])) {
            $this->hashMap[$md5]['count']++;
        } else {
            $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
        }
    }

    /**
     * Map file age to a human-friendly bucket label.
     *
     * Months are approximated as 30 days and a year as 365 days. Negative ages
     * (modification time in the future) fall into the '<1d' bucket.
     *
     * @param int $ageSeconds Age in seconds since last modification
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;

        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Ensure an extension has all expected keys initialized.
     *
     * Does nothing if the extension already has an entry.
     *
     * @param string $ext Lowercased file extension (or 'no_extension')
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ] + array_fill_keys(self::BUCKETS, 0);
    }
}