<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Recursively scans a directory and builds cache statistics keyed by extension.
 *
 * For every extension the statistics contain the number of files ('count'), their
 * summed size in bytes ('size'), the number of redundant copies detected via MD5
 * checksum ('dups') and one counter per age bucket listed in self::BUCKETS.
 */
class FileStatistics
{
    /** @var string Directory to scan, without a trailing directory separator (except for the root) */
    private string $path;

    /** Age bucket labels, ordered from youngest to oldest; each is a key in every result row */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** @var array<int|string, array<string, int>> Statistics of the current scan, keyed by extension */
    private array $result = [];

    /** @var array<string, array{ext: string, count: int}> md5 => extension of the first file seen + occurrences */
    private array $hashMap = [];

    /**
     * @param string $path Absolute path to the cache directory
     * @throws InvalidArgumentException when $path is not an existing directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // Stripping the separator from the filesystem root would leave an empty,
        // unusable path, so keep the original value in that case.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Walk the directory tree and return statistics keyed by extension.
     *
     * Every call performs a fresh scan; results of earlier calls are discarded.
     * Files without an extension are reported under 'no_extension'. Duplicates are
     * attributed to the extension of the first file seen with a given checksum.
     * Note that PHP stores purely numeric extensions (e.g. "1") as integer keys.
     *
     * @return array<int|string, array<string, int>>
     */
    public function collect(): array
    {
        // Start from a clean slate so calling collect() again does not double count.
        $this->result = [];
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            // Compare against '' explicitly: a truthiness test would misfile the
            // legitimate extension "0" as having no extension.
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = 'no_extension';
            }

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $fileInfo->getSize();

            // group by modified time
            $group = $this->getModifiedGroup($now - $fileInfo->getMTime());
            $this->result[$ext][$group]++;

            // handle duplicates by checksum; files that cannot be hashed are still
            // counted above but must not be lumped together as duplicates
            $md5 = $fileInfo->isReadable() ? md5_file($fileInfo->getPathname()) : false;
            if ($md5 === false) {
                continue;
            }

            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // summarize duplicates: every copy beyond the first one is redundant
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $ext = $info['ext'];
                $this->initExtension($ext);
                $this->result[$ext]['dups'] += $info['count'] - 1;
            }
        }

        return $this->result;
    }

    /**
     * Map file age to a human-friendly bucket label.
     *
     * A month is approximated as 30 days and a year as 365 days. Negative ages
     * (modification time in the future) fall into the youngest bucket.
     *
     * @param int $ageSeconds Age in seconds since last modification
     * @return string One of the labels in self::BUCKETS
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;

        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Ensure an extension has all expected keys initialized.
     *
     * Does nothing when the extension already has a result row.
     *
     * @param string $ext Lowercased file extension (or 'no_extension')
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ];
        foreach (self::BUCKETS as $bucket) {
            $this->result[$ext][$bucket] = 0;
        }
    }
}