<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Recursively scans a directory and builds cache statistics keyed by extension.
 *
 * For every extension the result holds the number of files ('count'), their
 * total size in bytes ('size'), the number of redundant copies detected via
 * MD5 checksum ('dups'), and one counter per age bucket (see self::BUCKETS).
 */
class FileStatistics
{
    /** Age bucket labels, ordered from youngest to oldest */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** Key used for files that have no extension */
    private const NO_EXTENSION = '-';

    /** Directory to scan, without trailing directory separator */
    private string $path;

    /** @var array<int|string, array<string, int>> statistics of the current/last run, keyed by extension */
    private array $result = [];

    /** @var array<int|string, array{ext: string, count: int}> md5 => extension of first occurrence and number of occurrences */
    private array $hashMap = [];

    /**
     * @param string $path Absolute path to the cache directory
     * @throws InvalidArgumentException when $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // A path made only of separators (the filesystem root) would be trimmed to an
        // empty string, which the directory iterator rejects. Keep it as given then.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Walk the directory tree and return statistics keyed by extension.
     *
     * Each call starts from a clean slate, so calling it repeatedly on the same
     * instance yields a fresh scan instead of accumulated numbers.
     *
     * Note: PHP casts purely numeric extensions (e.g. "0" or "123") to integer
     * array keys, so the result may contain integer keys.
     *
     * @param callable(int, SplFileInfo): mixed|null $cb Optional progress callback, invoked once
     *        per regular file with the running file counter (starting at 1) and the file
     * @return array<int|string, array<string, int>>
     */
    public function collect(?callable $cb = null): array
    {
        // reset state left over from a previous run to avoid double counting
        $this->result = [];
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();
        $counter = 0;
        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            if ($cb !== null) {
                $cb(++$counter, $fileInfo);
            }

            // compare against '' explicitly: the extension "0" is falsy but still a valid extension
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = self::NO_EXTENSION;
            }

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $fileInfo->getSize();

            // group by modified time
            $group = $this->getModifiedGroup($now - $fileInfo->getMTime());
            $this->result[$ext][$group]++;

            // handle duplicates by checksum
            $md5 = md5_file($fileInfo->getPathname());
            if ($md5 === false) {
                // Unreadable file: it is still counted above, but without a checksum it
                // must not be lumped together with other unreadable files as "duplicates".
                continue;
            }
            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // Summarize duplicates: every copy beyond the first one counts as a duplicate and
        // is attributed to the extension of the first file seen with that checksum.
        // That extension is guaranteed to be initialized by the loop above.
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->result[$info['ext']]['dups'] += $info['count'] - 1;
            }
        }

        return $this->result;
    }

    /**
     * Map file age to a human-friendly bucket label.
     *
     * Negative ages (modification time in the future) fall into the youngest bucket.
     *
     * @param int $ageSeconds Age in seconds since last modification
     * @return string One of the labels in self::BUCKETS
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;
        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Ensure an extension has all expected keys initialized.
     *
     * Does nothing when the extension already has an entry.
     *
     * @param string $ext Lowercased file extension (or '-' for files without one)
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ];
        foreach (self::BUCKETS as $bucket) {
            $this->result[$ext][$bucket] = 0;
        }
    }
}