<?php

namespace dokuwiki\plugin\cachestats;

use InvalidArgumentException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;

/**
 * Class FileStatistics
 *
 * Recursively scans a directory and collects, per (lower-cased) file extension:
 *  - number of files
 *  - number of duplicate files (based on MD5 checksum)
 *  - summed-up file size
 *  - number of files grouped by age of the last modification
 *
 * The total number of files and the total size are tracked internally while scanning
 * but are not part of the array returned by collect().
 */
class FileStatistics
{
    /** Pristine state of the statistics; collect() starts every run from a copy of this */
    private const EMPTY_STATS = [
        'extensions' => [],
        'duplicates' => [],
        'sizes' => [],
        'modified_groups' => [],
        'total_files' => 0,
        'total_size' => 0,
    ];

    /** @var string directory to scan, without trailing directory separator */
    private string $path;

    /** @var string[] age bucket labels, in the order they appear in the result rows */
    private array $buckets = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** @var array raw statistics gathered during the current collect() run */
    private array $stats = self::EMPTY_STATS;

    /** @var array md5 => ['ext' => extension of the first file seen, 'count' => number of files] */
    private array $hashMap = [];

    /**
     * @param string $path directory to scan
     * @throws InvalidArgumentException when $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // a path made up only of separators (the filesystem root) must not collapse to ''
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? DIRECTORY_SEPARATOR : $trimmed;
    }

    /**
     * Scan the directory and return the statistics keyed by file extension
     *
     * Each row has the keys 'count', 'size' and 'dups' plus one key per age bucket.
     * Files without an extension are listed under 'no_extension'. Duplicates are
     * attributed to the extension of the first file found with a given checksum.
     * Every call rescans the directory from scratch.
     *
     * @return array<int|string, array<string, int>>
     */
    public function collect(): array
    {
        // start from a clean slate so that repeated calls do not accumulate
        $this->stats = self::EMPTY_STATS;
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();

        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            $this->stats['total_files']++;

            // compare against '' explicitly: an extension of "0" is falsy but still valid
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = 'no_extension';
            }

            $path = $fileInfo->getPathname();
            $size = $fileInfo->getSize();
            $mtime = $fileInfo->getMTime();

            // size aggregated per extension
            $this->stats['sizes'][$ext] = ($this->stats['sizes'][$ext] ?? 0) + $size;
            $this->stats['total_size'] += $size;

            // count per extension
            $this->stats['extensions'][$ext] = ($this->stats['extensions'][$ext] ?? 0) + 1;

            // group by modified time
            $group = $this->getModifiedGroup($now - $mtime);
            $this->stats['modified_groups'][$ext][$group] =
                ($this->stats['modified_groups'][$ext][$group] ?? 0) + 1;

            // handle duplicates by checksum
            $md5 = md5_file($path);
            if ($md5 === false) {
                // unreadable file: without a checksum it must not be matched against other files
                continue;
            }
            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // summarize duplicates: every file beyond the first one sharing a checksum counts as one
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->stats['duplicates'][$info['ext']] =
                    ($this->stats['duplicates'][$info['ext']] ?? 0) + ($info['count'] - 1);
            }
        }

        return $this->buildResult();
    }

    /**
     * Map a file age to the label of the bucket it falls into
     *
     * Months are approximated as 30 days and a year as 365 days. Negative ages
     * (modification time in the future) end up in the '<1d' bucket.
     *
     * @param int $ageSeconds seconds since the last modification
     * @return string one of the labels in $this->buckets
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;
        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Combine collected sub statistics into a single result array keyed by extension
     *
     * Missing values are filled with 0 so that every row has the same set of keys.
     *
     * @return array<int|string, array<string, int>>
     */
    private function buildResult(): array
    {
        $keys = array_unique(
            array_merge(
                array_keys($this->stats['extensions']),
                array_keys($this->stats['sizes']),
                array_keys($this->stats['duplicates']),
                array_keys($this->stats['modified_groups'])
            )
        );

        $result = [];
        foreach ($keys as $key) {
            $result[$key] = [
                'count' => $this->stats['extensions'][$key] ?? 0,
                'size' => $this->stats['sizes'][$key] ?? 0,
                'dups' => $this->stats['duplicates'][$key] ?? 0,
            ];
            foreach ($this->buckets as $bucket) {
                $result[$key][$bucket] = $this->stats['modified_groups'][$key][$bucket] ?? 0;
            }
        }

        return $result;
    }
}