xref: /plugin/cachestats/FileStatistics.php (revision a6282b4305194b4d4422ae419e63e95f58f1bbef)
1<?php
2
3namespace dokuwiki\plugin\cachestats;
4
5use InvalidArgumentException;
6use RecursiveDirectoryIterator;
7use RecursiveIteratorIterator;
8use SplFileInfo;
9
10/**
11 * Recursively scans a directory and builds cache statistics keyed by extension.
12 * Data includes counts, total size, duplicate counts, and age buckets.
13 */
class FileStatistics
{
    /** Age bucket labels, ordered from most recently modified to oldest. */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** Result key used for files that have no extension at all. */
    private const NO_EXTENSION = '-';

    /** Directory to scan, normally without a trailing directory separator. */
    private string $path;

    /**
     * Statistics of the current (or most recent) collect() run.
     *
     * Each entry holds 'count', 'size', 'dups' and one counter per BUCKETS label.
     * PHP stores purely numeric extensions (e.g. "0") as integer keys.
     *
     * @var array<int|string, array<string, int>>
     */
    private array $result = [];

    /**
     * @param string $path Absolute path to the cache directory
     * @throws InvalidArgumentException if $path is not a directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        // A path made only of separators (the filesystem root) would be trimmed
        // to an empty string, which the directory iterator rejects; keep it as is.
        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Walk the directory tree and return statistics keyed by lowercased extension.
     *
     * Every call starts from scratch, so calling it repeatedly on the same
     * instance yields independent results. Files whose content has the same
     * md5 checksum are counted as duplicates; the surplus copies are attributed
     * to the extension of the first file seen with that checksum.
     *
     * @param (callable(int, SplFileInfo): mixed)|null $cb Optional progress callback, invoked once
     *                                                     per file with a running 1-based counter
     * @return array<int|string, array<string, int>>
     */
    public function collect(?callable $cb = null): array
    {
        // reset state so that repeated calls do not accumulate earlier runs
        $this->result = [];
        $hashes = []; // md5 => ['ext' => first extension seen, 'count' => number of files]

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();
        $counter = 0;
        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            if ($cb !== null) {
                $cb(++$counter, $fileInfo);
            }

            // compare against '' explicitly: a truthiness test would treat the
            // perfectly valid extension "0" as "no extension"
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = self::NO_EXTENSION;
            }

            $size = $fileInfo->getSize();
            $mtime = $fileInfo->getMTime();

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $size;

            // group by modified time
            $group = $this->getModifiedGroup($now - $mtime);
            $this->result[$ext][$group]++;

            // handle duplicates by checksum
            $md5 = md5_file($fileInfo->getPathname());
            if ($md5 === false) {
                // unreadable or vanished file: it is still counted above, but must
                // not be lumped together with other failures as a "duplicate"
                continue;
            }

            if (isset($hashes[$md5])) {
                $hashes[$md5]['count']++;
            } else {
                $hashes[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // summarize duplicates: every copy beyond the first one is a duplicate
        foreach ($hashes as $info) {
            if ($info['count'] > 1) {
                $this->result[$info['ext']]['dups'] += $info['count'] - 1;
            }
        }

        return $this->result;
    }

    /**
     * Map file age to a human-friendly bucket label.
     *
     * Negative ages (modification time in the future) fall into the '<1d' bucket.
     *
     * @param int $ageSeconds Age in seconds since last modification
     * @return string One of the labels in self::BUCKETS
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;
        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Ensure an extension has all expected keys initialized to zero.
     *
     * @param string $ext Lowercased file extension (or '-' for files without one)
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ];
        foreach (self::BUCKETS as $bucket) {
            $this->result[$ext][$bucket] = 0;
        }
    }
}
133