xref: /plugin/cachestats/FileStatistics.php (revision a6282b4305194b4d4422ae419e63e95f58f1bbef)
1a3092f6cSAndreas Gohr<?php
2a3092f6cSAndreas Gohr
3a3092f6cSAndreas Gohrnamespace dokuwiki\plugin\cachestats;
4a3092f6cSAndreas Gohr
5a3092f6cSAndreas Gohruse InvalidArgumentException;
6a3092f6cSAndreas Gohruse RecursiveDirectoryIterator;
7a3092f6cSAndreas Gohruse RecursiveIteratorIterator;
8a3092f6cSAndreas Gohruse SplFileInfo;
9a3092f6cSAndreas Gohr
/**
 * Recursively scans a directory and builds cache statistics keyed by extension.
 *
 * For every lowercased file extension ('-' for files without one) the result holds
 * the number of files, their total size in bytes, the number of duplicate files
 * (identical md5 checksum) and one counter per age bucket (see self::BUCKETS).
 */
class FileStatistics
{
    /** Age bucket labels, ordered from most recently modified to oldest */
    private const BUCKETS = ['<1d', '<1w', '<1m', '<3m', '<6m', '<1y', '>1y'];

    /** @var string Directory to scan, stripped of trailing directory separators */
    private string $path;

    /** @var array<string, array<string, int>> Statistics of the most recent collect() run */
    private array $result = [];

    /** @var array<string, array{ext: string, count: int}> md5 => extension of the first file seen, number of files */
    private array $hashMap = [];

    /**
     * @param string $path Absolute path to the cache directory
     * @throws InvalidArgumentException when $path is not an existing directory
     */
    public function __construct(string $path)
    {
        if (!is_dir($path)) {
            throw new InvalidArgumentException("Path '$path' is not a valid directory.");
        }

        $trimmed = rtrim($path, DIRECTORY_SEPARATOR);
        // A path made only of separators (the filesystem root) must not collapse to ''
        $this->path = $trimmed === '' ? $path : $trimmed;
    }

    /**
     * Walk the directory tree and return statistics keyed by extension.
     *
     * Each call starts from scratch, so calling this method repeatedly on the same
     * instance returns fresh numbers instead of accumulating earlier runs.
     *
     * Note that PHP casts purely numeric array keys to integers, so an extension such
     * as "0" shows up under the integer key 0 in the returned array.
     *
     * @param (callable(int, SplFileInfo): void)|null $cb Optional progress callback, invoked once per
     *                                                    file with the running file number and the file
     * @return array<string, array<string, int>> extension => [count, size, dups, one key per age bucket]
     */
    public function collect(?callable $cb = null): array
    {
        // Reset the state of any previous run so results are not counted twice
        $this->result = [];
        $this->hashMap = [];

        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($this->path, RecursiveDirectoryIterator::SKIP_DOTS)
        );

        $now = time();
        $counter = 0;
        foreach ($iterator as $fileInfo) {
            /** @var SplFileInfo $fileInfo */
            if (!$fileInfo->isFile()) {
                continue;
            }

            if ($cb !== null) {
                $cb(++$counter, $fileInfo);
            }

            // Compare against '' explicitly: a truthiness check would also swallow the extension "0"
            $ext = strtolower($fileInfo->getExtension());
            if ($ext === '') {
                $ext = '-';
            }

            $this->initExtension($ext);

            $this->result[$ext]['count']++;
            $this->result[$ext]['size'] += $fileInfo->getSize();

            // group by modified time
            $group = $this->getModifiedGroup($now - $fileInfo->getMTime());
            $this->result[$ext][$group]++;

            // handle duplicates by checksum
            $md5 = md5_file($fileInfo->getPathname());
            if ($md5 === false) {
                // Unreadable file: without a checksum it must not be matched against other files
                continue;
            }
            if (isset($this->hashMap[$md5])) {
                $this->hashMap[$md5]['count']++;
            } else {
                $this->hashMap[$md5] = ['ext' => $ext, 'count' => 1];
            }
        }

        // summarize duplicates: every file beyond the first one sharing a checksum is a duplicate,
        // attributed to the extension of the first file seen with that checksum
        foreach ($this->hashMap as $info) {
            if ($info['count'] > 1) {
                $this->result[$info['ext']]['dups'] += $info['count'] - 1;
            }
        }

        return $this->result;
    }

    /**
     * Map file age to a human-friendly bucket label.
     *
     * Months are approximated as 30 days and a year as 365 days. Negative ages
     * (modification time in the future) fall into the youngest bucket.
     *
     * @param int $ageSeconds Age in seconds since last modification
     * @return string One of the labels in self::BUCKETS
     */
    private function getModifiedGroup(int $ageSeconds): string
    {
        $day = 86400;
        return match (true) {
            $ageSeconds < $day => '<1d',
            $ageSeconds < 7 * $day => '<1w',
            $ageSeconds < 30 * $day => '<1m',
            $ageSeconds < 90 * $day => '<3m',
            $ageSeconds < 180 * $day => '<6m',
            $ageSeconds < 365 * $day => '<1y',
            default => '>1y',
        };
    }

    /**
     * Ensure an extension has all expected keys initialized.
     *
     * Does nothing when the extension already has an entry in the result.
     *
     * @param string $ext Lowercased file extension (or '-' for files without extension)
     */
    private function initExtension(string $ext): void
    {
        if (isset($this->result[$ext])) {
            return;
        }

        $this->result[$ext] = [
            'count' => 0,
            'size' => 0,
            'dups' => 0,
        ];
        foreach (self::BUCKETS as $bucket) {
            $this->result[$ext][$bucket] = 0;
        }
    }
}
133