1<?php
2
3namespace dokuwiki\plugin\filelist;
4
/**
 * Crawls a directory tree and builds a (optionally hierarchical) list of the
 * files found in it.
 *
 * Files are selected by a glob-like pattern, restricted to a configurable set
 * of extensions and filtered through the ignore patterns loaded from
 * conf/ignore.txt. The entries of each directory level are sorted according to
 * the configured sort key and direction.
 */
class Crawler
{
    /** @var string regexp alternation of allowed extensions, empty string means no restriction */
    protected $ext;

    /** @var string sort key; selects the compare<Sortby>() method used for sorting */
    protected $sortby = 'name';

    /** @var bool whether the sort order is reversed */
    protected $sortreverse = false;

    /** @var string[] patterns to ignore */
    protected $ignore = [];

    /**
     * Initializes the crawler
     *
     * Empty entries in the list (e.g. caused by a trailing comma) are dropped. They would
     * otherwise end up as an empty regexp alternative that matches every file name.
     *
     * @param string $extensions The extensions to allow (comma separated list)
     */
    public function __construct($extensions)
    {
        $allowed = array_map('trim', explode(',', (string)$extensions));
        $allowed = array_filter($allowed, static fn($extension) => $extension !== '');
        // quote for use inside a '/' delimited regexp, see isExtensionAllowed()
        $allowed = array_map(static fn($extension) => preg_quote($extension, '/'), $allowed);
        $this->ext = implode('|', $allowed);

        $this->ignore = $this->loadIgnores();
    }

    /**
     * Set the key to sort by
     *
     * The key is mapped to a compare<Key>() method of this class (name, iname, ctime,
     * mtime, size). Unknown keys leave the items unsorted.
     *
     * @param string $sortby
     */
    public function setSortBy($sortby)
    {
        $this->sortby = $sortby;
    }

    /**
     * Set whether the sort order should be reversed
     *
     * @param bool $sortreverse
     */
    public function setSortReverse($sortreverse)
    {
        $this->sortreverse = $sortreverse;
    }

    /**
     * Does a (recursive) crawl for finding files based on a given pattern.
     * Based on a safe glob reimplementation using fnmatch and opendir.
     *
     * Each returned entry is an array with the keys name, local, path, mtime, ctime, size,
     * children (array of entries for directories, false for files) and treesize (number of
     * files contained in the entry, 1 for a file).
     *
     * @param string $root the base path, prepended to $local to get the directory to read
     * @param string $local the path of the directory to read, relative to $root
     * @param string $pattern the pattern file names have to match
     * @param bool $recursive whether to search recursively
     * @param string $titlefile the name of the title file
     * @return array a hierarchical filelist, empty if nothing could be found
     *
     * @see http://www.php.net/manual/en/function.glob.php#71083
     */
    public function crawl($root, $local, $pattern, $recursive, $titlefile)
    {
        $path = $root . $local;

        // do not descend into wiki or data directories
        if (Path::isWikiControlled($path)) return [];

        // bail out quietly instead of letting opendir() raise a warning
        if (!is_dir($path) || !is_readable($path)) return [];
        $dir = opendir($path);
        if ($dir === false) return [];

        $result = [];
        while (($file = readdir($dir)) !== false) {
            // ignore hidden, system and title files
            if ($file[0] === '.' || $file === $titlefile) continue;

            $self = $local . '/' . $file;
            $filepath = $path . '/' . $file;
            if (!is_readable($filepath)) continue;

            $isDir = is_dir($filepath);
            if ($isDir) {
                // directories are only listed when recursing, then regardless of the pattern
                if (!$recursive) continue;
            } else {
                // files have to match the pattern and the allowed extensions
                if (!$this->fnmatch($pattern, $file)) continue;
                if (!$this->isExtensionAllowed($file)) continue;
            }
            if ($this->isFileIgnored($file)) continue;

            // directories may provide a nicer name through their title file
            $filename = $file;
            if ($isDir) {
                $title = $filepath . '/' . $titlefile;
                if (is_readable($title)) {
                    $filename = io_readFile($title, false);
                }
            }

            // descend into directories; a file counts as a tree of size one
            $children = $isDir ? $this->crawl($root, $self, $pattern, $recursive, $titlefile) : false;
            $treesize = ($children === false) ? 1 : array_sum(array_column($children, 'treesize'));

            $result[] = [
                'name' => $filename,
                'local' => $self,
                'path' => $filepath,
                'mtime' => filemtime($filepath),
                'ctime' => filectime($filepath),
                'size' => filesize($filepath),
                'children' => $children,
                'treesize' => $treesize,
            ];
        }
        closedir($dir);
        return $this->sortItems($result);
    }

    /**
     * Sort the given items by the current sortby and sortreverse settings
     *
     * Items are returned unchanged when there is no compare method for the current sort key.
     *
     * @param array $items
     * @return array
     */
    protected function sortItems($items)
    {
        $callback = [$this, 'compare' . ucfirst($this->sortby)];
        if (!is_callable($callback)) return $items;

        usort($items, $callback);
        if ($this->sortreverse) {
            $items = array_reverse($items);
        }
        return $items;
    }

    /**
     * Check if a file is allowed by the configured extensions
     *
     * The check is case insensitive and anchored at the end of the file name.
     *
     * @param string $file
     * @return bool
     */
    protected function isExtensionAllowed($file)
    {
        if ($this->ext === '') return true; // no restriction
        return preg_match('/(' . $this->ext . ')$/i', $file) === 1;
    }

    /**
     * Check if a file is ignored by the ignore patterns
     *
     * @param string $file
     * @return bool
     */
    protected function isFileIgnored($file)
    {
        foreach ($this->ignore as $pattern) {
            if ($this->fnmatch($pattern, $file)) return true;
        }
        return false;
    }

    /**
     * Load the ignore patterns from the ignore.txt file
     *
     * Everything from a '#' to the end of a line is treated as a comment. A missing or
     * unreadable file results in an empty pattern list.
     *
     * @return string[]
     */
    protected function loadIgnores()
    {
        $file = __DIR__ . '/conf/ignore.txt';
        if (!is_readable($file)) return [];

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) return [];

        $patterns = array_map(static fn($line) => trim(preg_replace('/\s*#.*$/', '', $line)), $lines);
        // compare against '' explicitly so that a pattern like "0" is not dropped
        return array_values(array_filter($patterns, static fn($pattern) => $pattern !== ''));
    }

    /**
     * Replacement for fnmatch() for windows systems.
     *
     * Supports '*', '?' and bracket expressions including ranges ([a-z]) and negation
     * ([!abc] or [^abc]). Matching is case insensitive.
     *
     * @author jk at ricochetsolutions dot com
     * @link http://www.php.net/manual/en/function.fnmatch.php#71725
     *
     * @param string $pattern the glob pattern
     * @param string $string the string to test
     * @return bool
     */
    protected function fnmatch($pattern, $string)
    {
        // quote everything first, then turn the quoted wildcards back into their regexp
        // counterparts; strtr() tries the longest keys first, so '\[\!' wins over '\['
        $regex = strtr(
            preg_quote($pattern, '#'),
            [
                '\*' => '.*',
                '\?' => '.',
                '\[\!' => '[^',
                '\[\^' => '[^',
                '\[' => '[',
                '\]' => ']',
                '\-' => '-', // preg_quote() escapes '-', which would break ranges like [0-9]
            ]
        );
        return preg_match('#^' . $regex . '$#i', $string) === 1;
    }

    /**
     * Compare two entries by name (case sensitive)
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareName($a, $b)
    {
        return strcmp($a['name'], $b['name']);
    }

    /**
     * Compare two entries by name (case insensitive)
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareIname($a, $b)
    {
        return strcmp(strtolower($a['name']), strtolower($b['name']));
    }

    /**
     * Compare two entries by their ctime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareCtime($a, $b)
    {
        return $a['ctime'] <=> $b['ctime'];
    }

    /**
     * Compare two entries by their mtime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareMtime($a, $b)
    {
        return $a['mtime'] <=> $b['mtime'];
    }

    /**
     * Compare two entries by their size value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareSize($a, $b)
    {
        return $a['size'] <=> $b['size'];
    }
}
230