1<?php
2
3namespace dokuwiki\plugin\filelist;
4
/**
 * Crawls a directory (optionally recursively) and builds a sorted, hierarchical
 * list of the files that match a glob-like pattern, an extension allow-list and
 * the ignore patterns configured in conf/ignore.txt.
 */
class Crawler
{
    /** @var string regexp alternation of the allowed extensions, empty string means no restriction */
    protected $ext;

    /** @var string sort criterion; a matching compare<Sortby>() method has to exist */
    protected $sortby = 'name';

    /** @var bool whether the sorted result is reversed */
    protected $sortreverse = false;

    /** @var string[] patterns to ignore */
    protected $ignore = [];

    /**
     * Initializes the crawler
     *
     * Entries may be given with or without a leading dot and with surrounding
     * whitespace. An empty list means that all extensions are allowed.
     *
     * @param string $extensions The extensions to allow (comma separated list)
     */
    public function __construct($extensions)
    {
        $quoted = [];
        foreach (explode(',', (string) $extensions) as $extension) {
            // "txt" and ".txt" are treated the same, the dot is added when matching
            $extension = ltrim(trim($extension), '.');

            // skip empty entries (e.g. from a trailing comma): an empty alternative in
            // the regexp would match every file and silently disable the restriction
            if ($extension === '') continue;

            // '/' is the delimiter used by isExtensionAllowed()
            $quoted[] = preg_quote($extension, '/');
        }
        $this->ext = implode('|', $quoted);

        $this->ignore = $this->loadIgnores();
    }

    /**
     * Set the sort criterion
     *
     * Supported by this class: name, iname, ctime, mtime, size. Unknown values
     * leave the result unsorted.
     *
     * @param string $sortby
     */
    public function setSortBy($sortby)
    {
        $this->sortby = $sortby;
    }

    /**
     * Set whether the sort order should be reversed
     *
     * @param bool $sortreverse
     */
    public function setSortReverse($sortreverse)
    {
        $this->sortreverse = $sortreverse;
    }

    /**
     * Does a (recursive) crawl for finding files based on a given pattern.
     * Based on a safe glob reimplementation using fnmatch and opendir.
     *
     * Each returned entry is an array with the keys name, local, path, mtime,
     * ctime, size, children and treesize. For files, children is false and
     * treesize is 1. For directories (only listed when crawling recursively)
     * children holds the entries of the directory and treesize is the sum of
     * the children's treesize.
     *
     * @param string $root the base path, it is concatenated with $local to get the directory to read
     * @param string $local the path of the directory relative to $root (may be empty)
     * @param string $pattern the pattern file names have to match
     * @param bool $recursive whether to search recursively
     * @param string $titlefile the name of the title file
     * @return array a hierarchical, sorted filelist; empty if nothing could be found
     *
     * @see http://www.php.net/manual/en/function.glob.php#71083
     */
    public function crawl($root, $local, $pattern, $recursive, $titlefile)
    {
        $path = Path::toAbsolute($root . $local);

        // do not descend into wiki or data directories
        if (Path::isWikiControlled($path)) return [];

        // checking first avoids the warning opendir() raises for missing directories
        if (!is_dir($path)) return [];
        $dir = opendir($path);
        if ($dir === false) return [];

        $titlefile = (string) $titlefile;
        // join without introducing leading or doubled slashes (local may be empty or end in /)
        $localPrefix = ($local === '') ? '' : rtrim($local, '/') . '/';
        $pathPrefix = rtrim($path, '/') . '/';

        $result = [];
        while (($file = readdir($dir)) !== false) {
            // ignore hidden, system and title files
            if ($file[0] === '.' || $file === $titlefile) continue;

            $filepath = $pathPrefix . $file;
            if (!is_readable($filepath)) continue;

            $isDir = is_dir($filepath);
            if ($isDir) {
                // directories are only listed when crawling recursively; pattern and
                // extension restriction are applied to files only
                if (!$recursive) continue;
            } elseif (!$this->fnmatch($pattern, $file) || !$this->isExtensionAllowed($file)) {
                continue;
            }
            if ($this->isFileIgnored($file)) continue;

            $self = $localPrefix . $file;
            $filename = $file;
            if ($isDir && $titlefile !== '') {
                // a directory is named by the content of its title file, if there is one;
                // without the empty check the "title file" would be the directory itself
                $title = $filepath . '/' . $titlefile;
                if (is_file($title) && is_readable($title)) {
                    $filename = io_readFile($title, false);
                }
            }

            // prepare entry
            $entry = [
                'name' => $filename,
                'local' => $self,
                'path' => $filepath,
                'mtime' => filemtime($filepath),
                'ctime' => filectime($filepath),
                'size' => filesize($filepath),
                'children' => $isDir ? $this->crawl($root, $self, $pattern, $recursive, $titlefile) : false,
                'treesize' => 1,
            ];

            // calculate tree size: a directory counts what its children count
            if ($entry['children'] !== false) {
                $entry['treesize'] = array_sum(array_column($entry['children'], 'treesize'));
            }

            // add entry to result
            $result[] = $entry;
        }
        closedir($dir);
        return $this->sortItems($result);
    }

    /**
     * Sort the given items by the current sortby and sortreverse settings
     *
     * The items are returned unchanged when there is no compare method for the
     * current sortby setting.
     *
     * @param array $items
     * @return array
     */
    protected function sortItems($items)
    {
        $callback = [$this, 'compare' . ucfirst($this->sortby)];
        if (!is_callable($callback)) return $items;

        usort($items, $callback);
        if ($this->sortreverse) {
            $items = array_reverse($items);
        }
        return $items;
    }

    /**
     * Check if a file is allowed by the configured extensions
     *
     * The file name has to end in a dot followed by one of the configured
     * extensions (case insensitive).
     *
     * @param string $file
     * @return bool
     */
    protected function isExtensionAllowed($file)
    {
        if ($this->ext === '') return true; // no restriction
        return preg_match('/\.(?:' . $this->ext . ')$/i', $file) === 1;
    }

    /**
     * Check if a file is ignored by the ignore patterns
     *
     * @param string $file
     * @return bool
     */
    protected function isFileIgnored($file)
    {
        foreach ($this->ignore as $pattern) {
            if ($this->fnmatch($pattern, $file)) return true;
        }
        return false;
    }

    /**
     * Load the ignore patterns from the ignore.txt file
     *
     * Everything from a # to the end of the line is treated as a comment. A
     * missing or unreadable file results in an empty pattern list.
     *
     * @return string[]
     */
    protected function loadIgnores()
    {
        $file = __DIR__ . '/conf/ignore.txt';
        if (!is_readable($file)) return [];

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) return [];

        $patterns = [];
        foreach ($lines as $line) {
            $pattern = trim((string) preg_replace('/\s*#.*$/', '', $line));
            // compare against '' explicitly so that a pattern like "0" is kept
            if ($pattern !== '') $patterns[] = $pattern;
        }
        return $patterns;
    }

    /**
     * Replacement for fnmatch() for windows systems.
     *
     * Supports * (any number of characters), ? (a single character) and bracket
     * expressions including ranges ([a-z]) and negation ([!abc] or [^abc]).
     * Matching is case insensitive.
     *
     * @author jk at ricochetsolutions dot com
     * @link http://www.php.net/manual/en/function.fnmatch.php#71725
     *
     * @param string $pattern the shell wildcard pattern
     * @param string $string the string to test
     * @return bool
     */
    protected function fnmatch($pattern, $string)
    {
        // quote everything, then turn the quoted wildcard characters back into their
        // regexp counterparts; strtr() tries the longest keys first, so the negated
        // bracket forms win over the plain opening bracket
        $regexp = strtr(
            preg_quote($pattern, '#'),
            [
                '\*' => '.*',
                '\?' => '.',
                '\[\!' => '[^',
                '\[\^' => '[^',
                '\[' => '[',
                '\]' => ']',
                // preg_quote() escapes the hyphen, which would break ranges like [a-z];
                // outside of brackets an unescaped hyphen is a plain literal
                '\-' => '-',
            ]
        );

        return preg_match('#^' . $regexp . '$#i', $string) === 1;
    }

    /**
     * Compare two entries by name (case sensitive)
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareName($a, $b)
    {
        return strcmp($a['name'], $b['name']);
    }

    /**
     * Compare two entries by lower cased name
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareIname($a, $b)
    {
        return strcmp(strtolower($a['name']), strtolower($b['name']));
    }

    /**
     * Compare two entries by their ctime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareCtime($a, $b)
    {
        return $a['ctime'] <=> $b['ctime'];
    }

    /**
     * Compare two entries by their mtime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareMtime($a, $b)
    {
        return $a['mtime'] <=> $b['mtime'];
    }

    /**
     * Compare two entries by their size value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareSize($a, $b)
    {
        return $a['size'] <=> $b['size'];
    }
}
231