<?php

namespace dokuwiki\plugin\filelist;

/**
 * Collects files (and, when crawling recursively, directories) below a root path.
 *
 * Entries are filtered by a glob-like pattern, an optional extension whitelist and
 * the ignore patterns loaded from conf/ignore.txt, then sorted by the configured key.
 */
class Crawler
{
    /** @var string regexp alternation of the allowed extensions, empty string means no restriction */
    protected $ext;

    /** @var string sort key; selects the compare<Sortby>() method used for sorting */
    protected $sortby = 'name';

    /** @var bool whether the sorted result is reversed */
    protected $sortreverse = false;

    /** @var string[] patterns to ignore */
    protected $ignore = [];

    /**
     * Initializes the crawler
     *
     * Entries are trimmed and empty entries are dropped, so a trailing or doubled
     * comma does not accidentally allow every file.
     *
     * @param string $extensions The extensions to allow (comma separated list), empty for no restriction
     */
    public function __construct($extensions)
    {
        $allowed = array_map('trim', explode(',', (string) $extensions));

        // an empty alternative in the regexp would match every file name
        $allowed = array_filter($allowed, static fn($extension) => $extension !== '');

        // quote for use inside the "/"-delimited expression built in isExtensionAllowed()
        $allowed = array_map(static fn($extension) => preg_quote($extension, '/'), $allowed);

        $this->ext = implode('|', $allowed);
        $this->ignore = $this->loadIgnores();
    }

    /**
     * Set the key to sort by
     *
     * @param string $sortby one of the keys a compare<Key>() method exists for (e.g. name, iname, mtime)
     */
    public function setSortBy($sortby)
    {
        $this->sortby = $sortby;
    }

    /**
     * Set whether the sort order should be reversed
     *
     * @param bool $sortreverse
     */
    public function setSortReverse($sortreverse)
    {
        $this->sortreverse = $sortreverse;
    }

    /**
     * Does a (recursive) crawl for finding files based on a given pattern.
     * Based on a safe glob reimplementation using fnmatch and opendir.
     *
     * Each returned entry is an array with the keys name, local, path, mtime, ctime,
     * size, children (array of entries for directories, false for files) and treesize
     * (1 for files, the sum of the children's treesize for directories).
     *
     * @param string $root the base path, prepended to $local to build the real path
     * @param string $local the path below $root to search in
     * @param string $pattern the pattern file names have to match
     * @param bool $recursive whether to search recursively
     * @param string $titlefile the name of the title file
     * @return array a sorted, hierarchical filelist; empty if nothing could be found
     *
     * @see http://www.php.net/manual/en/function.glob.php#71083
     */
    public function crawl($root, $local, $pattern, $recursive, $titlefile)
    {
        $path = $root . $local;

        // do not descend into wiki or data directories
        if (Path::isWikiControlled($path)) return [];

        if (($dir = opendir($path)) === false) return [];
        $result = [];
        while (($file = readdir($dir)) !== false) {
            // ignore hidden, system and title files
            if ($file[0] === '.' || $file === (string) $titlefile) {
                continue;
            }

            $self = $local . '/' . $file;
            $filepath = $path . '/' . $file;
            if (!is_readable($filepath)) continue;

            $isDir = is_dir($filepath);
            if ($isDir) {
                // directories are only listed when crawling recursively, regardless of the pattern
                if (!$recursive) continue;
            } elseif (!$this->fnmatch($pattern, $file) || !$this->isExtensionAllowed($file)) {
                continue;
            }
            if ($this->isFileIgnored($file)) continue;

            // a directory may provide its display name through a title file
            $filename = $file;
            if ($isDir) {
                $title = $filepath . '/' . $titlefile;
                if (is_file($title) && is_readable($title)) {
                    $filename = io_readFile($title, false);
                }
            }

            // prepare entry
            $entry = [
                'name' => $filename,
                'local' => $self,
                'path' => $filepath,
                'mtime' => filemtime($filepath),
                'ctime' => filectime($filepath),
                'size' => filesize($filepath),
                'children' => $isDir ? $this->crawl($root, $self, $pattern, $recursive, $titlefile) : false,
                'treesize' => 1,
            ];

            // a directory's tree size is the sum of its children's tree sizes
            if ($entry['children'] !== false) {
                $entry['treesize'] = array_sum(array_column($entry['children'], 'treesize'));
            }

            $result[] = $entry;
        }
        closedir($dir);
        return $this->sortItems($result);
    }

    /**
     * Sort the given items by the current sortby and sortreverse settings
     *
     * Items are returned unsorted when no compare method exists for the sort key.
     *
     * @param array $items
     * @return array
     */
    protected function sortItems($items)
    {
        $callback = [$this, 'compare' . ucfirst($this->sortby)];
        if (!is_callable($callback)) return $items;

        usort($items, $callback);
        if ($this->sortreverse) {
            $items = array_reverse($items);
        }
        return $items;
    }

    /**
     * Check if a file is allowed by the configured extensions
     *
     * The check is a case-insensitive match of the configured extensions against
     * the end of the file name.
     *
     * @param string $file
     * @return bool
     */
    protected function isExtensionAllowed($file)
    {
        if ($this->ext === '') return true; // no restriction
        return preg_match('/(' . $this->ext . ')$/i', $file) === 1;
    }

    /**
     * Check if a file is ignored by the ignore patterns
     *
     * @param string $file
     * @return bool
     */
    protected function isFileIgnored($file)
    {
        foreach ($this->ignore as $pattern) {
            if ($this->fnmatch($pattern, $file)) return true;
        }
        return false;
    }

    /**
     * Load the ignore patterns from the ignore.txt file
     *
     * Everything from a "#" to the end of a line is treated as a comment. A missing
     * or unreadable file results in an empty pattern list.
     *
     * @return string[]
     */
    protected function loadIgnores()
    {
        $file = __DIR__ . '/conf/ignore.txt';
        if (!is_readable($file)) return [];

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) return [];

        $patterns = array_map(static fn($line) => trim(preg_replace('/\s*#.*$/', '', $line)), $lines);

        // only drop empty lines; a plain array_filter() would also drop the pattern "0"
        return array_values(array_filter($patterns, static fn($pattern) => $pattern !== ''));
    }

    /**
     * Replacement for fnmatch() for windows systems.
     *
     * Matching is case-insensitive and has to cover the whole string. Supported
     * are "*", "?" and bracket expressions including ranges ("[a-z]") and
     * negation ("[!abc]").
     *
     * @author jk at ricochetsolutions dot com
     * @link http://www.php.net/manual/en/function.fnmatch.php#71725
     *
     * @param string $pattern the shell wildcard pattern
     * @param string $string the string to test
     * @return bool
     */
    protected function fnmatch($pattern, $string)
    {
        // preg_quote() escapes all wildcard characters, the map turns them back into
        // their regexp equivalents; strtr() tries the longest keys first
        $regex = strtr(
            preg_quote($pattern, '#'),
            [
                '\*' => '.*',
                '\?' => '.',
                '\[\!' => '[^',
                '\[' => '[',
                '\]' => ']',
                // needed for ranges in bracket expressions, a literal outside of them either way
                '\-' => '-',
            ]
        );

        return preg_match('#^' . $regex . '$#i', $string) === 1;
    }

    /**
     * Compare two entries by name (case-sensitive)
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareName($a, $b)
    {
        return strcmp($a['name'], $b['name']);
    }

    /**
     * Compare two entries by their lowercased name
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareIname($a, $b)
    {
        return strcmp(strtolower($a['name']), strtolower($b['name']));
    }

    /**
     * Compare two entries by their ctime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareCtime($a, $b)
    {
        return $a['ctime'] <=> $b['ctime'];
    }

    /**
     * Compare two entries by their mtime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareMtime($a, $b)
    {
        return $a['mtime'] <=> $b['mtime'];
    }

    /**
     * Compare two entries by their size value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareSize($a, $b)
    {
        return $a['size'] <=> $b['size'];
    }
}