<?php

namespace dokuwiki\plugin\filelist;

/**
 * Collects files (and optionally sub directories) below a directory.
 *
 * Entries are filtered by a glob-like pattern, a list of allowed file
 * extensions and the ignore patterns loaded from conf/ignore.txt, then sorted
 * by a configurable criterion.
 */
class Crawler
{
    /** @var string regexp alternation of the allowed extensions, empty string means "no restriction" */
    protected $ext;

    /** @var string sort criterion, resolved to a compare<Sortby>() method of this class */
    protected $sortby = 'name';

    /** @var bool whether the sorted result is reversed */
    protected $sortreverse = false;

    /** @var string[] patterns to ignore */
    protected $ignore = [];

    /**
     * Initializes the crawler
     *
     * @param string $extensions The extensions to allow (comma separated list), empty for no restriction
     */
    public function __construct($extensions)
    {
        $quoted = [];
        foreach (explode(',', (string) $extensions) as $extension) {
            // accept "txt" as well as ".txt"; the dot is added again when matching
            $extension = ltrim(trim($extension), '.');
            // an empty alternative would match every file name, so drop empty entries
            if ($extension === '') continue;
            // quoted for the '/'-delimited pattern built in isExtensionAllowed()
            $quoted[] = preg_quote($extension, '/');
        }
        $this->ext = implode('|', $quoted);

        $this->ignore = $this->loadIgnores();
    }

    /**
     * Set the sort criterion
     *
     * @param string $sortby one of name, iname, ctime, mtime, size; unknown values leave results unsorted
     */
    public function setSortBy($sortby)
    {
        $this->sortby = $sortby;
    }

    /**
     * Set whether sorted results should be reversed
     *
     * @param bool $sortreverse
     */
    public function setSortReverse($sortreverse)
    {
        $this->sortreverse = $sortreverse;
    }

    /**
     * Does a (recursive) crawl for finding files based on a given pattern.
     * Based on a safe glob reimplementation using fnmatch and opendir.
     *
     * Each returned entry is an array with the keys name, local, path, mtime,
     * ctime, size, children and treesize. For files, children is false and
     * treesize is 1. For directories (only listed when $recursive is set),
     * children holds the crawl result of that directory and treesize is the
     * sum of the children's treesize.
     *
     * @param string $root the base directory, $local is appended to it
     * @param string $local the path below $root to search in, may be empty
     * @param string $pattern the pattern file names have to match
     * @param bool $recursive whether to search recursively
     * @param string $titlefile the name of the title file
     * @return array a hierarchical filelist, empty if nothing could be found
     *
     * @see http://www.php.net/manual/en/function.glob.php#71083
     */
    public function crawl($root, $local, $pattern, $recursive, $titlefile)
    {
        $path = Path::toAbsolute($root . $local);

        // do not descend into wiki or data directories
        if (Path::isWikiControlled($path)) return [];

        if (($dir = opendir($path)) === false) return [];
        $result = [];
        while (($file = readdir($dir)) !== false) {
            // ignore hidden, system and title files
            if ($file[0] === '.' || $file === $titlefile) continue;

            // join without introducing leading or doubled slashes (local may be empty or end in /)
            $self = ($local === '') ? $file : rtrim($local, '/') . '/' . $file;
            $filepath = rtrim($path, '/') . '/' . $file;
            if (!is_readable($filepath)) continue;

            $isDir = is_dir($filepath);
            if ($isDir) {
                // directories are only listed (and descended into) when recursing;
                // they are exempt from the pattern and extension checks
                if (!$recursive) continue;
            } else {
                if (!$this->fnmatch($pattern, $file)) continue;
                if (!$this->isExtensionAllowed($file)) continue;
            }
            if ($this->isFileIgnored($file)) continue;

            // a readable title file inside a directory provides its display name
            $filename = $file;
            if ($isDir) {
                $title = $filepath . '/' . $titlefile;
                if (is_readable($title)) {
                    // io_readFile() is defined outside this file
                    $filename = io_readFile($title, false);
                }
            }

            $children = $isDir ? $this->crawl($root, $self, $pattern, $recursive, $titlefile) : false;

            $result[] = [
                'name' => $filename,
                'local' => $self,
                'path' => $filepath,
                'mtime' => filemtime($filepath),
                'ctime' => filectime($filepath),
                'size' => filesize($filepath),
                'children' => $children,
                // a file counts as 1, a directory as the sum of its children
                'treesize' => ($children === false) ? 1 : array_sum(array_column($children, 'treesize')),
            ];
        }
        closedir($dir);
        return $this->sortItems($result);
    }

    /**
     * Sort the given items by the current sortby and sortreverse settings
     *
     * Items are returned unchanged when no compare method exists for the
     * configured sort criterion.
     *
     * @param array $items
     * @return array
     */
    protected function sortItems($items)
    {
        $callback = [$this, 'compare' . ucfirst($this->sortby)];
        if (!is_callable($callback)) return $items;

        usort($items, $callback);
        if ($this->sortreverse) {
            $items = array_reverse($items);
        }
        return $items;
    }

    /**
     * Check if a file is allowed by the configured extensions
     *
     * The extension has to follow a dot, so "footxt" does not count as "txt".
     * The comparison is case insensitive.
     *
     * @param string $file
     * @return bool
     */
    protected function isExtensionAllowed($file)
    {
        if ($this->ext === '') return true; // no restriction
        return preg_match('/\.(?:' . $this->ext . ')$/i', $file) === 1;
    }

    /**
     * Check if a file is ignored by the ignore patterns
     *
     * @param string $file
     * @return bool
     */
    protected function isFileIgnored($file)
    {
        foreach ($this->ignore as $pattern) {
            if ($this->fnmatch($pattern, $file)) return true;
        }
        return false;
    }

    /**
     * Load the ignore patterns from the ignore.txt file
     *
     * Everything from a # to the end of the line is treated as a comment.
     * A missing or unreadable file results in no ignore patterns.
     *
     * @return string[]
     */
    protected function loadIgnores()
    {
        $file = __DIR__ . '/conf/ignore.txt';
        if (!is_readable($file)) return [];

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) return [];

        $ignore = [];
        foreach ($lines as $line) {
            $line = trim(preg_replace('/\s*#.*$/', '', $line));
            // compare against '' explicitly so that a pattern of "0" is kept
            if ($line !== '') $ignore[] = $line;
        }
        return $ignore;
    }

    /**
     * Replacement for fnmatch() for windows systems.
     *
     * Supports * (any run of characters), ? (a single character) and bracket
     * expressions including ranges ([a-z]) and negation ([!abc] or [^abc]).
     * Matching is case insensitive.
     *
     * @author jk at ricochetsolutions dot com
     * @link http://www.php.net/manual/en/function.fnmatch.php#71725
     *
     * @param string $pattern the glob-like pattern
     * @param string $string the string to check
     * @return bool
     */
    protected function fnmatch($pattern, $string)
    {
        // preg_quote() escapes all glob meta characters, the table below turns
        // them back into their regexp equivalents. strtr() prefers the longest
        // key, so the negation forms win over the plain opening bracket.
        $regex = strtr(
            preg_quote($pattern, '#'),
            [
                '\*' => '.*',
                '\?' => '.',
                '\[\!' => '[^',
                '\[\^' => '[^',
                '\[' => '[',
                '\]' => ']',
                // a bare hyphen is literal outside of brackets and a range inside
                '\-' => '-',
            ]
        );

        return preg_match('#^' . $regex . '$#i', $string) === 1;
    }

    /**
     * Compare two entries by name (case sensitive)
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareName($a, $b)
    {
        return strcmp($a['name'], $b['name']);
    }

    /**
     * Compare two entries by lower cased name
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareIname($a, $b)
    {
        return strcmp(strtolower($a['name']), strtolower($b['name']));
    }

    /**
     * Compare two entries by their ctime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareCtime($a, $b)
    {
        return $a['ctime'] <=> $b['ctime'];
    }

    /**
     * Compare two entries by their mtime value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareMtime($a, $b)
    {
        return $a['mtime'] <=> $b['mtime'];
    }

    /**
     * Compare two entries by their size value
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function compareSize($a, $b)
    {
        return $a['size'] <=> $b['size'];
    }
}