<?php

/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\File\MediaFile;
use dokuwiki\Utf8\Sort;

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 * Entries whose name starts with a dot or an underscore are skipped.
 *
 * Directories are handed to the callback first; a directory is only descended
 * into when the callback returns a truthy value for it. Files of the current
 * directory are handed to the callback afterwards.
 *
 * @param   array    &$data The results of the search are stored here
 * @param   string   $base  Where to start the search
 * @param   callback $func  Callback (function name or array with object,method)
 * @param   array    $opts  option array will be given to the Callback
 * @param   string   $dir   Current directory beyond $base
 * @param   int      $lvl   Recursion Level
 * @param   mixed    $sort  'natural' to use natural order sorting (default);
 *                          'date' to sort by filemtime; leave empty to skip sorting.
 * @throws  RuntimeException when $base is empty or the filesystem root
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
{
    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    $dirs = [];
    $files = [];
    $filepaths = [];

    //read in directories and files
    $dh = @opendir($base . '/' . $dir);
    if (!$dh) return;
    while (($file = readdir($dh)) !== false) {
        if (preg_match('/^[\._]/', $file)) continue; //skip hidden files and upper dirs
        if (is_dir($base . '/' . $dir . '/' . $file)) {
            $dirs[] = $dir . '/' . $file;
            continue;
        }
        $files[] = $dir . '/' . $file;
        // absolute paths are only needed for the 'date' sort below
        $filepaths[] = $base . '/' . $dir . '/' . $file;
    }
    closedir($dh);

    if (!empty($sort)) {
        if ($sort == 'date') {
            // newest first; $files is reordered in step with the mtime list
            @array_multisort(array_map('filemtime', $filepaths), SORT_NUMERIC, SORT_DESC, $files);
        } else /* natural */ {
            Sort::asortFN($files);
        }
        Sort::asortFN($dirs);
    }

    //give directories to userfunction then recurse
    foreach ($dirs as $subdir) {
        if (call_user_func_array($func, [&$data, $base, $subdir, 'd', $lvl, $opts])) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }
    //now handle the files
    foreach ($files as $file) {
        call_user_func_array($func, [&$data, $base, $file, 'f', $lvl, $opts]);
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. Such a function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false).
 * The function has to accept the following parameters:
 *
 * array &$data - Reference to the result data structure
 * string $base - Base usually $conf['datadir']
 * string $file - current file or directory relative to $base
 * string $type - Type either 'd' for directory or 'f' for file
 * int    $lvl  - Current recursion depth
 * array  $opts - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights.
 * Namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (you can have a nonreadable dir over a readable
 * one deeper nested). Also make sure to check the file type (for example
 * in case of lockfiles).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is the search query. It is matched literally at the start of
 * the ID or directly after a namespace separator (':').
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = [
        // No trailing delimiter here: search_universal() wraps the pattern in '/…/'
        // itself. A trailing '/' yields '/…//' which PCRE rejects ("Unknown modifier"),
        // so nothing would ever match.
        'idmatch' => '(^|:)' . preg_quote((string) ($opts['query'] ?? ''), '/'),
        'listfiles' => true,
        'pagesonly' => true
    ];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] when set, only namespaces are listed
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;
    $ns = $opts['ns'] ?? '';

    // Only entries that are a path prefix of '/' . $ns get unlimited depth (0);
    // everything else gets depth -1, which stops the recursion in search_universal().
    // Hacky, should rather use recmatch
    $onPath = preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $ns);

    $opts = [
        'pagesonly' => true,
        'listdirs' => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        'depth' => $onPath ? 0 : -1,
    ];

    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all namespaces
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    // the passed options are deliberately replaced: only directories are listed
    $opts = ['listdirs' => true];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Each accepted file is appended to $data as an array with the keys
 * id, perm, file, size, mtime, writable, isimg (plus meta for images
 * and hash when requested).
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    //directories are never listed, we only decide whether to descend
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        // the number of slashes in the relative path is the nesting level
        return substr_count($file, '/') < $opts['depth'];
    }

    $info = [];
    $info['id'] = pathID($file, true);
    if ($info['id'] !== cleanID($info['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($info['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']) . ':*');
    if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
        return false;
    }

    $path = $base . '/' . $file;
    $info['file'] = PhpString::basename($file);
    $info['size'] = filesize($path);
    $info['mtime'] = filemtime($path);
    $info['writable'] = is_writable($path);
    if (preg_match("/\.(jpe?g|gif|png)$/", $file)) {
        $info['isimg'] = true;
        $info['meta'] = new JpegMeta($path);
    } else {
        $info['isimg'] = false;
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
    }

    $data[] = $info;

    return false;
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    //directories are never listed, we only decide whether to descend
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        // the number of slashes in the relative path is the nesting level
        return substr_count($file, '/') < $opts['depth'];
    }

    $id = pathID($file, true);
    // strict comparison and guarded option access, same as search_media()
    if ($id !== cleanID($id)) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new MediaFile($id);
    return false;
}


/**
 * This function just lists documents (for RSS namespace export)
 *
 * Never recurses: directories are rejected, so only the pages directly in
 * the searched directory are collected.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return false;

    //only search txt files
    if (!str_ends_with($file, '.txt')) return false;

    //check ACL
    $id = pathID($file);
    if (auth_quickaclcheck($id) >= AUTH_READ) {
        $data[]['id'] = $id;
    }
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * The query is matched as a plain substring of the file path relative to
 * $base; without a query nothing is collected.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    //directories are not listed but always traversed
    if ($type == 'd') return true;
    //only search txt files
    if (!str_ends_with($file, '.txt')) return true;

    //simple stringmatching
    if (!empty($opts['query']) && str_contains($file, (string) $opts['query'])) {
        //check ACL
        $id = pathID($file);
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }
        $data[]['id'] = $id;
    }
    return true;
}

/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * Each accepted page is appended to $data as an array with the keys
 * id, rev, mtime, size (plus hash when requested).
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    $maxDepth = $opts['depth'] ?? 0;
    if ($maxDepth > 0) {
        $segments = count(explode('/', ltrim($file, '/')));
        // a directory at the limit is not entered; a file may sit exactly on the limit
        $tooDeep = ($type == 'd') ? ($segments >= $maxDepth) : ($segments > $maxDepth);
        if ($tooDeep) {
            return false; // depth reached
        }
    }

    //directories are not listed but traversed
    if ($type == 'd') {
        return true;
    }

    //only search txt files
    if (!str_ends_with($file, '.txt')) return true;

    $item = [];
    $item['id'] = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
        return false;
    }

    $path = $base . '/' . $file;
    $item['rev'] = filemtime($path);
    $item['mtime'] = $item['rev'];
    $item['size'] = filesize($path);
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}

/* ------------- helper functions below -------------- */

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count;
 * entries with equal count are ordered by Sort::strcmp() on their id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    // operands are swapped on purpose: higher counts sort first
    return ($b['count'] <=> $a['count']) ?: Sort::strcmp($a['id'], $b['id']);
}

/**
 * translates a document path to an ID
 *
 * Slashes become namespace separators (':'), leading and trailing separators
 * are stripped and, unless $keeptxt is set, a trailing '.txt' is removed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 *
 * @param string $path
 * @param bool $keeptxt
 *
 * @return string
 */
function pathID($path, $keeptxt = false)
{
    $id = str_replace('/', ':', utf8_decodeFN($path));
    if (!$keeptxt) {
        $id = preg_replace('#\.txt$#', '', $id);
    }
    return trim($id, ':');
}


/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All *match options are given WITHOUT delimiters; they are wrapped in '/…/'
 * here, so a literal '/' inside them has to be escaped by the caller.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $isDir = ($type == 'd');
    $item = [];

    // get ID and check if it is a valid one (directories always keep their full name)
    $item['id'] = pathID($file, ($isDir || !empty($opts['keeptxt'])));
    if ($item['id'] !== cleanID($item['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($item['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item['ns'] = getNS($item['id']);

    // decide if recursion into this directory is wanted
    $return = true;
    if ($isDir) {
        if (!empty($opts['depth']) && substr_count($file, '/') >= $opts['depth']) {
            $return = false; // depth reached
        }

        // recmatch is only consulted while recursion is still an option
        if ($return && !empty($opts['recmatch']) && !preg_match('/' . $opts['recmatch'] . '/', $file)) {
            return false; // doesn't match
        }
    }

    // check ACL
    if (!empty($opts['skipacl'])) {
        $item['perm'] = AUTH_DELETE;
    } elseif ($isDir) {
        $item['perm'] = auth_quickaclcheck($item['id'] . ':*');
    } else {
        $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
    }

    // are we done here maybe?
    if ($isDir) {
        if (empty($opts['listdirs'])) return $return;
        //neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false;
        if (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file)) return $return;
        if (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns'])) return $return;
    } else {
        if (empty($opts['listfiles'])) return $return;
        if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
        if (!empty($opts['pagesonly']) && !str_ends_with($file, '.txt')) return $return;
        if (empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
        if (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file)) return $return;
        if (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $item['id'])) return $return;
    }

    // still here? prepare the item
    $item['type'] = $type;
    $item['level'] = $lvl;
    $item['open'] = $return;

    if (!empty($opts['meta'])) {
        $path = $base . '/' . $file;
        $item['file'] = PhpString::basename($file);
        $item['size'] = filesize($path);
        $item['mtime'] = filemtime($path);
        $item['rev'] = $item['mtime'];
        $item['writable'] = is_writable($path);
        $item['executable'] = is_executable($path);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base . '/' . $file, false));
        if (!empty($opts['firsthead'])) $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
    }

    // finally add the item
    $data[] = $item;
    return $return;
}

//Setup VIM: ex: et ts=4 :