<?php

/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\File\MediaFile;
use dokuwiki\Utf8\Sort;

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory
 *
 * Entries whose name starts with a dot or an underscore are skipped. If the
 * directory can not be opened the function returns silently. Within one
 * directory all sub directories are handed to the callback (and recursed into
 * when the callback returns a truthy value) before any of the files.
 *
 * @param   array    &$data The results of the search are stored here
 * @param   string   $base  Where to start the search
 * @param   callback $func  Callback (function name or array with object,method)
 * @param   array    $opts  option array will be given to the Callback
 * @param   string   $dir   Current directory beyond $base
 * @param   int      $lvl   Recursion Level
 * @param   mixed    $sort  'natural' to use natural order sorting (default);
 *                          'date' to sort by filemtime; leave empty to skip sorting.
 * @throws  RuntimeException when $base is empty or the file system root
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
{
    $dirs = [];
    $files = [];
    $filepaths = [];

    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    //read in directories and files
    $dh = @opendir($base . '/' . $dir);
    if (!$dh) return;
    while (($file = readdir($dh)) !== false) {
        if (preg_match('/^[\._]/', $file)) continue; //skip hidden files and upper dirs
        if (is_dir($base . '/' . $dir . '/' . $file)) {
            $dirs[] = $dir . '/' . $file;
            continue;
        }
        $files[] = $dir . '/' . $file;
        $filepaths[] = $base . '/' . $dir . '/' . $file;
    }
    closedir($dh);
    if (!empty($sort)) {
        if ($sort == 'date') {
            // newest first; $files is reordered along with the mtime column
            @array_multisort(array_map(filemtime(...), $filepaths), SORT_NUMERIC, SORT_DESC, $files);
        } else /* natural */ {
            Sort::asortFN($files);
        }
        // directories are always sorted by name, regardless of the file sort mode
        Sort::asortFN($dirs);
    }

    //give directories to userfunction then recurse
    foreach ($dirs as $subdir) {
        if (call_user_func_array($func, [&$data, $base, $subdir, 'd', $lvl, $opts])) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }
    //now handle the files
    foreach ($files as $file) {
        call_user_func_array($func, [&$data, $base, $file, 'f', $lvl, $opts]);
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * array &$data  - Reference to the result data structure
 * string $base  - Base usually $conf['datadir']
 * string $file  - current file or directory relative to $base
 * string $type  - Type either 'd' for directory or 'f' for file
 * int    $lvl   - Current recursion depth
 * array  $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (You can have an nonreadable dir over a readable
 * one deeper nested) also make sure to check the file type (for example
 * in case of lockfiles).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is the search query. It is quoted and turned into an
 * 'idmatch' expression for search_universal(), which adds the regular
 * expression delimiters itself - so no delimiter may be appended here.
 * A missing or empty query results in the expression '(^|:)' which
 * matches every ID.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = [
        'idmatch' => '(^|:)' . preg_quote((string) ($opts['query'] ?? ''), '/'),
        'listfiles' => true,
        'pagesonly' => true
    ];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] when set, only namespaces are listed
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;
    $ns = $opts['ns'] ?? '';
    $opts = [
        'pagesonly' => true,
        'listdirs' => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        // Hacky, should rather use recmatch
        // depth 0 (unlimited) when $file is a path prefix of the viewed namespace, -1 otherwise
        'depth' => preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $ns) ? 0 : -1,
    ];

    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all namespaces
 *
 * The passed options are ignored, only directories are listed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = ['listdirs' => true];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Each accepted file is appended to $data as an array with the keys
 * id, perm, file, size, mtime, writable, isimg and optionally meta and hash.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) return false; // depth reached
        return true;
    }

    $info = [];
    $info['id'] = pathID($file, true);
    if ($info['id'] !== cleanID($info['id'])) {
        if (!empty($opts['showmsg']))
            msg(hsc($info['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']) . ':*');
    if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
        return false;
    }

    $info['file'] = PhpString::basename($file);
    $info['size'] = filesize($base . '/' . $file);
    $info['mtime'] = filemtime($base . '/' . $file);
    $info['writable'] = is_writable($base . '/' . $file);
    if (preg_match("/\.(jpe?g|gif|png)$/", $file)) {
        $info['isimg'] = true;
        $info['meta'] = new JpegMeta($base . '/' . $file);
    } else {
        $info['isimg'] = false;
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
    }

    $data[] = $info;

    return false;
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Each accepted file is appended to $data as a MediaFile instance.
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) return false; // depth reached
        return true;
    }

    $id = pathID($file, true);
    // strict comparison: numeric looking IDs must not compare equal by value (e.g. '1E3' vs '1e3')
    if ($id !== cleanID($id)) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new MediaFile($id);
    return false;
}


/**
 * This function just lists documents (for RSS namespace export)
 *
 * Directories are never recursed into. Readable .txt files are added
 * to $data as ['id' => pageid].
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return false;
    //only search txt files
    if (str_ends_with($file, '.txt')) {
        //check ACL
        $id = pathID($file);
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }
        $data[]['id'] = $id;
    }
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * The query is matched as a plain substring against the file path
 * relative to $base. All directories are recursed into.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return true;
    //only search txt files
    if (!str_ends_with($file, '.txt')) return true;

    //simple stringmatching
    if (!empty($opts['query'])) {
        if (str_contains($file, (string) $opts['query'])) {
            //check ACL
            $id = pathID($file);
            if (auth_quickaclcheck($id) < AUTH_READ) {
                return false;
            }
            $data[]['id'] = $id;
        }
    }
    return true;
}

/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * Each accepted page is appended to $data as an array with the keys
 * id, rev, mtime, size and optionally hash.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param int $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    if (($opts['depth'] ?? 0) > 0) {
        // the number of path segments tells how deep below $base we are
        $parts = explode('/', ltrim($file, '/'));
        if (
            ($type == 'd' && count($parts) >= $opts['depth'])
            || ($type != 'd' && count($parts) > $opts['depth'])
        ) {
            return false; // depth reached
        }
    }

    //we do nothing with directories
    if ($type == 'd') {
        return true;
    }

    //only search txt files
    if (!str_ends_with($file, '.txt')) return true;

    $item = [];
    $item['id'] = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
        return false;
    }

    $item['rev'] = filemtime($base . '/' . $file);
    $item['mtime'] = $item['rev'];
    $item['size'] = filesize($base . '/' . $file);
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}

/* ------------- helper functions below -------------- */

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered by their id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    if ($a['count'] > $b['count']) {
        return -1;
    } elseif ($a['count'] < $b['count']) {
        return 1;
    } else {
        return Sort::strcmp($a['id'], $b['id']);
    }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, leading and trailing colons are removed and
 * a trailing .txt extension is stripped unless $keeptxt is set.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 *
 * @param string $path
 * @param bool $keeptxt
 *
 * @return string
 */
function pathID($path, $keeptxt = false)
{
    $id = utf8_decodeFN($path);
    $id = str_replace('/', ':', $id);
    if (!$keeptxt) $id = preg_replace('#\.txt$#', '', $id);
    $id = trim($id, ':');
    return $id;
}


/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All *match expressions are given without delimiters, they are wrapped
 * in '/' by this function.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $item = [];
    $return = true;

    // get ID and check if it is a valid one
    $item['id'] = pathID($file, ($type == 'd' || !empty($opts['keeptxt'])));
    if ($item['id'] !== cleanID($item['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($item['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item['ns'] = getNS($item['id']);

    if ($type == 'd') {
        // decide whether recursing into this directory is wanted:
        // either there is no depth limit or the limit is not reached yet
        $return = empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];

        if ($return) {
            $match = empty($opts['recmatch']) || preg_match('/' . $opts['recmatch'] . '/', $file);
            if (!$match) {
                return false; // doesn't match
            }
        }
    }

    // check ACL
    if (empty($opts['skipacl'])) {
        if ($type == 'd') {
            $item['perm'] = auth_quickaclcheck($item['id'] . ':*');
        } else {
            $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
        }
    } else {
        $item['perm'] = AUTH_DELETE;
    }

    // are we done here maybe?
    if ($type == 'd') {
        if (empty($opts['listdirs'])) return $return;
        //neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false;
        if (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file)) return $return;
        if (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns'])) return $return;
    } else {
        if (empty($opts['listfiles'])) return $return;
        if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
        if (!empty($opts['pagesonly']) && !str_ends_with($file, '.txt')) return $return;
        if (empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
        if (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file)) return $return;
        if (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $item['id'])) return $return;
    }

    // still here? prepare the item
    $item['type'] = $type;
    $item['level'] = $lvl;
    $item['open'] = $return;

    if (!empty($opts['meta'])) {
        $item['file'] = PhpString::basename($file);
        $item['size'] = filesize($base . '/' . $file);
        $item['mtime'] = filemtime($base . '/' . $file);
        $item['rev'] = $item['mtime'];
        $item['writable'] = is_writable($base . '/' . $file);
        $item['executable'] = is_executable($base . '/' . $file);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base . '/' . $file, false));
        if (!empty($opts['firsthead'])) {
            $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
        }
    }

    // finally add the item
    $data[] = $item;
    return $return;
}

//Setup VIM: ex: et ts=4 :