<?php

/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\File\MediaFile;
use dokuwiki\Utf8\Sort;

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 *
 * Entries whose name starts with a dot or an underscore are skipped. Within
 * each directory all subdirectories are handed to the callback (and recursed
 * into when the callback returns a true value) before any of the files of
 * that directory are handed to the callback.
 *
 * @param   array    &$data The results of the search are stored here
 * @param   string   $base  Where to start the search
 * @param   callback $func  Callback (function name or array with object,method)
 * @param   array    $opts  option array will be given to the Callback
 * @param   string   $dir   Current directory beyond $base
 * @param   int      $lvl   Recursion Level
 * @param   mixed    $sort  'natural' to use natural order sorting (default);
 *                          'date' to sort by filemtime; leave empty to skip sorting.
 * @throws  RuntimeException when $base is empty or the filesystem root
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
{
    $dirs = [];
    $files = [];
    $filepaths = [];

    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    //read in directories and files
    $dh = @opendir($base . '/' . $dir);
    if (!$dh) return;
    while (($file = readdir($dh)) !== false) {
        if (preg_match('/^[\._]/', $file)) continue; //skip hidden files and upper dirs
        if (is_dir($base . '/' . $dir . '/' . $file)) {
            $dirs[] = $dir . '/' . $file;
            continue;
        }
        $files[] = $dir . '/' . $file;
        $filepaths[] = $base . '/' . $dir . '/' . $file;
    }
    closedir($dh);

    if (!empty($sort)) {
        if ($sort == 'date') {
            // newest files first; $files is reordered alongside the mtime list
            @array_multisort(array_map('filemtime', $filepaths), SORT_NUMERIC, SORT_DESC, $files);
        } else /* natural */ {
            Sort::asortFN($files);
        }
        // directories are always sorted by name, whatever the file sort mode is
        Sort::asortFN($dirs);
    }

    //give directories to userfunction then recurse
    foreach ($dirs as $subdir) {
        if (call_user_func_array($func, [&$data, $base, $subdir, 'd', $lvl, $opts])) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }
    //now handle the files
    foreach ($files as $file) {
        call_user_func_array($func, [&$data, $base, $file, 'f', $lvl, $opts]);
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * array &$data  - Reference to the result data structure
 * string $base  - Base usually $conf['datadir']
 * string $file  - current file or directory relative to $base
 * string $type  - Type either 'd' for directory or 'f' for file
 * int    $lvl   - Current recursion depth
 * array  $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (You can have a nonreadable dir over a readable
 * one deeper nested) also make sure to check the file type (for example
 * in case of lockfiles).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is the search query. It is matched at the start of the ID
 * or directly after a namespace separator.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = [
        // search_universal() adds the '/' delimiters itself, so the expression
        // must not carry a delimiter of its own (a trailing '/' would turn it
        // into an invalid pattern and nothing would ever be listed)
        'idmatch' => '(^|:)' . preg_quote((string) ($opts['query'] ?? ''), '/'),
        'listfiles' => true,
        'pagesonly' => true
    ];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] when set, only namespaces are listed
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;
    $ns = $opts['ns'] ?? '';
    $opts = [
        'pagesonly' => true,
        'listdirs' => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        // Hacky, should rather use recmatch
        // depth 0 (unlimited) when $file is on the path to $ns, otherwise -1
        // which makes search_universal() refuse to descend
        'depth' => preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $ns) ? 0 : -1,
    ];

    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all namespaces
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = ['listdirs' => true];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Each listed file is appended to $data as an array with the keys id, perm,
 * file, size, mtime, writable, isimg and, where applicable, meta and hash.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories, only decide about recursion
    if ($type == 'd') {
        // no depth given means recurse forever, else stop once depth is reached
        return empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];
    }

    $info = [];
    $info['id'] = pathID($file, true);
    if ($info['id'] != cleanID($info['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($info['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']) . ':*');
    if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
        return false;
    }

    $fullpath = $base . '/' . $file;
    $info['file'] = PhpString::basename($file);
    $info['size'] = filesize($fullpath);
    $info['mtime'] = filemtime($fullpath);
    $info['writable'] = is_writable($fullpath);
    if (preg_match("/\.(jpe?g|gif|png)$/", $file)) {
        $info['isimg'] = true;
        $info['meta'] = new JpegMeta($fullpath);
    } else {
        $info['isimg'] = false;
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
    }

    $data[] = $info;

    return false;
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories, only decide about recursion
    if ($type == 'd') {
        // no depth given means recurse forever, else stop once depth is reached
        return empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];
    }

    $id = pathID($file, true);
    if ($id != cleanID($id)) {
        // showmsg is optional, so it must not be read unguarded
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new MediaFile($id);
    return false;
}


/**
 * This function just lists documents (for RSS namespace export)
 *
 * Directories are never recursed into; every readable .txt file adds an
 * entry with an 'id' key to $data.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return false;
    //only search txt files
    if (substr($file, -4) == '.txt') {
        //check ACL
        $id = pathID($file);
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }
        $data[]['id'] = $id;
    }
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * The query is matched as a plain substring of the file path relative to
 * $base; every readable match adds an entry with an 'id' key to $data.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return true;
    //only search txt files
    if (substr($file, -4) != '.txt') return true;

    //without a query nothing can match
    if (empty($opts['query'])) return true;

    //simple stringmatching
    if (strpos($file, (string) $opts['query']) === false) return true;

    //check ACL
    $id = pathID($file);
    if (auth_quickaclcheck($id) < AUTH_READ) {
        return false;
    }
    $data[]['id'] = $id;
    return true;
}

/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * Each listed page is appended to $data as an array with the keys id, rev,
 * mtime, size and, when requested, hash.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    if (isset($opts['depth']) && $opts['depth']) {
        $parts = explode('/', ltrim($file, '/'));
        if (
            ($type == 'd' && count($parts) >= $opts['depth'])
            || ($type != 'd' && count($parts) > $opts['depth'])
        ) {
            return false; // depth reached
        }
    }

    //we do nothing with directories
    if ($type == 'd') {
        return true;
    }

    //only search txt files
    if (substr($file, -4) != '.txt') return true;

    $item = [];
    $item['id'] = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
        return false;
    }

    $item['rev'] = filemtime($base . '/' . $file);
    $item['mtime'] = $item['rev'];
    $item['size'] = filesize($base . '/' . $file);
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}

/* ------------- helper functions below -------------- */

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count;
 * entries with the same count are ordered by their id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    if ($a['count'] > $b['count']) {
        return -1;
    }
    if ($a['count'] < $b['count']) {
        return 1;
    }
    // same count: fall back to comparing the ids
    return Sort::strcmp($a['id'], $b['id']);
}

/**
 * translates a document path to an ID
 *
 * Slashes become namespace separators, leading and trailing separators are
 * trimmed and, unless $keeptxt is set, a trailing '.txt' is removed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 *
 * @param string $path
 * @param bool $keeptxt
 *
 * @return mixed|string
 */
function pathID($path, $keeptxt = false)
{
    $id = utf8_decodeFN($path);
    $id = str_replace('/', ':', $id);
    if (!$keeptxt) $id = preg_replace('#\.txt$#', '', $id);
    $id = trim($id, ':');
    return $id;
}


/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All *match options are given without delimiters; they are wrapped in '/'
 * before being used, so a literal '/' inside them has to be escaped.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $item = [];
    $return = true;

    // get ID and check if it is a valid one
    $item['id'] = pathID($file, ($type == 'd' || !empty($opts['keeptxt'])));
    if ($item['id'] != cleanID($item['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($item['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item['ns'] = getNS($item['id']);

    if ($type == 'd') {
        // decide if recursion into this directory is wanted:
        // no depth means recurse forever, else stop once depth is reached
        $return = empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];

        // a directory that may be recursed into still has to match recmatch
        if (
            $return
            && !empty($opts['recmatch'])
            && !preg_match('/' . $opts['recmatch'] . '/', $file)
        ) {
            return false; // doesn't match
        }
    }

    // check ACL
    if (empty($opts['skipacl'])) {
        if ($type == 'd') {
            $item['perm'] = auth_quickaclcheck($item['id'] . ':*');
        } else {
            $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
        }
    } else {
        $item['perm'] = AUTH_DELETE;
    }

    // are we done here maybe?
    if ($type == 'd') {
        if (empty($opts['listdirs'])) return $return;
        //neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false;
        if (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file)) return $return;
        if (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns'])) return $return;
    } else {
        if (empty($opts['listfiles'])) return $return;
        if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
        if (!empty($opts['pagesonly']) && (substr($file, -4) != '.txt')) return $return;
        if (empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
        if (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file)) return $return;
        if (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $item['id'])) return $return;
    }

    // still here? prepare the item
    $item['type'] = $type;
    $item['level'] = $lvl;
    $item['open'] = $return;

    if (!empty($opts['meta'])) {
        $fullpath = $base . '/' . $file;
        $item['file'] = PhpString::basename($file);
        $item['size'] = filesize($fullpath);
        $item['mtime'] = filemtime($fullpath);
        $item['rev'] = $item['mtime'];
        $item['writable'] = is_writable($fullpath);
        $item['executable'] = is_executable($fullpath);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base . '/' . $file, false));
        if (!empty($opts['firsthead'])) $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
    }

    // finally add the item
    $data[] = $item;
    return $return;
}

//Setup VIM: ex: et ts=4 :