<?php

/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\File\MediaFile;
use dokuwiki\Utf8\Sort;

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 *
 * Entries whose name starts with a dot or an underscore are skipped.
 * Directories are handed to the callback first; the callback's return value
 * decides whether the directory is descended into. Files of the current
 * directory are handed to the callback afterwards.
 *
 * @param array    &$data The results of the search are stored here
 * @param string   $base  Where to start the search
 * @param callback $func  Callback (function name or array with object,method)
 * @param array    $opts  option array will be given to the Callback
 * @param string   $dir   Current directory beyond $base
 * @param int      $lvl   Recursion Level
 * @param mixed    $sort  'natural' to use natural order sorting (default);
 *                        'date' to sort files by filemtime (newest first);
 *                        leave empty to skip sorting.
 *
 * @throws RuntimeException when $base is empty or '/'
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
{
    $dirs = [];
    $files = [];
    $filepaths = [];

    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    // read in directories and files; an unreadable directory is silently ignored
    $dh = @opendir($base . '/' . $dir);
    if (!$dh) {
        return;
    }
    while (($entry = readdir($dh)) !== false) {
        // skip hidden files and upper dirs
        if (preg_match('/^[\._]/', $entry)) {
            continue;
        }
        if (is_dir($base . '/' . $dir . '/' . $entry)) {
            $dirs[] = $dir . '/' . $entry;
            continue;
        }
        $files[] = $dir . '/' . $entry;
        $filepaths[] = $base . '/' . $dir . '/' . $entry;
    }
    closedir($dh);

    if (!empty($sort)) {
        if ($sort == 'date') {
            // order $files by the modification time of the matching $filepaths entry
            @array_multisort(array_map('filemtime', $filepaths), SORT_NUMERIC, SORT_DESC, $files);
        } else /* natural */ {
            Sort::asortFN($files);
        }
        // directories are always sorted by name, whatever the file sort mode is
        Sort::asortFN($dirs);
    }

    // give directories to userfunction then recurse
    foreach ($dirs as $subdir) {
        if (call_user_func_array($func, [&$data, $base, $subdir, 'd', $lvl, $opts])) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }

    // now handle the files
    foreach ($files as $file) {
        call_user_func_array($func, [&$data, $base, $file, 'f', $lvl, $opts]);
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * array &$data  - Reference to the result data structure
 * string $base  - Base usually $conf['datadir']
 * string $file  - current file or directory relative to $base
 * string $type  - Type either 'd' for directory or 'f' for file
 * int    $lvl   - Current recursion depth
 * array  $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (You can have a nonreadable dir over a readable
 * one deeper nested) also make sure to check the file type (for example
 * in case of lockfiles).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is matched at the start of the page ID or at the start of
 * any namespace segment of it. The work is delegated to search_universal().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    $query = (string) ($opts['query'] ?? '');

    $opts = [
        // search_universal() adds the regexp delimiters itself, so only the
        // bare expression is passed here (a trailing '/' would be read as an
        // invalid modifier and make every match fail)
        'idmatch'   => '(^|:)' . preg_quote($query, '/'),
        'listfiles' => true,
        'pagesonly' => true
    ];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] when not empty, only directories are listed
 *
 * Directories are always listed. Only directories whose path is a prefix of
 * '/' . $opts['ns'] are marked for recursion.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;
    $ns = $opts['ns'] ?? '';

    // Hacky, should rather use recmatch:
    // depth 0 means unlimited recursion, -1 can never be satisfied and so stops it
    $onPath = preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $ns);

    $opts = [
        'pagesonly' => true,
        'listdirs'  => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        'depth'     => $onPath ? 0 : -1,
    ];

    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all namespaces
 *
 * The passed options are ignored; only directories are listed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = ['listdirs' => true];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Each accepted file is appended to $data as an array with the keys
 * id, perm, file, size, mtime, writable, isimg and, depending on the file
 * and options, meta and hash.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    // directories are never listed, we only decide about recursion
    if ($type == 'd') {
        if (empty($opts['depth'])) {
            return true; // recurse forever
        }
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) {
            return false; // depth reached
        }
        return true;
    }

    $info = [];
    $info['id'] = pathID($file, true);
    if ($info['id'] != cleanID($info['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($info['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    // check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']) . ':*');
    if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
        return false;
    }

    // check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
        return false;
    }

    $fullpath = $base . '/' . $file;
    $info['file'] = PhpString::basename($file);
    $info['size'] = filesize($fullpath);
    $info['mtime'] = filemtime($fullpath);
    $info['writable'] = is_writable($fullpath);
    if (preg_match("/\.(jpe?g|gif|png)$/", $file)) {
        $info['isimg'] = true;
        $info['meta'] = new JpegMeta($fullpath);
    } else {
        $info['isimg'] = false;
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
    }

    $data[] = $info;

    return false;
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    // directories are never listed, we only decide about recursion
    if ($type == 'd') {
        if (empty($opts['depth'])) {
            return true; // recurse forever
        }
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) {
            return false; // depth reached
        }
        return true;
    }

    $id = pathID($file, true);
    if ($id != cleanID($id)) {
        // showmsg is optional, so it must not be read unguarded
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    // check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    // check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new MediaFile($id);
    return false;
}


/**
 * This function just lists documents (for RSS namespace export)
 *
 * Directories are never recursed into. Readable .txt files are appended to
 * $data as ['id' => page ID].
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    // we do nothing with directories
    if ($type == 'd') {
        return false;
    }

    // only search txt files
    if (substr($file, -4) == '.txt') {
        // check ACL
        $id = pathID($file);
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }
        $data[]['id'] = $id;
    }
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query; it is looked for as a plain substring
 * of the file path.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    // we do nothing with directories
    if ($type == 'd') {
        return true;
    }

    // only search txt files
    if (substr($file, -4) != '.txt') {
        return true;
    }

    // simple stringmatching
    if (!empty($opts['query'])) {
        if (strpos($file, (string) $opts['query']) !== false) {
            // check ACL
            $id = pathID($file);
            if (auth_quickaclcheck($id) < AUTH_READ) {
                return false;
            }
            $data[]['id'] = $id;
        }
    }
    return true;
}

/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * Each accepted page is appended to $data as an array with the keys
 * id, rev, mtime, size and, when requested, hash.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array   $data
 * @param string  $base
 * @param string  $file
 * @param string  $type
 * @param integer $lvl
 * @param array   $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    if (isset($opts['depth']) && $opts['depth']) {
        // the number of path segments tells how deep we are
        $parts = explode('/', ltrim($file, '/'));
        if (
            ($type == 'd' && count($parts) >= $opts['depth'])
            || ($type != 'd' && count($parts) > $opts['depth'])
        ) {
            return false; // depth reached
        }
    }

    // we do nothing with directories
    if ($type == 'd') {
        return true;
    }

    // only search txt files
    if (substr($file, -4) != '.txt') {
        return true;
    }

    $item = [];
    $item['id'] = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
        return false;
    }

    $item['rev'] = filemtime($base . '/' . $file);
    $item['mtime'] = $item['rev'];
    $item['size'] = filesize($base . '/' . $file);
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}

/* ------------- helper functions below -------------- */

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered by their id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    if ($a['count'] > $b['count']) {
        return -1;
    }
    if ($a['count'] < $b['count']) {
        return 1;
    }
    // same count: fall back to comparing the ids
    return Sort::strcmp($a['id'], $b['id']);
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing '.txt' is removed unless $keeptxt is
 * set, and leading/trailing colons are trimmed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 *
 * @param string $path
 * @param bool   $keeptxt
 *
 * @return mixed|string
 */
function pathID($path, $keeptxt = false)
{
    $id = utf8_decodeFN($path);
    $id = str_replace('/', ':', $id);
    if (!$keeptxt) {
        $id = preg_replace('#\.txt$#', '', $id);
    }
    $id = trim($id, ':');
    return $id;
}


/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All *match options are regular expressions WITHOUT delimiters; they are
 * wrapped in '/' here, so a literal slash has to be escaped by the caller.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $item = [];
    $return = true;

    // get ID and check if it is a valid one
    $item['id'] = pathID($file, ($type == 'd' || !empty($opts['keeptxt'])));
    if ($item['id'] != cleanID($item['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($item['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item['ns'] = getNS($item['id']);

    if ($type == 'd') {
        // decide whether recursing into this directory is wanted
        if (empty($opts['depth'])) {
            $return = true; // recurse forever
        } else {
            $depth = substr_count($file, '/');
            if ($depth >= $opts['depth']) {
                $return = false; // depth reached
            } else {
                $return = true;
            }
        }

        if ($return) {
            $match = empty($opts['recmatch']) || preg_match('/' . $opts['recmatch'] . '/', $file);
            if (!$match) {
                return false; // doesn't match
            }
        }
    }

    // check ACL
    if (empty($opts['skipacl'])) {
        if ($type == 'd') {
            $item['perm'] = auth_quickaclcheck($item['id'] . ':*');
        } else {
            $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
        }
    } else {
        $item['perm'] = AUTH_DELETE;
    }

    // are we done here maybe?
    if ($type == 'd') {
        if (empty($opts['listdirs'])) {
            return $return;
        }
        // neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) {
            return false;
        }
        if (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file)) {
            return $return;
        }
        if (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns'])) {
            return $return;
        }
    } else {
        if (empty($opts['listfiles'])) {
            return $return;
        }
        if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) {
            return $return;
        }
        if (!empty($opts['pagesonly']) && (substr($file, -4) != '.txt')) {
            return $return;
        }
        if (empty($opts['showhidden']) && isHiddenPage($item['id'])) {
            return $return;
        }
        if (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file)) {
            return $return;
        }
        if (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $item['id'])) {
            return $return;
        }
    }

    // still here? prepare the item
    $item['type'] = $type;
    $item['level'] = $lvl;
    $item['open'] = $return;

    if (!empty($opts['meta'])) {
        $item['file'] = PhpString::basename($file);
        $item['size'] = filesize($base . '/' . $file);
        $item['mtime'] = filemtime($base . '/' . $file);
        $item['rev'] = $item['mtime'];
        $item['writable'] = is_writable($base . '/' . $file);
        $item['executable'] = is_executable($base . '/' . $file);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) {
            $item['hash'] = md5(io_readFile($base . '/' . $file, false));
        }
        if (!empty($opts['firsthead'])) {
            $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
        }
    }

    // finally add the item
    $data[] = $item;
    return $return;
}

//Setup VIM: ex: et ts=4 :