<?php
/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

use dokuwiki\Utf8\Sort;

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 *
 * Entries whose name starts with a dot or an underscore are skipped.
 * Within one directory level all sub directories are handed to the callback
 * (and recursed into when it returns true) before the files are handed over.
 *
 * @param   array    &$data The results of the search are stored here
 * @param   string    $base Where to start the search
 * @param   callback  $func Callback (function name or array with object,method)
 * @param   array     $opts option array will be given to the Callback
 * @param   string    $dir  Current directory beyond $base
 * @param   int       $lvl  Recursion Level
 * @param   mixed     $sort 'natural' to use natural order sorting (default);
 *                          'date' to sort by filemtime; leave empty to skip sorting.
 * @throws  RuntimeException when $base is empty or '/'
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir='', $lvl=1, $sort='natural')
{
    $dirs      = array();
    $files     = array();
    $filepaths = array();

    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    //read in directories and files
    $dh = @opendir($base.'/'.$dir);
    if (!$dh) return;
    while (($file = readdir($dh)) !== false) {
        if (preg_match('/^[\._]/', $file)) continue; //skip hidden files and upper dirs
        if (is_dir($base.'/'.$dir.'/'.$file)) {
            $dirs[] = $dir.'/'.$file;
            continue;
        }
        $files[]     = $dir.'/'.$file;
        $filepaths[] = $base.'/'.$dir.'/'.$file;
    }
    closedir($dh);

    if (!empty($sort)) {
        if ($sort == 'date') {
            // $filepaths runs parallel to $files, so sorting the mtimes reorders $files too
            @array_multisort(array_map('filemtime', $filepaths), SORT_NUMERIC, SORT_DESC, $files);
        } else /* natural */ {
            Sort::asortFN($files);
        }
        Sort::asortFN($dirs);
    }

    //give directories to userfunction then recurse
    foreach ($dirs as $subdir) {
        if (call_user_func_array($func, array(&$data, $base, $subdir, 'd', $lvl, $opts))) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }
    //now handle the files
    foreach ($files as $file) {
        call_user_func_array($func, array(&$data, $base, $file, 'f', $lvl, $opts));
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * array &$data - Reference to the result data structure
 * string $base - Base usually $conf['datadir']
 * string $file - current file or directory relative to $base
 * string $type - Type either 'd' for directory or 'f' for file
 * int    $lvl  - Current recursion depth
 * array  $opts - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (You can have a nonreadable dir over a readable
 * one deeper nested) also make sure to check the file type (for example
 * in case of lockfiles).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is the search query; it is matched literally at the start
 * of the ID or directly after a namespace separator.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    $query = isset($opts['query']) ? $opts['query'] : '';
    $opts = array(
        // search_universal() wraps this fragment in '/' delimiters itself, so it
        // must not carry a delimiter of its own (that made the pattern invalid)
        'idmatch'   => '(^|:)'.preg_quote($query, '/'),
        'listfiles' => true,
        'pagesonly' => true,
    );
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] when set, only namespaces are listed
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;
    $ns = isset($opts['ns']) ? $opts['ns'] : '';
    $opts = array(
        'pagesonly' => true,
        'listdirs'  => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        // Hacky, should rather use recmatch
        // unlimited depth (0) only while $file is a path prefix of the viewed
        // namespace, otherwise -1 which stops the recursion
        'depth'     => preg_match('#^'.preg_quote($file, '#').'(/|$)#', '/'.$ns) ? 0 : -1
    );

    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all namespaces
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = array(
        'listdirs' => true,
    );
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) return false; // depth reached
        return true;
    }

    $info       = array();
    $info['id'] = pathID($file, true);
    if ($info['id'] != cleanID($info['id'])) {
        // showmsg is optional, do not trip over a missing key
        if (!empty($opts['showmsg'])) {
            msg(hsc($info['id']).' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']).':*');
    if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
        return false;
    }

    $info['file']     = \dokuwiki\Utf8\PhpString::basename($file);
    $info['size']     = filesize($base.'/'.$file);
    $info['mtime']    = filemtime($base.'/'.$file);
    $info['writable'] = is_writable($base.'/'.$file);
    if (preg_match('/\.(jpe?g|gif|png)$/', $file)) {
        $info['isimg'] = true;
        $info['meta']  = new JpegMeta($base.'/'.$file);
    } else {
        $info['isimg'] = false;
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
    }

    $data[] = $info;

    return false;
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) return false; // depth reached
        return true;
    }

    $id = pathID($file, true);
    if ($id != cleanID($id)) {
        // showmsg is optional, do not trip over a missing key
        if (!empty($opts['showmsg'])) {
            msg(hsc($id).' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id).':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new \dokuwiki\File\MediaFile($id);
    return false;
}


/**
 * This function just lists documents (for RSS namespace export)
 *
 * Directories are never recursed into; only readable '.txt' files are added.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return false;
    //only search txt files
    if (substr($file, -4) != '.txt') return false;

    //check ACL
    $id = pathID($file);
    if (auth_quickaclcheck($id) < AUTH_READ) {
        return false;
    }
    $data[]['id'] = $id;
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    //we do nothing with directories
    if ($type == 'd') return true;
    //only search txt files
    if (substr($file, -4) != '.txt') return true;
    //nothing to match against
    if (empty($opts['query'])) return true;

    //simple stringmatching
    if (strpos($file, $opts['query']) !== false) {
        //check ACL
        $id = pathID($file);
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }
        $data[]['id'] = $id;
    }
    return true;
}

/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    if (isset($opts['depth']) && $opts['depth']) {
        // number of path segments below $base; a directory at the limit is not
        // entered, a file is only dropped when it lies beyond the limit
        $segments = count(explode('/', ltrim($file, '/')));
        if (($type == 'd' && $segments >= $opts['depth'])
            || ($type != 'd' && $segments > $opts['depth'])
        ) {
            return false; // depth reached
        }
    }

    //we do nothing with directories
    if ($type == 'd') {
        return true;
    }

    //only search txt files
    if (substr($file, -4) != '.txt') return true;

    $item       = array();
    $item['id'] = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
        return false;
    }

    $item['rev']   = filemtime($base.'/'.$file);
    $item['mtime'] = $item['rev'];
    $item['size']  = filesize($base.'/'.$file);
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}

/* ------------- helper functions below -------------- */

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count;
 * entries with equal count are ordered by Sort::strcmp() on their id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    if ($a['count'] > $b['count']) {
        return -1;
    }
    if ($a['count'] < $b['count']) {
        return 1;
    }
    return Sort::strcmp($a['id'], $b['id']);
}

/**
 * translates a document path to an ID
 *
 * Slashes become namespace separators, a trailing '.txt' is removed unless
 * $keeptxt is set, and surrounding colons are trimmed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 *
 * @param string $path
 * @param bool $keeptxt
 *
 * @return mixed|string
 */
function pathID($path, $keeptxt=false)
{
    $id = utf8_decodeFN($path);
    $id = str_replace('/', ':', $id);
    if (!$keeptxt) $id = preg_replace('#\.txt$#', '', $id);
    $id = trim($id, ':');
    return $id;
}


/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All *match options are regexp fragments without delimiters; this function
 * wraps them in '/' itself, so a literal '/' inside them has to be escaped.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $item   = array();
    $return = true;

    // get ID and check if it is a valid one
    $item['id'] = pathID($file, ($type == 'd' || !empty($opts['keeptxt'])));
    if ($item['id'] != cleanID($item['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($item['id']).' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item['ns'] = getNS($item['id']);

    if ($type == 'd') {
        // decide if recursion into this directory is wanted:
        // forever when no depth is given, otherwise until the depth is reached
        $return = empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];

        if ($return) {
            $match = empty($opts['recmatch']) || preg_match('/'.$opts['recmatch'].'/', $file);
            if (!$match) {
                return false; // doesn't match
            }
        }
    }

    // check ACL
    if (empty($opts['skipacl'])) {
        if ($type == 'd') {
            $item['perm'] = auth_quickaclcheck($item['id'].':*');
        } else {
            $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
        }
    } else {
        $item['perm'] = AUTH_DELETE;
    }

    // are we done here maybe?
    if ($type == 'd') {
        if (empty($opts['listdirs'])) return $return;
        //neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false;
        if (!empty($opts['dirmatch']) && !preg_match('/'.$opts['dirmatch'].'/', $file)) return $return;
        if (!empty($opts['nsmatch']) && !preg_match('/'.$opts['nsmatch'].'/', $item['ns'])) return $return;
    } else {
        if (empty($opts['listfiles'])) return $return;
        if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
        if (!empty($opts['pagesonly']) && (substr($file, -4) != '.txt')) return $return;
        if (empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
        if (!empty($opts['filematch']) && !preg_match('/'.$opts['filematch'].'/', $file)) return $return;
        if (!empty($opts['idmatch']) && !preg_match('/'.$opts['idmatch'].'/', $item['id'])) return $return;
    }

    // still here? prepare the item
    $item['type']  = $type;
    $item['level'] = $lvl;
    $item['open']  = $return;

    if (!empty($opts['meta'])) {
        $item['file']       = \dokuwiki\Utf8\PhpString::basename($file);
        $item['size']       = filesize($base.'/'.$file);
        $item['mtime']      = filemtime($base.'/'.$file);
        $item['rev']        = $item['mtime'];
        $item['writable']   = is_writable($base.'/'.$file);
        $item['executable'] = is_executable($base.'/'.$file);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base.'/'.$file, false));
        if (!empty($opts['firsthead'])) {
            $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
        }
    }

    // finally add the item
    $data[] = $item;
    return $return;
}

//Setup VIM: ex: et ts=4 :