<?php

/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

use dokuwiki\Utf8\PhpString;
use dokuwiki\File\MediaFile;
use dokuwiki\Utf8\Sort;

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 *
 * Entries whose name starts with a dot or an underscore are skipped.
 * Directories are handed to the callback first; a directory is only
 * descended into when the callback returns a truthy value. Files of the
 * current directory are handed to the callback afterwards.
 *
 * @param array    &$data The results of the search are stored here
 * @param string   $base  Where to start the search
 * @param callback $func  Callback (function name or array with object,method)
 * @param array    $opts  option array will be given to the Callback
 * @param string   $dir   Current directory beyond $base
 * @param int      $lvl   Recursion Level
 * @param mixed    $sort  'natural' to use natural order sorting (default);
 *                        'date' to sort by filemtime; leave empty to skip sorting.
 *
 * @throws RuntimeException when $base is empty or the filesystem root
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
{
    $dirs = [];
    $files = [];
    $filepaths = [];

    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    //read in directories and files
    $dh = @opendir($base . '/' . $dir);
    if (!$dh) return;
    while (($file = readdir($dh)) !== false) {
        if (preg_match('/^[\._]/', $file)) continue; //skip hidden files and upper dirs
        if (is_dir($base . '/' . $dir . '/' . $file)) {
            $dirs[] = $dir . '/' . $file;
            continue;
        }
        $files[] = $dir . '/' . $file;
        $filepaths[] = $base . '/' . $dir . '/' . $file;
    }
    closedir($dh);

    if (!empty($sort)) {
        if ($sort == 'date') {
            // newest first; $filepaths is index-aligned with $files
            @array_multisort(array_map('filemtime', $filepaths), SORT_NUMERIC, SORT_DESC, $files);
        } else /* natural */ {
            Sort::asortFN($files);
        }
        // directories are always sorted by name, whatever $sort mode is used
        Sort::asortFN($dirs);
    }

    //give directories to userfunction then recurse
    foreach ($dirs as $subdir) {
        if (call_user_func_array($func, [&$data, $base, $subdir, 'd', $lvl, $opts])) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }
    //now handle the files
    foreach ($files as $file) {
        call_user_func_array($func, [&$data, $base, $file, 'f', $lvl, $opts]);
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * array &$data  - Reference to the result data structure
 * string $base  - Base usually $conf['datadir']
 * string $file  - current file or directory relative to $base
 * string $type  - Type either 'd' for directory or 'f' for file
 * int    $lvl   - Current recursion depth
 * array  $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (You can have a nonreadable dir over a readable
 * one deeper nested) also make sure to check the file type (for example
 * in case of lockfiles).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is the search query. It is quoted and turned into an
 * 'idmatch' expression for search_universal(), anchored at the start of
 * the ID or directly after a namespace separator.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    // search_universal() adds the '/' delimiters itself, so the expression
    // must not carry one of its own (a trailing '/' here results in
    // '/.../' + '/', which preg_match() rejects as an unknown modifier)
    $opts = [
        'idmatch'   => '(^|:)' . preg_quote($opts['query'] ?? '', '/'),
        'listfiles' => true,
        'pagesonly' => true
    ];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] set to skip listing files
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;
    $ns = $opts['ns'] ?? '';

    // Only paths leading to the viewed namespace get depth 0 (unlimited);
    // everything else gets -1, which search_universal() treats as
    // "depth reached", so those directories are listed but not descended.
    // Hacky, should rather use recmatch
    $onpath = preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $ns);

    $opts = [
        'pagesonly' => true,
        'listdirs' => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        'depth' => $onpath ? 0 : -1,
    ];

    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all namespaces
 *
 * Any options passed in are discarded; only directories are listed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = ['listdirs' => true];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Each listed file is appended to $data as an array with the keys
 * id, perm, file, size, mtime, writable, isimg, meta (images only)
 * and hash (only when requested).
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    //directories are never listed, we only decide whether to descend
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        return $depth < $opts['depth']; // false once depth is reached
    }

    $info = [];
    $info['id'] = pathID($file, true);
    if ($info['id'] != cleanID($info['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($info['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']) . ':*');
    if (empty($opts['skipacl']) && $info['perm'] < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])) {
        return false;
    }

    $path = $base . '/' . $file;
    $info['file'] = PhpString::basename($file);
    $info['size'] = filesize($path);
    $info['mtime'] = filemtime($path);
    $info['writable'] = is_writable($path);
    if (preg_match("/\.(jpe?g|gif|png)$/", $file)) {
        $info['isimg'] = true;
        $info['meta'] = new JpegMeta($path);
    } else {
        $info['isimg'] = false;
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($info['id']), false));
    }

    $data[] = $info;

    return false;
}

/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    //directories are never listed, we only decide whether to descend
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        return $depth < $opts['depth']; // false once depth is reached
    }

    $id = pathID($file, true);
    if ($id != cleanID($id)) {
        // showmsg is optional, so it must not be read unguarded
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new MediaFile($id);
    return false;
}


/**
 * This function just lists documents (for RSS namespace export)
 *
 * Never recurses: directories always yield false, so only the files
 * directly inside the searched directory are listed.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    //never descend into directories
    if ($type == 'd') return false;
    //only search txt files
    if (substr($file, -4) == '.txt') {
        //check ACL
        $id = pathID($file);
        if (auth_quickaclcheck($id) < AUTH_READ) {
            return false;
        }
        $data[]['id'] = $id;
    }
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    //always descend into directories
    if ($type == 'd') return true;
    //only search txt files
    if (substr($file, -4) != '.txt') return true;

    //simple stringmatching against the relative file path
    if (!empty($opts['query'])) {
        if (strpos($file, (string) $opts['query']) !== false) {
            //check ACL
            $id = pathID($file);
            if (auth_quickaclcheck($id) < AUTH_READ) {
                return false;
            }
            $data[]['id'] = $id;
        }
    }
    return true;
}

/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    if (isset($opts['depth']) && $opts['depth']) {
        // number of path segments; a directory at the limit is not entered,
        // a file is only dropped when it lies beyond the limit
        $parts = explode('/', ltrim($file, '/'));
        if (
            ($type == 'd' && count($parts) >= $opts['depth'])
            || ($type != 'd' && count($parts) > $opts['depth'])
        ) {
            return false; // depth reached
        }
    }

    //directories are not listed, just descended into
    if ($type == 'd') {
        return true;
    }

    //only search txt files
    if (substr($file, -4) != '.txt') return true;

    $item = [];
    $item['id'] = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ) {
        return false;
    }

    $item['rev'] = filemtime($base . '/' . $file);
    $item['mtime'] = $item['rev'];
    $item['size'] = filesize($base . '/' . $file);
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}

/* ------------- helper functions below -------------- */

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count;
 * entries with equal count are ordered by Sort::strcmp() on their id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    // operands are swapped on purpose: the higher count sorts first
    $bycount = $b['count'] <=> $a['count'];
    if ($bycount !== 0) {
        return $bycount;
    }
    return Sort::strcmp($a['id'], $b['id']);
}

/**
 * translates a document path to an ID
 *
 * Slashes become namespace separators and leading/trailing separators
 * are trimmed; a trailing '.txt' is removed unless $keeptxt is set.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 *
 * @param string $path
 * @param bool $keeptxt
 *
 * @return string
 */
function pathID($path, $keeptxt = false)
{
    $id = utf8_decodeFN($path);
    $id = str_replace('/', ':', $id);
    if (!$keeptxt) $id = preg_replace('#\.txt$#', '', $id);
    $id = trim($id, ':');
    return $id;
}


/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All *match expressions are given without delimiters; they are wrapped
 * in '/' here, so a literal '/' inside them has to be escaped.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $item = [];
    $return = true;

    // get ID and check if it is a valid one
    $item['id'] = pathID($file, ($type == 'd' || !empty($opts['keeptxt'])));
    if ($item['id'] != cleanID($item['id'])) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($item['id']) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item['ns'] = getNS($item['id']);

    if ($type == 'd') {
        // decide if recursion into this directory is wanted:
        // no depth means recurse forever, otherwise stop once depth is reached
        $return = empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];

        // a directory failing recmatch is neither listed nor descended into
        if ($return && !empty($opts['recmatch']) && !preg_match('/' . $opts['recmatch'] . '/', $file)) {
            return false; // doesn't match
        }
    }

    // check ACL
    if (empty($opts['skipacl'])) {
        if ($type == 'd') {
            $item['perm'] = auth_quickaclcheck($item['id'] . ':*');
        } else {
            $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
        }
    } else {
        $item['perm'] = AUTH_DELETE;
    }

    // are we done here maybe?
    if ($type == 'd') {
        if (empty($opts['listdirs'])) return $return;
        //neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false;
        if (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file)) return $return;
        if (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns'])) return $return;
    } else {
        if (empty($opts['listfiles'])) return $return;
        if (empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
        if (!empty($opts['pagesonly']) && (substr($file, -4) != '.txt')) return $return;
        if (empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
        if (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file)) return $return;
        if (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $item['id'])) return $return;
    }

    // still here? prepare the item
    $item['type'] = $type;
    $item['level'] = $lvl;
    $item['open'] = $return;

    if (!empty($opts['meta'])) {
        $path = $base . '/' . $file;
        $item['file'] = PhpString::basename($file);
        $item['size'] = filesize($path);
        $item['mtime'] = filemtime($path);
        $item['rev'] = $item['mtime'];
        $item['writable'] = is_writable($path);
        $item['executable'] = is_executable($path);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base . '/' . $file, false));
        if (!empty($opts['firsthead'])) $item['title'] = p_get_first_heading($item['id'], METADATA_DONT_RENDER);
    }

    // finally add the item
    $data[] = $item;
    return $return;
}

//Setup VIM: ex: et ts=4 :