<?php
/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

  if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
  require_once(DOKU_INC.'inc/common.php');

/**
 * Recurse directory
 *
 * Reads the directory $base/$dir, sorts the entries and hands every
 * subdirectory and then every file to the supplied callback. A
 * subdirectory is descended into only when the callback returns a true
 * value for it. Entries whose name starts with a dot or an underscore
 * are skipped.
 *
 * @param array  &$data result structure, passed through to the callback
 * @param string $base  base directory
 * @param string $func  name of the callback function (see below)
 * @param array  $opts  options, passed through to the callback
 * @param string $dir   current directory relative to $base
 * @param int    $lvl   current recursion depth, starting at 1
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $dirs   = array();
  $files  = array();

  //read in directories and files
  //unreadable or missing directories are silently skipped on purpose
  $dh = @opendir($base.'/'.$dir);
  if(!$dh) return;
  while(($file = readdir($dh)) !== false){
    if(preg_match('/^[\._]/',$file)) continue; //skip hidden files and upper dirs
    if(is_dir($base.'/'.$dir.'/'.$file)){
      $dirs[] = $dir.'/'.$file;
      continue;
    }
    $files[] = $dir.'/'.$file;
  }
  closedir($dh);
  sort($files);
  sort($dirs);

  //give directories to userfunction then recurse
  foreach($dirs as $subdir){
    if ($func($data,$base,$subdir,'d',$lvl,$opts)){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }
  //now handle the files
  foreach($files as $file){
    $func($data,$base,$file,'f',$lvl,$opts);
  }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. Such a function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false).
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * Return values for files are ignored.
 *
 * All functions should check the ACL for document READ rights.
 * Namespaces (directories) are NOT checked as this would break
 * the recursion (you can have a nonreadable dir over a readable
 * one deeper nested).
 */

/**
 * Searches for pages beginning with the given query
 *
 * $opts['query'] is the prefix the page ID has to start with.
 * Directories are never descended into.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_qsearch(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'd'){
    return false; //no handling yet
  }

  //get id
  $id = pathID($file);

  //check if it matches the query
  if(!preg_match('/^'.preg_quote($opts['query'],'/').'/u',$id)){
    return false;
  }

  //check ACL
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => 1,
                 'open'  => true);
  return true;
}

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the current namespace, given as a path relative to
 * $base without leading slash.
 *
 * Directories are always added to the result but only those on the
 * path to the current namespace are opened (recursed into). Files are
 * added when they end in .txt and are readable.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  //$file is a literal path, so it has to be quoted before it is used in
  //a pattern - otherwise 'a.b' would also match the namespace 'aXb'
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$opts['ns'])){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl,
                 'open'  => $return );
  return $return;
}

/**
 * List all namespaces
 *
 * Adds every directory to the result and always asks for recursion.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'f') return true; //nothing to do on files

  $id = pathID($file);
  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return true;
}

/**
 * List all mediafiles in a namespace
 *
 * Each result entry holds 'id', 'file' (basename), 'size' in bytes and
 * 'isimg'. For jpg/jpeg/gif/png files 'info' holds the result of
 * getimagesize(). Directories are never descended into.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  $info       = array();
  $info['id'] = pathID($file);

  //check ACL for namespace (we have no ACL for mediafiles)
  if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
    return false;
  }

  $info['file'] = basename($file);
  $info['size'] = filesize($base.'/'.$file);
  if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
    $info['isimg'] = true;
    $info['info']  = getimagesize($base.'/'.$file);
  }else{
    $info['isimg'] = false;
  }
  $data[] = $info;

  return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Adds the ID of every readable .txt file. Directories are never
 * descended into.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;
  if(preg_match('#\.txt$#',$file)){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }
  return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query. It is matched as a plain
 * substring against the file path (not the decoded page ID).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //simple stringmatching
  if(strpos($file,$opts['query']) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * Walks the parser instructions of every readable page and records
 * the page once as soon as one of its links resolves to the wanted ID.
 * CamelCase links are only considered when $conf['camelcase'] is set.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //the configuration lives in the global scope, without this
  //declaration the camelcase setting could never be seen here
  global $conf;

  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //current id and namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //fetch instructions
  //search() hands over $file with a leading slash, so no separator is
  //inserted between $base and $file here
  require_once(DOKU_INC.'inc/parserutils.php');
  $instructions = p_cached_instructions($base.$file,true);
  if(is_null($instructions)) return false;

  $camelcase = !empty($conf['camelcase']);

  //check all links for match
  foreach($instructions as $ins){
    if($ins[0] == 'internallink' || ($camelcase && $ins[0] == 'camelcaselink') ){
      $mid = $ins[1][0];
      resolve_pageid($cns,$mid,$exists); //exists is not used
      if($mid == $sid){
        //we have a match - finish
        $data[]['id'] = $cid;
        break;
      }
    }
  }

  return false;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query. It is split on whitespace and a
 * page matches when it contains at least one of the tokens (case
 * insensitive). Each result entry holds the page 'id', the number of
 * hits in 'count' and a HTML 'snippet' with the hits highlighted.
 *
 * An empty or whitespace-only query matches nothing.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //split the query into tokens, dropping the empty ones a leading or
  //trailing blank would create (an empty alternative matches everywhere)
  $tokens = preg_split('/\s+/',$opts['query'],-1,PREG_SPLIT_NO_EMPTY);
  if(!$tokens) return true;

  //create regexp from queries
  $quoted = array();
  foreach($tokens as $token){
    $quoted[] = preg_quote($token,'#');
  }
  $qpreg = '('.join('|',$quoted).')';

  //get text
  $text = io_readfile($base.'/'.$file);
  //lowercase text (u modifier does not help with case)
  $lctext = utf8_strtolower($text);

  //do the fulltext search
  $matches = array();
  $cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches);
  if(!$cnt) return true;

  //this is not the best way for snippet generation but the fastest I could find
  //centre the snippet on the first token; it has to be lowercased as well
  //to be found in the lowercased text. When the first token does not occur
  //at all, use the first real hit instead.
  $needle = utf8_strtolower($tokens[0]);
  $pos    = utf8_strpos($lctext,$needle);
  if($pos === false){
    $needle = $matches[0][0];
    $pos    = utf8_strpos($lctext,$needle);
  }
  $from = (int) $pos - 100;
  if($from < 0) $from = 0;
  $len  = utf8_strlen($needle) + 200;

  //only the page text is highlighted, the separator markup is added afterwards
  $snippet = '<span class="search_sep"> ... </span>'.
             _search_highlight(utf8_substr($text,$from,$len),$qpreg).
             '<span class="search_sep"> ... </span>';

  $data[] = array(
    'id'      => $id,
    'count'   => $cnt,
    'snippet' => $snippet,
  );

  return true;
}

/**
 * Escape a piece of raw text for HTML and mark all search hits in it
 *
 * The raw text is split at the hits first and every piece is escaped
 * on its own. This way a query can neither match inside the generated
 * markup nor inside an HTML entity.
 *
 * @param  string $text  raw (unescaped) text
 * @param  string $qpreg regexp body consisting of exactly one capture
 *                       group, as built by search_fulltext()
 * @return string        HTML with hits wrapped in span.search_hit
 */
function _search_highlight($text,$qpreg){
  $parts = preg_split('#'.$qpreg.'#usi',$text,-1,PREG_SPLIT_DELIM_CAPTURE);
  //on a regexp error fall back to the plain escaped text
  if($parts === false) return htmlspecialchars($text);

  //with one capture group the pieces alternate: text, hit, text, hit, ...
  $html = '';
  foreach($parts as $idx => $part){
    $part = htmlspecialchars($part);
    if($idx % 2){
      $html .= '<span class="search_hit">'.$part.'</span>';
    }else{
      $html .= $part;
    }
  }
  return $html;
}

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered ascending by ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sort_search_fulltext($a,$b){
  if($a['count'] > $b['count']){
    return -1;
  }elseif($a['count'] < $b['count']){
    return 1;
  }else{
    return strcmp($a['id'],$b['id']);
  }
}

/**
 * translates a document path to an ID
 *
 * Decodes the filename, turns slashes into colons, strips a trailing
 * .txt and removes leading and trailing colons.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo   move to pageutils
 */
function pathID($path){
  $id = utf8_decodeFN($path);
  $id = str_replace('/',':',$id);
  $id = preg_replace('#\.txt$#','',$id);
  $id = preg_replace('#^:+#','',$id);
  $id = preg_replace('#:+$#','',$id);
  return $id;
}


//Setup VIM: ex: et ts=2 enc=utf-8 :