<?php
/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

  require_once("inc/common.php");

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 *
 * Entries whose name starts with a dot are skipped. Directories are
 * handed to the callback first (in sorted order) and are only descended
 * into when the callback returns a true value; afterwards the files of
 * the current directory are handed to the callback (in sorted order).
 * A directory that cannot be opened is silently skipped.
 *
 * @param array  &$data result structure, passed through to the callback
 * @param string $base  base directory
 * @param string $func  name of the callback function (see below)
 * @param array  $opts  options, passed through to the callback
 * @param string $dir   current directory relative to $base
 * @param int    $lvl   current recursion depth
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $dirs   = array();
  $files  = array();

  //read in directories and files
  $dh = @opendir($base.'/'.$dir);
  if(!$dh) return;
  while(($file = readdir($dh)) !== false){
    if(preg_match('/^\./',$file)) continue; //skip hidden files and upper dirs
    if(is_dir($base.'/'.$dir.'/'.$file)){
      $dirs[] = $dir.'/'.$file;
      continue;
    }
    $files[] = $dir.'/'.$file;
  }
  closedir($dh);
  sort($files);
  sort($dirs);

  //give directories to userfunction then recurse
  //(separate loop variable so the $dir parameter is not overwritten)
  foreach($dirs as $subdir){
    if ($func($data,$base,$subdir,'d',$lvl,$opts)){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }
  //now handle the files
  foreach($files as $file){
    $func($data,$base,$file,'f',$lvl,$opts);
  }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked as this would break
 * the recursion (You can have a nonreadable dir over a readable
 * one deeper nested)
 */

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the current namespace
 *
 * Directories are always added; they are only opened (recursed into)
 * when they lie on the path given in $opts['ns']. Files are added when
 * they end in .txt and are readable according to the ACL.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  //the directory name is used literally, so it has to be quoted for the regexp
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$opts['ns'])){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl,
                 'open'  => $return );
  return $return;
}

/**
 * List all namespaces
 *
 * Adds every directory to the result and always recurses.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'f') return true; //nothing to do on files

  $id = pathID($file);
  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return true;
}

/**
 * List all mediafiles in a namespace
 *
 * Does not recurse into directories. For each file the ID, basename
 * and size are recorded; for files ending in .jpg/.jpeg/.gif/.png the
 * result of getimagesize() is stored as well.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  $info         = array();
  $info['id']   = pathID($file);

  //check ACL for namespace (we have no ACL for mediafiles)
  if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
    return false;
  }

  $info['file'] = basename($file);
  $info['size'] = filesize($base.'/'.$file);
  if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
    $info['isimg'] = true;
    $info['info']  = getimagesize($base.'/'.$file);
  }else{
    $info['isimg'] = false;
  }
  $data[] = $info;

  return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Does not recurse into directories. Only readable .txt files are added.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;
  if(preg_match('#\.txt$#',$file)){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }
  return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * The query is matched as a plain substring against the file path
 * relative to $base.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //simple stringmatching
  if(strpos($file,$opts['query']) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * Every [[...]] link in a readable .txt file is resolved to an ID and
 * compared with the searched page; the file is added once on the first
 * matching link.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //construct current namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL (before reading the file, no need to load unreadable pages)
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //get text
  $text = io_readfile($base.'/'.$file);

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //match all links
  //FIXME may be incorrect because of code blocks
  //      CamelCase isn't supported, too
  preg_match_all('#\[\[(.+?)\]\]#si',$text,$matches,PREG_SET_ORDER);
  foreach($matches as $match){
    //get ID from link and discard most non wikilinks
    //(explode instead of split(), which is no longer available in PHP 7+)
    list($mid) = explode('|',$match[1],2);
    if(preg_match("#^(https?|telnet|gopher|file|wais|ftp|ed2k|irc)://#",$mid)) continue;
    if(preg_match("#\w+>#",$mid)) continue;
    $mns = getNS($mid);
    if($mns === false){
      //no namespace in link? add current
      $mid = "$cns:$mid";
    }elseif(strpos($mns,'.') === 0){
      //namespace starting with "." - prepend current namespace
      $mid = $cns.":".substr($mid,1);
    }
    $mid = cleanID($mid);

    if ($mid == $sid){
      $data[]['id'] = $cid;
      break;
    }
  }

  return true;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query
 *
 * The query is split on whitespace; a page matches when any of the
 * tokens occurs in it (case insensitive). For each matching page the
 * ID, the number of hits and an HTML snippet around the first
 * occurrence of the first token are recorded.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //split query into tokens - empty tokens are dropped because an empty
  //alternative in the regexp would match at every position of every page
  $tokens = preg_split('/\s+/',$opts['query'],-1,PREG_SPLIT_NO_EMPTY);
  if(!$tokens) return true;

  //create regexp from queries
  $quoted = array();
  foreach($tokens as $token){
    $quoted[] = preg_quote($token,'#');
  }
  $qpreg = '('.join('|',$quoted).')';

  //get text
  $text = io_readfile($base.'/'.$file);

  //do the fulltext search
  $matches = array();
  $cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches);
  if($cnt){
    //this is not the best way for snippet generation but the fastest I could find
    //only the first token is used to position the snippet; both sides
    //are lowercased so the lookup is case insensitive like the search
    $q = $tokens[0];
    $p = strpos(strtolower($text),strtolower($q));
    if($p === false) $p = 0; //first token not in text, start at the top
    $f = $p - 100;
    $l = strlen($q) + 200;
    if($f < 0) $f = 0;
    $raw = substr($text,$f,$l);

    //split the raw text at the hits (odd indexes are the hits), then
    //escape each piece on its own - this way the query is matched against
    //the real text and never against the HTML added around it
    $parts = preg_split('#'.$qpreg.'#si',$raw,-1,PREG_SPLIT_DELIM_CAPTURE);
    if(!is_array($parts)) $parts = array($raw);
    $hits = '';
    foreach($parts as $i => $part){
      $part = htmlspecialchars($part);
      if($i % 2){
        $hits .= '<span class="search_hit">'.$part.'</span>';
      }else{
        $hits .= $part;
      }
    }
    $snippet = '<span class="search_sep"> ... </span>'.
               $hits.
               '<span class="search_sep"> ... </span>';

    $data[] = array(
      'id'      => $id,
      'count'   => $cnt,
      'snippet' => $snippet,
    );
  }

  return true;
}

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered by ID.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function sort_search_fulltext($a,$b){
  if($a['count'] > $b['count']){
    return -1;
  }elseif($a['count'] < $b['count']){
    return 1;
  }else{
    return strcmp($a['id'],$b['id']);
  }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing .txt is removed and leading and
 * trailing colons are stripped.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function pathID($path){
  $id = utf8_decodeFN($path);
  $id = str_replace('/',':',$id);
  $id = preg_replace('#\.txt$#','',$id);
  $id = preg_replace('#^:+#','',$id);
  $id = preg_replace('#:+$#','',$id);
  return $id;
}