<?php
/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

  if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
  require_once(DOKU_INC.'inc/common.php');

/**
 * Recurse directory
 *
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory.
 *
 * Entries whose name starts with a dot or an underscore are skipped.
 * Directories are handed to the callback first (sorted); a directory is
 * only descended into when the callback returns a true value. Files of
 * the current level are handed to the callback afterwards (sorted).
 *
 * &$data - Reference to the result data structure, passed on to $func
 * $base  - Base directory to start from
 * $func  - Name of the callback function (see below for its signature)
 * $opts  - Option array, passed on to $func unchanged
 * $dir   - Current directory relative to $base (used for recursion)
 * $lvl   - Current recursion depth (used for recursion)
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $dirs   = array();
  $files  = array();

  //read in directories and files
  //a directory which can not be opened is silently ignored
  $dh = @opendir($base.'/'.$dir);
  if(!$dh) return;
  while(($file = readdir($dh)) !== false){
    if(preg_match('/^[\._]/',$file)) continue; //skip hidden files and upper dirs
    if(is_dir($base.'/'.$dir.'/'.$file)){
      $dirs[] = $dir.'/'.$file;
      continue;
    }
    $files[] = $dir.'/'.$file;
  }
  closedir($dh);
  sort($files);
  sort($dirs);

  //give directories to userfunction then recurse
  //$func is called as variable function to keep $data passed by reference
  foreach($dirs as $subdir){
    if ($func($data,$base,$subdir,'d',$lvl,$opts)){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }
  //now handle the files
  foreach($files as $file){
    $func($data,$base,$file,'f',$lvl,$opts);
  }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false).
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * Return values for files are ignored.
 *
 * All functions should check the ACL for document READ rights.
 * Namespaces (directories) are NOT checked as this would break
 * the recursion (you can have a nonreadable dir over a readable
 * one deeper nested).
 */

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the current namespace
 *
 * Every directory is added to the result; only directories lying on the
 * path given in $opts['ns'] are marked as open and recursed into. Files
 * are added when they end in .txt and are readable according to the ACL.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  //the directory name is quoted so it is matched literally and
  //can not inject regexp syntax into the pattern
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$opts['ns'])){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl,
                 'open'  => $return );
  return $return;
}

/**
 * List all namespaces
 *
 * Adds an entry for every directory and always asks for recursion.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'f') return true; //nothing to do on files

  $id = pathID($file);
  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return true;
}

/**
 * List all mediafiles in a namespace
 *
 * Does not recurse into directories. For each readable file the id,
 * the basename and the size are collected; for files ending in
 * jpg, jpeg, gif or png the result of getimagesize() is added as well.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  $info         = array();
  $info['id']   = pathID($file);

  //check ACL for namespace (we have no ACL for mediafiles)
  if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
    return false;
  }

  $info['file'] = basename($file);
  $info['size'] = filesize($base.'/'.$file);
  if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
    $info['isimg'] = true;
    $info['info']  = getimagesize($base.'/'.$file);
  }else{
    $info['isimg'] = false;
  }
  $data[] = $info;

  return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Does not recurse into directories; adds the id of every readable
 * .txt file.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;
  if(preg_match('#\.txt$#',$file)){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }
  return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * The query is matched as plain substring against the path of
 * the file relative to the base directory.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //simple stringmatching
  if(strpos($file,$opts['query']) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * The id of the current page is added to the result as soon as one of
 * its link instructions points to the searched page.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //the configuration lives in the global scope - without this
  //declaration the camelcase setting below would never be seen
  global $conf;

  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //current id and namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //fetch instructions
  require_once(DOKU_INC.'inc/parserutils.php');
  $instructions = p_cached_instructions($base.$file,true);
  if(is_null($instructions)) return false;

  //check all links for match
  foreach($instructions as $ins){
    if($ins[0] == 'internallink' || ($conf['camelcase'] && $ins[0] == 'camelcaselink') ){
      $mid = $ins[1][0];
      //presumably rewrites $mid to an absolute id relative to $cns
      resolve_pageid($cns,$mid,$exists); //exists is not used
      if($mid == $sid){
        //we have a match - finish
        $data[]['id'] = $cid;
        break;
      }
    }
  }

  return false;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query
 *
 * The query is split on whitespace; a page matches when at least one of
 * the resulting tokens occurs in its text. For each matching page the id,
 * the number of hits and a HTML snippet around the first occurrence of
 * the first token are added to the result. An empty query matches nothing.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //split the query into tokens - it is lowercased like the text below and
  //empty tokens are dropped as they would match at every position
  $tokens = preg_split('/\s+/',utf8_strtolower($opts['query']),-1,PREG_SPLIT_NO_EMPTY);
  if(!$tokens) return true; //nothing to search for

  //get text
  $text = io_readfile($base.'/'.$file);
  //lowercase text (u modifier does not help with case)
  $lctext = utf8_strtolower($text);

  //create regexp from queries
  $quoted = array();
  foreach($tokens as $token){
    $quoted[] = preg_quote($token,'#');
  }
  $qpreg = '('.join('|',$quoted).')';

  //do the fulltext search
  $matches = array();
  if($cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches)){
    //this is not the best way for snippet generation but the fastest I could find
    //only the first token is used to position the snippet
    $q = $tokens[0];
    $p = utf8_strpos($lctext,$q);
    if($p === false) $p = 0; //first token not in this page - start at the top
    $f = $p - 100;
    $l = utf8_strlen($q) + 200;
    if($f < 0) $f = 0;

    //highlight the hits before the separators are added, otherwise a
    //query like "span" would match inside the separator markup
    $excerpt = htmlspecialchars(utf8_substr($text,$f,$l));
    $excerpt = preg_replace('#'.$qpreg.'#si','<span class="search_hit">\\1</span>',$excerpt);
    $snippet = '<span class="search_sep"> ... </span>'.
               $excerpt.
               '<span class="search_sep"> ... </span>';

    $data[] = array(
      'id'      => $id,
      'count'   => $cnt,
      'snippet' => $snippet,
    );
  }

  return true;
}

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count;
 * entries with the same count are ordered by id.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function sort_search_fulltext($a,$b){
  if($a['count'] > $b['count']){
    return -1;
  }elseif($a['count'] < $b['count']){
    return 1;
  }else{
    return strcmp($a['id'],$b['id']);
  }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing .txt is removed and leading
 * and trailing colons are stripped.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 */
function pathID($path){
  $id = utf8_decodeFN($path);
  $id = str_replace('/',':',$id);
  $id = preg_replace('#\.txt$#','',$id);
  $id = preg_replace('#^:+#','',$id);
  $id = preg_replace('#:+$#','',$id);
  return $id;
}


//Setup VIM: ex: et ts=2 enc=utf-8 :