<?php
/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

  if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
  require_once(DOKU_INC.'inc/common.php');

/**
 * Recurse directory
 *
 * This function recurses into a given base directory and calls the
 * supplied function for each file and directory found. Entries whose
 * name starts with a dot or an underscore are skipped. Directories are
 * handed to the callback first (in sorted order) and are only descended
 * into when the callback returns a true value; files of the current
 * level are handled afterwards (also sorted).
 *
 * If the directory can not be opened the function returns silently.
 *
 * @param array  &$data result structure, passed through to the callback
 * @param string $base  base directory
 * @param string $func  name of the callback function
 * @param array  $opts  options, passed through to the callback
 * @param string $dir   current directory relative to $base
 * @param int    $lvl   current recursion depth (starts at 1)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $dirs   = array();
  $files  = array();

  //read in directories and files
  $dh = @opendir($base.'/'.$dir);
  if(!$dh) return;
  while(($file = readdir($dh)) !== false){
    if(preg_match('/^[\._]/',$file)) continue; //skip hidden files and upper dirs
    if(is_dir($base.'/'.$dir.'/'.$file)){
      $dirs[] = $dir.'/'.$file;
      continue;
    }
    $files[] = $dir.'/'.$file;
  }
  closedir($dh);
  sort($files);
  sort($dirs);

  //give directories to userfunction then recurse
  foreach($dirs as $subdir){
    if ($func($data,$base,$subdir,'d',$lvl,$opts)){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }
  //now handle the files
  foreach($files as $file){
    $func($data,$base,$file,'f',$lvl,$opts);
  }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false).
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights.
 * Namespaces (directories) are NOT checked as this would break
 * the recursion (you can have a non-readable dir over a readable
 * one deeper nested).
 */

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the current namespace. It is compared against the
 * slash separated $file path, so it is presumably given in path form
 * (TODO confirm against the caller).
 *
 * Directories are always added; they are marked open (and recursed
 * into) only when they lie on the path of the current namespace.
 * Only readable .txt files are added.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  $ns = isset($opts['ns']) ? $opts['ns'] : '';

  //the directory name is quoted so that characters like '.' or '+'
  //in it are matched literally
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$ns)){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl,
                 'open'  => $return );
  return $return;
}

/**
 * List all namespaces
 *
 * Adds an entry for every directory and always asks for recursion.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'f') return true; //nothing to do on files

  $id = pathID($file);
  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return true;
}

/**
 * List all mediafiles in a namespace
 *
 * Does not recurse. For each readable file the basename, the size and,
 * for jpg/jpeg/gif/png files, the result of getimagesize() is stored.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  $info         = array();
  $info['id']   = pathID($file);

  //check ACL for namespace (we have no ACL for mediafiles)
  if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
    return false;
  }

  $info['file'] = basename($file);
  $info['size'] = filesize($base.'/'.$file);
  if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
    $info['isimg'] = true;
    $info['info']  = getimagesize($base.'/'.$file);
  }else{
    $info['isimg'] = false;
  }
  $data[] = $info;

  return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Does not recurse; adds the ID of every readable .txt file.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;
  if(preg_match('#\.txt$#',$file)){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }
  return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query. It is matched as a plain
 * substring against the relative file path.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //simple stringmatching
  if(strpos($file,$opts['query']) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * Every [[...]] link of a readable page is resolved to an ID and
 * compared to the searched page; the page is added once on the first
 * matching link.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //construct current namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL before the page content is read
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //get text
  $text = io_readfile($base.'/'.$file);

  //match all links
  //FIXME may be incorrect because of code blocks
  //      CamelCase isn't supported, too
  $matches = array();
  preg_match_all('#\[\[(.+?)\]\]#si',$text,$matches,PREG_SET_ORDER);
  foreach($matches as $match){
    //get ID from link (everything before the first '|') and discard
    //most non wikilinks
    $parts = explode('|',$match[1],2);
    $mid   = $parts[0];
    if(preg_match("#^(https?|telnet|gopher|file|wais|ftp|ed2k|irc)://#",$mid)) continue;
    if(preg_match("#\w+>#",$mid)) continue;
    $mns = getNS($mid);
    if($mns === false){
      //no namespace in link? add current
      $mid = "$cns:$mid";
    }elseif(strpos($mns,'.') === 0){
      //namespace starting with "." - prepend current namespace
      $mid = $cns.":".substr($mid,1);
    }
    $mid = cleanID($mid);

    if ($mid == $sid){
      $data[]['id'] = $cid;
      break;
    }
  }

  return true;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query. It is split on whitespace and a
 * page matches when any of the resulting words occurs in it. For every
 * matching readable page the ID, the number of hits and an HTML snippet
 * around the first query word are stored. An empty query matches
 * nothing.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //split query into words, empty words would match at every position
  $query  = isset($opts['query']) ? (string) $opts['query'] : '';
  $tokens = preg_split('/\s+/',$query,-1,PREG_SPLIT_NO_EMPTY);
  if(!$tokens) return true;

  //get text
  $text = io_readfile($base.'/'.$file);
  //lowercase text (u modifier does not help with case)
  $lctext = utf8_strtolower($text);

  //create regexps from queries: one for the raw text and one for the
  //HTML escaped snippet
  $quoted   = array();
  $hlquoted = array();
  foreach($tokens as $token){
    $quoted[]   = preg_quote($token,'#');
    $hlquoted[] = preg_quote(htmlspecialchars($token),'#');
  }
  $qpreg  = '('.join('|',$quoted).')';
  $hlpreg = '('.join('|',$hlquoted).')';

  //do the fulltext search
  $matches = array();
  $cnt = preg_match_all('#'.$qpreg.'#usi',$lctext,$matches);
  if($cnt){
    //this is not the best way for snippet generation but the fastest I could find
    //only the first word is used, lowercased like the text it is searched in
    $q = $tokens[0];
    $p = utf8_strpos($lctext,utf8_strtolower($q));
    if($p === false) $p = 0; //another word matched, start at the top
    $f = $p - 100;
    $l = utf8_strlen($q) + 200;
    if($f < 0) $f = 0;

    //highlight the hits in the escaped text only, the separators
    //are added afterwards so their markup can not be matched
    $excerpt = htmlspecialchars(utf8_substr($text,$f,$l));
    $excerpt = preg_replace('#'.$hlpreg.'#si','<span class="search_hit">\\1</span>',$excerpt);
    $snippet = '<span class="search_sep"> ... </span>'.
               $excerpt.
               '<span class="search_sep"> ... </span>';

    $data[] = array(
      'id'      => $id,
      'count'   => $cnt,
      'snippet' => $snippet,
    );
  }

  return true;
}

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered by ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sort_search_fulltext($a,$b){
  if($a['count'] > $b['count']){
    return -1;
  }elseif($a['count'] < $b['count']){
    return 1;
  }else{
    return strcmp($a['id'],$b['id']);
  }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing .txt is removed and leading and
 * trailing colons are stripped.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function pathID($path){
  $id = utf8_decodeFN($path);
  $id = str_replace('/',':',$id);
  $id = preg_replace('#\.txt$#','',$id);
  $id = preg_replace('#^:+#','',$id);
  $id = preg_replace('#:+$#','',$id);
  return $id;
}


//Setup VIM: ex: et ts=2 enc=utf-8 :