<?php

require_once("inc/common.php");

/**
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory
 *
 * Entries whose name starts with a dot (hidden files, '.' and '..') are
 * skipped. Within one directory all subdirectories are handled first
 * (sorted), then all files (sorted).
 *
 * &$data - Reference to the result data structure, handed on to $func
 * $base  - Base directory the search starts in
 * $func  - Name of the userfunction to call for every entry
 * $opts  - Option array handed on to $func unchanged
 * $dir   - Current directory relative to $base (used by the recursion)
 * $lvl   - Current recursion depth (used by the recursion)
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $dirs  = array();
  $files = array();

  //read in directories and files
  //unreadable or missing directories are silently skipped on purpose
  $dh = @opendir($base.'/'.$dir);
  if(!$dh) return;
  while(($file = readdir($dh)) !== false){
    if(preg_match('/^\./',$file)) continue; //skip hidden files and upper dirs
    if(is_dir($base.'/'.$dir.'/'.$file)){
      $dirs[] = $dir.'/'.$file;
      continue;
    }
    $files[] = $dir.'/'.$file;
  }
  closedir($dh);
  sort($files);
  sort($dirs);

  //give directories to userfunction then recurse
  //(a separate loop variable keeps the $dir parameter intact)
  foreach($dirs as $subdir){
    if ($func($data,$base,$subdir,'d',$lvl,$opts)){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }
  //now handle the files
  foreach($files as $file){
    $func($data,$base,$file,'f',$lvl,$opts);
  }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked as this would break
 * the recursion (You can have a nonreadable dir over a readable
 * one deeper nested)
 */

/**
 * This function builds the browsable index of pages
 *
 * $opts['ns'] is the current namespace. It is prefixed with '/' and
 * compared against the directory path, so only directories lying on the
 * way to that namespace are traversed; all others are added to the
 * result but not entered.
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  //$file is quoted because directory names may contain regexp metacharacters
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$opts['ns'])){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return $return;
}

/**
 * This function lists all namespaces
 *
 * Every directory is added to the result and traversed; files are ignored.
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'f') return true; //nothing to do on files

  $id = pathID($file);
  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return true;
}

/**
 * This function lists all mediafiles in a namespace
 *
 * Directories are never traversed. Each result entry holds 'id', 'file',
 * 'size' and 'isimg'; for jpg/jpeg/gif/png files 'info' additionally holds
 * the result of getimagesize().
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  $info       = array();
  $info['id'] = pathID($file);

  //check ACL for namespace (we have no ACL for mediafiles)
  if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
    return false;
  }

  $info['file'] = basename($file);
  $info['size'] = filesize($base.'/'.$file);
  if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
    $info['isimg'] = true;
    $info['info']  = getimagesize($base.'/'.$file);
  }else{
    $info['isimg'] = false;
  }
  $data[] = $info;

  return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Directories are never traversed, only readable .txt files are added.
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;
  if(preg_match('#\.txt$#',$file)){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }
  return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query. It is matched as a plain substring
 * against the file path relative to $base. An empty query matches nothing.
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //an empty needle would warn on old PHP versions and match everything on
  //new ones - treat it as "no match" consistently
  if(!isset($opts['query']) || $opts['query'] === '') return true;

  //simple stringmatching
  if(strpos($file,$opts['query']) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * A page is added to the result (once) as soon as one of its [[...]]
 * links resolves to the searched page.
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //construct current namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL - done before the file is read at all
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //get text
  $text = io_readfile($base.'/'.$file);

  //match all links
  //FIXME may be incorrect because of code blocks
  //      CamelCase isn't supported, too
  $matches = array();
  preg_match_all('#\[\[(.+?)\]\]#si',$text,$matches,PREG_SET_ORDER);
  foreach($matches as $match){
    //get ID from link and discard most non wikilinks
    //(the part after the first "|" is the link title)
    list($mid) = explode('|',$match[1],2);
    if(preg_match("#^(https?|telnet|gopher|file|wais|ftp|ed2k|irc)://#",$mid)) continue;
    if(preg_match("#\w+>#",$mid)) continue;
    $mns = getNS($mid);
    if($mns===false){
      //no namespace in link? add current
      $mid = "$cns:$mid";
    }elseif(strpos($mns,'.')===0){
      //namespace starting with "." - prepend current namespace
      $mid = $cns.":".substr($mid,1);
    }
    $mid = cleanID($mid);

    if ($mid == $sid){
      $data[]['id'] = $cid;
      break;
    }
  }

  return true;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query. It is split on whitespace and a
 * page matches when it contains any of the words (case insensitive).
 * Each result entry holds 'id', 'count' (number of matches) and 'snippet'
 * (HTML excerpt around the first occurrence of the first word with all
 * matches highlighted).
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //split the query into words - empty words are dropped because an empty
  //alternative in the regexp would match at every position of every page
  $query = isset($opts['query']) ? $opts['query'] : '';
  $words = preg_split('/\s+/',$query,-1,PREG_SPLIT_NO_EMPTY);
  if(!$words) return true;

  //get text
  $text = io_readfile($base.'/'.$file);

  //create regexp from queries
  $quoted = array();
  foreach($words as $word){
    $quoted[] = preg_quote($word,'#');
  }
  $qpreg = '#('.join('|',$quoted).')#si';

  //do the fulltext search
  $matches = array();
  $cnt = preg_match_all($qpreg,$text,$matches);
  if($cnt){
    //this is not the best way for snippet generation but the fastest I could find
    //only the first word is used to position the excerpt
    $q = $words[0];
    $p = stripos($text,$q); //case insensitive like the regexp above
    if($p === false) $p = 0;
    $f = $p - 100;
    $l = strlen($q) + 200;
    if($f < 0) $f = 0;

    //split the raw excerpt at the matches and escape every piece on its
    //own - this way neither the surrounding markup nor HTML entities can
    //be hit by the highlighting
    $excerpt = substr($text,$f,$l);
    $parts   = preg_split($qpreg,$excerpt,-1,PREG_SPLIT_DELIM_CAPTURE);
    if($parts === false) $parts = array($excerpt);

    $snippet = '<span class="search_sep"> ... </span>';
    foreach($parts as $idx => $part){
      $part = htmlspecialchars($part);
      //odd positions hold the captured matches
      if($idx % 2){
        $part = '<span class="search_hit">'.$part.'</span>';
      }
      $snippet .= $part;
    }
    $snippet .= '<span class="search_sep"> ... </span>';

    $data[] = array(
      'id'      => $id,
      'count'   => $cnt,
      'snippet' => $snippet,
    );
  }

  return true;
}

/**
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered by id
 */
function sort_search_fulltext($a,$b){
  if($a['count'] > $b['count']){
    return -1;
  }elseif($a['count'] < $b['count']){
    return 1;
  }else{
    return strcmp($a['id'],$b['id']);
  }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing ".txt" is removed and leading and
 * trailing colons are stripped.
 */
function pathID($path){
  $id = str_replace('/',':',$path);
  $id = preg_replace('#\.txt$#','',$id);
  $id = preg_replace('#^:+#','',$id);
  $id = preg_replace('#:+$#','',$id);
  return $id;
}