<?php

require_once("inc/common.php");

/**
 * This function recurses into a given base directory
 * and calls the supplied function for each file and directory
 *
 * Entries whose name starts with a dot are skipped. Within one directory
 * the subdirectories are handled first (sorted), then the files (sorted).
 * A subdirectory is only descended into when the callback returned a
 * true value for it. If the directory can not be opened the function
 * silently returns.
 *
 *   &$data - Reference to the result data structure, handed to the callback
 *   $base  - Base directory the traversal starts from
 *   $func  - Name of the callback function (see description below)
 *   $opts  - option array which is handed through to the callback
 *   $dir   - current directory relative to $base (used for recursion)
 *   $lvl   - current recursion depth, starts with 1 (used for recursion)
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $dirs  = array();
  $files = array();

  //read in directories and files
  $dh = @opendir($base.'/'.$dir);
  if(!$dh) return;
  while(($file = readdir($dh)) !== false){
    if(preg_match('/^\./',$file)) continue; //skip hidden files and upper dirs
    if(is_dir($base.'/'.$dir.'/'.$file)){
      $dirs[] = $dir.'/'.$file;
      continue;
    }
    $files[] = $dir.'/'.$file;
  }
  closedir($dh);
  sort($files);
  sort($dirs);

  //give directories to userfunction then recurse
  //(own loop variable - don't overwrite the $dir parameter)
  foreach($dirs as $subdir){
    if ($func($data,$base,$subdir,'d',$lvl,$opts)){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }
  //now handle the files
  foreach($files as $file){
    $func($data,$base,$file,'f',$lvl,$opts);
  }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. This function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false)
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights
 * namespaces (directories) are NOT checked as this would break
 * the recursion (You can have a nonreadable dir over a readable
 * one deeper nested)
 */

/**
 * This function builds the browsable index of pages
 *
 * $opts['ns'] is the current namespace. It is compared against the
 * '/' separated directory path, only directories on the way to this
 * namespace are traversed. All other directories are added to the
 * result but not descended into.
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  //the directory name is quoted - it may contain chars special to regexps
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$opts['ns'])){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return $return;
}

/**
 * This function lists all namespaces
 *
 * Every directory is added with its id, type and level and is
 * always traversed. Files are ignored.
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  if($type == 'f') return true; //nothing to do on files

  $id = pathID($file);
  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return true;
}

/**
 * This function lists all mediafiles in a namespace
 *
 * Directories are never traversed. For each readable file the id, the
 * basename, the size and an image flag are stored. For jpg, gif and png
 * files the result of getimagesize() is added as 'info'.
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  $info       = array();
  $info['id'] = pathID($file);

  //check ACL for namespace (we have no ACL for mediafiles)
  if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
    return false;
  }

  $info['file'] = basename($file);
  $info['size'] = filesize($base.'/'.$file);
  if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
    $info['isimg'] = true;
    $info['info']  = getimagesize($base.'/'.$file);
  }else{
    $info['isimg'] = false;
  }
  $data[] = $info;

  return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Directories are never traversed, only readable .txt files of the
 * given directory are added with their id.
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return false;

  if(preg_match('#\.txt$#',$file)){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }
  return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * The query is searched as plain substring in the relative path of the
 * file (this includes the directory separators and the .txt extension).
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //simple stringmatching
  if(strpos($file,$opts['query']) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * Every readable .txt file is scanned for [[...]] links. When one of
 * the links resolves to the searched page, the id of the scanned file
 * is added to the result once.
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //construct current namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL (before touching the file - no need to read forbidden pages)
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //get text
  $text = io_readfile($base.'/'.$file);

  //match all links
  //FIXME may be incorrect because of code blocks
  //      CamelCase isn't supported, too
  preg_match_all('#\[\[(.+?)\]\]#si',$text,$matches,PREG_SET_ORDER);
  foreach($matches as $match){
    //get ID from link and discard most non wikilinks
    //(explode instead of split - the POSIX regexp functions are gone in PHP 7)
    list($mid) = explode('|',$match[1],2);
    if(preg_match("#^(https?|telnet|gopher|file|wais|ftp|ed2k|irc)://#",$mid)) continue;
    if(preg_match("#\w+>#",$mid)) continue;
    $mns = getNS($mid);
    //namespace starting with "." - prepend current namespace
    if($mns !== false && strpos($mns,'.')===0){
      $mid = $cns.":".substr($mid,1);
    }
    if($mns===false){
      //no namespace in link? add current
      $mid = $cns.':'.$mid;
    }
    $mid = cleanID($mid);

    if ($mid == $sid){
      $data[]['id'] = $cid;
      break;
    }
  }

  return true;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query
 *
 * The query is split at whitespace, a page matches when it contains at
 * least one of the tokens (case insensitive). For each matching page the
 * id, the number of matches and a HTML snippet around the first match
 * are stored. An empty query matches nothing.
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //create regexp from queries - empty tokens are dropped, they would
  //create an empty alternative which matches at every position
  $tokens = preg_split('/\s+/',$opts['query'],-1,PREG_SPLIT_NO_EMPTY);
  if(!$tokens) return true;
  $quoted = array();
  foreach($tokens as $token){
    $quoted[] = preg_quote($token,'#');
  }
  $qpreg = '('.join('|',$quoted).')';

  //get text
  $text = io_readfile($base.'/'.$file);

  //do the fulltext search
  $matches = array();
  $cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches,
                        PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE);
  if($cnt){
    //build the snippet around the first real match - with the offset
    //capture flag each match is an array of (matched string, byte offset)
    $first = $matches[0][0];
    $f     = $first[1] - 100;
    $l     = strlen($first[0]) + 200;
    if($f < 0) $f = 0;
    $raw   = substr($text,$f,$l);

    //split the raw text at the hits and escape each piece on its own, this
    //way neither the separator markup nor HTML entities can be matched
    //pieces with odd index are the captured hits
    $parts = preg_split('#'.$qpreg.'#si',$raw,-1,PREG_SPLIT_DELIM_CAPTURE);
    $body  = '';
    foreach($parts as $idx => $part){
      $part = htmlspecialchars($part);
      if($idx % 2){
        $body .= '<span class="search_hit">'.$part.'</span>';
      }else{
        $body .= $part;
      }
    }
    $snippet = '<span class="search_sep"> ... </span>'.
               $body.
               '<span class="search_sep"> ... </span>';

    $data[] = array(
      'id'      => $id,
      'count'   => $cnt,
      'snippet' => $snippet,
    );
  }

  return true;
}

/**
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered by their id
 */
function sort_search_fulltext($a,$b){
  if($a['count'] > $b['count']){
    return -1;
  }elseif($a['count'] < $b['count']){
    return 1;
  }else{
    return strcmp($a['id'],$b['id']);
  }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing .txt is removed and leading
 * and trailing colons are stripped.
 */
function pathID($path){
  $id = str_replace('/',':',$path);
  $id = preg_replace('#\.txt$#','',$id);
  $id = preg_replace('#^:+#','',$id);
  $id = preg_replace('#:+$#','',$id);
  return $id;
}