<?php
/**
 * DokuWiki search functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

require_once("inc/common.php");

/**
 * Recurse a directory
 *
 * This function recurses into a given base directory and calls the
 * supplied function for each file and directory. Entries whose name
 * starts with a dot (hidden files, '.' and '..') are skipped. Within one
 * directory all subdirectories are handled (sorted by name) before the
 * files (also sorted by name).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array  &$data result structure, handed through to the userfunction
 * @param string $base  directory the search starts from
 * @param string $func  name of the userfunction called for every entry
 * @param array  $opts  options handed unchanged to the userfunction
 * @param string $dir   current directory relative to $base (used by the recursion)
 * @param int    $lvl   current recursion depth, starting at 1
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
    $dirs  = array();
    $files = array();

    //read in directories and files
    //an unreadable directory is silently skipped (deliberate best effort)
    $dh = @opendir($base.'/'.$dir);
    if(!$dh) return;
    while(($entry = readdir($dh)) !== false){
        if(preg_match('/^\./',$entry)) continue; //skip hidden files and upper dirs
        if(is_dir($base.'/'.$dir.'/'.$entry)){
            $dirs[] = $dir.'/'.$entry;
        }else{
            $files[] = $dir.'/'.$entry;
        }
    }
    closedir($dh);
    sort($files);
    sort($dirs);

    //give directories to userfunction then recurse if it says so
    foreach($dirs as $subdir){
        if($func($data,$base,$subdir,'d',$lvl,$opts)){
            search($data,$base,$func,$opts,$subdir,$lvl+1);
        }
    }
    //now handle the files (return values are ignored)
    foreach($files as $file){
        $func($data,$base,$file,'f',$lvl,$opts);
    }
}

/**
 * The following functions are userfunctions to use with the search
 * function above. Such a function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false).
 * The function has to accept the following parameters:
 *
 * &$data - Reference to the result data structure
 * $base  - Base, usually $conf['datadir']
 * $file  - current file or directory relative to $base
 * $type  - Type, either 'd' for directory or 'f' for file
 * $lvl   - Current recursion depth
 * $opts  - option array as given to search()
 *
 * Return values for files are ignored.
 *
 * All functions should check the ACL for document READ rights.
 * Namespaces (directories) are NOT checked as this would break
 * the recursion (you can have a nonreadable dir above a readable
 * one nested deeper).
 *
 * The helpers auth_quickaclcheck(), getNS(), cleanID(), io_readfile() and
 * the AUTH_READ constant are not defined in this file; presumably they are
 * made available through inc/common.php.
 */

/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the current namespace, written as a path with '/'
 * separators (it is compared against the directory path).
 *
 * Directories are always added; they are only descended into when they
 * lie on the path to $opts['ns']. Files are added when they end in .txt
 * and are readable.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
    $return = true;
    $ns     = isset($opts['ns']) ? $opts['ns'] : '';

    //the directory name is quoted so that regexp special chars in it
    //are matched literally
    if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$ns)){
        //add but don't recurse
        $return = false;
    }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
        //don't add
        return false;
    }

    //check ACL (files only, see note on namespaces above)
    $id = pathID($file);
    if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
        return false;
    }

    $data[] = array( 'id'    => $id,
                     'type'  => $type,
                     'level' => $lvl );
    return $return;
}

/**
 * List all namespaces
 *
 * Adds an entry for every directory and always asks for recursion.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
    if($type == 'f') return true; //nothing to do on files

    $id = pathID($file);
    $data[] = array( 'id'    => $id,
                     'type'  => $type,
                     'level' => $lvl );
    return true;
}

/**
 * List all mediafiles in a namespace
 *
 * Never recurses. Each readable file gets an entry with the keys
 * 'id', 'file', 'size' and 'isimg'; images (jpg/jpeg/gif/png) also carry
 * the getimagesize() result in 'info'.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
    //we do nothing with directories
    if($type == 'd') return false;

    $info       = array();
    $info['id'] = pathID($file);

    //check ACL for namespace (we have no ACL for mediafiles)
    if(auth_quickaclcheck(getNS($info['id']).':*') < AUTH_READ){
        return false;
    }

    $info['file'] = basename($file);
    $info['size'] = filesize($base.'/'.$file);
    if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
        $info['isimg'] = true;
        $info['info']  = getimagesize($base.'/'.$file);
    }else{
        $info['isimg'] = false;
    }
    $data[] = $info;

    return false;
}

/**
 * This function just lists documents (for RSS namespace export)
 *
 * Never recurses. Every readable .txt file is added as array('id' => ...).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
    //we do nothing with directories
    if($type == 'd') return false;
    if(preg_match('#\.txt$#',$file)){
        //check ACL
        $id = pathID($file);
        if(auth_quickaclcheck($id) < AUTH_READ){
            return false;
        }
        $data[]['id'] = $id;
    }
    return false;
}

/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query. It is matched as a plain,
 * case sensitive substring against the file path relative to $base.
 * An empty or missing query matches nothing.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
    //we do nothing with directories
    if($type == 'd') return true;
    //only search txt files
    if(!preg_match('#\.txt$#',$file)) return true;

    //strpos() with an empty needle warns on older PHP versions and
    //matches everything on newer ones - so bail out early
    $query = isset($opts['query']) ? (string) $opts['query'] : '';
    if($query === '') return true;

    //simple stringmatching
    if(strpos($file,$query) !== false){
        //check ACL
        $id = pathID($file);
        if(auth_quickaclcheck($id) < AUTH_READ){
            return false;
        }
        $data[]['id'] = $id;
    }

    return true;
}

/**
 * Search for backlinks to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * Every readable .txt file is scanned for [[...]] links. When one of
 * them resolves to the wanted page the ID of the scanned file is added
 * (once) as array('id' => ...).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
    //we do nothing with directories
    if($type == 'd') return true;
    //only search txt files
    if(!preg_match('#\.txt$#',$file)) return true;

    //construct current ID and namespace
    $cid = pathID($file);
    $cns = getNS($cid);

    //check ACL before touching the file
    if(auth_quickaclcheck($cid) < AUTH_READ){
        return false;
    }

    //absolute search id
    $sid = cleanID($opts['ns'].':'.$opts['name']);

    //get text
    $text = io_readfile($base.'/'.$file);

    //match all links
    //FIXME may be incorrect because of code blocks
    //      CamelCase isn't supported, too
    $matches = array();
    preg_match_all('#\[\[(.+?)\]\]#si',$text,$matches,PREG_SET_ORDER);
    foreach($matches as $match){
        //get ID from link (part before the first "|") and discard most non wikilinks
        list($mid) = explode('|',$match[1],2);
        if(preg_match("#^(https?|telnet|gopher|file|wais|ftp|ed2k|irc)://#",$mid)) continue;
        if(preg_match("#\w+>#",$mid)) continue;

        $mns = getNS($mid);
        if($mns === false){
            //no namespace in link? add current
            $mid = "$cns:$mid";
        }elseif(strpos($mns,'.') === 0){
            //namespace starting with "." - prepend current namespace
            $mid = $cns.":".substr($mid,1);
        }
        $mid = cleanID($mid);

        if($mid == $sid){
            $data[]['id'] = $cid;
            break; //one entry per linking page is enough
        }
    }

    return true;
}

/**
 * Fulltextsearch
 *
 * $opts['query'] is the search query. It is split at whitespace and a
 * page matches when any of the resulting words occurs in it (case
 * insensitive). An empty query matches nothing.
 *
 * Every hit page is added as array with the keys 'id', 'count' (number
 * of matches in the page) and 'snippet' (HTML with the text around the
 * first match, all matches in it highlighted).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
    //we do nothing with directories
    if($type == 'd') return true;
    //only search txt files
    if(!preg_match('#\.txt$#',$file)) return true;

    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
        return false;
    }

    //create regexp from queries - empty tokens are dropped because an
    //empty alternative would match at every position of the text
    $query  = isset($opts['query']) ? (string) $opts['query'] : '';
    $tokens = preg_split('/\s+/',preg_quote($query,'#'),-1,PREG_SPLIT_NO_EMPTY);
    if(!count($tokens)) return true;
    $qpreg = '('.join('|',$tokens).')';

    //get text
    $text = io_readfile($base.'/'.$file);

    //do the fulltext search, remembering the offsets of the hits
    $matches = array();
    $cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches,PREG_OFFSET_CAPTURE);
    if($cnt){
        //build the snippet around the first hit: 100 bytes before it
        //and 100 bytes after it
        $hit = $matches[0][0][0];
        $f   = $matches[0][0][1] - 100;
        $l   = strlen($hit) + 200;
        if($f < 0) $f = 0;
        $raw = substr($text,$f,$l);

        //split the raw text at the hits and escape each piece on its own,
        //this way neither the escaping nor the surrounding markup can be
        //touched by the highlighting (odd indexes are the hits)
        $parts = preg_split('#'.$qpreg.'#si',$raw,-1,PREG_SPLIT_DELIM_CAPTURE);
        $body  = '';
        foreach($parts as $idx => $part){
            if($idx % 2){
                $body .= '<span class="search_hit">'.htmlspecialchars($part).'</span>';
            }else{
                $body .= htmlspecialchars($part);
            }
        }
        $snippet = '<span class="search_sep"> ... </span>'.
                   $body.
                   '<span class="search_sep"> ... </span>';

        $data[] = array(
            'id'      => $id,
            'count'   => $cnt,
            'snippet' => $snippet,
        );
    }

    return true;
}

/**
 * fulltext sort
 *
 * Callback sort function for use with usort to sort the data
 * structure created by search_fulltext. Sorts descending by count,
 * entries with the same count are ordered ascending by id.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sort_search_fulltext($a,$b){
    if($a['count'] > $b['count']){
        return -1;
    }elseif($a['count'] < $b['count']){
        return 1;
    }else{
        return strcmp($a['id'],$b['id']);
    }
}

/**
 * translates a document path to an ID
 *
 * Slashes become colons, a trailing ".txt" is removed and leading and
 * trailing colons are stripped.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function pathID($path){
    $id = str_replace('/',':',$path);
    $id = preg_replace('#\.txt$#','',$id);
    $id = preg_replace('#^:+#','',$id);
    $id = preg_replace('#:+$#','',$id);
    return $id;
}