xref: /dokuwiki/inc/search.php (revision f3f0262c480d7e509b008d37c90aed884532bba8)
<?php
// Use the full open tag: the short form "<?" is only parsed as PHP when the
// short_open_tag ini setting is enabled.

require_once("inc/common.php");
4
/**
 * Walks a directory tree below $base and hands every entry to a callback.
 *
 * Entries whose name starts with a dot are skipped. Within one directory
 * the subdirectories are processed first (sorted), then the files (sorted).
 * The callback is invoked as $func($data,$base,$path,$type,$lvl,$opts) with
 * $type 'd' or 'f'; a subdirectory is only descended into when the callback
 * returns a true value for it. Return values for files are ignored.
 *
 * @param array  &$data result structure, passed through to the callback
 * @param string $base  base directory
 * @param mixed  $func  callback to invoke for each entry
 * @param array  $opts  options passed through to the callback
 * @param string $dir   current directory relative to $base
 * @param int    $lvl   current recursion depth (starts at 1)
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
  $handle = @opendir($base.'/'.$dir);
  if(!$handle) return; // unreadable or missing directory: nothing to do

  $subdirs = array();
  $entries = array();

  //collect the directory content, split into directories and files
  while(false !== ($name = readdir($handle))){
    //skip hidden files and upper dirs
    if(preg_match('/^\./',$name)) continue;

    $relative = $dir.'/'.$name;
    if(is_dir($base.'/'.$relative)){
      $subdirs[] = $relative;
    }else{
      $entries[] = $relative;
    }
  }
  closedir($handle);
  sort($entries);
  sort($subdirs);

  //directories first: ask the callback, descend if it agrees
  foreach($subdirs as $subdir){
    $descend = $func($data,$base,$subdir,'d',$lvl,$opts);
    if($descend){
      search($data,$base,$func,$opts,$subdir,$lvl+1);
    }
  }

  //then the plain files of this level
  foreach($entries as $entry){
    $func($data,$base,$entry,'f',$lvl,$opts);
  }
}
39
40/**
41 * The following functions are userfunctions to use with the search
42 * function above. This function is called for every found file or
43 * directory. When a directory is given to the function it has to
44 * decide if this directory should be traversed (true) or not (false)
45 * The function has to accept the following parameters:
46 *
47 * &$data - Reference to the result data structure
48 * $base  - Base usually $conf['datadir']
49 * $file  - current file or directory relative to $base
50 * $type  - Type either 'd' for directory or 'f' for file
51 * $lvl   - Current recursion depth
52 * $opts  - option array as given to search()
53 *
54 * return values for files are ignored
55 *
56 * All functions should check the ACL for document READ rights.
57 * Namespaces (directories) are NOT checked, as this would break
58 * the recursion (you can have a nonreadable dir above a readable
59 * one nested deeper).
60 */
61
/**
 * Callback for search(): builds the browsable index of pages
 *
 * Every directory is added to the index, but only directories lying on the
 * path to $opts['ns'] are recursed into. Files are added when they end in
 * '.txt' and the ACL check grants at least AUTH_READ.
 *
 * $opts['ns'] is the current namespace. It is compared against the
 * slash-separated $file path, so it is presumably expected in
 * slash-separated form as well - verify against the caller.
 *
 * @return bool for directories: true to recurse, false otherwise
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
  $return = true;

  // $file is a literal path, not a pattern: quote it so that characters
  // like '.' or '+' in directory names cannot alter or break the regexp
  if($type == 'd' && !preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$opts['ns'])){
    //add but don't recurse
    $return = false;
  }elseif($type == 'f' && !preg_match('#\.txt$#',$file)){
    //don't add
    return false;
  }

  //check ACL (files only, directories are always listed)
  $id = pathID($file);
  if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  $data[]=array( 'id'    => $id,
                 'type'  => $type,
                 'level' => $lvl );
  return $return;
}
89
/**
 * Callback for search(): collects all namespaces (directories)
 *
 * Files are ignored. Each directory is recorded with its ID, type and
 * recursion level. Always returns true so every directory is traversed.
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
  //only directories are of interest here
  if($type != 'f'){
    $data[] = array(
      'id'    => pathID($file),
      'type'  => $type,
      'level' => $lvl
    );
  }
  return true;
}
102
/**
 * Callback for search(): lists the mediafiles of a namespace
 *
 * Directories are never recursed into. For each readable file an entry
 * with 'id', 'file', 'size' and 'isimg' is appended to $data; files with a
 * jpg/jpeg/gif/png extension additionally get the getimagesize() result in
 * 'info'. Always returns false.
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){
  //directories are neither listed nor traversed
  if($type == 'd') return false;

  $id = pathID($file);

  //there are no ACLs for mediafiles, so the namespace is checked instead
  if(auth_quickaclcheck(getNS($id).':*') < AUTH_READ){
    return false;
  }

  $fullpath = $base.'/'.$file;
  $isimg    = (bool) preg_match("/\.(jpe?g|gif|png)$/",$file);

  $info = array(
    'id'    => $id,
    'file'  => basename($file),
    'size'  => filesize($fullpath),
    'isimg' => $isimg
  );
  if($isimg){
    $info['info'] = getimagesize($fullpath);
  }
  $data[] = $info;

  return false;
}
130
/**
 * Callback for search(): plain list of documents (for RSS namespace export)
 *
 * Appends array('id' => ...) for every '.txt' file that passes the ACL
 * read check. Always returns false, so directories are not recursed into.
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
  //directories and non-page files are skipped
  if($type == 'd' || !preg_match('#\.txt$#',$file)) return false;

  //only list what the user may read
  $id = pathID($file);
  if(auth_quickaclcheck($id) >= AUTH_READ){
    $data[]['id'] = $id;
  }
  return false;
}
147
/**
 * Callback for search(): quicksearch for matching pagenames
 *
 * $opts['query'] is the search query. It is matched as a plain substring
 * against the page's path relative to $base, with the '.txt' extension
 * removed. Matching readable pages are appended as array('id' => ...).
 *
 * @return bool true to keep recursing; false when the ACL denies a match
 *              (return values for files are ignored by search())
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //an empty query can not match anything (and strpos() would either
  //warn or match every page, depending on the PHP version)
  $query = isset($opts['query']) ? (string) $opts['query'] : '';
  if($query === '') return true;

  //match against the name without the extension, otherwise queries
  //like "txt" or "x" would hit every single page
  $name = preg_replace('#\.txt$#','',$file);

  //simple stringmatching
  if(strpos($name,$query) !== false){
    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ){
      return false;
    }
    $data[]['id'] = $id;
  }

  return true;
}
171
/**
 * Callback for search(): finds pages linking to a given page
 *
 * $opts['ns']    namespace of the page
 * $opts['name']  name of the page without namespace
 *
 * Every readable '.txt' file is scanned for [[...]] links. Each link
 * target is resolved relative to the scanned page's namespace and compared
 * with the wanted page; on the first hit the scanned page is appended to
 * $data as array('id' => ...).
 *
 * @return bool true to keep recursing; false when the ACL denies access
 *              (return values for files are ignored by search())
 */
function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //construct current ID and namespace
  $cid = pathID($file);
  $cns = getNS($cid);

  //check ACL before touching the file, unreadable pages are never loaded
  if(auth_quickaclcheck($cid) < AUTH_READ){
    return false;
  }

  //get text
  $text = io_readfile($base.'/'.$file);

  //absolute search id
  $sid = cleanID($opts['ns'].':'.$opts['name']);

  //match all links
  //FIXME may be incorrect because of code blocks
  //      CamelCase isn't supported, too
  preg_match_all('#\[\[(.+?)\]\]#si',$text,$matches,PREG_SET_ORDER);
  foreach($matches as $match){
    //get ID from link (the part before the first '|') and discard most
    //non wikilinks; explode() replaces split(), which is gone in PHP 7
    list($mid) = explode('|',$match[1],2);
    if(preg_match("#^(https?|telnet|gopher|file|wais|ftp|ed2k|irc)://#",$mid)) continue;
    if(preg_match("#\w+>#",$mid)) continue;
    $mns = getNS($mid);
    //namespace starting with "." - prepend current namespace
    if(strpos($mns,'.')===0){
      $mid = $cns.":".substr($mid,1);
    }
    if($mns===false){
      //no namespace in link? add current
      $mid = "$cns:$mid";
    }
    $mid = cleanID($mid);

    if ($mid == $sid){
      $data[]['id'] = $cid;
      break; //one hit per page is enough
    }
  }

  return true;
}
225
/**
 * Callback for search(): fulltext search
 *
 * $opts['query'] is the search query. It is split on whitespace and a page
 * matches when any of the resulting tokens occurs in its text
 * (case-insensitive). For every matching readable '.txt' file an entry is
 * appended to $data:
 *
 *   'id'      - page ID
 *   'count'   - number of hits in the page
 *   'snippet' - HTML excerpt around the first hit, hits wrapped in
 *               <span class="search_hit">
 *
 * @return bool true to keep recursing; false when the ACL denies access
 *              (return values for files are ignored by search())
 */
function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
  //we do nothing with directories
  if($type == 'd') return true;
  //only search txt files
  if(!preg_match('#\.txt$#',$file)) return true;

  //check ACL
  $id = pathID($file);
  if(auth_quickaclcheck($id) < AUTH_READ){
    return false;
  }

  //create regexp from queries; empty tokens (blank query, leading or
  //trailing whitespace) are dropped because an empty alternative would
  //match at every position of the text
  $tokens = preg_split('/\s+/',preg_quote($opts['query'],'#'),-1,PREG_SPLIT_NO_EMPTY);
  if(!$tokens) return true;
  $qpreg = '('.join('|',$tokens).')';

  //get text
  $text = io_readfile($base.'/'.$file);

  //do the fulltext search
  $matches = array();
  $cnt = preg_match_all('#'.$qpreg.'#si',$text,$matches,PREG_OFFSET_CAPTURE);
  if($cnt){
    //cut the snippet around the first real hit: up to 100 bytes before it
    //and 200 bytes plus the hit's length from there on
    $first = $matches[0][0];        // array(matched string, byte offset)
    $from  = $first[1] - 100;
    if($from < 0) $from = 0;
    $raw   = substr($text,$from,strlen($first[0]) + 200);

    //highlight on the raw text and escape each piece on its own, so the
    //query can never match inside entities or the surrounding markup.
    //With one capturing group the odd indexes are the hits.
    $parts = preg_split('#'.$qpreg.'#si',$raw,-1,PREG_SPLIT_DELIM_CAPTURE);
    $body  = '';
    foreach($parts as $idx => $part){
      $part = htmlspecialchars($part);
      if($idx % 2){
        $body .= '<span class="search_hit">'.$part.'</span>';
      }else{
        $body .= $part;
      }
    }

    $snippet = '<span class="search_sep"> ... </span>'.
               $body.
               '<span class="search_sep"> ... </span>';

    $data[] = array(
      'id'      => $id,
      'count'   => $cnt,
      'snippet' => $snippet,
    );
  }

  return true;
}
275
/**
 * Comparison callback for usort() on the result list of search_fulltext.
 *
 * Entries with a higher 'count' come first; entries with equal counts are
 * ordered by strcmp() on their 'id'.
 */
function sort_search_fulltext($a,$b){
  $hitsA = $a['count'];
  $hitsB = $b['count'];

  if($hitsA > $hitsB) return -1; // more hits sort to the front
  if($hitsA < $hitsB) return 1;

  // same number of hits: fall back to the page ID
  return strcmp($a['id'],$b['id']);
}
289
/**
 * Turns a document path into an ID: slashes become colons, a trailing
 * '.txt' is dropped and leading/trailing colons are removed.
 */
function pathID($path){
  // the patterns are applied one after another, in this order
  $strip = array(
    '#\.txt$#',  // file extension
    '#^:+#',     // leading colons
    '#:+$#'      // trailing colons
  );
  return preg_replace($strip,'',str_replace('/',':',$path));
}
300
301?>
302