xref: /dokuwiki/inc/search.php (revision 618191d008b98cb421694c541145c863d7b300ce)
1<?php
2/**
3 * DokuWiki search functions
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Andreas Gohr <andi@splitbrain.org>
7 */
8
9if(!defined('DOKU_INC')) die('meh.');
10
/**
 * Walk a directory tree and report every entry to a callback
 *
 * Starting at $base/$dir the directory is read once; entries whose name
 * starts with a dot or an underscore are ignored. Directories are handed
 * to the callback first (type 'd'); when the callback returns a true value
 * the directory is searched recursively with an increased level. After all
 * directories have been handled the files of the current directory are
 * handed to the callback (type 'f'); its return value is ignored for files.
 *
 * Nothing happens when the directory can not be opened.
 *
 * @param   array    &$data The results of the search are stored here
 * @param   string    $base Where to start the search
 * @param   callback  $func Callback (function name or array with object,method)
 * @param   array     $opts option array will be given to the Callback
 * @param   string    $dir  Current directory beyond $base
 * @param   int       $lvl  Recursion Level
 * @param   mixed     $sort 'natural' to use natural order sorting (default); 'date' to sort files by filemtime (newest first); leave empty to skip sorting. Directories are always sorted naturally unless sorting is skipped.
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data,$base,$func,$opts,$dir='',$lvl=1,$sort='natural'){
    $handle = @opendir($base.'/'.$dir);
    if(!$handle) return;

    $subdirs   = array(); // directories, relative to $base
    $entries   = array(); // files, relative to $base
    $fullpaths = array(); // the same files including $base, used for date sorting

    //collect directories and files
    while(($name = readdir($handle)) !== false){
        if(preg_match('/^[\._]/',$name)) continue; //skip hidden files and upper dirs

        $relative = $dir.'/'.$name;
        $absolute = $base.'/'.$relative;
        if(is_dir($absolute)){
            $subdirs[] = $relative;
        }else{
            $entries[]   = $relative;
            $fullpaths[] = $absolute;
        }
    }
    closedir($handle);

    if(!empty($sort)){
        if($sort == 'date'){
            // order the relative file names by the modification times of their full paths
            @array_multisort(array_map('filemtime', $fullpaths), SORT_NUMERIC, SORT_DESC, $entries);
        }else{
            // natural
            natsort($entries);
        }
        natsort($subdirs);
    }

    //directories first: the callback decides if we descend
    foreach($subdirs as $subdir){
        $descend = call_user_func_array($func, array(&$data,$base,$subdir,'d',$lvl,$opts));
        if($descend){
            search($data,$base,$func,$opts,$subdir,$lvl+1,$sort);
        }
    }

    //then the files of this directory
    foreach($entries as $entry){
        call_user_func_array($func, array(&$data,$base,$entry,'f',$lvl,$opts));
    }
}
64
65/**
66 * The following functions are userfunctions to use with the search
67 * function above. This function is called for every found file or
68 * directory. When a directory is given to the function it has to
69 * decide if this directory should be traversed (true) or not (false)
70 * The function has to accept the following parameters:
71 *
72 * array &$data  - Reference to the result data structure
73 * string $base  - Base usually $conf['datadir']
74 * string $file  - current file or directory relative to $base
75 * string $type  - Type either 'd' for directory or 'f' for file
76 * int    $lvl   - Current recursion depth
77 * array  $opts  - option array as given to search()
78 *
79 * return values for files are ignored
80 *
81 * All functions should check the ACL for document READ rights.
82 * Namespaces (directories) are NOT checked (when sneaky_index is 0), as this
83 * would break the recursion (you can have a nonreadable dir over a readable
84 * one deeper nested). Also make sure to check the file type (for example
85 * in case of lockfiles).
86 */
87
/**
 * Searches for pages beginning with the given query
 *
 * Callback for search(). The incoming options are replaced by a fixed set
 * for search_universal(): only pages are listed and their ID has to match
 * the query at the very beginning or directly after a ':' separator.
 *
 * $opts['query'] is the search query
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth
 * @param array  $opts  option array as given to search()
 * @return bool the result of search_universal()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search_qsearch(&$data,$base,$file,$type,$lvl,$opts){
    // search_universal() adds the '/' delimiters around idmatch itself, so
    // only the bare expression is passed. A trailing '/' would result in
    // the invalid pattern '/...//' which never matches anything.
    $opts = array(
            'idmatch'   => '(^|:)'.preg_quote($opts['query'],'/'),
            'listfiles' => true,
            'pagesonly' => true,
            );
    return search_universal($data,$base,$file,$type,$lvl,$opts);
}
101
/**
 * Build the browsable index of pages
 *
 * Callback for search(). Translates the index options into a fixed option
 * set for search_universal(): namespaces are listed, pages are listed
 * unless $opts['nofiles'] is set, and the sneaky_index configuration is
 * passed on as sneakyacl.
 *
 * $opts['ns'] is the currently viewed namespace. A directory is given the
 * depth 0 (search_universal() then recurses into it) when it is that
 * namespace or lies on the path to it; all other directories get -1, which
 * makes the depth check of search_universal() stop the recursion.
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth
 * @param array  $opts  option array as given to search()
 * @return bool the result of search_universal()
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_index(&$data,$base,$file,$type,$lvl,$opts){
    global $conf;

    // Hacky, should rather use recmatch
    $onpath = preg_match('#^'.preg_quote($file, '#').'(/|$)#','/'.$opts['ns']);

    $settings = array();
    $settings['pagesonly'] = true;
    $settings['listdirs']  = true;
    $settings['listfiles'] = empty($opts['nofiles']);
    $settings['sneakyacl'] = $conf['sneaky_index'];
    $settings['depth']     = $onpath ? 0 : -1;

    return search_universal($data, $base, $file, $type, $lvl, $settings);
}
122
/**
 * List all namespaces
 *
 * Callback for search(). Calls search_universal() with 'listdirs' as the
 * only option; the options given by the caller are not passed on.
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth
 * @param array  $opts  option array as given to search() (unused)
 * @return bool the result of search_universal()
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
    return search_universal($data,$base,$file,$type,$lvl,array('listdirs' => true));
}
134
/**
 * List all mediafiles in a namespace
 *
 * Callback for search(). Directories are never listed, the return value
 * only tells search() whether to descend into them. Every file with a
 * valid ID that passes the ACL and pattern checks is appended to $data
 * with the keys id, perm, file, size, mtime, writable, isimg and, for
 * jpg/jpeg/gif/png files, meta (a JpegMeta object). The hash key is added
 * on request.
 *
 * All options are optional:
 *
 * $opts['depth']   recursion level, 0 (or unset) for unlimited
 * $opts['showmsg'] shows message if invalid media id is used
 * $opts['skipacl'] skip acl checking
 * $opts['pattern'] check given pattern (a complete regular expression
 *                  including delimiters) against the media ID
 * $opts['hash']    add hashes to result list
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth
 * @param array  $opts  option array as given to search()
 * @return bool for directories: traverse (true) or not (false);
 *              always false for files
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_media(&$data,$base,$file,$type,$lvl,$opts){

    //we do nothing with directories
    if($type == 'd') {
        if(empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file,'/');
        if($depth >= $opts['depth']) return false; // depth reached
        return true;
    }

    $info         = array();
    $info['id']   = pathID($file,true);
    if($info['id'] != cleanID($info['id'])){
        if(!empty($opts['showmsg']))
            msg(hsc($info['id']).' is not a valid file name for DokuWiki - skipped',-1);
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $info['perm'] = auth_quickaclcheck(getNS($info['id']).':*');
    if(empty($opts['skipacl']) && $info['perm'] < AUTH_READ){
        return false;
    }

    //check pattern filter
    if(!empty($opts['pattern']) && !@preg_match($opts['pattern'], $info['id'])){
        return false;
    }

    $info['file']     = utf8_basename($file);
    $info['size']     = filesize($base.'/'.$file);
    $info['mtime']    = filemtime($base.'/'.$file);
    $info['writable'] = is_writable($base.'/'.$file);
    if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
        $info['isimg'] = true;
        $info['meta']  = new JpegMeta($base.'/'.$file);
    }else{
        $info['isimg'] = false;
    }
    if(!empty($opts['hash'])){
        $info['hash'] = md5(io_readFile(mediaFN($info['id']),false));
    }

    $data[] = $info;

    return false;
}
187
/**
 * This function just lists documents (for RSS namespace export)
 *
 * Callback for search(). Directories are neither listed nor traversed.
 * For every file ending in .txt that may be read by the current user an
 * entry holding the page ID is appended to $data.
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search (unused)
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth (unused)
 * @param array  $opts  option array as given to search() (unused)
 * @return bool always false
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_list(&$data,$base,$file,$type,$lvl,$opts){
    //no directories and only txt files
    if($type == 'd' || substr($file,-4) != '.txt') return false;

    //add the page when the ACL allows reading it
    $id = pathID($file);
    if(auth_quickaclcheck($id) >= AUTH_READ){
        $data[]['id'] = $id;
    }
    return false;
}
207
/**
 * Quicksearch for searching matching pagenames
 *
 * Callback for search(). All directories are traversed. A file ending in
 * .txt is added to $data (as an entry holding its page ID) when its path
 * relative to $base contains the query string and the page may be read.
 *
 * $opts['query'] is the search query
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search (unused)
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth (unused)
 * @param array  $opts  option array as given to search()
 * @return bool true, except for matching pages that may not be read
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
    //always descend into directories, ignore everything but txt files
    if($type == 'd' || substr($file,-4) != '.txt') return true;

    //simple stringmatching against the relative path
    if(empty($opts['query']) || strpos($file,$opts['query']) === false) return true;

    //check ACL
    $id = pathID($file);
    if(auth_quickaclcheck($id) < AUTH_READ) return false;

    $data[]['id'] = $id;
    return true;
}
234
/**
 * Just lists all documents
 *
 * Callback for search(). Every readable file ending in .txt is appended
 * to $data with the keys id, rev, mtime and size (rev and mtime both hold
 * the file modification time) and optionally hash.
 *
 * All options are optional:
 *
 * $opts['depth']   recursion level, 0 for all. The level is counted in
 *                  path segments of $file: directories with at least that
 *                  many segments are not traversed, files with more
 *                  segments are skipped.
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * @param array  &$data Reference to the result data structure
 * @param string $base  Base directory of the search
 * @param string $file  current file or directory relative to $base
 * @param string $type  Type either 'd' for directory or 'f' for file
 * @param int    $lvl   Current recursion depth (unused)
 * @param array  $opts  option array as given to search()
 * @return bool for directories: traverse (true) or not (false);
 *              the value is ignored for files
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function search_allpages(&$data,$base,$file,$type,$lvl,$opts){
    if(!empty($opts['depth'])){
        $parts = explode('/',ltrim($file,'/'));
        if(($type == 'd' && count($parts) >= $opts['depth'])
          || ($type != 'd' && count($parts) > $opts['depth'])){
            return false; // depth reached
        }
    }

    //we do nothing with directories
    if($type == 'd'){
        return true;
    }

    //only search txt files
    if(substr($file,-4) != '.txt') return true;

    $item = array();
    $item['id']   = pathID($file);
    if(empty($opts['skipacl']) && auth_quickaclcheck($item['id']) < AUTH_READ){
        return false;
    }

    $item['rev']   = filemtime($base.'/'.$file);
    $item['mtime'] = $item['rev'];
    $item['size']  = filesize($base.'/'.$file);
    if(!empty($opts['hash'])){
        $item['hash'] = md5(trim(rawWiki($item['id'])));
    }

    $data[] = $item;
    return true;
}
276
277/* ------------- helper functions below -------------- */
278
/**
 * fulltext sort
 *
 * Comparison callback for usort(). Entries with a higher 'count' come
 * first; entries that are neither higher nor lower are ordered by a
 * string comparison of their 'id'.
 *
 * @param array $a first entry, has to provide the keys count and id
 * @param array $b second entry, has to provide the keys count and id
 * @return int negative when $a sorts first, positive when $b sorts first
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function sort_search_fulltext($a,$b){
    if($a['count'] > $b['count']) return -1;
    if($a['count'] < $b['count']) return 1;

    // same count, fall back to the ID
    return strcmp($a['id'],$b['id']);
}
296
/**
 * translates a document path to an ID
 *
 * The path is decoded with utf8_decodeFN(), slashes become colons, a
 * trailing .txt extension is removed unless $keeptxt is set and finally
 * leading and trailing colons are stripped.
 *
 * @param string $path    path of the document
 * @param bool   $keeptxt keep the .txt extension in the ID?
 * @return string the ID
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @todo    move to pageutils
 */
function pathID($path,$keeptxt=false){
    $id = str_replace('/', ':', utf8_decodeFN($path));
    if($keeptxt){
        return trim($id, ':');
    }
    return trim(preg_replace('#\.txt$#', '', $id), ':');
}
310
311
/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used. All of them are
 * optional, a missing option is treated like an empty one.
 *
 * depth      int     recursion depth. 0 for unlimited
 * keeptxt    bool    keep .txt extension for IDs
 * listfiles  bool    include files in listing
 * listdirs   bool    include namespaces in listing
 * pagesonly  bool    restrict files to pages
 * skipacl    bool    do not check for READ permission
 * sneakyacl  bool    don't recurse into nonreadable dirs
 * hash       bool    create MD5 hash for files
 * meta       bool    return file metadata
 * filematch  string  match files against this regexp
 * idmatch    string  match full ID against this regexp
 * dirmatch   string  match directory against this regexp when adding
 * nsmatch    string  match namespace against this regexp when adding
 * recmatch   string  match directory against this regexp when recursing
 * showmsg    bool    warn about non-ID files
 * showhidden bool    show hidden files too
 * firsthead  bool    return first heading for pages
 *
 * The regular expressions are given without delimiters, they are wrapped
 * in '/' by this function.
 *
 * Each listed item has the keys id, ns, perm, type, level and open (for
 * directories: will it be traversed). With meta the keys file, size,
 * mtime, rev, writable and executable are added; hash and title are added
 * for files when hash or firsthead are requested.
 *
 * @param array &$data - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data,$base,$file,$type,$lvl,$opts){
    $item   = array();
    $return = true;

    // get ID and check if it is a valid one
    $item['id'] = pathID($file,($type == 'd' || !empty($opts['keeptxt'])));
    if($item['id'] != cleanID($item['id'])){
        if(!empty($opts['showmsg']))
            msg(hsc($item['id']).' is not a valid file name for DokuWiki - skipped',-1);
        return false; // skip non-valid files
    }
    $item['ns']  = getNS($item['id']);

    if($type == 'd') {
        // decide if recursion into this directory is wanted
        if(empty($opts['depth'])){
            $return = true; // recurse forever
        }else{
            // false as soon as the configured depth is reached
            $return = (substr_count($file,'/') < $opts['depth']);
        }

        // a missing recmatch results in the empty pattern which matches everything
        $recmatch = isset($opts['recmatch']) ? $opts['recmatch'] : '';
        if($return && !preg_match('/'.$recmatch.'/',$file)){
            $return = false; // doesn't match
        }
    }

    // check ACL
    if(empty($opts['skipacl'])){
        if($type == 'd'){
            $item['perm'] = auth_quickaclcheck($item['id'].':*');
        }else{
            $item['perm'] = auth_quickaclcheck($item['id']); //FIXME check namespace for media files
        }
    }else{
        $item['perm'] = AUTH_DELETE;
    }

    // are we done here maybe?
    if($type == 'd'){
        if(empty($opts['listdirs'])) return $return;
        if(empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) return false; //neither list nor recurse
        if(!empty($opts['dirmatch']) && !preg_match('/'.$opts['dirmatch'].'/',$file)) return $return;
        if(!empty($opts['nsmatch']) && !preg_match('/'.$opts['nsmatch'].'/',$item['ns'])) return $return;
    }else{
        if(empty($opts['listfiles'])) return $return;
        if(empty($opts['skipacl']) && $item['perm'] < AUTH_READ) return $return;
        if(!empty($opts['pagesonly']) && (substr($file,-4) != '.txt')) return $return;
        if(empty($opts['showhidden']) && isHiddenPage($item['id'])) return $return;
        if(!empty($opts['filematch']) && !preg_match('/'.$opts['filematch'].'/',$file)) return $return;
        if(!empty($opts['idmatch']) && !preg_match('/'.$opts['idmatch'].'/',$item['id'])) return $return;
    }

    // still here? prepare the item
    $item['type']  = $type;
    $item['level'] = $lvl;
    $item['open']  = $return;

    if(!empty($opts['meta'])){
        $item['file']       = utf8_basename($file);
        $item['size']       = filesize($base.'/'.$file);
        $item['mtime']      = filemtime($base.'/'.$file);
        $item['rev']        = $item['mtime'];
        $item['writable']   = is_writable($base.'/'.$file);
        $item['executable'] = is_executable($base.'/'.$file);
    }

    if($type == 'f'){
        if(!empty($opts['hash'])) $item['hash'] = md5(io_readFile($base.'/'.$file,false));
        if(!empty($opts['firsthead'])) $item['title'] = p_get_first_heading($item['id'],METADATA_DONT_RENDER);
    }

    // finally add the item
    $data[] = $item;
    return $return;
}
428
429//Setup VIM: ex: et ts=4 :
430