xref: /dokuwiki/inc/Search/MetadataSearch.php (revision 02361d2a8a489d99bde9646525952bf8dff51577)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\MetadataIndex;
7use dokuwiki\Search\QueryParser;
8
/**
 * Class DokuWiki Metadata Search
 *
 * Static helpers that answer page queries from the metadata index:
 * backlinks, media usage and quick page name / title lookups.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Metadata Search constructor. Prevents direct object creation,
     * all functionality is exposed through static methods.
     */
    protected function __construct() {}

    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     */
    public static function backlinks($id, $ignore_perms = false)
    {
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->lookupKey('relation_references', $id);

        if (!count($result)) return $result;

        return static::filterReadablePages($result, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     */
    public static function mediause($id, $ignore_perms = false)
    {
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->lookupKey('relation_media', $id);

        if (!count($result)) return $result;

        return static::filterReadablePages($result, $ignore_perms);
    }

    /**
     * Drops pages the current user may not see and pages that do not exist,
     * then returns the remaining page ids sorted and re-indexed.
     *
     * Shared by backlinks() and mediause().
     *
     * @param array $pages        list of page ids
     * @param bool  $ignore_perms only the boolean true disables the hidden/ACL check
     * @return array sorted list of the remaining page ids
     */
    protected static function filterReadablePages($pages, $ignore_perms)
    {
        foreach (array_keys($pages) as $idx) {
            $page = $pages[$idx];

            // strict comparison on purpose: legacy callers may pass an integer here
            $forbidden = $ignore_perms !== true
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ);

            if ($forbidden || !page_exists($page, '', false)) {
                unset($pages[$idx]);
            }
        }

        sort($pages);
        return $pages;
    }

    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[]
     */
    public static function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        $action = static::class . '::pageLookupCallBack';
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $action);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event.
     *
     * @param array $data  event data, see pageLookup() for the keys
     * @return string[]
     */
    public static function pageLookupCallBack(&$data)
    {
        // split out original parameters
        $id = $data['id'];
        $parsedQuery = QueryParser::convert($id);

        // a namespace in the query restricts the results to that namespace
        $ns = null;
        if (count($parsedQuery['ns']) > 0) {
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $in_ns    = $data['in_ns'];
        $in_title = $data['in_title'];
        $cleaned  = cleanID($id);

        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            $MetadataIndex = MetadataIndex::getInstance();

            // match against the page ids, with or without their namespace part
            foreach ($MetadataIndex->getPages() as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false && !isset($pages[$p_id])) {
                    $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                }
            }

            if ($in_title) {
                // pageLookupTitleCompare() is protected, so a plain string callable
                // would not be callable from another class. A closure created here
                // keeps this class' scope wherever it is finally invoked.
                // NOTE(review): assumes lookupKey() calls the comparator itself - confirm
                $func = function ($search, $title) {
                    return static::pageLookupTitleCompare($search, $title);
                };
                foreach ($MetadataIndex->lookupKey('title', $id, $func) as $p_id) {
                    if (!isset($pages[$p_id])) {
                        $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                    }
                }
            }
        }

        if ($ns !== null) {
            foreach (array_keys($pages) as $p_id) {
                if (strpos($p_id, $ns) !== 0) {
                    unset($pages[$p_id]);
                }
            }
        }

        // discard hidden pages
        // discard nonexistent pages
        // check ACL permissions
        foreach (array_keys($pages) as $idx) {
            if (!isVisiblePage($idx) || !page_exists($idx) || auth_quickaclcheck($idx) < AUTH_READ) {
                unset($pages[$idx]);
            }
        }

        $pages = static::filterResultsByTime($pages, $data['after'], $data['before']);

        uksort($pages, static::class . '::pagesorter');
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. This function is a wrapper around stripos with
     * adapted argument order and return value.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true if $search occurs in $title, ignoring case
     */
    protected static function pageLookupTitleCompare($search, $title)
    {
        return stripos($title, $search) !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected static function pagesorter($a, $b)
    {
        // the number of separators is the namespace depth of the page
        $depthA = substr_count($a, ':');
        $depthB = substr_count($b, ':');
        if ($depthA < $depthB) {
            return -1;
        }
        if ($depthA > $depthB) {
            return 1;
        }
        return strcmp($a, $b);
    }

    /**
     * Removes all results whose page file was modified outside the given time frame
     *
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestamp or strtotime arguments
     * @param int|string $before  only returns results with mtime before this date,
     *                            accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected static function filterResultsByTime(array $results, $after, $before)
    {
        if (!$after && !$before) {
            return $results;
        }

        $after  = static::timeLimitToTimestamp($after);
        $before = static::timeLimitToTimestamp($before);

        foreach (array_keys($results) as $id) {
            $mTime = filemtime(wikiFN($id));
            // a limit that is unset or could not be parsed is falsy and thus ignored
            if (($after && $after > $mTime) || ($before && $before < $mTime)) {
                unset($results[$id]);
            }
        }
        return $results;
    }

    /**
     * Converts a time limit as accepted by filterResultsByTime() to a timestamp
     *
     * Unset limits never reach strtotime(): passing null to it is deprecated
     * as of PHP 8.1 and an empty string can not be parsed anyway.
     *
     * @param int|string|null $limit timestamp or strtotime argument
     * @return int|false the timestamp, false if there is no usable limit
     */
    protected static function timeLimitToTimestamp($limit)
    {
        if (is_int($limit)) {
            return $limit;
        }
        if ($limit === null || $limit === '') {
            return false;
        }
        return strtotime($limit);
    }
}
255