xref: /dokuwiki/inc/Search/MetadataSearch.php (revision be5c1ea2da9ce21461b05de9ad624446de1786be)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\MetadataIndex;
7use dokuwiki\Search\PageIndex;
8use dokuwiki\Search\QueryParser;
9
/**
 * Class DokuWiki Metadata Search
 *
 * Collection of static lookups that are answered from the metadata index
 * and the page index: backlinks, media usage and page name/title lookup.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Metadata Search constructor. Prevents direct object creation,
     * all functionality is exposed through static methods.
     */
    protected function __construct() {}

    /**
     * Returns the backlinks for a given page
     *
     * Uses the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     */
    public static function backlinks($id, $ignore_perms = false)
    {
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->lookupKey('relation_references', $id);

        return static::filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the relation media metadata property and the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     */
    public static function mediause($id, $ignore_perms = false)
    {
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->lookupKey('relation_media', $id);

        return static::filterAccessiblePages($result, $ignore_perms);
    }

    /**
     * Shared post-processing for index lookups that return a list of page ids
     *
     * Pages that do not exist are always removed. Hidden pages and pages the
     * current user may not read are removed as well, unless $ignore_perms is
     * exactly boolean true. The remaining ids are returned sorted and
     * re-indexed. An empty lookup result is handed back untouched.
     *
     * @param array $result       page ids as returned by the index lookup
     * @param bool  $ignore_perms only a strict true disables the hidden/ACL check
     * @return array sorted list of accessible, existing page ids
     */
    protected static function filterAccessiblePages($result, $ignore_perms)
    {
        if (!count($result)) return $result;

        foreach ($result as $idx => $page) {
            // check ACL permissions (skipped only for an explicit boolean true)
            $denied = $ignore_perms !== true
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ);

            if ($denied || !page_exists($page, '', false)) {
                unset($result[$idx]);
            }
        }

        sort($result);
        return $result;
    }

    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[] list of pages as array(pageid => First Heading)
     */
    public static function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        $action = static::class.'::pageLookupCallBack';
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $action);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Reads the keys
     * 'id', 'in_ns', 'in_title', 'after' and 'before' from the event data.
     *
     * @param array $data  event data
     * @return string[]
     */
    public static function pageLookupCallBack(&$data)
    {
        $PageIndex = PageIndex::getInstance();

        // split out original parameters
        $id = $data['id'];
        $parsedQuery = QueryParser::convert($id);

        // a namespace given in the query restricts the results to that
        // namespace; the remaining terms become the actual search string
        $ns = null;
        if (count($parsedQuery['ns']) > 0) {
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $in_ns    = $data['in_ns'];
        $in_title = $data['in_title'];
        $cleaned  = cleanID($id);

        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            // match against the page name (or the full id when $in_ns is set)
            foreach ($PageIndex->getPages() as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false && !isset($pages[$p_id])) {
                    $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                }
            }

            if ($in_title) {
                $MetadataIndex = MetadataIndex::getInstance();
                // pageLookupTitleCompare() is protected: a string callable naming
                // it cannot be invoked from another class' scope, whereas a closure
                // created here keeps the scope of this class.
                $func = static function ($search, $title) {
                    return static::pageLookupTitleCompare($search, $title);
                };
                foreach ($MetadataIndex->lookupKey('title', $id, $func) as $p_id) {
                    if (!isset($pages[$p_id])) {
                        $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                    }
                }
            }
        }

        foreach (array_keys($pages) as $p_id) {
            // discard pages outside of the requested namespace
            if ($ns !== null && strpos($p_id, $ns) !== 0) {
                unset($pages[$p_id]);
                continue;
            }

            // discard hidden pages
            // discard nonexistent pages
            // check ACL permissions
            if (!isVisiblePage($p_id) || !page_exists($p_id) || auth_quickaclcheck($p_id) < AUTH_READ) {
                unset($pages[$p_id]);
            }
        }

        $pages = static::filterResultsByTime($pages, $data['after'], $data['before']);

        uksort($pages, static::class.'::pagesorter');
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. This function is a wrapper around stripos with
     * adapted argument order and return value.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true when $search occurs (case-insensitively) in $title
     */
    protected static function pageLookupTitleCompare($search, $title)
    {
        return stripos($title, $search) !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected static function pagesorter($a, $b)
    {
        // the number of ':' separators is the namespace depth of a page id
        $depthA = substr_count($a, ':');
        $depthB = substr_count($b, ':');
        if ($depthA !== $depthB) {
            return ($depthA < $depthB) ? -1 : 1;
        }
        return strcmp($a, $b);
    }

    /**
     * Restrict search results to pages modified within a time window
     *
     * Both limits are inclusive and optional; when neither yields a usable
     * timestamp the results are returned unchanged. While a limit is active,
     * pages whose file modification time cannot be determined are dropped.
     *
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestamp or strtotime arguments
     * @param int|string $before  only returns results with mtime before this date,
     *                            accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected static function filterResultsByTime(array $results, $after, $before)
    {
        $after  = static::normalizeTime($after);
        $before = static::normalizeTime($before);
        if (!$after && !$before) return $results;

        foreach (array_keys($results) as $id) {
            $file  = wikiFN($id);
            $mTime = is_file($file) ? filemtime($file) : false;

            if ($mTime === false
                || ($after && $after > $mTime)
                || ($before && $before < $mTime)
            ) {
                unset($results[$id]);
            }
        }
        return $results;
    }

    /**
     * Convert a time limit into a unix timestamp
     *
     * Integers are used as they are. Other scalar values are parsed with
     * strtotime(); a purely numeric string that strtotime() rejects is taken
     * as a timestamp. null, booleans, empty strings and non-scalars mean
     * "no limit" and never reach strtotime().
     *
     * @param mixed $time timestamp or strtotime argument
     * @return int|false the timestamp, false when there is no usable limit
     */
    protected static function normalizeTime($time)
    {
        if (is_int($time)) return $time;
        if (!is_scalar($time) || is_bool($time)) return false;

        $text = (string) $time;
        if ($text === '') return false;

        $parsed = strtotime($text);
        if ($parsed === false && preg_match('/^[0-9]+$/', $text)) {
            return (int) $text;
        }
        return $parsed;
    }
}
258