xref: /dokuwiki/inc/Search/MetadataSearch.php (revision a02395a1a2bed351d474b885b6a9b7ef5345a611)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\MetadataIndex;
7use dokuwiki\Search\QueryParser;
8use dokuwiki\Utf8;
9
10/**
11 * Class DokuWiki Metadata Search
12 *
13 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
14 * @author     Andreas Gohr <andi@splitbrain.org>
15 */
class MetadataSearch
{
    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[]
     */
    public function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        // the event data must live in a variable so that event handlers can modify it
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, [$this, 'pageLookupCallBack']);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Pages are matched by
     * name (and optionally by title), restricted by the namespace filters found
     * in the query, reduced to visible, existing and readable pages, filtered by
     * modification time and finally sorted with pagesorter().
     *
     * @param array $data  event data as assembled by pageLookup()
     * @return string[]
     */
    public function pageLookupCallBack(&$data)
    {
        // split out original parameters
        $id = $data['id'];
        $parsedQuery = (new QueryParser)->convert($id);

        // Only the first namespace of each kind is honoured. Whenever a
        // namespace filter is present, the search term is rebuilt from the
        // parsed 'highlight' words.
        $ns = null;
        $notns = null;
        if (count($parsedQuery['ns']) > 0) {
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }
        if (count($parsedQuery['notns']) > 0) {
            $notns = cleanID($parsedQuery['notns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $in_ns    = $data['in_ns'];
        $in_title = $data['in_title'];
        $cleaned  = cleanID($id);

        $MetadataIndex = new MetadataIndex();
        $page_idx = $MetadataIndex->getPages();

        // Collect candidate page ids first (page id => null, insertion order
        // kept); the headings are only looked up for pages surviving all filters.
        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            foreach ($page_idx as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false) {
                    $pages[$p_id] = null;
                }
            }
            if ($in_title) {
                // pageLookupTitleCompare() is protected. An array callable
                // naming it cannot be invoked from another class, so hand over
                // a closure that carries this class' scope instead.
                $func = function ($search, $title) {
                    return $this->pageLookupTitleCompare($search, $title);
                };
                foreach ($MetadataIndex->lookupKey('title', $id, $func) as $p_id) {
                    $pages[$p_id] = null;
                }
            }
        }

        foreach (array_keys($pages) as $p_id) {
            // keep only pages inside the requested namespace
            if ($ns !== null && strpos($p_id, $ns) !== 0) {
                unset($pages[$p_id]);
                continue;
            }
            // drop pages inside the excluded namespace
            if ($notns !== null && strpos($p_id, $notns) === 0) {
                unset($pages[$p_id]);
                continue;
            }
            // discard hidden pages
            // discard nonexistent pages
            // check ACL permissions
            if (!isVisiblePage($p_id) || !page_exists($p_id) || auth_quickaclcheck($p_id) < AUTH_READ) {
                unset($pages[$p_id]);
            }
        }

        $pages = $this->filterResultsByTime($pages, $data['after'], $data['before']);

        // PHP turns numeric string keys into integers, hence the cast back
        foreach (array_keys($pages) as $p_id) {
            $pages[$p_id] = p_get_first_heading((string) $p_id, METADATA_DONT_RENDER);
        }

        uksort($pages, [$this, 'pagesorter']);
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. Case-insensitive substring check with adapted
     * argument order and a boolean return value.
     *
     * ASCII search terms are compared with stripos. Terms containing
     * non-ASCII bytes are compared with mb_stripos (UTF-8) when the mbstring
     * extension is available, because stripos only folds the case of ASCII
     * letters.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true when $search occurs in $title
     */
    protected function pageLookupTitleCompare($search, $title)
    {
        $isAscii = !preg_match('/[\x80-\xFF]/', $search);
        if ($isAscii || !function_exists('mb_stripos')) {
            return stripos($title, $search) !== false;
        }
        return mb_stripos($title, $search, 0, 'UTF-8') !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected function pagesorter($a, $b)
    {
        // the number of colons is the namespace depth of a page id
        $depthA = substr_count($a, ':');
        $depthB = substr_count($b, ':');
        if ($depthA !== $depthB) {
            return $depthA < $depthB ? -1 : 1;
        }
        return Utf8\Sort::strcmp($a, $b);
    }

    /**
     * Removes all results whose page file was modified outside the given
     * time window. Without any bound the results are returned unchanged.
     *
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestamp or strtotime arguments
     * @param int|string $before  only returns results with mtime before this date,
     *                            accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected function filterResultsByTime(array $results, $after, $before)
    {
        if (!$after && !$before) {
            return $results;
        }

        $after = self::parseTimeBound($after);
        $before = self::parseTimeBound($before);

        foreach (array_keys($results) as $id) {
            $mTime = filemtime(wikiFN($id));
            $tooOld = $after && $after > $mTime;
            $tooNew = $before && $before < $mTime;
            if ($tooOld || $tooNew) {
                unset($results[$id]);
            }
        }
        return $results;
    }

    /**
     * Normalizes a time bound for filterResultsByTime()
     *
     * Integers are taken as timestamps, null means "no bound" and everything
     * else is handed to strtotime. Null is handled separately because passing
     * it to strtotime is deprecated as of PHP 8.1.
     *
     * @param int|string|null $bound
     * @return int|false timestamp, or false when there is no usable bound
     */
    private static function parseTimeBound($bound)
    {
        if (is_int($bound)) {
            return $bound;
        }
        if ($bound === null) {
            return false;
        }
        return strtotime($bound);
    }
}
197