xref: /dokuwiki/inc/Search/MetadataSearch.php (revision 46b83514ca215ee33366a5c9f42f7da7812ef9ed)
1fe2d1da1SSatoshi Sahara<?php
2*46b83514SSatoshi Sahara
3fe2d1da1SSatoshi Saharanamespace dokuwiki\Search;
4fe2d1da1SSatoshi Sahara
5fe2d1da1SSatoshi Saharause dokuwiki\Extension\Event;
686fc7283SSatoshi Saharause dokuwiki\Search\MetadataIndex;
786fc7283SSatoshi Saharause dokuwiki\Search\PageIndex;
8fe2d1da1SSatoshi Saharause dokuwiki\Search\QueryParser;
9fe2d1da1SSatoshi Sahara
/**
 * Class DokuWiki Metadata Search
 *
 * Collection of static lookups against the metadata index and the page index:
 * backlinks of a page, pages using a media file, and a quick search for
 * page names/titles.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
class MetadataSearch
{
    /**
     * Metadata Search constructor. Prevents direct object creation;
     * every method of this class is static.
     */
    protected function __construct() {}

    /**
     * Returns the backlinks for a given page
     *
     * Uses the 'relation_references' key of the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page
     */
    public static function backlinks($id, $ignore_perms = false)
    {
        return self::lookupAccessiblePages('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the 'relation_media' key of the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file
     */
    public static function mediause($id, $ignore_perms = false)
    {
        return self::lookupAccessiblePages('relation_media', $id, $ignore_perms);
    }

    /**
     * Shared implementation of backlinks() and mediause()
     *
     * Looks up $id under the given metadata key, drops pages that do not
     * exist and (unless $ignore_perms is exactly true) pages that are hidden
     * or not readable, then returns the remaining page ids sorted.
     *
     * @param string $key          metadata index key to search in
     * @param string $id           value to look up
     * @param bool   $ignore_perms only the boolean true disables the hidden/ACL check
     * @return array sorted list of page ids
     */
    private static function lookupAccessiblePages($key, $id, $ignore_perms)
    {
        $Indexer = MetadataIndex::getInstance();
        $result = $Indexer->lookupKey($key, $id);

        if (!count($result)) return $result;

        foreach (array_keys($result) as $idx) {
            $page = $result[$idx];

            // strict comparison on purpose: legacy callers passed an integer here
            $denied = $ignore_perms !== true
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ);

            if ($denied || !page_exists($page, '', false)) {
                unset($result[$idx]);
            }
        }

        sort($result);
        return $result;
    }

    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[]
     */
    public static function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before
        ];
        $data['has_titles'] = true; // for plugin backward compatibility check
        $action = static::class.'::pageLookupCallBack';
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $action);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. It is public
     * because it is handed to the event system as a callable.
     *
     * @param array $data  event data (keys: id, in_ns, in_title, after, before)
     * @return string[]
     */
    public static function pageLookupCallBack(&$data)
    {
        $Indexer = PageIndex::getInstance();

        // split out original parameters
        $id = $data['id'];
        $parsedQuery = QueryParser::convert($id);

        // a namespace restriction in the query narrows the result later on;
        // the remaining search terms become the id to look for
        $ns = null;
        if (count($parsedQuery['ns']) > 0) {
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $in_ns    = $data['in_ns'];
        $in_title = $data['in_title'];
        $cleaned = cleanID($id);

        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            // match against the page ids (with or without their namespace part)
            foreach ($Indexer->getPages() as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false && !isset($pages[$p_id])) {
                    $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                }
            }

            if ($in_title) {
                // pageLookupTitleCompare() is protected, so a plain string callable
                // could not be invoked from inside the index class. The closure is
                // created in this class' scope and therefore may call it.
                $func = static function ($search, $title) {
                    return static::pageLookupTitleCompare($search, $title);
                };
                // NOTE(review): relies on PageIndex exposing a MetadataIndex member;
                // that API is not visible from this file - confirm.
                foreach ($Indexer->MetadataIndex->lookupKey('title', $id, $func) as $p_id) {
                    if (!isset($pages[$p_id])) {
                        $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                    }
                }
            }
        }

        // keep only pages inside the requested namespace
        if ($ns !== null) {
            foreach (array_keys($pages) as $p_id) {
                if (strpos($p_id, $ns) !== 0) {
                    unset($pages[$p_id]);
                }
            }
        }

        // discard hidden pages
        // discard nonexistent pages
        // check ACL permissions
        foreach (array_keys($pages) as $idx) {
            if (!isVisiblePage($idx) || !page_exists($idx) || auth_quickaclcheck($idx) < AUTH_READ) {
                unset($pages[$idx]);
            }
        }

        $pages = static::filterResultsByTime($pages, $data['after'], $data['before']);

        uksort($pages, static::class.'::pagesorter');
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. This function is a wrapper around stripos with
     * adapted argument order and return value.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true if $search occurs (case-insensitively) in $title
     */
    protected static function pageLookupTitleCompare($search, $title)
    {
        return stripos($title, $search) !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected static function pagesorter($a, $b)
    {
        // the number of colons is the namespace depth of a page id
        $depthA = substr_count($a, ':');
        $depthB = substr_count($b, ':');
        if ($depthA < $depthB) {
            return -1;
        } elseif ($depthA > $depthB) {
            return 1;
        }
        return strcmp($a, $b);
    }

    /**
     * Restrict search results to pages modified within a time window
     *
     * A bound that is empty or cannot be parsed by strtotime() is ignored.
     *
     * @param array           $results search results in the form pageid => value
     * @param int|string|null $after   only returns results with mtime after this date,
     *                                 accepts timestamp or strtotime arguments
     * @param int|string|null $before  only returns results with mtime before this date,
     *                                 accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected static function filterResultsByTime(array $results, $after, $before)
    {
        if ($after || $before) {
            // Passing null to strtotime() is deprecated since PHP 8.1; a bound that
            // was not given simply stays disabled (false).
            $after = is_int($after) ? $after : ($after === null ? false : strtotime($after));
            $before = is_int($before) ? $before : ($before === null ? false : strtotime($before));

            foreach ($results as $id => $value) {
                $mTime = filemtime(wikiFN($id));
                if ($after && $after > $mTime) {
                    unset($results[$id]);
                    continue;
                }
                if ($before && $before < $mTime) {
                    unset($results[$id]);
                }
            }
        }
        return $results;
    }
}
257