xref: /dokuwiki/inc/Search/MetadataSearch.php (revision fe2d1da1fb3adc9a3f56c51d05c81c04a7b51491)
1*fe2d1da1SSatoshi Sahara<?php
2*fe2d1da1SSatoshi Saharanamespace dokuwiki\Search;
3*fe2d1da1SSatoshi Sahara
4*fe2d1da1SSatoshi Saharause dokuwiki\Extension\Event;
5*fe2d1da1SSatoshi Saharause dokuwiki\Search\Indexer;
6*fe2d1da1SSatoshi Saharause dokuwiki\Search\QueryParser;
7*fe2d1da1SSatoshi Sahara
8*fe2d1da1SSatoshi Sahara
9*fe2d1da1SSatoshi Sahara/**
10*fe2d1da1SSatoshi Sahara * Class DokuWiki Metadata Search
11*fe2d1da1SSatoshi Sahara *
12*fe2d1da1SSatoshi Sahara * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
13*fe2d1da1SSatoshi Sahara * @author     Andreas Gohr <andi@splitbrain.org>
14*fe2d1da1SSatoshi Sahara */
class MetadataSearch
{
    /**
     * Metadata Search constructor.
     *
     * Protected to prevent direct object creation; all functionality of this
     * class is exposed through static methods.
     */
    protected function __construct()
    {
    }

    /**
     * Returns the backlinks for a given page
     *
     * Uses the 'relation_references' key of the metadata index.
     *
     * @param string $id           The id for which links shall be returned
     * @param bool   $ignore_perms Ignore the fact that pages are hidden or read-protected
     * @return array The pages that contain links to the given page, sorted
     */
    public static function backlinks($id, $ignore_perms = false)
    {
        return static::lookupReferrers('relation_references', $id, $ignore_perms);
    }

    /**
     * Returns the pages that use a given media file
     *
     * Uses the 'relation_media' key of the metadata index.
     *
     * Note that before 2013-07-31 the second parameter was the maximum number
     * of results and permissions were ignored. That's why the parameter is now
     * checked to be explicitly set to true (with type bool) in order to be
     * compatible with older uses of the function.
     *
     * @param string $id           The media id to look for
     * @param bool   $ignore_perms Ignore hidden pages and acls (optional, default: false)
     * @return array A list of pages that use the given media file, sorted
     */
    public static function mediause($id, $ignore_perms = false)
    {
        return static::lookupReferrers('relation_media', $id, $ignore_perms);
    }

    /**
     * Shared implementation of backlinks() and mediause()
     *
     * Looks up the pages registered under $key => $id in the metadata index,
     * drops pages that no longer exist and - unless $ignore_perms is exactly
     * boolean true - pages that are hidden or not readable by the current user.
     *
     * @param string $key          metadata index key to query
     * @param string $id           page or media id to look for
     * @param bool   $ignore_perms only the strict boolean true disables the
     *                             hidden/ACL checks
     * @return array sorted, re-indexed list of matching page ids
     */
    protected static function lookupReferrers($key, $id, $ignore_perms)
    {
        $Indexer = Indexer::getInstance();
        $result = $Indexer->lookupKey($key, $id);

        if (!count($result)) return $result;

        // strict comparison keeps compatibility with legacy callers that
        // passed an integer (former "max results") as second argument
        $checkPerms = ($ignore_perms !== true);

        foreach (array_keys($result) as $idx) {
            $page = $result[$idx];
            $denied = $checkPerms
                && (isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ);
            if ($denied || !page_exists($page, '', false)) {
                unset($result[$idx]);
            }
        }

        // sort() also re-indexes the list after the unset() calls above
        sort($result);
        return $result;
    }

    /**
     * Quicksearch for pagenames
     *
     * By default it only matches the pagename and ignores the namespace.
     * This can be changed with the second parameter.
     * The third parameter allows to search in titles as well.
     *
     * The function always returns titles as well
     *
     * @triggers SEARCH_QUERY_PAGELOOKUP
     * @author   Andreas Gohr <andi@splitbrain.org>
     * @author   Adrian Lang <lang@cosmocode.de>
     *
     * @param string     $id       page id
     * @param bool       $in_ns    match against namespace as well?
     * @param bool       $in_title search in title?
     * @param int|string $after    only show results with mtime after this date,
     *                             accepts timestamp or strtotime arguments
     * @param int|string $before   only show results with mtime before this date,
     *                             accepts timestamp or strtotime arguments
     *
     * @return string[]
     */
    public static function pageLookup($id, $in_ns = false, $in_title = false, $after = null, $before = null)
    {
        $data = [
            'id' => $id,
            'in_ns' => $in_ns,
            'in_title' => $in_title,
            'after' => $after,
            'before' => $before,
            'has_titles' => true, // for plugin backward compatibility check
        ];
        $action = static::class.'::callback_pageLookup';
        return Event::createAndTrigger('SEARCH_QUERY_PAGELOOKUP', $data, $action);
    }

    /**
     * Returns list of pages as array(pageid => First Heading)
     *
     * Default action of the SEARCH_QUERY_PAGELOOKUP event. Matches the query
     * against the page ids of the index (and optionally the indexed titles),
     * restricts the result to the namespace given in the query, removes
     * hidden, nonexistent and unreadable pages, applies the time filter and
     * sorts the result by namespace depth.
     *
     * @param array $data  event data (keys: id, in_ns, in_title, after, before)
     * @return string[]
     */
    public static function callback_pageLookup($data)
    {
        $Indexer = Indexer::getInstance();

        // split out original parameters
        $id = $data['id'];
        $parsedQuery = QueryParser::convert($id);

        if (count($parsedQuery['ns']) > 0) {
            // only the first namespace of the query is used as restriction
            $ns = cleanID($parsedQuery['ns'][0]) . ':';
            $id = implode(' ', $parsedQuery['highlight']);
        }

        $in_ns    = $data['in_ns'];
        $in_title = $data['in_title'];
        $cleaned  = cleanID($id);

        $pages = [];
        if ($id !== '' && $cleaned !== '') {
            foreach ($Indexer->getPages() as $p_id) {
                $haystack = $in_ns ? $p_id : noNSorNS($p_id);
                if (strpos($haystack, $cleaned) !== false && !isset($pages[$p_id])) {
                    $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                }
            }
            if ($in_title) {
                // A closure carries this class' scope with it, so the protected
                // comparison helper stays callable when the indexer invokes the
                // callback from its own scope (a plain 'Class::method' string
                // pointing at a protected method would not be).
                $func = function ($search, $title) {
                    return static::pageLookupTitleCompare($search, $title);
                };
                foreach ($Indexer->lookupKey('title', $id, $func) as $p_id) {
                    if (!isset($pages[$p_id])) {
                        $pages[$p_id] = p_get_first_heading($p_id, METADATA_DONT_RENDER);
                    }
                }
            }
        }

        // restrict to the namespace given in the query, if any
        if (isset($ns)) {
            foreach (array_keys($pages) as $p_id) {
                if (strpos($p_id, $ns) !== 0) {
                    unset($pages[$p_id]);
                }
            }
        }

        // discard hidden pages
        // discard nonexistent pages
        // check ACL permissions
        foreach (array_keys($pages) as $idx) {
            if (!isVisiblePage($idx) || !page_exists($idx) || auth_quickaclcheck($idx) < AUTH_READ) {
                unset($pages[$idx]);
            }
        }

        $pages = static::filterResultsByTime($pages, $data['after'], $data['before']);

        uksort($pages, static::class.'::pagesorter');
        return $pages;
    }

    /**
     * Tiny helper function for comparing the searched title with the title
     * from the search index. This function is a wrapper around stripos with
     * adapted argument order and return value.
     *
     * @param string $search searched title
     * @param string $title  title from index
     * @return bool true if $title contains $search (case-insensitive)
     */
    protected static function pageLookupTitleCompare($search, $title)
    {
        return stripos($title, $search) !== false;
    }

    /**
     * Sort pages based on their namespace level first, then on their string
     * values. This makes higher hierarchy pages rank higher than lower hierarchy
     * pages.
     *
     * @param string $a
     * @param string $b
     * @return int Returns < 0 if $a is less than $b; > 0 if $a is greater than $b,
     *             and 0 if they are equal.
     */
    protected static function pagesorter($a, $b)
    {
        // the number of ':' separators is the namespace depth of a page id
        $depthA = substr_count($a, ':');
        $depthB = substr_count($b, ':');
        if ($depthA !== $depthB) {
            return ($depthA < $depthB) ? -1 : 1;
        }
        return strcmp($a, $b);
    }

    /**
     * Remove all results whose page file was not modified within the given
     * time range. A bound that is empty or cannot be interpreted is ignored.
     *
     * @param array      $results search results in the form pageid => value
     * @param int|string $after   only returns results with mtime after this date,
     *                            accepts timestamp or strtotime arguments
     * @param int|string $before  only returns results with mtime before this date,
     *                            accepts timestamp or strtotime arguments
     *
     * @return array
     */
    protected static function filterResultsByTime(array $results, $after, $before)
    {
        if (!$after && !$before) {
            return $results;
        }

        $after  = static::toTimestamp($after);
        $before = static::toTimestamp($before);

        foreach (array_keys($results) as $id) {
            $mTime = filemtime(wikiFN($id));
            // a page whose modification time can not be read can not be shown
            // to lie within the requested range, so it is dropped as well
            if ($mTime === false
                || ($after && $after > $mTime)
                || ($before && $before < $mTime)
            ) {
                unset($results[$id]);
            }
        }
        return $results;
    }

    /**
     * Convert a time bound to a unix timestamp
     *
     * Integers are returned as they are. Everything else is handed to
     * strtotime(); only when strtotime() can not parse the value and it
     * consists of digits only, it is taken as a timestamp given as string.
     * Empty values never reach strtotime() (passing null is deprecated
     * since PHP 8.1).
     *
     * @param int|string|null $time timestamp or strtotime argument
     * @return int|false timestamp, false if there is no usable bound
     */
    protected static function toTimestamp($time)
    {
        if (is_int($time)) {
            return $time;
        }

        $time = (string) $time;
        if ($time === '') {
            return false;
        }

        $stamp = strtotime($time);
        if ($stamp === false && preg_match('/^\d+$/', $time)) {
            $stamp = (int) $time;
        }
        return $stamp;
    }
}
256