1<?php
2
3/**
4 * DokuWiki Plugin elasticsearch (Action Component)
5 *
6 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
7 * @author  Andreas Gohr <gohr@cosmocode.de>
8 */
9
10use dokuwiki\Extension\ActionPlugin;
11use dokuwiki\Extension\Event;
12use dokuwiki\Extension\EventHandler;
13use dokuwiki\Form\Form;
14use Elastica\Aggregation\Terms;
15use Elastica\Query;
16use Elastica\Query\BoolQuery;
17use Elastica\Query\MatchQuery;
18use Elastica\Query\Range;
19use Elastica\Query\SimpleQueryString;
20use Elastica\Query\Term;
21use Elastica\ResultSet;
22
23/**
24 * Main search helper
25 */
26class action_plugin_elasticsearch_search extends ActionPlugin
27{
    /**
     * Search configurations supplied by other plugins, keyed by search field name.
     * The key is also used as the name of the search query parameter.
     *
     * Example array element for search field 'tagging':
     * 'tagging' => [                       // also used as search query parameter
     *   'label' => 'Tag',
     *   'fieldPath' => 'tagging',          // dot notation in more complex mappings
     *   'limit' => '50',
     * ]
     *
     * Stays unset (null) until the PLUGIN_ELASTICSEARCH_FILTERS event has been triggered.
     *
     * @var array|null
     */
    protected static $pluginSearchConfigs;

    /**
     * Search will be performed on those fields only.
     * A trailing '*' is a wildcard in the field name.
     *
     * @var string[]
     */
    protected $searchFields = [
        'title*',
        'abstract*',
        'content*',
        'uri',
    ];
51
52    /**
53     * Registers a callback function for a given event
54     *
55     * @param EventHandler $controller DokuWiki's event controller object
56     * @return void
57     */
58    public function register(EventHandler $controller)
59    {
60
61        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'handleActPreprocess');
62        $controller->register_hook('TPL_ACT_UNKNOWN', 'BEFORE', $this, 'handleActUnknown');
63        $controller->register_hook('FORM_QUICKSEARCH_OUTPUT', 'BEFORE', $this, 'handleQuicksearchOutput');
64    }
65
66    /**
67     * allow our custom do command
68     *
69     * @param Event $event
70     * @param $param
71     */
72    public function handleActPreprocess(Event $event, $param)
73    {
74        if ($event->data !== 'search') return;
75        $event->preventDefault();
76        $event->stopPropagation();
77    }
78
79    /**
80     * do the actual search
81     *
82     * @param Event $event
83     * @param $param
84     */
85    public function handleActUnknown(Event $event, $param)
86    {
87        if ($event->data !== 'search') return;
88        $event->preventDefault();
89        $event->stopPropagation();
90        global $QUERY;
91        global $INPUT;
92        global $ID;
93
94        if (empty($QUERY)) $QUERY = $INPUT->str('q');
95        if (empty($QUERY)) $QUERY = $ID;
96
97        // get extended search configurations from plugins
98        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_FILTERS', self::$pluginSearchConfigs);
99
100        /** @var helper_plugin_elasticsearch_client $hlp */
101        $hlp = plugin_load('helper', 'elasticsearch_client');
102
103        $client = $hlp->connect();
104        $index = $client->getIndex($this->getConf('indexname'));
105
106        // store copy of the original query string
107        $q = $QUERY;
108        // let plugins manipulate the query
109        $additions = [];
110        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_QUERY', $additions);
111        // if query is empty, return all results
112        if (empty(trim($QUERY))) $QUERY = '*';
113
114        // get fields to use in query
115        $fields = [];
116        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_SEARCHFIELDS', $fields);
117
118        if ($this->getConf('searchSyntax')) {
119            $this->searchFields[] = 'syntax*';
120        }
121
122        // finally define the elastic query
123        $qstring = new SimpleQueryString($QUERY, array_merge($this->searchFields, $fields));
124        // restore the original query
125        $QUERY = $q;
126        // append additions provided by plugins
127        if (!empty($additions)) {
128            $QUERY .= ' ' . implode(' ', $additions);
129        }
130
131        // create the actual search object
132        $equery = new Query();
133        $subqueries = new BoolQuery();
134        $subqueries->addMust($qstring);
135
136        $equery->setHighlight(
137            [
138                "pre_tags" => ['ELASTICSEARCH_MARKER_IN'],
139                "post_tags" => ['ELASTICSEARCH_MARKER_OUT'],
140                "fields" => [
141                    $this->getConf('snippets') => new \stdClass(),
142                    'title' => new \stdClass()]
143            ]
144        );
145
146        // paginate
147        $equery->setSize($this->getConf('perpage'));
148        $equery->setFrom($this->getConf('perpage') * ($INPUT->int('p', 1, true) - 1));
149
150        // add ACL subqueries
151        $this->addACLSubqueries($subqueries);
152
153        // add language subquery
154        $this->addLanguageSubquery($subqueries, $this->getLanguageFilter());
155
156        // add date subquery
157        if ($INPUT->has('min')) {
158            $this->addDateSubquery($subqueries, $INPUT->str('min'));
159        }
160
161        // add namespace filter
162        if ($INPUT->has('ns')) {
163            $nsSubquery = new BoolQuery();
164            foreach ($INPUT->arr('ns') as $ns) {
165                $term = new Term();
166                $term->setTerm('namespace', $ns);
167                $nsSubquery->addShould($term);
168            }
169            $equery->setPostFilter($nsSubquery);
170        }
171
172
173        // add aggregations for namespaces
174        $agg = new Terms('namespace');
175        $agg->setField('namespace.keyword');
176        $agg->setSize(25);
177
178        $equery->addAggregation($agg);
179
180        // add search configurations from other plugins
181        $this->addPluginConfigurations($equery, $subqueries);
182
183        $equery->setQuery($subqueries);
184
185        try {
186            $result = $index->search($equery);
187            $aggs = $result->getAggregations();
188
189            $this->printIntro();
190            /** @var helper_plugin_elasticsearch_form $hlpform */
191            $hlpform = plugin_load('helper', 'elasticsearch_form');
192            $hlpform->tpl($aggs);
193            if ($this->printResults($result)) {
194                $this->printPagination($result);
195            }
196        } catch (Exception $e) {
197            msg('Something went wrong on searching please try again later or ask an admin for help.<br /><pre>' .
198                hsc($e->getMessage()) . '</pre>', -1);
199        }
200    }
201
202    /**
203     * Optionally disable "quick search"
204     *
205     * @param Event $event
206     */
207    public function handleQuicksearchOutput(Event $event)
208    {
209        if (!$this->getConf('disableQuicksearch')) return;
210
211        /** @var Form $form */
212        $form = $event->data;
213        $pos = $form->findPositionByAttribute('id', 'qsearch__out');
214        $form->removeElement($pos);
215        $form->removeElement($pos + 1); // div closing tag
216    }
217
218    /**
219     * @return array
220     */
221    public static function getRawPluginSearchConfigs()
222    {
223        return self::$pluginSearchConfigs;
224    }
225
226    /**
227     * Add search configurations supplied by other plugins
228     *
229     * @param Query $equery
230     * @param \Elastica\Query\BoolQuery
231     */
232    protected function addPluginConfigurations($equery, $subqueries)
233    {
234        global $INPUT;
235
236        if (!empty(self::$pluginSearchConfigs)) {
237            foreach (self::$pluginSearchConfigs as $param => $config) {
238                // handle search parameter
239                if ($INPUT->has($param)) {
240                    $pluginSubquery = new BoolQuery();
241                    foreach ($INPUT->arr($param) as $item) {
242                        $eterm = new Term();
243                        $eterm->setTerm($param, $item);
244                        $pluginSubquery->addShould($eterm);
245                    }
246                    $subqueries->addMust($pluginSubquery);
247                }
248
249                // build aggregation for use as filter in advanced search
250                $agg = new Terms($param);
251                $agg->setField($config['fieldPath']);
252                if (isset($config['limit'])) {
253                    $agg->setSize($config['limit']);
254                }
255                $equery->addAggregation($agg);
256            }
257        }
258    }
259
260    /**
261     * Adds date subquery
262     *
263     * @param BoolQuery $subqueries
264     * @param string $min Modified at the latest one {year|month|week} ago
265     */
266    protected function addDateSubquery($subqueries, $min)
267    {
268        if (!in_array($min, ['year', 'month', 'week'])) return;
269
270        $dateSubquery = new Range(
271            'modified',
272            ['gte' => date('Y-m-d', strtotime('1 ' . $min . ' ago'))]
273        );
274        $subqueries->addMust($dateSubquery);
275    }
276
277    /**
278     * Adds language subquery
279     *
280     * @param BoolQuery $subqueries
281     * @param array $langFilter
282     */
283    protected function addLanguageSubquery($subqueries, $langFilter)
284    {
285        if (empty($langFilter)) return;
286
287        $langSubquery = new MatchQuery();
288        $langSubquery->setField('language', implode(',', $langFilter));
289
290        $subqueries->addMust($langSubquery);
291    }
292
293    /**
294     * Languages to be used in the current search, determined by:
295     * 1. $INPUT variables, or 2. translation plugin
296     *
297     * @return array
298     */
299    protected function getLanguageFilter()
300    {
301        global $ID;
302        global $INPUT;
303
304        $ns = getNS($ID);
305        $langFilter = $INPUT->arr('lang');
306
307        /** @var helper_plugin_translation $transplugin */
308        $transplugin = plugin_load('helper', 'translation');
309
310        // optional translation detection: use current top namespace if it matches translation config
311        if (empty($langFilter) && $transplugin && $this->getConf('detectTranslation') && $ns) {
312            $topNs = strtok($ns, ':');
313            if (in_array($topNs, $transplugin->translations)) {
314                $langFilter = [$topNs];
315                $INPUT->set('lang', $langFilter);
316            }
317        } elseif (empty($langFilter) && $transplugin) {
318            // select all available translations
319            $INPUT->set('lang', $transplugin->translations);
320        }
321
322        return $langFilter;
323    }
324
325    /**
326     * Inserts subqueries based on current user's ACLs, none for superusers
327     *
328     * @param BoolQuery $subqueries
329     */
330    protected function addACLSubqueries($subqueries)
331    {
332        global $USERINFO;
333        global $INFO;
334
335        $groups = array_merge(['ALL'], $USERINFO['grps'] ?: []);
336
337        // no ACL filters for superusers
338        if ($INFO['isadmin']) return;
339
340        // include if group OR user have read permissions, allows for ACLs such as "block @group except user"
341        $includeSubquery = new BoolQuery();
342        foreach ($groups as $group) {
343            $term = new Term();
344            $term->setTerm('groups_include', $group);
345            $includeSubquery->addShould($term);
346        }
347        if (isset($_SERVER['REMOTE_USER'])) {
348            $userIncludeSubquery = new BoolQuery();
349            $term = new Term();
350            $term->setTerm('users_include', $_SERVER['REMOTE_USER']);
351            $userIncludeSubquery->addMust($term);
352            $includeSubquery->addShould($userIncludeSubquery);
353        }
354        $subqueries->addMust($includeSubquery);
355
356        // groups exclusion SHOULD be respected, not MUST, since that would not allow for exceptions
357        $groupExcludeSubquery = new BoolQuery();
358        foreach ($groups as $group) {
359            $term = new Term();
360            $term->setTerm('groups_exclude', $group);
361            $groupExcludeSubquery->addShould($term);
362        }
363        $excludeSubquery = new BoolQuery();
364        $excludeSubquery->addMustNot($groupExcludeSubquery);
365
366        $subqueries->addShould($excludeSubquery);
367
368        // user specific excludes must always be respected
369        if (isset($_SERVER['REMOTE_USER'])) {
370            $term = new Term();
371            $term->setTerm('users_exclude', $_SERVER['REMOTE_USER']);
372            $subqueries->addMustNot($term);
373        }
374    }
375
376    /**
377     * Prints the introduction text
378     */
379    protected function printIntro()
380    {
381        global $QUERY;
382        global $ID;
383        global $lang;
384
385        // just reuse the standard search page intro:
386        $intro = p_locale_xhtml('searchpage');
387        // allow use of placeholder in search intro
388        $pagecreateinfo = '';
389        if (auth_quickaclcheck($ID) >= AUTH_CREATE) {
390            $pagecreateinfo = sprintf($lang['searchcreatepage'], $QUERY);
391        }
392        $intro = str_replace(
393            ['@QUERY@', '@SEARCH@', '@CREATEPAGEINFO@'],
394            [hsc(rawurlencode($QUERY)), hsc($QUERY), $pagecreateinfo],
395            $intro
396        );
397        echo $intro;
398        flush();
399    }
400
401    /**
402     * Output the search results
403     *
404     * @param ResultSet $results
405     * @return bool true when results where shown
406     */
407    protected function printResults($results)
408    {
409        global $lang;
410
411        // output results
412        $found = $results->getTotalHits();
413
414        if (!$found) {
415            echo '<h2>' . $lang['nothingfound'] . '</h2>';
416            return (bool)$found;
417        }
418
419        echo '<dl class="search_results">';
420        echo '<h2>' . sprintf($this->getLang('totalfound'), $found) . '</h2>';
421        foreach ($results as $row) {
422
423            /** @var Elastica\Result $row */
424            $doc = $row->getSource();
425            $page = $doc['uri'];
426            if (
427                (!page_exists($page) && !is_file(mediaFN($page))) ||
428                isHiddenPage($page) ||
429                auth_quickaclcheck($page) < AUTH_READ
430            ) {
431                continue;
432            }
433
434            // get highlighted title
435            $highlightsTitle = $row->getHighlights()['title'] ?? '';
436            $title = str_replace(
437                ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'],
438                ['<strong class="search_hit">', '</strong>'],
439                hsc(implode(' … ', (array)$highlightsTitle))
440            );
441            if (!$title) $title = hsc($doc['title']);
442            if (!$title) $title = hsc(p_get_first_heading($page));
443            if (!$title) $title = hsc($page);
444
445            // get highlighted snippet
446            $highlightedSnippets = $row->getHighlights()[$this->getConf('snippets')] ?? [];
447            $snippet = str_replace(
448                ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'],
449                ['<strong class="search_hit">', '</strong>'],
450                hsc(implode(' … ', $highlightedSnippets))
451            );
452            if (!$snippet) $snippet = hsc($doc['abstract']); // always fall back to abstract
453
454            // assume page if no doctype is set, because old index won't have doctypes
455            $isPage = empty($doc['doctype']) || $doc['doctype'] === \action_plugin_elasticsearch_indexing::DOCTYPE_PAGE;
456            $href = $isPage ? wl($page) : ml($page);
457
458            echo '<dt>';
459            if (!$isPage && is_file(DOKU_INC . 'lib/images/fileicons/' . $doc['ext'] . '.png')) {
460                echo sprintf(
461                    '<img src="%s" alt="%s" /> ',
462                    DOKU_BASE . 'lib/images/fileicons/' . $doc['ext'] . '.png',
463                    $doc['ext']
464                );
465            }
466            echo '<a href="' . $href . '" class="wikilink1" title="' . hsc($page) . '">';
467            echo $title;
468            echo '</a>';
469            echo '</dt>';
470
471            // meta
472            echo '<dd class="meta elastic-resultmeta">';
473            if (!empty($doc['namespace'])) {
474                echo '<span class="ns">' . $this->getLang('ns') . ' ' . hsc($doc['namespace']) . '</span>';
475            }
476            if ($doc['modified']) {
477                $lastmod = strtotime($doc['modified']);
478                echo ' <span class="">' . $lang['lastmod'] . ' ' . dformat($lastmod) . '</span>';
479            }
480            if (!empty($doc['user'])) {
481                echo ' <span class="author">' . $this->getLang('author') . ' ' . userlink($doc['user']) . '</span>';
482            }
483            echo '</dd>';
484
485            // snippets
486            echo '<dd class="snippet">';
487            echo $snippet;
488            echo '</dd>';
489        }
490        echo '</dl>';
491
492        return (bool)$found;
493    }
494
495    /**
496     * @param ResultSet $result
497     */
498    protected function printPagination($result)
499    {
500        global $INPUT;
501        global $QUERY;
502
503        $all = $result->getTotalHits();
504        $pages = ceil($all / $this->getConf('perpage'));
505        $cur = $INPUT->int('p', 1, true);
506
507        if ($pages < 2) return;
508
509        // which pages to show
510        $toshow = [1, 2, $cur, $pages, $pages - 1];
511        if ($cur - 1 > 1) $toshow[] = $cur - 1;
512        if ($cur + 1 < $pages) $toshow[] = $cur + 1;
513        $toshow = array_unique($toshow);
514        // fill up to seven, if possible
515        if (count($toshow) < 7) {
516            if ($cur < 4) {
517                if ($cur + 2 < $pages && count($toshow) < 7) $toshow[] = $cur + 2;
518                if ($cur + 3 < $pages && count($toshow) < 7) $toshow[] = $cur + 3;
519                if ($cur + 4 < $pages && count($toshow) < 7) $toshow[] = $cur + 4;
520            } else {
521                if ($cur - 2 > 1 && count($toshow) < 7) $toshow[] = $cur - 2;
522                if ($cur - 3 > 1 && count($toshow) < 7) $toshow[] = $cur - 3;
523                if ($cur - 4 > 1 && count($toshow) < 7) $toshow[] = $cur - 4;
524            }
525        }
526        sort($toshow);
527        $showlen = count($toshow);
528
529        echo '<ul class="elastic_pagination">';
530        if ($cur > 1) {
531            $p = [
532                'q' => $QUERY,
533                'do' => 'search',
534                'ns' => $INPUT->arr('ns'),
535                'min' => $INPUT->arr('min'),
536                'p' => ($cur - 1)
537            ];
538            echo '<li class="prev">';
539            echo '<a href="' . wl('', $p) . '">';
540            echo '«';
541            echo '</a>';
542            echo '</li>';
543        }
544
545        for ($i = 0; $i < $showlen; $i++) {
546            if ($toshow[$i] == $cur) {
547                echo '<li class="cur">' . $toshow[$i] . '</li>';
548            } else {
549                $p = [
550                    'q' => $QUERY,
551                    'do' => 'search',
552                    'ns' => $INPUT->arr('ns'),
553                    'min' => $INPUT->arr('min'),
554                    'p' => $toshow[$i]
555                ];
556                echo '<li>';
557                echo '<a href="' . wl('', $p) . '">';
558                echo $toshow[$i];
559                echo '</a>';
560                echo '</li>';
561            }
562
563            // show seperator when a jump follows
564            if (isset($toshow[$i + 1]) && $toshow[$i + 1] - $toshow[$i] > 1) {
565                echo '<li class="sep">…</li>';
566            }
567        }
568
569        if ($cur < $pages) {
570            $p = [
571                'q' => $QUERY,
572                'do' => 'search',
573                'ns' => $INPUT->arr('ns'),
574                'min' => $INPUT->arr('min'),
575                'p' => ($cur + 1)
576            ];
577            echo '<li class="next">';
578            echo '<a href="' . wl('', $p) . '">';
579            echo '»';
580            echo '</a>';
581            echo '</li>';
582        }
583
584        echo '</ul>';
585    }
586}
587