1<?php
2/**
3 * DokuWiki Plugin elasticsearch (Action Component)
4 *
5 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6 * @author  Andreas Gohr <gohr@cosmocode.de>
7 */
8
9use dokuwiki\Extension\Event;
10
11/**
12 * Main search helper
13 */
14class action_plugin_elasticsearch_search extends DokuWiki_Action_Plugin {
15
16    /**
17     * Example array element for search field 'tagging':
18     * 'tagging' => [                       // also used as search query parameter
19     *   'label' => 'Tag',
20     *   'fieldPath' => 'tagging',          // dot notation in more complex mappings
21     *   'limit' => '50',
22     * ]
23     *
24     * @var Array
25     */
26    protected static $pluginSearchConfigs;
27
28    /**
29     * Search will be performed on those fields only.
30     *
31     * @var string[]
32     */
33    protected $searchFields = [
34        'title*',
35        'abstract*',
36        'content*',
37        'uri',
38    ];
39
40    /**
41     * Registers a callback function for a given event
42     *
43     * @param Doku_Event_Handler $controller DokuWiki's event controller object
44     * @return void
45     */
46    public function register(Doku_Event_Handler $controller) {
47
48        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'handle_preprocess');
49        $controller->register_hook('TPL_ACT_UNKNOWN', 'BEFORE', $this, 'handle_action');
50        $controller->register_hook('FORM_QUICKSEARCH_OUTPUT', 'BEFORE', $this, 'quicksearch');
51    }
52
53    /**
54     * allow our custom do command
55     *
56     * @param Doku_Event $event
57     * @param $param
58     */
59    public function handle_preprocess(Doku_Event $event, $param) {
60        if ($event->data !== 'search') return;
61        $event->preventDefault();
62        $event->stopPropagation();
63    }
64
65    /**
66     * do the actual search
67     *
68     * @param Doku_Event $event
69     * @param $param
70     */
71    public function handle_action(Doku_Event $event, $param) {
72        if ($event->data !== 'search') return;
73        $event->preventDefault();
74        $event->stopPropagation();
75        global $QUERY;
76        global $INPUT;
77        global $ID;
78
79        if (empty($QUERY)) $QUERY = $INPUT->str('q');
80        if (empty($QUERY)) $QUERY = $ID;
81
82        // get extended search configurations from plugins
83        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_FILTERS', self::$pluginSearchConfigs);
84
85        /** @var helper_plugin_elasticsearch_client $hlp */
86        $hlp = plugin_load('helper', 'elasticsearch_client');
87
88        $client = $hlp->connect();
89        $index  = $client->getIndex($this->getConf('indexname'));
90
91        // store copy of the original query string
92        $q = $QUERY;
93        // let plugins manipulate the query
94        $additions = [];
95        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_QUERY', $additions);
96        // if query is empty, return all results
97        if (empty(trim($QUERY))) $QUERY = '*';
98
99        // get fields to use in query
100        $fields = [];
101        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_SEARCHFIELDS', $fields);
102
103        if ($this->getConf('searchSyntax')) {
104            array_push($this->searchFields, 'syntax*');
105        }
106
107        // finally define the elastic query
108        $qstring = new \Elastica\Query\SimpleQueryString($QUERY, array_merge($this->searchFields, $fields));
109        // restore the original query
110        $QUERY = $q;
111        // append additions provided by plugins
112        if (!empty($additions)) {
113            $QUERY .= ' ' . implode(' ', $additions);
114        }
115
116        // create the actual search object
117        $equery = new \Elastica\Query();
118        $subqueries = new \Elastica\Query\BoolQuery();
119        $subqueries->addMust($qstring);
120
121        $equery->setHighlight(
122            [
123                "pre_tags"  => ['ELASTICSEARCH_MARKER_IN'],
124                "post_tags" => ['ELASTICSEARCH_MARKER_OUT'],
125                "fields"    => [
126                    $this->getConf('snippets') => new \stdClass(),
127                    'title' => new \stdClass()]
128            ]
129        );
130
131        // paginate
132        $equery->setSize($this->getConf('perpage'));
133        $equery->setFrom($this->getConf('perpage') * ($INPUT->int('p', 1, true) - 1));
134
135        // add ACL subqueries
136        $this->addACLSubqueries($subqueries);
137
138        // add language subquery
139        $this->addLanguageSubquery($subqueries, $this->getLanguageFilter());
140
141        // add date subquery
142        if ($INPUT->has('min')) {
143            $this->addDateSubquery($subqueries, $INPUT->str('min'));
144        }
145
146        // add namespace filter
147        if($INPUT->has('ns')) {
148            $nsSubquery = new \Elastica\Query\BoolQuery();
149            foreach ($INPUT->arr('ns') as $ns) {
150                $term = new \Elastica\Query\Term();
151                $term->setTerm('namespace', $ns);
152                $nsSubquery->addShould($term);
153            }
154            $equery->setPostFilter($nsSubquery);
155        }
156
157
158        // add aggregations for namespaces
159        $agg = new \Elastica\Aggregation\Terms('namespace');
160        $agg->setField('namespace.keyword');
161        $agg->setSize(25);
162        $equery->addAggregation($agg);
163
164        // add search configurations from other plugins
165        $this->addPluginConfigurations($equery, $subqueries);
166
167        $equery->setQuery($subqueries);
168
169        try {
170            $result = $index->search($equery);
171            $aggs = $result->getAggregations();
172
173            $this->print_intro();
174            /** @var helper_plugin_elasticsearch_form $hlpform */
175            $hlpform = plugin_load('helper', 'elasticsearch_form');
176            $hlpform->tpl($aggs);
177            $this->print_results($result) && $this->print_pagination($result);
178        } catch(Exception $e) {
179            msg('Something went wrong on searching please try again later or ask an admin for help.<br /><pre>' . hsc($e->getMessage()) . '</pre>', -1);
180        }
181    }
182
183    /**
184     * Optionally disable "quick search"
185     *
186     * @param Doku_Event $event
187     */
188    public function quicksearch(Doku_Event $event)
189    {
190        if (!$this->getConf('disableQuicksearch')) return;
191
192        /** @var \dokuwiki\Form\Form $form */
193        $form = $event->data;
194        $pos = $form->findPositionByAttribute('id', 'qsearch__out');
195        $form->removeElement($pos);
196        $form->removeElement($pos + 1); // div closing tag
197    }
198
199    /**
200     * @return array
201     */
202    public static function getRawPluginSearchConfigs()
203    {
204        return self::$pluginSearchConfigs;
205    }
206
207    /**
208     * Add search configurations supplied by other plugins
209     *
210     * @param \Elastica\Query $equery
211     * @param \Elastica\Query\BoolQuery
212     */
213    protected function addPluginConfigurations($equery, $subqueries)
214    {
215        global $INPUT;
216
217        if (!empty(self::$pluginSearchConfigs)) {
218            foreach (self::$pluginSearchConfigs as $param => $config) {
219                // handle search parameter
220                if ($INPUT->has($param)) {
221                    $pluginSubquery = new \Elastica\Query\BoolQuery();
222                    foreach($INPUT->arr($param) as $item) {
223                        $eterm = new \Elastica\Query\Term();
224                        $eterm->setTerm($param, $item);
225                        $pluginSubquery->addShould($eterm);
226                    }
227                    $subqueries->addMust($pluginSubquery);
228                }
229
230                // build aggregation for use as filter in advanced search
231                $agg = new \Elastica\Aggregation\Terms($param);
232                $agg->setField($config['fieldPath']);
233                if (isset($config['limit'])) {
234                    $agg->setSize($config['limit']);
235                }
236                $equery->addAggregation($agg);
237            }
238        }
239    }
240
241    /**
242     * Adds date subquery
243     *
244     * @param Elastica\Query\BoolQuery $subqueries
245     * @param string $min Modified at the latest one {year|month|week} ago
246     */
247    protected function addDateSubquery($subqueries, $min)
248    {
249        if (!in_array($min, ['year', 'month', 'week'])) return;
250
251        $dateSubquery = new \Elastica\Query\Range(
252            'modified',
253            ['gte' => date('Y-m-d', strtotime('1 ' . $min . ' ago'))]
254        );
255        $subqueries->addMust($dateSubquery);
256    }
257
258    /**
259     * Adds language subquery
260     *
261     * @param Elastica\Query\BoolQuery $subqueries
262     * @param array $langFilter
263     */
264    protected function addLanguageSubquery($subqueries, $langFilter)
265    {
266        if (empty($langFilter)) return;
267
268        $langSubquery = new \Elastica\Query\MatchQuery();
269        $langSubquery->setField('language', implode(',', $langFilter));
270        $subqueries->addMust($langSubquery);
271    }
272
273    /**
274     * Languages to be used in the current search, determined by:
275     * 1. $INPUT variables, or 2. translation plugin
276     *
277     * @return array
278     */
279    protected function getLanguageFilter()
280    {
281        global $ID;
282        global $INPUT;
283
284        $ns = getNS($ID);
285        $langFilter = $INPUT->arr('lang');
286
287        /** @var helper_plugin_translation $transplugin */
288        $transplugin = plugin_load('helper', 'translation');
289
290        // optional translation detection: use current top namespace if it matches translation config
291        if (empty($langFilter) && $transplugin && $this->getConf('detectTranslation') && $ns) {
292            $topNs = strtok($ns, ':');
293            if (in_array($topNs, $transplugin->translations)) {
294                $langFilter = [$topNs];
295                $INPUT->set('lang', $langFilter);
296            }
297        } else if (empty($langFilter) && $transplugin) {
298            // select all available translations
299            $INPUT->set('lang', $transplugin->translations);
300        }
301
302        return $langFilter;
303    }
304
305    /**
306     * Inserts subqueries based on current user's ACLs, none for superusers
307     *
308     * @param \Elastica\Query\BoolQuery $subqueries
309     */
310    protected function addACLSubqueries($subqueries)
311    {
312        global $USERINFO;
313        global $INFO;
314
315        $groups = array_merge(['ALL'], $USERINFO['grps'] ?: []);
316
317        // no ACL filters for superusers
318        if ($INFO['isadmin']) return;
319
320        // include if group OR user have read permissions, allows for ACLs such as "block @group except user"
321        $includeSubquery = new \Elastica\Query\BoolQuery();
322        foreach($groups as $group) {
323            $term = new \Elastica\Query\Term();
324            $term->setTerm('groups_include', $group);
325            $includeSubquery->addShould($term);
326        }
327        if (isset($_SERVER['REMOTE_USER'])) {
328            $userIncludeSubquery = new \Elastica\Query\BoolQuery();
329            $term = new \Elastica\Query\Term();
330            $term->setTerm('users_include', $_SERVER['REMOTE_USER']);
331            $userIncludeSubquery->addMust($term);
332            $includeSubquery->addShould($userIncludeSubquery);
333        }
334        $subqueries->addMust($includeSubquery);
335
336        // groups exclusion SHOULD be respected, not MUST, since that would not allow for exceptions
337        $groupExcludeSubquery = new \Elastica\Query\BoolQuery();
338        foreach($groups as $group) {
339            $term = new \Elastica\Query\Term();
340            $term->setTerm('groups_exclude', $group);
341            $groupExcludeSubquery->addShould($term);
342        }
343        $excludeSubquery = new \Elastica\Query\BoolQuery();
344        $excludeSubquery->addMustNot($groupExcludeSubquery);
345        $subqueries->addShould($excludeSubquery);
346
347        // user specific excludes must always be respected
348        if (isset($_SERVER['REMOTE_USER'])) {
349            $term = new \Elastica\Query\Term();
350            $term->setTerm('users_exclude', $_SERVER['REMOTE_USER']);
351            $subqueries->addMustNot($term);
352        }
353    }
354
355    /**
356     * Prints the introduction text
357     */
358    protected function print_intro() {
359        global $QUERY;
360        global $ID;
361        global $lang;
362
363        // just reuse the standard search page intro:
364        $intro = p_locale_xhtml('searchpage');
365        // allow use of placeholder in search intro
366        $pagecreateinfo = '';
367        if (auth_quickaclcheck($ID) >= AUTH_CREATE) {
368            $pagecreateinfo = sprintf($lang['searchcreatepage'], $QUERY);
369        }
370        $intro          = str_replace(
371            ['@QUERY@', '@SEARCH@', '@CREATEPAGEINFO@'],
372            [hsc(rawurlencode($QUERY)), hsc($QUERY), $pagecreateinfo],
373            $intro
374        );
375        echo $intro;
376        flush();
377    }
378
379    /**
380     * Output the search results
381     *
382     * @param \Elastica\ResultSet $results
383     * @return bool true when results where shown
384     */
385    protected function print_results($results) {
386        global $lang;
387
388        // output results
389        $found = $results->getTotalHits();
390
391        if(!$found) {
392            echo '<h2>' . $lang['nothingfound'] . '</h2>';
393            return (bool)$found;
394        }
395
396        echo '<dl class="search_results">';
397        echo '<h2>' . sprintf($this->getLang('totalfound'), $found) . '</h2>';
398        foreach ($results as $row) {
399
400            /** @var Elastica\Result $row */
401            $doc = $row->getSource();
402            $page = $doc['uri'];
403            if (!(page_exists($page) || is_file(mediaFN($page))) || isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ) continue;
404
405            // get highlighted title
406            $highlightsTitle = $row->getHighlights()['title'] ?? '';
407            $title = str_replace(
408                ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'],
409                ['<strong class="search_hit">', '</strong>'],
410                hsc(join(' … ', (array) $highlightsTitle))
411            );
412            if (!$title) $title = hsc($doc['title']);
413            if (!$title) $title = hsc(p_get_first_heading($page));
414            if (!$title) $title = hsc($page);
415
416            // get highlighted snippet
417            $highlightedSnippets = $row->getHighlights()[$this->getConf('snippets')] ?? [];
418            $snippet = str_replace(
419                ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'],
420                ['<strong class="search_hit">', '</strong>'],
421                hsc(join(' … ', $highlightedSnippets))
422            );
423            if (!$snippet) $snippet = hsc($doc['abstract']); // always fall back to abstract
424
425            // assume page if no doctype is set, because old index won't have doctypes
426            $isPage = empty($doc['doctype']) || $doc['doctype'] === \action_plugin_elasticsearch_indexing::DOCTYPE_PAGE;
427            $href = $isPage ? wl($page) : ml($page);
428
429            echo '<dt>';
430            if (!$isPage && is_file(DOKU_INC . 'lib/images/fileicons/'. $doc['ext'] .'.png')) {
431                echo sprintf(
432                    '<img src="%s" alt="%s" /> ',
433                    DOKU_BASE . 'lib/images/fileicons/'. $doc['ext'] .'.png',
434                    $doc['ext']
435                );
436            }
437            echo '<a href="' . $href . '" class="wikilink1" title="'.hsc($page).'">';
438            echo $title;
439            echo '</a>';
440            echo '</dt>';
441
442            // meta
443            echo '<dd class="meta elastic-resultmeta">';
444            if (!empty($doc['namespace'])) {
445                echo '<span class="ns">' . $this->getLang('ns') . ' ' . hsc($doc['namespace']) . '</span>';
446            }
447            if ($doc['modified']) {
448                $lastmod = strtotime($doc['modified']);
449                echo ' <span class="">' . $lang['lastmod'] . ' ' . dformat($lastmod) . '</span>';
450            }
451            if (!empty($doc['user'])) {
452                echo ' <span class="author">' . $this->getLang('author') . ' ' . userlink($doc['user']) . '</span>';
453            }
454            echo '</dd>';
455
456            // snippets
457            echo '<dd class="snippet">';
458            echo $snippet;
459            echo '</dd>';
460
461        }
462        echo '</dl>';
463
464        return (bool) $found;
465    }
466
467    /**
468     * @param \Elastica\ResultSet $result
469     */
470    protected function print_pagination($result) {
471        global $INPUT;
472        global $QUERY;
473
474        $all   = $result->getTotalHits();
475        $pages = ceil($all / $this->getConf('perpage'));
476        $cur   = $INPUT->int('p', 1, true);
477
478        if($pages < 2) return;
479
480        // which pages to show
481        $toshow = [1, 2, $cur, $pages, $pages - 1];
482        if($cur - 1 > 1) $toshow[] = $cur - 1;
483        if($cur + 1 < $pages) $toshow[] = $cur + 1;
484        $toshow = array_unique($toshow);
485        // fill up to seven, if possible
486        if(count($toshow) < 7) {
487            if($cur < 4) {
488                if($cur + 2 < $pages && count($toshow) < 7) $toshow[] = $cur + 2;
489                if($cur + 3 < $pages && count($toshow) < 7) $toshow[] = $cur + 3;
490                if($cur + 4 < $pages && count($toshow) < 7) $toshow[] = $cur + 4;
491            } else {
492                if($cur - 2 > 1 && count($toshow) < 7) $toshow[] = $cur - 2;
493                if($cur - 3 > 1 && count($toshow) < 7) $toshow[] = $cur - 3;
494                if($cur - 4 > 1 && count($toshow) < 7) $toshow[] = $cur - 4;
495            }
496        }
497        sort($toshow);
498        $showlen = count($toshow);
499
500        echo '<ul class="elastic_pagination">';
501        if($cur > 1) {
502            echo '<li class="prev">';
503            echo '<a href="' . wl('', http_build_query(['q' => $QUERY, 'do' => 'search', 'ns' => $INPUT->arr('ns'), 'min' => $INPUT->arr('min'), 'p' => ($cur-1)])) . '">';
504            echo '«';
505            echo '</a>';
506            echo '</li>';
507        }
508
509        for($i = 0; $i < $showlen; $i++) {
510            if($toshow[$i] == $cur) {
511                echo '<li class="cur">' . $toshow[$i] . '</li>';
512            } else {
513                echo '<li>';
514                echo '<a href="' . wl('', http_build_query(['q' => $QUERY, 'do' => 'search', 'ns' => $INPUT->arr('ns'), 'min' => $INPUT->arr('min'), 'p' => $toshow[$i]])) . '">';
515                echo $toshow[$i];
516                echo '</a>';
517                echo '</li>';
518            }
519
520            // show seperator when a jump follows
521            if(isset($toshow[$i + 1]) && $toshow[$i + 1] - $toshow[$i] > 1) {
522                echo '<li class="sep">…</li>';
523            }
524        }
525
526        if($cur < $pages) {
527            echo '<li class="next">';
528            echo '<a href="' . wl('', http_build_query(['q' => $QUERY, 'do' => 'search', 'ns' => $INPUT->arr('ns'), 'min' => $INPUT->arr('min'), 'p' => ($cur+1)])) . '">';
529            echo '»';
530            echo '</a>';
531            echo '</li>';
532        }
533
534        echo '</ul>';
535    }
536
537}
538