1<?php
2/**
3 * DokuWiki Plugin elasticsearch (Action Component)
4 *
5 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6 * @author  Andreas Gohr <gohr@cosmocode.de>
7 */
8
9use dokuwiki\Extension\Event;
10
11/**
12 * Main search helper
13 */
14class action_plugin_elasticsearch_search extends DokuWiki_Action_Plugin {
15
    /**
     * Search configurations supplied by other plugins through the
     * PLUGIN_ELASTICSEARCH_FILTERS event, keyed by search field name.
     *
     * Example array element for search field 'tagging':
     * 'tagging' => [                       // also used as search query parameter
     *   'label' => 'Tag',
     *   'fieldPath' => 'tagging',          // dot notation in more complex mappings
     *   'limit' => '50',
     * ]
     *
     * @var array
     */
26    protected static $pluginSearchConfigs;
27
28    /**
29     * Search will be performed on those fields only.
30     *
31     * @var string[]
32     */
33    protected $searchFields = [
34        'title*',
35        'abstract*',
36        'content*',
37        'uri',
38    ];
39
40    /**
41     * Registers a callback function for a given event
42     *
43     * @param Doku_Event_Handler $controller DokuWiki's event controller object
44     * @return void
45     */
46    public function register(Doku_Event_Handler $controller) {
47
48        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'handle_preprocess');
49        $controller->register_hook('TPL_ACT_UNKNOWN', 'BEFORE', $this, 'handle_action');
50        $controller->register_hook('FORM_QUICKSEARCH_OUTPUT', 'BEFORE', $this, 'quicksearch');
51    }
52
53    /**
54     * allow our custom do command
55     *
56     * @param Doku_Event $event
57     * @param $param
58     */
59    public function handle_preprocess(Doku_Event $event, $param) {
60        if ($event->data !== 'search') return;
61        $event->preventDefault();
62        $event->stopPropagation();
63    }
64
65    /**
66     * do the actual search
67     *
68     * @param Doku_Event $event
69     * @param $param
70     */
71    public function handle_action(Doku_Event $event, $param) {
72        if ($event->data !== 'search') return;
73        $event->preventDefault();
74        $event->stopPropagation();
75        global $QUERY;
76        global $INPUT;
77        global $ID;
78
79        if (empty($QUERY)) $QUERY = $INPUT->str('q');
80        if (empty($QUERY)) $QUERY = $ID;
81
82        // get extended search configurations from plugins
83        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_FILTERS', self::$pluginSearchConfigs);
84
85        /** @var helper_plugin_elasticsearch_client $hlp */
86        $hlp = plugin_load('helper', 'elasticsearch_client');
87
88        $client = $hlp->connect();
89        $index  = $client->getIndex($this->getConf('indexname'));
90
91        // store copy of the original query string
92        $q = $QUERY;
93        // let plugins manipulate the query
94        $additions = [];
95        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_QUERY', $additions);
96        // if query is empty, return all results
97        if (empty(trim($QUERY))) $QUERY = '*';
98
99        // get fields to use in query
100        $fields = [];
101        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_SEARCHFIELDS', $fields);
102
103        if ($this->getConf('searchSyntax')) {
104            array_push($this->searchFields, 'syntax*');
105        }
106
107        // finally define the elastic query
108        $qstring = new \Elastica\Query\SimpleQueryString($QUERY, array_merge($this->searchFields, $fields));
109        // restore the original query
110        $QUERY = $q;
111        // append additions provided by plugins
112        if (!empty($additions)) {
113            $QUERY .= ' ' . implode(' ', $additions);
114        }
115
116        // create the actual search object
117        $equery = new \Elastica\Query();
118        $subqueries = new \Elastica\Query\BoolQuery();
119        $subqueries->addMust($qstring);
120
121        $equery->setHighlight(
122            [
123                "pre_tags"  => ['ELASTICSEARCH_MARKER_IN'],
124                "post_tags" => ['ELASTICSEARCH_MARKER_OUT'],
125                "fields"    => [
126                    $this->getConf('snippets') => new \stdClass(),
127                    'title' => new \stdClass()]
128            ]
129        );
130
131        // paginate
132        $equery->setSize($this->getConf('perpage'));
133        $equery->setFrom($this->getConf('perpage') * ($INPUT->int('p', 1, true) - 1));
134
135        // add ACL subqueries
136        $this->addACLSubqueries($subqueries);
137
138        // add language subquery
139        $this->addLanguageSubquery($subqueries, $this->getLanguageFilter());
140
141        // add date subquery
142        if ($INPUT->has('min')) {
143            $this->addDateSubquery($subqueries, $INPUT->str('min'));
144        }
145
146        // add namespace filter
147        if($INPUT->has('ns')) {
148            $nsSubquery = new \Elastica\Query\BoolQuery();
149            foreach($INPUT->arr('ns') as $ns) {
150                $eterm = new \Elastica\Query\Term();
151                $eterm->setTerm('namespace', $ns);
152                $nsSubquery->addShould($eterm);
153            }
154            $equery->setPostFilter($nsSubquery);
155        }
156
157        $equery->setQuery($subqueries);
158
159        // add aggregations for namespaces
160        $agg = new \Elastica\Aggregation\Terms('namespace');
161        $agg->setField('namespace.keyword');
162        $agg->setSize(25);
163        $equery->addAggregation($agg);
164
165        // add search configurations from other plugins
166        $this->addPluginConfigurations($equery);
167
168        try {
169            $result = $index->search($equery);
170            $aggs = $result->getAggregations();
171
172            $this->print_intro();
173            /** @var helper_plugin_elasticsearch_form $hlpform */
174            $hlpform = plugin_load('helper', 'elasticsearch_form');
175            $hlpform->tpl($aggs);
176            $this->print_results($result) && $this->print_pagination($result);
177        } catch(Exception $e) {
178            msg('Something went wrong on searching please try again later or ask an admin for help.<br /><pre>' . hsc($e->getMessage()) . '</pre>', -1);
179        }
180    }
181
182    /**
183     * Optionally disable "quick search"
184     *
185     * @param Doku_Event $event
186     */
187    public function quicksearch(Doku_Event $event)
188    {
189        if (!$this->getConf('disableQuicksearch')) return;
190
191        /** @var \dokuwiki\Form\Form $form */
192        $form = $event->data;
193        $pos = $form->findPositionByAttribute('id', 'qsearch__out');
194        $form->removeElement($pos);
195        $form->removeElement($pos + 1); // div closing tag
196    }
197
198    /**
199     * @return array
200     */
201    public static function getRawPluginSearchConfigs()
202    {
203        return self::$pluginSearchConfigs;
204    }
205
206    /**
207     * Add search configurations supplied by other plugins
208     *
209     * @param \Elastica\Query $equery
210     */
211    protected function addPluginConfigurations($equery)
212    {
213        global $INPUT;
214
215        if (!empty(self::$pluginSearchConfigs)) {
216            foreach (self::$pluginSearchConfigs as $param => $config) {
217                // handle search parameter
218                if ($INPUT->has($param)) {
219                    $pluginSubquery = new \Elastica\Query\BoolQuery();
220                    foreach($INPUT->arr($param) as $item) {
221                        $eterm = new \Elastica\Query\Term();
222                        $eterm->setTerm($param, $item);
223                        $pluginSubquery->addShould($eterm);
224                    }
225                    $equery->setPostFilter($pluginSubquery);
226                }
227                // build aggregation for use as filter in advanced search
228                $agg = new \Elastica\Aggregation\Terms($param);
229                $agg->setField($config['fieldPath']);
230                if (isset($config['limit'])) {
231                    $agg->setSize($config['limit']);
232                }
233                $equery->addAggregation($agg);
234            }
235        }
236    }
237
238    /**
239     * Adds date subquery
240     *
241     * @param Elastica\Query\BoolQuery $subqueries
242     * @param string $min Modified at the latest one {year|month|week} ago
243     */
244    protected function addDateSubquery($subqueries, $min)
245    {
246        if (!in_array($min, ['year', 'month', 'week'])) return;
247
248        $dateSubquery = new \Elastica\Query\Range(
249            'modified',
250            ['gte' => date('Y-m-d', strtotime('1 ' . $min . ' ago'))]
251        );
252        $subqueries->addMust($dateSubquery);
253    }
254
255    /**
256     * Adds language subquery
257     *
258     * @param Elastica\Query\BoolQuery $subqueries
259     * @param array $langFilter
260     */
261    protected function addLanguageSubquery($subqueries, $langFilter)
262    {
263        if (empty($langFilter)) return;
264
265        $langSubquery = new \Elastica\Query\Match();
266        $langSubquery->setField('language', implode(',', $langFilter));
267        $subqueries->addMust($langSubquery);
268    }
269
270    /**
271     * Languages to be used in the current search, determined by:
272     * 1. $INPUT variables, or 2. translation plugin
273     *
274     * @return array
275     */
276    protected function getLanguageFilter()
277    {
278        global $ID;
279        global $INPUT;
280
281        $ns = getNS($ID);
282        $langFilter = $INPUT->arr('lang');
283
284        /** @var helper_plugin_translation $transplugin */
285        $transplugin = plugin_load('helper', 'translation');
286
287        // optional translation detection: use current top namespace if it matches translation config
288        if (empty($langFilter) && $transplugin && $this->getConf('detectTranslation') && $ns) {
289            $topNs = strtok($ns, ':');
290            if (in_array($topNs, $transplugin->translations)) {
291                $langFilter = [$topNs];
292                $INPUT->set('lang', $langFilter);
293            }
294        } else if (empty($langFilter) && $transplugin) {
295            // select all available translations
296            $INPUT->set('lang', $transplugin->translations);
297        }
298
299        return $langFilter;
300    }
301
302    /**
303     * Inserts subqueries based on current user's ACLs, none for superusers
304     *
305     * @param \Elastica\Query\BoolQuery $subqueries
306     */
307    protected function addACLSubqueries($subqueries)
308    {
309        global $USERINFO;
310        global $conf;
311
312        $groups = array_merge(['ALL'], $USERINFO['grps'] ?: []);
313
314        // no ACL filters for superusers
315        if (in_array(ltrim($conf['superuser'], '@'), $groups)) return;
316
317        // include if group OR user have read permissions, allows for ACLs such as "block @group except user"
318        $includeSubquery = new \Elastica\Query\BoolQuery();
319        foreach($groups as $group) {
320            $term = new \Elastica\Query\Term();
321            $term->setTerm('groups_include', $group);
322            $includeSubquery->addShould($term);
323        }
324        if (isset($_SERVER['REMOTE_USER'])) {
325            $userIncludeSubquery = new \Elastica\Query\BoolQuery();
326            $term = new \Elastica\Query\Term();
327            $term->setTerm('users_include', $_SERVER['REMOTE_USER']);
328            $userIncludeSubquery->addMust($term);
329            $includeSubquery->addShould($userIncludeSubquery);
330        }
331        $subqueries->addMust($includeSubquery);
332
333        // groups exclusion SHOULD be respected, not MUST, since that would not allow for exceptions
334        $groupExcludeSubquery = new \Elastica\Query\BoolQuery();
335        foreach($groups as $group) {
336            $term = new \Elastica\Query\Term();
337            $term->setTerm('groups_exclude', $group);
338            $groupExcludeSubquery->addShould($term);
339        }
340        $excludeSubquery = new \Elastica\Query\BoolQuery();
341        $excludeSubquery->addMustNot($groupExcludeSubquery);
342        $subqueries->addShould($excludeSubquery);
343
344        // user specific excludes must always be respected
345        if (isset($_SERVER['REMOTE_USER'])) {
346            $term = new \Elastica\Query\Term();
347            $term->setTerm('users_exclude', $_SERVER['REMOTE_USER']);
348            $subqueries->addMustNot($term);
349        }
350    }
351
352    /**
353     * Prints the introduction text
354     */
355    protected function print_intro() {
356        global $QUERY;
357        global $ID;
358        global $lang;
359
360        // just reuse the standard search page intro:
361        $intro = p_locale_xhtml('searchpage');
362        // allow use of placeholder in search intro
363        $pagecreateinfo = '';
364        if (auth_quickaclcheck($ID) >= AUTH_CREATE) {
365            $pagecreateinfo = sprintf($lang['searchcreatepage'], $QUERY);
366        }
367        $intro          = str_replace(
368            ['@QUERY@', '@SEARCH@', '@CREATEPAGEINFO@'],
369            [hsc(rawurlencode($QUERY)), hsc($QUERY), $pagecreateinfo],
370            $intro
371        );
372        echo $intro;
373        flush();
374    }
375
376    /**
377     * Output the search results
378     *
379     * @param \Elastica\ResultSet $results
380     * @return bool true when results where shown
381     */
382    protected function print_results($results) {
383        global $lang;
384
385        // output results
386        $found = $results->getTotalHits();
387
388        if(!$found) {
389            echo '<h2>' . $lang['nothingfound'] . '</h2>';
390            return (bool)$found;
391        }
392
393        echo '<dl class="search_results">';
394        echo '<h2>' . sprintf($this->getLang('totalfound'), $found) . '</h2>';
395        foreach($results as $row) {
396
397            /** @var Elastica\Result $row */
398            $page = $row->getSource()['uri'];
399            if(!page_exists($page) || isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ) continue;
400
401            // get highlighted title
402            $title = str_replace(
403                ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'],
404                ['<strong class="search_hit">', '</strong>'],
405                hsc(join(' … ', (array) $row->getHighlights()['title']))
406            );
407            if(!$title) $title = hsc($row->getSource()['title']);
408            if(!$title) $title = hsc(p_get_first_heading($page));
409            if(!$title) $title = hsc($page);
410
411            // get highlighted snippet
412            $snippet = str_replace(
413                ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'],
414                ['<strong class="search_hit">', '</strong>'],
415                hsc(join(' … ', (array) $row->getHighlights()[$this->getConf('snippets')]))
416            );
417            if(!$snippet) $snippet = hsc($row->getSource()['abstract']); // always fall back to abstract
418
419            echo '<dt>';
420            echo '<a href="'.wl($page).'" class="wikilink1" title="'.hsc($page).'">';
421            echo $title;
422            echo '</a>';
423            echo '</dt>';
424
425            // meta
426            echo '<dd class="meta elastic-resultmeta">';
427            if($row->getSource()['namespace']) {
428                echo '<span class="ns">' . $this->getLang('ns') . ' ' . hsc($row->getSource()['namespace']) . '</span>';
429            }
430            if($row->getSource()['user']) {
431                echo ' <span class="author">' . $this->getLang('author') . ' ' . userlink($row->getSource()['user']) . '</span>';
432            }
433            if($row->getSource()['modified']) {
434                $lastmod = strtotime($row->getSource()['modified']);
435                echo ' <span class="">' . $lang['lastmod'] . ' ' . dformat($lastmod) . '</span>';
436            }
437            echo '</dd>';
438
439            // snippets
440            echo '<dd class="snippet">';
441            echo $snippet;
442            echo '</dd>';
443
444        }
445        echo '</dl>';
446
447        return (bool) $found;
448    }
449
450    /**
451     * @param \Elastica\ResultSet $result
452     */
453    protected function print_pagination($result) {
454        global $INPUT;
455        global $QUERY;
456
457        $all   = $result->getTotalHits();
458        $pages = ceil($all / $this->getConf('perpage'));
459        $cur   = $INPUT->int('p', 1, true);
460
461        if($pages < 2) return;
462
463        // which pages to show
464        $toshow = [1, 2, $cur, $pages, $pages - 1];
465        if($cur - 1 > 1) $toshow[] = $cur - 1;
466        if($cur + 1 < $pages) $toshow[] = $cur + 1;
467        $toshow = array_unique($toshow);
468        // fill up to seven, if possible
469        if(count($toshow) < 7) {
470            if($cur < 4) {
471                if($cur + 2 < $pages && count($toshow) < 7) $toshow[] = $cur + 2;
472                if($cur + 3 < $pages && count($toshow) < 7) $toshow[] = $cur + 3;
473                if($cur + 4 < $pages && count($toshow) < 7) $toshow[] = $cur + 4;
474            } else {
475                if($cur - 2 > 1 && count($toshow) < 7) $toshow[] = $cur - 2;
476                if($cur - 3 > 1 && count($toshow) < 7) $toshow[] = $cur - 3;
477                if($cur - 4 > 1 && count($toshow) < 7) $toshow[] = $cur - 4;
478            }
479        }
480        sort($toshow);
481        $showlen = count($toshow);
482
483        echo '<ul class="elastic_pagination">';
484        if($cur > 1) {
485            echo '<li class="prev">';
486            echo '<a href="' . wl('', http_build_query(['q' => $QUERY, 'do' => 'search', 'ns' => $INPUT->arr('ns'), 'min' => $INPUT->arr('min'), 'p' => ($cur-1)])) . '">';
487            echo '«';
488            echo '</a>';
489            echo '</li>';
490        }
491
492        for($i = 0; $i < $showlen; $i++) {
493            if($toshow[$i] == $cur) {
494                echo '<li class="cur">' . $toshow[$i] . '</li>';
495            } else {
496                echo '<li>';
497                echo '<a href="' . wl('', http_build_query(['q' => $QUERY, 'do' => 'search', 'ns' => $INPUT->arr('ns'), 'min' => $INPUT->arr('min'), 'p' => $toshow[$i]])) . '">';
498                echo $toshow[$i];
499                echo '</a>';
500                echo '</li>';
501            }
502
503            // show seperator when a jump follows
504            if(isset($toshow[$i + 1]) && $toshow[$i + 1] - $toshow[$i] > 1) {
505                echo '<li class="sep">…</li>';
506            }
507        }
508
509        if($cur < $pages) {
510            echo '<li class="next">';
511            echo '<a href="' . wl('', http_build_query(['q' => $QUERY, 'do' => 'search', 'ns' => $INPUT->arr('ns'), 'min' => $INPUT->arr('min'), 'p' => ($cur+1)])) . '">';
512            echo '»';
513            echo '</a>';
514            echo '</li>';
515        }
516
517        echo '</ul>';
518    }
519
520}
521