<?php

/**
 * DokuWiki Plugin elasticsearch (Action Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author Andreas Gohr <gohr@cosmocode.de>
 */

use dokuwiki\Extension\ActionPlugin;
use dokuwiki\Extension\Event;
use dokuwiki\Extension\EventHandler;
use dokuwiki\Form\Form;
use Elastica\Aggregation\Terms;
use Elastica\Query;
use Elastica\Query\BoolQuery;
use Elastica\Query\MatchQuery;
use Elastica\Query\Range;
use Elastica\Query\SimpleQueryString;
use Elastica\Query\Term;
use Elastica\ResultSet;

/**
 * Main search helper
 *
 * Takes over DokuWiki's "search" action, runs the query against the configured
 * Elasticsearch index and prints intro, filter form, results and pagination.
 */
class action_plugin_elasticsearch_search extends ActionPlugin
{
    /** Marker Elasticsearch puts in front of a highlighted fragment */
    protected const MARKER_IN = 'ELASTICSEARCH_MARKER_IN';

    /** Marker Elasticsearch puts behind a highlighted fragment */
    protected const MARKER_OUT = 'ELASTICSEARCH_MARKER_OUT';

    /**
     * Example array element for search field 'tagging':
     * 'tagging' => [ // also used as search query parameter
     *     'label' => 'Tag',
     *     'fieldPath' => 'tagging', // dot notation in more complex mappings
     *     'limit' => '50',
     * ]
     *
     * Filled by handlers of the PLUGIN_ELASTICSEARCH_FILTERS event; stays null
     * until that event has been triggered.
     *
     * @var array|null
     */
    protected static $pluginSearchConfigs;

    /**
     * Search will be performed on those fields only.
     *
     * @var string[]
     */
    protected $searchFields = [
        'title*',
        'abstract*',
        'content*',
        'uri',
    ];

    /**
     * Registers a callback function for a given event
     *
     * @param EventHandler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(EventHandler $controller)
    {
        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'handleActPreprocess');
        $controller->register_hook('TPL_ACT_UNKNOWN', 'BEFORE', $this, 'handleActUnknown');
        $controller->register_hook('FORM_QUICKSEARCH_OUTPUT', 'BEFORE', $this, 'handleQuicksearchOutput');
    }

    /**
     * allow our custom do command
     *
     * Claims the 'search' action so that the default handling does not run.
     *
     * @param Event $event
     * @param mixed $param unused
     */
    public function handleActPreprocess(Event $event, $param)
    {
        if ($event->data !== 'search') return;
        $event->preventDefault();
        $event->stopPropagation();
    }

    /**
     * do the actual search
     *
     * Builds the Elasticsearch query (query string, highlighting, paging, ACL,
     * language, date and namespace filters, aggregations), executes it and
     * prints the result page. Any exception raised while searching or printing
     * is reported through msg() instead of being propagated.
     *
     * @param Event $event
     * @param mixed $param unused
     */
    public function handleActUnknown(Event $event, $param)
    {
        if ($event->data !== 'search') return;
        $event->preventDefault();
        $event->stopPropagation();
        global $QUERY;
        global $INPUT;
        global $ID;

        if (empty($QUERY)) $QUERY = $INPUT->str('q');
        if (empty($QUERY)) $QUERY = $ID;

        // get extended search configurations from plugins
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_FILTERS', self::$pluginSearchConfigs);

        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');

        $client = $hlp->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        // store copy of the original query string
        $q = $QUERY;
        // let plugins manipulate the query
        $additions = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_QUERY', $additions);
        // if query is empty, return all results
        if (empty(trim($QUERY))) $QUERY = '*';

        // get fields to use in query
        $fields = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_SEARCHFIELDS', $fields);

        // work on a local copy so repeated calls do not keep appending to the property
        $searchFields = $this->searchFields;
        if ($this->getConf('searchSyntax')) {
            $searchFields[] = 'syntax*';
        }

        // finally define the elastic query
        $qstring = new SimpleQueryString($QUERY, array_merge($searchFields, $fields));
        // restore the original query
        $QUERY = $q;
        // append additions provided by plugins
        if (!empty($additions)) {
            $QUERY .= ' ' . implode(' ', $additions);
        }

        // create the actual search object
        $equery = new Query();
        $subqueries = new BoolQuery();
        $subqueries->addMust($qstring);

        $equery->setHighlight(
            [
                'pre_tags' => [self::MARKER_IN],
                'post_tags' => [self::MARKER_OUT],
                'fields' => [
                    $this->getConf('snippets') => new \stdClass(),
                    'title' => new \stdClass(),
                ],
            ]
        );

        // paginate; the page number is clamped so the offset can never become negative
        $perPage = (int)$this->getConf('perpage');
        $equery->setSize($perPage);
        $equery->setFrom($perPage * ($this->currentPage() - 1));

        // add ACL subqueries
        $this->addACLSubqueries($subqueries);

        // add language subquery
        $this->addLanguageSubquery($subqueries, $this->getLanguageFilter());

        // add date subquery
        if ($INPUT->has('min')) {
            $this->addDateSubquery($subqueries, $INPUT->str('min'));
        }

        // add namespace filter
        if ($INPUT->has('ns')) {
            $nsSubquery = new BoolQuery();
            foreach ($INPUT->arr('ns') as $ns) {
                $term = new Term();
                $term->setTerm('namespace', $ns);
                $nsSubquery->addShould($term);
            }
            $equery->setPostFilter($nsSubquery);
        }

        // add aggregations for namespaces
        $agg = new Terms('namespace');
        $agg->setField('namespace.keyword');
        $agg->setSize(25);

        $equery->addAggregation($agg);

        // add search configurations from other plugins
        $this->addPluginConfigurations($equery, $subqueries);

        $equery->setQuery($subqueries);

        try {
            $result = $index->search($equery);
            $aggs = $result->getAggregations();

            $this->printIntro();
            /** @var helper_plugin_elasticsearch_form $hlpform */
            $hlpform = plugin_load('helper', 'elasticsearch_form');
            $hlpform->tpl($aggs);
            if ($this->printResults($result)) {
                $this->printPagination($result);
            }
        } catch (Exception $e) {
            msg('Something went wrong on searching please try again later or ask an admin for help.<br /><pre>' .
                hsc($e->getMessage()) . '</pre>', -1);
        }
    }

    /**
     * Optionally disable "quick search"
     *
     * Removes the element with id 'qsearch__out' from the quick search form
     * when the 'disableQuicksearch' option is set.
     *
     * @param Event $event
     */
    public function handleQuicksearchOutput(Event $event)
    {
        if (!$this->getConf('disableQuicksearch')) return;

        /** @var Form $form */
        $form = $event->data;
        $pos = $form->findPositionByAttribute('id', 'qsearch__out');
        // nothing to remove; without this guard a "false" position would be
        // treated as 0 and unrelated form elements would be dropped
        if ($pos === false) return;

        $form->removeElement($pos);
        $form->removeElement($pos + 1); // div closing tag
    }

    /**
     * Search configurations collected from other plugins
     *
     * @return array empty when no configuration has been collected (yet)
     */
    public static function getRawPluginSearchConfigs()
    {
        return self::$pluginSearchConfigs ?? [];
    }

    /**
     * Add search configurations supplied by other plugins
     *
     * For every configured parameter a terms aggregation is added; when the
     * parameter is present in the request, its values are additionally added
     * as a mandatory OR-filter.
     *
     * @param Query $equery
     * @param BoolQuery $subqueries
     */
    protected function addPluginConfigurations($equery, $subqueries)
    {
        global $INPUT;

        if (empty(self::$pluginSearchConfigs)) return;

        foreach (self::$pluginSearchConfigs as $param => $config) {
            // handle search parameter
            if ($INPUT->has($param)) {
                $pluginSubquery = new BoolQuery();
                foreach ($INPUT->arr($param) as $item) {
                    $eterm = new Term();
                    // NOTE(review): the filter targets $param while the aggregation below
                    // targets $config['fieldPath'] - confirm this is intended when they differ
                    $eterm->setTerm($param, $item);
                    $pluginSubquery->addShould($eterm);
                }
                $subqueries->addMust($pluginSubquery);
            }

            // build aggregation for use as filter in advanced search
            $agg = new Terms($param);
            $agg->setField($config['fieldPath']);
            if (isset($config['limit'])) {
                $agg->setSize($config['limit']);
            }
            $equery->addAggregation($agg);
        }
    }

    /**
     * Adds date subquery
     *
     * Values other than 'year', 'month' or 'week' are silently ignored.
     *
     * @param BoolQuery $subqueries
     * @param string $min Modified at the latest one {year|month|week} ago
     */
    protected function addDateSubquery($subqueries, $min)
    {
        if (!in_array($min, ['year', 'month', 'week'], true)) return;

        $dateSubquery = new Range(
            'modified',
            ['gte' => date('Y-m-d', strtotime('1 ' . $min . ' ago'))]
        );
        $subqueries->addMust($dateSubquery);
    }

    /**
     * Adds language subquery
     *
     * @param BoolQuery $subqueries
     * @param array $langFilter language codes; nothing is added when empty
     */
    protected function addLanguageSubquery($subqueries, $langFilter)
    {
        if (empty($langFilter)) return;

        $langSubquery = new MatchQuery();
        $langSubquery->setField('language', implode(',', $langFilter));

        $subqueries->addMust($langSubquery);
    }

    /**
     * Languages to be used in the current search, determined by:
     * 1. $INPUT variables, or 2. translation plugin
     *
     * Side effect: may write the 'lang' request parameter so that later
     * readers of $INPUT see the detected or available languages.
     *
     * @return array
     */
    protected function getLanguageFilter()
    {
        global $ID;
        global $INPUT;

        $ns = getNS($ID);
        $langFilter = $INPUT->arr('lang');

        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');

        // optional translation detection: use current top namespace if it matches translation config
        if (empty($langFilter) && $transplugin && $this->getConf('detectTranslation') && $ns) {
            $topNs = strtok($ns, ':');
            if (in_array($topNs, $transplugin->translations)) {
                $langFilter = [$topNs];
                $INPUT->set('lang', $langFilter);
            }
        } elseif (empty($langFilter) && $transplugin) {
            // select all available translations (the returned filter itself stays empty)
            $INPUT->set('lang', $transplugin->translations);
        }

        return $langFilter;
    }

    /**
     * Inserts subqueries based on current user's ACLs, none for superusers
     *
     * @param BoolQuery $subqueries
     */
    protected function addACLSubqueries($subqueries)
    {
        global $USERINFO;
        global $INFO;

        // no ACL filters for superusers
        if (!empty($INFO['isadmin'])) return;

        // $USERINFO may be unset for anonymous visitors, so fall back to no groups
        $userGroups = $USERINFO['grps'] ?? [];
        $groups = array_merge(['ALL'], is_array($userGroups) ? $userGroups : []);
        $user = $_SERVER['REMOTE_USER'] ?? null;

        // include if group OR user have read permissions, allows for ACLs such as "block @group except user"
        $includeSubquery = new BoolQuery();
        foreach ($groups as $group) {
            $term = new Term();
            $term->setTerm('groups_include', $group);
            $includeSubquery->addShould($term);
        }
        if ($user !== null) {
            $userIncludeSubquery = new BoolQuery();
            $term = new Term();
            $term->setTerm('users_include', $user);
            $userIncludeSubquery->addMust($term);
            $includeSubquery->addShould($userIncludeSubquery);
        }
        $subqueries->addMust($includeSubquery);

        // groups exclusion SHOULD be respected, not MUST, since that would not allow for exceptions
        $groupExcludeSubquery = new BoolQuery();
        foreach ($groups as $group) {
            $term = new Term();
            $term->setTerm('groups_exclude', $group);
            $groupExcludeSubquery->addShould($term);
        }
        $excludeSubquery = new BoolQuery();
        $excludeSubquery->addMustNot($groupExcludeSubquery);

        $subqueries->addShould($excludeSubquery);

        // user specific excludes must always be respected
        if ($user !== null) {
            $term = new Term();
            $term->setTerm('users_exclude', $user);
            $subqueries->addMustNot($term);
        }
    }

    /**
     * Prints the introduction text
     *
     * Reuses the localized 'searchpage' text and fills in its placeholders.
     */
    protected function printIntro()
    {
        global $QUERY;
        global $ID;
        global $lang;

        // just reuse the standard search page intro:
        $intro = p_locale_xhtml('searchpage');
        // allow use of placeholder in search intro
        $pagecreateinfo = '';
        if (auth_quickaclcheck($ID) >= AUTH_CREATE) {
            // the query is user input and ends up in HTML, so it has to be escaped
            $pagecreateinfo = sprintf($lang['searchcreatepage'], hsc($QUERY));
        }
        $intro = str_replace(
            ['@QUERY@', '@SEARCH@', '@CREATEPAGEINFO@'],
            [hsc(rawurlencode($QUERY)), hsc($QUERY), $pagecreateinfo],
            $intro
        );
        echo $intro;
        flush();
    }

    /**
     * Output the search results
     *
     * Hits whose page/media no longer exists, is hidden, or is not readable by
     * the current user are skipped.
     *
     * @param ResultSet $results
     * @return bool true when results were shown
     */
    protected function printResults($results)
    {
        global $lang;

        // output results
        $found = $results->getTotalHits();

        if (!$found) {
            echo '<h2>' . $lang['nothingfound'] . '</h2>';
            return (bool)$found;
        }

        echo '<dl class="search_results">';
        echo '<h2>' . sprintf($this->getLang('totalfound'), $found) . '</h2>';
        foreach ($results as $row) {
            /** @var Elastica\Result $row */
            $doc = $row->getSource();
            $page = $doc['uri'] ?? '';
            if (
                $page === '' ||
                (!page_exists($page) && !is_file(mediaFN($page))) ||
                isHiddenPage($page) ||
                auth_quickaclcheck($page) < AUTH_READ
            ) {
                continue;
            }

            $highlights = $row->getHighlights();

            // get highlighted title, falling back to stored title, first heading, page id
            $title = $this->renderHighlights((array)($highlights['title'] ?? []));
            if (!$title) $title = hsc($doc['title'] ?? '');
            if (!$title) $title = hsc(p_get_first_heading($page));
            if (!$title) $title = hsc($page);

            // get highlighted snippet
            $snippet = $this->renderHighlights((array)($highlights[$this->getConf('snippets')] ?? []));
            if (!$snippet) $snippet = hsc($doc['abstract'] ?? ''); // always fall back to abstract

            // assume page if no doctype is set, because old index won't have doctypes
            $isPage = empty($doc['doctype']) ||
                $doc['doctype'] === \action_plugin_elasticsearch_indexing::DOCTYPE_PAGE;
            $href = $isPage ? wl($page) : ml($page);

            echo '<dt>';
            $ext = (string)($doc['ext'] ?? '');
            $icon = 'lib/images/fileicons/' . $ext . '.png';
            if (!$isPage && $ext !== '' && is_file(DOKU_INC . $icon)) {
                // the extension comes from the index, escape it before it reaches the markup
                echo sprintf('<img src="%s" alt="%s" /> ', hsc(DOKU_BASE . $icon), hsc($ext));
            }
            echo '<a href="' . $href . '" class="wikilink1" title="' . hsc($page) . '">';
            echo $title;
            echo '</a>';
            echo '</dt>';

            // meta
            echo '<dd class="meta elastic-resultmeta">';
            if (!empty($doc['namespace'])) {
                echo '<span class="ns">' . $this->getLang('ns') . ' ' . hsc($doc['namespace']) . '</span>';
            }
            if (!empty($doc['modified'])) {
                $lastmod = strtotime($doc['modified']);
                echo ' <span class="">' . $lang['lastmod'] . ' ' . dformat($lastmod) . '</span>';
            }
            if (!empty($doc['user'])) {
                echo ' <span class="author">' . $this->getLang('author') . ' ' . userlink($doc['user']) . '</span>';
            }
            echo '</dd>';

            // snippets
            echo '<dd class="snippet">';
            echo $snippet;
            echo '</dd>';
        }
        echo '</dl>';

        return (bool)$found;
    }

    /**
     * Joins highlight fragments into escaped HTML with the hits wrapped in <strong>
     *
     * The fragments are escaped first; only afterwards the plain-text markers
     * are swapped for markup, so no index content can inject HTML.
     *
     * @param string[] $fragments highlighted fragments as returned by Elasticsearch
     * @return string HTML, empty string when there are no fragments
     */
    protected function renderHighlights(array $fragments)
    {
        return str_replace(
            [self::MARKER_IN, self::MARKER_OUT],
            ['<strong class="search_hit">', '</strong>'],
            hsc(implode(' … ', $fragments))
        );
    }

    /**
     * Requested result page number, never lower than 1
     *
     * @return int
     */
    protected function currentPage()
    {
        global $INPUT;

        return max(1, $INPUT->int('p', 1, true));
    }

    /**
     * Builds the link to another result page, keeping query and active filters
     *
     * Only scalar values are handed to wl(): namespaces are flattened into
     * ns[0], ns[1], ... keys and the date filter is passed as the string it is
     * read as when searching, so both survive a page change.
     *
     * @param int $page target page number
     * @return string URL as returned by wl()
     */
    protected function paginationUrl($page)
    {
        global $INPUT;
        global $QUERY;

        $params = [
            'q' => $QUERY,
            'do' => 'search',
        ];

        $nsIndex = 0;
        foreach ($INPUT->arr('ns') as $ns) {
            if (!is_scalar($ns)) continue;
            $params['ns[' . $nsIndex++ . ']'] = $ns;
        }

        if ($INPUT->has('min')) {
            $params['min'] = $INPUT->str('min');
        }

        $params['p'] = $page;

        return wl('', $params);
    }

    /**
     * Prints the pagination: previous/next links plus up to seven page numbers
     *
     * @param ResultSet $result
     */
    protected function printPagination($result)
    {
        $perPage = (int)$this->getConf('perpage');
        if ($perPage < 1) return; // avoid division by zero on a broken configuration

        $all = $result->getTotalHits();
        $pages = (int)ceil($all / $perPage);
        $cur = $this->currentPage();

        if ($pages < 2) return;

        // which pages to show
        $toshow = [1, 2, $cur, $pages, $pages - 1];
        if ($cur - 1 > 1) $toshow[] = $cur - 1;
        if ($cur + 1 < $pages) $toshow[] = $cur + 1;
        $toshow = array_unique($toshow);
        // fill up to seven, if possible
        if (count($toshow) < 7) {
            if ($cur < 4) {
                if ($cur + 2 < $pages && count($toshow) < 7) $toshow[] = $cur + 2;
                if ($cur + 3 < $pages && count($toshow) < 7) $toshow[] = $cur + 3;
                if ($cur + 4 < $pages && count($toshow) < 7) $toshow[] = $cur + 4;
            } else {
                if ($cur - 2 > 1 && count($toshow) < 7) $toshow[] = $cur - 2;
                if ($cur - 3 > 1 && count($toshow) < 7) $toshow[] = $cur - 3;
                if ($cur - 4 > 1 && count($toshow) < 7) $toshow[] = $cur - 4;
            }
        }
        sort($toshow); // also re-indexes the list after array_unique()
        $showlen = count($toshow);

        echo '<ul class="elastic_pagination">';
        if ($cur > 1) {
            echo '<li class="prev">';
            echo '<a href="' . $this->paginationUrl($cur - 1) . '">';
            echo '«';
            echo '</a>';
            echo '</li>';
        }

        for ($i = 0; $i < $showlen; $i++) {
            if ($toshow[$i] == $cur) {
                echo '<li class="cur">' . $toshow[$i] . '</li>';
            } else {
                echo '<li>';
                echo '<a href="' . $this->paginationUrl($toshow[$i]) . '">';
                echo $toshow[$i];
                echo '</a>';
                echo '</li>';
            }

            // show separator when a jump follows
            if (isset($toshow[$i + 1]) && $toshow[$i + 1] - $toshow[$i] > 1) {
                echo '<li class="sep">…</li>';
            }
        }

        if ($cur < $pages) {
            echo '<li class="next">';
            echo '<a href="' . $this->paginationUrl($cur + 1) . '">';
            echo '»';
            echo '</a>';
            echo '</li>';
        }

        echo '</ul>';
    }
}