<?php
/**
 * DokuWiki Plugin elasticsearch (Action Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */

use dokuwiki\Extension\Event;

/**
 * Main search helper
 *
 * Replaces the handling of the 'search' action: builds an Elastica query from
 * the request, runs it against the configured index and prints the result
 * page (intro, filter form, hits and pagination).
 */
class action_plugin_elasticsearch_search extends DokuWiki_Action_Plugin {

    /**
     * Example array element for search field 'tagging':
     * 'tagging' => [                       // also used as search query parameter
     *   'label' => 'Tag',
     *   'fieldPath' => 'tagging',          // dot notation in more complex mappings
     *   'limit' => '50',
     * ]
     *
     * Filled by the PLUGIN_ELASTICSEARCH_FILTERS event in handle_action().
     *
     * @var Array
     */
    protected static $pluginSearchConfigs;

    /**
     * Search will be performed on those fields only.
     *
     * @var string[]
     */
    protected $searchFields = [
        'title*',
        'abstract*',
        'content*',
        'uri',
    ];

    /**
     * Registers a callback function for a given event
     *
     * @param Doku_Event_Handler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(Doku_Event_Handler $controller) {

        $controller->register_hook('ACTION_ACT_PREPROCESS', 'BEFORE', $this, 'handle_preprocess');
        $controller->register_hook('TPL_ACT_UNKNOWN', 'BEFORE', $this, 'handle_action');
        $controller->register_hook('FORM_QUICKSEARCH_OUTPUT', 'BEFORE', $this, 'quicksearch');
    }

    /**
     * allow our custom do command
     *
     * Stops the default processing of the 'search' action; all other actions
     * are left untouched.
     *
     * @param Doku_Event $event
     * @param mixed $param unused
     */
    public function handle_preprocess(Doku_Event $event, $param) {
        if ($event->data !== 'search') return;
        $event->preventDefault();
        $event->stopPropagation();
    }

    /**
     * do the actual search
     *
     * Builds the query (search string, highlighting, paging, ACL, language,
     * date, namespace and plugin supplied filters), executes it and prints
     * the result page. Exceptions raised while searching or printing are
     * reported via msg().
     *
     * @param Doku_Event $event
     * @param mixed $param unused
     */
    public function handle_action(Doku_Event $event, $param) {
        if ($event->data !== 'search') return;
        $event->preventDefault();
        $event->stopPropagation();
        global $QUERY;
        global $INPUT;
        global $ID;

        if (empty($QUERY)) $QUERY = $INPUT->str('q');
        if (empty($QUERY)) $QUERY = $ID;

        // get extended search configurations from plugins
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_FILTERS', self::$pluginSearchConfigs);

        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');

        $client = $hlp->connect();
        $index  = $client->getIndex($this->getConf('indexname'));

        // store copy of the original query string
        $q = $QUERY;
        // let plugins manipulate the query
        $additions = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_QUERY', $additions);
        // if query is empty, return all results
        if (empty(trim($QUERY))) $QUERY = '*';

        // get fields to use in query
        $fields = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_SEARCHFIELDS', $fields);

        // add the syntax field only once, even if this handler runs repeatedly
        if ($this->getConf('searchSyntax') && !in_array('syntax*', $this->searchFields, true)) {
            $this->searchFields[] = 'syntax*';
        }

        // finally define the elastic query
        $qstring = new \Elastica\Query\SimpleQueryString($QUERY, array_merge($this->searchFields, $fields));
        // restore the original query
        $QUERY = $q;
        // append additions provided by plugins
        if (!empty($additions)) {
            $QUERY .= ' ' . implode(' ', $additions);
        }

        // create the actual search object
        $equery = new \Elastica\Query();
        $subqueries = new \Elastica\Query\BoolQuery();
        $subqueries->addMust($qstring);

        $equery->setHighlight(
            [
                "pre_tags"  => ['ELASTICSEARCH_MARKER_IN'],
                "post_tags" => ['ELASTICSEARCH_MARKER_OUT'],
                "fields"    => [
                    $this->getConf('snippets') => new \stdClass(),
                    'title' => new \stdClass()]
            ]
        );

        // paginate; page numbers below 1 would result in a negative offset
        $page = max(1, $INPUT->int('p', 1, true));
        $equery->setSize($this->getConf('perpage'));
        $equery->setFrom($this->getConf('perpage') * ($page - 1));

        // add ACL subqueries
        $this->addACLSubqueries($subqueries);

        // add language subquery
        $this->addLanguageSubquery($subqueries, $this->getLanguageFilter());

        // add date subquery
        if ($INPUT->has('min')) {
            $this->addDateSubquery($subqueries, $INPUT->str('min'));
        }

        // add namespace filter
        if ($INPUT->has('ns')) {
            $nsSubquery = new \Elastica\Query\BoolQuery();
            foreach ($INPUT->arr('ns') as $ns) {
                $term = new \Elastica\Query\Term();
                $term->setTerm('namespace', $ns);
                $nsSubquery->addShould($term);
            }
            $equery->setPostFilter($nsSubquery);
        }

        // add aggregations for namespaces
        $agg = new \Elastica\Aggregation\Terms('namespace');
        $agg->setField('namespace.keyword');
        $agg->setSize(25);
        $equery->addAggregation($agg);

        // add search configurations from other plugins
        $this->addPluginConfigurations($equery, $subqueries);

        $equery->setQuery($subqueries);

        try {
            $result = $index->search($equery);
            $aggs = $result->getAggregations();

            $this->print_intro();
            /** @var helper_plugin_elasticsearch_form $hlpform */
            $hlpform = plugin_load('helper', 'elasticsearch_form');
            $hlpform->tpl($aggs);
            // pagination is only printed when there were results
            $this->print_results($result) && $this->print_pagination($result);
        } catch (Exception $e) {
            msg('Something went wrong on searching please try again later or ask an admin for help.<br /><pre>' . hsc($e->getMessage()) . '</pre>', -1);
        }
    }

    /**
     * Optionally disable "quick search"
     *
     * When the 'disableQuicksearch' option is set, the quick search output
     * container and the element following it are removed from the form.
     *
     * @param Doku_Event $event
     */
    public function quicksearch(Doku_Event $event)
    {
        if (!$this->getConf('disableQuicksearch')) return;

        /** @var \dokuwiki\Form\Form $form */
        $form = $event->data;
        $pos = $form->findPositionByAttribute('id', 'qsearch__out');
        // nothing to remove when the container is missing; passing a
        // non-integer position on would remove an unrelated element
        if ($pos === false) return;
        $form->removeElement($pos);
        $form->removeElement($pos + 1); // div closing tag
    }

    /**
     * Returns the search configurations collected from other plugins
     *
     * The value is only populated by handle_action(); before that it is
     * whatever the static property was initialized with (null).
     *
     * @return array
     */
    public static function getRawPluginSearchConfigs()
    {
        return self::$pluginSearchConfigs;
    }

    /**
     * Add search configurations supplied by other plugins
     *
     * For every configured parameter a terms aggregation is added to the
     * query. If the parameter is present in the request, its values are
     * additionally added as a required "any of these terms" subquery.
     *
     * @param \Elastica\Query $equery
     * @param \Elastica\Query\BoolQuery $subqueries
     */
    protected function addPluginConfigurations($equery, $subqueries)
    {
        global $INPUT;

        if (empty(self::$pluginSearchConfigs)) return;

        foreach (self::$pluginSearchConfigs as $param => $config) {
            // handle search parameter
            if ($INPUT->has($param)) {
                $pluginSubquery = new \Elastica\Query\BoolQuery();
                foreach ($INPUT->arr($param) as $item) {
                    // NOTE(review): the term is matched against the parameter name,
                    // not $config['fieldPath'] - confirm this is intended
                    $eterm = new \Elastica\Query\Term();
                    $eterm->setTerm($param, $item);
                    $pluginSubquery->addShould($eterm);
                }
                $subqueries->addMust($pluginSubquery);
            }

            // build aggregation for use as filter in advanced search
            $agg = new \Elastica\Aggregation\Terms($param);
            $agg->setField($config['fieldPath']);
            if (isset($config['limit'])) {
                $agg->setSize($config['limit']);
            }
            $equery->addAggregation($agg);
        }
    }

    /**
     * Adds date subquery
     *
     * Values other than 'year', 'month' or 'week' are silently ignored.
     *
     * @param Elastica\Query\BoolQuery $subqueries
     * @param string $min Modified at the latest one {year|month|week} ago
     */
    protected function addDateSubquery($subqueries, $min)
    {
        if (!in_array($min, ['year', 'month', 'week'], true)) return;

        $dateSubquery = new \Elastica\Query\Range(
            'modified',
            ['gte' => date('Y-m-d', strtotime('1 ' . $min . ' ago'))]
        );
        $subqueries->addMust($dateSubquery);
    }

    /**
     * Adds language subquery
     *
     * Does nothing when no languages are given. Otherwise the languages are
     * joined with commas and matched against the 'language' field.
     *
     * @param Elastica\Query\BoolQuery $subqueries
     * @param array $langFilter
     */
    protected function addLanguageSubquery($subqueries, $langFilter)
    {
        if (empty($langFilter)) return;

        $langSubquery = new \Elastica\Query\MatchQuery();
        $langSubquery->setField('language', implode(',', $langFilter));
        $subqueries->addMust($langSubquery);
    }

    /**
     * Languages to be used in the current search, determined by:
     * 1. $INPUT variables, or 2. translation plugin
     *
     * As a side effect the 'lang' request parameter may be set: to the
     * detected language, or to all available translations when nothing was
     * requested and no detection is attempted. In the latter case the
     * returned filter stays empty.
     *
     * @return array
     */
    protected function getLanguageFilter()
    {
        global $ID;
        global $INPUT;

        $ns = getNS($ID);
        $langFilter = $INPUT->arr('lang');

        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');

        // optional translation detection: use current top namespace if it matches translation config
        if (empty($langFilter) && $transplugin && $this->getConf('detectTranslation') && $ns) {
            $topNs = strtok($ns, ':');
            if (in_array($topNs, $transplugin->translations)) {
                $langFilter = [$topNs];
                $INPUT->set('lang', $langFilter);
            }
        } else if (empty($langFilter) && $transplugin) {
            // select all available translations
            $INPUT->set('lang', $transplugin->translations);
        }

        return $langFilter;
    }

    /**
     * Inserts subqueries based on current user's ACLs, none for superusers
     *
     * @param \Elastica\Query\BoolQuery $subqueries
     */
    protected function addACLSubqueries($subqueries)
    {
        global $USERINFO;
        global $INFO;

        // $USERINFO may be unset for anonymous visitors, avoid reading an offset of null
        $groups = array_merge(['ALL'], ($USERINFO['grps'] ?? null) ?: []);

        // no ACL filters for superusers
        if ($INFO['isadmin']) return;

        // include if group OR user have read permissions, allows for ACLs such as "block @group except user"
        $includeSubquery = new \Elastica\Query\BoolQuery();
        foreach ($groups as $group) {
            $term = new \Elastica\Query\Term();
            $term->setTerm('groups_include', $group);
            $includeSubquery->addShould($term);
        }
        if (isset($_SERVER['REMOTE_USER'])) {
            $userIncludeSubquery = new \Elastica\Query\BoolQuery();
            $term = new \Elastica\Query\Term();
            $term->setTerm('users_include', $_SERVER['REMOTE_USER']);
            $userIncludeSubquery->addMust($term);
            $includeSubquery->addShould($userIncludeSubquery);
        }
        $subqueries->addMust($includeSubquery);

        // groups exclusion SHOULD be respected, not MUST, since that would not allow for exceptions
        $groupExcludeSubquery = new \Elastica\Query\BoolQuery();
        foreach ($groups as $group) {
            $term = new \Elastica\Query\Term();
            $term->setTerm('groups_exclude', $group);
            $groupExcludeSubquery->addShould($term);
        }
        $excludeSubquery = new \Elastica\Query\BoolQuery();
        $excludeSubquery->addMustNot($groupExcludeSubquery);
        $subqueries->addShould($excludeSubquery);

        // user specific excludes must always be respected
        if (isset($_SERVER['REMOTE_USER'])) {
            $term = new \Elastica\Query\Term();
            $term->setTerm('users_exclude', $_SERVER['REMOTE_USER']);
            $subqueries->addMustNot($term);
        }
    }

    /**
     * Prints the introduction text
     *
     * Reuses the localized 'searchpage' text and fills in the @QUERY@,
     * @SEARCH@ and @CREATEPAGEINFO@ placeholders.
     */
    protected function print_intro() {
        global $QUERY;
        global $ID;
        global $lang;

        // just reuse the standard search page intro:
        $intro = p_locale_xhtml('searchpage');
        // allow use of placeholder in search intro
        $pagecreateinfo = '';
        if (auth_quickaclcheck($ID) >= AUTH_CREATE) {
            // the query is user input and ends up in HTML, so it has to be escaped
            $pagecreateinfo = sprintf($lang['searchcreatepage'], hsc($QUERY));
        }
        $intro = str_replace(
            ['@QUERY@', '@SEARCH@', '@CREATEPAGEINFO@'],
            [hsc(rawurlencode($QUERY)), hsc($QUERY), $pagecreateinfo],
            $intro
        );
        echo $intro;
        flush();
    }

    /**
     * Output the search results
     *
     * Hits whose page or media file no longer exists, is hidden or is not
     * readable by the current user are skipped.
     *
     * @param \Elastica\ResultSet $results
     * @return bool true when results were shown
     */
    protected function print_results($results) {
        global $lang;

        // output results
        $found = $results->getTotalHits();

        if (!$found) {
            echo '<h2>' . $lang['nothingfound'] . '</h2>';
            return (bool) $found;
        }

        $markers = ['ELASTICSEARCH_MARKER_IN', 'ELASTICSEARCH_MARKER_OUT'];
        $markup  = ['<strong class="search_hit">', '</strong>'];

        echo '<dl class="search_results">';
        echo '<h2>' . sprintf($this->getLang('totalfound'), $found) . '</h2>';
        foreach ($results as $row) {

            /** @var Elastica\Result $row */
            $doc = $row->getSource();
            $page = $doc['uri'];
            if (!(page_exists($page) || is_file(mediaFN($page))) || isHiddenPage($page) || auth_quickaclcheck($page) < AUTH_READ) continue;

            $highlights = $row->getHighlights();

            // get highlighted title; escape first, then turn the markers into markup
            $title = str_replace($markers, $markup, hsc(join(' … ', (array) ($highlights['title'] ?? ''))));
            if (!$title) $title = hsc($doc['title'] ?? '');
            if (!$title) $title = hsc(p_get_first_heading($page));
            if (!$title) $title = hsc($page);

            // get highlighted snippet
            $highlightedSnippets = (array) ($highlights[$this->getConf('snippets')] ?? []);
            $snippet = str_replace($markers, $markup, hsc(join(' … ', $highlightedSnippets)));
            if (!$snippet) $snippet = hsc($doc['abstract'] ?? ''); // always fall back to abstract

            // assume page if no doctype is set, because old index won't have doctypes
            $isPage = empty($doc['doctype']) || $doc['doctype'] === \action_plugin_elasticsearch_indexing::DOCTYPE_PAGE;
            $href = $isPage ? wl($page) : ml($page);

            echo '<dt>';
            // the extension comes from the index, escape it before it goes into attributes
            $ext = $doc['ext'] ?? '';
            $icon = 'lib/images/fileicons/' . $ext . '.png';
            if (!$isPage && is_file(DOKU_INC . $icon)) {
                printf('<img src="%s" alt="%s" /> ', hsc(DOKU_BASE . $icon), hsc($ext));
            }
            echo '<a href="' . $href . '" class="wikilink1" title="' . hsc($page) . '">';
            echo $title;
            echo '</a>';
            echo '</dt>';

            // meta
            echo '<dd class="meta elastic-resultmeta">';
            if (!empty($doc['namespace'])) {
                echo '<span class="ns">' . $this->getLang('ns') . ' ' . hsc($doc['namespace']) . '</span>';
            }
            if (!empty($doc['modified'])) {
                $lastmod = strtotime($doc['modified']);
                echo ' <span class="">' . $lang['lastmod'] . ' ' . dformat($lastmod) . '</span>';
            }
            if (!empty($doc['user'])) {
                echo ' <span class="author">' . $this->getLang('author') . ' ' . userlink($doc['user']) . '</span>';
            }
            echo '</dd>';

            // snippets
            echo '<dd class="snippet">';
            echo $snippet;
            echo '</dd>';

        }
        echo '</dl>';

        return (bool) $found;
    }

    /**
     * Prints the pagination links
     *
     * Shows the first two, the last two and the pages around the current
     * one (up to seven entries), with separators where pages are left out.
     * Nothing is printed when there is only a single page.
     *
     * @param \Elastica\ResultSet $result
     */
    protected function print_pagination($result) {
        global $INPUT;
        global $QUERY;

        // guard against a misconfigured page size (division by zero)
        $perpage = (int) $this->getConf('perpage');
        if ($perpage < 1) return;

        $all   = $result->getTotalHits();
        $pages = (int) ceil($all / $perpage);
        $cur   = max(1, $INPUT->int('p', 1, true));

        if ($pages < 2) return;

        // 'min' is read as a string in handle_action(), so it has to be passed on
        // as a string; null values are left out of the query string
        $min = $INPUT->has('min') ? $INPUT->str('min') : null;
        $link = function ($page) use ($INPUT, $QUERY, $min) {
            return wl('', http_build_query([
                'q'   => $QUERY,
                'do'  => 'search',
                'ns'  => $INPUT->arr('ns'),
                'min' => $min,
                'p'   => $page,
            ]));
        };

        // which pages to show
        $toshow = [1, 2, $cur, $pages, $pages - 1];
        if ($cur - 1 > 1) $toshow[] = $cur - 1;
        if ($cur + 1 < $pages) $toshow[] = $cur + 1;
        $toshow = array_unique($toshow);
        // fill up to seven, if possible
        if (count($toshow) < 7) {
            if ($cur < 4) {
                if ($cur + 2 < $pages && count($toshow) < 7) $toshow[] = $cur + 2;
                if ($cur + 3 < $pages && count($toshow) < 7) $toshow[] = $cur + 3;
                if ($cur + 4 < $pages && count($toshow) < 7) $toshow[] = $cur + 4;
            } else {
                if ($cur - 2 > 1 && count($toshow) < 7) $toshow[] = $cur - 2;
                if ($cur - 3 > 1 && count($toshow) < 7) $toshow[] = $cur - 3;
                if ($cur - 4 > 1 && count($toshow) < 7) $toshow[] = $cur - 4;
            }
        }
        sort($toshow);
        $showlen = count($toshow);

        echo '<ul class="elastic_pagination">';
        if ($cur > 1) {
            echo '<li class="prev">';
            echo '<a href="' . $link($cur - 1) . '">';
            echo '«';
            echo '</a>';
            echo '</li>';
        }

        for ($i = 0; $i < $showlen; $i++) {
            if ($toshow[$i] == $cur) {
                echo '<li class="cur">' . $toshow[$i] . '</li>';
            } else {
                echo '<li>';
                echo '<a href="' . $link($toshow[$i]) . '">';
                echo $toshow[$i];
                echo '</a>';
                echo '</li>';
            }

            // show separator when a jump follows
            if (isset($toshow[$i + 1]) && $toshow[$i + 1] - $toshow[$i] > 1) {
                echo '<li class="sep">…</li>';
            }
        }

        if ($cur < $pages) {
            echo '<li class="next">';
            echo '<a href="' . $link($cur + 1) . '">';
            echo '»';
            echo '</a>';
            echo '</li>';
        }

        echo '</ul>';
    }

}