<?php

/**
 * DokuWiki Plugin elasticsearch (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 */

use dokuwiki\Extension\Plugin;
use Elastica\Client;
use splitbrain\phpcli\Exception;
use Elastica\Index;
use Elastica\Response;
use Elastica\Mapping;
use dokuwiki\Extension\Event;

require_once __DIR__ . '/../vendor/autoload.php';

/**
 * Access to the Elastica client
 *
 * Builds the client from the plugin configuration and creates the search
 * index together with its field mappings.
 */
class helper_plugin_elasticsearch_client extends Plugin
{
    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    protected const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var Client $elasticaClient cached client, created on the first connect() call
     */
    protected $elasticaClient;

    /**
     * Builds the Elastica client from the plugin configuration and returns it
     *
     * The client is created only once per helper instance; later calls return
     * the cached object.
     *
     * The 'servers' setting is read line by line. Each line has the form
     * `host[:port][,proxy]`. Lines without a host are skipped, a missing or
     * non-numeric port falls back to 80 and a missing proxy becomes an empty
     * string. The configured username and password are attached to every server.
     *
     * @return Client
     */
    public function connect()
    {
        if (!is_null($this->elasticaClient)) {
            return $this->elasticaClient;
        }

        // security settings
        $username = $this->getConf('username');
        $password = $this->getConf('password');

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $lines = explode("\n", (string) $this->getConf('servers'));
        foreach ($lines as $line) {
            [$host, $proxy] = array_pad(explode(',', $line, 2), 2, null);
            // the port is optional: pad the result so a bare host name does not
            // trigger an "undefined array key" warning during destructuring
            [$host, $port] = array_pad(explode(':', $host, 2), 2, '');

            $host = trim($host);
            if (!$host) {
                continue;
            }

            $port = (int) trim($port);
            if (!$port) {
                $port = 80;
            }

            $dsn['servers'][] = [
                'host' => $host,
                'port' => $port,
                // $proxy is null when the line has no comma; trim() must not receive null
                'proxy' => trim((string) $proxy),
                'username' => $username,
                'password' => $password,
            ];
        }

        $this->elasticaClient = new Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index
     *
     * Creates the index named by the 'indexname' setting and then sends the
     * field mappings to it.
     *
     * @param bool $clear rebuild index (handed to the Elastica index create call as second argument)
     * @throws Exception when creating the index or the mappings returns an error response
     */
    public function createIndex($clear = false)
    {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        if ($index->create([], $clear)->hasError()) {
            throw new Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang ISO language code, looked up in self::ANALYZERS
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        return self::ANALYZERS[$lang] ?? 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     *
     * @param Index $index
     * @return Response
     */
    protected function createMappings(Index $index): Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings: ['plugin' => ['type' => 'keyword'] ]
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        // merged last, so plugin supplied fields win over built-in ones with the same name
        foreach ($pluginProps as $fields) {
            $props = array_merge($props, $fields);
        }

        $mapping = new Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub field for the wiki's default
     * language. When the translation helper plugin can be loaded, one more
     * sub field is added for each of its configured translations.
     *
     * @return array
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type' => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type' => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang']),
                    ],
                ],
            ],
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already mapped above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang),
                ];
            }
        }

        return $langprops;
    }
}
// vim:ts=4:sw=4:et: