<?php
/**
 * DokuWiki Plugin elasticsearch (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 */

use dokuwiki\Extension\Event;

require_once dirname(__FILE__) . '/../vendor/autoload.php';

/**
 * Access to the Elastica client
 *
 * Builds the client from the plugin's 'servers' setting and creates the
 * index together with its field mappings.
 */
class helper_plugin_elasticsearch_client extends DokuWiki_Plugin {

    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var \Elastica\Client $elasticaClient cached client, created on first connect()
     */
    protected $elasticaClient = null;

    /**
     * Connects to the elastica servers and returns the client object
     *
     * The client is created once and reused on subsequent calls.
     *
     * @return \Elastica\Client
     */
    public function connect() {
        if (!is_null($this->elasticaClient)) return $this->elasticaClient;

        $dsn = ['servers' => $this->parseServers((string) $this->getConf('servers'))];

        $this->elasticaClient = new \Elastica\Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Parse the 'servers' setting into a list of server definitions
     *
     * Expects one server per line in the form "host[:port][,proxy]".
     * Lines without a host are skipped; a missing or zero port becomes 80.
     *
     * @param string $servers raw configuration value
     * @return array list of ['host' => string, 'port' => int, 'proxy' => string]
     */
    private function parseServers($servers) {
        $result = [];
        foreach (explode("\n", $servers) as $line) {
            list($host, $proxy) = array_pad(explode(',', $line, 2), 2, null);
            // pad so that a line without ":port" does not read an undefined offset
            list($host, $port) = array_pad(explode(':', $host, 2), 2, null);
            $host = trim($host);
            // string casts: $port/$proxy are null when not configured
            $port = (int) trim((string) $port);
            if (!$port) $port = 80;
            $proxy = trim((string) $proxy);
            if (!$host) continue;
            $result[] = compact('host', 'port', 'proxy');
        }
        return $result;
    }

    /**
     * Create the index
     *
     * After creating the index, the non-standard and ACL field mappings are
     * sent, followed by any mappings collected from other plugins through the
     * PLUGIN_ELASTICSEARCH_CREATEMAPPING event. Processing stops at the first
     * response that reports an error.
     *
     * @param bool $clear rebuild index
     * @return \Elastica\Response the first failing response, or the last one sent
     */
    public function createIndex($clear = false) {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        $index->create([], $clear);

        $response = $this->mapNonstandardFields($index);
        if ($response->hasError()) return $response;
        $response = $this->mapAccessFields($index);
        if ($response->hasError()) return $response;

        $pluginMappings = [];
        // plugins can supply their own mappings: ['plugin' => ['type' => 'keyword'] ]
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginMappings);

        if (!empty($pluginMappings)) {
            foreach ($pluginMappings as $mapping) {
                $response = $this->mapPluginFields($index, $mapping);
                if ($response->hasError()) return $response;
            }
        }

        return $response;
    }

    /**
     * Create the field mapping: language analyzers for the text fields
     *
     * Each of the fields title, abstract, content and syntax gets one
     * sub-field per language: the wiki's default language plus any other
     * languages configured in the translation plugin (if it is available).
     *
     * @return \Elastica\Response
     */
    public function createLanguageMapping() {
        global $conf;

        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        // default language first
        $langs = [$conf['lang']];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langs[] = $lang;
            }
        }

        // the per-language sub-fields are identical for every text field
        $subFields = [];
        foreach ($langs as $lang) {
            $subFields[$lang] = [
                'type' => 'text',
                'analyzer' => $this->getLanguageAnalyzer($lang),
            ];
        }

        $props = [];
        foreach (['title', 'abstract', 'content', 'syntax'] as $langField) {
            $props[$langField] = [
                'type' => 'text',
                'fields' => $subFields,
            ];
        }

        return $this->sendMapping($index, $props);
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        return self::ANALYZERS[$lang] ?? 'standard';
    }

    /**
     * Define special mappings for ACL fields
     *
     * Standard mapping could break the search because ACL fields
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * @param \Elastica\Index $index
     * @return \Elastica\Response
     */
    protected function mapAccessFields(\Elastica\Index $index): \Elastica\Response
    {
        $keyword = ['type' => 'keyword'];

        return $this->sendMapping($index, [
            'groups_include' => $keyword,
            'groups_exclude' => $keyword,
            'users_include' => $keyword,
            'users_exclude' => $keyword,
        ]);
    }

    /**
     * Add mappings provided by plugins
     * via PLUGIN_ELASTICSEARCH_CREATEMAPPING event
     *
     * @param \Elastica\Index $index
     * @param array $props field name => mapping definition
     * @return \Elastica\Response
     */
    public function mapPluginFields(\Elastica\Index $index, array $props): \Elastica\Response
    {
        return $this->sendMapping($index, $props);
    }

    /**
     * Explicitly map fields which require something other than
     * the default: type text, standard analyzer
     *
     * @param \Elastica\Index $index
     * @return \Elastica\Response
     */
    protected function mapNonstandardFields(\Elastica\Index $index): \Elastica\Response
    {
        return $this->sendMapping($index, [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ]);
    }

    /**
     * Send a property mapping for the configured document type
     *
     * @param \Elastica\Index $index index that holds the document type
     * @param array $props field name => mapping definition
     * @return \Elastica\Response
     */
    private function sendMapping(\Elastica\Index $index, array $props): \Elastica\Response
    {
        $type = $index->getType($this->getConf('documenttype'));

        $mapping = new \Elastica\Type\Mapping();
        $mapping->setType($type);
        $mapping->setProperties($props);
        return $mapping->send();
    }
}

// vim:ts=4:sw=4:et: