<?php

/**
 * DokuWiki Plugin elasticsearch (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 */

use dokuwiki\Extension\Plugin;
use Elastica\Client;
use splitbrain\phpcli\Exception;
use Elastica\Index;
use Elastica\Response;
use Elastica\Mapping;
use dokuwiki\Extension\Event;

require_once __DIR__ . '/../vendor/autoload.php';

/**
 * Access to the Elastica client
 *
 * Builds the Elastica client from the plugin configuration and creates the
 * search index together with its field mappings.
 */
class helper_plugin_elasticsearch_client extends Plugin
{
    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    protected const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var Client $elasticaClient Lazily created client, reused by later connect() calls
     */
    protected $elasticaClient;

    /**
     * Connects to the elastica servers and returns the client object
     *
     * The 'servers' setting is read line by line. Each line has the form
     * "host[:port][,proxy]"; lines without a host are skipped and a missing
     * or non-numeric port falls back to 80. The configured username and
     * password are attached to every server entry.
     *
     * The client is created once and cached on the instance.
     *
     * @return Client
     */
    public function connect()
    {
        if (!is_null($this->elasticaClient)) return $this->elasticaClient;

        // security settings
        $username = $this->getConf('username');
        $password = $this->getConf('password');

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $servers = $this->getConf('servers');
        $lines = explode("\n", $servers);
        foreach ($lines as $line) {
            // Pad both splits with '' so a line without proxy and/or port does not
            // trigger "undefined array key" warnings or pass null to trim().
            [$host, $proxy] = array_pad(explode(',', $line, 2), 2, '');
            // NOTE: splitting on the first colon means IPv6 literals are not supported here.
            [$host, $port] = array_pad(explode(':', $host, 2), 2, '');

            $host = trim($host);
            $port = (int) trim($port);
            if (!$port) $port = 80;
            $proxy = trim($proxy);
            if (!$host) continue;

            $dsn['servers'][] = [
                'host' => $host,
                'port' => $port,
                'proxy' => $proxy,
                'username' => $username,
                'password' => $password
            ];
        }

        $this->elasticaClient = new Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index
     *
     * Creates the index named by the 'indexname' setting and then sends the
     * field mappings to it.
     *
     * @param bool $clear rebuild index (handed to Index::create() as its second argument)
     * @throws Exception when the index or the field mappings could not be created
     */
    public function createIndex($clear = false)
    {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        if ($index->create([], $clear)->hasError()) {
            throw new Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang ISO language code, looked up in self::ANALYZERS
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        return self::ANALYZERS[$lang] ?? 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     *
     * @param Index $index the index the mapping is sent to
     * @return Response
     */
    protected function createMappings(Index $index): Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings: ['plugin' => ['type' => 'keyword'] ]
        // NOTE(review): the loop below merges each entry's value straight into the
        // properties, so each value presumably has to be a map of
        // field name => field definition - confirm against the event handlers.
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        // later arrays win on duplicate keys, so plugin fields may override the built-in ones
        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        foreach ($pluginProps as $fields) {
            $props = array_merge($props, $fields);
        }

        $mapping = new Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub-field for the wiki's default
     * language ($conf['lang']). When the translation helper plugin can be
     * loaded, every further non-empty language it lists gets its own
     * sub-field with the matching analyzer.
     *
     * @return array
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type' => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type' => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang'])
                    ],
                ]
            ]
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already mapped above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang)
                ];
            }
        }

        return $langprops;
    }
}

// vim:ts=4:sw=4:et: