<?php
/**
 * DokuWiki Plugin elasticsearch (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 */

use dokuwiki\Extension\Event;

require_once __DIR__ . '/../vendor/autoload.php';

/**
 * Access to the Elastica client
 *
 * Builds the Elastica client from the plugin configuration and creates the
 * search index together with its field mappings.
 */
class helper_plugin_elasticsearch_client extends DokuWiki_Plugin {

    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var \Elastica\Client|null $elasticaClient Client instance, created on the first connect() call
     */
    protected $elasticaClient = null;

    /**
     * Connects to the elastica servers and returns the client object
     *
     * The client is created once and reused on subsequent calls. The 'servers'
     * configuration value is read line by line; each line has the form
     * "host[:port][,proxy]". Lines without a host are skipped and a missing or
     * non-numeric port falls back to 80.
     *
     * @return \Elastica\Client
     */
    public function connect() {
        if (!is_null($this->elasticaClient)) return $this->elasticaClient;

        // security settings
        $username = $this->getConf('username');
        $password = $this->getConf('password');

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $servers = $this->getConf('servers');
        $lines = explode("\n", $servers);
        foreach ($lines as $line) {
            // Pad both splits with empty strings: the proxy and the port are
            // optional, and without padding a missing port raises an
            // "undefined array key" warning and passes null to trim().
            list($host, $proxy) = array_pad(explode(',', $line, 2), 2, '');
            list($host, $port) = array_pad(explode(':', $host, 2), 2, '');

            $host = trim($host);
            if (!$host) continue;

            $port = (int) trim($port);
            if (!$port) $port = 80;
            $proxy = trim($proxy);

            $dsn['servers'][] = compact('host', 'port', 'proxy', 'username', 'password');
        }

        $this->elasticaClient = new \Elastica\Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index
     *
     * Creates the index named by the 'indexname' configuration value and then
     * sends the field mappings to it.
     *
     * @param bool $clear rebuild index
     * @throws \splitbrain\phpcli\Exception when creating the index or its mappings reports an error
     */
    public function createIndex($clear = false) {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        if ($index->create([], $clear)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        return self::ANALYZERS[$lang] ?? 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     *
     * @param \Elastica\Index $index
     * @return \Elastica\Response
     */
    protected function createMappings(\Elastica\Index $index): \Elastica\Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings: ['plugin' => ['type' => 'keyword'] ]
        // NOTE(review): each top-level value is merged into the property list as-is,
        // so it is presumably expected to be a "field name => mapping" array - confirm
        // against the handlers of this event.
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        foreach ($pluginProps as $fields) {
            $props = array_merge($props, $fields);
        }

        $mapping = new \Elastica\Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub-field for the wiki's default language.
     * When the translation helper plugin can be loaded, every additional language
     * it lists gets its own sub-field with the matching analyzer.
     *
     * @return array
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type' => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type' => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang'])
                    ],
                ]
            ]
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already mapped above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang)
                ];
            }
        }

        return $langprops;
    }
}

// vim:ts=4:sw=4:et: