<?php
/**
 * DokuWiki Plugin elasticsearch (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 */

use dokuwiki\Extension\Event;

require_once dirname(__FILE__) . '/../vendor/autoload.php';

/**
 * Access to the Elastica client
 *
 * Builds the Elastica client from the plugin's 'servers' setting and creates
 * the search index together with its field mappings.
 */
class helper_plugin_elasticsearch_client extends DokuWiki_Plugin
{
    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var \Elastica\Client $elasticaClient Client instance, created on the first connect() call
     */
    protected $elasticaClient = null;

    /**
     * Connects to the elastica servers and returns the client object
     *
     * The 'servers' setting is read one server per line in the form
     * "host[:port][,proxy]". The port falls back to 80 when it is missing or
     * evaluates to 0; lines without a host are skipped. The client is created
     * once and reused on subsequent calls.
     *
     * @return \Elastica\Client
     */
    public function connect()
    {
        if (!is_null($this->elasticaClient)) return $this->elasticaClient;

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $servers = (string) $this->getConf('servers');
        foreach (explode("\n", $servers) as $line) {
            // Pad with empty strings so that lines lacking a proxy or a port
            // neither raise "undefined array key" nor hand null to trim().
            list($address, $proxy) = array_pad(explode(',', $line, 2), 2, '');
            list($host, $port) = array_pad(explode(':', $address, 2), 2, '');

            $host = trim($host);
            if (!$host) continue;

            $port = (int) trim($port);
            if (!$port) $port = 80;
            $proxy = trim($proxy);

            $dsn['servers'][] = compact('host', 'port', 'proxy');
        }

        $this->elasticaClient = new \Elastica\Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index
     *
     * Creates the index named by the 'indexname' setting and then sends the
     * field mappings to it.
     *
     * @param bool $clear rebuild index
     * @throws \splitbrain\phpcli\Exception when index creation or the mapping request reports an error
     */
    public function createIndex($clear = false)
    {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        if ($index->create([], $clear)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        return self::ANALYZERS[$lang] ?? 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     *
     * @param \Elastica\Index $index
     * @return \Elastica\Response
     */
    protected function createMappings(\Elastica\Index $index): \Elastica\Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings: every entry of the event data
        // is a map of field name => mapping, e.g. ['plugin' => ['type' => 'keyword']]
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        // merged last, so a plugin field with the same name replaces a built-in one
        foreach ($pluginProps as $fields) {
            $props = array_merge($props, $fields);
        }

        $mapping = new \Elastica\Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub-field for the wiki's default
     * language; one more sub-field is added per additional language reported
     * by the translation plugin, if that plugin can be loaded.
     *
     * @return array
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type' => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type' => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang'])
                    ],
                ]
            ]
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already mapped above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang)
                ];
            }
        }

        return $langprops;
    }
}

// vim:ts=4:sw=4:et: