1<?php
2/**
3 * DokuWiki Plugin elasticsearch (Helper Component)
4 *
5 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
7 */
8
9use dokuwiki\Extension\Event;
10
11require_once dirname(__FILE__) . '/../vendor/autoload.php';
12
13/**
14 * Access to the Elastica client
15 */
class helper_plugin_elasticsearch_client extends DokuWiki_Plugin {

    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var \Elastica\Client|null $elasticaClient client instance cached by connect()
     */
    protected $elasticaClient = null;

    /**
     * Builds the Elastica client for the configured servers and returns it
     *
     * The 'servers' config option is read line by line. Each line has the form
     * "host[:port][,proxy]". Lines without a host are skipped, a missing or
     * non-numeric port falls back to 80 and a missing proxy becomes an empty string.
     *
     * The client is created only once; subsequent calls return the cached instance.
     *
     * @return \Elastica\Client
     */
    public function connect() {
        if (!is_null($this->elasticaClient)) return $this->elasticaClient;

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $servers = $this->getConf('servers');
        $lines   = explode("\n", $servers);
        foreach ($lines as $line) {
            // pad both splits: the proxy and the port part are optional, and a bare
            // list() on a too-short array raises an "undefined offset" warning
            list($host, $proxy) = array_pad(explode(',', $line, 2), 2, null);
            list($host, $port) = array_pad(explode(':', $host, 2), 2, null);

            $host = trim($host);
            if (!$host) continue;

            // cast before trimming: passing null to trim() is deprecated as of PHP 8.1
            $port = (int) trim((string) $port);
            if (!$port) $port = 80;
            $proxy = trim((string) $proxy);

            $dsn['servers'][] = compact('host', 'port', 'proxy');
        }

        $this->elasticaClient = new \Elastica\Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index and its field mappings
     *
     * The index name is taken from the 'indexname' config option.
     *
     * @param bool $clear rebuild index (handed through to \Elastica\Index::create())
     * @throws \splitbrain\phpcli\Exception when creating the index or the mappings reports an error
     */
    public function createIndex($clear = false) {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        if ($index->create([], $clear)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang ISO language code, looked up in self::ANALYZERS
     * @return string name of the Elasticsearch analyzer
     */
    protected function getLanguageAnalyzer($lang)
    {
        if (isset(self::ANALYZERS[$lang])) return self::ANALYZERS[$lang];
        return 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     * Every entry a handler adds to the event data is merged into the property list, so
     * each entry is expected to be an array of property definitions keyed by field name
     * (NOTE(review): confirm the expected shape against the event's consumers).
     * Properties merged later override earlier ones with the same name.
     *
     * @param \Elastica\Index $index index the mapping is sent to
     * @return \Elastica\Response response of the mapping request
     */
    protected function createMappings(\Elastica\Index $index): \Elastica\Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings: ['plugin' => ['type' => 'keyword'] ]
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        foreach ($pluginProps as $fields) {
            $props = array_merge($props, $fields);
        }

        $mapping = new \Elastica\Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub-field for the wiki's default language
     * ($conf['lang']). If the translation plugin's helper can be loaded, one more
     * sub-field is added for each of its configured translations. Every sub-field
     * uses the analyzer returned by getLanguageAnalyzer().
     *
     * @return array property definition for the 'content' field
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type'  => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type'  => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang'])
                    ],
                ]
            ]
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already mapped above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang)
                ];
            }
        }

        return $langprops;
    }
}
226
227// vim:ts=4:sw=4:et:
228