1<?php
2
3/**
4 * DokuWiki Plugin elasticsearch (Helper Component)
5 *
6 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
7 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
8 */
9
10use dokuwiki\Extension\Plugin;
11use Elastica\Client;
12use splitbrain\phpcli\Exception;
13use Elastica\Index;
14use Elastica\Response;
15use Elastica\Mapping;
16use dokuwiki\Extension\Event;
17
18require_once __DIR__ . '/../vendor/autoload.php';
19
/**
 * Access to the Elastica client
 *
 * Builds the Elastica client from the plugin configuration (created once and
 * then reused) and creates the search index together with its field mappings.
 */
class helper_plugin_elasticsearch_client extends Plugin
{
    /** @var array Map of ISO codes to Elasticsearch analyzer names */
    protected const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var Client|null $elasticaClient cached client, null until connect() has been called
     */
    protected $elasticaClient;

    /**
     * Connects to the elastica servers and returns the client object
     *
     * The 'servers' setting is read line by line, each line having the form
     * "host[:port][,proxy]". Lines without a host are skipped, a missing or
     * non-numeric port falls back to 80 and a missing proxy becomes an empty
     * string. The configured username and password are attached to every server.
     *
     * The client is created on the first call only; later calls return the
     * same instance.
     *
     * @return Client
     */
    public function connect()
    {
        if ($this->elasticaClient !== null) {
            return $this->elasticaClient;
        }

        // security settings
        $username = $this->getConf('username');
        $password = $this->getConf('password');

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $lines = explode("\n", (string) $this->getConf('servers'));
        foreach ($lines as $line) {
            // pad both splits so that a missing proxy or port yields null
            // instead of an undefined array key
            [$hostPort, $proxy] = array_pad(explode(',', $line, 2), 2, null);
            // split at the first colon only; bare IPv6 literals are therefore not supported
            [$host, $port] = array_pad(explode(':', $hostPort, 2), 2, null);

            $host = trim($host);
            if ($host === '') {
                continue;
            }

            // cast before trimming: passing null to trim() is deprecated since PHP 8.1
            $port = (int) trim((string) $port);
            if (!$port) {
                $port = 80;
            }
            $proxy = trim((string) $proxy);

            $dsn['servers'][] = [
                'host' => $host,
                'port' => $port,
                'proxy' => $proxy,
                'username' => $username,
                'password' => $password,
            ];
        }

        $this->elasticaClient = new Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index
     *
     * Creates the index named by the 'indexname' setting and then sends the
     * field mappings to it.
     *
     * @param bool $clear rebuild index
     * @throws Exception when creating the index or its mappings reports an error
     */
    public function createIndex($clear = false)
    {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        // Elastica expects the options as an array; a plain bool as second
        // argument is deprecated. Only pass 'recreate' when it is requested.
        $options = $clear ? ['recreate' => true] : [];
        if ($index->create([], $options)->hasError()) {
            throw new Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang ISO language code, looked up in self::ANALYZERS
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        return self::ANALYZERS[$lang] ?? 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     * Fields supplied by plugins are merged last and therefore override
     * built-in fields of the same name.
     *
     * @param Index $index
     * @return Response
     */
    protected function createMappings(Index $index): Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings: every entry added to the event
        // data is a map of field name => field definition,
        // e.g. ['myfield' => ['type' => 'keyword']]
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        foreach ($pluginProps as $fields) {
            // skip malformed entries instead of letting array_merge() fail on them
            if (!is_array($fields)) {
                continue;
            }
            $props = array_merge($props, $fields);
        }

        $mapping = new Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub-field for the wiki's default
     * language ($conf['lang']). If the translation helper plugin can be loaded,
     * each of its non-empty translation languages gets a sub-field as well.
     * Every sub-field is named after the language code and uses the analyzer
     * returned by getLanguageAnalyzer().
     *
     * @return array
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type' => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type' => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang']),
                    ],
                ],
            ],
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already mapped above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang),
                ];
            }
        }

        return $langprops;
    }
}
242
243// vim:ts=4:sw=4:et:
244