1<?php
2/**
3 * DokuWiki Plugin elasticsearch (Helper Component)
4 *
5 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
6 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
7 */
8
9use dokuwiki\Extension\Event;
10
11require_once dirname(__FILE__) . '/../vendor/autoload.php';
12
13/**
14 * Access to the Elastica client
15 */
class helper_plugin_elasticsearch_client extends DokuWiki_Plugin {

    /**
     * @var array Map of ISO language codes to Elasticsearch analyzer names
     *
     * Languages missing from this map fall back to the 'standard' analyzer,
     * see getLanguageAnalyzer()
     */
    const ANALYZERS = [
        'ar' => 'arabic',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'ca' => 'catalan',
        'cs' => 'czech',
        'da' => 'danish',
        'de' => 'german',
        'el' => 'greek',
        'en' => 'english',
        'es' => 'spanish',
        'eu' => 'basque',
        'fa' => 'persian',
        'fi' => 'finnish',
        'fr' => 'french',
        'ga' => 'irish',
        'gl' => 'galician',
        'hi' => 'hindi',
        'hu' => 'hungarian',
        'hy' => 'armenian',
        'id' => 'indonesian',
        'it' => 'italian',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sv' => 'swedish',
        'th' => 'thai',
        'tr' => 'turkish',
    ];

    /**
     * @var \Elastica\Client|null $elasticaClient cached client, created on the first connect() call
     */
    protected $elasticaClient = null;

    /**
     * Connects to the elastica servers and returns the client object
     *
     * The client is built once from the plugin configuration and reused on
     * subsequent calls. The 'servers' setting is read line by line, each line
     * having the form "host[:port][,proxy]". Lines without a host are skipped,
     * a missing or non-numeric port defaults to 80. The configured username
     * and password are attached to every server entry.
     *
     * @return \Elastica\Client
     */
    public function connect() {
        if (!is_null($this->elasticaClient)) return $this->elasticaClient;

        // security settings
        $username = $this->getConf('username');
        $password = $this->getConf('password');

        // parse servers config into DSN array
        $dsn = ['servers' => []];
        $servers = (string) $this->getConf('servers');
        $lines   = explode("\n", $servers);
        foreach ($lines as $line) {
            // both parts are optional: pad the results so a missing proxy or port
            // becomes null instead of triggering an undefined offset
            list($host, $proxy) = array_pad(explode(',', $line, 2), 2, null);
            list($host, $port) = array_pad(explode(':', $host, 2), 2, null);

            $host = trim($host);
            if (!$host) continue;

            // cast before trimming, passing null to trim() is deprecated as of PHP 8.1
            $port = (int) trim((string) $port);
            if (!$port) $port = 80;
            $proxy = trim((string) $proxy);

            $dsn['servers'][] = [
                'host' => $host,
                'port' => $port,
                'proxy' => $proxy,
                'username' => $username,
                'password' => $password,
            ];
        }

        $this->elasticaClient = new \Elastica\Client($dsn);
        return $this->elasticaClient;
    }

    /**
     * Create the index
     *
     * Creates the index named by the 'indexname' setting and then sends the
     * field mappings built by createMappings() to it.
     *
     * @param bool $clear rebuild index, passed on to \Elastica\Index::create()
     * @throws \splitbrain\phpcli\Exception when index creation or the mapping request reports an error
     */
    public function createIndex($clear = false) {
        $client = $this->connect();
        $index = $client->getIndex($this->getConf('indexname'));

        if ($index->create([], $clear)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create index!");
        }

        if ($this->createMappings($index)->hasError()) {
            throw new \splitbrain\phpcli\Exception("Failed to create field mappings!");
        }
    }

    /**
     * Get the correct analyzer for the given language code
     *
     * Returns the standard analyzer for unknown languages
     *
     * @param string $lang language code as used in self::ANALYZERS
     * @return string
     */
    protected function getLanguageAnalyzer($lang)
    {
        if (isset(self::ANALYZERS[$lang])) return self::ANALYZERS[$lang];
        return 'standard';
    }

    /**
     * Define mappings for custom fields
     *
     * All languages get their separate fields configured with appropriate linguistic analyzers.
     *
     * ACL fields require custom mappings as well, or else they could break the search. They
     * might contain word-split tokens such as underscores and so must not
     * be indexed using the standard text analyzer.
     *
     * Fields containing metadata are configured as sparsely as possible, no analyzers are necessary.
     *
     * Plugins may provide their own fields via PLUGIN_ELASTICSEARCH_CREATEMAPPING event.
     * Plugin fields are merged last, so they replace built-in fields of the same name.
     *
     * @param \Elastica\Index $index
     * @return \Elastica\Response
     */
    protected function createMappings(\Elastica\Index $index): \Elastica\Response
    {
        $langProps = $this->getLangProps();

        // document permissions
        $aclProps = [
            'groups_include' => [
                'type' => 'keyword',
            ],
            'groups_exclude' => [
                'type' => 'keyword',
            ],
            'users_include' => [
                'type' => 'keyword',
            ],
            'users_exclude' => [
                'type' => 'keyword',
            ],
        ];

        // differentiate media types
        $mediaProps = [
            'doctype' => [
                'type' => 'keyword',
            ],
            'mime' => [
                'type' => 'keyword',
            ],
            'ext' => [
                'type' => 'keyword',
            ],
        ];

        // additional fields which require something other than type text, standard analyzer
        $additionalProps = [
            'uri' => [
                'type' => 'text',
                'analyzer' => 'pattern', // because colons surrounded by letters are part of word in standard analyzer
            ],
        ];

        // plugins can supply their own mappings. Every entry of the event data is merged
        // into the properties below, so each entry has to be an array of
        // field name => mapping definition, e.g. ['plugin' => ['type' => 'keyword']]
        $pluginProps = [];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_CREATEMAPPING', $pluginProps);

        $props = array_merge($langProps, $aclProps, $mediaProps, $additionalProps);
        foreach ($pluginProps as $fields) {
            $props = array_merge($props, $fields);
        }

        $mapping = new \Elastica\Mapping();
        $mapping->setProperties($props);
        return $mapping->send($index);
    }

    /**
     * Language mappings recognize languages defined by translation plugin
     *
     * The 'content' field always gets a sub field for the wiki's default language.
     * When the translation plugin is available, one more sub field is added for
     * each of its configured translations.
     *
     * @return array mapping properties for the 'content' field
     */
    protected function getLangProps()
    {
        global $conf;

        // default language
        $langprops = [
            'content' => [
                'type'  => 'text',
                'fields' => [
                    $conf['lang'] => [
                        'type'  => 'text',
                        'analyzer' => $this->getLanguageAnalyzer($conf['lang'])
                    ],
                ]
            ]
        ];

        // other languages as configured in the translation plugin
        /** @var helper_plugin_translation $transplugin */
        $transplugin = plugin_load('helper', 'translation');
        if ($transplugin) {
            // drop empty entries and the default language, which is already configured above
            $translations = array_diff(array_filter($transplugin->translations), [$conf['lang']]);
            foreach ($translations as $lang) {
                $langprops['content']['fields'][$lang] = [
                    'type' => 'text',
                    'analyzer' => $this->getLanguageAnalyzer($lang)
                ];
            }
        }

        return $langprops;
    }
}
228
229// vim:ts=4:sw=4:et:
230