1<?php
2
3use dokuwiki\Extension\ActionPlugin;
4use dokuwiki\Extension\EventHandler;
5use Elastica\Exception\NotFoundException;
6use Elastica\Document;
7use Elastica\Exception\ResponseException;
8use dokuwiki\Logger;
9use dokuwiki\Extension\Event;
10
11/**
12 * DokuWiki Plugin elasticsearch (Action Component)
13 *
14 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
15 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
16 * @author  Andreas Gohr <gohr@cosmocode.de>
17 */
18
class action_plugin_elasticsearch_indexing extends ActionPlugin
{
    /** MIME type stored with indexed wiki pages */
    public const MIME_DOKUWIKI = 'text/dokuwiki';
    /** Document type of wiki pages, also used as prefix of the document ID */
    public const DOCTYPE_PAGE = 'page';
    /** Document type of media files, also used as prefix of the document ID */
    public const DOCTYPE_MEDIA = 'media';

    /**
     * Registers the event handlers of this component
     *
     * @param EventHandler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(EventHandler $controller)
    {
        $controller->register_hook('TPL_CONTENT_DISPLAY', 'BEFORE', $this, 'handleTplContentDisplay');
        $controller->register_hook('IO_WIKIPAGE_WRITE', 'BEFORE', $this, 'handleDelete');
        $controller->register_hook('MEDIA_UPLOAD_FINISH', 'AFTER', $this, 'handleMediaUpload');
        $controller->register_hook('MEDIA_DELETE_FILE', 'AFTER', $this, 'handleMediaDelete');
    }

    /**
     * Index the currently displayed page if its index entry is outdated
     *
     * @param Event $event event object by reference
     * @return void
     */
    public function handleTplContentDisplay(Event $event)
    {
        global $ID, $INFO;

        // evaluate only once: needsIndexing() may itself remove a stale entry from the index
        $needsIndexing = $this->needsIndexing($ID);

        $this->log(
            'content display',
            [
                metaFN($ID, '.elasticsearch_indexed'),
                wikiFN($ID),
                wikiFN($INFO['id']),
                $needsIndexing ? 'needs indexing' : 'no indexing needed',
            ]
        );

        if ($needsIndexing) {
            $this->indexPage($ID);
        }
    }

    /**
     * Update index on media upload
     *
     * The third element of the event data is used as the media ID to index.
     *
     * @param Event $event
     * @throws Exception
     */
    public function handleMediaUpload(Event $event)
    {
        $this->indexFile($event->data[2]);
    }

    /**
     * Remove deleted pages from index
     *
     * Relies on the IO_WIKIPAGE_WRITE data layout as described in the DokuWiki
     * event documentation: [0] => [file, content, append], [1] => namespace
     * (false for the root namespace), [2] => page name without namespace,
     * [3] => revision (false for the current revision).
     *
     * @param Event $event event object by reference
     * @return void
     */
    public function handleDelete(Event $event)
    {
        if ($event->data[3]) {
            return; // is old revision stuff
        }
        // only an empty content means deletion; empty() would also match a page containing just "0"
        if ((string) $event->data[0][1] !== '') {
            return; // page still exists
        }

        // the index is keyed by the full page ID, so the namespace has to be put back in front
        $namespace = $event->data[1];
        $pageName = $event->data[2];
        $id = ((string) $namespace === '') ? $pageName : $namespace . ':' . $pageName;

        // still here? delete from index
        $this->deleteEntry($id, self::DOCTYPE_PAGE);
    }

    /**
     * Remove deleted media from index
     *
     * Nothing happens unless the event data flags the file as unlinked ('unl').
     *
     * @param Event $event
     * @param mixed $param unused
     */
    public function handleMediaDelete(Event $event, $param)
    {
        if ($event->data['unl']) {
            $this->deleteEntry($event->data['id'], self::DOCTYPE_MEDIA);
        }
    }

    /**
     * Check if the page $id has changed since the last indexing.
     *
     * Side effect: a page that must not be indexed (missing or hidden) but still
     * has an index state file is removed from the index.
     *
     * @param string $id
     * @return boolean
     */
    protected function needsIndexing($id)
    {
        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        $refreshStateFile = metaFN($id, '.elasticsearch_refresh');
        $dataFile = wikiFN($id);

        // no data file or page is hidden ('hidepages' configuration option) -> no indexing
        if (!file_exists($dataFile) || isHiddenPage($id)) {
            // page should not be indexed but has a state file, try to remove from index
            if (file_exists($indexStateFile)) {
                $this->deleteEntry($id, self::DOCTYPE_PAGE);
            }
            return false;
        }

        // force indexing if we're called via cli (e.g. cron)
        if (PHP_SAPI === 'cli') {
            return true;
        }

        // never indexed before
        if (!file_exists($indexStateFile)) {
            return true;
        }

        $indexedAt = filemtime($indexStateFile);

        // page was changed after (or within the same second as) the latest indexing attempt
        if ($indexedAt <= filemtime($dataFile)) {
            return true;
        }

        // a refresh was requested after the latest indexing attempt
        if (file_exists($refreshStateFile) && $indexedAt <= filemtime($refreshStateFile)) {
            return true;
        }

        return false;
    }

    /**
     * Write the given document data to the index
     *
     * An existing document is updated, a missing one is added. On success the
     * index is refreshed and the index state file matching the document type
     * is touched.
     *
     * @param array $data document fields, 'uri' and 'doctype' are required
     * @throws ResponseException when the update fails with a status other than 404
     */
    protected function writeIndex($data)
    {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');

        $client     = $hlp->connect();
        $index      = $client->getIndex($this->getConf('indexname'));
        $documentId = $data['doctype'] . '_' . $data['uri'];

        // try to update the document first, remember when it has to be added as a new one instead
        $isNew = false;
        try {
            $client->updateDocument($documentId, ['doc' => $data], $index->getName());
        } catch (NotFoundException $e) {
            $isNew = true;
        } catch (ResponseException $e) {
            if ($e->getResponse()->getStatus() != 404) {
                throw $e;
            }
            $isNew = true;
        } catch (Exception $e) {
            msg(
                'Something went wrong on indexing please try again later or ask an admin for help.<br /><pre>' .
                hsc(get_class($e) . ' ' . $e->getMessage()) . '</pre>',
                -1
            );
            return;
        }

        if ($isNew) {
            $index->addDocument(new Document($documentId, $data));
        }

        $index->refresh();

        // pass the doctype along so media files get their state file where deleteEntry() removes it
        $this->updateIndexstate($data['uri'], $data['doctype']);
    }

    /**
     * Save indexed state for a page or a media file
     *
     * The state is an empty marker file in the page or media meta directory.
     *
     * @param string $id
     * @param string $doctype one of the DOCTYPE_* constants
     * @return bool result of io_saveFile()
     */
    protected function updateIndexstate($id, $doctype = self::DOCTYPE_PAGE)
    {
        if ($doctype === self::DOCTYPE_MEDIA) {
            $indexStateFile = mediaMetaFN($id, '.elasticsearch_indexed');
        } else {
            $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        }
        return io_saveFile($indexStateFile, '');
    }

    /**
     * Remove the given document from the index
     *
     * Failures of the index are logged and otherwise ignored. The index state
     * file is removed in any case.
     *
     * @param string $id page or media ID
     * @param string $doctype one of the DOCTYPE_* constants
     */
    public function deleteEntry($id, $doctype)
    {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp        = plugin_load('helper', 'elasticsearch_client');
        $client     = $hlp->connect();
        $index      = $client->getIndex($this->getConf('indexname'));
        $documentId = $doctype . '_' . $id;

        try {
            $index->deleteById($documentId);
            $index->refresh();
            $this->log($documentId . ' deleted ');
        } catch (Exception $e) {
            // we ignore this
            $this->log($documentId . ' not deleted ' . $e->getMessage());
        }

        // delete state file
        if ($doctype === self::DOCTYPE_MEDIA) {
            $stateFile = mediaMetaFN($id, '.elasticsearch_indexed');
        } else {
            $stateFile = metaFN($id, '.elasticsearch_indexed');
        }
        @unlink($stateFile);
    }

    /**
     * Index a page
     *
     * Collects metadata, raw syntax, content, language, namespace, ACL
     * information and plugin provided data and writes it to the index.
     *
     * @param string $id
     * @return void
     */
    public function indexPage($id)
    {
        global $conf;

        $this->log('Indexing page ' . $id);

        // collect the data which should be indexed
        $meta = p_get_metadata($id, '', METADATA_RENDER_UNLIMITED);

        // metadata may lack the dates: fall back to the page file's mtime, and to 'modified' for 'created'
        $modified = empty($meta['date']['modified'])
            ? (int) filemtime(wikiFN($id))
            : (int) $meta['date']['modified'];
        $created = empty($meta['date']['created']) ? $modified : (int) $meta['date']['created'];

        $data             = [];
        $data['uri']      = $id;
        $data['created']  = $this->formatTimestamp($created);
        $data['modified'] = $this->formatTimestamp($modified);
        $data['user']     = $meta['user'] ?? null;
        $data['title']    = $meta['title'] ?? $id;
        $data['abstract'] = $meta['description']['abstract'] ?? null;
        $data['syntax']   = rawWiki($id);
        $data['mime']     = self::MIME_DOKUWIKI;
        $data['doctype']  = self::DOCTYPE_PAGE;

        // prefer rendered plaintext over raw syntax output
        /** @var \renderer_plugin_text $textRenderer */
        $textRenderer = plugin_load('renderer', 'text');
        if ($textRenderer) {
            $data['content'] = p_cached_output(wikiFN($id), 'text');
        } else {
            $data['content'] = $data['syntax'];
        }

        /** @var helper_plugin_translation $trans */
        $trans = plugin_load('helper', 'translation');
        if ($trans) {
            // translation plugin available
            $data['language'] = $trans->realLC($trans->getLangPart($id));
        } else {
            // no translation plugin
            $data['language'] = $conf['lang'];
        }

        // pages in the root namespace get no namespace field at all
        $namespace = getNS($id);
        if (trim($namespace) !== '') {
            $data['namespace'] = $namespace;
        }

        /** @var helper_plugin_elasticsearch_acl $hlpAcl */
        $hlpAcl = plugin_load('helper', 'elasticsearch_acl');
        $data = array_merge($data, $hlpAcl->splitRules($hlpAcl->getPageACL($id)));

        // let plugins add their own data to index
        $data = array_merge($data, $this->getPluginData($data['uri']));

        $this->writeIndex($data);
    }

    /**
     * Index a file
     *
     * Files the document parser rejects with a RuntimeException are logged and skipped.
     *
     * @param string $fileId
     * @return void
     * @throws Exception
     */
    public function indexFile($fileId)
    {
        $this->log('Indexing file ' . $fileId);

        $docparser = new \helper_plugin_elasticsearch_docparser();
        $file = mediaFN($fileId);

        try {
            $data = $docparser->parse($file);
            $data['uri'] = $fileId;
            $data['doctype'] = self::DOCTYPE_MEDIA;
            $data['modified'] = $this->formatTimestamp(filemtime($file));

            // files in the root namespace get no namespace field at all
            $namespace = getNS($fileId);
            if (trim($namespace) !== '') {
                $data['namespace'] = $namespace;
            } else {
                unset($data['namespace']);
            }

            /** @var helper_plugin_elasticsearch_acl $hlpAcl */
            $hlpAcl = plugin_load('helper', 'elasticsearch_acl');
            $data = array_merge($data, $hlpAcl->splitRules($hlpAcl->getPageACL($fileId)));

            $this->writeIndex($data);
        } catch (RuntimeException $e) {
            $this->log('Skipping ' . $fileId . ': ' . $e->getMessage());
        }
    }

    /**
     * Get plugin data to feed into the index.
     * If data does not match previously defined mappings, it will be ignored.
     *
     * Triggers PLUGIN_ELASTICSEARCH_INDEXPAGE with an array that initially only
     * holds the page's 'uri'; handlers may add their fields to it.
     *
     * @param string $id
     * @return array
     */
    protected function getPluginData($id): array
    {
        $pluginData = ['uri' => $id];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_INDEXPAGE', $pluginData);
        return $pluginData;
    }

    /**
     * Log something to the debug log
     *
     * @param string $txt message, gets prefixed with 'ElasticSearch: '
     * @param mixed $info additional details passed on to the logger
     */
    protected function log($txt, $info = null)
    {
        Logger::debug('ElasticSearch: ' . $txt, $info);
    }

    /**
     * Format a Unix timestamp for the index
     *
     * The format ends in a literal 'Z' (UTC designator), so the time has to be
     * rendered in UTC instead of the server's local timezone.
     *
     * @param int $timestamp Unix timestamp
     * @return string e.g. 2024-01-31T12:00:00Z
     */
    private function formatTimestamp($timestamp)
    {
        return gmdate('Y-m-d\TH:i:s\Z', $timestamp);
    }
}
350