1<?php
2
3use dokuwiki\Extension\Event;
4
5/**
6 * DokuWiki Plugin elasticsearch (Action Component)
7 *
8 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
9 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
10 * @author  Andreas Gohr <gohr@cosmocode.de>
11 */
12class action_plugin_elasticsearch_indexing extends DokuWiki_Action_Plugin {
13
14    /**
15     * Registers a callback function for a given event
16     *
17     * @param Doku_Event_Handler $controller DokuWiki's event controller object
18     * @return void
19     */
20    public function register(Doku_Event_Handler $controller) {
21        $controller->register_hook('TPL_CONTENT_DISPLAY', 'BEFORE', $this, 'handle_tpl_content_display');
22        $controller->register_hook('IO_WIKIPAGE_WRITE', 'BEFORE', $this, 'handle_delete');
23    }
24
25    /**
26     * Add pages to index
27     *
28     * @param Doku_Event $event event object by reference
29     * @param mixed $param [the parameters passed as fifth argument to register_hook() when this
30     *                           handler was registered]
31     * @return void
32     */
33    public function handle_tpl_content_display(Doku_Event &$event, $param) {
34        global $ID, $INFO;
35        $logs   = array();
36        $logs[] = 'BEGIN content display';
37        $logs[] = metaFN($ID, '.elasticsearch_indexed');
38        $logs[] = wikiFN($ID);
39        $logs[] = wikiFN($INFO['id']);
40        $logs[] = $this->needs_indexing($ID) ? 'needs indexing' : 'no indexing needed';
41        $logs[] = 'END content display';
42        $this->log($logs);
43        if($this->needs_indexing($ID)) {
44            $this->index_page($ID);
45        }
46    }
47
48    /**
49     * Remove pages from index
50     *
51     * @param Doku_Event $event event object by reference
52     * @param mixed $param [the parameters passed as fifth argument to register_hook() when this
53     *                           handler was registered]
54     * @return void
55     */
56    public function handle_delete(Doku_Event &$event, $param) {
57        if($event->data[3]) return; // is old revision stuff
58        if(!empty($event->data[0][1])) return; // page still exists
59        // still here? delete from index
60        $this->delete_page($event->data[2]);
61    }
62
63    /**
64     * Check if the page $id has changed since the last indexing.
65     *
66     * @param string $id
67     * @return boolean
68     */
69    protected function needs_indexing($id) {
70        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
71        $refreshStateFile = metaFN($id, '.elasticsearch_refresh');
72        $dataFile = wikiFN($id);
73
74        // no data file or page is hidden ('hidepages' configuration option) -> no indexing
75        if (!file_exists($dataFile) || isHiddenPage($id)) {
76            // page should not be indexed but has a state file, try to remove from index
77            if (file_exists($indexStateFile)) {
78                $this->delete_page($id);
79            }
80            return false;
81        }
82
83        // force indexing if we're called via cli (e.g. cron)
84        if (php_sapi_name() == 'cli') {
85            return true;
86        }
87        // check if latest indexing attempt is done after page update
88        // and after other updates related to the page made by plugins
89        if (file_exists($indexStateFile)) {
90            if (
91                (filemtime($indexStateFile) > filemtime($dataFile)) &&
92                (!file_exists($refreshStateFile) || filemtime($indexStateFile) > filemtime($refreshStateFile))
93            ) {
94                return false;
95            }
96        }
97        return true;
98    }
99
100    /**
101     * Save indexed state for a page
102     *
103     * @param string $id
104     * @return int
105     */
106    protected function update_indexstate($id) {
107        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
108        return io_saveFile($indexStateFile, '');
109    }
110
111    /**
112     * Remove the given document from the index
113     *
114     * @param $id
115     */
116    public function delete_page($id) {
117        /** @var helper_plugin_elasticsearch_client $hlp */
118        $hlp          = plugin_load('helper', 'elasticsearch_client');
119        $indexName    = $this->getConf('indexname');
120        $documentType = $this->getConf('documenttype');
121        $client       = $hlp->connect();
122        $index        = $client->getIndex($indexName);
123        $type         = $index->getType($documentType);
124        $documentId   = $documentType . '_' . $id;
125
126        try {
127            $type->deleteById($documentId);
128            $index->refresh();
129            $this->log($documentId.' deleted ');
130        } catch(Exception $e) {
131            // we ignore this
132            $this->log($documentId.' not deleted '.$e->getMessage());
133        }
134
135        // delete state file
136        @unlink(metaFN($id, '.elasticsearch_indexed'));
137    }
138
139    /**
140     * Index a page
141     *
142     * @param $id
143     * @return void
144     */
145    public function index_page($id) {
146        global $conf;
147
148        /** @var helper_plugin_elasticsearch_client $hlp */
149        $hlp = plugin_load('helper', 'elasticsearch_client');
150        /** @var helper_plugin_elasticsearch_acl $hlpAcl */
151        $hlpAcl = plugin_load('helper', 'elasticsearch_acl');
152
153        $this->log('Indexing page ' . $id);
154        $indexName    = $this->getConf('indexname');
155        $documentType = $this->getConf('documenttype');
156        $client       = $hlp->connect();
157        $index        = $client->getIndex($indexName);
158        $type         = $index->getType($documentType);
159        $documentId   = $documentType . '_' . $id;
160
161        // collect the date which should be indexed
162        $meta = p_get_metadata($id, '', METADATA_RENDER_UNLIMITED);
163
164        $data             = array();
165        $data['uri']      = $id;
166        $data['created']  = date('Y-m-d\TH:i:s\Z', $meta['date']['created']);
167        $data['modified'] = date('Y-m-d\TH:i:s\Z', $meta['date']['modified']);
168        $data['user']     = $meta['user'];
169        $data['title']    = $meta['title'];
170        $data['abstract'] = $meta['description']['abstract'];
171        $data['syntax']   = rawWiki($id);
172
173        // prefer rendered plaintext over raw syntax output
174        /** @var \renderer_plugin_text $textRenderer */
175        $textRenderer = plugin_load('renderer', 'text');
176        if ($textRenderer) {
177            $data['content'] = p_cached_output(wikiFN($id),'text');
178        } else {
179            $data['content']  = $data['syntax'];
180        }
181
182        /** @var helper_plugin_translation $trans */
183        $trans = plugin_load('helper', 'translation');
184        if($trans) {
185            // translation plugin available
186            $lc               = $trans->getLangPart($id);
187            $data['language'] = $trans->realLC($lc);
188        } else {
189            // no translation plugin
190            $lc               = '';
191            $data['language'] = $conf['lang'];
192        }
193
194        $data['namespace'] = getNS($id);
195        if(trim($data['namespace']) == '') {
196            unset($data['namespace']);
197        }
198
199        $fullACL = $hlpAcl->getPageACL($id);
200        $queryACL = $hlpAcl->splitRules($fullACL);
201        $data = array_merge($data, $queryACL);
202
203        // let plugins add their own data to index
204        $pluginData = $this->getPluginData($data['uri']);
205        $data = array_merge($data, $pluginData);
206
207        // check if the document still exists to update it or add it as a new one
208        try {
209            $client->updateDocument($documentId, array('doc' => $data), $index->getName(), $type->getName());
210        } catch(\Elastica\Exception\NotFoundException $e) {
211            $document = new \Elastica\Document($documentId, $data);
212            $type->addDocument($document);
213        } catch(\Elastica\Exception\ResponseException $e) {
214            if($e->getResponse()->getStatus() == 404) {
215                $document = new \Elastica\Document($documentId, $data);
216                $type->addDocument($document);
217            } else {
218                throw $e;
219            }
220        } catch(Exception $e) {
221            msg(
222                'Something went wrong on indexing please try again later or ask an admin for help.<br /><pre>' .
223                hsc(get_class($e).' '.$e->getMessage()) . '</pre>',
224                -1
225            );
226            return;
227        }
228        $index->refresh();
229        $this->update_indexstate($id);
230
231    }
232
233    /**
234     * Get plugin data to feed into the index.
235     * If data does not match previously defined mappings, it will be ignored.
236     *
237     * @see \helper_plugin_elasticsearch_client::mapPluginFields
238     *
239     * @param $id
240     * @return array
241     */
242    protected function getPluginData($id): array
243    {
244        $pluginData = ['uri' => $id];
245        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_INDEXPAGE', $pluginData);
246        return $pluginData;
247    }
248
249    /**
250     * Log something to the debug log
251     *
252     * @param string|string[] $txt
253     */
254    protected function log($txt) {
255        if (!$this->getConf('debug')) {
256            return;
257        }
258        $logs = (array)$txt;
259
260        foreach ($logs as $entry) {
261            dbglog($entry);
262        }
263    }
264}
265