<?php

use dokuwiki\Extension\ActionPlugin;
use dokuwiki\Extension\EventHandler;
use Elastica\Exception\NotFoundException;
use Elastica\Document;
use Elastica\Exception\ResponseException;
use dokuwiki\Logger;
use dokuwiki\Extension\Event;

/**
 * DokuWiki Plugin elasticsearch (Action Component)
 *
 * Keeps the Elasticsearch index in sync with wiki pages and media files:
 * pages are (re)indexed when they are displayed, media files when they are
 * uploaded, and both are removed from the index when they are deleted.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */

class action_plugin_elasticsearch_indexing extends ActionPlugin
{
    public const MIME_DOKUWIKI = 'text/dokuwiki';
    public const DOCTYPE_PAGE = 'page';
    public const DOCTYPE_MEDIA = 'media';

    /**
     * Registers a callback function for a given event
     *
     * @param EventHandler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(EventHandler $controller)
    {
        $controller->register_hook('TPL_CONTENT_DISPLAY', 'BEFORE', $this, 'handleTplContentDisplay');
        $controller->register_hook('IO_WIKIPAGE_WRITE', 'BEFORE', $this, 'handleDelete');
        $controller->register_hook('MEDIA_UPLOAD_FINISH', 'AFTER', $this, 'handleMediaUpload');
        $controller->register_hook('MEDIA_DELETE_FILE', 'AFTER', $this, 'handleMediaDelete');
    }

    /**
     * Add pages to index
     *
     * Indexes the currently displayed page if needsIndexing() reports that it is stale.
     *
     * @param Event $event event object by reference
     * @return void
     */
    public function handleTplContentDisplay(Event $event)
    {
        global $ID, $INFO;

        // needsIndexing() stats several files and may remove a stale index entry,
        // so evaluate it exactly once and reuse the result for logging and the decision
        $needsIndexing = $this->needsIndexing($ID);

        $this->log(
            'content display',
            [
                metaFN($ID, '.elasticsearch_indexed'),
                wikiFN($ID),
                wikiFN($INFO['id']),
                $needsIndexing ? 'needs indexing' : 'no indexing needed',
            ]
        );

        if ($needsIndexing) {
            $this->indexPage($ID);
        }
    }

    /**
     * Update index on media upload
     *
     * @param Event $event event whose data[2] is passed to indexFile() as the media ID
     * @throws Exception
     */
    public function handleMediaUpload(Event $event)
    {
        $this->indexFile($event->data[2]);
    }

    /**
     * Remove pages from index
     *
     * Only acts when the current revision is written with empty content,
     * i.e. when the page is being deleted.
     *
     * @param Event $event event object by reference
     * @return void
     */
    public function handleDelete(Event $event)
    {
        if ($event->data[3]) return; // is old revision stuff
        if (!empty($event->data[0][1])) return; // page still exists

        // IO_WIKIPAGE_WRITE reports the namespace (data[1], false for the root namespace)
        // and the bare page name (data[2]) separately; the index is keyed by the full
        // page ID, so it has to be put back together before deleting
        $namespace = $event->data[1] ?? '';
        $pageName = $event->data[2];
        $id = ($namespace !== '' && $namespace !== false && $namespace !== null)
            ? $namespace . ':' . $pageName
            : $pageName;

        // still here? delete from index
        $this->deleteEntry($id, self::DOCTYPE_PAGE);
    }

    /**
     * Remove deleted media from index
     *
     * @param Event $event event carrying the media 'id' and the 'unl' flag
     * @param mixed $param unused
     */
    public function handleMediaDelete(Event $event, $param)
    {
        // 'unl' is only truthy when the file was actually removed
        if ($event->data['unl']) {
            $this->deleteEntry($event->data['id'], self::DOCTYPE_MEDIA);
        }
    }

    /**
     * Check if the page $id has changed since the last indexing.
     *
     * Side effect: when the page must not be indexed (missing or hidden) but still
     * has an index state file, its entry is removed from the index.
     *
     * @param string $id
     * @return boolean true if the page should be (re)indexed
     */
    protected function needsIndexing($id)
    {
        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        $refreshStateFile = metaFN($id, '.elasticsearch_refresh');
        $dataFile = wikiFN($id);

        // no data file or page is hidden ('hidepages' configuration option) -> no indexing
        if (!file_exists($dataFile) || isHiddenPage($id)) {
            // page should not be indexed but has a state file, try to remove from index
            if (file_exists($indexStateFile)) {
                $this->deleteEntry($id, self::DOCTYPE_PAGE);
            }
            return false;
        }

        // force indexing if we're called via cli (e.g. cron)
        if (PHP_SAPI === 'cli') {
            return true;
        }

        // never indexed before -> index now
        if (!file_exists($indexStateFile)) {
            return true;
        }

        // check if latest indexing attempt is done after page update
        // and after other updates related to the page made by plugins
        $indexedAt = filemtime($indexStateFile);
        $newerThanPage = $indexedAt > filemtime($dataFile);
        $newerThanRefresh = !file_exists($refreshStateFile) || $indexedAt > filemtime($refreshStateFile);

        return !($newerThanPage && $newerThanRefresh);
    }

    /**
     * Write a document to the index and record the indexed state
     *
     * Tries to update an existing document first and adds it as a new one if
     * the update reports that it does not exist. Other response errors are
     * rethrown; any remaining exception is shown to the user via msg().
     *
     * @param array $data document fields; must contain 'doctype' and 'uri'
     */
    protected function writeIndex($data)
    {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');

        $indexName = $this->getConf('indexname');
        $client = $hlp->connect();
        $index = $client->getIndex($indexName);
        $documentId = $data['doctype'] . '_' . $data['uri'];

        // check if the document still exists to update it or add it as a new one
        try {
            $client->updateDocument($documentId, ['doc' => $data], $index->getName());
        } catch (NotFoundException $e) {
            $index->addDocument(new Document($documentId, $data));
        } catch (ResponseException $e) {
            if ((int) $e->getResponse()->getStatus() !== 404) {
                throw $e;
            }
            $index->addDocument(new Document($documentId, $data));
        } catch (Exception $e) {
            msg(
                'Something went wrong on indexing please try again later or ask an admin for help.<br /><pre>' .
                hsc(get_class($e) . ' ' . $e->getMessage()) . '</pre>',
                -1
            );
            return;
        }

        $index->refresh();

        // pass the doctype along so media files get their state file in the media meta
        // directory, which is where deleteEntry() looks for it
        $this->updateIndexstate($data['uri'], $data['doctype']);
    }

    /**
     * Save indexed state for a page or a media file
     *
     * @param string $id
     * @param string $doctype one of the DOCTYPE_* constants
     * @return bool result of io_saveFile()
     */
    protected function updateIndexstate($id, $doctype = self::DOCTYPE_PAGE)
    {
        $indexStateFile = ($doctype === self::DOCTYPE_MEDIA) ?
            mediaMetaFN($id, '.elasticsearch_indexed') :
            metaFN($id, '.elasticsearch_indexed');

        return io_saveFile($indexStateFile, '');
    }

    /**
     * Remove the given document from the index
     *
     * Failures to delete the document are logged and otherwise ignored; the
     * matching index state file is removed in either case.
     *
     * @param string $id page or media ID
     * @param string $doctype one of the DOCTYPE_* constants
     */
    public function deleteEntry($id, $doctype)
    {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');
        $indexName = $this->getConf('indexname');
        $client = $hlp->connect();
        $index = $client->getIndex($indexName);
        $documentId = $doctype . '_' . $id;

        try {
            $index->deleteById($documentId);
            $index->refresh();
            $this->log($documentId . ' deleted ');
        } catch (Exception $e) {
            // we ignore this
            $this->log($documentId . ' not deleted ' . $e->getMessage());
        }

        // delete state file
        $stateFile = ($doctype === self::DOCTYPE_MEDIA) ?
            mediaMetaFN($id, '.elasticsearch_indexed') :
            metaFN($id, '.elasticsearch_indexed');
        @unlink($stateFile);
    }

    /**
     * Index a page
     *
     * Collects metadata, raw syntax, rendered text (if the text renderer plugin
     * is available), language, namespace, ACL information and plugin supplied
     * data, then hands everything to writeIndex().
     *
     * @param string $id
     * @return void
     */
    public function indexPage($id)
    {
        global $conf;

        $this->log('Indexing page ' . $id);

        // collect the data which should be indexed
        $meta = p_get_metadata($id, '', METADATA_RENDER_UNLIMITED);

        // Optional metadata keys are read with "?? null": a missing key is passed on as
        // null exactly as before, just without triggering an undefined key warning.
        // NOTE(review): date() formats in the default timezone while the pattern appends
        // a literal "Z" - confirm whether gmdate() is what is actually intended here.
        $data = [];
        $data['uri'] = $id;
        $data['created'] = date('Y-m-d\TH:i:s\Z', $meta['date']['created'] ?? null);
        $data['modified'] = date('Y-m-d\TH:i:s\Z', $meta['date']['modified'] ?? null);
        $data['user'] = $meta['user'] ?? null;
        $data['title'] = $meta['title'] ?? $id;
        $data['abstract'] = $meta['description']['abstract'] ?? null;
        $data['syntax'] = rawWiki($id);
        $data['mime'] = self::MIME_DOKUWIKI;
        $data['doctype'] = self::DOCTYPE_PAGE;

        // prefer rendered plaintext over raw syntax output
        /** @var \renderer_plugin_text $textRenderer */
        $textRenderer = plugin_load('renderer', 'text');
        if ($textRenderer) {
            $data['content'] = p_cached_output(wikiFN($id), 'text');
        } else {
            $data['content'] = $data['syntax'];
        }

        /** @var helper_plugin_translation $trans */
        $trans = plugin_load('helper', 'translation');
        if ($trans) {
            // translation plugin available
            $lc = $trans->getLangPart($id);
            $data['language'] = $trans->realLC($lc);
        } else {
            // no translation plugin
            $data['language'] = $conf['lang'];
        }

        // pages in the root namespace carry no namespace field at all
        $data['namespace'] = getNS($id);
        if (trim($data['namespace']) == '') {
            unset($data['namespace']);
        }

        /** @var helper_plugin_elasticsearch_acl $hlpAcl */
        $hlpAcl = plugin_load('helper', 'elasticsearch_acl');

        $fullACL = $hlpAcl->getPageACL($id);
        $queryACL = $hlpAcl->splitRules($fullACL);
        $data = array_merge($data, $queryACL);

        // let plugins add their own data to index
        $pluginData = $this->getPluginData($data['uri']);
        $data = array_merge($data, $pluginData);

        $this->writeIndex($data);
    }

    /**
     * Index a file
     *
     * Files for which the document parser throws a RuntimeException are
     * logged and skipped.
     *
     * @param string $fileId
     * @return void
     * @throws Exception
     */
    public function indexFile($fileId)
    {
        $this->log('Indexing file ' . $fileId);

        $docparser = new \helper_plugin_elasticsearch_docparser();

        $file = mediaFN($fileId);

        try {
            $data = $docparser->parse($file);
            $data['uri'] = $fileId;
            $data['doctype'] = self::DOCTYPE_MEDIA;
            // NOTE(review): same "Z" suffix on a date() (local time) value as in indexPage()
            $data['modified'] = date('Y-m-d\TH:i:s\Z', filemtime($file));

            // files in the root namespace carry no namespace field at all
            $data['namespace'] = getNS($fileId);
            if (trim($data['namespace']) == '') {
                unset($data['namespace']);
            }

            /** @var helper_plugin_elasticsearch_acl $hlpAcl */
            $hlpAcl = plugin_load('helper', 'elasticsearch_acl');

            $fullACL = $hlpAcl->getPageACL($fileId);
            $queryACL = $hlpAcl->splitRules($fullACL);
            $data = array_merge($data, $queryACL);

            $this->writeIndex($data);
        } catch (RuntimeException $e) {
            $this->log('Skipping ' . $fileId . ': ' . $e->getMessage());
        }
    }


    /**
     * Get plugin data to feed into the index.
     * If data does not match previously defined mappings, it will be ignored.
     *
     * Triggers PLUGIN_ELASTICSEARCH_INDEXPAGE with an array that initially only
     * holds the page's 'uri'; handlers may add further fields to it.
     *
     * @param string $id
     * @return array
     */
    protected function getPluginData($id): array
    {
        $pluginData = ['uri' => $id];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_INDEXPAGE', $pluginData);
        return $pluginData;
    }

    /**
     * Log something to the debug log
     *
     * @param string $txt message, automatically prefixed with "ElasticSearch: "
     * @param mixed $info additional details handed to the logger
     */
    protected function log($txt, $info = null)
    {
        $txt = 'ElasticSearch: ' . $txt;
        Logger::debug($txt, $info);
    }
}