<?php

use dokuwiki\Extension\Event;

/**
 * DokuWiki Plugin elasticsearch (Action Component)
 *
 * Hooks into page display, page writes, media uploads and media deletions
 * to add documents to, update them in, or remove them from the configured
 * ElasticSearch index.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class action_plugin_elasticsearch_indexing extends DokuWiki_Action_Plugin {

    const MIME_DOKUWIKI = 'text/dokuwiki';
    const DOCTYPE_PAGE = 'page';
    const DOCTYPE_MEDIA = 'media';

    /**
     * Registers a callback function for a given event
     *
     * @param Doku_Event_Handler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('TPL_CONTENT_DISPLAY', 'BEFORE', $this, 'handle_tpl_content_display');
        $controller->register_hook('IO_WIKIPAGE_WRITE', 'BEFORE', $this, 'handle_delete');
        $controller->register_hook('MEDIA_UPLOAD_FINISH', 'AFTER', $this, 'handle_media_upload');
        $controller->register_hook('MEDIA_DELETE_FILE', 'AFTER', $this, 'handle_media_delete');
    }

    /**
     * Add pages to index
     *
     * Indexes the currently displayed page when needs_indexing() reports
     * that the index is out of date.
     *
     * @param Doku_Event $event event object by reference
     * @return void
     */
    public function handle_tpl_content_display(Doku_Event $event) {
        global $ID, $INFO;

        // needs_indexing() may delete a stale index entry as a side effect,
        // so evaluate it exactly once and reuse the result
        $needsIndexing = $this->needs_indexing($ID);

        $this->log(
            'content display',
            [
                metaFN($ID, '.elasticsearch_indexed'),
                wikiFN($ID),
                wikiFN($INFO['id']),
                $needsIndexing ? 'needs indexing' : 'no indexing needed',
            ]
        );

        if ($needsIndexing) {
            $this->index_page($ID);
        }
    }

    /**
     * Update index on media upload
     *
     * @param Doku_Event $event event whose data[2] is passed to index_file() as the media ID
     * @throws Exception
     */
    public function handle_media_upload(Doku_Event $event) {
        $this->index_file($event->data[2]);
    }

    /**
     * Remove pages from index
     *
     * Triggered before a wiki page is written. Writes of old revisions and
     * writes with non-empty content are ignored; an empty write of the
     * current revision is treated as a page deletion.
     *
     * @param Doku_Event $event event object by reference
     * @return void
     */
    public function handle_delete(Doku_Event $event) {
        if ($event->data[3]) return; // is old revision stuff
        if (!empty($event->data[0][1])) return; // page still exists

        // IO_WIKIPAGE_WRITE delivers the namespace (data[1]) and the page name
        // without namespace (data[2]) separately; rebuild the full page ID so
        // the document ID and state file of namespaced pages are matched.
        $namespace = (string) $event->data[1]; // false for the root namespace
        $pageName = $event->data[2];
        $id = ($namespace === '') ? $pageName : $namespace . ':' . $pageName;

        // still here? delete from index
        $this->delete_entry($id, self::DOCTYPE_PAGE);
    }

    /**
     * Remove deleted media from index
     *
     * Only acts when the event's 'unl' flag is set.
     *
     * @param Doku_Event $event
     * @param mixed $param unused
     */
    public function handle_media_delete(Doku_Event $event, $param) {
        if ($event->data['unl']) {
            $this->delete_entry($event->data['id'], self::DOCTYPE_MEDIA);
        }
    }

    /**
     * Check if the page $id has changed since the last indexing.
     *
     * Side effect: if the page no longer exists or is hidden but still has
     * an index state file, its index entry is removed.
     *
     * @param string $id
     * @return boolean
     */
    protected function needs_indexing($id) {
        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        $refreshStateFile = metaFN($id, '.elasticsearch_refresh');
        $dataFile = wikiFN($id);

        // no data file or page is hidden ('hidepages' configuration option) -> no indexing
        if (!file_exists($dataFile) || isHiddenPage($id)) {
            // page should not be indexed but has a state file, try to remove from index
            if (file_exists($indexStateFile)) {
                $this->delete_entry($id, self::DOCTYPE_PAGE);
            }
            return false;
        }

        // force indexing if we're called via cli (e.g. cron)
        if (php_sapi_name() === 'cli') {
            return true;
        }

        // never indexed before -> index now
        if (!file_exists($indexStateFile)) {
            return true;
        }

        // check if latest indexing attempt is done after page update
        // and after other updates related to the page made by plugins
        $indexedAt = filemtime($indexStateFile);
        $upToDate = ($indexedAt > filemtime($dataFile)) &&
            (!file_exists($refreshStateFile) || $indexedAt > filemtime($refreshStateFile));

        return !$upToDate;
    }

    /**
     * Write a document to the index and record its indexed state
     *
     * Tries to update an existing document first and adds a new one when the
     * update reports that the document does not exist.
     *
     * @param array $data document fields; must contain 'doctype' and 'uri'
     */
    protected function write_index($data) {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');

        $indexName = $this->getConf('indexname');
        $client = $hlp->connect();
        $index = $client->getIndex($indexName);
        $documentId = $data['doctype'] . '_' . $data['uri'];

        // check if the document still exists to update it or add it as a new one
        try {
            $client->updateDocument($documentId, ['doc' => $data], $index->getName());
        } catch (\Elastica\Exception\NotFoundException $e) {
            $index->addDocument(new \Elastica\Document($documentId, $data));
        } catch (\Elastica\Exception\ResponseException $e) {
            // NOTE(review): exceptions thrown from inside this catch block are not
            // handled by the generic catch below and propagate to the caller
            if ($e->getResponse()->getStatus() != 404) {
                throw $e;
            }
            $index->addDocument(new \Elastica\Document($documentId, $data));
        } catch (Exception $e) {
            msg(
                'Something went wrong on indexing please try again later or ask an admin for help.<br /><pre>' .
                hsc(get_class($e) . ' ' . $e->getMessage()) . '</pre>',
                -1
            );
            return;
        }

        $index->refresh();

        // pass the doctype so media state files land where delete_entry() looks for them
        $this->update_indexstate($data['uri'], $data['doctype'] ?? self::DOCTYPE_PAGE);
    }

    /**
     * Save indexed state for a page or a media file
     *
     * Writes an empty state file; media use mediaMetaFN(), everything else metaFN().
     *
     * @param string $id
     * @param string $doctype
     * @return bool
     */
    protected function update_indexstate($id, $doctype = self::DOCTYPE_PAGE) {
        if ($doctype === self::DOCTYPE_MEDIA) {
            $indexStateFile = mediaMetaFN($id, '.elasticsearch_indexed');
        } else {
            $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        }
        return io_saveFile($indexStateFile, '');
    }

    /**
     * Remove the given document from the index
     *
     * Failures while deleting from the index are logged and otherwise ignored;
     * the state file is removed in either case.
     *
     * @param string $id full page or media ID
     * @param string $doctype one of the DOCTYPE_* constants
     */
    public function delete_entry($id, $doctype) {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');
        $indexName = $this->getConf('indexname');
        $client = $hlp->connect();
        $index = $client->getIndex($indexName);
        $documentId = $doctype . '_' . $id;

        try {
            $index->deleteById($documentId);
            $index->refresh();
            $this->log($documentId.' deleted ');
        } catch (Exception $e) {
            // we ignore this
            $this->log($documentId.' not deleted '.$e->getMessage());
        }

        // delete state file
        $stateFile = ($doctype === self::DOCTYPE_MEDIA) ?
            mediaMetaFN($id, '.elasticsearch_indexed') :
            metaFN($id, '.elasticsearch_indexed');
        @unlink($stateFile);
    }

    /**
     * Index a page
     *
     * Collects metadata, raw syntax, text content, language, namespace,
     * ACL information and plugin supplied data, then writes the document.
     *
     * @param string $id
     * @return void
     */
    public function index_page($id) {
        global $conf;

        $this->log('Indexing page ' . $id);

        // collect the data which should be indexed
        $meta = p_get_metadata($id, '', METADATA_RENDER_UNLIMITED);

        $data = [];
        $data['uri'] = $id;
        // NOTE(review): date() formats in the server's configured timezone while the
        // literal 'Z' suffix denotes UTC - confirm whether gmdate() is intended
        $data['created'] = date('Y-m-d\TH:i:s\Z', $meta['date']['created'] ?? null);
        $data['modified'] = date('Y-m-d\TH:i:s\Z', $meta['date']['modified'] ?? null);
        // optional metadata keys may be missing; guard them like the title below
        $data['user'] = $meta['user'] ?? null;
        $data['title'] = $meta['title'] ?? $id;
        $data['abstract'] = $meta['description']['abstract'] ?? null;
        $data['syntax'] = rawWiki($id);
        $data['mime'] = self::MIME_DOKUWIKI;
        $data['doctype'] = self::DOCTYPE_PAGE;

        // prefer rendered plaintext over raw syntax output
        /** @var \renderer_plugin_text $textRenderer */
        $textRenderer = plugin_load('renderer', 'text');
        if ($textRenderer) {
            $data['content'] = p_cached_output(wikiFN($id), 'text');
        } else {
            $data['content'] = $data['syntax'];
        }

        /** @var helper_plugin_translation $trans */
        $trans = plugin_load('helper', 'translation');
        if ($trans) {
            // translation plugin available
            $lc = $trans->getLangPart($id);
            $data['language'] = $trans->realLC($lc);
        } else {
            // no translation plugin
            $data['language'] = $conf['lang'];
        }

        // pages in the root namespace get no namespace field at all
        $data['namespace'] = getNS($id);
        if (trim($data['namespace']) === '') {
            unset($data['namespace']);
        }

        /** @var helper_plugin_elasticsearch_acl $hlpAcl */
        $hlpAcl = plugin_load('helper', 'elasticsearch_acl');

        $fullACL = $hlpAcl->getPageACL($id);
        $queryACL = $hlpAcl->splitRules($fullACL);
        $data = array_merge($data, $queryACL);

        // let plugins add their own data to index
        $pluginData = $this->getPluginData($data['uri']);
        $data = array_merge($data, $pluginData);

        $this->write_index($data);
    }

    /**
     * Index a file
     *
     * A RuntimeException raised while parsing or indexing is logged and the
     * file is skipped; other exceptions propagate.
     *
     * @param string $fileId
     * @return void
     * @throws Exception
     */
    public function index_file($fileId) {
        $this->log('Indexing file ' . $fileId);

        $docparser = new \helper_plugin_elasticsearch_docparser();

        $file = mediaFN($fileId);

        try {
            $data = $docparser->parse($file);
            $data['uri'] = $fileId;
            $data['doctype'] = self::DOCTYPE_MEDIA;
            // NOTE(review): same local-time vs. 'Z' (UTC) mismatch as in index_page()
            $data['modified'] = date('Y-m-d\TH:i:s\Z', filemtime($file));
            $data['namespace'] = getNS($fileId);
            if (trim($data['namespace']) === '') {
                unset($data['namespace']);
            }

            /** @var helper_plugin_elasticsearch_acl $hlpAcl */
            $hlpAcl = plugin_load('helper', 'elasticsearch_acl');

            $fullACL = $hlpAcl->getPageACL($fileId);
            $queryACL = $hlpAcl->splitRules($fullACL);
            $data = array_merge($data, $queryACL);

            $this->write_index($data);
        } catch (RuntimeException $e) {
            $this->log('Skipping ' . $fileId . ': ' . $e->getMessage());
        }
    }

    /**
     * Get plugin data to feed into the index.
     * If data does not match previously defined mappings, it will be ignored.
     *
     * Triggers PLUGIN_ELASTICSEARCH_INDEXPAGE with an array seeded with the
     * page's 'uri'; handlers may add further keys to it.
     *
     * @param string $id
     * @return array
     */
    protected function getPluginData($id): array {
        $pluginData = ['uri' => $id];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_INDEXPAGE', $pluginData);
        return $pluginData;
    }

    /**
     * Log something to the debug log
     *
     * @param string $txt
     * @param mixed $info
     */
    protected function log($txt, $info = null) {
        $txt = 'ElasticSearch: '.$txt;
        \dokuwiki\Logger::debug($txt, $info);
    }
}