<?php

use dokuwiki\Extension\Event;

/**
 * DokuWiki Plugin elasticsearch (Action Component)
 *
 * Keeps the Elasticsearch index in sync with the wiki: pages are (re)indexed
 * when they are displayed and removed from the index when they are deleted.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Kieback&Peter IT <it-support@kieback-peter.de>
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class action_plugin_elasticsearch_indexing extends DokuWiki_Action_Plugin {

    /**
     * Registers a callback function for a given event
     *
     * @param Doku_Event_Handler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(Doku_Event_Handler $controller) {
        $controller->register_hook('TPL_CONTENT_DISPLAY', 'BEFORE', $this, 'handle_tpl_content_display');
        $controller->register_hook('IO_WIKIPAGE_WRITE', 'BEFORE', $this, 'handle_delete');
    }

    /**
     * Add the currently displayed page to the index if it is out of date
     *
     * @param Doku_Event $event event object by reference
     * @param mixed $param [the parameters passed as fifth argument to register_hook() when this
     *                           handler was registered]
     * @return void
     */
    public function handle_tpl_content_display(Doku_Event &$event, $param) {
        global $ID, $INFO;

        // Evaluate exactly once: needs_indexing() is not a pure check, it may
        // remove a stale document from the index as a side effect.
        $needsIndexing = $this->needs_indexing($ID);

        // Only assemble the debug lines when they will actually be written
        if ($this->getConf('debug')) {
            $this->log([
                'BEGIN content display',
                metaFN($ID, '.elasticsearch_indexed'),
                wikiFN($ID),
                wikiFN($INFO['id']),
                $needsIndexing ? 'needs indexing' : 'no indexing needed',
                'END content display',
            ]);
        }

        if ($needsIndexing) {
            $this->index_page($ID);
        }
    }

    /**
     * Remove pages from index
     *
     * Reacts to IO_WIKIPAGE_WRITE and only acts when the current revision of
     * a page is written with empty content.
     *
     * @param Doku_Event $event event object by reference
     * @param mixed $param [the parameters passed as fifth argument to register_hook() when this
     *                           handler was registered]
     * @return void
     */
    public function handle_delete(Doku_Event &$event, $param) {
        if ($event->data[3]) return; // is old revision stuff
        if (!empty($event->data[0][1])) return; // page still exists

        // still here? delete from index
        // data[2] is only the page name, the namespace is passed separately in
        // data[1] (false for the root namespace). Rebuild the full page id so
        // that pages inside namespaces are removed correctly.
        $namespace = $event->data[1];
        $pageId = ($namespace ? $namespace . ':' : '') . $event->data[2];

        $this->delete_page($pageId);
    }

    /**
     * Check if the page $id has changed since the last indexing.
     *
     * Side effect: when the page must not be indexed (missing or hidden) but
     * still has an index state file, it is removed from the index.
     *
     * @param string $id
     * @return boolean
     */
    protected function needs_indexing($id) {
        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        $refreshStateFile = metaFN($id, '.elasticsearch_refresh');
        $dataFile = wikiFN($id);

        // no data file or page is hidden ('hidepages' configuration option) -> no indexing
        if (!file_exists($dataFile) || isHiddenPage($id)) {
            // page should not be indexed but has a state file, try to remove from index
            if (file_exists($indexStateFile)) {
                $this->delete_page($id);
            }
            return false;
        }

        // force indexing if we're called via cli (e.g. cron)
        if (php_sapi_name() === 'cli') {
            return true;
        }

        // never indexed before
        if (!file_exists($indexStateFile)) {
            return true;
        }

        // check if latest indexing attempt is done after page update
        // and after other updates related to the page made by plugins
        $indexedAt = filemtime($indexStateFile);
        $pageIsCurrent = $indexedAt > filemtime($dataFile);
        $refreshIsCurrent = !file_exists($refreshStateFile) || $indexedAt > filemtime($refreshStateFile);

        return !($pageIsCurrent && $refreshIsCurrent);
    }

    /**
     * Save indexed state for a page
     *
     * Touches the (empty) state file whose modification time marks the moment
     * of the last successful indexing.
     *
     * @param string $id
     * @return bool the result of io_saveFile()
     */
    protected function update_indexstate($id) {
        $indexStateFile = metaFN($id, '.elasticsearch_indexed');
        return io_saveFile($indexStateFile, '');
    }

    /**
     * Remove the given document from the index
     *
     * Failures while deleting are logged and otherwise ignored; the local
     * index state file is removed in any case.
     *
     * @param string $id full page id
     */
    public function delete_page($id) {
        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');
        $indexName = $this->getConf('indexname');
        $documentType = $this->getConf('documenttype');
        $client = $hlp->connect();
        $index = $client->getIndex($indexName);
        $type = $index->getType($documentType);
        $documentId = $documentType . '_' . $id;

        try {
            $type->deleteById($documentId);
            $index->refresh();
            $this->log($documentId.' deleted ');
        } catch(Exception $e) {
            // we ignore this
            $this->log($documentId.' not deleted '.$e->getMessage());
        }

        // delete state file (best effort, it may not exist)
        @unlink(metaFN($id, '.elasticsearch_indexed'));
    }

    /**
     * Index a page
     *
     * Collects metadata, content, language, namespace, ACL information and
     * plugin supplied fields and sends them to the index. An existing
     * document is updated, a missing one is added.
     *
     * @param string $id full page id
     * @return void
     */
    public function index_page($id) {
        global $conf;

        /** @var helper_plugin_elasticsearch_client $hlp */
        $hlp = plugin_load('helper', 'elasticsearch_client');
        /** @var helper_plugin_elasticsearch_acl $hlpAcl */
        $hlpAcl = plugin_load('helper', 'elasticsearch_acl');

        $this->log('Indexing page ' . $id);
        $indexName = $this->getConf('indexname');
        $documentType = $this->getConf('documenttype');
        $client = $hlp->connect();
        $index = $client->getIndex($indexName);
        $type = $index->getType($documentType);
        $documentId = $documentType . '_' . $id;

        // collect the data which should be indexed
        $meta = p_get_metadata($id, '', METADATA_RENDER_UNLIMITED);

        $data = [];
        $data['uri'] = $id;
        // The format ends in a literal "Z" (UTC designator), so the timestamp
        // has to be rendered in UTC, not in the server's local timezone.
        // Metadata keys are optional, hence the null coalescing guards.
        $data['created'] = gmdate('Y-m-d\TH:i:s\Z', $meta['date']['created'] ?? null);
        $data['modified'] = gmdate('Y-m-d\TH:i:s\Z', $meta['date']['modified'] ?? null);
        $data['user'] = $meta['user'] ?? null;
        $data['title'] = $meta['title'] ?? null;
        $data['abstract'] = $meta['description']['abstract'] ?? null;
        $data['syntax'] = rawWiki($id);

        // prefer rendered plaintext over raw syntax output
        /** @var \renderer_plugin_text $textRenderer */
        $textRenderer = plugin_load('renderer', 'text');
        if ($textRenderer) {
            $data['content'] = p_cached_output(wikiFN($id), 'text');
        } else {
            $data['content'] = $data['syntax'];
        }

        /** @var helper_plugin_translation $trans */
        $trans = plugin_load('helper', 'translation');
        if ($trans) {
            // translation plugin available
            $lc = $trans->getLangPart($id);
            $data['language'] = $trans->realLC($lc);
        } else {
            // no translation plugin
            $data['language'] = $conf['lang'];
        }

        // pages in the root namespace carry no namespace field at all
        $data['namespace'] = getNS($id);
        if (trim($data['namespace']) === '') {
            unset($data['namespace']);
        }

        $fullACL = $hlpAcl->getPageACL($id);
        $queryACL = $hlpAcl->splitRules($fullACL);
        $data = array_merge($data, $queryACL);

        // let plugins add their own data to index
        $pluginData = $this->getPluginData($data['uri']);
        $data = array_merge($data, $pluginData);

        // check if the document still exists to update it or add it as a new one
        try {
            $client->updateDocument($documentId, ['doc' => $data], $index->getName(), $type->getName());
        } catch(\Elastica\Exception\NotFoundException $e) {
            $type->addDocument(new \Elastica\Document($documentId, $data));
        } catch(\Elastica\Exception\ResponseException $e) {
            // only a "not found" response means the document has to be created
            if ((int) $e->getResponse()->getStatus() !== 404) {
                throw $e;
            }
            $type->addDocument(new \Elastica\Document($documentId, $data));
        } catch(Exception $e) {
            msg(
                'Something went wrong on indexing please try again later or ask an admin for help.<br /><pre>' .
                hsc(get_class($e).' '.$e->getMessage()) . '</pre>',
                -1
            );
            return;
        }

        $index->refresh();
        $this->update_indexstate($id);
    }

    /**
     * Get plugin data to feed into the index.
     * If data does not match previously defined mappings, it will be ignored.
     *
     * Triggers PLUGIN_ELASTICSEARCH_INDEXPAGE with an array that initially
     * holds only the page's 'uri'; handlers may add further fields to it.
     *
     * @see \helper_plugin_elasticsearch_client::mapPluginFields
     *
     * @param string $id
     * @return array
     */
    protected function getPluginData($id): array
    {
        $pluginData = ['uri' => $id];
        Event::createAndTrigger('PLUGIN_ELASTICSEARCH_INDEXPAGE', $pluginData);
        return $pluginData;
    }

    /**
     * Log something to the debug log
     *
     * Does nothing unless the plugin's 'debug' option is enabled.
     *
     * @param string|string[] $txt a single message or a list of messages
     */
    protected function log($txt) {
        if (!$this->getConf('debug')) {
            return;
        }

        foreach ((array) $txt as $entry) {
            dbglog($entry);
        }
    }
}