<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer
 *
 * Entry point for keeping the search index in sync with a single page:
 * it registers the page in page.idx and delegates the actual work to
 * MetadataIndex and FulltextIndex.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var string|null name of the page to be indexed */
    protected $page;

    /**
     * Indexer constructor
     *
     * @param string|null $page name of the page to index; may be omitted when only
     *                          page independent methods (renamePage(), clear(),
     *                          getVersion()) are going to be used
     */
    public function __construct($page = null)
    {
        if (isset($page)) $this->page = $page;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * Pages that no longer exist are removed from the index, all others are
     * added or updated.
     *
     * @param bool $verbose print status messages
     * @param bool $force   force reindexing even when the index is up to date
     * @return bool         If the function completed successfully
     *
     * @throws IndexAccessException when no page name has been set
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: unknow page name');
        }

        // check if page was deleted but is still in the index
        if (!page_exists($this->page)) {
            return $this->deletePage($verbose, $force);
        }

        // update search index
        return $this->addPage($verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string plain INDEXER_VERSION when no plugin contributed,
     *                    otherwise "<version>+<plugin>=<vers>..." sorted by plugin name
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = ['dokuwiki' => $version];
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            // the DokuWiki version is already the leading part of $version,
            // it must not be sorted in between the plugin entries
            unset($data['dokuwiki']);
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     * Handlers receive the keys 'page', 'body', 'metadata' and 'pid' and may
     * modify them; preventing the default action skips adding the raw wiki
     * text to the body.
     *
     * @param bool $verbose print status messages
     * @param bool $force   force reindexing even when the index is up to date
     * @return bool         If the function completed successfully
     *
     * @throws IndexAccessException when no page name has been set
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in addePage');
        }
        $page = $this->page;

        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // loose comparison on purpose: the tag file holds a string while
            // getVersion() returns an integer when no plugin added its version
            if (trim(io_readFile($idxtag)) == $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = [];
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        // only the keys (the referenced ids) are indexed; anything that is not
        // an array (missing or damaged metadata) is treated as "no entries"
        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = is_array($references) ? array_keys($references) : [];

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = is_array($media) ? array_keys($media) : [];

        // check if full text indexing allowed; everything but an explicit false enables it
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);
        // take over whatever the event handlers changed in page, body, metadata and pid
        extract($data);
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $result = (new MetadataIndex($pid))->addMetaKeys($metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        if ($indexenabled) {
            $result = (new FulltextIndex($pid))->addWords($body);
            if ($verbose) dbglog("Indexer: addWords() for {$page} done");
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index;
            // addressed by $pid like every other index access in this class
            $result = (new FulltextIndex($pid))->deleteWords();
            if ($verbose) dbglog("Indexer: deleteWords() for {$page} done");
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * @param bool $verbose print status messages
     * @param bool $force   remove the page even when its .indexed tag file does not exist
     * @return bool         If the function completed successfully
     *
     * @throws IndexAccessException when no page name has been set
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in deletePage');
        }
        $page = $this->page;

        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // retrieve pid from the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // remove obsoleted content from Fulltext index
        $result = (new FulltextIndex($pid))->deleteWords();
        if ($verbose) dbglog("Indexer: deleteWords() for {$page} done");
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $result = (new MetadataIndex($pid))->deleteMetaKeys();
        if ($verbose) dbglog("Indexer: deleteMetaKeys() for {$page} done");
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            // never leave the index locked when writing page.idx failed
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool           If the page was successfully renamed
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            // The deletion is forced: the new name is known to be listed in
            // page.idx, so its entries have to go even when no .indexed tag
            // file exists. Otherwise page.idx would end up with two entries
            // for the same page name.
            $result = (new Indexer($newpage))->deletePage(false, true);
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            // never leave the index locked when writing page.idx failed
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the Metadata and Fulltext indexes, removes page.idx and resets
     * the pid cache.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool             If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();

        try {
            // clear Metadata Index; false is passed as its lock argument,
            // presumably because the lock is already held here
            (new MetadataIndex())->clear(false);

            // clear Fulltext Index
            (new FulltextIndex())->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // release the lock taken above even when clearing a sub index failed
            if ($requireLock) $this->unlock();
        }

        return true;
    }
}