<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Logger;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer
 *
 * Coordinates the page index (page.idx), the Metadata index and the Fulltext
 * index for a single page: adding/updating, deleting and renaming its entries.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    // page to be indexed
    protected $page;

    /**
     * Indexer constructor
     *
     * @param string $page name of the page to index; may be omitted for
     *                     operations that do not work on a single page
     *                     (e.g. renamePage(), clear())
     */
    public function __construct($page = null)
    {
        if (isset($page)) $this->page = $page;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * A page that no longer exists is removed from the index, any other page
     * is added or updated.
     *
     * @param bool $verbose print status messages
     * @param bool $force   force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no page name has been set
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: unknown page name');
        }

        // check if page was deleted but is still in the index
        if (!page_exists($this->page)) {
            return $this->deletePage($verbose, $force);
        }

        // update search index
        return $this->addPage($verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and then served from a static
     * variable. Without plugin data it is the plain INDEXER_VERSION integer,
     * otherwise a string of the form "8+pluginA=1+pluginB=2" (plugins sorted
     * by name).
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = array('dokuwiki' => $version);
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);

            // the base version already leads the version string, so it must
            // not take part in the sorted list of plugin versions
            unset($data['dokuwiki']);
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * No lock is taken in this method itself; locking is expected to be
     * handled by the index classes that are called from here.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param bool $verbose print status messages
     * @param bool $force   force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no page name has been set
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in addPage');
        }
        $page = $this->page;

        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // Compare as strings: getVersion() may return an integer, and a loose
            // comparison would treat a stale tag like "8+plugin=1" as equal to 8
            // on PHP versions before 8.0.
            if (trim(io_readFile($idxtag)) === (string) $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) Logger::debug("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = array();
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = ($references !== null) ?
            array_keys($references) : array();

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = ($media !== null) ?
            array_keys($media) : array();

        // check if full text indexing allowed: only an explicit false disables it
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        // unless a handler prevented the default, append the raw wiki text to the body
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);
        // pull the (presumably handler-modified) values back into the local scope
        extract($data);
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $result = (new MetadataIndex($pid))->addMetaKeys($metadata);
        if ($verbose) Logger::debug("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        if ($indexenabled) {
            $result = (new FulltextIndex($pid))->addWords($body);
            if ($verbose) Logger::debug("Indexer: addWords() for {$page} done");
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) Logger::debug("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index;
            // the index is addressed by the numeric pid like everywhere else
            $result = (new FulltextIndex($pid))->deleteWords();
            if ($verbose) Logger::debug("Indexer: deleteWords() for {$page} done");
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) Logger::debug("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases the page's entries from the Fulltext and Metadata indexes and
     * marks it as deleted in page.idx. The page.idx update is done under the
     * index lock, which is released even when writing fails.
     *
     * @param bool $verbose print status messages
     * @param bool $force   run the deletion even when the page has no
     *                      ".indexed" tag file
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no page name has been set
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in deletePage');
        }
        $page = $this->page;

        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) Logger::debug("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // retrieve pid from the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // remove obsoleted content from Fulltext index
        $result = (new FulltextIndex($pid))->deleteWords();
        if ($verbose) Logger::debug("Indexer: deleteWords() for {$page} done");
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $result = (new MetadataIndex($pid))->deleteMetaKeys();
        if ($verbose) Logger::debug("Indexer: deleteMetaKeys() for {$page} done");
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) Logger::debug("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            // never leave the index locked, even if the write above throws
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool           If the page was successfully renamed
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            // Force the deletion: the target is known to be listed in page.idx, so
            // it must be removed even when its ".indexed" tag file is missing,
            // otherwise page.idx would end up with two entries for $newpage.
            $result = (new Indexer($newpage))->deletePage(false, true);
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            // never leave the index locked, even if the write above throws
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the Metadata and Fulltext indexes, removes page.idx and resets
     * the pid cache.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();

        try {
            // clear Metadata Index (the lock is already held, hence false)
            (new MetadataIndex())->clear(false);

            // clear Fulltext Index (the lock is already held, hence false)
            (new FulltextIndex())->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // release our own lock even if one of the steps above throws
            if ($requireLock) $this->unlock();
        }

        return true;
    }
}