<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\SearchException;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer (Singleton)
 *
 * Entry point for keeping the search index in sync with a page: it maintains
 * the page list (page.idx) itself and delegates to MetadataIndex and
 * FulltextIndex for the page's metadata keys and page words.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var Indexer $instance */
    protected static $instance = null;

    /**
     * Get new or existing singleton instance of the Indexer
     *
     * @return Indexer
     */
    public static function getInstance()
    {
        if (is_null(static::$instance)) {
            static::$instance = new static();
        }
        return static::$instance;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * Pages that no longer exist are removed from the index, all other pages
     * are (re)indexed.
     *
     * @param string    $page   name of the page to index
     * @param bool      $verbose    print status messages
     * @param bool      $force  force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws SearchException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($page, $verbose = false, $force = false)
    {
        // check if page was deleted but is still in the index
        if (!page_exists($page)) {
            return $this->deletePage($page, $verbose, $force);
        }

        // update search index
        return $this->addPage($page, $verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and then served from a static
     * local. It is the bare INDEXER_VERSION when no plugin contributes, otherwise
     * a string of the form "<INDEXER_VERSION>+<plugin>=<version>+...".
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = array('dokuwiki' => $version);
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            // the core version already leads the tag; drop it before sorting the plugin entries
            unset($data['dokuwiki']);
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * The page is skipped (and true returned) when its ".indexed" tag file
     * carries the current indexer version and is newer than the page file,
     * unless $force is set.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param string    $page   name of the page to index
     * @param bool      $verbose    print status messages
     * @param bool      $force  force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no PID can be obtained for the page
     * @throws SearchException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($page, $verbose = false, $force = false)
    {
        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // Compare as strings: a loose comparison against an integer version would
            // accept a stale tag like "8+plugin=1" as equal to 8 on PHP < 8.
            if (trim(io_readFile($idxtag)) === (string) $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx
        $pid = $this->getPID($page);
        if ($pid === false) {
            if ($verbose) dbglog("Indexer: getting the PID failed for {$page}");
            throw new IndexAccessException("Failed to get PID for {$page}");
        }

        // prepare metadata indexing
        $metadata = array();
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = ($references !== null) ?
                array_keys($references) : array();

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = ($media !== null) ?
                array_keys($media) : array();

        // check if full text indexing allowed
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);
        // Take back only the keys that were handed to the event, so that extra keys
        // added by a handler cannot overwrite unrelated local variables.
        $known = array_flip(array('page', 'body', 'metadata', 'pid'));
        extract(array_intersect_key($data, $known));
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->addMetaKeys($page, $metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        $FulltextIndex = FulltextIndex::getInstance();
        if ($indexenabled) {
            $result = $FulltextIndex->addPagewords($page, $body);
            if ($verbose) dbglog("Indexer: addPageWords({$page}) ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index
            $result = $FulltextIndex->deletePageWords($page);
            if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * Without $force the call is a no-op (returning true) when the page has no
     * ".indexed" tag file. The tag file is only removed once every step has
     * succeeded, so that a failed removal is retried on the next call.
     *
     * @param string    $page   name of the page to index
     * @param bool      $verbose    print status messages
     * @param bool      $force  force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws Exception\IndexLockException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($page, $verbose = false, $force = false)
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // remove obsoleted content from Fulltext index
        $FulltextIndex = FulltextIndex::getInstance();
        $result = $FulltextIndex->deletePageWords($page);
        if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->deleteMetaKeys($page);
        if ($verbose) dbglog("Indexer: deleteMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx
        $pid = $this->getPID($page);
        if ($pid !== false) {
            if (!$this->lock()) return false;
            $result = $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: update page.idx ".($result ? 'done' : 'failed'));
            $this->unlock();

            // like the other failure paths: keep the tag file so the removal can be retried
            if (!$result) {
                return false;
            }

            // only a real PID may be used as cache key (false would be cast to key 0)
            unset(static::$pidCache[$pid]);
        } else {
            if ($verbose) dbglog("Indexer: {$page} not found in the page.idx, ignoring");
            $result = true;
        }

        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * Renaming a page to its own name is a no-op that returns true.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool           If the page was successfully renamed
     * @throws Exception\IndexLockException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // nothing to rename; without this guard the page's own content would be deleted below
        if ($oldpage === $newpage) return true;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            // Force the removal: the target is known to the index, so it has to go even
            // when no ".indexed" tag file exists for it in the filesystem.
            $result = $this->deletePage($newpage, false, true);
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx
        if (!$this->lock()) return false;
        $result = $this->saveIndexKey('page', '', $oldPid, $newpage);
        $this->unlock();

        // reset the pid cache
        $this->resetPIDCache();

        return $result;
    }

    /**
     * Clear the Page Index
     *
     * Clears the Metadata and Fulltext indexes as well and removes page.idx.
     *
     * @param bool   $requireLock should be false only if the caller is responsible for index lock
     * @return bool  If the index has been cleared successfully
     * @throws Exception\IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        // never touch the index files without holding the lock (same check as in deletePage/renamePage)
        if ($requireLock && !$this->lock()) return false;

        // clear Metadata Index
        $MetadataIndex = MetadataIndex::getInstance();
        $MetadataIndex->clear(false);

        // clear Fulltext Index
        $FulltextIndex = FulltextIndex::getInstance();
        $FulltextIndex->clear(false);

        @unlink($conf['indexdir'].'/page.idx');

        // clear the pid cache
        $this->resetPIDCache();

        if ($requireLock) $this->unlock();
        return true;
    }
}