<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer (Singleton)
 *
 * Coordinates the page list (page.idx) with the Metadata and Fulltext
 * indexes when a page is added, deleted or renamed.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var Indexer $instance */
    protected static $instance = null;

    /**
     * Get new or existing singleton instance of the Indexer
     *
     * The instance is created lazily on first call and reused afterwards.
     *
     * @return Indexer
     */
    public static function getInstance()
    {
        if (is_null(static::$instance)) {
            static::$instance = new static();
        }
        return static::$instance;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * Pages that no longer exist are removed from the index via deletePage(),
     * all other pages are (re)indexed via addPage().
     *
     * @param string $page name of the page to index
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool If the function completed successfully
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($page, $verbose = false, $force = false)
    {
        // check if page was deleted but is still in the index
        if (!page_exists($page)) {
            return $this->deletePage($page, $verbose, $force);
        }

        // update search index
        return $this->addPage($page, $verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is INDEXER_VERSION, followed by one "+name=version" suffix
     * for every entry that event handlers added to the event data (sorted by
     * name). It is computed once per request and cached in a static local.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = array('dokuwiki' => $version);
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * The page is skipped when its ".indexed" tag file carries the current
     * indexer version and is newer than the page file, unless $force is set.
     *
     * @triggers INDEXER_PAGE_ADD
     * Event data holds the keys 'page', 'body', 'metadata' and 'pid'; the
     * values of 'page', 'body' and 'metadata' are read back after the event.
     *
     * @param string $page name of the page to index
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool If the function completed successfully
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($page, $verbose = false, $force = false)
    {
        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // compare as strings: getVersion() may return an int, and a loose
            // comparison would treat a stale tag such as "8+plugin=1" as equal to 8
            // on PHP versions before 8
            if (trim(io_readFile($idxtag)) === (string) $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always numeric
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = array();
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = is_array($references) ? array_keys($references) : array();

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = is_array($media) ? array_keys($media) : array();

        // check if full text indexing allowed: anything but an explicit false enables it
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);

        // Read back only the values used below. A blanket extract($data) would let
        // an event handler overwrite unrelated locals such as $idxtag or $verbose.
        if (array_key_exists('page', $data)) $page = $data['page'];
        if (array_key_exists('body', $data)) $body = $data['body'];
        if (array_key_exists('metadata', $data)) $metadata = $data['metadata'];

        // the flag is only a switch for this method and is not stored as a metadata key
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->addMetaKeys($page, $metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        $FulltextIndex = FulltextIndex::getInstance();
        if ($indexenabled) {
            $result = $FulltextIndex->addPagewords($page, $body);
            if ($verbose) dbglog("Indexer: addPageWords({$page}) ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index
            $result = $FulltextIndex->deletePageWords($page);
            if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * Without $force nothing is done (and true is returned) when the page
     * has no ".indexed" tag file.
     *
     * @param string $page name of the page to index
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool If the function completed successfully
     *
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($page, $verbose = false, $force = false)
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // remove obsoleted content from Fulltext index
        $FulltextIndex = FulltextIndex::getInstance();
        $result = $FulltextIndex->deletePageWords($page);
        if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $MetadataIndex = MetadataIndex::getInstance();
        $result = $MetadataIndex->deleteMetaKeys($page);
        if ($verbose) dbglog("Indexer: deleteMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx
        $pid = $this->getPID($page);
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            // always release the lock, even when writing the index throws
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool If the page was successfully renamed
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            // Force the deletion: without $force, deletePage() returns early when the
            // ".indexed" tag file is missing and the existing page.idx entry of
            // $newpage would survive next to the renamed one.
            $result = $this->deletePage($newpage, false, true);
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            // always release the lock, even when writing the index throws
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the Metadata and Fulltext indexes as well, removes page.idx
     * and resets the pid cache.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();
        try {
            // clear Metadata Index (the lock, if any, is already held here)
            $MetadataIndex = MetadataIndex::getInstance();
            $MetadataIndex->clear(false);

            // clear Fulltext Index (the lock, if any, is already held here)
            $FulltextIndex = FulltextIndex::getInstance();
            $FulltextIndex->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // release only a lock that was acquired by this call
            if ($requireLock) $this->unlock();
        }
        return true;
    }

}