<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer
 *
 * Entry point for adding, deleting and renaming a single page in the search
 * index. The actual index data is maintained through MetadataIndex and
 * FulltextIndex; this class coordinates them and keeps the page.idx entry
 * and the per-page ".indexed" tag file up to date.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var string|null name of the page to be indexed */
    protected $page;

    /**
     * Indexer constructor
     *
     * @param string $page name of the page to index
     */
    public function __construct($page = null)
    {
        if (isset($page)) $this->page = $page;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * A page that no longer exists is removed from the index, any other page
     * is (re-)indexed.
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool If the function completed successfully
     *
     * @throws IndexAccessException when no page name was given to the constructor
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: unknown page name');
        }

        // check if page was deleted but is still in the index
        if (!page_exists($this->page)) {
            return $this->deletePage($verbose, $force);
        }

        // update search index
        return $this->addPage($verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string the bare INDEXER_VERSION, or that version followed by
     *                    "+plugin=version" parts when plugins registered versions
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = array('dokuwiki' => $version);
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);

            // the DokuWiki version is already the leading part of $version, so
            // drop it here and append the plugin entries in a stable (sorted) order
            unset($data['dokuwiki']);
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool If the function completed successfully
     *
     * @throws IndexAccessException when no page name was given to the constructor
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in addPage');
        }
        $page = $this->page;

        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // getVersion() may return an int, the tag file always holds a string:
            // compare as strings to avoid numeric string juggling
            if (trim(io_readFile($idxtag)) === (string) $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = array();
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = ($references !== null)
            ? array_keys($references)
            : array();

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = ($media !== null)
            ? array_keys($media)
            : array();

        // check if full text indexing allowed; anything but an explicit false enables it
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        // let plugins adjust the data to be indexed; the raw wiki text is only
        // appended to the body when the default action was not prevented
        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);
        // copy the (possibly modified) event data back into the local variables
        extract($data);
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']); // internal flag, not a metadata key to be indexed

        // Access to Metadata Index
        $result = (new MetadataIndex($pid))->addMetaKeys($metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        if ($indexenabled) {
            $result = (new FulltextIndex($pid))->addWords($body);
            if ($verbose) dbglog("Indexer: addWords() for {$page} done");
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index;
            // addressed by numeric pid like every other index access in this class
            $result = (new FulltextIndex($pid))->deleteWords();
            if ($verbose) dbglog("Indexer: deleteWords() for {$page} done");
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * @param bool $verbose print status messages
     * @param bool $force proceed even when the page has no ".indexed" tag file
     * @return bool If the function completed successfully
     *
     * @throws IndexAccessException when no page name was given to the constructor
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in deletePage');
        }
        $page = $this->page;

        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // retrieve pid from the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // remove obsoleted content from Fulltext index
        $result = (new FulltextIndex($pid))->deleteWords();
        if ($verbose) dbglog("Indexer: deleteWords() for {$page} done");
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $result = (new MetadataIndex($pid))->deleteMetaKeys();
        if ($verbose) dbglog("Indexer: deleteMetaKeys() for {$page} done");
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx;
        // the lock is released even when writing the index fails
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool If the page was successfully renamed
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            // force the deletion: the page is known to be listed in page.idx, so its
            // entry must go even when its ".indexed" tag file is missing, otherwise
            // page.idx would end up with two entries for $newpage
            $result = (new Indexer($newpage))->deletePage(false, true);
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx; the lock is released even when writing the index fails
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the Metadata and Fulltext indexes, removes the page.idx file and
     * resets the pid cache.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();
        try {
            // clear Metadata Index (lock is already handled here)
            (new MetadataIndex())->clear(false);

            // clear Fulltext Index (lock is already handled here)
            (new FulltextIndex())->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // never leave the index locked behind, even if clearing failed
            if ($requireLock) $this->unlock();
        }
        return true;
    }
}