<?php

namespace dokuwiki\Search;

use dokuwiki\Extension\Event;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer
 *
 * Coordinates the page index (page.idx), the metadata index and the
 * fulltext index for a single page.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var string|null name of the page to be indexed */
    protected $page;

    /**
     * Indexer constructor
     *
     * @param string|null $page name of the page to index
     */
    public function __construct($page = null)
    {
        if (isset($page)) $this->page = $page;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * Pages that no longer exist are removed from the index, all other
     * pages are added to or updated in the index.
     *
     * @param bool $verbose print status messages
     * @param bool $force   passed through to addPage() / deletePage()
     * @return bool If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: unknown page name');
        }

        // check if page was deleted but is still in the index
        if (!page_exists($this->page)) {
            return $this->deletePage($verbose, $force);
        }

        // update search index
        return $this->addPage($verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = ['dokuwiki' => $version];
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);

            // the core version already leads $version, so drop it before the
            // plugin entries are sorted and appended
            unset($data['dokuwiki']);
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     * Handlers receive an array with the keys 'page', 'body', 'metadata'
     * and 'pid'. Changes made to 'page', 'body' and 'metadata' are used
     * for the subsequent index updates.
     *
     * @param bool $verbose print status messages
     * @param bool $force   force reindexing even when the index is up to date
     * @return bool If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in addPage');
        }
        $page = $this->page;

        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // the tag file stores the indexer version that wrote the index
            if (trim(io_readFile($idxtag)) === (string) $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always numeric
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = [];
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = ($references !== null) ? array_keys($references) : [];

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = ($media !== null) ? array_keys($media) : [];

        // check if full text indexing allowed; anything but an explicit false enables it
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);

        // Take over the (possibly plugin-modified) values explicitly. A blanket
        // extract() would let additional keys in the event data overwrite
        // unrelated local variables such as $idxtag, $verbose or $force.
        if (array_key_exists('page', $data)) $page = $data['page'];
        if (array_key_exists('body', $data)) $body = $data['body'];
        if (array_key_exists('metadata', $data)) $metadata = $data['metadata'];

        // 'internal_index' is only a switch for this method, it is not indexed itself
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $result = (new MetadataIndex($page))->addMetaKeys($metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        if ($indexenabled) {
            $result = (new FulltextIndex($page))->addWords($body);
            if ($verbose) dbglog("Indexer: addPageWords({$page}) ".($result ? 'done' : 'failed')); // FIXME
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index
            $result = (new FulltextIndex($page))->deleteWords();
            if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed')); // FIXME
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * @param bool $verbose print status messages
     * @param bool $force   run the deletion even when the page's .indexed tag file does not exist
     * @return bool If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in deletePage');
        }
        $page = $this->page;

        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // remove obsoleted content from Fulltext index
        $result = (new FulltextIndex($page))->deleteWords();
        if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed')); // FIXME
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $result = (new MetadataIndex($page))->deleteMetaKeys();
        if ($verbose) dbglog("Indexer: deleteMetaKeys({$page}) ".($result ? 'done' : 'failed')); // FIXME
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx
        $pid = $this->getPID($page);
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            // release the lock even when writing the index entry throws
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool If the page was successfully renamed
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');

        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            $result = (new Indexer($newpage))->deletePage();
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            // release the lock even when writing the index entry throws
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the metadata and fulltext indexes as well and removes page.idx.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool If the index has been cleared successfully
     *
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();
        try {
            // clear Metadata Index
            (new MetadataIndex())->clear(false);

            // clear Fulltext Index
            (new FulltextIndex())->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // only release a lock that was acquired here
            if ($requireLock) $this->unlock();
        }

        return true;
    }
}