xref: /dokuwiki/inc/Search/Indexer.php (revision a16bd5489e1dd5e6c42803b19bc20d9231c91a16)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\Exception\IndexAccessException;
7use dokuwiki\Search\Exception\IndexLockException;
8use dokuwiki\Search\Exception\IndexWriteException;
9
10// Version tag used to force rebuild on upgrade
11const INDEXER_VERSION = 8;
12
13/**
14 * Class DokuWiki Indexer (Singleton)
15 *
16 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
17 * @author     Andreas Gohr <andi@splitbrain.org>
18 * @author Tom N Harris <tnharris@whoopdedo.org>
19 */
20class Indexer extends AbstractIndex
21{
22    /** @var Indexer $instance */
23    protected static $instance = null;
24
25    /**
26     * Get new or existing singleton instance of the Indexer
27     *
28     * @return Indexer
29     */
30    public static function getInstance()
31    {
32        if (is_null(static::$instance)) {
33            static::$instance = new static();
34        }
35        return static::$instance;
36    }
37
38    /**
39     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
40     *
41     * @param string $page name of the page to index
42     * @param bool $verbose print status messages
43     * @param bool $force force reindexing even when the index is up to date
44     * @return bool  If the function completed successfully
45     *
46     * @throws IndexLockException
47     * @throws IndexWriteException
48     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
49     * @author Tom N Harris <tnharris@whoopdedo.org>
50     */
51    public function dispatch($page, $verbose = false, $force = false)
52    {
53        // check if page was deleted but is still in the index
54        if (!page_exists($page)) {
55            return $this->deletePage($page, $verbose, $force);
56        }
57
58        // update search index
59        return $this->addPage($page, $verbose, $force);
60    }
61
62    /**
63     * Version of the indexer taking into consideration the external tokenizer.
64     * The indexer is only compatible with data written by the same version.
65     *
66     * @triggers INDEXER_VERSION_GET
67     * Plugins that modify what gets indexed should hook this event and
68     * add their version info to the event data like so:
69     *     $data[$plugin_name] = $plugin_version;
70     *
71     * @author Tom N Harris <tnharris@whoopdedo.org>
72     * @author Michael Hamann <michael@content-space.de>
73     *
74     * @return int|string
75     */
76    public function getVersion()
77    {
78        static $indexer_version = null;
79        if ($indexer_version == null) {
80            $version = INDEXER_VERSION;
81
82            // DokuWiki version is included for the convenience of plugins
83            $data = array('dokuwiki' => $version);
84            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
85            unset($data['dokuwiki']); // this needs to be first
86            ksort($data);
87            foreach ($data as $plugin => $vers) {
88                $version .= '+'.$plugin.'='.$vers;
89            }
90            $indexer_version = $version;
91        }
92        return $indexer_version;
93    }
94
95    /**
96     * Adds/updates the search index for the given page
97     *
98     * Locking is handled internally.
99     *
100     * @param string $page name of the page to index
101     * @param bool $verbose print status messages
102     * @param bool $force force reindexing even when the index is up to date
103     * @return bool  If the function completed successfully
104     *
105     * @throws IndexLockException
106     * @throws IndexWriteException
107     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
108     * @author Tom N Harris <tnharris@whoopdedo.org>
109     */
110    public function addPage($page, $verbose = false, $force = false)
111    {
112        // check if indexing needed for the existing page (full text and/or metadata indexing)
113        $idxtag = metaFN($page,'.indexed');
114        if (!$force && file_exists($idxtag)) {
115            if (trim(io_readFile($idxtag)) == $this->getVersion()) {
116                $last = @filemtime($idxtag);
117                if ($last > @filemtime(wikiFN($page))) {
118                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
119                    return true;
120                }
121            }
122        }
123
124        // register the page to the page.idx file, $pid is always numeric
125        $pid = $this->getPID($page);
126
127        // prepare metadata indexing
128        $metadata = array();
129        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);
130
131        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
132        $metadata['relation_references'] = ($references !== null) ?
133                array_keys($references) : array();
134
135        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
136        $metadata['relation_media'] = ($media !== null) ?
137                array_keys($media) : array();
138
139        // check if full text indexing allowed
140        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
141        if ($indexenabled !== false) $indexenabled = true;
142        $metadata['internal_index'] = $indexenabled;
143
144        $body = '';
145        $data = compact('page', 'body', 'metadata', 'pid');
146        $event = new Event('INDEXER_PAGE_ADD', $data);
147        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
148        $event->advise_after();
149        unset($event);
150        extract($data);
151        $indexenabled = $metadata['internal_index'];
152        unset($metadata['internal_index']);
153
154        // Access to Metadata Index
155        $MetadataIndex = MetadataIndex::getInstance();
156        $result = $MetadataIndex->addMetaKeys($page, $metadata);
157        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
158        if (!$result) {
159            return false;
160        }
161
162        // Access to Fulltext Index
163        $FulltextIndex = FulltextIndex::getInstance();
164        if ($indexenabled) {
165            $result = $FulltextIndex->addPagewords($page, $body);
166            if ($verbose) dbglog("Indexer: addPageWords({$page}) ".($result ? 'done' : 'failed'));
167            if (!$result) {
168                return false;
169            }
170        } else {
171            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
172            // ensure the page content deleted from the Fulltext index
173            $result = $FulltextIndex->deletePageWords($page);
174            if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
175            if (!$result) {
176                return false;
177            }
178        }
179
180        // update index tag file
181        io_saveFile($idxtag, $this->getVersion());
182        if ($verbose) dbglog("Indexer: finished");
183
184        return $result;
185    }
186
187    /**
188     * Remove a page from the index
189     *
190     * Erases entries in all known indexes. Locking is handled internally.
191     *
192     * @param string $page name of the page to index
193     * @param bool $verbose print status messages
194     * @param bool $force force reindexing even when the index is up to date
195     * @return bool  If the function completed successfully
196     *
197     * @throws IndexLockException
198     * @throws IndexWriteException
199     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
200     * @author Tom N Harris <tnharris@whoopdedo.org>
201     */
202    public function deletePage($page, $verbose = false, $force = false)
203    {
204        $idxtag = metaFN($page,'.indexed');
205        if (!$force && !file_exists($idxtag)) {
206            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
207            return true;
208        }
209
210        // remove obsoleted content from Fulltext index
211        $FulltextIndex = FulltextIndex::getInstance();
212        $result = $FulltextIndex->deletePageWords($page);
213        if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
214        if (!$result) {
215            return false;
216        }
217
218        // delete all keys of the page from metadata index
219        $MetadataIndex = MetadataIndex::getInstance();
220        $result = $MetadataIndex->deleteMetaKeys($page);
221        if ($verbose) dbglog("Indexer: deleteMetaKeys({$page}) ".($result ? 'done' : 'failed'));
222        if (!$result) {
223            return false;
224        }
225
226        // mark the page as deleted in the page.idx
227        $pid = $this->getPID($page);
228        $this->lock();
229        $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
230        if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
231        $this->unlock();
232
233        unset(static::$pidCache[$pid]);
234        @unlink($idxtag);
235        return $result;
236    }
237
238    /**
239     * Rename a page in the search index without changing the indexed content.
240     * This function doesn't check if the old or new name exists in the filesystem.
241     * It returns an error if the old page isn't in the page list of the indexer
242     * and it deletes all previously indexed content of the new page.
243     *
244     * @param string $oldpage The old page name
245     * @param string $newpage The new page name
246     * @return bool  If the page was successfully renamed
247     * @throws IndexLockException
248     * @throws IndexWriteException
249     */
250    public function renamePage($oldpage, $newpage)
251    {
252        $index = $this->getIndex('page', '');
253        // check if oldpage found in page.idx
254        $oldPid = array_search($oldpage, $index, true);
255        if ($oldPid === false) return false;
256
257        // check if newpage found in page.idx
258        $newPid = array_search($newpage, $index, true);
259        if ($newPid !== false) {
260            $result = $this->deletePage($newpage);
261            if (!$result) return false;
262            // Note: $index is no longer valid after deletePage()!
263            unset($index);
264        }
265
266        // update page.idx
267        $this->lock();
268        $this->saveIndexKey('page', '', $oldPid, $newpage);
269        $this->unlock();
270
271        // reset the pid cache
272        $this->resetPIDCache();
273
274        return true;
275    }
276
277    /**
278     * Clear the Page Index
279     *
280     * @param bool $requireLock should be false only if the caller is resposible for index lock
281     * @return bool  If the index has been cleared successfully
282     * @throws Exception\IndexLockException
283     */
284    public function clear($requireLock = true)
285    {
286        global $conf;
287
288        if ($requireLock) $this->lock();
289
290        // clear Metadata Index
291        $MetadataIndex = MetadataIndex::getInstance();
292        $MetadataIndex->clear(false);
293
294        // clear Fulltext Index
295        $FulltextIndex = FulltextIndex::getInstance();
296        $FulltextIndex->clear(false);
297
298        @unlink($conf['indexdir'].'/page.idx');
299
300        // clear the pid cache
301        $this->resetPIDCache();
302
303        if ($requireLock) $this->unlock();
304        return true;
305    }
306
307}
308