xref: /dokuwiki/inc/Search/Indexer.php (revision 51ddbadd349647574755305e1ab08620b7a7ff43)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\FulltextIndex;
7use dokuwiki\Search\MetadataIndex;
8
9// Version tag used to force rebuild on upgrade
10const INDEXER_VERSION = 8;
11
12/**
13 * Class DokuWiki Indexer (Singleton)
14 *
15 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
16 * @author     Andreas Gohr <andi@splitbrain.org>
17 * @author Tom N Harris <tnharris@whoopdedo.org>
18 */
19class Indexer extends AbstractIndex
20{
21    /** @var Indexer $instance */
22    protected static $instance = null;
23
24    /**
25     * Get new or existing singleton instance of the Indexer
26     *
27     * @return Indexer
28     */
29    public static function getInstance()
30    {
31        if (is_null(static::$instance)) {
32            static::$instance = new static();
33        }
34        return static::$instance;
35    }
36
37    /**
38     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
39     *
40     * @param string        $page   name of the page to index
41     * @param bool          $verbose    print status messages
42     * @param bool          $force  force reindexing even when the index is up to date
43     * @return bool  If the function completed successfully
44     *
45     * @author Tom N Harris <tnharris@whoopdedo.org>
46     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
47     */
48    public function dispatch($page, $verbose = false, $force = false)
49    {
50        // check if page was deleted but is still in the index
51        if (!page_exists($page)) {
52            return $this->deletePage($page, $verbose, $force);
53        }
54
55        // update search index
56        return $this->addPage($page, $verbose, $force);
57    }
58
59    /**
60     * Version of the indexer taking into consideration the external tokenizer.
61     * The indexer is only compatible with data written by the same version.
62     *
63     * @triggers INDEXER_VERSION_GET
64     * Plugins that modify what gets indexed should hook this event and
65     * add their version info to the event data like so:
66     *     $data[$plugin_name] = $plugin_version;
67     *
68     * @author Tom N Harris <tnharris@whoopdedo.org>
69     * @author Michael Hamann <michael@content-space.de>
70     *
71     * @return int|string
72     */
73    public function getVersion()
74    {
75        static $indexer_version = null;
76        if ($indexer_version == null) {
77            $version = INDEXER_VERSION;
78
79            // DokuWiki version is included for the convenience of plugins
80            $data = array('dokuwiki' => $version);
81            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
82            unset($data['dokuwiki']); // this needs to be first
83            ksort($data);
84            foreach ($data as $plugin => $vers) {
85                $version .= '+'.$plugin.'='.$vers;
86            }
87            $indexer_version = $version;
88        }
89        return $indexer_version;
90    }
91
92    /**
93     * Adds/updates the search index for the given page
94     *
95     * Locking is handled internally.
96     *
97     * @param string        $page   name of the page to index
98     * @param bool          $verbose    print status messages
99     * @param bool          $force  force reindexing even when the index is up to date
100     * @return bool  If the function completed successfully
101     *
102     * @author Tom N Harris <tnharris@whoopdedo.org>
103     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
104     */
105    public function addPage($page, $verbose = false, $force = false)
106    {
107        // check if indexing needed for the existing page (full text and/or metadata indexing)
108        $idxtag = metaFN($page,'.indexed');
109        if (!$force && file_exists($idxtag)) {
110            if (trim(io_readFile($idxtag)) == $this->getVersion()) {
111                $last = @filemtime($idxtag);
112                if ($last > @filemtime(wikiFN($page))) {
113                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
114                    return true;
115                }
116            }
117        }
118
119        // register the page to the page.idx
120        $pid = $this->getPID($page);
121        if ($pid === false) {
122            if ($verbose) dbglog("Indexer: getting the PID failed for {$page}");
123            trigger_error("Failed to get PID for {$page}", E_USER_ERROR);
124            return false;
125        }
126
127        // prepare metadata indexing
128        $metadata = array();
129        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);
130
131        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
132        $metadata['relation_references'] = ($references !== null) ?
133                array_keys($references) : array();
134
135        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
136        $metadata['relation_media'] = ($media !== null) ?
137                array_keys($media) : array();
138
139        // check if full text indexing allowed
140        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
141        if ($indexenabled !== false) $indexenabled = true;
142        $metadata['internal_index'] = $indexenabled;
143
144        $body = '';
145        $data = compact('page', 'body', 'metadata', 'pid');
146        $event = new Event('INDEXER_PAGE_ADD', $data);
147        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
148        $event->advise_after();
149        unset($event);
150        extract($data);
151        $indexenabled = $metadata['internal_index'];
152        unset($metadata['internal_index']);
153
154        // Access to Metadata Index
155        $MetadataIndex = MetadataIndex::getInstance();
156        $result = $MetadataIndex->addMetaKeys($page, $metadata);
157        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
158        if (!$result) {
159            return false;
160        }
161
162        // Access to Fulltext Index
163        $FulltextIndex = FulltextIndex::getInstance();
164        if ($indexenabled) {
165            $result = $FulltextIndex->addPagewords($page, $body);
166            if ($verbose) dbglog("Indexer: addPageWords({$page}) ".($result ? 'done' : 'failed'));
167            if (!$result) {
168                return false;
169            }
170        } else {
171            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
172            // ensure the page content deleted from the Fulltext index
173            $result = $FulltextIndex->deletePageWords($page);
174            if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
175            if (!$result) {
176                return false;
177            }
178        }
179
180        // update index tag file
181        io_saveFile($idxtag, $this->getVersion());
182        if ($verbose) dbglog("Indexer: finished");
183
184        return $result;
185    }
186
187    /**
188     * Remove a page from the index
189     *
190     * Erases entries in all known indexes. Locking is handled internally.
191     *
192     * @param string        $page   name of the page to index
193     * @param bool          $verbose    print status messages
194     * @param bool          $force  force reindexing even when the index is up to date
195     * @return bool  If the function completed successfully
196     *
197     * @author Tom N Harris <tnharris@whoopdedo.org>
198     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
199     */
200    public function deletePage($page, $verbose = false, $force = false)
201    {
202        $idxtag = metaFN($page,'.indexed');
203        if (!$force && !file_exists($idxtag)) {
204            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
205            return true;
206        }
207
208        // remove obsoleted content from Fulltext index
209        $FulltextIndex = FulltextIndex::getInstance();
210        $result = $FulltextIndex->deletePageWords($page);
211        if ($verbose) dbglog("Indexer: deletePageWords({$page}) ".($result ? 'done' : 'failed'));
212        if (!$result) {
213            return false;
214        }
215
216        // delete all keys of the page from metadata index
217        $MetadataIndex = MetadataIndex::getInstance();
218        $result = $MetadataIndex->deleteMetaKeys($page);
219        if ($verbose) dbglog("Indexer: deleteMetaKeys({$page}) ".($result ? 'done' : 'failed'));
220        if (!$result) {
221            return false;
222        }
223
224        // mark the page as deleted in the page.idx
225        $pid = $this->getPID($page);
226        if ($pid !== false) {
227            if (!$this->lock()) return false;
228            $result = $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
229            if ($verbose) dbglog("Indexer: update page.idx  ".($result ? 'done' : 'failed'));
230            $this->unlock();
231        } else {
232            if ($verbose) dbglog("Indexer: {$page} not found in the page.idx, ignoring");
233            $result = true;
234        }
235
236        unset(static::$pidCache[$pid]);
237        @unlink($idxtag);
238        return $result;
239    }
240
241    /**
242     * Rename a page in the search index without changing the indexed content.
243     * This function doesn't check if the old or new name exists in the filesystem.
244     * It returns an error if the old page isn't in the page list of the indexer
245     * and it deletes all previously indexed content of the new page.
246     *
247     * @param string $oldpage The old page name
248     * @param string $newpage The new page name
249     * @return bool           If the page was successfully renamed
250     */
251    public function renamePage($oldpage, $newpage)
252    {
253        $index = $this->getIndex('page', '');
254        // check if oldpage found in page.idx
255        $oldPid = array_search($oldpage, $index, true);
256        if ($oldPid === false) return false;
257
258        // check if newpage found in page.idx
259        $newPid = array_search($newpage, $index, true);
260        if ($newPid !== false) {
261            $result = $this->deletePage($newpage);
262            if (!$result) return false;
263            // Note: $index is no longer valid after deletePage()!
264            unset($index);
265        }
266
267        // update page.idx
268        if (!$this->lock()) return false;
269        $result = $this->saveIndexKey('page', '', $oldPid, $newpage);
270        $this->unlock();
271
272        // reset the pid cache
273        $this->resetPIDCache();
274
275        return $result;
276    }
277
278    /**
279     * Clear the Page Index
280     *
281     * @param bool   $requireLock
282     * @return bool  If the index has been cleared successfully
283     */
284    public function clear($requireLock = true)
285    {
286        global $conf;
287
288        if ($requireLock && !$this->lock()) return false;
289
290        // clear Metadata Index
291        $MetadataIndex = MetadataIndex::getInstance();
292        $MetadataIndex->clear(false);
293
294        // clear Fulltext Index
295        $FulltextIndex = FulltextIndex::getInstance();
296        $FulltextIndex->clear(false);
297
298        @unlink($conf['indexdir'].'/page.idx');
299
300        // clear the pid cache
301        $this->resetPIDCache();
302
303        if ($requireLock) $this->unlock();
304        return true;
305    }
306
307}
308