xref: /dokuwiki/inc/Search/Indexer.php (revision 5792814ceec77d6d39fe39e061d967f18d685f34)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\Exception\IndexAccessException;
7use dokuwiki\Search\Exception\IndexLockException;
8use dokuwiki\Search\Exception\IndexWriteException;
9
// Version tag used to force rebuild on upgrade.
// Indexer::getVersion() uses this value as the base of the version string that
// Indexer::addPage() writes to each page's ".indexed" tag file; a page whose
// stored tag differs from the current version string is indexed again.
const INDEXER_VERSION = 8;
12
/**
 * Class DokuWiki Indexer
 *
 * Front end for indexing a single page: it decides whether the page has to be
 * added to or removed from the search index and delegates the actual work to
 * MetadataIndex and FulltextIndex. It also maintains the page.idx entry of the
 * page and the per-page ".indexed" tag file.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var string|null name of the page to be indexed, null when none was given */
    protected $page;

    /**
     * Indexer constructor
     *
     * @param string $page name of the page to index
     */
    public function __construct($page = null)
    {
        if (isset($page)) $this->page = $page;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * A page that does not exist (anymore) is removed from the index,
     * an existing page is added or updated.
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no page name was given to the constructor
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: unknown page name');
        }

        // check if page was deleted but is still in the index
        if (!page_exists($this->page)) {
            return $this->deletePage($verbose, $force);
        }

        // update search index
        return $this->addPage($verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     * It consists of INDEXER_VERSION followed by "+plugin=version" for every
     * entry that plugins added to the event data, sorted by plugin name.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string
     */
    public function getVersion()
    {
        static $indexer_version = null;
        // strict comparison: only "not computed yet" may trigger the computation
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = array('dokuwiki' => $version);
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * The page is skipped (and true returned) when its ".indexed" tag file holds
     * the current indexer version and is newer than the page file, unless $force
     * is set. On success the tag file is rewritten with the current version.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no page name was given to the constructor
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in addPage');
        }
        $page = $this->page;

        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            if (trim(io_readFile($idxtag)) == $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = array();
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = ($references !== null) ?
                array_keys($references) : array();

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = ($media !== null) ?
                array_keys($media) : array();

        // check if full text indexing allowed; anything but an explicit false enables it
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        // let plugins contribute to (or replace) the data that gets indexed
        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);
        // import the (possibly modified) event data back into the local variables
        extract($data);
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $result = (new MetadataIndex($pid))->addMetaKeys($metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        if ($indexenabled) {
            $result = (new FulltextIndex($pid))->addWords($body);
            if ($verbose) dbglog("Indexer: addWords() for {$page} ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index;
            // the index is addressed by pid, like everywhere else in this class
            $result = (new FulltextIndex($pid))->deleteWords();
            if ($verbose) dbglog("Indexer: deleteWords() for {$page} ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * Nothing is done (and true returned) when the page has no ".indexed" tag
     * file, unless $force is set. After a successful removal the page.idx entry
     * is marked as deleted and the tag file is removed.
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException when no page name was given to the constructor
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in deletePage');
        }
        $page = $this->page;

        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // retrieve pid from the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // remove obsoleted content from Fulltext index
        $result = (new FulltextIndex($pid))->deleteWords();
        if ($verbose) dbglog("Indexer: deleteWords() for {$page} ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $result = (new MetadataIndex($pid))->deleteMetaKeys();
        if ($verbose) dbglog("Indexer: deleteMetaKeys() for {$page} ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx;
        // the lock is released even when writing the index throws
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns false if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool  If the page was successfully renamed
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            $result = (new Indexer($newpage))->deletePage();
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx; the lock is released even when writing the index throws
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the metadata and fulltext indexes, removes page.idx and resets the
     * pid cache. The sub-indexes are cleared without taking their own lock.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();

        try {
            // clear Metadata Index
            (new MetadataIndex())->clear(false);

            // clear Fulltext Index
            (new FulltextIndex())->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // never leave a lock behind that this call acquired itself
            if ($requireLock) $this->unlock();
        }

        return true;
    }

}
317