xref: /dokuwiki/inc/Search/Indexer.php (revision 725e8e5f1de55fe91234a551f65f436015c3615e)
1<?php
2
3namespace dokuwiki\Search;
4
5use dokuwiki\Extension\Event;
6use dokuwiki\Search\Exception\IndexAccessException;
7use dokuwiki\Search\Exception\IndexLockException;
8use dokuwiki\Search\Exception\IndexWriteException;
9
// Version tag used to force rebuild on upgrade.
// Indexer::getVersion() combines this number with the versions reported by plugins;
// Indexer::addPage() compares that result with the content of a page's ".indexed"
// tag file and reindexes the page when the two differ.
const INDEXER_VERSION = 8;
12
/**
 * Class DokuWiki Indexer
 *
 * Entry point for (re)indexing a single page: it decides whether the page has
 * to be added to or removed from the search index and delegates the actual
 * work to MetadataIndex and FulltextIndex. It also maintains the page.idx
 * entry of the page and the per-page ".indexed" tag file.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer extends AbstractIndex
{
    /** @var string|null name of the page to be indexed */
    protected $page;

    /**
     * Indexer constructor
     *
     * @param string $page name of the page to index
     */
    public function __construct($page = null)
    {
        if (isset($page)) $this->page = $page;
    }

    /**
     * Dispatch Indexing request for the page, called by TaskRunner::runIndexer()
     *
     * A page that no longer exists is removed from the index, any other page
     * is added or updated.
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function dispatch($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: unknow page name');
        }

        // check if page was deleted but is still in the index
        if (!page_exists($this->page)) {
            return $this->deletePage($verbose, $force);
        }

        // update search index
        return $this->addPage($verbose, $force);
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author Michael Hamann <michael@content-space.de>
     *
     * @return int|string  INDEXER_VERSION, followed by "+plugin=version" for each plugin entry
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            // DokuWiki version is included for the convenience of plugins
            $data = array('dokuwiki' => $version);
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data); // stable order, independent of plugin load order
            foreach ($data as $plugin => $vers) {
                $version .= '+'.$plugin.'='.$vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Adds/updates the search index for the given page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param bool $verbose print status messages
     * @param bool $force force reindexing even when the index is up to date
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function addPage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in addePage');
        }
        $page = $this->page;

        // check if indexing needed for the existing page (full text and/or metadata indexing)
        $idxtag = metaFN($page, '.indexed');
        if (!$force && file_exists($idxtag)) {
            // Compare as strings: a loose comparison would treat a stale tag such as
            // "8+plugin=1" as equal to the integer version 8 on PHP 7.
            if (trim(io_readFile($idxtag)) === (string) $this->getVersion()) {
                $last = @filemtime($idxtag);
                if ($last > @filemtime(wikiFN($page))) {
                    if ($verbose) dbglog("Indexer: index for {$page} up to date");
                    return true;
                }
            }
        }

        // register the page to the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // prepare metadata indexing
        $metadata = array();
        $metadata['title'] = p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED);

        $references = p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED);
        $metadata['relation_references'] = ($references !== null) ?
                array_keys($references) : array();

        $media = p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED);
        $metadata['relation_media'] = ($media !== null) ?
                array_keys($media) : array();

        // check if full text indexing allowed (anything but an explicit false enables it)
        $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
        if ($indexenabled !== false) $indexenabled = true;
        $metadata['internal_index'] = $indexenabled;

        $body = '';
        $data = compact('page', 'body', 'metadata', 'pid');
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) $data['body'] = $data['body'].' '.rawWiki($page);
        $event->advise_after();
        unset($event);

        // Take back only the keys handed to the event. A blind extract() would let
        // extra keys added by a handler overwrite unrelated locals such as $idxtag.
        if (array_key_exists('page', $data)) $page = $data['page'];
        if (array_key_exists('body', $data)) $body = $data['body'];
        if (array_key_exists('metadata', $data)) $metadata = $data['metadata'];
        if (array_key_exists('pid', $data)) $pid = $data['pid'];

        // handlers may have toggled full text indexing; the flag itself is not a metadata key
        $indexenabled = $metadata['internal_index'];
        unset($metadata['internal_index']);

        // Access to Metadata Index
        $result = (new MetadataIndex($pid))->addMetaKeys($metadata);
        if ($verbose) dbglog("Indexer: addMetaKeys({$page}) ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // Access to Fulltext Index
        if ($indexenabled) {
            $result = (new FulltextIndex($pid))->addWords($body);
            if ($verbose) dbglog("Indexer: addWords() for {$page} ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        } else {
            if ($verbose) dbglog("Indexer: full text indexing disabled for {$page}");
            // ensure the page content deleted from the Fulltext index
            // (addressed by numeric pid, like every other FulltextIndex access in this class)
            $result = (new FulltextIndex($pid))->deleteWords();
            if ($verbose) dbglog("Indexer: deleteWords() for {$page} ".($result ? 'done' : 'failed'));
            if (!$result) {
                return false;
            }
        }

        // update index tag file
        io_saveFile($idxtag, $this->getVersion());
        if ($verbose) dbglog("Indexer: finished");

        return $result;
    }

    /**
     * Remove a page from the index
     *
     * Erases entries in all known indexes. Locking is handled internally.
     *
     * @param bool $verbose print status messages
     * @param bool $force run the deletion even when the page has no ".indexed" tag file
     * @return bool  If the function completed successfully
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     * @author Satoshi Sahara <sahara.satoshi@gmail.com>
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function deletePage($verbose = false, $force = false)
    {
        if (!isset($this->page)) {
            throw new IndexAccessException('Indexer: invalid page name in deletePage');
        }
        $page = $this->page;

        // without a tag file the page is treated as not indexed, unless forced
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            if ($verbose) dbglog("Indexer: {$page}.indexed file does not exist, ignoring");
            return true;
        }

        // retrieve pid from the page.idx file, $pid is always integer
        $pid = $this->getPID($page);

        // remove obsoleted content from Fulltext index
        $result = (new FulltextIndex($pid))->deleteWords();
        if ($verbose) dbglog("Indexer: deleteWords() for {$page} ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // delete all keys of the page from metadata index
        $result = (new MetadataIndex($pid))->deleteMetaKeys();
        if ($verbose) dbglog("Indexer: deleteMetaKeys() for {$page} ".($result ? 'done' : 'failed'));
        if (!$result) {
            return false;
        }

        // mark the page as deleted in the page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $pid, self::INDEX_MARK_DELETED.$page);
            if ($verbose) dbglog("Indexer: {$page} has marked as deleted in page.idx");
        } finally {
            // never leave the index locked when the write fails
            $this->unlock();
        }

        unset(static::$pidCache[$pid]);
        @unlink($idxtag);
        return $result;
    }

    /**
     * Rename a page in the search index without changing the indexed content.
     * This function doesn't check if the old or new name exists in the filesystem.
     * It returns an error if the old page isn't in the page list of the indexer
     * and it deletes all previously indexed content of the new page.
     * Renaming a page to its current name is a no-op.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     * @return bool  If the page was successfully renamed
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage($oldpage, $newpage)
    {
        $index = $this->getIndex('page', '');
        // check if oldpage found in page.idx
        $oldPid = array_search($oldpage, $index, true);
        if ($oldPid === false) return false;

        // nothing to rename; going on would wipe the page's own indexed content below
        if ($oldpage === $newpage) return true;

        // check if newpage found in page.idx
        $newPid = array_search($newpage, $index, true);
        if ($newPid !== false) {
            $result = (new Indexer($newpage))->deletePage();
            if (!$result) return false;
            // Note: $index is no longer valid after deletePage()!
            unset($index);
        }

        // update page.idx
        $this->lock();
        try {
            $this->saveIndexKey('page', '', $oldPid, $newpage);
        } finally {
            // never leave the index locked when the write fails
            $this->unlock();
        }

        // reset the pid cache
        $this->resetPIDCache();

        return true;
    }

    /**
     * Clear the Page Index
     *
     * Clears the metadata and fulltext indexes, removes page.idx and resets
     * the pid cache.
     *
     * @param bool $requireLock should be false only if the caller is responsible for index lock
     * @return bool  If the index has been cleared successfully
     * @throws IndexLockException
     */
    public function clear($requireLock = true)
    {
        global $conf;

        if ($requireLock) $this->lock();
        try {
            // the sub-indexes are cleared under the lock held here (or by the caller),
            // so they must not try to take it again

            // clear Metadata Index
            (new MetadataIndex())->clear(false);

            // clear Fulltext Index
            (new FulltextIndex())->clear(false);

            @unlink($conf['indexdir'].'/page.idx');

            // clear the pid cache
            $this->resetPIDCache();
        } finally {
            // release only a lock taken by this call
            if ($requireLock) $this->unlock();
        }
        return true;
    }
}
319