xref: /dokuwiki/inc/Search/Index/AbstractIndex.php (revision 1148921de6af6909f19cb5b30b698d0f27d7751e)
1ec5280efSAndreas Gohr<?php
2ec5280efSAndreas Gohr
3ec5280efSAndreas Gohrnamespace dokuwiki\Search\Index;
4ec5280efSAndreas Gohr
5c66b5ec6SAndreas Gohruse dokuwiki\Search\Exception\IndexLockException;
6c66b5ec6SAndreas Gohr
/**
 * Basic building block to access individual index files
 *
 * An index is a single file inside the configured index directory. Its name is built
 * from a base name, an optional suffix and the ".idx" extension.
 *
 * To be able to write to an index, a lock must be acquired.
 *
 * Indexes are iterable, yielding RID => value pairs. This class does not implement
 * getIterator() itself, concrete subclasses have to provide it.
 */
abstract class AbstractIndex implements \IteratorAggregate, \Countable
{
    /** @var string name of the index */
    protected $idx;

    /** @var string suffix of the index */
    protected $suffix;

    /** @var string full filename to the index */
    protected $filename;

    /** @var bool has this instance acquired a lock? */
    protected $isWritable = false;

    /**
     * Initialize the index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * When $isWritable is true, a lock is acquired immediately
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param bool $isWritable acquire a lock immediately?
     * @throws IndexLockException
     */
    public function __construct($idx, $suffix = '', $isWritable = false)
    {
        global $conf;
        $this->filename = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
        $this->idx = $idx;
        $this->suffix = $suffix;
        if ($isWritable) $this->lock();
    }

    /**
     * Make this index writable by acquiring the lock
     *
     * The lock is requested for the index name only, the suffix is not part of it.
     * Calling this on an instance that is already writable does nothing.
     *
     * @throws IndexLockException
     */
    public function lock()
    {
        if ($this->isWritable) return;
        Lock::acquire($this->idx);
        // only flag as writable after the lock was acquired without an exception
        $this->isWritable = true;
    }

    /**
     * Make this index read-only by releasing the lock
     *
     * Decrements the reference count in the Lock registry. The filesystem
     * lock is only removed when the count reaches zero.
     *
     * Calling this on an instance that is not writable does nothing.
     */
    public function unlock()
    {
        if (!$this->isWritable) return;
        Lock::release($this->idx);
        $this->isWritable = false;
    }

    /**
     * Whether this index instance is writable
     *
     * @return bool
     */
    public function isWritable()
    {
        return $this->isWritable;
    }

    /**
     * Ensure lock is released when the index is destroyed
     */
    public function __destruct()
    {
        $this->unlock();
    }

    /**
     * @return string the full path to the underlying file
     */
    public function getFilename()
    {
        return $this->filename;
    }

    /**
     * Does this index exist, yet?
     *
     * @return bool
     */
    public function exists()
    {
        return file_exists($this->getFilename());
    }

    /**
     * Return the largest numeric suffix for the current index
     *
     * This is only useful for indexes that use integer based suffixes (like the wordlength indexes)
     *
     * All files in the index directory that start with the index name and end in
     * ".idx" are inspected, the suffix of this instance itself is not considered.
     *
     * @return int 0 if no numeric suffix indexes are found
     */
    public function max()
    {
        global $conf;

        // glob() returns false on error, treat that like "no files found"
        $files = glob($conf['indexdir'] . '/' . $this->idx . '*.idx') ?: [];

        $result = 0;
        foreach ($files as $file) {
            // The quantifier has to be inside the group: a repeated group like (\d)+
            // would only keep the last digit and report 2 for "w12.idx"
            if (preg_match('/(\d+)\.idx$/', $file, $match)) {
                $num = (int)$match[1];
                if ($num > $result) $result = $num;
            }
        }

        return $result;
    }

    /**
     * Change a line in the index
     *
     * If the line doesn't exist, it will be added, creating empty
     * lines in between as necessary
     *
     * @param int $rid the line number, count starting at 0
     * @param string $value line content to write
     */
    abstract public function changeRow($rid, $value);

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string for non-existing lines
     *
     * @param int $rid the line number
     * @return string a line with trailing whitespace removed
     */
    abstract public function retrieveRow($rid);

    /**
     * Retrieve multiple lines from the index
     *
     * Ignores non-existing lines, eg the result array may be smaller than the input $rids
     *
     * @param int[] $rids
     * @return array [rid => value]
     */
    abstract public function retrieveRows($rids);

    /**
     * Searches the Index for a given value
     *
     * If the index is writable and the value is not found it will be added. Otherwise null is returned.
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * @param string $value
     *
     * @return int|null the RID of the entry, null if not found and not added
     */
    public function getRowID($value)
    {
        // delegate to the bulk lookup and pick the single result out of the map
        $result = $this->getRowIDs([$value]);
        return $result[$value] ?? null;
    }

    /**
     * Searches the Index for all given values
     *
     * If the index is writable, not found values are added
     *
     * @param string[] $values
     * @return array the RIDs of the entries (value => rid)
     */
    abstract public function getRowIDs($values);

    /**
     * Find all RIDs matching a regular expression
     *
     * A full regular expression including delimiters and modifiers is expected.
     *
     * For searching across collections, prefer using CollectionSearch which scans each
     * index only once for all terms instead of once per term.
     *
     * @param string $re the regular expression to match against
     * @return array (rid => value)
     */
    abstract public function search($re);

    /**
     * Clears the index by deleting its file
     *
     * Errors are suppressed on purpose, so clearing an index that has no file is not an error.
     *
     * @return void
     */
    public function clear()
    {
        @unlink($this->filename);
    }

    /**
     * Return the number of lines in the index
     *
     * @return int
     */
    abstract public function count(): int;

    /**
     * Saves the index if needed
     *
     * The default implementation does nothing and is only for streamlining the API of
     * the different index classes
     *
     * @return void
     */
    public function save()
    {
    }
}
236