xref: /dokuwiki/inc/Search/Index/AbstractIndex.php (revision 06053dca2fac9a1da4eb1accf8c2488942da5d2a)
1<?php
2
3namespace dokuwiki\Search\Index;
4
5use dokuwiki\Search\Exception\IndexLockException;
6
/**
 * Basic building block to access individual index files
 *
 * Each instance is bound to a single file named "<idx><suffix>.idx" inside the
 * directory configured in the global $conf['indexdir'].
 *
 * To be able to write to an index, a lock must be acquired.
 *
 * Indexes are iterable, yielding RID => value pairs.
 */
abstract class AbstractIndex implements \IteratorAggregate, \Countable
{
    /** @var string name of the index */
    protected string $idx;

    /** @var string suffix of the index */
    protected string $suffix;

    /** @var string full filename to the index */
    protected string $filename;

    /** @var bool has this instance acquired a lock? */
    protected bool $isWritable = false;

    /**
     * Initialize the index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * When $isWritable is true, a lock is acquired immediately
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param bool $isWritable acquire a lock immediately?
     * @throws IndexLockException
     */
    public function __construct(string $idx, string $suffix = '', bool $isWritable = false)
    {
        global $conf;
        $this->filename = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
        $this->idx = $idx;
        $this->suffix = $suffix;
        if ($isWritable) $this->lock();
    }

    /**
     * Make this index writable by acquiring the lock
     *
     * The lock is requested for the index name only; the suffix is not part of it.
     * Calling this on an instance that already holds the lock does nothing.
     *
     * @throws IndexLockException
     */
    public function lock(): void
    {
        if ($this->isWritable) return;
        Lock::acquire($this->idx);
        // only flagged after acquire() returned, an exception leaves the index read-only
        $this->isWritable = true;
    }

    /**
     * Make this index read-only by releasing the lock
     *
     * Decrements the reference count in the Lock registry. The filesystem
     * lock is only removed when the count reaches zero.
     *
     * Calling this on an instance that holds no lock does nothing.
     */
    public function unlock(): void
    {
        if (!$this->isWritable) return;
        Lock::release($this->idx);
        $this->isWritable = false;
    }

    /**
     * Whether this index instance is writable
     *
     * @return bool true if this instance currently holds the lock
     */
    public function isWritable(): bool
    {
        return $this->isWritable;
    }

    /**
     * Ensure lock is released when the index is destroyed
     */
    public function __destruct()
    {
        $this->unlock();
    }

    /**
     * @return string the full path to the underlying file
     */
    public function getFilename(): string
    {
        return $this->filename;
    }

    /**
     * Does this index exist, yet?
     *
     * @return bool true if the underlying file is present
     */
    public function exists(): bool
    {
        return file_exists($this->getFilename());
    }

    /**
     * Return the largest numeric suffix for the current index
     *
     * This is only useful for indexes that use integer based suffixes (like the wordlength indexes)
     *
     * All files in the index directory whose name starts with the index name and
     * ends in ".idx" are inspected; the digits directly in front of ".idx" are
     * read as the suffix.
     *
     * @return int 0 if no numeric suffix indexes are found
     */
    public function max(): int
    {
        global $conf;

        $files = glob($conf['indexdir'] . '/' . $this->idx . '*.idx');
        // glob() reports errors with false, which must not be iterated
        if ($files === false) return 0;

        $result = 0;
        foreach ($files as $file) {
            // capture the whole digit run: a repeated single-digit group "(\d)+"
            // would only keep the last digit and read "12" as 2
            if (preg_match('/(\d+)\.idx$/', $file, $match)) {
                $num = (int)$match[1];
                if ($num > $result) $result = $num;
            }
        }

        return $result;
    }

    /**
     * Change a line in the index
     *
     * If the line doesn't exist, it will be added, creating empty
     * lines in between as necessary
     *
     * @param int $rid the line number, count starting at 0
     * @param string $value line content to write
     */
    abstract public function changeRow(int $rid, string $value);

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string for non-existing lines
     *
     * @param int $rid the line number
     * @return string a line with trailing whitespace removed
     */
    abstract public function retrieveRow(int $rid): string;

    /**
     * Retrieve multiple lines from the index
     *
     * Ignores non-existing lines, eg the result array may be smaller than the input $rids
     *
     * @param int[] $rids
     * @return array [rid => value]
     */
    abstract public function retrieveRows(array $rids): array;

    /**
     * Searches the Index for a given value
     *
     * If the index is writable and the value is not found it will be added. Otherwise null is returned.
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * This is a convenience wrapper around getRowIDs() for a single value.
     *
     * @param string $value
     *
     * @return int|null the RID of the entry, null if not found and not added
     */
    public function getRowID(string $value): ?int
    {
        $result = $this->getRowIDs([$value]);
        return $result[$value] ?? null;
    }

    /**
     * Searches the Index for all given values
     *
     * If the index is writable, not found values are added
     *
     * @param string[] $values
     * @return array the RIDs of the entries (value => rid)
     */
    abstract public function getRowIDs(array $values): array;

    /**
     * Find all RIDs matching a regular expression
     *
     * A full regular expression including delimiters and modifiers is expected.
     *
     * For searching across collections, prefer using CollectionSearch which scans each
     * index only once for all terms instead of once per term.
     *
     * @param string $re the regular expression to match against
     * @return array (rid => value)
     */
    abstract public function search(string $re): array;

    /**
     * Clears the index by deleting its file
     *
     * This is best effort: a missing file or a failed deletion is silently ignored.
     *
     * @return void
     */
    public function clear(): void
    {
        @unlink($this->filename);
    }

    /**
     * Return the number of lines in the index
     *
     * @return int
     */
    abstract public function count(): int;

    /**
     * Saves the index if needed
     *
     * The default implementation does nothing and is only for streamlining the API of
     * the different index classes
     *
     * @return void
     */
    public function save()
    {
    }
}
236