xref: /dokuwiki/inc/Search/Index/FileIndex.php (revision 9369b4a991666bc911474806b106d8958e79f4c1)
1<?php
2
3namespace dokuwiki\Search\Index;
4
5use dokuwiki\Search\Exception\IndexAccessException;
6use dokuwiki\Search\Exception\IndexLockException;
7use dokuwiki\Search\Exception\IndexWriteException;
8
9/**
10 * Access to a single index file
11 *
12 * Access using this class always happens on a line-by-line basis. It is usually not read in full.
13 * All modifications are implicitly saved
14 * Should be used for large indexes that receive only few changes at once.
15 */
class FileIndex extends AbstractIndex
{
    /** @var array RID cache for faster access, maps a value to its RID */
    protected array $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is rewritten line-by-line into a temporary file which then replaces
     * the original. If any write fails, the temporary file is discarded and the
     * existing index is left untouched.
     *
     * @throws IndexWriteException when the temporary file or the index can not be written
     * @throws IndexLockException when the index is not writable
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow(int $rid, string $value): void
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        if (!str_ends_with($value, "\n")) {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write $tempname");
        }
        $ih = @fopen($this->filename, 'r');

        $ln = -1; // line counter
        $ok = true; // becomes false as soon as a write fails
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while ($ok && ($curline = fgets($ih)) !== false) {
                $out = (++$ln === $rid) ? $value : $curline;
                // an unterminated last line would otherwise be merged with whatever follows
                if (!str_ends_with($out, "\n")) {
                    $out .= "\n";
                }
                $ok = fwrite($fh, $out) === strlen($out);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines inbetween
        if ($ok && $rid > $ln) {
            $out = str_repeat("\n", $rid - $ln - 1) . $value;
            $ok = fwrite($fh, $out) === strlen($out);
        }
        $ok = fclose($fh) && $ok;

        // never replace the index with an incomplete copy
        if (!$ok) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $tempname");
        }

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        if (io_rename($tempname, $this->filename) === false) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $this->filename");
        }
    }

    /**
     * @inheritdoc
     *
     * When writable and the requested RID is beyond the end of the file,
     * the file is padded with empty lines up to that RID. This avoids
     * a more expensive line-by-line copy in a subsequent changeRow() call.
     *
     * An empty string is returned when the index file is missing or unreadable,
     * when the row does not exist, or when the RID is negative.
     *
     * @throws IndexWriteException when padding the index fails
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow(int $rid): string
    {
        // a negative RID can never exist and must not trigger any padding
        if ($rid < 0) {
            return '';
        }
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        $terminated = true; // does the last line read end with a newline?
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $rid) {
                fclose($fh);
                return rtrim($line);
            }
            $terminated = str_ends_with($line, "\n");
        }
        fclose($fh);

        if (!$this->isWritable) return '';

        // still here? pad the index for the given ID
        // we do not simply call changeRow() here because appending is faster than line-by-line copying
        // one extra newline is needed when the current last line is not terminated yet
        $padding = str_repeat("\n", $rid - $ln + ($terminated ? 0 : 1));
        if (!file_put_contents($this->filename, $padding, FILE_APPEND)) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return '';
    }

    /**
     * @inheritdoc
     *
     * The file is read sequentially and reading stops as soon as all requested rows
     * have been found. Rows that do not exist are missing from the result. Duplicate
     * or negative RIDs in the request are ignored.
     */
    public function retrieveRows(array $rids): array
    {
        $result = [];
        // duplicates would stall the sequential matching below
        $rids = array_unique($rids);
        sort($rids);
        $next = array_shift($rids); // null when nothing (more) is requested

        if ($next === null) {
            return $result;
        }
        if (!file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            ++$ln;
            // drop requested IDs that are already behind us (e.g. negative ones)
            while ($next !== null && $next < $ln) {
                $next = array_shift($rids);
            }
            if ($next === null) break;
            if ($ln === $next) {
                $result[$ln] = rtrim($line);
                $next = array_shift($rids);
                if ($next === null) break;
            }
        }
        fclose($fh);
        return $result;
    }


    /**
     * @inheritdoc
     *
     * Values are trimmed before comparison. When the index is writable, values that
     * were not found are appended to the file in a single write and their new RIDs
     * are part of the result.
     *
     * @throws IndexAccessException when the index file can not be read
     * @throws IndexWriteException when appending new values fails
     */
    public function getRowIDs(array $values): array
    {
        $values = array_map(trim(...), $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        $terminated = true; // does the last line read end with a newline?
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            while (($line = fgets($fh)) !== false && $values) {
                $terminated = str_ends_with($line, "\n");
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        // nothing left to add, or not allowed to add anything
        if (!$this->isWritable || !$values) return $result;

        // if there are still values, they have not been found and will be appended
        // $values is only non-empty here when the file was read up to its end, so $ln is the line count
        $append = $terminated ? '' : "\n"; // terminate a dangling last line first
        foreach (array_keys($values) as $value) {
            $append .= "$value\n";
            $result[$value] = $ln++;
        }
        if (!file_put_contents($this->filename, $append, FILE_APPEND)) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Each line is trimmed before it is matched against the given expression.
     * The result maps the RID to the trimmed line.
     *
     * @throws IndexAccessException when the index file can not be read
     */
    public function search(string $re): array
    {
        $result = [];
        if (!file_exists($this->filename)) {
            return $result;
        }

        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            throw new IndexAccessException("Failed to read $this->filename");
        }
        $ln = 0;
        while (($line = fgets($fh)) !== false) {
            $line = trim($line);
            if (preg_match($re, $line)) {
                $result[$ln] = $line;
            }
            $ln++;
        }
        fclose($fh);

        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * @param string $value
     * @return int the RID of the entry
     * @see getRowID()
     */
    public function accessCachedValue(string $value): int
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count($this->ridCache) >= 10) {
            // array_shift() must not be used here: it renumbers integer keys, and numeric
            // values like "2023" are stored as integer keys, which would mix up the cache
            unset($this->ridCache[array_key_first($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * @inheritdoc
     *
     * Counts the lines of the index file. A missing or unreadable file counts as 0.
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;
        $count = 0;
        while (fgets($fh) !== false) $count++;
        fclose($fh);
        return $count;
    }

    /**
     * @inheritdoc
     *
     * Yields RID => line (trailing whitespace removed) for every line of the index.
     * Nothing is yielded when the file is missing or unreadable.
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;
        try {
            $ln = 0;
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer abandons the generator early
            fclose($fh);
        }
    }
}
245