xref: /dokuwiki/inc/Search/Index/FileIndex.php (revision 06053dca2fac9a1da4eb1accf8c2488942da5d2a)
1<?php
2
3namespace dokuwiki\Search\Index;
4
5use dokuwiki\Search\Exception\IndexAccessException;
6use dokuwiki\Search\Exception\IndexLockException;
7use dokuwiki\Search\Exception\IndexWriteException;
8
/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis; the file is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 *
 * A row ID (RID) is the zero-based line number of an entry in the index file.
 */
class FileIndex extends AbstractIndex
{
    /** @var array<int|string, int> RID cache for faster access, maps value => RID */
    protected array $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is rewritten line by line into a temporary file which then replaces the original.
     * If the wanted row is beyond the current end of the file, empty rows are inserted inbetween.
     *
     * @throws IndexWriteException when the temporary file can not be written or moved into place
     * @throws IndexLockException when the index is not writable
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow(int $rid, string $value): void
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        if (!str_ends_with($value, "\n")) {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write $tempname");
        }
        $ih = @fopen($this->filename, 'r');

        // a failed or short write must never end up replacing the real index
        $write = static function (string $data) use ($fh, $tempname): void {
            if (fwrite($fh, $data) !== strlen($data)) {
                throw new IndexWriteException("Failed to write $tempname");
            }
        };

        $complete = false;
        try {
            $ln = -1; // line counter
            // copy previous index lines line-by-line, replacing the wanted line
            if ($ih) {
                while (($curline = fgets($ih)) !== false) {
                    if (++$ln === $rid) {
                        $curline = $value;
                    } elseif (!str_ends_with($curline, "\n")) {
                        // a last line without line break would otherwise merge with appended rows
                        $curline .= "\n";
                    }
                    $write($curline);
                }
            }
            // if wanted line is beyond the current line count, insert empty lines inbetween
            if ($rid > $ln) {
                while ($rid > ++$ln) {
                    $write("\n");
                }
                $write($value);
            }
            $complete = true;
        } finally {
            if ($ih) fclose($ih);
            fclose($fh);
            // do not leave a broken temporary file behind
            if (!$complete) @unlink($tempname);
        }

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        if (!io_rename($tempname, $this->filename)) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $this->filename");
        }

        // whatever value was cached for this row no longer lives there
        $this->ridCache = array_filter($this->ridCache, static fn($cached) => $cached !== $rid);
    }

    /**
     * @inheritdoc
     *
     * Returns an empty string when the file can not be read or the row does not exist.
     */
    public function retrieveRow(int $rid): string
    {
        // negative rows can not exist, no need to scan the file
        if ($rid < 0) {
            return '';
        }
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $rid) {
                fclose($fh);
                return rtrim($line);
            }
        }
        fclose($fh);

        return '';
    }

    /**
     * @inheritdoc
     *
     * The result is keyed by RID. Rows that do not exist in the file are missing from the result.
     * Anything in $rids that is not a non-negative integer is ignored.
     */
    public function retrieveRows(array $rids): array
    {
        $result = [];

        // Only unique, valid line numbers in ascending order can be matched in a single pass.
        // Duplicates or invalid entries would otherwise block all RIDs sorted after them.
        $rids = array_unique(array_filter($rids, static fn($rid) => is_int($rid) && $rid >= 0));
        sort($rids);
        $next = array_shift($rids);
        if ($next === null) {
            return $result; // nothing (valid) requested
        }

        if (!file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $next) {
                $result[$ln] = rtrim($line);
                // array_shift() returns null once all wanted rows have been found
                $next = array_shift($rids);
                if ($next === null) break;
            }
        }
        fclose($fh);
        return $result;
    }


    /**
     * @inheritdoc
     *
     * The result maps each (trimmed) value to its RID. When the index is writable, values that are
     * not found are appended to the file and their new RIDs are returned as well. When it is not
     * writable, missing values are simply absent from the result.
     *
     * @throws IndexAccessException when the existing file can not be read
     * @throws IndexWriteException when new values can not be appended
     */
    public function getRowIDs(array $values): array
    {
        $values = array_map(trim(...), $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            // stop reading as soon as everything has been found
            while ($values && ($line = fgets($fh)) !== false) {
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        if (!$this->isWritable) return $result;

        // if there are still values, they have not been found and will be appended
        // (the whole file has been read in that case, so $ln is the next free row)
        if ($values) {
            $missing = array_keys($values);
            $append = implode("\n", $missing) . "\n";
            if (file_put_contents($this->filename, $append, FILE_APPEND) !== strlen($append)) {
                throw new IndexWriteException("Failed to write $this->filename");
            }
            foreach ($missing as $value) {
                $result[$value] = $ln++;
            }
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Every trimmed line of the file is matched against the given regular expression.
     * The result maps the RID of each matching row to its trimmed content.
     *
     * @throws IndexAccessException when the existing file can not be read
     */
    public function search(string $re): array
    {
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            while (($line = fgets($fh)) !== false) {
                $line = trim($line);
                if (preg_match($re, $line)) {
                    $result[$ln] = $line;
                }
                $ln++;
            }
            fclose($fh);
        }
        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * @param string $value
     * @return int the RID of the entry
     * @see getRowID()
     */
    public function accessCachedValue(string $value): int
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count($this->ridCache) >= 10) {
            // Not array_shift(): numeric values are stored as integer keys and array_shift()
            // renumbers those, which would assign the cached RIDs to the wrong values.
            unset($this->ridCache[array_key_first($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * @inheritdoc
     *
     * Counts the lines of the index file. A missing or unreadable file counts as 0.
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;
        $count = 0;
        while (fgets($fh) !== false) $count++;
        fclose($fh);
        return $count;
    }

    /**
     * @inheritdoc
     *
     * Yields RID => row content (without trailing whitespace) for each line of the file.
     * Yields nothing when the file is missing or can not be read.
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;
        try {
            $ln = 0;
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer stops iterating early
            fclose($fh);
        }
    }
}
228