xref: /dokuwiki/inc/Search/Index/FileIndex.php (revision db8be586414d0dc05ca5131baddfa84f08c55520)
1<?php
2
3namespace dokuwiki\Search\Index;
4
5use dokuwiki\Search\Exception\IndexAccessException;
6use dokuwiki\Search\Exception\IndexLockException;
7use dokuwiki\Search\Exception\IndexWriteException;
8
9/**
10 * Access to a single index file
11 *
12 * Access using this class always happens on a line-by-line basis. It is usually not read in full.
13 * All modifications are implicitly saved
14 * Should be used for large indexes that receive only few changes at once.
15 */
class FileIndex extends AbstractIndex
{
    /**
     * @var array RID cache for faster access, maps value => RID in insertion order
     * @see accessCachedValue()
     */
    protected $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is rewritten line-by-line into a temporary file which then replaces the original.
     * If the requested RID lies beyond the end of the file, empty lines are inserted inbetween.
     *
     * @throws IndexWriteException
     * @throws IndexLockException
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow($rid, $value)
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        // every row is stored as exactly one newline terminated line
        if (substr($value, -1) !== "\n") {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write {$tempname}");
        }
        // a missing index file is not an error, the row is simply created below
        $ih = @fopen($this->filename, 'r');

        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while (($curline = fgets($ih)) !== false) {
                if (++$ln == $rid) {
                    $curline = $value;
                } elseif (substr($curline, -1) !== "\n") {
                    // only the last line can lack its newline; terminate it so that
                    // anything written after it does not end up on the same line
                    $curline .= "\n";
                }
                fwrite($fh, $curline);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines inbetween
        if ($rid > $ln) {
            while ($rid > ++$ln) {
                fwrite($fh, "\n");
            }
            fwrite($fh, $value);
        }
        fclose($fh);

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        // do not report success when the new index could not be moved into place
        if (!io_rename($tempname, $this->filename)) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write {$this->filename}");
        }
    }

    /**
     * @inheritdoc
     *
     * When writable and the requested RID is beyond the end of the file,
     * the file is padded with empty lines up to that RID. This avoids
     * a more expensive line-by-line copy in a subsequent changeRow() call.
     *
     * A missing or unreadable file yields an empty string and is not padded.
     *
     * @throws IndexWriteException
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow($rid)
    {
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        $terminated = true; // does the last line read end with a newline?
        while (($line = fgets($fh)) !== false) {
            if (++$ln == $rid) {
                fclose($fh);
                return rtrim((string)$line);
            }
            $terminated = (substr($line, -1) === "\n");
        }
        fclose($fh);

        // nothing to pad for read-only indexes or RIDs that are not beyond the end (eg. negative ones)
        if (!$this->isWritable || $rid <= $ln) return '';

        // still here? pad the index for the given ID
        // we do not simply call changeRow() here because appending is faster than line-by-line copying
        // one newline per missing line, plus one to finish an unterminated last line
        $padding = str_repeat("\n", (int)$rid - $ln + ($terminated ? 0 : 1));
        if (file_put_contents($this->filename, $padding, FILE_APPEND) === false) {
            throw new IndexWriteException("Failed to write {$this->filename}");
        }

        return '';
    }

    /**
     * @inheritdoc
     *
     * The result is keyed by RID in ascending order. RIDs that do not exist in the file
     * are missing from the result. Reading stops as soon as all requested RIDs were found.
     */
    public function retrieveRows($rids)
    {
        $result = [];
        if (!$rids || !file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }

        // a lookup table copes with duplicate and unsorted RIDs
        $wanted = array_fill_keys($rids, true);
        $missing = count($wanted);

        $ln = -1;
        while ($missing > 0 && ($line = fgets($fh)) !== false) {
            if (isset($wanted[++$ln])) {
                $result[$ln] = rtrim((string)$line);
                $missing--;
            }
        }
        fclose($fh);
        return $result;
    }


    /**
     * @inheritdoc
     *
     * Values are trimmed before they are compared to the (trimmed) lines of the index.
     * When writable, values that were not found are appended to the end of the file
     * and their new RIDs are returned as well.
     *
     * @throws IndexAccessException
     * @throws IndexWriteException
     */
    public function getRowIDs($values)
    {
        $values = array_map('trim', $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        $terminated = true; // does the last line read end with a newline?
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read {$this->filename}");
            }
            // stops early once everything was found; otherwise $ln ends up as the line count
            while ($values && ($line = fgets($fh)) !== false) {
                $terminated = (substr($line, -1) === "\n");
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        if (!$this->isWritable || !$values) return $result;

        // if there are still values, they have not been found and will be appended
        // all of them are written at once instead of reopening the file for each value
        $append = $terminated ? '' : "\n";
        foreach (array_keys($values) as $value) {
            $append .= "$value\n";
            $result[$value] = $ln++;
        }
        if (file_put_contents($this->filename, $append, FILE_APPEND) === false) {
            throw new IndexWriteException("Failed to write {$this->filename}");
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Each trimmed line is matched against the given regular expression.
     * The result maps the RID of each matching line to its trimmed content.
     *
     * @throws IndexAccessException
     */
    public function search($re)
    {
        $result = [];
        if (!file_exists($this->filename)) {
            return $result;
        }

        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            throw new IndexAccessException("Failed to read {$this->filename}");
        }
        for ($ln = 0; ($line = fgets($fh)) !== false; $ln++) {
            $line = trim($line);
            if (preg_match($re, $line)) {
                $result[$ln] = $line;
            }
        }
        fclose($fh);

        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * @param string $value
     * @return int the RID of the entry
     * @throws IndexAccessException
     * @throws IndexWriteException
     * @see getRowID()
     */
    public function accessCachedValue($value)
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count($this->ridCache) >= 10) {
            // array_shift() must not be used here: it renumbers integer keys and numeric
            // values are stored as integer keys, so their RIDs would move to other values
            reset($this->ridCache);
            unset($this->ridCache[key($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * @inheritdoc
     *
     * Counts the lines of the index file. A missing or unreadable file counts as empty.
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;
        $count = 0;
        while (fgets($fh) !== false) $count++;
        fclose($fh);
        return $count;
    }

    /**
     * @inheritdoc
     *
     * Yields RID => row for each line of the index file, with trailing whitespace removed.
     * A missing or unreadable file yields nothing.
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;
        try {
            $ln = 0;
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer stops iterating before the end of the file
            fclose($fh);
        }
    }
}
247