xref: /dokuwiki/inc/Search/Index/FileIndex.php (revision 9369b4a991666bc911474806b106d8958e79f4c1)
19bd7d62fSAndreas Gohr<?php
29bd7d62fSAndreas Gohr
39bd7d62fSAndreas Gohrnamespace dokuwiki\Search\Index;
49bd7d62fSAndreas Gohr
59bd7d62fSAndreas Gohruse dokuwiki\Search\Exception\IndexAccessException;
67fcedc39SAndreas Gohruse dokuwiki\Search\Exception\IndexLockException;
79bd7d62fSAndreas Gohruse dokuwiki\Search\Exception\IndexWriteException;
89bd7d62fSAndreas Gohr
/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis. The file is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 *
 * The row ID (RID) of an entry is its zero-based line number in the file.
 * $this->filename and $this->isWritable are not declared here; they are expected to be provided
 * by AbstractIndex.
 */
class FileIndex extends AbstractIndex
{
    /** @var array<string|int, int> RID cache for faster access, oldest entry first */
    protected array $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is copied line-by-line into a temporary file with the wanted line replaced
     * (or appended, padding with empty lines when $rid is beyond the end of the file).
     * The temporary file replaces the index only if every write succeeded, so a failed
     * write never truncates the existing index.
     *
     * @throws IndexWriteException when the temporary file can not be written or moved in place
     * @throws IndexLockException when the index is not writable
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow(int $rid, string $value): void
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        if (!str_ends_with($value, "\n")) {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write $tempname");
        }
        $ih = @fopen($this->filename, 'r');

        // a write only counts as successful when every byte made it out
        $write = static fn(string $data): bool => fwrite($fh, $data) === strlen($data);

        $ok = true;
        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while ($ok && ($curline = fgets($ih)) !== false) {
                if (++$ln == $rid) {
                    $curline = $value;
                } elseif (!str_ends_with($curline, "\n")) {
                    // a last line without line break would otherwise be glued to appended data
                    $curline .= "\n";
                }
                $ok = $write($curline);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines inbetween
        if ($ok && $rid > $ln) {
            $ok = $write(str_repeat("\n", $rid - $ln - 1) . $value);
        }
        // closing flushes buffers, so a failure here is a write failure, too
        $ok = fclose($fh) && $ok;

        if (!$ok) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $tempname");
        }

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        // io_rename() is defined elsewhere; only an explicit false is treated as failure
        if (io_rename($tempname, $this->filename) === false) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $this->filename");
        }
    }

    /**
     * @inheritdoc
     *
     * When writable and the requested RID is beyond the end of the file,
     * the file is padded with empty lines up to that RID. This avoids
     * a more expensive line-by-line copy in a subsequent changeRow() call.
     *
     * An empty string is returned when the file is missing or unreadable, when the
     * row does not exist, or when $rid is negative. Trailing whitespace is stripped
     * from the returned row.
     *
     * @throws IndexWriteException when padding the file fails
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow(int $rid): string
    {
        // negative RIDs can not address any line and must never reach the padding below
        if ($rid < 0) {
            return '';
        }
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln == $rid) {
                fclose($fh);
                return rtrim($line);
            }
        }
        fclose($fh);

        if (!$this->isWritable) return '';

        // still here? pad the index for the given ID
        // we do not simply call changeRow() here because appending is faster than line-by-line copying
        // $ln is the index of the last existing line, so $rid - $ln line breaks create row $rid
        $padding = str_repeat("\n", $rid - $ln);
        if (file_put_contents($this->filename, $padding, FILE_APPEND) !== strlen($padding)) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return '';
    }

    /**
     * @inheritdoc
     *
     * The result is keyed by RID in ascending order. Rows that do not exist in the file are
     * omitted. Only non-negative integer RIDs are considered; duplicates are looked up once.
     * Reading stops as soon as all requested rows have been found.
     */
    public function retrieveRows(array $rids): array
    {
        $result = [];

        // build a lookup set; this also drops duplicates and RIDs that can never match a line
        $wanted = [];
        foreach ($rids as $rid) {
            if (is_int($rid) && $rid >= 0) {
                $wanted[$rid] = true;
            }
        }
        if ($wanted === []) {
            return $result;
        }

        if (!file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }

        $remaining = count($wanted);
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (isset($wanted[++$ln])) {
                $result[$ln] = rtrim($line);
                // nothing left to look for, no need to read the rest of the file
                if (--$remaining === 0) break;
            }
        }
        fclose($fh);
        return $result;
    }


    /**
     * @inheritdoc
     *
     * Values are trimmed before lookup. When the index is writable, values that were not
     * found are appended to the file and their new RIDs are included in the result.
     * When it is not writable, missing values are simply absent from the result.
     *
     * @throws IndexAccessException when the existing file can not be read
     * @throws IndexWriteException when appending new values fails
     */
    public function getRowIDs(array $values): array
    {
        $values = array_map(trim(...), $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            while (($line = fgets($fh)) !== false && $values) {
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        if (!$this->isWritable) return $result;

        // if there are still values, they have not been found and will be appended
        // (in that case the loop above ran to the end of the file, so $ln is the line count)
        if (!$values) return $result;

        // collect all new lines and append them in one go instead of reopening the file per value
        $append = '';
        foreach (array_keys($values) as $value) {
            $append .= "$value\n";
            $result[$value] = $ln++;
        }
        if (file_put_contents($this->filename, $append, FILE_APPEND) !== strlen($append)) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Each line is trimmed and matched against the given regular expression.
     * The result maps the RID of every matching line to its trimmed content.
     * A missing file yields an empty result.
     *
     * @throws IndexAccessException when the existing file can not be read
     */
    public function search(string $re): array
    {
        $result = [];
        if (!file_exists($this->filename)) {
            return $result;
        }

        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            throw new IndexAccessException("Failed to read $this->filename");
        }
        $ln = 0;
        while (($line = fgets($fh)) !== false) {
            $line = trim($line);
            if (preg_match($re, $line)) {
                $result[$ln] = $line;
            }
            $ln++;
        }
        fclose($fh);

        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * Results of getRowID() are remembered for up to 10 values. The oldest entry is
     * dropped when the cache is full.
     *
     * @param string $value
     * @return int the RID of the entry
     * @see getRowID()
     */
    public function accessCachedValue(string $value): int
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count($this->ridCache) >= 10) {
            // array_shift() must not be used here: it renumbers integer keys, and numeric
            // values like "2024" are stored under integer keys by PHP
            unset($this->ridCache[array_key_first($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * @inheritdoc
     *
     * Counts the lines in the file. A missing or unreadable file counts as 0.
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;

        $lines = 0;
        while (fgets($fh) !== false) {
            $lines++;
        }
        fclose($fh);
        return $lines;
    }

    /**
     * @inheritdoc
     *
     * Yields RID => row for every line in the file, with trailing whitespace stripped.
     * Nothing is yielded when the file is missing or unreadable.
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;

        try {
            $ln = 0;
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer stops iterating early and the generator is destroyed
            fclose($fh);
        }
    }
}
245