xref: /dokuwiki/inc/Search/Index/FileIndex.php (revision 03a35633c932eca1b7b0d373d08b9140884a0ebe)
19bd7d62fSAndreas Gohr<?php
29bd7d62fSAndreas Gohr
39bd7d62fSAndreas Gohrnamespace dokuwiki\Search\Index;
49bd7d62fSAndreas Gohr
59bd7d62fSAndreas Gohruse dokuwiki\Search\Exception\IndexAccessException;
69bd7d62fSAndreas Gohruse dokuwiki\Search\Exception\IndexWriteException;
79bd7d62fSAndreas Gohr
/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis; the file is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 */
class FileIndex extends AbstractIndex
{
    /**
     * @var array RID cache for faster access
     *
     * The cache is static and therefore shared by all instances. Keys are built from the
     * index file name and the looked up value, see accessCachedValue().
     */
    protected static $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is copied line-by-line into a temporary file in which the wanted row is replaced
     * (or appended after padding with empty rows). The temporary file then replaces the index.
     *
     * @throws IndexWriteException when the index can not be read or the new index can not be written
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow($rid, $value)
    {
        global $conf;

        if (substr($value, -1) !== "\n") {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write {$tempname}");
        }

        // A missing index is fine (it will be created), but an index that exists and can not be
        // read must not be treated as empty: the rename below would replace it and lose all rows
        $ih = @fopen($this->filename, 'r');
        if (!$ih && file_exists($this->filename)) {
            fclose($fh);
            @unlink($tempname);
            throw new IndexWriteException("Failed to read {$this->filename}");
        }

        $ok = true; // becomes false as soon as any write fails
        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while (($curline = fgets($ih)) !== false) {
                $out = (++$ln == $rid) ? $value : $curline;
                // a last line without line break would be merged with whatever is appended below
                if (substr($out, -1) !== "\n") {
                    $out .= "\n";
                }
                $ok = (fwrite($fh, $out) === strlen($out)) && $ok;
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines inbetween
        if ($rid > $ln) {
            $out = str_repeat("\n", max(0, (int) $rid - $ln - 1)) . $value;
            $ok = (fwrite($fh, $out) === strlen($out)) && $ok;
        }
        $ok = fclose($fh) && $ok;

        // never replace the index with an incompletely written copy
        if (!$ok) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write {$tempname}");
        }

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        // strict comparison: only an explicit false return is treated as failure
        if (io_rename($tempname, $this->filename) === false) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write {$this->filename}");
        }
    }

    /**
     * @inheritdoc
     *
     * When the index file exists but has fewer rows than requested, it is padded with empty
     * rows up to and including the requested one. A missing or unreadable index file is not
     * touched and simply yields an empty string.
     *
     * @throws IndexWriteException when padding the index fails
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow($rid)
    {
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        $unterminated = false; // true when the most recently read line had no line break
        while (($line = fgets($fh)) !== false) {
            if (++$ln == $rid) {
                fclose($fh);
                return rtrim((string) $line);
            }
            $unterminated = substr($line, -1) !== "\n";
        }
        fclose($fh);

        // still here? pad the index for the given ID
        // we do not simply call changeRow() here because appending is faster than line-by-line copying
        $missing = (int) $rid - $ln;
        if ($missing > 0) {
            // one line break per missing row, plus one to finish an unterminated last line
            $padding = str_repeat("\n", $missing + ($unterminated ? 1 : 0));
            if (file_put_contents($this->filename, $padding, FILE_APPEND) !== strlen($padding)) {
                throw new IndexWriteException("Failed to write {$this->filename}");
            }
        }

        return '';
    }

    /**
     * @inheritdoc
     *
     * Values that are not found in the index are appended to it and their new row IDs are returned.
     *
     * @throws IndexAccessException when the index exists but can not be read
     * @throws IndexWriteException when new values can not be appended
     */
    public function getRowIDs($values)
    {
        $values = array_map('trim', $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        $unterminated = false; // true when the most recently read line had no line break
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read {$this->filename}");
            }
            // stops early once all values have been found
            while (($line = fgets($fh)) !== false && $values) {
                $unterminated = substr($line, -1) !== "\n";
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        // if there are still values, they have not been found and will be appended
        // values can only be left when the loop above ran to the end of the file, so $ln is the row count
        if ($values) {
            // finish an unterminated last line first, otherwise the first new value would be merged into it
            $append = $unterminated ? "\n" : '';
            foreach (array_keys($values) as $value) {
                $append .= "$value\n";
                $result[$value] = $ln++;
            }
            // a single checked write: returning row IDs for rows that were never stored would corrupt lookups
            if (file_put_contents($this->filename, $append, FILE_APPEND) !== strlen($append)) {
                throw new IndexWriteException("Failed to write {$this->filename}");
            }
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Returns the trimmed rows matching the given regular expression, keyed by their row ID.
     * A missing index file yields an empty result.
     *
     * @throws IndexAccessException when the index exists but can not be read
     */
    public function search($re)
    {
        $result = [];
        if (!file_exists($this->filename)) {
            return $result;
        }

        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            throw new IndexAccessException("Failed to read {$this->filename}");
        }
        $ln = 0;
        while (($line = fgets($fh)) !== false) {
            $line = trim($line);
            if (preg_match($re, $line)) {
                $result[$ln] = $line;
            }
            $ln++;
        }
        fclose($fh);

        return $result;
    }

    /**
     * Cached version of getRowID()
     *
     * @param string $value
     * @return int the RID of the entry
     * @throws IndexAccessException
     * @throws IndexWriteException
     */
    public function accessCachedValue($value)
    {
        // The cache is shared between all index files, so the file name has to be part of the key.
        // The NUL separator also guarantees a non-numeric string key: array_shift() below renumbers
        // integer keys, which would silently corrupt entries for numeric values.
        $key = $this->filename . "\0" . $value;
        if (isset(static::$ridCache[$key])) {
            return static::$ridCache[$key];
        }

        // look up first, so a failing lookup does not evict an entry for nothing
        $rid = $this->getRowID($value);

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count(static::$ridCache) >= 10) {
            array_shift(static::$ridCache);
        }
        static::$ridCache[$key] = $rid;
        return $rid;
    }
}
177