xref: /dokuwiki/inc/Search/Index/FileIndex.php (revision 9bd7d62f47cb0e2a7651fefd7106f6ac10625281)
1*9bd7d62fSAndreas Gohr<?php
2*9bd7d62fSAndreas Gohr
3*9bd7d62fSAndreas Gohrnamespace dokuwiki\Search\Index;
4*9bd7d62fSAndreas Gohr
5*9bd7d62fSAndreas Gohruse dokuwiki\Search\Exception\IndexAccessException;
6*9bd7d62fSAndreas Gohruse dokuwiki\Search\Exception\IndexWriteException;
7*9bd7d62fSAndreas Gohr
/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis. The file is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 *
 * A row ID (RID) is the zero-based line number of an entry within the index file.
 */
class FileIndex extends AbstractIndex
{
    /**
     * @var int[] RID cache for faster access
     *
     * The cache is shared by all instances, so every key is built from the index
     * file name and the value ("<filename>\n<value>"). Such a key is never an
     * integer-like string, which keeps array_shift() from renumbering it.
     */
    protected static $ridCache = [];

    /**
     * Replace the row with the given RID, padding the file with empty rows when needed
     *
     * The index is copied line by line into a temporary file which then replaces
     * the original, so a failed write never leaves a half-written index behind.
     *
     * @inheritdoc
     * @param int $rid zero-based line number of the row to write
     * @param string $value new content of the row, a trailing newline is added when missing
     * @throws IndexWriteException when the temporary file cannot be written or moved into place
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow($rid, $value)
    {
        global $conf;

        // compare row numbers as integers only; a non-numeric $rid must never
        // fall into PHP's string comparison rules in the loops below
        $rid = (int)$rid;

        if (substr($value, -1) !== "\n") {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) throw new IndexWriteException("Failed to write {$tempname}");
        $ih = @fopen($this->filename, 'r');

        $ok = true;
        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while ($ok && ($curline = fgets($ih)) !== false) {
                if (++$ln === $rid) {
                    $curline = $value;
                } elseif (substr($curline, -1) !== "\n") {
                    // an unterminated last line would otherwise merge with whatever follows
                    $curline .= "\n";
                }
                $ok = fwrite($fh, $curline) === strlen($curline);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines in between
        if ($ok && $rid > $ln) {
            $tail = str_repeat("\n", $rid - $ln - 1) . $value;
            $ok = fwrite($fh, $tail) === strlen($tail);
        }
        $ok = fclose($fh) && $ok;

        if (!$ok) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write {$tempname}");
        }

        if (!empty($conf['fperm'])) {
            chmod($tempname, $conf['fperm']);
        }
        if (io_rename($tempname, $this->filename) === false) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write {$this->filename}");
        }

        // a cached value of this file may still point at the row that was just overwritten
        $prefix = $this->filename . "\n";
        foreach (static::$ridCache as $key => $cachedRid) {
            if ($cachedRid === $rid && strpos($key, $prefix) === 0) {
                unset(static::$ridCache[$key]);
            }
        }
    }

    /**
     * Read the row with the given RID
     *
     * @inheritdoc
     * @param int $rid zero-based line number of the row to read
     * @return string the row without trailing whitespace, an empty string when the
     *                file cannot be read or has no such row
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow($rid)
    {
        if (!file_exists($this->filename)) return '';
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return '';

        $rid = (int)$rid;
        $ln = -1;
        $row = '';
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $rid) {
                $row = $line;
                break;
            }
        }
        fclose($fh);
        return rtrim($row);
    }

    /**
     * Searches the Index for a given value and adds it if not found
     *
     * Entries previously marked as deleted will be restored.
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * @param string $value
     * @return int the RID of the entry
     * @throws IndexAccessException
     * @throws IndexWriteException
     */
    public function accessValue($value)
    {
        // accessValues() keys its result by the trimmed value, so look it up the same way
        $value = trim($value);
        $result = $this->accessValues([$value]);
        return $result[$value];
    }

    /**
     * Searches the Index for all given values and adds them if not found
     *
     * Values are trimmed before they are compared and stored. Values that are not
     * found are appended to the end of the index file in a single write.
     *
     * @param string[] $values
     * @return array the RIDs of the entries, keyed by the trimmed value
     * @throws IndexAccessException when the index file exists but cannot be opened
     * @throws IndexWriteException when missing values cannot be appended
     */
    public function accessValues($values)
    {
        $values = array_map('trim', $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        $unterminated = false; // does the last line read lack its newline?
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) throw new IndexAccessException("Failed to read {$this->filename}");
            // stop reading as soon as everything has been found
            while ($values && ($line = fgets($fh)) !== false) {
                $unterminated = substr($line, -1) !== "\n";
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        // if there are still values, they have not been found and will be appended
        if ($values) {
            // values are still missing, so the whole file was read and $ln is its line count;
            // terminate a dangling last line first so the new rows really start at $ln
            $append = $unterminated ? "\n" : '';
            foreach (array_keys($values) as $value) {
                $append .= "$value\n";
                $result[$value] = $ln++;
            }
            if (@file_put_contents($this->filename, $append, FILE_APPEND) === false) {
                throw new IndexWriteException("Failed to write {$this->filename}");
            }
        }

        return $result;
    }

    /**
     * Cached version of accessValue()
     *
     * The cache lives in a static property and is therefore shared by all
     * instances; entries are kept apart per index file.
     *
     * @param string $value
     * @return int the RID of the entry
     * @throws IndexAccessException
     * @throws IndexWriteException
     */
    public function accessCachedValue($value)
    {
        $key = $this->filename . "\n" . $value;
        if (isset(static::$ridCache[$key])) return static::$ridCache[$key];

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count(static::$ridCache) >= 10) array_shift(static::$ridCache);
        static::$ridCache[$key] = $this->accessValue($value);
        return static::$ridCache[$key];
    }
}
157