xref: /dokuwiki/inc/Search/Index/AbstractIndex.php (revision c66b5ec65fd5aa2f1037d2be542b49297f3aac0e)
1ec5280efSAndreas Gohr<?php
2ec5280efSAndreas Gohr
3ec5280efSAndreas Gohrnamespace dokuwiki\Search\Index;
4ec5280efSAndreas Gohr
5*c66b5ec6SAndreas Gohruse dokuwiki\Search\Exception\IndexLockException;
6*c66b5ec6SAndreas Gohr
/**
 * Basic building block to access individual index files
 *
 * An index is stored in a single file below $conf['indexdir'], named
 * "<idx><suffix>.idx". Subclasses implement the actual row based access.
 *
 * To be able to write to an index, a lock must be acquired. The lock is
 * requested for the index name only (without the suffix).
 */
abstract class AbstractIndex
{
    /** @var string name of the index */
    protected $idx;

    /** @var string suffix of the index */
    protected $suffix;

    /** @var string full filename to the index */
    protected $filename;

    /** @var bool has this instance acquired a lock? */
    protected $isWritable = false;

    /**
     * Initialize the index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * When $isWritable is true, a lock is acquired immediately
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param bool $isWritable acquire a lock immediately?
     * @throws IndexLockException
     */
    public function __construct($idx, $suffix = '', $isWritable = false)
    {
        global $conf;
        $this->filename = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
        $this->idx = $idx;
        $this->suffix = $suffix;
        if ($isWritable) $this->lock();
    }

    /**
     * Make this index writable by acquiring the lock
     *
     * Does nothing when this instance already holds the lock. The instance is
     * only marked writable after the lock has been acquired.
     *
     * @throws IndexLockException
     */
    public function lock()
    {
        if ($this->isWritable) return;
        Lock::acquire($this->idx);
        $this->isWritable = true;
    }

    /**
     * Make this index read-only by releasing the lock
     *
     * Does nothing when this instance does not hold the lock.
     *
     * Decrements the reference count in the Lock registry. The filesystem
     * lock is only removed when the count reaches zero.
     */
    public function unlock()
    {
        if (!$this->isWritable) return;
        Lock::release($this->idx);
        $this->isWritable = false;
    }

    /**
     * Whether this index instance is writable
     *
     * @return bool true if this instance currently holds the lock
     */
    public function isWritable()
    {
        return $this->isWritable;
    }

    /**
     * Ensure lock is released when the index is destroyed
     */
    public function __destruct()
    {
        $this->unlock();
    }

    /**
     * @return string the full path to the underlying file
     */
    public function getFilename()
    {
        return $this->filename;
    }

    /**
     * Does this index exist, yet?
     *
     * @return bool true if the underlying index file is present
     */
    public function exists()
    {
        return file_exists($this->getFilename());
    }

    /**
     * Return the largest numeric suffix for the current index
     *
     * This is only useful for indexes that use integer based suffixes (like the wordlength indexes)
     *
     * Only files named exactly "<idx><digits>.idx" are considered. Files of other
     * indexes that merely share the same name prefix are ignored.
     *
     * @return int 0 if no numeric suffix indexes are found
     */
    public function max()
    {
        global $conf;

        $files = glob($conf['indexdir'] . '/' . $this->idx . '*.idx');
        // glob() returns false on error and an empty array when nothing matched
        if (!$files) return 0;

        // the quantifier has to be inside the group, otherwise only the last digit is captured
        $pattern = '/^' . preg_quote($this->idx, '/') . '(\d+)\.idx$/';

        $result = 0;
        foreach ($files as $file) {
            if (preg_match($pattern, basename($file), $match)) {
                $num = (int)$match[1];
                if ($num > $result) $result = $num;
            }
        }

        return $result;
    }

    /**
     * Change a line in the index
     *
     * If the line doesn't exist, it will be added, creating empty
     * lines inbetween as necessary
     *
     * @param int $rid the line number, count starting at 0
     * @param string $value line content to write
     */
    abstract public function changeRow($rid, $value);

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string for non-existing lines
     *
     * @param int $rid the line number
     * @return string a line with trailing whitespace removed
     */
    abstract public function retrieveRow($rid);

    /**
     * Retrieve multiple lines from the index
     *
     * Ignores non-existing lines, eg the result array may be smaller than the input $rids
     *
     * @param int[] $rids
     * @return array [rid => value]
     */
    abstract public function retrieveRows($rids);

    /**
     * Searches the Index for a given value
     *
     * If the index is writable and the value is not found it will be added. Otherwise null is returned.
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * @param string $value
     *
     * @return int|null the RID of the entry, null if not found and not added
     */
    public function getRowID($value)
    {
        // delegate to the bulk lookup; a missing key means the value is unknown
        $result = $this->getRowIDs([$value]);
        return $result[$value] ?? null;
    }

    /**
     * Searches the Index for all given values
     *
     * If the index is writable, not found values are added
     *
     * @param string[] $values
     * @return array the RIDs of the entries (value => rid)
     */
    abstract public function getRowIDs($values);

    /**
     * Find all RIDs matching a regular expression
     *
     * A full regular expression including delimiters and modifiers is expected
     *
     * @param string $re the regular expression to match against
     * @return array (rid => value)
     */
    abstract public function search($re);

    /**
     * Clears the index by deleting its file
     *
     * Errors are suppressed on purpose, a missing file is not a problem here.
     *
     * @return void
     */
    public function clear()
    {
        @unlink($this->filename);
    }

    /**
     * Saves the index if needed
     *
     * The default implementation does nothing and is only for streamlining the API of
     * the different index classes
     *
     * @return void
     */
    public function save()
    {
    }
}
224