xref: /dokuwiki/inc/Search/Index/AbstractIndex.php (revision 596d5287d7a816d606ef4153ef9e0f4704bf8f73)
1ec5280efSAndreas Gohr<?php
2ec5280efSAndreas Gohr
3ec5280efSAndreas Gohrnamespace dokuwiki\Search\Index;
4ec5280efSAndreas Gohr
/**
 * Basic building block to access individual index files
 */
abstract class AbstractIndex
{
    /** @var string name of the index */
    protected $idx;

    /** @var string suffix of the index (sub part identifier, may be empty) */
    protected $suffix;

    /** @var string full filename to the index */
    protected $filename;

    /** @var bool is this index opened for writing? */
    protected $isWritable = false;

    /**
     * Initialize the index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * The resulting file is located at <indexdir>/<idx><suffix>.idx
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param bool $isWritable has a sufficient lock been acquired to write to this index?
     */
    public function __construct($idx, $suffix = '', $isWritable = false)
    {
        global $conf;
        $this->filename = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
        $this->idx = $idx;
        $this->suffix = $suffix;
        $this->isWritable = $isWritable;
    }

    /**
     * @return string the full path to the underlying file
     */
    public function getFilename()
    {
        return $this->filename;
    }

    /**
     * Does this index exist, yet?
     *
     * @return bool true if the underlying index file is present on disk
     */
    public function exists()
    {
        return file_exists($this->getFilename());
    }

    /**
     * Return the largest numeric suffix for the current index
     *
     * This is only useful for indexes that use integer based suffixes (like the wordlength indexes).
     * Only files named exactly <idx><digits>.idx inside the index directory are considered; other
     * indexes that merely share a name prefix with this one are ignored.
     *
     * @return int 0 if no numeric suffix indexes are found
     */
    public function max()
    {
        global $conf;

        $files = glob($conf['indexdir'] . '/' . $this->idx . '*.idx');
        if ($files === false) {
            // glob() signals an error with false; treat that like "nothing found"
            return 0;
        }

        // The index name has to start right after a directory separator and must be
        // followed by nothing but digits. The whole digit run is captured in one group:
        // a repeated single-digit group "(\d)+" would only keep the last digit.
        $pattern = '#(?:^|[/\\\\])' . preg_quote($this->idx, '#') . '(\d+)\.idx$#';

        $result = 0;
        foreach ($files as $file) {
            if (!preg_match($pattern, $file, $match)) {
                continue;
            }
            $num = (int)$match[1];
            if ($num > $result) {
                $result = $num;
            }
        }

        return $result;
    }

    /**
     * Change a line in the index
     *
     * If the line doesn't exist, it will be added, creating empty
     * lines in between as necessary
     *
     * @param int $rid the line number, count starting at 0
     * @param string $value line content to write
     */
    abstract public function changeRow($rid, $value);

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string for non-existing lines
     *
     * @param int $rid the line number
     * @return string a line with trailing whitespace removed
     */
    abstract public function retrieveRow($rid);

    /**
     * Retrieve multiple lines from the index
     *
     * Ignores non-existing lines, e.g. the result array may be smaller than the input $rids
     *
     * @param int[] $rids
     * @return array [rid => value]
     */
    abstract public function retrieveRows($rids);

    /**
     * Searches the Index for a given value
     *
     * This is a convenience wrapper around getRowIDs() for a single value.
     *
     * If the index is writable and the value is not found it will be added. Otherwise null is returned.
     *
     * Entries previously marked as deleted will be restored.  FIXME is that true?
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * @param string $value
     *
     * @return int|null the RID of the entry, null if not found and not added
     */
    public function getRowID($value)
    {
        $result = $this->getRowIDs([$value]);
        // a missing key means the value was neither found nor added
        return $result[$value] ?? null;
    }

    /**
     * Searches the Index for all given values
     *
     * If the index is writable, not found values are added
     *
     * @param string[] $values
     * @return array the RIDs of the entries (value => rid)
     */
    abstract public function getRowIDs($values);

    /**
     * Find all RIDs matching a regular expression
     *
     * A full regular expression including delimiters and modifiers is expected
     *
     * @param string $re the regular expression to match against
     * @return array (rid => value)
     */
    abstract public function search($re);

    /**
     * Clears the index by deleting its file
     *
     * Errors are suppressed on purpose, so clearing an index whose file
     * does not exist is silently ignored.
     *
     * @return void
     */
    public function clear()
    {
        @unlink($this->filename);
    }

    /**
     * Saves the index if needed
     *
     * The default implementation does nothing and is only for streamlining the API of
     * the different index classes
     *
     * @return void
     */
    public function save()
    {
    }
}
174