<?php

namespace dokuwiki\Search\Index;

use dokuwiki\Search\Exception\IndexLockException;

/**
 * Basic building block to access individual index files
 *
 * To be able to write to an index, a lock must be acquired.
 *
 * Indexes are iterable, yielding RID => value pairs. The iterator itself
 * has to be provided by the concrete subclasses.
 */
abstract class AbstractIndex implements \IteratorAggregate
{
    /** @var string name of the index */
    protected $idx;

    /** @var string suffix of the index */
    protected $suffix;

    /** @var string full filename to the index */
    protected $filename;

    /** @var bool has this instance acquired a lock? */
    protected $isWritable = false;

    /**
     * Initialize the index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * The file name is built from the global $conf['indexdir'], the index
     * name, the suffix and the fixed extension '.idx'.
     *
     * When $isWritable is true, a lock is acquired immediately
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param bool $isWritable acquire a lock immediately?
     * @throws IndexLockException
     */
    public function __construct($idx, $suffix = '', $isWritable = false)
    {
        global $conf;
        $this->filename = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
        $this->idx = $idx;
        $this->suffix = $suffix;
        if ($isWritable) $this->lock();
    }

    /**
     * Make this index writable by acquiring the lock
     *
     * Does nothing when this instance already holds the lock. The lock is
     * requested for the index name only, the suffix is not part of it.
     *
     * @throws IndexLockException
     */
    public function lock()
    {
        if ($this->isWritable) return;
        Lock::acquire($this->idx);
        // only flag as writable once acquiring did not throw
        $this->isWritable = true;
    }

    /**
     * Make this index read-only by releasing the lock
     *
     * Does nothing when this instance does not hold the lock.
     *
     * Decrements the reference count in the Lock registry. The filesystem
     * lock is only removed when the count reaches zero.
     */
    public function unlock()
    {
        if (!$this->isWritable) return;
        Lock::release($this->idx);
        $this->isWritable = false;
    }

    /**
     * Whether this index instance is writable
     *
     * @return bool
     */
    public function isWritable()
    {
        return $this->isWritable;
    }

    /**
     * Ensure lock is released when the index is destroyed
     */
    public function __destruct()
    {
        $this->unlock();
    }

    /**
     * @return string the full path to the underlying file
     */
    public function getFilename()
    {
        return $this->filename;
    }

    /**
     * Does this index exist, yet?
     *
     * @return bool
     */
    public function exists()
    {
        return file_exists($this->getFilename());
    }

    /**
     * Return the largest numeric suffix for the current index
     *
     * This is only useful for indexes that use integer based suffixes (like the wordlength indexes)
     *
     * Only files named exactly "<index name><digits>.idx" are considered. Files of
     * other indexes that merely share the same name prefix are ignored.
     *
     * @return int 0 if no numeric suffix indexes are found
     */
    public function max()
    {
        global $conf;
        $result = 0;

        // glob() returns false on errors (and on some systems when nothing matches)
        $files = glob($conf['indexdir'] . '/' . $this->idx . '*.idx') ?: [];

        // The quantifier has to be inside the group, otherwise only the last digit is captured.
        // Anchoring on the index name keeps files of similarly named indexes out.
        $pattern = '/^' . preg_quote(basename($this->idx), '/') . '(\d+)\.idx$/';

        foreach ($files as $file) {
            if (preg_match($pattern, basename($file), $match)) {
                $num = (int)$match[1];
                if ($num > $result) $result = $num;
            }
        }

        return $result;
    }

    /**
     * Change a line in the index
     *
     * If the line doesn't exist, it will be added, creating empty
     * lines inbetween as necessary
     *
     * @param int $rid the line number, count starting at 0
     * @param string $value line content to write
     */
    abstract public function changeRow($rid, $value);

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string for non-existing lines
     *
     * @param int $rid the line number
     * @return string a line with trailing whitespace removed
     */
    abstract public function retrieveRow($rid);

    /**
     * Retrieve multiple lines from the index
     *
     * Ignores non-existing lines, eg the result array may be smaller than the input $rids
     *
     * @param int[] $rids
     * @return array [rid => value]
     */
    abstract public function retrieveRows($rids);

    /**
     * Searches the Index for a given value
     *
     * If the index is writable and the value is not found it will be added. Otherwise null is returned.
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * @param string $value
     *
     * @return int|null the RID of the entry, null if not found and not added
     */
    public function getRowID($value)
    {
        // delegate to the bulk lookup, which returns a value => rid map
        $result = $this->getRowIDs([$value]);
        return $result[$value] ?? null;
    }

    /**
     * Searches the Index for all given values
     *
     * If the index is writable, not found values are added
     *
     * @param string[] $values
     * @return array the RIDs of the entries (value => rid)
     */
    abstract public function getRowIDs($values);

    /**
     * Find all RIDs matching a regular expression
     *
     * A full regular expression including delimiters and modifiers is expected
     *
     * @param string $re the regular expression to match against
     * @return array (rid => value)
     */
    abstract public function search($re);

    /**
     * Clears the index by deleting its file
     *
     * Errors are deliberately suppressed, a missing file is not a problem here.
     *
     * @return void
     */
    public function clear()
    {
        @unlink($this->filename);
    }

    /**
     * Saves the index if needed
     *
     * The default implementation does nothing and is only for streamlining the API of
     * the different index classes
     *
     * @return void
     */
    public function save()
    {
    }
}