<?php

namespace dokuwiki\Search\Index;

use dokuwiki\Search\Exception\IndexLockException;

/**
 * Basic building block to access individual index files
 *
 * To be able to write to an index, a lock must be acquired.
 */
abstract class AbstractIndex
{
    /** @var string name of the index */
    protected $idx;

    /** @var string suffix of the index */
    protected $suffix;

    /** @var string full filename to the index */
    protected $filename;

    /** @var bool has this instance acquired a lock? */
    protected $isWritable = false;

    /**
     * Initialize the index
     *
     * The $suffix argument is for an index that is split into multiple parts.
     * Different index files should use different base names.
     *
     * When $isWritable is true, a lock is acquired immediately
     *
     * @param string $idx name of the index
     * @param string $suffix subpart identifier
     * @param bool $isWritable acquire a lock immediately?
     * @throws IndexLockException
     */
    public function __construct($idx, $suffix = '', $isWritable = false)
    {
        global $conf;
        $this->filename = $conf['indexdir'] . '/' . $idx . $suffix . '.idx';
        $this->idx = $idx;
        $this->suffix = $suffix;
        if ($isWritable) $this->lock();
    }

    /**
     * Make this index writable by acquiring the lock
     *
     * Does nothing when this instance already holds the lock. The lock is
     * requested for the index name only, the suffix is not part of it.
     *
     * @throws IndexLockException
     */
    public function lock()
    {
        if ($this->isWritable) return;
        Lock::acquire($this->idx);
        // only flag as writable once acquiring did not throw
        $this->isWritable = true;
    }

    /**
     * Make this index read-only by releasing the lock
     *
     * Decrements the reference count in the Lock registry. The filesystem
     * lock is only removed when the count reaches zero.
     *
     * Does nothing when this instance does not hold the lock.
     */
    public function unlock()
    {
        if (!$this->isWritable) return;
        Lock::release($this->idx);
        $this->isWritable = false;
    }

    /**
     * Whether this index instance is writable
     *
     * @return bool
     */
    public function isWritable()
    {
        return $this->isWritable;
    }

    /**
     * Ensure lock is released when the index is destroyed
     */
    public function __destruct()
    {
        $this->unlock();
    }

    /**
     * @return string the full path to the underlying file
     */
    public function getFilename()
    {
        return $this->filename;
    }

    /**
     * Does this index exist, yet?
     *
     * @return bool
     */
    public function exists()
    {
        return file_exists($this->getFilename());
    }

    /**
     * Return the largest numeric suffix for the current index
     *
     * This is only useful for indexes that use integer based suffixes (like the wordlength indexes)
     *
     * All files in the index directory whose name starts with the index name and ends
     * in digits followed by ".idx" are taken into account.
     *
     * @return int 0 if no numeric suffix indexes are found
     */
    public function max()
    {
        global $conf;
        $result = 0;

        $files = glob($conf['indexdir'] . '/' . $this->idx . '*.idx');
        // glob() returns false on error, treat that like "nothing found"
        if ($files === false) $files = [];

        foreach ($files as $file) {
            // the quantifier has to be inside the group, otherwise only the
            // last digit of a multi-digit suffix would be captured
            if (preg_match('/(\d+)\.idx$/', $file, $match)) {
                $num = (int)$match[1];
                if ($num > $result) $result = $num;
            }
        }

        return $result;
    }

    /**
     * Change a line in the index
     *
     * If the line doesn't exist, it will be added, creating empty
     * lines in between as necessary
     *
     * @param int $rid the line number, count starting at 0
     * @param string $value line content to write
     */
    abstract public function changeRow($rid, $value);

    /**
     * Retrieve a line from the index
     *
     * Returns an empty string for non-existing lines
     *
     * @param int $rid the line number
     * @return string a line with trailing whitespace removed
     */
    abstract public function retrieveRow($rid);

    /**
     * Retrieve multiple lines from the index
     *
     * Ignores non-existing lines, eg the result array may be smaller than the input $rids
     *
     * @param int[] $rids
     * @return array [rid => value]
     */
    abstract public function retrieveRows($rids);

    /**
     * Searches the Index for a given value
     *
     * If the index is writable and the value is not found it will be added. Otherwise null is returned.
     *
     * Note the existence of an entry in the index does not say anything about the existence
     * of the real world object (eg. a page)
     *
     * You should preferably use accessCachedValue() instead.
     *
     * @param string $value
     *
     * @return int|null the RID of the entry, null if not found and not added
     */
    public function getRowID($value)
    {
        // delegate to the bulk lookup and pick out the single result
        $result = $this->getRowIDs([$value]);
        return $result[$value] ?? null;
    }

    /**
     * Searches the Index for all given values
     *
     * If the index is writable, not found values are added
     *
     * @param string[] $values
     * @return array the RIDs of the entries (value => rid)
     */
    abstract public function getRowIDs($values);

    /**
     * Find all RIDs matching a regular expression
     *
     * A full regular expression including delimiters and modifiers is expected
     *
     * @param string $re the regular expression to match against
     * @return array (rid => value)
     */
    abstract public function search($re);

    /**
     * Clears the index by deleting its file
     *
     * Deleting is best effort: errors (eg. the file not existing) are silently ignored.
     *
     * @return void
     */
    public function clear()
    {
        @unlink($this->filename);
    }

    /**
     * Saves the index if needed
     *
     * The default implementation does nothing and is only for streamlining the API of
     * the different index classes
     *
     * @return void
     */
    public function save()
    {
    }
}