<?php

namespace dokuwiki\Search\Index;

use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis. It is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 *
 * Row IDs (RIDs) are zero-based line numbers within the index file.
 */
class FileIndex extends AbstractIndex
{
    /** @var array RID cache for faster access (value => RID) */
    protected array $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is copied line by line into a temporary file next to the index
     * (same name with a ".tmp" suffix), replacing the requested line on the way.
     * If the requested RID lies beyond the current end of the file, empty lines
     * are inserted to fill the gap. The temporary file then replaces the index.
     *
     * @throws IndexWriteException when the temporary file cannot be opened for writing
     * @throws IndexLockException when the index is not writable
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow(int $rid, string $value): void
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        // every row is stored as exactly one newline-terminated line
        if (!str_ends_with($value, "\n")) {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write $tempname");
        }
        // a missing or unreadable index is treated like an empty one
        $ih = @fopen($this->filename, 'r');

        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while (($curline = fgets($ih)) !== false) {
                fwrite($fh, (++$ln === $rid) ? $value : $curline);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines in between
        if ($rid > $ln) {
            while ($rid > ++$ln) {
                fwrite($fh, "\n");
            }
            fwrite($fh, $value);
        }
        fclose($fh);

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        io_rename($tempname, $this->filename);
    }

    /**
     * @inheritdoc
     *
     * When writable and the requested RID is beyond the end of the file,
     * the file is padded with empty lines up to that RID. This avoids
     * a more expensive line-by-line copy in a subsequent changeRow() call.
     *
     * A missing or unreadable index file yields an empty string without padding.
     *
     * @throws IndexWriteException when padding the file fails
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow(int $rid): string
    {
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $rid) {
                fclose($fh);
                return rtrim($line);
            }
        }
        fclose($fh);

        if (!$this->isWritable) return '';

        // still here? pad the index for the given ID
        // we do not simply call changeRow() here because appending is faster than line-by-line copying
        // ($rid - $ln + 1 empty strings joined by "\n" give exactly $rid - $ln newlines)
        if (!file_put_contents($this->filename, implode("\n", array_fill(0, $rid - $ln + 1, '')), FILE_APPEND)) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return '';
    }

    /**
     * @inheritdoc
     *
     * The file is read sequentially only up to the highest requested RID.
     * RIDs beyond the end of the file are missing from the result. A missing
     * or unreadable index file yields an empty result.
     */
    public function retrieveRows(array $rids): array
    {
        $result = [];

        // The scan below expects strictly increasing RIDs: a duplicate would never
        // match again and would hide every RID that follows it.
        $rids = array_unique($rids);
        sort($rids);
        $next = array_shift($rids);

        // nothing requested or nothing to read
        if ($next === null || !file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $next) {
                $result[$ln] = rtrim($line);
                $next = array_shift($rids);
                // array_shift() returns null (not false) once the list is exhausted
                if ($next === null) break;
            }
        }
        fclose($fh);
        return $result;
    }

    /**
     * @inheritdoc
     *
     * Values are trimmed before lookup. When the index is writable, values that
     * were not found are appended to the file and their new RIDs are returned as well.
     * When it is not writable, values that were not found are missing from the result.
     *
     * @throws IndexAccessException when the existing index file cannot be read
     * @throws IndexWriteException when appending new values fails
     */
    public function getRowIDs(array $values): array
    {
        $values = array_map(trim(...), $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            // stop early as soon as all values have been found
            while (($line = fgets($fh)) !== false && $values) {
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        if (!$this->isWritable || !$values) return $result;

        // values still left have not been found: the loop above ran to the end of
        // the file, so $ln is the RID of the next appended line
        $append = '';
        foreach (array_keys($values) as $value) {
            $append .= "$value\n";
            $result[$value] = $ln++;
        }
        // one append for all new values instead of reopening the file per value
        if (!file_put_contents($this->filename, $append, FILE_APPEND)) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Every line is trimmed and matched against the given regular expression.
     * The result maps the RID of each matching line to its trimmed content.
     *
     * @throws IndexAccessException when the existing index file cannot be read
     */
    public function search(string $re): array
    {
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            while (($line = fgets($fh)) !== false) {
                $line = trim($line);
                if (preg_match($re, $line)) {
                    $result[$ln] = $line;
                }
                $ln++;
            }
            fclose($fh);
        }
        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * Looks the value up via getRowID() and remembers the result in a small
     * in-memory cache so repeated requests do not hit the file again.
     *
     * @param string $value
     * @return int the RID of the entry
     * @see getRowID()
     */
    public function accessCachedValue(string $value): int
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count($this->ridCache) >= 10) {
            // array_shift() must not be used here: it renumbers integer keys, and
            // numeric values like "2024" are stored under integer keys
            unset($this->ridCache[array_key_first($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * @inheritdoc
     *
     * Counts the lines of the index file. A missing or unreadable file counts as 0.
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;
        $count = 0;
        while (fgets($fh) !== false) $count++;
        fclose($fh);
        return $count;
    }

    /**
     * @inheritdoc
     *
     * Lazily yields RID => row content (trailing whitespace removed) for every line.
     * A missing or unreadable file yields nothing.
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;
        $ln = 0;
        try {
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer abandons the generator early
            fclose($fh);
        }
    }
}