<?php

namespace dokuwiki\Search\Index;

use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis. It is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 *
 * Row IDs (RIDs) are zero-based line numbers within the file.
 */
class FileIndex extends AbstractIndex
{
    /** @var array<int|string, int> RID cache for faster access, keyed by value (insertion ordered) */
    protected array $ridCache = [];

    /**
     * Replace (or create) the row with the given RID
     *
     * The index is copied line-by-line into a temporary file, substituting the wanted line,
     * and the temporary file then replaces the index. When the RID lies beyond the current
     * end of the file, the gap is filled with empty lines.
     *
     * @inheritdoc
     * @throws IndexWriteException when the temporary file cannot be written or moved into place
     * @throws IndexLockException when the index is not writable
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow(int $rid, string $value): void
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        if (!str_ends_with($value, "\n")) {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write $tempname");
        }
        // a missing or unreadable index is treated like an empty one
        $ih = @fopen($this->filename, 'r');

        // a short write (e.g. disk full) counts as failure, otherwise a truncated
        // temporary file would replace the intact index below
        $write = static fn(string $line): bool => fwrite($fh, $line) === strlen($line);

        $ok = true;
        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while ($ok && ($curline = fgets($ih)) !== false) {
                if (++$ln == $rid) {
                    $curline = $value;
                } elseif (!str_ends_with($curline, "\n")) {
                    // last line without trailing newline: terminate it so that
                    // anything appended afterwards does not get merged into it
                    $curline .= "\n";
                }
                $ok = $write($curline);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines in between
        if ($ok && $rid > $ln) {
            while ($ok && $rid > ++$ln) {
                $ok = $write("\n");
            }
            $ok = $ok && $write($value);
        }
        // always close the handle, but remember an earlier failure
        $ok = fclose($fh) && $ok;

        if (!$ok) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $tempname");
        }

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }
        if (!io_rename($tempname, $this->filename)) {
            @unlink($tempname);
            throw new IndexWriteException("Failed to write $this->filename");
        }
    }

    /**
     * Read a single row
     *
     * Returns an empty string when the file or the row does not exist.
     *
     * @inheritdoc
     */
    public function retrieveRow(int $rid): string
    {
        // line numbers start at 0, a negative RID can never match
        if ($rid < 0 || !file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }

        $row = '';
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln == $rid) {
                $row = rtrim($line);
                break;
            }
        }
        fclose($fh);

        return $row;
    }

    /**
     * Read several rows in a single pass over the file
     *
     * The result is keyed by RID; RIDs that do not exist in the file are omitted.
     *
     * @inheritdoc
     */
    public function retrieveRows(array $rids): array
    {
        $result = [];

        // the file is scanned forward only: RIDs need to be ascending and unique,
        // a repeated RID would otherwise never match again and hide all later ones
        $rids = array_unique($rids);
        sort($rids);
        $next = array_shift($rids); // null when nothing (more) is wanted
        if ($next === null) {
            return $result;
        }

        if (!file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $next) {
                $result[$ln] = rtrim($line);
                $next = array_shift($rids);
                if ($next === null) break; // all wanted rows found, skip the rest of the file
            }
        }
        fclose($fh);
        return $result;
    }


    /**
     * Look up the RIDs of the given values
     *
     * Values not found are appended to the index when it is writable; otherwise they are
     * missing from the result. The result maps each (trimmed) value to its RID.
     *
     * @inheritdoc
     * @throws IndexAccessException when the index exists but cannot be read
     * @throws IndexWriteException when new values cannot be appended
     */
    public function getRowIDs(array $values): array
    {
        $values = array_map(trim(...), $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            // stops early once every value has been found
            while (($line = fgets($fh)) !== false && $values) {
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        if (!$this->isWritable || !$values) return $result;

        // if there are still values, they have not been found and will be appended.
        // The scan above only ends with values left over after reading the whole file,
        // so $ln is the line count and thus the RID of the first appended value.
        $append = '';
        foreach (array_keys($values) as $value) {
            $append .= "$value\n";
            $result[$value] = $ln++;
        }
        if (file_put_contents($this->filename, $append, FILE_APPEND) === false) {
            throw new IndexWriteException("Failed to write $this->filename");
        }

        return $result;
    }

    /**
     * Find all rows matching the given regular expression
     *
     * The result maps RIDs to the (trimmed) matching rows.
     *
     * @inheritdoc
     * @throws IndexAccessException when the index exists but cannot be read
     */
    public function search(string $re): array
    {
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read $this->filename");
            }
            while (($line = fgets($fh)) !== false) {
                $line = trim($line);
                if (preg_match($re, $line)) {
                    $result[$ln] = $line;
                }
                $ln++;
            }
            fclose($fh);
        }
        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * @param string $value
     * @return int the RID of the entry
     * @see getRowID()
     */
    public function accessCachedValue(string $value): int
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again.
        // The oldest key is unset explicitly: array_shift() would renumber integer keys,
        // and numeric values such as "123" are stored as integer keys by PHP.
        if (count($this->ridCache) >= 10) {
            unset($this->ridCache[array_key_first($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * Number of rows (lines) in the index; 0 when the file is missing or unreadable
     *
     * @inheritdoc
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;
        $count = 0;
        while (fgets($fh) !== false) $count++;
        fclose($fh);
        return $count;
    }

    /**
     * Iterate over all rows as RID => row; yields nothing when the file is missing or unreadable
     *
     * @inheritdoc
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;
        $ln = 0;
        try {
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer abandons the generator before the end
            fclose($fh);
        }
    }
}