<?php

namespace dokuwiki\Search\Index;

use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;

/**
 * Access to a single index file
 *
 * Access using this class always happens on a line-by-line basis. The file is usually not read in full.
 * All modifications are implicitly saved.
 * Should be used for large indexes that receive only few changes at once.
 *
 * A row ID (RID) is the zero-based line number of an entry within the index file.
 *
 * NOTE(review): $this->filename and $this->isWritable are not declared in this class;
 * presumably they are provided by AbstractIndex - confirm.
 */
class FileIndex extends AbstractIndex
{
    /** @var array RID cache for faster access (value => RID, oldest entry first) */
    protected $ridCache = [];

    /**
     * @inheritdoc
     *
     * The index is rewritten line by line into a temporary file ("<filename>.tmp") in which
     * the wanted line is replaced. If the wanted line lies beyond the current end of the
     * index, empty lines are inserted in between. The temporary file then replaces the index.
     *
     * @throws IndexWriteException when the temporary file can not be created or the index can not be replaced
     * @throws IndexLockException when the index is not writable
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function changeRow($rid, $value)
    {
        global $conf;

        if (!$this->isWritable) throw new IndexLockException();

        // every row is stored as exactly one newline terminated line
        if (substr($value, -1) !== "\n") {
            $value .= "\n";
        }

        $tempname = $this->filename . '.tmp';
        $fh = @fopen($tempname, 'w');
        if (!$fh) {
            throw new IndexWriteException("Failed to write {$tempname}");
        }
        // a missing or unreadable index is treated like an empty one
        $ih = @fopen($this->filename, 'r');

        $ln = -1; // line counter
        // copy previous index lines line-by-line, replacing the wanted line
        if ($ih) {
            while (($curline = fgets($ih)) !== false) {
                fwrite($fh, (++$ln == $rid) ? $value : $curline);
            }
            fclose($ih);
        }
        // if wanted line is beyond the current line count, insert empty lines inbetween
        if ($rid > $ln) {
            while ($rid > ++$ln) {
                fwrite($fh, "\n");
            }
            fwrite($fh, $value);
        }
        fclose($fh);

        if ($conf['fperm']) {
            chmod($tempname, $conf['fperm']);
        }

        // NOTE(review): relies on io_rename() returning false on failure - confirm against inc/io.php.
        // Without this check a failed replace would silently lose the modification.
        if (!io_rename($tempname, $this->filename)) {
            throw new IndexWriteException("Failed to write {$this->filename}");
        }
    }

    /**
     * @inheritdoc
     *
     * When writable and the requested RID is beyond the end of the file,
     * the file is padded with empty lines up to that RID. This avoids
     * a more expensive line-by-line copy in a subsequent changeRow() call.
     *
     * A missing or unreadable index file yields an empty string and is not padded.
     *
     * @throws IndexWriteException when padding the index fails
     * @author Tom N Harris <tnharris@whoopdedo.org>
     */
    public function retrieveRow($rid)
    {
        if (!file_exists($this->filename)) {
            return '';
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return '';
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln == $rid) {
                fclose($fh);
                return rtrim((string)$line);
            }
        }
        fclose($fh);

        if (!$this->isWritable) return '';

        // still here? pad the index for the given ID
        // we do not simply call changeRow() here because appending is faster than line-by-line copying
        // $ln is the index of the last existing line, so $rid - $ln lines are missing
        $missing = (int)$rid - $ln;
        if ($missing < 1) {
            // nothing to pad (e.g. a negative RID); must not be reported as a write failure
            return '';
        }
        if (file_put_contents($this->filename, str_repeat("\n", $missing), FILE_APPEND) === false) {
            throw new IndexWriteException("Failed to write {$this->filename}");
        }

        return '';
    }

    /**
     * @inheritdoc
     *
     * The file is scanned once from the top; scanning stops as soon as the last requested
     * row has been read. RIDs that do not exist in the file are missing from the result.
     * The result is keyed by RID.
     */
    public function retrieveRows($rids)
    {
        $result = [];

        // a repeated RID could never match a second time and would block all larger RIDs
        $rids = array_unique($rids);
        sort($rids);
        $next = array_shift($rids);
        if ($next === null) {
            // nothing requested, no need to touch the file
            return $result;
        }

        if (!file_exists($this->filename)) {
            return $result;
        }
        $fh = @fopen($this->filename, 'r');
        if (!$fh) {
            return $result;
        }
        $ln = -1;
        while (($line = fgets($fh)) !== false) {
            if (++$ln === $next) {
                $result[$ln] = rtrim((string)$line);
                $next = array_shift($rids);
                // array_shift() returns null (not false) once all RIDs have been served
                if ($next === null) break;
            }
        }
        fclose($fh);
        return $result;
    }


    /**
     * @inheritdoc
     *
     * Values are trimmed before comparison. When the index is writable, values that were
     * not found are appended to the file and their new RIDs are returned as well.
     * The result maps each value to its RID.
     *
     * @throws IndexAccessException when the existing index can not be read
     * @throws IndexWriteException when appending new values fails
     */
    public function getRowIDs($values)
    {
        $values = array_map('trim', $values);
        $values = array_fill_keys($values, 1); // easier access as associative array

        // search for the values
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read {$this->filename}");
            }
            // stop reading as soon as all values have been found
            while (($line = fgets($fh)) !== false && $values) {
                $line = trim($line);
                if (isset($values[$line])) {
                    $result[$line] = $ln;
                    unset($values[$line]);
                }
                $ln++;
            }
            fclose($fh);
        }

        if (!$this->isWritable) return $result;
        if (!$values) return $result;

        // if there are still values, they have not been found and will be appended
        // $values can only be non-empty here when the file was read to its end,
        // so $ln equals the number of existing lines and is the next free RID
        $append = '';
        $added = [];
        foreach (array_keys($values) as $value) {
            $append .= "$value\n";
            $added[$value] = $ln++;
        }
        // single write instead of re-opening the file for every value
        if (file_put_contents($this->filename, $append, FILE_APPEND) === false) {
            throw new IndexWriteException("Failed to write {$this->filename}");
        }
        foreach ($added as $value => $rid) {
            $result[$value] = $rid;
        }

        return $result;
    }

    /**
     * @inheritdoc
     *
     * Every trimmed line of the index is matched against the given regular expression.
     * The result maps the RID of each matching line to its trimmed content.
     *
     * @throws IndexAccessException when the existing index can not be read
     */
    public function search($re)
    {
        $result = [];
        $ln = 0;
        if (file_exists($this->filename)) {
            $fh = @fopen($this->filename, 'r');
            if (!$fh) {
                throw new IndexAccessException("Failed to read {$this->filename}");
            }
            while (($line = fgets($fh)) !== false) {
                $line = trim($line);
                if (preg_match($re, $line)) {
                    $result[$ln] = $line;
                }
                $ln++;
            }
            fclose($fh);
        }
        return $result;
    }

    /**
     * Cached mechanism to retrieve a single value
     *
     * @param string $value
     * @return int the RID of the entry
     * @throws IndexAccessException
     * @throws IndexWriteException
     * @see getRowID()
     */
    public function accessCachedValue($value)
    {
        if (isset($this->ridCache[$value])) {
            return $this->ridCache[$value];
        }

        // limit cache to 10 entries by discarding the oldest element
        // as in DokuWiki usually only the most recently
        // added item will be requested again
        if (count($this->ridCache) >= 10) {
            // array_shift() must not be used here: it renumbers integer keys, and numeric
            // values (e.g. "2024") are stored under integer keys - the cache would then
            // answer for the wrong values
            reset($this->ridCache);
            unset($this->ridCache[key($this->ridCache)]);
        }
        $this->ridCache[$value] = $this->getRowID($value);
        return $this->ridCache[$value];
    }

    /**
     * @inheritdoc
     *
     * Counts the lines of the index file. A missing or unreadable file counts as 0.
     */
    public function count(): int
    {
        if (!file_exists($this->filename)) return 0;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return 0;
        $count = 0;
        while (fgets($fh) !== false) $count++;
        fclose($fh);
        return $count;
    }

    /**
     * @inheritdoc
     *
     * Lazily yields RID => line (trailing whitespace removed) for every line of the index.
     * A missing or unreadable file yields nothing.
     */
    public function getIterator(): \Generator
    {
        if (!file_exists($this->filename)) return;
        $fh = @fopen($this->filename, 'r');
        if (!$fh) return;
        $ln = 0;
        try {
            while (($line = fgets($fh)) !== false) {
                yield $ln++ => rtrim($line);
            }
        } finally {
            // also runs when the consumer stops iterating early and the generator is destroyed
            fclose($fh);
        }
    }
}