<?php

namespace dokuwiki\ChangeLog;

/**
 * Methods for handling the changelog of pages or media files
 *
 * Concrete subclasses supply the path of the changelog file and of the current
 * page/media file. Parsed changelog lines are stored in the global
 * $cache_revinfo array (bound by reference in the constructor), keyed by id and
 * revision timestamp, so that later lookups can be answered without file access.
 *
 * Line parsing is delegated to the global function parseChangelogLine(), which is
 * expected to return false or an array containing at least a 'date' entry.
 */
abstract class ChangeLog
{

    /** @var string id of the page or media file */
    protected $id;
    /** @var int maximum block size read from file, 0 means "read whole file" */
    protected $chunk_size;
    /** @var array reference to the global revision info cache */
    protected $cache;

    /**
     * Constructor
     *
     * @param string $id page id
     * @param int $chunk_size maximum block size read from file
     */
    public function __construct($id, $chunk_size = 8192)
    {
        global $cache_revinfo;

        // share the cache between all ChangeLog instances of this request
        $this->cache =& $cache_revinfo;
        if (!isset($this->cache[$id])) {
            $this->cache[$id] = array();
        }

        $this->id = $id;
        $this->setChunkSize($chunk_size);
    }

    /**
     * Set chunk size for file reading
     * A chunk size of zero makes the whole file be read at once.
     * Non-numeric and negative values are treated as zero.
     *
     * @param int $chunk_size maximum block size read from file
     */
    public function setChunkSize($chunk_size)
    {
        if (!is_numeric($chunk_size)) $chunk_size = 0;

        $this->chunk_size = (int)max($chunk_size, 0);
    }

    /**
     * Returns path to changelog
     *
     * @return string path to file
     */
    abstract protected function getChangelogFilename();

    /**
     * Returns path to current page/media
     *
     * @return string path to file
     */
    abstract protected function getFilename();

    /**
     * Get the changelog information for a specific page id and revision (timestamp)
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
     * containing the requested changelog line is read.
     *
     * @param int $rev revision timestamp
     * @return bool|array false or array with entries:
     *      - date:  unix timestamp
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    page id
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisionInfo($rev)
    {
        $rev = max($rev, 0);

        // check if it's already in the memory cache
        if (isset($this->cache[$this->id]) && isset($this->cache[$this->id][$rev])) {
            return $this->cache[$this->id][$rev];
        }

        // read lines from changelog
        $result = $this->readloglines($rev);
        if ($result === false) return false;
        list($fp, $lines) = $result;
        if ($fp) {
            fclose($fp);
        }
        if (empty($lines)) return false;

        // parse and cache changelog lines
        foreach ($lines as $value) {
            $tmp = parseChangelogLine($value);
            if ($tmp !== false) {
                $this->cache[$this->id][$tmp['date']] = $tmp;
            }
        }
        if (!isset($this->cache[$this->id][$rev])) {
            return false;
        }
        return $this->cache[$this->id][$rev];
    }

    /**
     * Return a list of page revisions numbers
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     * By default the current revision is skipped.
     *
     * The current revision is automatically skipped when the page exists.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first lets the current revision be read too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines are received.
     *
     * @param int $first skip the first n changelog lines
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, newest first
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = array();
        $lines = array();
        $count = 0;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            $first = 0;
        } elseif (file_exists($this->getFilename())) {
            // skip current revision if the page exists
            $first = max($first + 1, 0);
        }

        $file = $this->getChangelogFilename();

        if (!file_exists($file)) {
            return $revs;
        }
        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($file);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($file, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp);

            // chunk backwards
            $finger = max($tail - $this->chunk_size, 0);
            while ($count < $num + $first) {
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                }
                $finger = $nl;

                // read the lines between the newline-aligned finger and the tail
                $tmp = $this->readChunk($fp, $finger, $tail);

                // combine with previous chunk
                $count += count($tmp);
                $lines = array_merge($tmp, $lines);

                // next chunk
                if ($finger == 0) {
                    break; // already read all the lines
                }
                $tail = $finger;
                $finger = max($tail - $this->chunk_size, 0);
            }
            fclose($fp);
        }

        // skip parsing extra lines
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } elseif ($first > 0 && $num == 0) {
            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
        } elseif ($first == 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $num, 0));
        }

        // handle lines in reverse order
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $tmp = parseChangelogLine($lines[$i]);
            if ($tmp !== false) {
                $this->cache[$this->id][$tmp['date']] = $tmp;
                $revs[] = $tmp['date'];
            }
        }

        return $revs;
    }

    /**
     * Get the nth revision left or right hand side for a specific page id and revision (timestamp)
     *
     * For large changelog files, only the chunk containing the
     * reference revision $rev is read and sometimes a next chunk.
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo.
     *
     * @param int $rev revision timestamp used as startdate (doesn't need to be revisionnumber)
     * @param int $direction give position of returned revision with respect to $rev; positive=next, negative=prev
     * @return bool|int
     *      timestamp of the requested revision
     *      otherwise false
     */
    public function getRelativeRevision($rev, $direction)
    {
        $rev = max($rev, 0);
        $direction = (int)$direction;

        // no direction given or last rev, so no follow-up
        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
            return false;
        }

        // get lines from changelog
        $result = $this->readloglines($rev);
        if ($result === false) return false;
        list($fp, $lines, $head, $tail, $eof) = $result;
        if (empty($lines)) {
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        // look for revisions later/earlier than $rev, when found count till the wanted revision is reached
        // also parse and cache changelog lines for getRevisionInfo().
        $revcounter = 0;
        $relativerev = false;
        $checkotherchunk = true; //always runs once
        while (!$relativerev && $checkotherchunk) {
            $tmp = array();
            // parse in normal or reverse order
            $count = count($lines);
            if ($direction > 0) {
                $start = 0;
                $step = 1;
            } else {
                $start = $count - 1;
                $step = -1;
            }
            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
                $tmp = parseChangelogLine($lines[$i]);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    // look for revs later/earlier than reference $rev and select $direction-th one
                    if (($direction > 0 && $tmp['date'] > $rev) || ($direction < 0 && $tmp['date'] < $rev)) {
                        $revcounter++;
                        if ($revcounter == abs($direction)) {
                            $relativerev = $tmp['date'];
                        }
                    }
                }
            }

            // date of the last visited line; null when that line could not be parsed,
            // which is the value the former direct array access on false produced
            $lastdate = (is_array($tmp) && isset($tmp['date'])) ? $tmp['date'] : null;

            // true when $rev is found, but not the wanted follow-up.
            $checkotherchunk = $fp
                && ($lastdate == $rev || ($revcounter > 0 && !$relativerev))
                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));

            if ($checkotherchunk) {
                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);

                if (empty($lines)) break;
            }
        }
        if ($fp) {
            fclose($fp);
        }

        return $relativerev;
    }

    /**
     * Returns revisions around rev1 and rev2
     * When available it returns $max entries for each revision
     *
     * @param int $rev1 oldest revision timestamp
     * @param int $rev2 newest revision timestamp (0 looks up last revision)
     * @param int $max maximum number of revisions returned
     * @return array with two arrays with revisions surrounding rev1 respectively rev2,
     *               both ordered newest first; two empty arrays when nothing is found
     */
    public function getRevisionsAround($rev1, $rev2, $max = 50)
    {
        // force an odd window so the reference revision can sit in the middle
        $max = (int)(floor(abs($max) / 2) * 2 + 1);
        $half = (int)floor($max / 2);
        $rev1 = max($rev1, 0);
        $rev2 = max($rev2, 0);

        if ($rev2) {
            if ($rev2 < $rev1) {
                $rev = $rev2;
                $rev2 = $rev1;
                $rev1 = $rev;
            }
        } else {
            // empty right side means a removed page. Look up last revision.
            $revs = $this->getRevisions(-1, 1);
            if (empty($revs)) {
                // no changelog entries at all, nothing to surround
                return array(array(), array());
            }
            $rev2 = $revs[0];
        }

        // collect revisions around rev2
        $around2 = $this->retrieveRevisionsAround($rev2, $max);
        if ($around2 === false) return array(array(), array());
        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $around2;

        if (empty($revs2)) {
            if ($fp) {
                fclose($fp);
            }
            return array(array(), array());
        }

        // collect revisions around rev1
        // (loose comparison on purpose: $rev1 may be a numeric string)
        $index = array_search($rev1, $allrevs);
        if ($index === false) {
            // no overlapping revisions
            $revs1 = array();
            $around1 = $this->retrieveRevisionsAround($rev1, $max);
            if ($around1 !== false) {
                list($revs1, , $fp1) = $around1;
                if ($fp1) {
                    fclose($fp1);
                }
                if (empty($revs1)) $revs1 = array();
            }
        } else {
            // revisions overlap, reuse revisions around rev2
            $revs1 = $allrevs;
            while ($head > 0) {
                for ($i = count($lines) - 1; $i >= 0; $i--) {
                    $tmp = parseChangelogLine($lines[$i]);
                    if ($tmp !== false) {
                        $this->cache[$this->id][$tmp['date']] = $tmp;
                        $revs1[] = $tmp['date'];
                        $index++;

                        if ($index > $half) break 2;
                    }
                }

                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
            }
            sort($revs1);
            // return wanted selection
            $revs1 = array_slice($revs1, max($index - $half, 0), $max);
        }

        if ($fp) {
            fclose($fp);
        }

        return array(array_reverse($revs1), array_reverse($revs2));
    }

    /**
     * Returns lines from changelog.
     * If the file is larger than the chunk size, only the chunk that could contain $rev is read.
     * The chunk is located by bisecting the file on the dates of the lines found at the probe positions.
     *
     * @param int $rev revision timestamp
     * @return array|false
     *     if success returns array(fp, array(changeloglines), $head, $tail, $eof)
     *     where fp is only defined for chunk reading and needs closing by the caller.
     *     otherwise false
     */
    protected function readloglines($rev)
    {
        $file = $this->getChangelogFilename();

        if (!file_exists($file)) {
            return false;
        }

        $fp = null;
        $head = 0;
        $tail = 0;
        $eof = 0;

        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($file);
            if ($lines === false) {
                return false;
            }
        } else {
            // read by chunk
            $fp = fopen($file, 'rb'); // "file pointer"
            if ($fp === false) {
                return false;
            }
            $head = 0;
            fseek($fp, 0, SEEK_END);
            $eof = ftell($fp);
            $tail = $eof;

            // find chunk
            while ($tail - $head > $this->chunk_size) {
                $finger = $head + (int)floor(($tail - $head) / 2.0);
                $finger = $this->getNewlinepointer($fp, $finger);
                $tmp = fgets($fp);
                if ($finger == $head || $finger == $tail) {
                    break;
                }
                $tmp = parseChangelogLine($tmp);

                // an unparsable probe line is treated as "not newer than $rev",
                // so the search continues in the upper half
                if ($tmp !== false && $tmp['date'] > $rev) {
                    $tail = $finger;
                } else {
                    $head = $finger;
                }
            }

            if ($tail - $head < 1) {
                // could not find chunk, assume requested rev is missing
                fclose($fp);
                return false;
            }

            $lines = $this->readChunk($fp, $head, $tail);
        }
        return array(
            $fp,
            $lines,
            $head,
            $tail,
            $eof,
        );
    }

    /**
     * Read chunk and return array with lines of given chunk.
     * Has no check if $head and $tail are really at a new line.
     * The returned lines carry no trailing newline; whatever follows the
     * last newline of the chunk is dropped.
     *
     * @param resource $fp resource filepointer
     * @param int $head start point chunk
     * @param int $tail end point chunk
     * @return array lines read from chunk
     */
    protected function readChunk($fp, $head, $tail)
    {
        $chunk = '';
        $chunk_size = max($tail - $head, 0); // found chunk size
        $got = 0;
        fseek($fp, $head);
        while ($got < $chunk_size && !feof($fp)) {
            $tmp = @fread($fp, max(min($this->chunk_size, $chunk_size - $got), 0));
            if ($tmp === false) { //error state
                break;
            }
            $got += strlen($tmp);
            $chunk .= $tmp;
        }
        $lines = explode("\n", $chunk);
        array_pop($lines); // remove trailing newline
        return $lines;
    }

    /**
     * Set pointer to first new line after $finger and return its position
     *
     * For $finger 0 the pointer stays at the start of the file.
     *
     * @param resource $fp filepointer
     * @param int $finger a pointer
     * @return int pointer
     */
    protected function getNewlinepointer($fp, $finger)
    {
        fseek($fp, $finger);
        $nl = $finger;
        if ($finger > 0) {
            fgets($fp); // slip the finger forward to a new line
            $nl = ftell($fp);
        }
        return $nl;
    }

    /**
     * Check whether given revision is the current page
     *
     * @param int $rev timestamp of current page
     * @return bool true if $rev is current revision, otherwise false
     */
    public function isCurrentRevision($rev)
    {
        return $rev == @filemtime($this->getFilename());
    }

    /**
     * Return an existing revision for a specific date which is
     * the current one or younger or equal than the date
     *
     * @param number $date_at timestamp
     * @return string|int|false '' for the current revision, the revision timestamp
     *                          of an older revision, or false when none is found
     */
    public function getLastRevisionAt($date_at)
    {
        // requested date_at(timestamp) younger or equal than modified_time($this->id) => load current
        if (file_exists($this->getFilename()) && $date_at >= @filemtime($this->getFilename())) {
            return '';
        }

        // +1 to get also the requested date revision
        $rev = $this->getRelativeRevision($date_at + 1, -1);
        return $rev ? $rev : false;
    }

    /**
     * Returns the next lines of the changelog of the chunk before head or after tail
     *
     * @param resource $fp filepointer
     * @param int $head position head of last chunk
     * @param int $tail position tail of last chunk
     * @param int $direction positive forward, negative backward
     * @return array with entries:
     *    - $lines: changelog lines of the chunk that was read
     *    - $head: head of chunk
     *    - $tail: tail of chunk
     */
    protected function readAdjacentChunk($fp, $head, $tail, $direction)
    {
        // without a file pointer the whole file was read already, there is no adjacent chunk
        if (!$fp) return array(array(), $head, $tail);

        if ($direction > 0) {
            // read forward
            $head = $tail;
            $tail = $head + (int)floor($this->chunk_size * (2 / 3));
            $tail = $this->getNewlinepointer($fp, $tail);
        } else {
            // read backward
            $tail = $head;
            $head = max($tail - $this->chunk_size, 0);
            while (true) {
                $nl = $this->getNewlinepointer($fp, $head);
                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $head = max($head - $this->chunk_size, 0);
                } else {
                    $head = $nl;
                    break;
                }
            }
        }

        // load next chunk
        $lines = $this->readChunk($fp, $head, $tail);
        return array($lines, $head, $tail);
    }

    /**
     * Collect the $max revisions near to the timestamp $rev
     *
     * @param int $rev revision timestamp
     * @param int $max maximum number of revisions to be returned
     * @return bool|array
     *     return array with entries:
     *       - $requestedrevs: array of with $max revision timestamps
     *       - $revs: all parsed revision timestamps
     *       - $fp: filepointer only defined for chunk reading, needs closing by the caller.
     *       - $lines: non-parsed changelog lines before the parsed revisions
     *       - $head: position of first read changelogline
     *       - $lasttail: position of end of last read changelogline
     *     otherwise false (any opened file pointer is closed in that case)
     */
    protected function retrieveRevisionsAround($rev, $max)
    {
        // get lines from changelog
        $result = $this->readloglines($rev);
        if ($result === false) return false;
        list($fp, $lines, $starthead, $starttail) = $result;
        if (empty($lines)) {
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        $half = (int)floor($max / 2);

        // parse chunk containing $rev, and read forward more chunks until $max/2 is reached
        $head = $starthead;
        $tail = $starttail;
        $revs = array();
        $aftercount = $beforecount = 0;
        while (count($lines) > 0) {
            foreach ($lines as $line) {
                $tmp = parseChangelogLine($line);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    $revs[] = $tmp['date'];
                    if ($tmp['date'] >= $rev) {
                        // count revs after reference $rev
                        $aftercount++;
                        if ($aftercount == 1) $beforecount = count($revs);
                    }
                    // enough revs after reference $rev?
                    if ($aftercount > $half) break 2;
                }
            }
            // retrieve next chunk
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
        }
        if ($aftercount == 0) {
            // nothing at or after $rev was found; the caller gets no handle, so close it here
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        $lasttail = $tail;

        // read additional chunks backward until $max/2 is reached and total number of revs is equal to $max
        $lines = array();
        $i = 0;
        $head = $starthead;
        $tail = $starttail;
        while ($head > 0) {
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);

            for ($i = count($lines) - 1; $i >= 0; $i--) {
                $tmp = parseChangelogLine($lines[$i]);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    $revs[] = $tmp['date'];
                    $beforecount++;
                    // enough revs before reference $rev?
                    if ($beforecount > max($half, $max - $aftercount)) break 2;
                }
            }
        }
        sort($revs);

        // keep only non-parsed lines; $i is -1 when the last chunk was parsed completely,
        // in which case no unparsed line is left
        $lines = array_slice($lines, 0, max($i, 0));
        // trunk desired selection
        $requestedrevs = array_slice($revs, -$max, $max);

        return array($requestedrevs, $revs, $fp, $lines, $head, $lasttail);
    }
}