<?php

namespace dokuwiki\ChangeLog;

/**
 * Methods for handling the changelog of pages or media files
 *
 * A changelog is a text file with one entry per line. Lines are parsed with
 * parseChangelogLine(), which yields an array containing at least a 'date' key
 * (the revision timestamp) or false for lines that cannot be parsed.
 * Parsed entries are stored in a cache shared through the global $cache_revinfo,
 * keyed by page id and revision timestamp.
 */
abstract class ChangeLog
{

    /** @var string id of the page or media file this changelog belongs to */
    protected $id;
    /** @var int maximum block size read from file; 0 means "read the whole file at once" */
    protected $chunk_size;
    /** @var array reference to the global revision info cache: [id => [rev => info]] */
    protected $cache;

    /**
     * Constructor
     *
     * @param string $id page id
     * @param int $chunk_size maximum block size read from file
     */
    public function __construct($id, $chunk_size = 8192)
    {
        global $cache_revinfo;

        // share the cache between all ChangeLog instances of this request
        $this->cache =& $cache_revinfo;
        if (!isset($this->cache[$id])) {
            $this->cache[$id] = array();
        }

        $this->id = $id;
        $this->setChunkSize($chunk_size);
    }

    /**
     * Set chunk size for file reading
     *
     * A chunk size of zero makes the whole file being read at once.
     * Non-numeric and negative values are treated as zero.
     *
     * @param int $chunk_size maximum block size read from file
     */
    public function setChunkSize($chunk_size)
    {
        if (!is_numeric($chunk_size)) $chunk_size = 0;

        $this->chunk_size = (int)max($chunk_size, 0);
    }

    /**
     * Returns path to changelog
     *
     * @return string path to file
     */
    abstract protected function getChangelogFilename();

    /**
     * Returns path to current page/media
     *
     * @return string path to file
     */
    abstract protected function getFilename();

    /**
     * Get the changelog information for a specific page id and revision (timestamp)
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
     * containing the requested changelog line is read.
     *
     * @param int $rev revision timestamp
     * @return bool|array false or array with entries:
     *      - date:  unix timestamp
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    page id
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisionInfo($rev)
    {
        $rev = max($rev, 0);

        // check if it's already in the memory cache
        if (isset($this->cache[$this->id][$rev])) {
            return $this->cache[$this->id][$rev];
        }

        // read lines from changelog; on failure readloglines() returns false
        // and list() leaves both variables null
        list($fp, $lines) = $this->readloglines($rev);
        if ($fp) {
            fclose($fp);
        }
        if (empty($lines)) return false;

        // parse and cache changelog lines
        foreach ($lines as $value) {
            $tmp = parseChangelogLine($value);
            if ($tmp !== false) {
                $this->cache[$this->id][$tmp['date']] = $tmp;
            }
        }
        if (!isset($this->cache[$this->id][$rev])) {
            return false;
        }
        return $this->cache[$this->id][$rev];
    }

    /**
     * Return a list of page revisions numbers
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     * By default the current revision is skipped.
     *
     * The current revision is automatically skipped when the page exists.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first lets the current revision be read too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines are received.
     *
     * @param int $first skip the first n changelog lines
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, newest first
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = array();
        $lines = array();
        $count = 0;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            $first = 0;
        } else {
            if (file_exists($this->getFilename())) {
                // skip current revision if the page exists
                $first = max($first + 1, 0);
            }
        }

        $file = $this->getChangelogFilename();

        if (!file_exists($file)) {
            return $revs;
        }
        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($file);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($file, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp);

            // chunk backwards
            $finger = max($tail - $this->chunk_size, 0);
            while ($count < $num + $first) {
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                } else {
                    $finger = $nl;
                }

                // read the lines between $finger and $tail
                $tmp = $this->readChunk($fp, $finger, $tail);

                // combine with previous chunk
                $count += count($tmp);
                $lines = array_merge($tmp, $lines);

                // next chunk
                if ($finger == 0) {
                    break; // already read all the lines
                } else {
                    $tail = $finger;
                    $finger = max($tail - $this->chunk_size, 0);
                }
            }
            fclose($fp);
        }

        // skip parsing extra lines
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } else {
            if ($first > 0 && $num == 0) {
                $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
            } elseif ($first == 0 && $num > 0) {
                $lines = array_slice($lines, max(count($lines) - $num, 0));
            }
        }

        // handle lines in reverse order
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $tmp = parseChangelogLine($lines[$i]);
            if ($tmp !== false) {
                $this->cache[$this->id][$tmp['date']] = $tmp;
                $revs[] = $tmp['date'];
            }
        }

        return $revs;
    }

    /**
     * Get the nth revision left or right hand side for a specific page id and revision (timestamp)
     *
     * For large changelog files, only the chunk containing the
     * reference revision $rev is read and sometimes a next chunk.
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo.
     *
     * @param int $rev revision timestamp used as start date (doesn't need to be a revision number)
     * @param int $direction give position of returned revision with respect to $rev;
     *                       positive=next, negative=prev
     * @return bool|int
     *      timestamp of the requested revision
     *      otherwise false
     */
    public function getRelativeRevision($rev, $direction)
    {
        $rev = max($rev, 0);
        $direction = (int)$direction;

        //no direction given or last rev, so no follow-up
        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
            return false;
        }

        //get lines from changelog
        list($fp, $lines, $head, $tail, $eof) = $this->readloglines($rev);
        if (empty($lines)) {
            // do not leave the changelog open when the chunk held no lines
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        // look for revisions later/earlier than $rev, when found count till the wanted revision is reached
        // also parse and cache changelog lines for getRevisionInfo().
        $revcounter = 0;
        $relativerev = false;
        $checkotherchunck = true; //always runs once
        while (!$relativerev && $checkotherchunck) {
            $tmp = array();
            //parse in normal or reverse order
            $count = count($lines);
            if ($direction > 0) {
                $start = 0;
                $step = 1;
            } else {
                $start = $count - 1;
                $step = -1;
            }
            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
                $tmp = parseChangelogLine($lines[$i]);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    //look for revs older/earlier than reference $rev and select $direction-th one
                    if (($direction > 0 && $tmp['date'] > $rev) || ($direction < 0 && $tmp['date'] < $rev)) {
                        $revcounter++;
                        if ($revcounter == abs($direction)) {
                            $relativerev = $tmp['date'];
                        }
                    }
                }
            }

            // date of the last examined line; null when that line could not be parsed.
            // Reading it through a guard avoids an array access on false.
            $lastdate = (is_array($tmp) && isset($tmp['date'])) ? $tmp['date'] : null;

            //true when $rev is found, but not the wanted follow-up.
            $checkotherchunck = $fp
                && ($lastdate == $rev || ($revcounter > 0 && !$relativerev))
                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));

            if ($checkotherchunck) {
                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);

                if (empty($lines)) break;
            }
        }
        if ($fp) {
            fclose($fp);
        }

        return $relativerev;
    }

    /**
     * Returns revisions around rev1 and rev2
     * When available it returns $max entries for each revision
     *
     * @param int $rev1 oldest revision timestamp
     * @param int $rev2 newest revision timestamp (0 looks up last revision)
     * @param int $max maximum number of revisions returned
     * @return array with two arrays with revisions surrounding rev1 respectively rev2
     *               (both empty when nothing could be found around rev2)
     */
    public function getRevisionsAround($rev1, $rev2, $max = 50)
    {
        // make $max odd, so there is room for the same number of entries on both sides
        $max = floor(abs($max) / 2) * 2 + 1;
        $rev1 = max($rev1, 0);
        $rev2 = max($rev2, 0);

        if ($rev2) {
            if ($rev2 < $rev1) {
                $rev = $rev2;
                $rev2 = $rev1;
                $rev1 = $rev;
            }
        } else {
            //empty right side means a removed page. Look up last revision.
            $revs = $this->getRevisions(-1, 1);
            if (empty($revs)) {
                // no changelog entries at all, nothing to show around
                return array(array(), array());
            }
            $rev2 = $revs[0];
        }
        //collect revisions around rev2
        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $this->retrieveRevisionsAround($rev2, $max);

        if (empty($revs2)) {
            if ($fp) {
                fclose($fp);
            }
            return array(array(), array());
        }

        //collect revisions around rev1
        $index = array_search($rev1, $allrevs);
        if ($index === false) {
            //no overlapping revisions
            list($revs1, , $fp1) = $this->retrieveRevisionsAround($rev1, $max);
            if ($fp1) {
                fclose($fp1);
            }
            if (empty($revs1)) $revs1 = array();
        } else {
            //revisions overlaps, reuse revisions around rev2
            $revs1 = $allrevs;
            while ($head > 0) {
                for ($i = count($lines) - 1; $i >= 0; $i--) {
                    $tmp = parseChangelogLine($lines[$i]);
                    if ($tmp !== false) {
                        $this->cache[$this->id][$tmp['date']] = $tmp;
                        $revs1[] = $tmp['date'];
                        // every older revision added shifts rev1 one position up after sorting
                        $index++;

                        if ($index > floor($max / 2)) break 2;
                    }
                }

                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
            }
            sort($revs1);
            //return wanted selection
            $revs1 = array_slice($revs1, max($index - floor($max / 2), 0), $max);
        }

        // the file pointer of the chunk reading around rev2 is no longer needed
        if ($fp) {
            fclose($fp);
        }

        return array(array_reverse($revs1), array_reverse($revs2));
    }


    /**
     * Checks if the ID has old revisions
     *
     * Only tests whether the changelog file exists.
     *
     * @return boolean
     */
    public function hasRevisions()
    {
        $file = $this->getChangelogFilename();
        return file_exists($file);
    }

    /**
     * Returns lines from changelog.
     * If the file is larger than the chunk size, only the chunk is read that could contain $rev.
     *
     * @param int $rev revision timestamp
     * @return array|false
     *     if success returns array(fp, array(changeloglines), $head, $tail, $eof)
     *     where fp is only defined for chunk reading (null otherwise) and needs closing by the caller.
     *     otherwise false
     */
    protected function readloglines($rev)
    {
        $file = $this->getChangelogFilename();

        if (!file_exists($file)) {
            return false;
        }

        $fp = null;
        $head = 0;
        $tail = 0;
        $eof = 0;

        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($file);
            if ($lines === false) {
                return false;
            }
        } else {
            // read by chunk
            $fp = fopen($file, 'rb'); // "file pointer"
            if ($fp === false) {
                return false;
            }
            $head = 0;
            fseek($fp, 0, SEEK_END);
            $eof = ftell($fp);
            $tail = $eof;

            // find chunk: binary search on the line starts between $head and $tail
            while ($tail - $head > $this->chunk_size) {
                $finger = $head + (int)floor(($tail - $head) / 2.0);
                $finger = $this->getNewlinepointer($fp, $finger);
                $tmp = fgets($fp);
                if ($finger == $head || $finger == $tail) {
                    break;
                }
                $info = parseChangelogLine($tmp);

                // an unparsable line is treated like a revision that is not newer than $rev
                if (is_array($info) && isset($info['date']) && $info['date'] > $rev) {
                    $tail = $finger;
                } else {
                    $head = $finger;
                }
            }

            if ($tail - $head < 1) {
                // could not find chunk, assume requested rev is missing
                fclose($fp);
                return false;
            }

            $lines = $this->readChunk($fp, $head, $tail);
        }
        return array(
            $fp,
            $lines,
            $head,
            $tail,
            $eof,
        );
    }

    /**
     * Read chunk and return array with lines of given chunk.
     * Has no check if $head and $tail are really at a new line
     *
     * @param resource $fp resource filepointer
     * @param int $head start point chunk
     * @param int $tail end point chunk
     * @return array lines read from chunk (without the line endings)
     */
    protected function readChunk($fp, $head, $tail)
    {
        $chunk = '';
        $chunk_size = max($tail - $head, 0); // found chunk size
        $got = 0;
        fseek($fp, $head);
        while ($got < $chunk_size && !feof($fp)) {
            // never request more than one configured block per fread call
            $tmp = @fread($fp, max(min($this->chunk_size, $chunk_size - $got), 0));
            if ($tmp === false) { //error state
                break;
            }
            $got += strlen($tmp);
            $chunk .= $tmp;
        }
        $lines = explode("\n", $chunk);
        array_pop($lines); // remove the element after the trailing newline
        return $lines;
    }

    /**
     * Set pointer to first new line after $finger and return its position
     *
     * A $finger of zero (or less) is returned as is, with the pointer placed there.
     *
     * @param resource $fp filepointer
     * @param int $finger a pointer
     * @return int pointer
     */
    protected function getNewlinepointer($fp, $finger)
    {
        fseek($fp, $finger);
        $nl = $finger;
        if ($finger > 0) {
            fgets($fp); // slip the finger forward to a new line
            $nl = ftell($fp);
        }
        return $nl;
    }

    /**
     * Check whether given revision is the current page
     *
     * @param int $rev timestamp of current page
     * @return bool true if $rev is current revision, otherwise false
     */
    public function isCurrentRevision($rev)
    {
        return $rev == @filemtime($this->getFilename());
    }

    /**
     * Return an existing revision for a specific date which is
     * the current one or younger or equal than the date
     *
     * @param number $date_at timestamp
     * @return string|int|false '' for the current revision, the revision timestamp found by
     *                          getRelativeRevision() otherwise, or false when there is none
     */
    public function getLastRevisionAt($date_at)
    {
        //requested date_at(timestamp) younger or equal than modified_time($this->id) => load current
        if (file_exists($this->getFilename()) && $date_at >= @filemtime($this->getFilename())) {
            return '';
        }

        //+1 to get also the requested date revision
        if ($rev = $this->getRelativeRevision($date_at + 1, -1)) {
            return $rev;
        }
        return false;
    }

    /**
     * Returns the next lines of the changelog of the chunk before head or after tail
     *
     * @param resource $fp filepointer
     * @param int $head position head of last chunk
     * @param int $tail position tail of last chunk
     * @param int $direction positive forward, negative backward
     * @return array with entries:
     *    - $lines: changelog lines of read chunk
     *    - $head: head of chunk
     *    - $tail: tail of chunk
     */
    protected function readAdjacentChunk($fp, $head, $tail, $direction)
    {
        // without a file pointer the whole file was read already, there is no adjacent chunk
        if (!$fp) return array(array(), $head, $tail);

        if ($direction > 0) {
            //read forward
            $head = $tail;
            $tail = $head + (int)floor($this->chunk_size * (2 / 3));
            $tail = $this->getNewlinepointer($fp, $tail);
        } else {
            //read backward
            $tail = $head;
            $head = max($tail - $this->chunk_size, 0);
            while (true) {
                $nl = $this->getNewlinepointer($fp, $head);
                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $head = max($head - $this->chunk_size, 0);
                } else {
                    $head = $nl;
                    break;
                }
            }
        }

        //load next chunk
        $lines = $this->readChunk($fp, $head, $tail);
        return array($lines, $head, $tail);
    }

    /**
     * Collect the $max revisions near to the timestamp $rev
     *
     * @param int $rev revision timestamp
     * @param int $max maximum number of revisions to be returned
     * @return bool|array
     *     return array with entries:
     *       - $requestedrevs: array of with $max revision timestamps
     *       - $revs: all parsed revision timestamps
     *       - $fp: filepointer only defined for chunk reading, needs closing.
     *       - $lines: non-parsed changelog lines before the parsed revisions
     *       - $head: position of first read changelogline
     *       - $lasttail: position of end of last read changelogline
     *     otherwise false (any opened file pointer is closed in that case)
     */
    protected function retrieveRevisionsAround($rev, $max)
    {
        //get lines from changelog
        list($fp, $lines, $starthead, $starttail, /* $eof */) = $this->readloglines($rev);
        if (empty($lines)) {
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        //parse chunk containing $rev, and read forward more chunks until $max/2 is reached
        $head = $starthead;
        $tail = $starttail;
        $revs = array();
        $aftercount = $beforecount = 0;
        while (count($lines) > 0) {
            foreach ($lines as $line) {
                $tmp = parseChangelogLine($line);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    $revs[] = $tmp['date'];
                    if ($tmp['date'] >= $rev) {
                        //count revs after reference $rev
                        $aftercount++;
                        if ($aftercount == 1) $beforecount = count($revs);
                    }
                    //enough revs after reference $rev?
                    if ($aftercount > floor($max / 2)) break 2;
                }
            }
            //retrieve next chunk
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
        }
        if ($aftercount == 0) {
            // nothing at or after $rev was found; the caller gets no pointer, so close it here
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        $lasttail = $tail;

        //read additional chunks backward until $max/2 is reached and total number of revs is equal to $max
        $lines = array();
        $i = 0;
        $head = $starthead;
        $tail = $starttail;
        while ($head > 0) {
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);

            for ($i = count($lines) - 1; $i >= 0; $i--) {
                $tmp = parseChangelogLine($lines[$i]);
                if ($tmp !== false) {
                    $this->cache[$this->id][$tmp['date']] = $tmp;
                    $revs[] = $tmp['date'];
                    $beforecount++;
                    //enough revs before reference $rev?
                    if ($beforecount > max(floor($max / 2), $max - $aftercount)) break 2;
                }
            }
        }
        sort($revs);

        //keep only non-parsed lines; $i is -1 when the last chunk was parsed completely,
        //in which case nothing is left over
        $lines = array_slice($lines, 0, max($i, 0));
        //trunk desired selection
        $requestedrevs = array_slice($revs, -$max, $max);

        return array($requestedrevs, $revs, $fp, $lines, $head, $lasttail);
    }
}