1 <?php
2 
3 namespace dokuwiki\ChangeLog;
4 
5 use dokuwiki\Logger;
6 
7 /**
8  * ChangeLog Prototype; methods for handling changelog
9  */
10 abstract class ChangeLog
11 {
12     use ChangeLogTrait;
13 
14     /** @var string */
15     protected $id;
16     /** @var false|int */
17     protected $currentRevision;
18     /** @var array */
19     protected $cache = [];
20 
21     /**
22      * Constructor
23      *
24      * @param string $id page id
25      * @param int $chunk_size maximum block size read from file
26      */
27     public function __construct($id, $chunk_size = 8192)
28     {
29         global $cache_revinfo;
30 
31         $this->cache =& $cache_revinfo;
32         if (!isset($this->cache[$id])) {
33             $this->cache[$id] = [];
34         }
35 
36         $this->id = $id;
37         $this->setChunkSize($chunk_size);
38     }
39 
    /**
     * Returns path to current page/media
     *
     * Within this class it is called without argument to locate the current item
     * file, and with a revision timestamp to locate the file of that revision.
     *
     * @param string|int $rev empty string or revision timestamp
     * @return string path to file
     */
    abstract protected function getFilename($rev = '');
47 
    /**
     * Returns mode
     *
     * The returned value is stored under the 'mode' key of every revision info
     * array produced by this class.
     *
     * @return string RevisionInfo::MODE_MEDIA or RevisionInfo::MODE_PAGE
     */
    abstract protected function getMode();
54 
55     /**
56      * Check whether given revision is the current page
57      *
58      * @param int $rev timestamp of current page
59      * @return bool true if $rev is current revision, otherwise false
60      */
61     public function isCurrentRevision($rev)
62     {
63         return $rev == $this->currentRevision();
64     }
65 
66     /**
67      * Checks if the revision is last revision
68      *
69      * @param int $rev revision timestamp
70      * @return bool true if $rev is last revision, otherwise false
71      */
72     public function isLastRevision($rev = null)
73     {
74         return $rev === $this->lastRevision();
75     }
76 
77     /**
78      * Return the current revision identifier
79      *
80      * The "current" revision means current version of the page or media file. It is either
81      * identical with or newer than the "last" revision, that depends on whether the file
82      * has modified, created or deleted outside of DokuWiki.
83      * The value of identifier can be determined by timestamp as far as the file exists,
84      * otherwise it must be assigned larger than any other revisions to keep them sortable.
85      *
86      * @return int|false revision timestamp
87      */
88     public function currentRevision()
89     {
90         if (!isset($this->currentRevision)) {
91             // set ChangeLog::currentRevision property
92             $this->getCurrentRevisionInfo();
93         }
94         return $this->currentRevision;
95     }
96 
97     /**
98      * Return the last revision identifier, date value of the last entry of the changelog
99      *
100      * @return int|false revision timestamp
101      */
102     public function lastRevision()
103     {
104         $revs = $this->getRevisions(-1, 1);
105         return empty($revs) ? false : $revs[0];
106     }
107 
108     /**
109      * Parses a changelog line into its components and save revision info to the cache pool
110      *
111      * @param string $value changelog line
112      * @return array|bool parsed line or false
113      */
114     protected function parseAndCacheLogLine($value)
115     {
116         $info = static::parseLogLine($value);
117         if (is_array($info)) {
118             $info['mode'] = $this->getMode();
119             $this->cache[$this->id][$info['date']] ??= $info;
120             return $info;
121         }
122         return false;
123     }
124 
125     /**
126      * Get the changelog information for a specific revision (timestamp)
127      *
128      * Adjacent changelog lines are optimistically parsed and cached to speed up
129      * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
130      * containing the requested changelog line is read.
131      *
132      * @param int $rev revision timestamp
133      * @param bool $retrieveCurrentRevInfo allows to skip for getting other revision info in the
134      *                                     getCurrentRevisionInfo() where $currentRevision is not yet determined
135      * @return bool|array false or array with entries:
136      *      - date:  unix timestamp
137      *      - ip:    IPv4 address (127.0.0.1)
138      *      - type:  log line type
139      *      - id:    page id
140      *      - user:  user name
141      *      - sum:   edit summary (or action reason)
142      *      - extra: extra data (varies by line type)
143      *      - sizechange: change of filesize
144      *    additional:
145      *      - mode: page or media
146      *
147      * @author Ben Coburn <btcoburn@silicodon.net>
148      * @author Kate Arzamastseva <pshns@ukr.net>
149      */
150     public function getRevisionInfo($rev, $retrieveCurrentRevInfo = true)
151     {
152         $rev = max(0, $rev);
153         if (!$rev) return false;
154 
155         //ensure the external edits are cached as well
156         if (!isset($this->currentRevision) && $retrieveCurrentRevInfo) {
157             $this->getCurrentRevisionInfo();
158         }
159 
160         // check if it's already in the memory cache
161         if (isset($this->cache[$this->id][$rev])) {
162             return $this->cache[$this->id][$rev];
163         }
164 
165         //read lines from changelog
166         [$fp, $lines] = $this->readloglines($rev);
167         if ($fp) {
168             fclose($fp);
169         }
170         if (empty($lines)) return false;
171 
172         // parse and cache changelog lines
173         foreach ($lines as $line) {
174             $this->parseAndCacheLogLine($line);
175         }
176 
177         return $this->cache[$this->id][$rev] ?? false;
178     }
179 
    /**
     * Return a list of revision timestamps read from the changelog
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     *
     * With $first >= 0 one additional entry is skipped when the item file exists and
     * its modification time is identical to the last changelog entry, i.e. when that
     * entry describes the current version of the item.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first is treated as 0 and disables this skipping, so the
     * current revision is read too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines are received.
     *
     * @param int $first skip the first n changelog lines, counted from the end of the changelog
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, in reverse changelog order (last line first);
     *               empty when the changelog is missing or unreadable or $num <= 0
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = [];
        $lines = [];
        $count = 0; // number of changelog lines collected by the chunked reader

        $logfile = $this->getChangelogFilename();
        if (!file_exists($logfile)) return $revs;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            $first = 0;
        } else {
            $fileLastMod = $this->getFilename();
            if (file_exists($fileLastMod) && $this->isLastRevision(filemtime($fileLastMod))) {
                // skip last revision if the page exists
                $first = max($first + 1, 0);
            }
        }

        if (filesize($logfile) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($logfile);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($logfile, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp);

            // chunk backwards; $finger marks the start, $tail the end of the span to read
            $finger = max($tail - $this->chunk_size, 0);
            while ($count < $num + $first) {
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                } else {
                    $finger = $nl;
                }

                // read chunk
                // NOTE(review): there is no fseek() before reading; presumably
                // getNewlinepointer() leaves the file position at $nl - confirm in ChangeLogTrait
                $chunk = '';
                $read_size = max($tail - $finger, 0); // found chunk size
                $got = 0;
                while ($got < $read_size && !feof($fp)) {
                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
                    if ($tmp === false) {
                        break;
                    } //error state
                    $got += strlen($tmp);
                    $chunk .= $tmp;
                }
                $tmp = explode("\n", $chunk);
                array_pop($tmp); // drop the element after the last newline

                // combine with previous chunk; older lines go in front
                $count += count($tmp);
                $lines = [...$tmp, ...$lines];

                // next chunk
                if ($finger == 0) {
                    // start of file reached, already read all the lines
                    break;
                } else {
                    $tail = $finger;
                    $finger = max($tail - $this->chunk_size, 0);
                }
            }
            fclose($fp);
        }

        // skip parsing extra lines: keep $num lines that end $first lines before the end
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } elseif ($first > 0 && $num == 0) {
            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
        } elseif ($first == 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $num, 0));
        }

        // handle lines in reverse order; lines that fail to parse are dropped silently
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $info = $this->parseAndCacheLogLine($lines[$i]);
            if (is_array($info)) {
                $revs[] = $info['date'];
            }
        }

        return $revs;
    }
305 
306     /**
307      * Get the nth revision left or right-hand side  for a specific page id and revision (timestamp)
308      *
309      * For large changelog files, only the chunk containing the
310      * reference revision $rev is read and sometimes a next chunk.
311      *
312      * Adjacent changelog lines are optimistically parsed and cached to speed up
313      * consecutive calls to getRevisionInfo.
314      *
315      * @param int $rev revision timestamp used as start date
316      *    (doesn't need to be exact revision number)
317      * @param int $direction give position of returned revision with respect to $rev;
318           positive=next, negative=prev
319      * @return bool|int
320      *      timestamp of the requested revision
321      *      otherwise false
322      */
323     public function getRelativeRevision($rev, $direction)
324     {
325         $rev = max($rev, 0);
326         $direction = (int)$direction;
327 
328         //no direction given or last rev, so no follow-up
329         if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
330             return false;
331         }
332 
333         //get lines from changelog
334         [$fp, $lines, $head, $tail, $eof] = $this->readloglines($rev);
335         if (empty($lines)) return false;
336 
337         // look for revisions later/earlier than $rev, when founded count till the wanted revision is reached
338         // also parse and cache changelog lines for getRevisionInfo().
339         $revCounter = 0;
340         $relativeRev = false;
341         $checkOtherChunk = true; //always runs once
342         while (!$relativeRev && $checkOtherChunk) {
343             $info = [];
344             //parse in normal or reverse order
345             $count = count($lines);
346             if ($direction > 0) {
347                 $start = 0;
348                 $step = 1;
349             } else {
350                 $start = $count - 1;
351                 $step = -1;
352             }
353             for ($i = $start; $i >= 0 && $i < $count; $i += $step) {
354                 $info = $this->parseAndCacheLogLine($lines[$i]);
355                 if (is_array($info)) {
356                     //look for revs older/earlier then reference $rev and select $direction-th one
357                     if (($direction > 0 && $info['date'] > $rev) || ($direction < 0 && $info['date'] < $rev)) {
358                         $revCounter++;
359                         if ($revCounter == abs($direction)) {
360                             $relativeRev = $info['date'];
361                         }
362                     }
363                 }
364             }
365 
366             //true when $rev is found, but not the wanted follow-up.
367             $checkOtherChunk = $fp
368                 && ($info['date'] == $rev || ($revCounter > 0 && !$relativeRev))
369                 && (!($tail == $eof && $direction > 0) && !($head == 0 && $direction < 0));
370 
371             if ($checkOtherChunk) {
372                 [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, $direction);
373 
374                 if (empty($lines)) break;
375             }
376         }
377         if ($fp) {
378             fclose($fp);
379         }
380 
381         return $relativeRev;
382     }
383 
384     /**
385      * Returns revisions around rev1 and rev2
386      * When available it returns $max entries for each revision
387      *
388      * @param int $rev1 oldest revision timestamp
389      * @param int $rev2 newest revision timestamp (0 looks up last revision)
390      * @param int $max maximum number of revisions returned
391      * @return array with two arrays with revisions surrounding rev1 respectively rev2
392      */
393     public function getRevisionsAround($rev1, $rev2, $max = 50)
394     {
395         $max = (int) (abs($max) / 2) * 2 + 1;
396         $rev1 = max($rev1, 0);
397         $rev2 = max($rev2, 0);
398 
399         if ($rev2) {
400             if ($rev2 < $rev1) {
401                 $rev = $rev2;
402                 $rev2 = $rev1;
403                 $rev1 = $rev;
404             }
405         } else {
406             //empty right side means a removed page. Look up last revision.
407             $rev2 = $this->currentRevision();
408         }
409         //collect revisions around rev2
410         [$revs2, $allRevs, $fp, $lines, $head, $tail] = $this->retrieveRevisionsAround($rev2, $max);
411 
412         if (empty($revs2)) return [[], []];
413 
414         //collect revisions around rev1
415         $index = array_search($rev1, $allRevs);
416         if ($index === false) {
417             //no overlapping revisions
418             [$revs1, , , , , ] = $this->retrieveRevisionsAround($rev1, $max);
419             if (empty($revs1)) $revs1 = [];
420         } else {
421             //revisions overlaps, reuse revisions around rev2
422             $lastRev = array_pop($allRevs); //keep last entry that could be external edit
423             $revs1 = $allRevs;
424             while ($head > 0) {
425                 for ($i = count($lines) - 1; $i >= 0; $i--) {
426                     $info = $this->parseAndCacheLogLine($lines[$i]);
427                     if (is_array($info)) {
428                         $revs1[] = $info['date'];
429                         $index++;
430 
431                         if ($index > (int) ($max / 2)) {
432                             break 2;
433                         }
434                     }
435                 }
436 
437                 [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, -1);
438             }
439             sort($revs1);
440             $revs1[] = $lastRev; //push back last entry
441 
442             //return wanted selection
443             $revs1 = array_slice($revs1, max($index - (int) ($max / 2), 0), $max);
444         }
445 
446         return [array_reverse($revs1), array_reverse($revs2)];
447     }
448 
449     /**
450      * Return an existing revision for a specific date which is
451      * the current one or younger or equal then the date
452      *
453      * @param number $date_at timestamp
454      * @return string revision ('' for current)
455      */
456     public function getLastRevisionAt($date_at)
457     {
458         $fileLastMod = $this->getFilename();
459         //requested date_at(timestamp) younger or equal then modified_time($this->id) => load current
460         if (file_exists($fileLastMod) && $date_at >= @filemtime($fileLastMod)) {
461             return '';
462         } elseif ($rev = $this->getRelativeRevision($date_at + 1, -1)) {
463             //+1 to get also the requested date revision
464             return $rev;
465         } else {
466             return false;
467         }
468     }
469 
470     /**
471      * Collect the $max revisions near to the timestamp $rev
472      *
473      * Ideally, half of retrieved timestamps are older than $rev, another half are newer.
474      * The returned array $requestedRevs may not contain the reference timestamp $rev
475      * when it does not match any revision value recorded in changelog.
476      *
477      * @param int $rev revision timestamp
478      * @param int $max maximum number of revisions to be returned
479      * @return bool|array
480      *     return array with entries:
481      *       - $requestedRevs: array of with $max revision timestamps
482      *       - $revs: all parsed revision timestamps
483      *       - $fp: file pointer only defined for chuck reading, needs closing.
484      *       - $lines: non-parsed changelog lines before the parsed revisions
485      *       - $head: position of first read changelog line
486      *       - $lastTail: position of end of last read changelog line
487      *     otherwise false
488      */
489     protected function retrieveRevisionsAround($rev, $max)
490     {
491         $revs = [];
492         $afterCount = 0;
493         $beforeCount = 0;
494 
495         //get lines from changelog
496         [$fp, $lines, $startHead, $startTail, $eof] = $this->readloglines($rev);
497         if (empty($lines)) return false;
498 
499         //parse changelog lines in chunk, and read forward more chunks until $max/2 is reached
500         $head = $startHead;
501         $tail = $startTail;
502         while (count($lines) > 0) {
503             foreach ($lines as $line) {
504                 $info = $this->parseAndCacheLogLine($line);
505                 if (is_array($info)) {
506                     $revs[] = $info['date'];
507                     if ($info['date'] >= $rev) {
508                         //count revs after reference $rev
509                         $afterCount++;
510                         if ($afterCount == 1) {
511                             $beforeCount = count($revs);
512                         }
513                     }
514                     //enough revs after reference $rev?
515                     if ($afterCount > (int) ($max / 2)) {
516                         break 2;
517                     }
518                 }
519             }
520             //retrieve next chunk
521             [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, 1);
522         }
523         $lastTail = $tail;
524 
525         // add a possible revision of external edit, create or deletion
526         if (
527             $lastTail == $eof && $afterCount <= (int) ($max / 2) &&
528             count($revs) && !$this->isCurrentRevision($revs[count($revs) - 1])
529         ) {
530             $revs[] = $this->currentRevision;
531             $afterCount++;
532         }
533 
534         if ($afterCount == 0) {
535             //given timestamp $rev is newer than the most recent line in chunk
536             return false; //FIXME: or proceed to collect older revisions?
537         }
538 
539         //read more chunks backward until $max/2 is reached and total number of revs is equal to $max
540         $lines = [];
541         $i = 0;
542         $head = $startHead;
543         $tail = $startTail;
544         while ($head > 0) {
545             [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, -1);
546 
547             for ($i = count($lines) - 1; $i >= 0; $i--) {
548                 $info = $this->parseAndCacheLogLine($lines[$i]);
549                 if (is_array($info)) {
550                     $revs[] = $info['date'];
551                     $beforeCount++;
552                     //enough revs before reference $rev?
553                     if ($beforeCount > max((int) ($max / 2), $max - $afterCount)) {
554                         break 2;
555                     }
556                 }
557             }
558         }
559         //keep only non-parsed lines
560         $lines = array_slice($lines, 0, $i);
561 
562         sort($revs);
563 
564         //trunk desired selection
565         $requestedRevs = array_slice($revs, -$max, $max);
566 
567         return [$requestedRevs, $revs, $fp, $lines, $head, $lastTail];
568     }
569 
    /**
     * Get the current revision information, considering external edit, create or deletion
     *
     * When the file has not been modified since its last revision, the information of the
     * last change already recorded in the changelog is returned as current change info.
     * Otherwise, the change that happened outside DokuWiki since the last revision
     * is returned, which is referred to as "external revision".
     *
     * The change date of the file can be determined by timestamp as far as the file exists,
     * however this is not possible when the file has already been deleted outside of DokuWiki.
     * In such case we assign 1 sec before current time() for the external deletion, but at
     * least last revision + 1.
     * As a result, the value of current revision identifier may change each time because:
     *   1) the file has again been modified outside of DokuWiki, or
     *   2) the value is essentially volatile for deleted but once existed files.
     *
     * Side effects: sets ChangeLog::currentRevision and, for external revisions, stores
     * the synthesized info in the cache pool.
     *
     * @return bool|array false when page had never existed or array with entries:
     *      - date:  revision identifier (timestamp or last revision +1)
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    id of page or media
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *      - sizechange: change of filesize
     *      - timestamp: unix timestamp or false (key set only for external edit occurred)
     *   additional:
     *      - mode:  page or media
     *
     * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
     */
    public function getCurrentRevisionInfo()
    {
        global $lang;

        // already determined earlier: answer from the cached revision info
        if (isset($this->currentRevision)) {
            return $this->getRevisionInfo($this->currentRevision);
        }

        // get revision id from the item file timestamp and changelog
        $fileLastMod = $this->getFilename();
        $fileRev = @filemtime($fileLastMod); // false when the file not exist
        $lastRev = $this->lastRevision();    // false when no changelog

        if (!$fileRev && !$lastRev) {                // has never existed
            $this->currentRevision = false;
            return false;
        } elseif ($fileRev === $lastRev) {           // not external edit
            $this->currentRevision = $lastRev;
            return $this->getRevisionInfo($lastRev);
        }

        if (!$fileRev && $lastRev) {                 // item file does not exist
            // check consistency against changelog
            // (second argument false: $currentRevision is not determined yet at this point)
            $revInfo = $this->getRevisionInfo($lastRev, false);
            if ($revInfo['type'] == DOKU_CHANGE_TYPE_DELETE) {
                // the deletion is recorded in the changelog, so it is a regular revision
                $this->currentRevision = $lastRev;
                return $revInfo;
            }

            // externally deleted, set revision date as late as possible
            $revInfo = [
                'date' => max($lastRev + 1, time() - 1), // 1 sec before now or new page save
                'ip'   => '127.0.0.1',
                'type' => DOKU_CHANGE_TYPE_DELETE,
                'id'   => $this->id,
                'user' => '',
                'sum'  => $lang['deleted'] . ' - ' . $lang['external_edit'] . ' (' . $lang['unknowndate'] . ')',
                'extra' => '',
                'sizechange' => -io_getSizeFile($this->getFilename($lastRev)),
                'timestamp' => false,
                'mode' => $this->getMode()
            ];
        } else {                                     // item file exists, with timestamp $fileRev
            // here, file timestamp $fileRev is different with last revision timestamp $lastRev in changelog
            // "just created": there is no changelog at all, or the newest entry is a deletion
            // that is older than the file
            $isJustCreated = $lastRev === false || (
                    $fileRev > $lastRev &&
                    $this->getRevisionInfo($lastRev, false)['type'] == DOKU_CHANGE_TYPE_DELETE
            );
            $filesize_new = filesize($this->getFilename());
            $filesize_old = $isJustCreated ? 0 : io_getSizeFile($this->getFilename($lastRev));
            $sizechange = $filesize_new - $filesize_old;

            if ($isJustCreated) {
                $timestamp = $fileRev;
                $sum = $lang['created'] . ' - ' . $lang['external_edit'];
            } elseif ($fileRev > $lastRev) {
                $timestamp = $fileRev;
                $sum = $lang['external_edit'];
            } else {
                // $fileRev is older than $lastRev, that is erroneous/incorrect occurrence.
                $msg = "Warning: current file modification time is older than last revision date";
                $details = 'File revision: ' . $fileRev . ' ' . dformat($fileRev, "%Y-%m-%d %H:%M:%S") . "\n"
                          . 'Last revision: ' . $lastRev . ' ' . dformat($lastRev, "%Y-%m-%d %H:%M:%S");
                Logger::error($msg, $details, $this->getFilename());
                $timestamp = false;
                $sum = $lang['external_edit'] . ' (' . $lang['unknowndate'] . ')';
            }

            // externally created or edited
            $revInfo = [
                // without a usable timestamp the identifier is placed right after the last revision
                'date' => $timestamp ?: $lastRev + 1,
                'ip'   => '127.0.0.1',
                'type' => $isJustCreated ? DOKU_CHANGE_TYPE_CREATE : DOKU_CHANGE_TYPE_EDIT,
                'id'   => $this->id,
                'user' => '',
                'sum'  => $sum,
                'extra' => '',
                'sizechange' => $sizechange,
                'timestamp' => $timestamp,
                'mode' => $this->getMode()
            ];
        }

        // cache current revision information of external edition
        $this->currentRevision = $revInfo['date'];
        $this->cache[$this->id][$this->currentRevision] = $revInfo;
        return $this->getRevisionInfo($this->currentRevision);
    }
688 
689     /**
690      * Mechanism to trace no-actual external current revision
691      * @param int $rev
692      */
693     public function traceCurrentRevision($rev)
694     {
695         if ($rev > $this->lastRevision()) {
696             $rev = $this->currentRevision();
697         }
698         return $rev;
699     }
700 }
701