xref: /dokuwiki/inc/ChangeLog/ChangeLog.php (revision d41f5a8f13cbe65aecfb29abc34975970b616198)
1<?php
2
3namespace dokuwiki\ChangeLog;
4
5use dokuwiki\Logger;
6
7/**
8 * ChangeLog Prototype; methods for handling changelog
9 */
10abstract class ChangeLog
11{
12    use ChangeLogTrait;
13
    /** @var string id of the page or media item this changelog belongs to */
    protected $id;
    /** @var false|int cached current revision; unset until determined, false when the item never existed */
    protected $currentRevision;
    /** @var array revision info cache, indexed by id and then by revision date */
    protected $cache = [];
20
21    /**
22     * Constructor
23     *
24     * @param string $id page id
25     * @param int $chunk_size maximum block size read from file
26     */
27    public function __construct($id, $chunk_size = 8192)
28    {
29        global $cache_revinfo;
30
31        $this->cache =& $cache_revinfo;
32        if (!isset($this->cache[$id])) {
33            $this->cache[$id] = [];
34        }
35
36        $this->id = $id;
37        $this->setChunkSize($chunk_size);
38    }
39
    /**
     * Returns the path of the page or media file described by this changelog
     *
     * Must be implemented by the concrete changelog classes. Within this class it
     * is called without argument for the current file and with a revision
     * timestamp for an older revision.
     *
     * @param string|int $rev empty string or revision timestamp
     * @return string path to file
     */
    abstract protected function getFilename($rev = '');
47
    /**
     * Returns the mode of this changelog
     *
     * The value is stored as the 'mode' entry of every revision info array
     * that this class parses or synthesizes.
     *
     * @return string RevisionInfo::MODE_MEDIA or RevisionInfo::MODE_PAGE
     */
    abstract protected function getMode();
54
55    /**
56     * Check whether given revision is the current page
57     *
58     * @param int $rev timestamp of current page
59     * @return bool true if $rev is current revision, otherwise false
60     */
61    public function isCurrentRevision($rev)
62    {
63        return $rev == $this->currentRevision();
64    }
65
66    /**
67     * Checks if the revision is last revision
68     *
69     * @param int $rev revision timestamp
70     * @return bool true if $rev is last revision, otherwise false
71     */
72    public function isLastRevision($rev = null)
73    {
74        return $rev === $this->lastRevision();
75    }
76
77    /**
78     * Return the current revision identifier
79     *
80     * The "current" revision means current version of the page or media file. It is either
81     * identical with or newer than the "last" revision, that depends on whether the file
82     * has modified, created or deleted outside of DokuWiki.
83     * The value of identifier can be determined by timestamp as far as the file exists,
84     * otherwise it must be assigned larger than any other revisions to keep them sortable.
85     *
86     * @return int|false revision timestamp
87     */
88    public function currentRevision()
89    {
90        if (!isset($this->currentRevision)) {
91            // set ChangeLog::currentRevision property
92            $this->getCurrentRevisionInfo();
93        }
94        return $this->currentRevision;
95    }
96
97    /**
98     * Return the last revision identifier, date value of the last entry of the changelog
99     *
100     * @return int|false revision timestamp
101     */
102    public function lastRevision()
103    {
104        $revs = $this->getRevisions(-1, 1);
105        return empty($revs) ? false : $revs[0];
106    }
107
108    /**
109     * Parses a changelog line into its components and save revision info to the cache pool
110     *
111     * @param string $value changelog line
112     * @return array|bool parsed line or false
113     */
114    protected function parseAndCacheLogLine($value)
115    {
116        $info = static::parseLogLine($value);
117        if (is_array($info)) {
118            $info['mode'] = $this->getMode();
119            $this->cache[$this->id][$info['date']] ??= $info;
120            return $info;
121        }
122        return false;
123    }
124
125    /**
126     * Get the changelog information for a specific revision (timestamp)
127     *
128     * Adjacent changelog lines are optimistically parsed and cached to speed up
129     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
130     * containing the requested changelog line is read.
131     *
132     * @param int $rev revision timestamp
133     * @param bool $retrieveCurrentRevInfo allows to skip for getting other revision info in the
134     *                                     getCurrentRevisionInfo() where $currentRevision is not yet determined
135     * @return bool|array false or array with entries:
136     *      - date:  unix timestamp
137     *      - ip:    IPv4 address (127.0.0.1)
138     *      - type:  log line type
139     *      - id:    page id
140     *      - user:  user name
141     *      - sum:   edit summary (or action reason)
142     *      - extra: extra data (varies by line type)
143     *      - sizechange: change of filesize
144     *    additional:
145     *      - mode: page or media
146     *
147     * @author Ben Coburn <btcoburn@silicodon.net>
148     * @author Kate Arzamastseva <pshns@ukr.net>
149     */
150    public function getRevisionInfo($rev, $retrieveCurrentRevInfo = true)
151    {
152        $rev = max(0, $rev);
153        if (!$rev) return false;
154
155        //ensure the external edits are cached as well
156        if (!isset($this->currentRevision) && $retrieveCurrentRevInfo) {
157            $this->getCurrentRevisionInfo();
158        }
159
160        // check if it's already in the memory cache
161        if (isset($this->cache[$this->id][$rev])) {
162            return $this->cache[$this->id][$rev];
163        }
164
165        //read lines from changelog
166        $result = $this->readloglines($rev);
167        if ($result === false) return false;
168        [$fp, $lines] = $result;
169        if ($fp) {
170            fclose($fp);
171        }
172        if (empty($lines)) return false;
173
174        // parse and cache changelog lines
175        foreach ($lines as $line) {
176            $this->parseAndCacheLogLine($line);
177        }
178
179        return $this->cache[$this->id][$rev] ?? false;
180    }
181
182    /**
183     * Return a list of page revisions numbers
184     *
185     * Does not guarantee that the revision exists in the attic,
186     * only that a line with the date exists in the changelog.
187     * By default the current revision is skipped.
188     *
189     * The current revision is automatically skipped when the page exists.
190     * See $INFO['meta']['last_change'] for the current revision.
191     * A negative $first let read the current revision too.
192     *
193     * For efficiency, the log lines are parsed and cached for later
194     * calls to getRevisionInfo. Large changelog files are read
195     * backwards in chunks until the requested number of changelog
196     * lines are received.
197     *
198     * @param int $first skip the first n changelog lines
199     * @param int $num number of revisions to return
200     * @return array with the revision timestamps
201     *
202     * @author Ben Coburn <btcoburn@silicodon.net>
203     * @author Kate Arzamastseva <pshns@ukr.net>
204     */
205    public function getRevisions($first, $num)
206    {
207        $revs = [];
208        $lines = [];
209        $count = 0;
210
211        $logfile = $this->getChangelogFilename();
212        if (!file_exists($logfile)) return $revs;
213
214        $num = max($num, 0);
215        if ($num == 0) {
216            return $revs;
217        }
218
219        if ($first < 0) {
220            $first = 0;
221        } else {
222            $fileLastMod = $this->getFilename();
223            if (file_exists($fileLastMod) && $this->isLastRevision(filemtime($fileLastMod))) {
224                // skip last revision if the page exists
225                $first = max($first + 1, 0);
226            }
227        }
228
229        if (filesize($logfile) < $this->chunk_size || $this->chunk_size == 0) {
230            // read whole file
231            $lines = file($logfile);
232            if ($lines === false) {
233                return $revs;
234            }
235        } else {
236            // read chunks backwards
237            $fp = fopen($logfile, 'rb'); // "file pointer"
238            if ($fp === false) {
239                return $revs;
240            }
241            fseek($fp, 0, SEEK_END);
242            $tail = ftell($fp);
243
244            // chunk backwards
245            $finger = max($tail - $this->chunk_size, 0);
246            while ($count < $num + $first) {
247                $nl = $this->getNewlinepointer($fp, $finger);
248
249                // was the chunk big enough? if not, take another bite
250                if ($nl > 0 && $tail <= $nl) {
251                    $finger = max($finger - $this->chunk_size, 0);
252                    continue;
253                } else {
254                    $finger = $nl;
255                }
256
257                // read chunk
258                $chunk = '';
259                $read_size = max($tail - $finger, 0); // found chunk size
260                $got = 0;
261                while ($got < $read_size && !feof($fp)) {
262                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
263                    if ($tmp === false) {
264                        break;
265                    } //error state
266                    $got += strlen($tmp);
267                    $chunk .= $tmp;
268                }
269                $tmp = explode("\n", $chunk);
270                array_pop($tmp); // remove trailing newline
271
272                // combine with previous chunk
273                $count += count($tmp);
274                $lines = [...$tmp, ...$lines];
275
276                // next chunk
277                if ($finger == 0) {
278                    break;
279                } else { // already read all the lines
280                    $tail = $finger;
281                    $finger = max($tail - $this->chunk_size, 0);
282                }
283            }
284            fclose($fp);
285        }
286
287        // skip parsing extra lines
288        $num = max(min(count($lines) - $first, $num), 0);
289        if ($first > 0 && $num > 0) {
290            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
291        } elseif ($first > 0 && $num == 0) {
292            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
293        } elseif ($first == 0 && $num > 0) {
294            $lines = array_slice($lines, max(count($lines) - $num, 0));
295        }
296
297        // handle lines in reverse order
298        for ($i = count($lines) - 1; $i >= 0; $i--) {
299            $info = $this->parseAndCacheLogLine($lines[$i]);
300            if (is_array($info)) {
301                $revs[] = $info['date'];
302            }
303        }
304
305        return $revs;
306    }
307
308    /**
309     * Get the nth revision left or right-hand side  for a specific page id and revision (timestamp)
310     *
311     * For large changelog files, only the chunk containing the
312     * reference revision $rev is read and sometimes a next chunk.
313     *
314     * Adjacent changelog lines are optimistically parsed and cached to speed up
315     * consecutive calls to getRevisionInfo.
316     *
317     * @param int $rev revision timestamp used as start date
318     *    (doesn't need to be exact revision number)
319     * @param int $direction give position of returned revision with respect to $rev;
320          positive=next, negative=prev
321     * @return bool|int
322     *      timestamp of the requested revision
323     *      otherwise false
324     */
325    public function getRelativeRevision($rev, $direction)
326    {
327        $rev = max($rev, 0);
328        $direction = (int)$direction;
329
330        //no direction given or last rev, so no follow-up
331        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
332            return false;
333        }
334
335        //get lines from changelog
336        $result = $this->readloglines($rev);
337        if ($result === false) return false;
338        [$fp, $lines, $head, $tail, $eof] = $result;
339        if (empty($lines)) return false;
340
341        // look for revisions later/earlier than $rev, when founded count till the wanted revision is reached
342        // also parse and cache changelog lines for getRevisionInfo().
343        $revCounter = 0;
344        $relativeRev = false;
345        $checkOtherChunk = true; //always runs once
346        while (!$relativeRev && $checkOtherChunk) {
347            $info = [];
348            //parse in normal or reverse order
349            $count = count($lines);
350            if ($direction > 0) {
351                $start = 0;
352                $step = 1;
353            } else {
354                $start = $count - 1;
355                $step = -1;
356            }
357            for ($i = $start; $i >= 0 && $i < $count; $i += $step) {
358                $info = $this->parseAndCacheLogLine($lines[$i]);
359                if (is_array($info)) {
360                    //look for revs older/earlier then reference $rev and select $direction-th one
361                    if (($direction > 0 && $info['date'] > $rev) || ($direction < 0 && $info['date'] < $rev)) {
362                        $revCounter++;
363                        if ($revCounter == abs($direction)) {
364                            $relativeRev = $info['date'];
365                        }
366                    }
367                }
368            }
369
370            //true when $rev is found, but not the wanted follow-up.
371            $checkOtherChunk = $fp
372                && ($info['date'] == $rev || ($revCounter > 0 && !$relativeRev))
373                && (!($tail == $eof && $direction > 0) && !($head == 0 && $direction < 0));
374
375            if ($checkOtherChunk) {
376                [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, $direction);
377
378                if (empty($lines)) break;
379            }
380        }
381        if ($fp) {
382            fclose($fp);
383        }
384
385        return $relativeRev;
386    }
387
388    /**
389     * Returns revisions around rev1 and rev2
390     * When available it returns $max entries for each revision
391     *
392     * @param int $rev1 oldest revision timestamp
393     * @param int $rev2 newest revision timestamp (0 looks up last revision)
394     * @param int $max maximum number of revisions returned
395     * @return array with two arrays with revisions surrounding rev1 respectively rev2
396     */
397    public function getRevisionsAround($rev1, $rev2, $max = 50)
398    {
399        $max = (int) (abs($max) / 2) * 2 + 1;
400        $rev1 = max($rev1, 0);
401        $rev2 = max($rev2, 0);
402
403        if ($rev2) {
404            if ($rev2 < $rev1) {
405                $rev = $rev2;
406                $rev2 = $rev1;
407                $rev1 = $rev;
408            }
409        } else {
410            //empty right side means a removed page. Look up last revision.
411            $rev2 = $this->currentRevision();
412        }
413        //collect revisions around rev2
414        $result2 = $this->retrieveRevisionsAround($rev2, $max);
415        if ($result2 === false) return [[], []];
416        [$revs2, $allRevs, $fp, $lines, $head, $tail] = $result2;
417
418        if (empty($revs2)) return [[], []];
419
420        //collect revisions around rev1
421        $index = array_search($rev1, $allRevs);
422        if ($index === false) {
423            //no overlapping revisions
424            $result1 = $this->retrieveRevisionsAround($rev1, $max);
425            if ($result1 === false) {
426                $revs1 = [];
427            } else {
428                [$revs1, , , , , ] = $result1;
429                if (empty($revs1)) $revs1 = [];
430            }
431        } else {
432            //revisions overlaps, reuse revisions around rev2
433            $lastRev = array_pop($allRevs); //keep last entry that could be external edit
434            $revs1 = $allRevs;
435            while ($head > 0) {
436                for ($i = count($lines) - 1; $i >= 0; $i--) {
437                    $info = $this->parseAndCacheLogLine($lines[$i]);
438                    if (is_array($info)) {
439                        $revs1[] = $info['date'];
440                        $index++;
441
442                        if ($index > (int) ($max / 2)) {
443                            break 2;
444                        }
445                    }
446                }
447
448                [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, -1);
449            }
450            sort($revs1);
451            $revs1[] = $lastRev; //push back last entry
452
453            //return wanted selection
454            $revs1 = array_slice($revs1, max($index - (int) ($max / 2), 0), $max);
455        }
456
457        return [array_reverse($revs1), array_reverse($revs2)];
458    }
459
460    /**
461     * Return an existing revision for a specific date which is
462     * the current one or younger or equal then the date
463     *
464     * @param number $date_at timestamp
465     * @return string revision ('' for current)
466     */
467    public function getLastRevisionAt($date_at)
468    {
469        $fileLastMod = $this->getFilename();
470        //requested date_at(timestamp) younger or equal then modified_time($this->id) => load current
471        if (file_exists($fileLastMod) && $date_at >= @filemtime($fileLastMod)) {
472            return '';
473        } elseif ($rev = $this->getRelativeRevision($date_at + 1, -1)) {
474            //+1 to get also the requested date revision
475            return $rev;
476        } else {
477            return false;
478        }
479    }
480
481    /**
482     * Collect the $max revisions near to the timestamp $rev
483     *
484     * Ideally, half of retrieved timestamps are older than $rev, another half are newer.
485     * The returned array $requestedRevs may not contain the reference timestamp $rev
486     * when it does not match any revision value recorded in changelog.
487     *
488     * @param int $rev revision timestamp
489     * @param int $max maximum number of revisions to be returned
490     * @return bool|array
491     *     return array with entries:
492     *       - $requestedRevs: array of with $max revision timestamps
493     *       - $revs: all parsed revision timestamps
494     *       - $fp: file pointer only defined for chuck reading, needs closing.
495     *       - $lines: non-parsed changelog lines before the parsed revisions
496     *       - $head: position of first read changelog line
497     *       - $lastTail: position of end of last read changelog line
498     *     otherwise false
499     */
500    protected function retrieveRevisionsAround($rev, $max)
501    {
502        $revs = [];
503        $afterCount = 0;
504        $beforeCount = 0;
505
506        //get lines from changelog
507        $result = $this->readloglines($rev);
508        if ($result === false) return false;
509        [$fp, $lines, $startHead, $startTail, $eof] = $result;
510        if (empty($lines)) return false;
511
512        //parse changelog lines in chunk, and read forward more chunks until $max/2 is reached
513        $head = $startHead;
514        $tail = $startTail;
515        while (count($lines) > 0) {
516            foreach ($lines as $line) {
517                $info = $this->parseAndCacheLogLine($line);
518                if (is_array($info)) {
519                    $revs[] = $info['date'];
520                    if ($info['date'] >= $rev) {
521                        //count revs after reference $rev
522                        $afterCount++;
523                        if ($afterCount == 1) {
524                            $beforeCount = count($revs);
525                        }
526                    }
527                    //enough revs after reference $rev?
528                    if ($afterCount > (int) ($max / 2)) {
529                        break 2;
530                    }
531                }
532            }
533            //retrieve next chunk
534            [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, 1);
535        }
536        $lastTail = $tail;
537
538        // add a possible revision of external edit, create or deletion
539        if (
540            $lastTail == $eof && $afterCount <= (int) ($max / 2) &&
541            count($revs) && !$this->isCurrentRevision($revs[count($revs) - 1])
542        ) {
543            $revs[] = $this->currentRevision;
544            $afterCount++;
545        }
546
547        if ($afterCount == 0) {
548            //given timestamp $rev is newer than the most recent line in chunk
549            return false; //FIXME: or proceed to collect older revisions?
550        }
551
552        //read more chunks backward until $max/2 is reached and total number of revs is equal to $max
553        $lines = [];
554        $i = 0;
555        $head = $startHead;
556        $tail = $startTail;
557        while ($head > 0) {
558            [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, -1);
559
560            for ($i = count($lines) - 1; $i >= 0; $i--) {
561                $info = $this->parseAndCacheLogLine($lines[$i]);
562                if (is_array($info)) {
563                    $revs[] = $info['date'];
564                    $beforeCount++;
565                    //enough revs before reference $rev?
566                    if ($beforeCount > max((int) ($max / 2), $max - $afterCount)) {
567                        break 2;
568                    }
569                }
570            }
571        }
572        //keep only non-parsed lines
573        $lines = array_slice($lines, 0, $i);
574
575        sort($revs);
576
577        //trunk desired selection
578        $requestedRevs = array_slice($revs, -$max, $max);
579
580        return [$requestedRevs, $revs, $fp, $lines, $head, $lastTail];
581    }
582
583    /**
584     * Get the current revision information, considering external edit, create or deletion
585     *
586     * When the file has not modified since its last revision, the information of the last
587     * change that had already recorded in the changelog is returned as current change info.
588     * Otherwise, the change information since the last revision caused outside DokuWiki
589     * should be returned, which is referred as "external revision".
590     *
591     * The change date of the file can be determined by timestamp as far as the file exists,
592     * however this is not possible when the file has already deleted outside of DokuWiki.
593     * In such case we assign 1 sec before current time() for the external deletion.
594     * As a result, the value of current revision identifier may change each time because:
595     *   1) the file has again modified outside of DokuWiki, or
596     *   2) the value is essentially volatile for deleted but once existed files.
597     *
598     * @return bool|array false when page had never existed or array with entries:
599     *      - date:  revision identifier (timestamp or last revision +1)
600     *      - ip:    IPv4 address (127.0.0.1)
601     *      - type:  log line type
602     *      - id:    id of page or media
603     *      - user:  user name
604     *      - sum:   edit summary (or action reason)
605     *      - extra: extra data (varies by line type)
606     *      - sizechange: change of filesize
607     *      - timestamp: unix timestamp or false (key set only for external edit occurred)
608     *   additional:
609     *      - mode:  page or media
610     *
611     * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
612     */
613    public function getCurrentRevisionInfo()
614    {
615        global $lang;
616
617        if (isset($this->currentRevision)) {
618            return $this->getRevisionInfo($this->currentRevision);
619        }
620
621        // get revision id from the item file timestamp and changelog
622        $fileLastMod = $this->getFilename();
623        $fileRev = @filemtime($fileLastMod); // false when the file not exist
624        $lastRev = $this->lastRevision();    // false when no changelog
625
626        if (!$fileRev && !$lastRev) {                // has never existed
627            $this->currentRevision = false;
628            return false;
629        } elseif ($fileRev === $lastRev) {           // not external edit
630            $this->currentRevision = $lastRev;
631            return $this->getRevisionInfo($lastRev);
632        }
633
634        if (!$fileRev && $lastRev) {                 // item file does not exist
635            // check consistency against changelog
636            $revInfo = $this->getRevisionInfo($lastRev, false);
637            if ($revInfo['type'] == DOKU_CHANGE_TYPE_DELETE) {
638                $this->currentRevision = $lastRev;
639                return $revInfo;
640            }
641
642            // externally deleted, set revision date as late as possible
643            $revInfo = [
644                'date' => max($lastRev + 1, time() - 1), // 1 sec before now or new page save
645                'ip'   => '127.0.0.1',
646                'type' => DOKU_CHANGE_TYPE_DELETE,
647                'id'   => $this->id,
648                'user' => '',
649                'sum'  => $lang['deleted'] . ' - ' . $lang['external_edit'] . ' (' . $lang['unknowndate'] . ')',
650                'extra' => '',
651                'sizechange' => -io_getSizeFile($this->getFilename($lastRev)),
652                'timestamp' => false,
653                'mode' => $this->getMode()
654            ];
655        } else {                                     // item file exists, with timestamp $fileRev
656            // here, file timestamp $fileRev is different with last revision timestamp $lastRev in changelog
657            $isJustCreated = $lastRev === false || (
658                    $fileRev > $lastRev &&
659                    $this->getRevisionInfo($lastRev, false)['type'] == DOKU_CHANGE_TYPE_DELETE
660            );
661            $filesize_new = filesize($this->getFilename());
662            $filesize_old = $isJustCreated ? 0 : io_getSizeFile($this->getFilename($lastRev));
663            $sizechange = $filesize_new - $filesize_old;
664
665            if ($isJustCreated) {
666                $timestamp = $fileRev;
667                $sum = $lang['created'] . ' - ' . $lang['external_edit'];
668            } elseif ($fileRev > $lastRev) {
669                $timestamp = $fileRev;
670                $sum = $lang['external_edit'];
671            } else {
672                // $fileRev is older than $lastRev, that is erroneous/incorrect occurrence.
673                $msg = "Warning: current file modification time is older than last revision date";
674                $details = 'File revision: ' . $fileRev . ' ' . dformat($fileRev, "%Y-%m-%d %H:%M:%S") . "\n"
675                          . 'Last revision: ' . $lastRev . ' ' . dformat($lastRev, "%Y-%m-%d %H:%M:%S");
676                Logger::error($msg, $details, $this->getFilename());
677                $timestamp = false;
678                $sum = $lang['external_edit'] . ' (' . $lang['unknowndate'] . ')';
679            }
680
681            // externally created or edited
682            $revInfo = [
683                'date' => $timestamp ?: $lastRev + 1,
684                'ip'   => '127.0.0.1',
685                'type' => $isJustCreated ? DOKU_CHANGE_TYPE_CREATE : DOKU_CHANGE_TYPE_EDIT,
686                'id'   => $this->id,
687                'user' => '',
688                'sum'  => $sum,
689                'extra' => '',
690                'sizechange' => $sizechange,
691                'timestamp' => $timestamp,
692                'mode' => $this->getMode()
693            ];
694        }
695
696        // cache current revision information of external edition
697        $this->currentRevision = $revInfo['date'];
698        $this->cache[$this->id][$this->currentRevision] = $revInfo;
699        return $this->getRevisionInfo($this->currentRevision);
700    }
701
702    /**
703     * Mechanism to trace no-actual external current revision
704     * @param int $rev
705     */
706    public function traceCurrentRevision($rev)
707    {
708        if ($rev > $this->lastRevision()) {
709            $rev = $this->currentRevision();
710        }
711        return $rev;
712    }
713}
714