xref: /dokuwiki/inc/ChangeLog/ChangeLog.php (revision 6547cfc7454f26cc353587c1314f9fde93a0a056)
1<?php
2
3namespace dokuwiki\ChangeLog;
4
5use dokuwiki\Logger;
6
7/**
8 * ChangeLog Prototype; methods for handling changelog
9 */
10abstract class ChangeLog
11{
12    use ChangeLogTrait;
13
    /** @var string id of the page or media item whose changelog is handled */
    protected $id;
    /**
     * @var false|int revision identifier of the current version; stays unset until it is
     *                determined by getCurrentRevisionInfo(), false when the item never existed
     */
    protected $currentRevision;
    /**
     * @var array revision info pool, bound by reference to the global $cache_revinfo in the
     *            constructor; indexed by id first and by revision date second
     */
    protected $cache = [];
20
21    /**
22     * Constructor
23     *
24     * @param string $id page id
25     * @param int $chunk_size maximum block size read from file
26     */
27    public function __construct($id, $chunk_size = 8192)
28    {
29        global $cache_revinfo;
30
31        $this->cache =& $cache_revinfo;
32        if (!isset($this->cache[$id])) {
33            $this->cache[$id] = [];
34        }
35
36        $this->id = $id;
37        $this->setChunkSize($chunk_size);
38    }
39
    /**
     * Returns path to current page/media
     *
     * Implemented by the concrete changelog classes. Within this class it is called
     * without argument to locate the current file, and with a revision timestamp to
     * locate the file of that older revision.
     *
     * @param string|int $rev empty string for the current file, or a revision timestamp
     * @return string path to file
     */
    abstract protected function getFilename($rev = '');
47
48    /**
49     * Check whether given revision is the current page
50     *
51     * @param int $rev timestamp of current page
52     * @return bool true if $rev is current revision, otherwise false
53     */
54    public function isCurrentRevision($rev)
55    {
56        return $rev == $this->currentRevision();
57    }
58
59    /**
60     * Checks if the revision is last revision
61     *
62     * @param int $rev revision timestamp
63     * @return bool true if $rev is last revision, otherwise false
64     */
65    public function isLastRevision($rev = null)
66    {
67        return $rev === $this->lastRevision();
68    }
69
70    /**
71     * Return the current revision identifier
72     *
73     * The "current" revision means current version of the page or media file. It is either
74     * identical with or newer than the "last" revision, that depends on whether the file
75     * has modified, created or deleted outside of DokuWiki.
76     * The value of identifier can be determined by timestamp as far as the file exists,
77     * otherwise it must be assigned larger than any other revisions to keep them sortable.
78     *
79     * @return int|false revision timestamp
80     */
81    public function currentRevision()
82    {
83        if (!isset($this->currentRevision)) {
84            // set ChangeLog::currentRevision property
85            $this->getCurrentRevisionInfo();
86        }
87        return $this->currentRevision;
88    }
89
90    /**
91     * Return the last revision identifier, date value of the last entry of the changelog
92     *
93     * @return int|false revision timestamp
94     */
95    public function lastRevision()
96    {
97        $revs = $this->getRevisions(-1, 1);
98        return empty($revs) ? false : $revs[0];
99    }
100
101    /**
102     * Save revision info to the cache pool
103     *
104     * @param array $info Revision info structure
105     * @return bool
106     */
107    protected function cacheRevisionInfo($info)
108    {
109        if (!is_array($info)) return false;
110        //$this->cache[$this->id][$info['date']] ??= $info; // since php 7.4
111        $this->cache[$this->id][$info['date']] ??= $info;
112        return true;
113    }
114
115    /**
116     * Get the changelog information for a specific revision (timestamp)
117     *
118     * Adjacent changelog lines are optimistically parsed and cached to speed up
119     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
120     * containing the requested changelog line is read.
121     *
122     * @param int $rev revision timestamp
123     * @param bool $retrieveCurrentRevInfo allows to skip for getting other revision info in the
124     *                                     getCurrentRevisionInfo() where $currentRevision is not yet determined
125     * @return bool|array false or array with entries:
126     *      - date:  unix timestamp
127     *      - ip:    IPv4 address (127.0.0.1)
128     *      - type:  log line type
129     *      - id:    page id
130     *      - user:  user name
131     *      - sum:   edit summary (or action reason)
132     *      - extra: extra data (varies by line type)
133     *      - sizechange: change of filesize
134     *
135     * @author Ben Coburn <btcoburn@silicodon.net>
136     * @author Kate Arzamastseva <pshns@ukr.net>
137     */
138    public function getRevisionInfo($rev, $retrieveCurrentRevInfo = true)
139    {
140        $rev = max(0, $rev);
141        if (!$rev) return false;
142
143        //ensure the external edits are cached as well
144        if (!isset($this->currentRevision) && $retrieveCurrentRevInfo) {
145            $this->getCurrentRevisionInfo();
146        }
147
148        // check if it's already in the memory cache
149        if (isset($this->cache[$this->id]) && isset($this->cache[$this->id][$rev])) {
150            return $this->cache[$this->id][$rev];
151        }
152
153        //read lines from changelog
154        [$fp, $lines] = $this->readloglines($rev);
155        if ($fp) {
156            fclose($fp);
157        }
158        if (empty($lines)) return false;
159
160        // parse and cache changelog lines
161        foreach ($lines as $value) {
162            $info = static::parseLogLine($value);
163            $this->cacheRevisionInfo($info);
164        }
165        if (!isset($this->cache[$this->id][$rev])) {
166            return false;
167        }
168        return $this->cache[$this->id][$rev];
169    }
170
    /**
     * Return a list of page revision numbers
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     *
     * The newest changelog entry is skipped automatically when the page file exists and
     * its modification time is identical to that entry, i.e. when the entry describes
     * the current revision.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first disables this skipping, so the current revision is returned too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines has been collected.
     *
     * @param int $first skip the first n changelog lines (counted from the newest entry)
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, newest first; empty when the changelog
     *               is missing or unreadable, or when $num is not positive
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = [];
        $lines = [];
        $count = 0;

        $logfile = $this->getChangelogFilename();
        if (!file_exists($logfile)) return $revs;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            // negative offset: start at the newest entry, never skip it
            $first = 0;
        } else {
            $fileLastMod = $this->getFilename();
            if (file_exists($fileLastMod) && $this->isLastRevision(filemtime($fileLastMod))) {
                // skip last revision if the page exists
                $first = max($first + 1, 0);
            }
        }

        if (filesize($logfile) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($logfile);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($logfile, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp);

            // chunk backwards: $finger is the tentative start, $tail the end of the next chunk
            $finger = max($tail - $this->chunk_size, 0);
            while ($count < $num + $first) {
                // NOTE(review): getNewlinepointer() is defined outside this class; it presumably
                // returns the position of the first line start at or after $finger and leaves
                // the file pointer there, which the fread() below relies on - confirm
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                } else {
                    $finger = $nl;
                }

                // read chunk
                $chunk = '';
                $read_size = max($tail - $finger, 0); // found chunk size
                $got = 0;
                while ($got < $read_size && !feof($fp)) {
                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
                    if ($tmp === false) {
                        // error state: keep what has been read so far
                        break;
                    }
                    $got += strlen($tmp);
                    $chunk .= $tmp;
                }
                $tmp = explode("\n", $chunk);
                array_pop($tmp); // remove the element after the trailing newline

                // combine with previous chunk; the older lines go in front
                $count += count($tmp);
                $lines = [...$tmp, ...$lines];

                // next chunk
                if ($finger == 0) {
                    // start of file reached, all lines are read
                    break;
                } else {
                    $tail = $finger;
                    $finger = max($tail - $this->chunk_size, 0);
                }
            }
            fclose($fp);
        }

        // skip parsing extra lines: cut $lines down to the requested window,
        // which is counted from the end of the (chronologically ordered) lines
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } elseif ($first > 0 && $num == 0) {
            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
        } elseif ($first == 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $num, 0));
        }

        // handle lines in reverse order, so the newest revision comes first;
        // lines that cannot be parsed are dropped silently
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $info = static::parseLogLine($lines[$i]);
            if ($this->cacheRevisionInfo($info)) {
                $revs[] = $info['date'];
            }
        }

        return $revs;
    }
296
297    /**
298     * Get the nth revision left or right-hand side  for a specific page id and revision (timestamp)
299     *
300     * For large changelog files, only the chunk containing the
301     * reference revision $rev is read and sometimes a next chunk.
302     *
303     * Adjacent changelog lines are optimistically parsed and cached to speed up
304     * consecutive calls to getRevisionInfo.
305     *
306     * @param int $rev revision timestamp used as start date
307     *    (doesn't need to be exact revision number)
308     * @param int $direction give position of returned revision with respect to $rev;
309          positive=next, negative=prev
310     * @return bool|int
311     *      timestamp of the requested revision
312     *      otherwise false
313     */
314    public function getRelativeRevision($rev, $direction)
315    {
316        $rev = max($rev, 0);
317        $direction = (int)$direction;
318
319        //no direction given or last rev, so no follow-up
320        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
321            return false;
322        }
323
324        //get lines from changelog
325        [$fp, $lines, $head, $tail, $eof] = $this->readloglines($rev);
326        if (empty($lines)) return false;
327
328        // look for revisions later/earlier than $rev, when founded count till the wanted revision is reached
329        // also parse and cache changelog lines for getRevisionInfo().
330        $revCounter = 0;
331        $relativeRev = false;
332        $checkOtherChunk = true; //always runs once
333        while (!$relativeRev && $checkOtherChunk) {
334            $info = [];
335            //parse in normal or reverse order
336            $count = count($lines);
337            if ($direction > 0) {
338                $start = 0;
339                $step = 1;
340            } else {
341                $start = $count - 1;
342                $step = -1;
343            }
344            for ($i = $start; $i >= 0 && $i < $count; $i += $step) {
345                $info = static::parseLogLine($lines[$i]);
346                if ($this->cacheRevisionInfo($info)) {
347                    //look for revs older/earlier then reference $rev and select $direction-th one
348                    if (($direction > 0 && $info['date'] > $rev) || ($direction < 0 && $info['date'] < $rev)) {
349                        $revCounter++;
350                        if ($revCounter == abs($direction)) {
351                            $relativeRev = $info['date'];
352                        }
353                    }
354                }
355            }
356
357            //true when $rev is found, but not the wanted follow-up.
358            $checkOtherChunk = $fp
359                && ($info['date'] == $rev || ($revCounter > 0 && !$relativeRev))
360                && (!($tail == $eof && $direction > 0) && !($head == 0 && $direction < 0));
361
362            if ($checkOtherChunk) {
363                [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, $direction);
364
365                if (empty($lines)) break;
366            }
367        }
368        if ($fp) {
369            fclose($fp);
370        }
371
372        return $relativeRev;
373    }
374
375    /**
376     * Returns revisions around rev1 and rev2
377     * When available it returns $max entries for each revision
378     *
379     * @param int $rev1 oldest revision timestamp
380     * @param int $rev2 newest revision timestamp (0 looks up last revision)
381     * @param int $max maximum number of revisions returned
382     * @return array with two arrays with revisions surrounding rev1 respectively rev2
383     */
384    public function getRevisionsAround($rev1, $rev2, $max = 50)
385    {
386        $max = (int) (abs($max) / 2) * 2 + 1;
387        $rev1 = max($rev1, 0);
388        $rev2 = max($rev2, 0);
389
390        if ($rev2) {
391            if ($rev2 < $rev1) {
392                $rev = $rev2;
393                $rev2 = $rev1;
394                $rev1 = $rev;
395            }
396        } else {
397            //empty right side means a removed page. Look up last revision.
398            $rev2 = $this->currentRevision();
399        }
400        //collect revisions around rev2
401        [$revs2, $allRevs, $fp, $lines, $head, $tail] = $this->retrieveRevisionsAround($rev2, $max);
402
403        if (empty($revs2)) return [[], []];
404
405        //collect revisions around rev1
406        $index = array_search($rev1, $allRevs, true);
407        if ($index === false) {
408            //no overlapping revisions
409            [$revs1, , , , , ] = $this->retrieveRevisionsAround($rev1, $max);
410            if (empty($revs1)) $revs1 = [];
411        } else {
412            //revisions overlaps, reuse revisions around rev2
413            $lastRev = array_pop($allRevs); //keep last entry that could be external edit
414            $revs1 = $allRevs;
415            while ($head > 0) {
416                for ($i = count($lines) - 1; $i >= 0; $i--) {
417                    $info = static::parseLogLine($lines[$i]);
418                    if ($this->cacheRevisionInfo($info)) {
419                        $revs1[] = $info['date'];
420                        $index++;
421
422                        if ($index > (int) ($max / 2)) break 2;
423                    }
424                }
425
426                [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, -1);
427            }
428            sort($revs1);
429            $revs1[] = $lastRev; //push back last entry
430
431            //return wanted selection
432            $revs1 = array_slice($revs1, max($index - (int) ($max / 2), 0), $max);
433        }
434
435        return [array_reverse($revs1), array_reverse($revs2)];
436    }
437
438    /**
439     * Return an existing revision for a specific date which is
440     * the current one or younger or equal then the date
441     *
442     * @param number $date_at timestamp
443     * @return string revision ('' for current)
444     */
445    public function getLastRevisionAt($date_at)
446    {
447        $fileLastMod = $this->getFilename();
448        //requested date_at(timestamp) younger or equal then modified_time($this->id) => load current
449        if (file_exists($fileLastMod) && $date_at >= @filemtime($fileLastMod)) {
450            return '';
451        } elseif ($rev = $this->getRelativeRevision($date_at + 1, -1)) {
452            //+1 to get also the requested date revision
453            return $rev;
454        } else {
455            return false;
456        }
457    }
458
459    /**
460     * Collect the $max revisions near to the timestamp $rev
461     *
462     * Ideally, half of retrieved timestamps are older than $rev, another half are newer.
463     * The returned array $requestedRevs may not contain the reference timestamp $rev
464     * when it does not match any revision value recorded in changelog.
465     *
466     * @param int $rev revision timestamp
467     * @param int $max maximum number of revisions to be returned
468     * @return bool|array
469     *     return array with entries:
470     *       - $requestedRevs: array of with $max revision timestamps
471     *       - $revs: all parsed revision timestamps
472     *       - $fp: file pointer only defined for chuck reading, needs closing.
473     *       - $lines: non-parsed changelog lines before the parsed revisions
474     *       - $head: position of first read changelog line
475     *       - $lastTail: position of end of last read changelog line
476     *     otherwise false
477     */
478    protected function retrieveRevisionsAround($rev, $max)
479    {
480        $revs = [];
481        $afterCount = 0;
482        $beforeCount = 0;
483
484        //get lines from changelog
485        [$fp, $lines, $startHead, $startTail, $eof] = $this->readloglines($rev);
486        if (empty($lines)) return false;
487
488        //parse changelog lines in chunk, and read forward more chunks until $max/2 is reached
489        $head = $startHead;
490        $tail = $startTail;
491        while (count($lines) > 0) {
492            foreach ($lines as $line) {
493                $info = static::parseLogLine($line);
494                if ($this->cacheRevisionInfo($info)) {
495                    $revs[] = $info['date'];
496                    if ($info['date'] >= $rev) {
497                        //count revs after reference $rev
498                        $afterCount++;
499                        if ($afterCount == 1) $beforeCount = count($revs);
500                    }
501                    //enough revs after reference $rev?
502                    if ($afterCount > (int) ($max / 2)) break 2;
503                }
504            }
505            //retrieve next chunk
506            [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, 1);
507        }
508        $lastTail = $tail;
509
510        // add a possible revision of external edit, create or deletion
511        if (
512            $lastTail == $eof && $afterCount <= (int) ($max / 2) &&
513            count($revs) && !$this->isCurrentRevision($revs[count($revs) - 1])
514        ) {
515            $revs[] = $this->currentRevision;
516            $afterCount++;
517        }
518
519        if ($afterCount == 0) {
520            //given timestamp $rev is newer than the most recent line in chunk
521            return false; //FIXME: or proceed to collect older revisions?
522        }
523
524        //read more chunks backward until $max/2 is reached and total number of revs is equal to $max
525        $lines = [];
526        $i = 0;
527        $head = $startHead;
528        $tail = $startTail;
529        while ($head > 0) {
530            [$lines, $head, $tail] = $this->readAdjacentChunk($fp, $head, $tail, -1);
531
532            for ($i = count($lines) - 1; $i >= 0; $i--) {
533                $info = static::parseLogLine($lines[$i]);
534                if ($this->cacheRevisionInfo($info)) {
535                    $revs[] = $info['date'];
536                    $beforeCount++;
537                    //enough revs before reference $rev?
538                    if ($beforeCount > max((int) ($max / 2), $max - $afterCount)) break 2;
539                }
540            }
541        }
542        //keep only non-parsed lines
543        $lines = array_slice($lines, 0, $i);
544
545        sort($revs);
546
547        //trunk desired selection
548        $requestedRevs = array_slice($revs, -$max, $max);
549
550        return [$requestedRevs, $revs, $fp, $lines, $head, $lastTail];
551    }
552
    /**
     * Get the current revision information, considering external edit, create or deletion
     *
     * When the file has not been modified since its last revision, the information of the
     * last change that was already recorded in the changelog is returned as current change
     * info. Otherwise, the change information since the last revision caused outside
     * DokuWiki is returned, which is referred to as "external revision".
     *
     * The change date of the file can be determined by timestamp as long as the file exists,
     * however this is not possible when the file has already been deleted outside of DokuWiki.
     * In such a case we assign 1 sec before current time() for the external deletion.
     * As a result, the value of current revision identifier may change each time because:
     *   1) the file has again been modified outside of DokuWiki, or
     *   2) the value is essentially volatile for deleted but once existed files.
     *
     * Side effect: the ChangeLog::currentRevision property is set on the first call, and
     * the info of an external revision is written to the revision cache.
     *
     * @return bool|array false when page had never existed or array with entries:
     *      - date:  revision identifier (timestamp or last revision +1)
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    id of page or media
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *      - sizechange: change of filesize
     *      - timestamp: unix timestamp or false (key set only for external edit occurred)
     *
     * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
     */
    public function getCurrentRevisionInfo()
    {
        global $lang;

        // already determined earlier: answer from the cached revision info
        if (isset($this->currentRevision)) return $this->getRevisionInfo($this->currentRevision);

        // get revision id from the item file timestamp and changelog
        $fileLastMod = $this->getFilename();
        $fileRev = @filemtime($fileLastMod); // false when the file does not exist
        $lastRev = $this->lastRevision();    // false when no changelog

        if (!$fileRev && !$lastRev) {                // has never existed
            $this->currentRevision = false;
            return false;
        } elseif ($fileRev === $lastRev) {           // not external edit
            $this->currentRevision = $lastRev;
            return $this->getRevisionInfo($lastRev);
        }

        if (!$fileRev && $lastRev) {                 // item file does not exist
            // check consistency against changelog; the second argument is false because
            // $this->currentRevision is not set yet and must not be looked up recursively
            $revInfo = $this->getRevisionInfo($lastRev, false);
            if ($revInfo['type'] == DOKU_CHANGE_TYPE_DELETE) {
                // the deletion is already recorded in the changelog
                $this->currentRevision = $lastRev;
                return $revInfo;
            }

            // externally deleted, set revision date as late as possible
            $revInfo = [
                'date' => max($lastRev + 1, time() - 1), // 1 sec before now or new page save
                'ip'   => '127.0.0.1',
                'type' => DOKU_CHANGE_TYPE_DELETE,
                'id'   => $this->id,
                'user' => '',
                'sum'  => $lang['deleted'] . ' - ' . $lang['external_edit'] . ' (' . $lang['unknowndate'] . ')',
                'extra' => '',
                'sizechange' => -io_getSizeFile($this->getFilename($lastRev)),
                'timestamp' => false,
            ];
        } else {                                     // item file exists, with timestamp $fileRev
            // here, file timestamp $fileRev differs from the last revision timestamp $lastRev in changelog
            // the file counts as just created when there is no changelog at all, or when it is
            // newer than a last changelog entry that recorded a deletion
            $isJustCreated = $lastRev === false || (
                    $fileRev > $lastRev &&
                    $this->getRevisionInfo($lastRev, false)['type'] == DOKU_CHANGE_TYPE_DELETE
            );
            $filesize_new = filesize($this->getFilename());
            $filesize_old = $isJustCreated ? 0 : io_getSizeFile($this->getFilename($lastRev));
            $sizechange = $filesize_new - $filesize_old;

            if ($isJustCreated) {
                $timestamp = $fileRev;
                $sum = $lang['created'] . ' - ' . $lang['external_edit'];
            } elseif ($fileRev > $lastRev) {
                $timestamp = $fileRev;
                $sum = $lang['external_edit'];
            } else {
                // $fileRev is older than $lastRev, that is an erroneous/incorrect occurrence.
                $msg = "Warning: current file modification time is older than last revision date";
                $details = 'File revision: ' . $fileRev . ' ' . dformat($fileRev, "%Y-%m-%d %H:%M:%S") . "\n"
                          . 'Last revision: ' . $lastRev . ' ' . dformat($lastRev, "%Y-%m-%d %H:%M:%S");
                Logger::error($msg, $details, $this->getFilename());
                $timestamp = false;
                $sum = $lang['external_edit'] . ' (' . $lang['unknowndate'] . ')';
            }

            // externally created or edited
            $revInfo = [
                // without a usable timestamp the identifier is placed right after the last revision
                'date' => $timestamp ?: $lastRev + 1,
                'ip'   => '127.0.0.1',
                'type' => $isJustCreated ? DOKU_CHANGE_TYPE_CREATE : DOKU_CHANGE_TYPE_EDIT,
                'id'   => $this->id,
                'user' => '',
                'sum'  => $sum,
                'extra' => '',
                'sizechange' => $sizechange,
                'timestamp' => $timestamp,
            ];
        }

        // cache current revision information of the external edit; this overwrites a cache
        // entry that may already exist for the same date
        $this->currentRevision = $revInfo['date'];
        $this->cache[$this->id][$this->currentRevision] = $revInfo;
        return $this->getRevisionInfo($this->currentRevision);
    }
665
666    /**
667     * Mechanism to trace no-actual external current revision
668     * @param int $rev
669     */
670    public function traceCurrentRevision($rev)
671    {
672        if ($rev > $this->lastRevision()) {
673            $rev = $this->currentRevision();
674        }
675        return $rev;
676    }
677}
678