xref: /dokuwiki/inc/ChangeLog/ChangeLog.php (revision a7b2005af9c5943ba9bb7b178697e7b36de9200f)
1<?php
2
3namespace dokuwiki\ChangeLog;
4
5use dokuwiki\Logger;
6
7/**
8 * ChangeLog Prototype; methods for handling changelog
9 */
10abstract class ChangeLog
11{
12    use ChangeLogTrait;
13
14    /** @var string */
15    protected $id;
16    /** @var int */
17    protected $currentRevision;
18    /** @var array */
19    protected $cache;
20
21    /**
22     * Constructor
23     *
24     * @param string $id page id
25     * @param int $chunk_size maximum block size read from file
26     */
27    public function __construct($id, $chunk_size = 8192)
28    {
29        global $cache_revinfo;
30
31        $this->cache =& $cache_revinfo;
32        if (!isset($this->cache[$id])) {
33            $this->cache[$id] = array();
34        }
35
36        $this->id = $id;
37        $this->setChunkSize($chunk_size);
38        // set property currentRevision and cache prior to getRevisionInfo($currentRev) call
39        $this->getCurrentRevisionInfo();
40    }
41
    /**
     * Returns path to current page/media
     *
     * Has to be implemented by the concrete changelog class.
     *
     * NOTE(review): within this class the method is also called with a revision
     * argument (getFilename($lastRev)); implementations presumably accept an
     * optional revision parameter - confirm against the subclasses.
     *
     * @return string path to file
     */
    abstract protected function getFilename();
48
49    /**
50     * Check whether given revision is the current page
51     *
52     * @param int $rev timestamp of current page
53     * @return bool true if $rev is current revision, otherwise false
54     */
55    public function isCurrentRevision($rev)
56    {
57        return $rev == $this->currentRevision();
58    }
59
60    /**
61     * Checks if the revision is last revision
62     *
63     * @param int $rev revision timestamp
64     * @return bool true if $rev is last revision, otherwise false
65     */
66    public function isLastRevision($rev = null)
67    {
68        return $rev === $this->lastRevision();
69    }
70
71    /**
72     * Return the current revision identifer
73     *
74     * The "current" revision means current version of the page or media file. It is either
75     * identical with or newer than the "last" revision, that depends on whether the file
76     * has modified, created or deleted outside of DokuWiki.
77     * The value of identifier can be determined by timestamp as far as the file exists,
78     * otherwise it must be assigned larger than any other revisions to keep them sortable.
79     *
80     * @return int|false revision timestamp
81     */
82    public function currentRevision()
83    {
84        if (!isset($this->currentRevision)) {
85            // set ChangeLog::currentRevision property
86            $this->getCurrentRevisionInfo();
87        }
88        return $this->currentRevision;
89    }
90
91    /**
92     * Return the last revision identifer, date value of the last entry of the changelog
93     *
94     * @return int|false revision timestamp
95     */
96    public function lastRevision()
97    {
98        $revs = $this->getRevisions(-1, 1);
99        return empty($revs) ? false : $revs[0];
100    }
101
102    /**
103     * Save revision info to the cache pool
104     *
105     * @param array $info Revision info structure
106     * @return bool
107     */
108    protected function cacheRevisionInfo($info)
109    {
110        if (!is_array($info)) return false;
111        //$this->cache[$this->id][$info['date']] ??= $info; // since php 7.4
112        $this->cache[$this->id][$info['date']] = $this->cache[$this->id][$info['date']] ?? $info;
113        return true;
114    }
115
116    /**
117     * Get the changelog information for a specific revision (timestamp)
118     *
119     * Adjacent changelog lines are optimistically parsed and cached to speed up
120     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
121     * containing the requested changelog line is read.
122     *
123     * @param int $rev revision timestamp
124     * @return bool|array false or array with entries:
125     *      - date:  unix timestamp
126     *      - ip:    IPv4 address (127.0.0.1)
127     *      - type:  log line type
128     *      - id:    page id
129     *      - user:  user name
130     *      - sum:   edit summary (or action reason)
131     *      - extra: extra data (varies by line type)
132     *      - sizechange: change of filesize
133     *
134     * @author Ben Coburn <btcoburn@silicodon.net>
135     * @author Kate Arzamastseva <pshns@ukr.net>
136     */
137    public function getRevisionInfo($rev)
138    {
139        $rev = max(0, $rev);
140        if (!$rev) return false;
141
142        // check if it's already in the memory cache
143        if (isset($this->cache[$this->id]) && isset($this->cache[$this->id][$rev])) {
144            return $this->cache[$this->id][$rev];
145        }
146
147        //read lines from changelog
148        list($fp, $lines) = $this->readloglines($rev);
149        if ($fp) {
150            fclose($fp);
151        }
152        if (empty($lines)) return false;
153
154        // parse and cache changelog lines
155        foreach ($lines as $value) {
156            $info = $this->parseLogLine($value);
157            $this->cacheRevisionInfo($info);
158        }
159        if (!isset($this->cache[$this->id][$rev])) {
160            return false;
161        }
162        return $this->cache[$this->id][$rev];
163    }
164
    /**
     * Return a list of page revision numbers
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     * By default the current revision is skipped.
     *
     * The newest changelog entry is skipped automatically when the page file exists
     * and its modification time equals the last revision.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first lets the newest entry be read too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines are received.
     *
     * @param int $first skip the first n changelog lines
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, newest first; empty when the
     *               changelog is missing or unreadable or when $num is not positive
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = array();
        $lines = array();
        $count = 0; // number of changelog lines collected so far (chunked reading only)

        $logfile = $this->getChangelogFilename();
        if (!file_exists($logfile)) return $revs;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            // negative offset: start at the very last changelog line
            $first = 0;
        } else {
            $fileLastMod = $this->getFilename();
            if (file_exists($fileLastMod) && $this->isLastRevision(filemtime($fileLastMod))) {
                // skip last revision if the page exists
                $first = max($first + 1, 0);
            }
        }

        if (filesize($logfile) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($logfile);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($logfile, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp); // end position of the region still to be read

            // chunk backwards
            $finger = max($tail - $this->chunk_size, 0); // tentative start of the next chunk
            while ($count < $num + $first) {
                // presumably the start of the first complete line at or after $finger
                // (helper is defined outside this class - confirm)
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                } else {
                    $finger = $nl;
                }

                // read chunk
                $chunk = '';
                $read_size = max($tail - $finger, 0); // found chunk size
                $got = 0;
                while ($got < $read_size && !feof($fp)) {
                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
                    if ($tmp === false) {
                        break;
                    } //error state
                    $got += strlen($tmp);
                    $chunk .= $tmp;
                }
                $tmp = explode("\n", $chunk);
                array_pop($tmp); // drop the element after the trailing newline

                // combine with previous chunk: older lines go in front
                $count += count($tmp);
                $lines = array_merge($tmp, $lines);

                // next chunk
                if ($finger == 0) {
                    // reached the start of the file, nothing more to read
                    break;
                } else {
                    $tail = $finger;
                    $finger = max($tail - $this->chunk_size, 0);
                }
            }
            fclose($fp);
        }

        // skip parsing extra lines: keep only the $num lines that precede the
        // $first newest ones
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } elseif ($first > 0 && $num == 0) {
            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
        } elseif ($first == 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $num, 0));
        }

        // handle lines in reverse order, so the newest revision comes first
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $info = $this->parseLogLine($lines[$i]);
            if ($this->cacheRevisionInfo($info)) {
                $revs[] = $info['date'];
            }
        }

        return $revs;
    }
290
291    /**
292     * Get the nth revision left or right handside  for a specific page id and revision (timestamp)
293     *
294     * For large changelog files, only the chunk containing the
295     * reference revision $rev is read and sometimes a next chunck.
296     *
297     * Adjacent changelog lines are optimistically parsed and cached to speed up
298     * consecutive calls to getRevisionInfo.
299     *
300     * @param int $rev revision timestamp used as startdate
301     *    (doesn't need to be exact revision number)
302     * @param int $direction give position of returned revision with respect to $rev;
303          positive=next, negative=prev
304     * @return bool|int
305     *      timestamp of the requested revision
306     *      otherwise false
307     */
308    public function getRelativeRevision($rev, $direction)
309    {
310        $rev = max($rev, 0);
311        $direction = (int)$direction;
312
313        //no direction given or last rev, so no follow-up
314        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
315            return false;
316        }
317
318        //get lines from changelog
319        list($fp, $lines, $head, $tail, $eof) = $this->readloglines($rev);
320        if (empty($lines)) return false;
321
322        // look for revisions later/earlier than $rev, when founded count till the wanted revision is reached
323        // also parse and cache changelog lines for getRevisionInfo().
324        $revcounter = 0;
325        $relativerev = false;
326        $checkotherchunck = true; //always runs once
327        while (!$relativerev && $checkotherchunck) {
328            $info = array();
329            //parse in normal or reverse order
330            $count = count($lines);
331            if ($direction > 0) {
332                $start = 0;
333                $step = 1;
334            } else {
335                $start = $count - 1;
336                $step = -1;
337            }
338            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
339                $info = $this->parseLogLine($lines[$i]);
340                if ($this->cacheRevisionInfo($info)) {
341                    //look for revs older/earlier then reference $rev and select $direction-th one
342                    if (($direction > 0 && $info['date'] > $rev) || ($direction < 0 && $info['date'] < $rev)) {
343                        $revcounter++;
344                        if ($revcounter == abs($direction)) {
345                            $relativerev = $info['date'];
346                        }
347                    }
348                }
349            }
350
351            //true when $rev is found, but not the wanted follow-up.
352            $checkotherchunck = $fp
353                && ($info['date'] == $rev || ($revcounter > 0 && !$relativerev))
354                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));
355
356            if ($checkotherchunck) {
357                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);
358
359                if (empty($lines)) break;
360            }
361        }
362        if ($fp) {
363            fclose($fp);
364        }
365
366        return $relativerev;
367    }
368
369    /**
370     * Returns revisions around rev1 and rev2
371     * When available it returns $max entries for each revision
372     *
373     * @param int $rev1 oldest revision timestamp
374     * @param int $rev2 newest revision timestamp (0 looks up last revision)
375     * @param int $max maximum number of revisions returned
376     * @return array with two arrays with revisions surrounding rev1 respectively rev2
377     */
378    public function getRevisionsAround($rev1, $rev2, $max = 50)
379    {
380        $max = intval(abs($max) / 2) * 2 + 1;
381        $rev1 = max($rev1, 0);
382        $rev2 = max($rev2, 0);
383
384        if ($rev2) {
385            if ($rev2 < $rev1) {
386                $rev = $rev2;
387                $rev2 = $rev1;
388                $rev1 = $rev;
389            }
390        } else {
391            //empty right side means a removed page. Look up last revision.
392            $rev2 = $this->currentRevision();
393        }
394        //collect revisions around rev2
395        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $this->retrieveRevisionsAround($rev2, $max);
396
397        if (empty($revs2)) return array(array(), array());
398
399        //collect revisions around rev1
400        $index = array_search($rev1, $allrevs);
401        if ($index === false) {
402            //no overlapping revisions
403            list($revs1, , , , ,) = $this->retrieveRevisionsAround($rev1, $max);
404            if (empty($revs1)) $revs1 = array();
405        } else {
406            //revisions overlaps, reuse revisions around rev2
407            $lastrev = array_pop($allrevs); //keep last entry that could be external edit
408            $revs1 = $allrevs;
409            while ($head > 0) {
410                for ($i = count($lines) - 1; $i >= 0; $i--) {
411                    $info = $this->parseLogLine($lines[$i]);
412                    if ($this->cacheRevisionInfo($info)) {
413                        $revs1[] = $info['date'];
414                        $index++;
415
416                        if ($index > intval($max / 2)) break 2;
417                    }
418                }
419
420                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
421            }
422            sort($revs1);
423            $revs1[] = $lastrev; //push back last entry
424
425            //return wanted selection
426            $revs1 = array_slice($revs1, max($index - intval($max / 2), 0), $max);
427        }
428
429        return array(array_reverse($revs1), array_reverse($revs2));
430    }
431
432    /**
433     * Return an existing revision for a specific date which is
434     * the current one or younger or equal then the date
435     *
436     * @param number $date_at timestamp
437     * @return string revision ('' for current)
438     */
439    public function getLastRevisionAt($date_at)
440    {
441        $fileLastMod = $this->getFilename();
442        //requested date_at(timestamp) younger or equal then modified_time($this->id) => load current
443        if (file_exists($fileLastMod) && $date_at >= @filemtime($fileLastMod)) {
444            return '';
445        } else {
446            if ($rev = $this->getRelativeRevision($date_at + 1, -1)) { //+1 to get also the requested date revision
447                return $rev;
448            } else {
449                return false;
450            }
451        }
452    }
453
454    /**
455     * Collect the $max revisions near to the timestamp $rev
456     *
457     * Ideally, half of retrieved timestamps are older than $rev, another half are newer.
458     * The returned array $requestedrevs may not contain the reference timestamp $rev
459     * when it does not match any revision value recorded in changelog.
460     *
461     * @param int $rev revision timestamp
462     * @param int $max maximum number of revisions to be returned
463     * @return bool|array
464     *     return array with entries:
465     *       - $requestedrevs: array of with $max revision timestamps
466     *       - $revs: all parsed revision timestamps
467     *       - $fp: filepointer only defined for chuck reading, needs closing.
468     *       - $lines: non-parsed changelog lines before the parsed revisions
469     *       - $head: position of first readed changelogline
470     *       - $lasttail: position of end of last readed changelogline
471     *     otherwise false
472     */
473    protected function retrieveRevisionsAround($rev, $max)
474    {
475        $revs = array();
476        $aftercount = $beforecount = 0;
477
478        //get lines from changelog
479        list($fp, $lines, $starthead, $starttail, $eof) = $this->readloglines($rev);
480        if (empty($lines)) return false;
481
482        //parse changelog lines in chunk, and read forward more chunks until $max/2 is reached
483        $head = $starthead;
484        $tail = $starttail;
485        while (count($lines) > 0) {
486            foreach ($lines as $line) {
487                $info = $this->parseLogLine($line);
488                if ($this->cacheRevisionInfo($info)) {
489                    $revs[] = $info['date'];
490                    if ($info['date'] >= $rev) {
491                        //count revs after reference $rev
492                        $aftercount++;
493                        if ($aftercount == 1) $beforecount = count($revs);
494                    }
495                    //enough revs after reference $rev?
496                    if ($aftercount > intval($max / 2)) break 2;
497                }
498            }
499            //retrieve next chunk
500            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
501        }
502        $lasttail = $tail;
503
504        // add a possible revision of external edit, create or deletion
505        if ($lasttail == $eof && $aftercount <= intval($max / 2) &&
506            count($revs) && !$this->isCurrentRevision($revs[count($revs)-1])
507        ) {
508            $revs[] = $this->currentRevision;
509            $aftercount++;
510        }
511
512        if ($aftercount == 0) {
513            //given timestamp $rev is newer than the most recent line in chunk
514            return false; //FIXME: or proceed to collect older revisions?
515        }
516
517        //read more chunks backward until $max/2 is reached and total number of revs is equal to $max
518        $lines = array();
519        $i = 0;
520        if ($aftercount > 0) {
521            $head = $starthead;
522            $tail = $starttail;
523            while ($head > 0) {
524                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
525
526                for ($i = count($lines) - 1; $i >= 0; $i--) {
527                    $info = $this->parseLogLine($lines[$i]);
528                    if ($this->cacheRevisionInfo($info)) {
529                        $revs[] = $info['date'];
530                        $beforecount++;
531                        //enough revs before reference $rev?
532                        if ($beforecount > max(intval($max / 2), $max - $aftercount)) break 2;
533                    }
534                }
535            }
536        }
537        //keep only non-parsed lines
538        $lines = array_slice($lines, 0, $i);
539
540        sort($revs);
541
542        //trunk desired selection
543        $requestedrevs = array_slice($revs, -$max, $max);
544
545        return array($requestedrevs, $revs, $fp, $lines, $head, $lasttail);
546    }
547
548    /**
549     * Get the current revision information, considering external edit, create or deletion
550     *
551     * When the file has not modified since its last revision, the infomation of the last
552     * change that had already recorded in the changelog is returned as current change info.
553     * Otherwise, the change infomation since the last revision caused outside DokuWiki
554     * should be returned, which is referred as "external revision".
555     *
556     * The change date of the file can be determined by timestamp as far as the file exists,
557     * however this is not possible when the file has already deleted outside of DokuWiki.
558     * In such case we assign 1 sec before current time() for the external deletion.
559     * As a result, the value of current revision identifier may change each time because:
560     *   1) the file has again modified outside of DokuWiki, or
561     *   2) the value is essentially volatile for deleted but once existed files.
562     *
563     * @return bool|array false when page had never existed or array with entries:
564     *      - date:  revision identifier (timestamp or last revision +1)
565     *      - ip:    IPv4 address (127.0.0.1)
566     *      - type:  log line type
567     *      - id:    id of page or media
568     *      - user:  user name
569     *      - sum:   edit summary (or action reason)
570     *      - extra: extra data (varies by line type)
571     *      - sizechange: change of filesize
572     *      - timestamp: unix timestamp or false (key set only for external edit occurred)
573     *
574     * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
575     */
576    public function getCurrentRevisionInfo()
577    {
578        global $lang;
579
580        if (isset($this->currentRevision)) return $this->getRevisionInfo($this->currentRevision);
581
582        // get revision id from the item file timestamp and chagelog
583        $fileLastMod = $this->getFilename();
584        $fileRev = @filemtime($fileLastMod); // false when the file not exist
585        $lastRev = $this->lastRevision();    // false when no changelog
586
587        if (!$fileRev && !$lastRev) {                // has never existed
588            $this->currentRevision = false;
589            return false;
590        } elseif ($fileRev === $lastRev) {           // not external edit
591            $this->currentRevision = $lastRev;
592            return $this->getRevisionInfo($lastRev);
593        }
594
595        if (!$fileRev && $lastRev) {                 // item file does not exist
596            // check consistency against changelog
597            $revInfo = $this->getRevisionInfo($lastRev);
598            if ($revInfo['type'] == DOKU_CHANGE_TYPE_DELETE) {
599                $this->currentRevision = $lastRev;
600                return $this->getRevisionInfo($lastRev);
601            }
602
603            // externally deleted, set revision date as late as possible
604            $revInfo = [
605                'date' => max($lastRev +1, time() -1), // 1 sec before now or new page save
606                'ip'   => '127.0.0.1',
607                'type' => DOKU_CHANGE_TYPE_DELETE,
608                'id'   => $this->id,
609                'user' => '',
610                'sum'  => $lang['deleted'].' - '.$lang['external_edit'].' ('.$lang['unknowndate'].')',
611                'extra' => '',
612                'sizechange' => -io_getSizeFile($this->getFilename($lastRev)),
613                'timestamp' => false,
614            ];
615
616        } elseif ($fileRev) {                        // item file exist
617            // here, file timestamp is different with last revision in changelog
618            $isJustCreated = $lastRev === false || (
619                    $fileRev > $lastRev &&
620                    $this->getRevisionInfo($lastRev)['type'] == DOKU_CHANGE_TYPE_DELETE
621            );
622            $filesize_new = filesize($this->getFilename());
623            $filesize_old = $isJustCreated ? 0 : io_getSizeFile($this->getFilename($lastRev));
624            $sizechange = $filesize_new - $filesize_old;
625
626            if ($isJustCreated) {
627                $timestamp = $fileRev;
628                $sum = $lang['created'].' - '.$lang['external_edit'];
629            } elseif ($fileRev > $lastRev) {
630                $timestamp = $fileRev;
631                $sum = $lang['external_edit'];
632            } else {
633                // $fileRev is older than $lastRev, that is erroneous/incorrect occurence.
634                $msg = "Warning: current file modification time is older than last revision date";
635                $details = 'File revision: '.$fileRev.' '.strftime("%Y-%m-%d %H:%M:%S", $fileRev)."\n"
636                          .'Last revision: '.$lastRev.' '.strftime("%Y-%m-%d %H:%M:%S", $lastRev);
637                Logger::error($msg, $details, $this->getFilename());
638                $timestamp = false;
639                $sum = $lang['external_edit'].' ('.$lang['unknowndate'].')';
640            }
641
642            // externally created or edited
643            $revInfo = [
644                'date' => $timestamp ?: $lastRev +1,
645                'ip'   => '127.0.0.1',
646                'type' => $isJustCreated ? DOKU_CHANGE_TYPE_CREATE : DOKU_CHANGE_TYPE_EDIT,
647                'id'   => $this->id,
648                'user' => '',
649                'sum'  => $sum,
650                'extra' => '',
651                'sizechange' => $sizechange,
652                'timestamp' => $timestamp,
653            ];
654        }
655
656        // cache current revision information of external edition
657        $this->currentRevision = $revInfo['date'];
658        $this->cache[$this->id][$this->currentRevision] = $revInfo;
659        return $this->getRevisionInfo($this->currentRevision);
660    }
661}
662