xref: /dokuwiki/inc/ChangeLog/ChangeLog.php (revision d154755dd0a4dd6b30eaaa17a66d2adeedb209a5)
1<?php
2
3namespace dokuwiki\ChangeLog;
4
5/**
6 * ChangeLog Prototype; methods for handling changelog
7 */
8abstract class ChangeLog
9{
10    use ChangeLogTrait;
11
12    /** @var string */
13    protected $id;
14    /** @var int */
15    protected $currentRevision;
16    /** @var array */
17    protected $cache;
18
19    /**
20     * Constructor
21     *
22     * @param string $id page id
23     * @param int $chunk_size maximum block size read from file
24     */
25    public function __construct($id, $chunk_size = 8192)
26    {
27        global $cache_revinfo;
28
29        $this->cache =& $cache_revinfo;
30        if (!isset($this->cache[$id])) {
31            $this->cache[$id] = array();
32        }
33
34        $this->id = $id;
35        $this->setChunkSize($chunk_size);
36        // set property currentRevision and cache prior to getRevisionInfo($currentRev) call
37        $this->getCurrentRevisionInfo();
38    }
39
40    /**
41     * Returns path to current page/media
42     *
43     * @return string path to file
44     */
45    abstract protected function getFilename();
46
47    /**
48     * Check whether given revision is the current page
49     *
50     * @param int $rev timestamp of current page
51     * @return bool true if $rev is current revision, otherwise false
52     */
53    public function isCurrentRevision($rev)
54    {
55        return $rev == $this->currentRevision();
56    }
57
58    /**
59     * Checks if the revision is last revision
60     *
61     * @param int $rev revision timestamp
62     * @return bool true if $rev is last revision, otherwise false
63     */
64    public function isLastRevision($rev = null)
65    {
66        return $rev === $this->lastRevision();
67    }
68
69    /**
70     * Return the current revision identifer
71     * @return int|false
72     */
73    public function currentRevision()
74    {
75        if (!isset($this->currentRevision)) {
76            // set ChangeLog::currentRevision property
77            $this->getCurrentRevisionInfo();
78        }
79        return $this->currentRevision;
80    }
81
82    /**
83     * Return the last revision identifer, timestamp of last entry of changelog
84     *
85     * @return int|false
86     */
87    public function lastRevision()
88    {
89        $revs = $this->getRevisions(-1, 1);
90        return empty($revs) ? false : $revs[0];
91    }
92
93    /**
94     * Save revision info to the cache pool
95     *
96     * @param array $info Revision info structure
97     * @return bool
98     */
99    protected function cacheRevisionInfo($info)
100    {
101        if (!is_array($info)) return false;
102        //$this->cache[$this->id][$info['date']] ??= $info; // since php 7.4
103        $this->cache[$this->id][$info['date']] = $this->cache[$this->id][$info['date']] ?? $info;
104        return true;
105    }
106
107    /**
108     * Get the changelog information for a specific revision (timestamp)
109     *
110     * Adjacent changelog lines are optimistically parsed and cached to speed up
111     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
112     * containing the requested changelog line is read.
113     *
114     * @param int $rev revision timestamp
115     * @return bool|array false or array with entries:
116     *      - date:  unix timestamp
117     *      - ip:    IPv4 address (127.0.0.1)
118     *      - type:  log line type
119     *      - id:    page id
120     *      - user:  user name
121     *      - sum:   edit summary (or action reason)
122     *      - extra: extra data (varies by line type)
123     *      - sizechange: change of filesize
124     *
125     * @author Ben Coburn <btcoburn@silicodon.net>
126     * @author Kate Arzamastseva <pshns@ukr.net>
127     */
128    public function getRevisionInfo($rev)
129    {
130        $rev = max(0, $rev);
131        if (!$rev) return false;
132
133        // check if it's already in the memory cache
134        if (isset($this->cache[$this->id]) && isset($this->cache[$this->id][$rev])) {
135            return $this->cache[$this->id][$rev];
136        }
137
138        //read lines from changelog
139        list($fp, $lines) = $this->readloglines($rev);
140        if ($fp) {
141            fclose($fp);
142        }
143        if (empty($lines)) return false;
144
145        // parse and cache changelog lines
146        foreach ($lines as $value) {
147            $info = $this->parseLogLine($value);
148            $this->cacheRevisionInfo($info);
149        }
150        if (!isset($this->cache[$this->id][$rev])) {
151            return false;
152        }
153        return $this->cache[$this->id][$rev];
154    }
155
156    /**
157     * Return a list of page revisions numbers
158     *
159     * Does not guarantee that the revision exists in the attic,
160     * only that a line with the date exists in the changelog.
161     * By default the current revision is skipped.
162     *
163     * The current revision is automatically skipped when the page exists.
164     * See $INFO['meta']['last_change'] for the current revision.
165     * A negative $first let read the current revision too.
166     *
167     * For efficiency, the log lines are parsed and cached for later
168     * calls to getRevisionInfo. Large changelog files are read
169     * backwards in chunks until the requested number of changelog
170     * lines are recieved.
171     *
172     * @param int $first skip the first n changelog lines
173     * @param int $num number of revisions to return
174     * @return array with the revision timestamps
175     *
176     * @author Ben Coburn <btcoburn@silicodon.net>
177     * @author Kate Arzamastseva <pshns@ukr.net>
178     */
179    public function getRevisions($first, $num)
180    {
181        $revs = array();
182        $lines = array();
183        $count = 0;
184
185        $logfile = $this->getChangelogFilename();
186        if (!file_exists($logfile)) return $revs;
187
188        $num = max($num, 0);
189        if ($num == 0) {
190            return $revs;
191        }
192
193        if ($first < 0) {
194            $first = 0;
195        } else {
196            $fileLastMod = $this->getFilename();
197            if (file_exists($fileLastMod) && $this->isLastRevision(filemtime($fileLastMod))) {
198                // skip last revision if the page exists
199                $first = max($first + 1, 0);
200            }
201        }
202
203        if (filesize($logfile) < $this->chunk_size || $this->chunk_size == 0) {
204            // read whole file
205            $lines = file($logfile);
206            if ($lines === false) {
207                return $revs;
208            }
209        } else {
210            // read chunks backwards
211            $fp = fopen($logfile, 'rb'); // "file pointer"
212            if ($fp === false) {
213                return $revs;
214            }
215            fseek($fp, 0, SEEK_END);
216            $tail = ftell($fp);
217
218            // chunk backwards
219            $finger = max($tail - $this->chunk_size, 0);
220            while ($count < $num + $first) {
221                $nl = $this->getNewlinepointer($fp, $finger);
222
223                // was the chunk big enough? if not, take another bite
224                if ($nl > 0 && $tail <= $nl) {
225                    $finger = max($finger - $this->chunk_size, 0);
226                    continue;
227                } else {
228                    $finger = $nl;
229                }
230
231                // read chunk
232                $chunk = '';
233                $read_size = max($tail - $finger, 0); // found chunk size
234                $got = 0;
235                while ($got < $read_size && !feof($fp)) {
236                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
237                    if ($tmp === false) {
238                        break;
239                    } //error state
240                    $got += strlen($tmp);
241                    $chunk .= $tmp;
242                }
243                $tmp = explode("\n", $chunk);
244                array_pop($tmp); // remove trailing newline
245
246                // combine with previous chunk
247                $count += count($tmp);
248                $lines = array_merge($tmp, $lines);
249
250                // next chunk
251                if ($finger == 0) {
252                    break;
253                } else { // already read all the lines
254                    $tail = $finger;
255                    $finger = max($tail - $this->chunk_size, 0);
256                }
257            }
258            fclose($fp);
259        }
260
261        // skip parsing extra lines
262        $num = max(min(count($lines) - $first, $num), 0);
263        if ($first > 0 && $num > 0) {
264            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
265        } elseif ($first > 0 && $num == 0) {
266            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
267        } elseif ($first == 0 && $num > 0) {
268            $lines = array_slice($lines, max(count($lines) - $num, 0));
269        }
270
271        // handle lines in reverse order
272        for ($i = count($lines) - 1; $i >= 0; $i--) {
273            $info = $this->parseLogLine($lines[$i]);
274            if ($this->cacheRevisionInfo($info)) {
275                $revs[] = $info['date'];
276            }
277        }
278
279        return $revs;
280    }
281
282    /**
283     * Get the nth revision left or right handside  for a specific page id and revision (timestamp)
284     *
285     * For large changelog files, only the chunk containing the
286     * reference revision $rev is read and sometimes a next chunck.
287     *
288     * Adjacent changelog lines are optimistically parsed and cached to speed up
289     * consecutive calls to getRevisionInfo.
290     *
291     * @param int $rev revision timestamp used as startdate
292     *    (doesn't need to be exact revision number)
293     * @param int $direction give position of returned revision with respect to $rev;
294          positive=next, negative=prev
295     * @return bool|int
296     *      timestamp of the requested revision
297     *      otherwise false
298     */
299    public function getRelativeRevision($rev, $direction)
300    {
301        $rev = max($rev, 0);
302        $direction = (int)$direction;
303
304        //no direction given or last rev, so no follow-up
305        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
306            return false;
307        }
308
309        //get lines from changelog
310        list($fp, $lines, $head, $tail, $eof) = $this->readloglines($rev);
311        if (empty($lines)) return false;
312
313        // look for revisions later/earlier than $rev, when founded count till the wanted revision is reached
314        // also parse and cache changelog lines for getRevisionInfo().
315        $revcounter = 0;
316        $relativerev = false;
317        $checkotherchunck = true; //always runs once
318        while (!$relativerev && $checkotherchunck) {
319            $info = array();
320            //parse in normal or reverse order
321            $count = count($lines);
322            if ($direction > 0) {
323                $start = 0;
324                $step = 1;
325            } else {
326                $start = $count - 1;
327                $step = -1;
328            }
329            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
330                $info = $this->parseLogLine($lines[$i]);
331                if ($this->cacheRevisionInfo($info)) {
332                    //look for revs older/earlier then reference $rev and select $direction-th one
333                    if (($direction > 0 && $info['date'] > $rev) || ($direction < 0 && $info['date'] < $rev)) {
334                        $revcounter++;
335                        if ($revcounter == abs($direction)) {
336                            $relativerev = $info['date'];
337                        }
338                    }
339                }
340            }
341
342            //true when $rev is found, but not the wanted follow-up.
343            $checkotherchunck = $fp
344                && ($info['date'] == $rev || ($revcounter > 0 && !$relativerev))
345                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));
346
347            if ($checkotherchunck) {
348                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);
349
350                if (empty($lines)) break;
351            }
352        }
353        if ($fp) {
354            fclose($fp);
355        }
356
357        return $relativerev;
358    }
359
360    /**
361     * Returns revisions around rev1 and rev2
362     * When available it returns $max entries for each revision
363     *
364     * @param int $rev1 oldest revision timestamp
365     * @param int $rev2 newest revision timestamp (0 looks up last revision)
366     * @param int $max maximum number of revisions returned
367     * @return array with two arrays with revisions surrounding rev1 respectively rev2
368     */
369    public function getRevisionsAround($rev1, $rev2, $max = 50)
370    {
371        $max = intval(abs($max) / 2) * 2 + 1;
372        $rev1 = max($rev1, 0);
373        $rev2 = max($rev2, 0);
374
375        if ($rev2) {
376            if ($rev2 < $rev1) {
377                $rev = $rev2;
378                $rev2 = $rev1;
379                $rev1 = $rev;
380            }
381        } else {
382            //empty right side means a removed page. Look up last revision.
383            $rev2 = $this->currentRevision();
384        }
385        //collect revisions around rev2
386        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $this->retrieveRevisionsAround($rev2, $max);
387
388        if (empty($revs2)) return array(array(), array());
389
390        //collect revisions around rev1
391        $index = array_search($rev1, $allrevs);
392        if ($index === false) {
393            //no overlapping revisions
394            list($revs1, , , , ,) = $this->retrieveRevisionsAround($rev1, $max);
395            if (empty($revs1)) $revs1 = array();
396        } else {
397            //revisions overlaps, reuse revisions around rev2
398            $lastrev = array_pop($allrevs); //keep last entry that could be external edit
399            $revs1 = $allrevs;
400            while ($head > 0) {
401                for ($i = count($lines) - 1; $i >= 0; $i--) {
402                    $info = $this->parseLogLine($lines[$i]);
403                    if ($this->cacheRevisionInfo($info)) {
404                        $revs1[] = $info['date'];
405                        $index++;
406
407                        if ($index > intval($max / 2)) break 2;
408                    }
409                }
410
411                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
412            }
413            sort($revs1);
414            $revs1[] = $lastrev; //push back last entry
415
416            //return wanted selection
417            $revs1 = array_slice($revs1, max($index - intval($max / 2), 0), $max);
418        }
419
420        return array(array_reverse($revs1), array_reverse($revs2));
421    }
422
423    /**
424     * Return an existing revision for a specific date which is
425     * the current one or younger or equal then the date
426     *
427     * @param number $date_at timestamp
428     * @return string revision ('' for current)
429     */
430    public function getLastRevisionAt($date_at)
431    {
432        $fileLastMod = $this->getFilename();
433        //requested date_at(timestamp) younger or equal then modified_time($this->id) => load current
434        if (file_exists($fileLastMod) && $date_at >= @filemtime($fileLastMod)) {
435            return '';
436        } else {
437            if ($rev = $this->getRelativeRevision($date_at + 1, -1)) { //+1 to get also the requested date revision
438                return $rev;
439            } else {
440                return false;
441            }
442        }
443    }
444
445    /**
446     * Collect the $max revisions near to the timestamp $rev
447     *
448     * Ideally, half of retrieved timestamps are older than $rev, another half are newer.
449     * The returned array $requestedrevs may not contain the reference timestamp $rev
450     * when it does not match any revision value recorded in changelog.
451     *
452     * @param int $rev revision timestamp
453     * @param int $max maximum number of revisions to be returned
454     * @return bool|array
455     *     return array with entries:
456     *       - $requestedrevs: array of with $max revision timestamps
457     *       - $revs: all parsed revision timestamps
458     *       - $fp: filepointer only defined for chuck reading, needs closing.
459     *       - $lines: non-parsed changelog lines before the parsed revisions
460     *       - $head: position of first readed changelogline
461     *       - $lasttail: position of end of last readed changelogline
462     *     otherwise false
463     */
464    protected function retrieveRevisionsAround($rev, $max)
465    {
466        $revs = array();
467        $aftercount = $beforecount = 0;
468
469        //get lines from changelog
470        list($fp, $lines, $starthead, $starttail, $eof) = $this->readloglines($rev);
471        if (empty($lines)) return false;
472
473        //parse changelog lines in chunk, and read forward more chunks until $max/2 is reached
474        $head = $starthead;
475        $tail = $starttail;
476        while (count($lines) > 0) {
477            foreach ($lines as $line) {
478                $info = $this->parseLogLine($line);
479                if ($this->cacheRevisionInfo($info)) {
480                    $revs[] = $info['date'];
481                    if ($info['date'] >= $rev) {
482                        //count revs after reference $rev
483                        $aftercount++;
484                        if ($aftercount == 1) $beforecount = count($revs);
485                    }
486                    //enough revs after reference $rev?
487                    if ($aftercount > intval($max / 2)) break 2;
488                }
489            }
490            //retrieve next chunk
491            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
492        }
493        $lasttail = $tail;
494
495        // add a possible revision of external edit, create or deletion
496        if ($lasttail == $eof && $aftercount <= intval($max / 2) &&
497            count($revs) && !$this->isCurrentRevision($revs[count($revs)-1])
498        ) {
499            $revs[] = $this->currentRevision;
500            $aftercount++;
501        }
502
503        if ($aftercount == 0) {
504            //given timestamp $rev is newer than the most recent line in chunk
505            return false; //FIXME: or proceed to collect older revisions?
506        }
507
508        //read more chunks backward until $max/2 is reached and total number of revs is equal to $max
509        $lines = array();
510        $i = 0;
511        if ($aftercount > 0) {
512            $head = $starthead;
513            $tail = $starttail;
514            while ($head > 0) {
515                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
516
517                for ($i = count($lines) - 1; $i >= 0; $i--) {
518                    $info = $this->parseLogLine($lines[$i]);
519                    if ($this->cacheRevisionInfo($info)) {
520                        $revs[] = $info['date'];
521                        $beforecount++;
522                        //enough revs before reference $rev?
523                        if ($beforecount > max(intval($max / 2), $max - $aftercount)) break 2;
524                    }
525                }
526            }
527        }
528        //keep only non-parsed lines
529        $lines = array_slice($lines, 0, $i);
530
531        sort($revs);
532
533        //trunk desired selection
534        $requestedrevs = array_slice($revs, -$max, $max);
535
536        return array($requestedrevs, $revs, $fp, $lines, $head, $lasttail);
537    }
538
539    /**
540     * Get the current revision information, considering external edit, create or deletion
541     *
542     * The "current" revison is the last timestamp of the page in the context of changelog.
543     * However it is often recognised that is in sight now from the DokuWiki user perspective.
544     * The current page is accessible without any revision identifier (eg. doku.php?id=foo),
545     * but it has unique modification time of the source txt file and kept in changelog.
546     * When the page is deleted by saving blank text in the DokuWiki editor, the deletion
547     * time is to be kept as its revision identifier in the changelog.
548     *
549     * External edit will break consistency between the file and changelog. A page source
550     * file might be modified, created or deleted without using DokuWiki editor, instead
551     * by accessing direct to the file stored in data directory via server console.
552     * Such editions are never recorded in changelog. However after external file edit,
553     * now we can see new "current" content of the edited page!
554     *
555     * A tentative revision should be assigned for the external edition to handle whole
556     * revisions successfully in DokuWiki revision list and diff view interface.
557     * As far as the source file of the edition exists, a unique revision can be decided
558     * using function filemtime(), but it could be unknown if the foo.txt file had deleted
559     * or moved to foo.bak file.
560     * In such case, we assume unknown revision as "last timestamp in changelog" +1
561     * to ensure that current one should be newer than any revisions in changelog.
562     * Another case of external edit: when foo.bak file moved back to foo.txt, the current
563     * one could become older than latest timestamp in changelog. In this case, we should
564     * assume the revison as "last timestamp in chagelog" +1, instead of its timestamp.
565     *
566     * @return bool|array false when page had never existed or array with entries:
567     *      - date:  revision identifier (timestamp or last revision +1)
568     *      - ip:    IPv4 address (127.0.0.1)
569     *      - type:  log line type
570     *      - id:    id of page or media
571     *      - user:  user name
572     *      - sum:   edit summary (or action reason)
573     *      - extra: extra data (varies by line type)
574     *      - sizechange: change of filesize
575     *      - timestamp: unix timestamp or false (key set only for external edit occurred)
576     *
577     * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
578     */
579    public function getCurrentRevisionInfo()
580    {
581        global $lang;
582
583        if (isset($this->currentRevision)) return $this->getRevisionInfo($this->currentRevision);
584
585        // get revision id from the item file timestamp and chagelog
586        $fileLastMod = $this->getFilename();
587        $fileRev = @filemtime($fileLastMod); // false when the file not exist
588        $lastRev = $this->lastRevision();    // false when no changelog
589
590        if (!$fileRev && !$lastRev) {                // has never existed
591            $this->currentRevision = false;
592            return false;
593        } elseif ($fileRev === $lastRev) {           // not external edit
594            $this->currentRevision = $lastRev;
595            return $this->getRevisionInfo($lastRev);
596        }
597
598        if (!$fileRev && $lastRev) {                 // item file does not exist
599            // check consistency against changelog
600            $revInfo = $this->getRevisionInfo($lastRev);
601            if ($revInfo['type'] == DOKU_CHANGE_TYPE_DELETE) {
602                $this->currentRevision = $lastRev;
603                return $this->getRevisionInfo($lastRev);
604            }
605
606            // externally deleted
607            $revInfo = [
608                'date' => $lastRev +1,
609                'ip'   => '127.0.0.1',
610                'type' => DOKU_CHANGE_TYPE_DELETE,
611                'id'   => $this->id,
612                'user' => '',
613                'sum'  => $lang['deleted'].' - '.$lang['external_edit'].' ('.$lang['unknowndate'].')',
614                'extra' => '',
615                'sizechange' => -io_getSizeFile($this->getFilename($lastRev)),
616                'timestamp' => false,
617            ];
618
619        } elseif ($fileRev) {                        // item file exist
620            // here, file timestamp is different with last revision in changelog
621            $isJustCreated = $lastRev === false || (
622                    $fileRev > $lastRev &&
623                    $this->getRevisionInfo($lastRev)['type'] == DOKU_CHANGE_TYPE_DELETE
624            );
625            $filesize_new = filesize($this->getFilename());
626            $filesize_old = $isJustCreated ? 0 : io_getSizeFile($this->getFilename($lastRev));
627            $sizechange = $filesize_new - $filesize_old;
628
629            if ($isJustCreated) {
630                $timestamp = $fileRev;
631                $sum = $lang['created'].' - '.$lang['external_edit'];
632            } elseif ($fileRev > $lastRev) {
633                $timestamp = $fileRev;
634                $sum = $lang['external_edit'];
635            } else {
636                // $fileRev is older than $lastRev, that is erroneous/incorrect occurence.
637                // try to change file modification time to the detection time
638                $timestamp = touch($fileLastMod) ? filemtime($fileLastMod) : false;
639                $sum = $lang['external_edit'].' ('.$lang['unknowndate'].')';
640            }
641
642            // externally created or edited
643            $revInfo = [
644                'date' => $timestamp ?: $lastRev +1,
645                'ip'   => '127.0.0.1',
646                'type' => $isJustCreated ? DOKU_CHANGE_TYPE_CREATE : DOKU_CHANGE_TYPE_EDIT,
647                'id'   => $this->id,
648                'user' => '',
649                'sum'  => $sum,
650                'extra' => '',
651                'sizechange' => $sizechange,
652                'timestamp' => $timestamp,
653            ];
654        }
655
656        // cache current revision information of external edition
657        $this->currentRevision = $revInfo['date'];
658        $this->cache[$this->id][$this->currentRevision] = $revInfo;
659        return $this->getRevisionInfo($this->currentRevision);
660    }
661}
662