xref: /dokuwiki/inc/ChangeLog/ChangeLog.php (revision 05282e9f6254abed0b8dec15cd6b137aca339a41)
1<?php
2
3namespace dokuwiki\ChangeLog;
4
/**
 * ChangeLog Prototype; methods for handling changelog
 *
 * Base class for reading the changelog of a single page or media item.
 * Parsed changelog lines are kept in the global $cache_revinfo pool, keyed by
 * item id and revision date, so that repeated lookups do not re-read the file.
 * File access helpers (readloglines(), readAdjacentChunk(), parseLogLine(), ...)
 * are provided by ChangeLogTrait.
 */
abstract class ChangeLog
{
    use ChangeLogTrait;

    /** @var string */
    protected $id;
    /** @var int|false|null revision identifier; false when the item never existed, null until determined */
    protected $currentRevision;
    /** @var array reference to the global revision info cache */
    protected $cache;

    /**
     * Constructor
     *
     * @param string $id page id
     * @param int $chunk_size maximum block size read from file
     */
    public function __construct($id, $chunk_size = 8192)
    {
        global $cache_revinfo;

        // share one cache pool between all ChangeLog instances
        $this->cache =& $cache_revinfo;
        if (!isset($this->cache[$id])) {
            $this->cache[$id] = [];
        }

        $this->id = $id;
        $this->setChunkSize($chunk_size);
        // set property currentRevision and cache prior to getRevisionInfo($currentRev) call
        $this->getCurrentRevisionInfo();
    }

    /**
     * Returns path to current page/media
     *
     * NOTE(review): getCurrentRevisionInfo() calls this method with a revision
     * argument to size an older revision. The declaration here takes no parameter,
     * so implementations presumably accept an optional revision - confirm in the
     * subclasses before relying on the computed size change.
     *
     * @return string path to file
     */
    abstract protected function getFilename();

    /**
     * Check whether given revision is the current page
     *
     * The comparison is loose, so numeric strings match integer identifiers.
     *
     * @param int $rev timestamp of current page
     * @return bool true if $rev is current revision, otherwise false
     */
    public function isCurrentRevision($rev)
    {
        return $rev == $this->currentRevision();
    }

    /**
     * Checks if the revision is last revision
     *
     * The comparison is strict: $rev must have the same type and value as the
     * date of the last changelog entry.
     *
     * @param int $rev revision timestamp
     * @return bool true if $rev is last revision, otherwise false
     */
    public function isLastRevision($rev = null)
    {
        return $rev === $this->lastRevision();
    }

    /**
     * Return the current revision identifier
     *
     * The "current" revision means current version of the page or media file. It is either
     * identical with or newer than the "last" revision, that depends on whether the file
     * has been modified, created or deleted outside of DokuWiki.
     * The value of identifier can be determined by timestamp as far as the file exists,
     * otherwise it must be assigned larger than any other revisions to keep them sortable.
     *
     * @return int|false revision timestamp
     */
    public function currentRevision()
    {
        if (!isset($this->currentRevision)) {
            // set ChangeLog::currentRevision property
            $this->getCurrentRevisionInfo();
        }
        return $this->currentRevision;
    }

    /**
     * Return the last revision identifier, date value of the last entry of the changelog
     *
     * @return int|false revision timestamp
     */
    public function lastRevision()
    {
        // a negative $first includes the most recent changelog line
        $revs = $this->getRevisions(-1, 1);
        return empty($revs) ? false : $revs[0];
    }

    /**
     * Save revision info to the cache pool
     *
     * An entry that is already cached for the same date is never overwritten.
     *
     * @param array $info Revision info structure
     * @return bool true when $info was an array (and is now cached), otherwise false
     */
    protected function cacheRevisionInfo($info)
    {
        if (!is_array($info)) return false;
        // keep the existing entry if there is one (the ??= operator would need PHP 7.4)
        $this->cache[$this->id][$info['date']] = $this->cache[$this->id][$info['date']] ?? $info;
        return true;
    }

    /**
     * Get the changelog information for a specific revision (timestamp)
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
     * containing the requested changelog line is read.
     *
     * @param int $rev revision timestamp
     * @return bool|array false or array with entries:
     *      - date:  unix timestamp
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    page id
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *      - sizechange: change of filesize
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisionInfo($rev)
    {
        $rev = max(0, $rev);
        if (!$rev) return false;

        // check if it's already in the memory cache
        if (isset($this->cache[$this->id][$rev])) {
            return $this->cache[$this->id][$rev];
        }

        //read lines from changelog
        list($fp, $lines) = $this->readloglines($rev);
        if ($fp) {
            fclose($fp);
        }
        if (empty($lines)) return false;

        // parse and cache changelog lines
        foreach ($lines as $value) {
            $info = $this->parseLogLine($value);
            $this->cacheRevisionInfo($info);
        }

        // the requested revision is only available when one of the lines matched it
        return $this->cache[$this->id][$rev] ?? false;
    }

    /**
     * Return a list of page revisions numbers
     *
     * Does not guarantee that the revision exists in the attic,
     * only that a line with the date exists in the changelog.
     * By default the current revision is skipped.
     *
     * The current revision is automatically skipped when the page exists.
     * See $INFO['meta']['last_change'] for the current revision.
     * A negative $first lets the current revision be read too.
     *
     * For efficiency, the log lines are parsed and cached for later
     * calls to getRevisionInfo. Large changelog files are read
     * backwards in chunks until the requested number of changelog
     * lines are received.
     *
     * @param int $first skip the first n changelog lines
     * @param int $num number of revisions to return
     * @return array with the revision timestamps, newest first
     *
     * @author Ben Coburn <btcoburn@silicodon.net>
     * @author Kate Arzamastseva <pshns@ukr.net>
     */
    public function getRevisions($first, $num)
    {
        $revs = [];
        $lines = [];
        $count = 0;

        $logfile = $this->getChangelogFilename();
        if (!file_exists($logfile)) return $revs;

        $num = max($num, 0);
        if ($num == 0) {
            return $revs;
        }

        if ($first < 0) {
            $first = 0;
        } else {
            $fileLastMod = $this->getFilename();
            if (file_exists($fileLastMod) && $this->isLastRevision(filemtime($fileLastMod))) {
                // skip last revision if the page exists
                $first = max($first + 1, 0);
            }
        }

        if (filesize($logfile) < $this->chunk_size || $this->chunk_size == 0) {
            // read whole file
            $lines = file($logfile);
            if ($lines === false) {
                return $revs;
            }
        } else {
            // read chunks backwards
            $fp = fopen($logfile, 'rb'); // "file pointer"
            if ($fp === false) {
                return $revs;
            }
            fseek($fp, 0, SEEK_END);
            $tail = ftell($fp);

            // chunk backwards
            $finger = max($tail - $this->chunk_size, 0);
            while ($count < $num + $first) {
                $nl = $this->getNewlinepointer($fp, $finger);

                // was the chunk big enough? if not, take another bite
                if ($nl > 0 && $tail <= $nl) {
                    $finger = max($finger - $this->chunk_size, 0);
                    continue;
                } else {
                    $finger = $nl;
                }

                // read chunk
                $chunk = '';
                $read_size = max($tail - $finger, 0); // found chunk size
                $got = 0;
                while ($got < $read_size && !feof($fp)) {
                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
                    if ($tmp === false) {
                        break;
                    } //error state
                    $got += strlen($tmp);
                    $chunk .= $tmp;
                }
                $tmp = explode("\n", $chunk);
                array_pop($tmp); // remove trailing newline

                // combine with previous chunk
                $count += count($tmp);
                $lines = array_merge($tmp, $lines);

                // next chunk
                if ($finger == 0) {
                    break; // already read all the lines
                } else {
                    $tail = $finger;
                    $finger = max($tail - $this->chunk_size, 0);
                }
            }
            fclose($fp);
        }

        // skip parsing extra lines
        $num = max(min(count($lines) - $first, $num), 0);
        if ($first > 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
        } elseif ($first > 0 && $num == 0) {
            $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
        } elseif ($first == 0 && $num > 0) {
            $lines = array_slice($lines, max(count($lines) - $num, 0));
        }

        // handle lines in reverse order
        for ($i = count($lines) - 1; $i >= 0; $i--) {
            $info = $this->parseLogLine($lines[$i]);
            if ($this->cacheRevisionInfo($info)) {
                $revs[] = $info['date'];
            }
        }

        return $revs;
    }

    /**
     * Get the nth revision left or right hand side for a specific page id and revision (timestamp)
     *
     * For large changelog files, only the chunk containing the
     * reference revision $rev is read and sometimes a next chunk.
     *
     * Adjacent changelog lines are optimistically parsed and cached to speed up
     * consecutive calls to getRevisionInfo.
     *
     * @param int $rev revision timestamp used as start date
     *    (doesn't need to be exact revision number)
     * @param int $direction give position of returned revision with respect to $rev;
     *    positive=next, negative=prev
     * @return bool|int
     *      timestamp of the requested revision
     *      otherwise false
     */
    public function getRelativeRevision($rev, $direction)
    {
        $rev = max($rev, 0);
        $direction = (int)$direction;

        //no direction given or last rev, so no follow-up
        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
            return false;
        }

        //get lines from changelog
        list($fp, $lines, $head, $tail, $eof) = $this->readloglines($rev);
        if (empty($lines)) {
            // do not leave the changelog open when there is nothing to scan
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        // look for revisions later/earlier than $rev, when found count till the wanted revision is reached
        // also parse and cache changelog lines for getRevisionInfo().
        $revcounter = 0;
        $relativerev = false;
        $checkotherchunck = true; //always runs once
        while (!$relativerev && $checkotherchunck) {
            $info = [];
            //parse in normal or reverse order
            $count = count($lines);
            if ($direction > 0) {
                $start = 0;
                $step = 1;
            } else {
                $start = $count - 1;
                $step = -1;
            }
            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
                $info = $this->parseLogLine($lines[$i]);
                if ($this->cacheRevisionInfo($info)) {
                    //look for revs later/earlier than reference $rev and select $direction-th one
                    if (($direction > 0 && $info['date'] > $rev) || ($direction < 0 && $info['date'] < $rev)) {
                        $revcounter++;
                        if ($revcounter == abs($direction)) {
                            $relativerev = $info['date'];
                        }
                    }
                }
            }

            // date of the last line handled in this chunk; null when that line could not be parsed
            $lastParsedDate = $info['date'] ?? null;

            //true when $rev is found, but not the wanted follow-up.
            $checkotherchunck = $fp
                && ($lastParsedDate == $rev || ($revcounter > 0 && !$relativerev))
                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));

            if ($checkotherchunck) {
                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);

                if (empty($lines)) break;
            }
        }
        if ($fp) {
            fclose($fp);
        }

        return $relativerev;
    }

    /**
     * Returns revisions around rev1 and rev2
     * When available it returns $max entries for each revision
     *
     * @param int $rev1 oldest revision timestamp
     * @param int $rev2 newest revision timestamp (0 looks up last revision)
     * @param int $max maximum number of revisions returned
     * @return array with two arrays with revisions surrounding rev1 respectively rev2
     */
    public function getRevisionsAround($rev1, $rev2, $max = 50)
    {
        // force an odd number so the reference revision can sit in the middle
        $max = intval(abs($max) / 2) * 2 + 1;
        $rev1 = max($rev1, 0);
        $rev2 = max($rev2, 0);

        if ($rev2) {
            if ($rev2 < $rev1) {
                $rev = $rev2;
                $rev2 = $rev1;
                $rev1 = $rev;
            }
        } else {
            //empty right side means a removed page. Look up last revision.
            $rev2 = $this->currentRevision();
        }

        //collect revisions around rev2
        $around2 = $this->retrieveRevisionsAround($rev2, $max);
        if ($around2 === false) return [[], []];
        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $around2;

        if (empty($revs2)) {
            if ($fp) {
                fclose($fp);
            }
            return [[], []];
        }

        //collect revisions around rev1
        $index = array_search($rev1, $allrevs);
        if ($index === false) {
            //no overlapping revisions
            $revs1 = [];
            $around1 = $this->retrieveRevisionsAround($rev1, $max);
            if ($around1 !== false) {
                if (!empty($around1[0])) $revs1 = $around1[0];
                // the second lookup opened its own file pointer, release it right away
                if ($around1[2]) {
                    fclose($around1[2]);
                }
            }
        } else {
            //revisions overlap, reuse revisions around rev2
            $lastrev = array_pop($allrevs); //keep last entry that could be external edit
            $revs1 = $allrevs;
            while ($head > 0) {
                for ($i = count($lines) - 1; $i >= 0; $i--) {
                    $info = $this->parseLogLine($lines[$i]);
                    if ($this->cacheRevisionInfo($info)) {
                        $revs1[] = $info['date'];
                        $index++;

                        if ($index > intval($max / 2)) break 2;
                    }
                }

                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
            }
            sort($revs1);
            $revs1[] = $lastrev; //push back last entry

            //return wanted selection
            $revs1 = array_slice($revs1, max($index - intval($max / 2), 0), $max);
        }

        // the pointer handed over by retrieveRevisionsAround() is no longer needed
        if ($fp) {
            fclose($fp);
        }

        return [array_reverse($revs1), array_reverse($revs2)];
    }

    /**
     * Return an existing revision for a specific date which is
     * the current one or younger or equal than the date
     *
     * @param number $date_at timestamp
     * @return string|int|false '' for the current revision, the revision timestamp of
     *      an older revision, or false when no matching revision was found
     */
    public function getLastRevisionAt($date_at)
    {
        $fileLastMod = $this->getFilename();
        //requested date_at(timestamp) younger or equal than modified_time($this->id) => load current
        if (file_exists($fileLastMod) && $date_at >= @filemtime($fileLastMod)) {
            return '';
        }

        //+1 to get also the requested date revision
        $rev = $this->getRelativeRevision($date_at + 1, -1);
        return $rev ?: false;
    }

    /**
     * Collect the $max revisions near to the timestamp $rev
     *
     * Ideally, half of retrieved timestamps are older than $rev, another half are newer.
     * The returned array $requestedrevs may not contain the reference timestamp $rev
     * when it does not match any revision value recorded in changelog.
     *
     * @param int $rev revision timestamp
     * @param int $max maximum number of revisions to be returned
     * @return bool|array
     *     return array with entries:
     *       - $requestedrevs: array with up to $max revision timestamps
     *       - $revs: all parsed revision timestamps
     *       - $fp: file pointer only defined for chunk reading, needs closing by the caller.
     *       - $lines: non-parsed changelog lines before the parsed revisions
     *       - $head: position of first read changelog line
     *       - $lasttail: position of end of last read changelog line
     *     otherwise false (the file pointer is already closed in that case)
     */
    protected function retrieveRevisionsAround($rev, $max)
    {
        $revs = [];
        $aftercount = $beforecount = 0;

        //get lines from changelog
        list($fp, $lines, $starthead, $starttail, $eof) = $this->readloglines($rev);
        if (empty($lines)) {
            if ($fp) {
                fclose($fp);
            }
            return false;
        }

        //parse changelog lines in chunk, and read forward more chunks until $max/2 is reached
        $head = $starthead;
        $tail = $starttail;
        while (count($lines) > 0) {
            foreach ($lines as $line) {
                $info = $this->parseLogLine($line);
                if ($this->cacheRevisionInfo($info)) {
                    $revs[] = $info['date'];
                    if ($info['date'] >= $rev) {
                        //count revs after reference $rev
                        $aftercount++;
                        if ($aftercount == 1) $beforecount = count($revs);
                    }
                    //enough revs after reference $rev?
                    if ($aftercount > intval($max / 2)) break 2;
                }
            }
            //retrieve next chunk
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
        }
        $lasttail = $tail;

        // add a possible revision of external edit, create or deletion
        if ($lasttail == $eof && $aftercount <= intval($max / 2) &&
            count($revs) && !$this->isCurrentRevision($revs[count($revs) - 1])
        ) {
            $revs[] = $this->currentRevision;
            $aftercount++;
        }

        if ($aftercount == 0) {
            //given timestamp $rev is newer than the most recent line in chunk
            if ($fp) {
                fclose($fp);
            }
            return false; //FIXME: or proceed to collect older revisions?
        }

        //read more chunks backward until $max/2 is reached and total number of revs is equal to $max
        $lines = [];
        $i = 0;
        $head = $starthead;
        $tail = $starttail;
        while ($head > 0) {
            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);

            for ($i = count($lines) - 1; $i >= 0; $i--) {
                $info = $this->parseLogLine($lines[$i]);
                if ($this->cacheRevisionInfo($info)) {
                    $revs[] = $info['date'];
                    $beforecount++;
                    //enough revs before reference $rev?
                    if ($beforecount > max(intval($max / 2), $max - $aftercount)) break 2;
                }
            }
        }
        //keep only non-parsed lines; $i is -1 when the last chunk was consumed completely,
        //which must yield no lines instead of a negative slice length
        $lines = array_slice($lines, 0, max($i, 0));

        sort($revs);

        //trunk desired selection
        $requestedrevs = array_slice($revs, -$max, $max);

        return [$requestedrevs, $revs, $fp, $lines, $head, $lasttail];
    }

    /**
     * Get the current revision information, considering external edit, create or deletion
     *
     * When the file has not been modified since its last revision, the information of the last
     * change that had already been recorded in the changelog is returned as current change info.
     * Otherwise, the change information since the last revision caused outside DokuWiki
     * should be returned, which is referred as "external revision".
     *
     * The change date of the file can be determined by timestamp as far as the file exists,
     * however this is not possible when the file has already been deleted outside of DokuWiki.
     * In such case we assign the current time() value for the external deletion, or the last
     * revision + 1 when the clock is not ahead of the changelog. The change date is used as
     * revision identifier.
     * As a result, the value of current revision identifier may change each time because:
     *   1) the file has again been modified outside of DokuWiki, or
     *   2) the value is essentially volatile for deleted but once existed files.
     *
     * @return bool|array false when page had never existed or array with entries:
     *      - date:  revision identifier (timestamp or last revision +1)
     *      - ip:    IPv4 address (127.0.0.1)
     *      - type:  log line type
     *      - id:    id of page or media
     *      - user:  user name
     *      - sum:   edit summary (or action reason)
     *      - extra: extra data (varies by line type)
     *      - sizechange: change of filesize
     *      - timestamp: unix timestamp or false (key set only for external edit occurred)
     *
     * @author  Satoshi Sahara <sahara.satoshi@gmail.com>
     */
    public function getCurrentRevisionInfo()
    {
        global $lang;

        if (isset($this->currentRevision)) return $this->getRevisionInfo($this->currentRevision);

        // get revision id from the item file timestamp and changelog
        $fileLastMod = $this->getFilename();
        $fileRev = @filemtime($fileLastMod); // false when the file does not exist
        $lastRev = $this->lastRevision();    // false when no changelog

        if (!$fileRev && !$lastRev) {                // has never existed
            $this->currentRevision = false;
            return false;
        } elseif ($fileRev === $lastRev) {           // not external edit
            $this->currentRevision = $lastRev;
            return $this->getRevisionInfo($lastRev);
        }

        if (!$fileRev && $lastRev) {                 // item file does not exist
            // check consistency against changelog
            $lastRevInfo = $this->getRevisionInfo($lastRev);
            if (($lastRevInfo['type'] ?? null) == DOKU_CHANGE_TYPE_DELETE) {
                $this->currentRevision = $lastRev;
                return $this->getRevisionInfo($lastRev);
            }

            // externally deleted
            $revInfo = [
                // current time, but always newer than the last changelog entry so that
                // revisions stay sortable and the cached last entry is not overwritten
                'date' => max($lastRev + 1, time()),
                'ip'   => '127.0.0.1',
                'type' => DOKU_CHANGE_TYPE_DELETE,
                'id'   => $this->id,
                'user' => '',
                'sum'  => $lang['deleted'].' - '.$lang['external_edit'].' ('.$lang['unknowndate'].')',
                'extra' => '',
                'sizechange' => -io_getSizeFile($this->getFilename($lastRev)),
                'timestamp' => false,
            ];
        } else {                                     // item file exists
            // here, file timestamp is different from last revision in changelog
            $isJustCreated = $lastRev === false || (
                    $fileRev > $lastRev &&
                    ($this->getRevisionInfo($lastRev)['type'] ?? null) == DOKU_CHANGE_TYPE_DELETE
            );
            $filesize_new = filesize($this->getFilename());
            $filesize_old = $isJustCreated ? 0 : io_getSizeFile($this->getFilename($lastRev));
            $sizechange = $filesize_new - $filesize_old;

            if ($isJustCreated) {
                $timestamp = $fileRev;
                $sum = $lang['created'].' - '.$lang['external_edit'];
            } elseif ($fileRev > $lastRev) {
                $timestamp = $fileRev;
                $sum = $lang['external_edit'];
            } else {
                // $fileRev is older than $lastRev, that is an erroneous/incorrect occurrence.
                // try to change file modification time to the detection time
                $timestamp = touch($fileLastMod) ? filemtime($fileLastMod) : false;
                $sum = $lang['external_edit'].' ('.$lang['unknowndate'].')';
            }

            // externally created or edited
            $revInfo = [
                'date' => $timestamp ?: time(),
                'ip'   => '127.0.0.1',
                'type' => $isJustCreated ? DOKU_CHANGE_TYPE_CREATE : DOKU_CHANGE_TYPE_EDIT,
                'id'   => $this->id,
                'user' => '',
                'sum'  => $sum,
                'extra' => '',
                'sizechange' => $sizechange,
                'timestamp' => $timestamp,
            ];
        }

        // cache current revision information of external edition
        $this->currentRevision = $revInfo['date'];
        $this->cache[$this->id][$this->currentRevision] = $revInfo;
        return $this->getRevisionInfo($this->currentRevision);
    }
}
658