xref: /dokuwiki/inc/ChangeLog/ChangeLog.php (revision ccc4c71ca88c25bcefb7f42eb01f0c040487e3a9)
1<?php
2
3namespace dokuwiki\ChangeLog;
4
5/**
6 * methods for handling of changelog of pages or media files
7 */
8abstract class ChangeLog
9{
10
11    /** @var string */
12    protected $id;
13    /** @var int */
14    protected $chunk_size;
15    /** @var array */
16    protected $cache;
17
18    /**
19     * Constructor
20     *
21     * @param string $id page id
22     * @param int $chunk_size maximum block size read from file
23     */
24    public function __construct($id, $chunk_size = 8192)
25    {
26        global $cache_revinfo;
27
28        $this->cache =& $cache_revinfo;
29        if (!isset($this->cache[$id])) {
30            $this->cache[$id] = array();
31        }
32
33        $this->id = $id;
34        $this->setChunkSize($chunk_size);
35
36    }
37
38    /**
39     * Set chunk size for file reading
40     * Chunk size zero let read whole file at once
41     *
42     * @param int $chunk_size maximum block size read from file
43     */
44    public function setChunkSize($chunk_size)
45    {
46        if (!is_numeric($chunk_size)) $chunk_size = 0;
47
48        $this->chunk_size = (int)max($chunk_size, 0);
49    }
50
    /**
     * Returns path to changelog
     *
     * Has to be implemented by the concrete subclass. Every reader in this
     * class (getRevisions(), readloglines(), hasRevisions()) opens or tests
     * the file named here.
     *
     * @return string path to file
     */
    abstract protected function getChangelogFilename();
57
    /**
     * Returns path to current page/media
     *
     * Has to be implemented by the concrete subclass. This class only checks
     * the returned file for existence and reads its modification time; it
     * never reads its content.
     *
     * @return string path to file
     */
    abstract protected function getFilename();
64
65    /**
66     * Get the changelog information for a specific page id and revision (timestamp)
67     *
68     * Adjacent changelog lines are optimistically parsed and cached to speed up
69     * consecutive calls to getRevisionInfo. For large changelog files, only the chunk
70     * containing the requested changelog line is read.
71     *
72     * @param int $rev revision timestamp
73     * @return bool|array false or array with entries:
74     *      - date:  unix timestamp
75     *      - ip:    IPv4 address (127.0.0.1)
76     *      - type:  log line type
77     *      - id:    page id
78     *      - user:  user name
79     *      - sum:   edit summary (or action reason)
80     *      - extra: extra data (varies by line type)
81     *
82     * @author Ben Coburn <btcoburn@silicodon.net>
83     * @author Kate Arzamastseva <pshns@ukr.net>
84     */
85    public function getRevisionInfo($rev)
86    {
87        $rev = max($rev, 0);
88
89        // check if it's already in the memory cache
90        if (isset($this->cache[$this->id]) && isset($this->cache[$this->id][$rev])) {
91            return $this->cache[$this->id][$rev];
92        }
93
94        //read lines from changelog
95        list($fp, $lines) = $this->readloglines($rev);
96        if ($fp) {
97            fclose($fp);
98        }
99        if (empty($lines)) return false;
100
101        // parse and cache changelog lines
102        foreach ($lines as $value) {
103            $tmp = parseChangelogLine($value);
104            if ($tmp !== false) {
105                $this->cache[$this->id][$tmp['date']] = $tmp;
106            }
107        }
108        if (!isset($this->cache[$this->id][$rev])) {
109            return false;
110        }
111        return $this->cache[$this->id][$rev];
112    }
113
114    /**
115     * Return a list of page revisions numbers
116     *
117     * Does not guarantee that the revision exists in the attic,
118     * only that a line with the date exists in the changelog.
119     * By default the current revision is skipped.
120     *
121     * The current revision is automatically skipped when the page exists.
122     * See $INFO['meta']['last_change'] for the current revision.
123     * A negative $first let read the current revision too.
124     *
125     * For efficiency, the log lines are parsed and cached for later
126     * calls to getRevisionInfo. Large changelog files are read
127     * backwards in chunks until the requested number of changelog
128     * lines are recieved.
129     *
130     * @param int $first skip the first n changelog lines
131     * @param int $num number of revisions to return
132     * @return array with the revision timestamps
133     *
134     * @author Ben Coburn <btcoburn@silicodon.net>
135     * @author Kate Arzamastseva <pshns@ukr.net>
136     */
137    public function getRevisions($first, $num)
138    {
139        $revs = array();
140        $lines = array();
141        $count = 0;
142
143        $num = max($num, 0);
144        if ($num == 0) {
145            return $revs;
146        }
147
148        if ($first < 0) {
149            $first = 0;
150        } else {
151            if (file_exists($this->getFilename())) {
152                // skip current revision if the page exists
153                $first = max($first + 1, 0);
154            }
155        }
156
157        $file = $this->getChangelogFilename();
158
159        if (!file_exists($file)) {
160            return $revs;
161        }
162        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
163            // read whole file
164            $lines = file($file);
165            if ($lines === false) {
166                return $revs;
167            }
168        } else {
169            // read chunks backwards
170            $fp = fopen($file, 'rb'); // "file pointer"
171            if ($fp === false) {
172                return $revs;
173            }
174            fseek($fp, 0, SEEK_END);
175            $tail = ftell($fp);
176
177            // chunk backwards
178            $finger = max($tail - $this->chunk_size, 0);
179            while ($count < $num + $first) {
180                $nl = $this->getNewlinepointer($fp, $finger);
181
182                // was the chunk big enough? if not, take another bite
183                if ($nl > 0 && $tail <= $nl) {
184                    $finger = max($finger - $this->chunk_size, 0);
185                    continue;
186                } else {
187                    $finger = $nl;
188                }
189
190                // read chunk
191                $chunk = '';
192                $read_size = max($tail - $finger, 0); // found chunk size
193                $got = 0;
194                while ($got < $read_size && !feof($fp)) {
195                    $tmp = @fread($fp, max(min($this->chunk_size, $read_size - $got), 0));
196                    if ($tmp === false) {
197                        break;
198                    } //error state
199                    $got += strlen($tmp);
200                    $chunk .= $tmp;
201                }
202                $tmp = explode("\n", $chunk);
203                array_pop($tmp); // remove trailing newline
204
205                // combine with previous chunk
206                $count += count($tmp);
207                $lines = array_merge($tmp, $lines);
208
209                // next chunk
210                if ($finger == 0) {
211                    break;
212                } // already read all the lines
213                else {
214                    $tail = $finger;
215                    $finger = max($tail - $this->chunk_size, 0);
216                }
217            }
218            fclose($fp);
219        }
220
221        // skip parsing extra lines
222        $num = max(min(count($lines) - $first, $num), 0);
223        if ($first > 0 && $num > 0) {
224            $lines = array_slice($lines, max(count($lines) - $first - $num, 0), $num);
225        } else {
226            if ($first > 0 && $num == 0) {
227                $lines = array_slice($lines, 0, max(count($lines) - $first, 0));
228            } elseif ($first == 0 && $num > 0) {
229                $lines = array_slice($lines, max(count($lines) - $num, 0));
230            }
231        }
232
233        // handle lines in reverse order
234        for ($i = count($lines) - 1; $i >= 0; $i--) {
235            $tmp = parseChangelogLine($lines[$i]);
236            if ($tmp !== false) {
237                $this->cache[$this->id][$tmp['date']] = $tmp;
238                $revs[] = $tmp['date'];
239            }
240        }
241
242        return $revs;
243    }
244
245    /**
246     * Get the nth revision left or right handside  for a specific page id and revision (timestamp)
247     *
248     * For large changelog files, only the chunk containing the
249     * reference revision $rev is read and sometimes a next chunck.
250     *
251     * Adjacent changelog lines are optimistically parsed and cached to speed up
252     * consecutive calls to getRevisionInfo.
253     *
254     * @param int $rev revision timestamp used as startdate (doesn't need to be revisionnumber)
255     * @param int $direction give position of returned revision with respect to $rev; positive=next, negative=prev
256     * @return bool|int
257     *      timestamp of the requested revision
258     *      otherwise false
259     */
260    public function getRelativeRevision($rev, $direction)
261    {
262        $rev = max($rev, 0);
263        $direction = (int)$direction;
264
265        //no direction given or last rev, so no follow-up
266        if (!$direction || ($direction > 0 && $this->isCurrentRevision($rev))) {
267            return false;
268        }
269
270        //get lines from changelog
271        list($fp, $lines, $head, $tail, $eof) = $this->readloglines($rev);
272        if (empty($lines)) return false;
273
274        // look for revisions later/earlier then $rev, when founded count till the wanted revision is reached
275        // also parse and cache changelog lines for getRevisionInfo().
276        $revcounter = 0;
277        $relativerev = false;
278        $checkotherchunck = true; //always runs once
279        while (!$relativerev && $checkotherchunck) {
280            $tmp = array();
281            //parse in normal or reverse order
282            $count = count($lines);
283            if ($direction > 0) {
284                $start = 0;
285                $step = 1;
286            } else {
287                $start = $count - 1;
288                $step = -1;
289            }
290            for ($i = $start; $i >= 0 && $i < $count; $i = $i + $step) {
291                $tmp = parseChangelogLine($lines[$i]);
292                if ($tmp !== false) {
293                    $this->cache[$this->id][$tmp['date']] = $tmp;
294                    //look for revs older/earlier then reference $rev and select $direction-th one
295                    if (($direction > 0 && $tmp['date'] > $rev) || ($direction < 0 && $tmp['date'] < $rev)) {
296                        $revcounter++;
297                        if ($revcounter == abs($direction)) {
298                            $relativerev = $tmp['date'];
299                        }
300                    }
301                }
302            }
303
304            //true when $rev is found, but not the wanted follow-up.
305            $checkotherchunck = $fp
306                && ($tmp['date'] == $rev || ($revcounter > 0 && !$relativerev))
307                && !(($tail == $eof && $direction > 0) || ($head == 0 && $direction < 0));
308
309            if ($checkotherchunck) {
310                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, $direction);
311
312                if (empty($lines)) break;
313            }
314        }
315        if ($fp) {
316            fclose($fp);
317        }
318
319        return $relativerev;
320    }
321
322    /**
323     * Returns revisions around rev1 and rev2
324     * When available it returns $max entries for each revision
325     *
326     * @param int $rev1 oldest revision timestamp
327     * @param int $rev2 newest revision timestamp (0 looks up last revision)
328     * @param int $max maximum number of revisions returned
329     * @return array with two arrays with revisions surrounding rev1 respectively rev2
330     */
331    public function getRevisionsAround($rev1, $rev2, $max = 50)
332    {
333        $max = floor(abs($max) / 2) * 2 + 1;
334        $rev1 = max($rev1, 0);
335        $rev2 = max($rev2, 0);
336
337        if ($rev2) {
338            if ($rev2 < $rev1) {
339                $rev = $rev2;
340                $rev2 = $rev1;
341                $rev1 = $rev;
342            }
343        } else {
344            //empty right side means a removed page. Look up last revision.
345            $revs = $this->getRevisions(-1, 1);
346            $rev2 = $revs[0];
347        }
348        //collect revisions around rev2
349        list($revs2, $allrevs, $fp, $lines, $head, $tail) = $this->retrieveRevisionsAround($rev2, $max);
350
351        if (empty($revs2)) return array(array(), array());
352
353        //collect revisions around rev1
354        $index = array_search($rev1, $allrevs);
355        if ($index === false) {
356            //no overlapping revisions
357            list($revs1, , , , ,) = $this->retrieveRevisionsAround($rev1, $max);
358            if (empty($revs1)) $revs1 = array();
359        } else {
360            //revisions overlaps, reuse revisions around rev2
361            $revs1 = $allrevs;
362            while ($head > 0) {
363                for ($i = count($lines) - 1; $i >= 0; $i--) {
364                    $tmp = parseChangelogLine($lines[$i]);
365                    if ($tmp !== false) {
366                        $this->cache[$this->id][$tmp['date']] = $tmp;
367                        $revs1[] = $tmp['date'];
368                        $index++;
369
370                        if ($index > floor($max / 2)) break 2;
371                    }
372                }
373
374                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
375            }
376            sort($revs1);
377            //return wanted selection
378            $revs1 = array_slice($revs1, max($index - floor($max / 2), 0), $max);
379        }
380
381        return array(array_reverse($revs1), array_reverse($revs2));
382    }
383
384
385    /**
386     * Checks if the ID has old revisons
387     * @return boolean
388     */
389    public function hasRevisions() {
390        $file = $this->getChangelogFilename();
391        return file_exists($file);
392    }
393
394    /**
395     * Returns lines from changelog.
396     * If file larger than $chuncksize, only chunck is read that could contain $rev.
397     *
398     * @param int $rev revision timestamp
399     * @return array|false
400     *     if success returns array(fp, array(changeloglines), $head, $tail, $eof)
401     *     where fp only defined for chuck reading, needs closing.
402     *     otherwise false
403     */
404    protected function readloglines($rev)
405    {
406        $file = $this->getChangelogFilename();
407
408        if (!file_exists($file)) {
409            return false;
410        }
411
412        $fp = null;
413        $head = 0;
414        $tail = 0;
415        $eof = 0;
416
417        if (filesize($file) < $this->chunk_size || $this->chunk_size == 0) {
418            // read whole file
419            $lines = file($file);
420            if ($lines === false) {
421                return false;
422            }
423        } else {
424            // read by chunk
425            $fp = fopen($file, 'rb'); // "file pointer"
426            if ($fp === false) {
427                return false;
428            }
429            $head = 0;
430            fseek($fp, 0, SEEK_END);
431            $eof = ftell($fp);
432            $tail = $eof;
433
434            // find chunk
435            while ($tail - $head > $this->chunk_size) {
436                $finger = $head + floor(($tail - $head) / 2.0);
437                $finger = $this->getNewlinepointer($fp, $finger);
438                $tmp = fgets($fp);
439                if ($finger == $head || $finger == $tail) {
440                    break;
441                }
442                $tmp = parseChangelogLine($tmp);
443                $finger_rev = $tmp['date'];
444
445                if ($finger_rev > $rev) {
446                    $tail = $finger;
447                } else {
448                    $head = $finger;
449                }
450            }
451
452            if ($tail - $head < 1) {
453                // cound not find chunk, assume requested rev is missing
454                fclose($fp);
455                return false;
456            }
457
458            $lines = $this->readChunk($fp, $head, $tail);
459        }
460        return array(
461            $fp,
462            $lines,
463            $head,
464            $tail,
465            $eof,
466        );
467    }
468
469    /**
470     * Read chunk and return array with lines of given chunck.
471     * Has no check if $head and $tail are really at a new line
472     *
473     * @param resource $fp resource filepointer
474     * @param int $head start point chunck
475     * @param int $tail end point chunck
476     * @return array lines read from chunck
477     */
478    protected function readChunk($fp, $head, $tail)
479    {
480        $chunk = '';
481        $chunk_size = max($tail - $head, 0); // found chunk size
482        $got = 0;
483        fseek($fp, $head);
484        while ($got < $chunk_size && !feof($fp)) {
485            $tmp = @fread($fp, max(min($this->chunk_size, $chunk_size - $got), 0));
486            if ($tmp === false) { //error state
487                break;
488            }
489            $got += strlen($tmp);
490            $chunk .= $tmp;
491        }
492        $lines = explode("\n", $chunk);
493        array_pop($lines); // remove trailing newline
494        return $lines;
495    }
496
497    /**
498     * Set pointer to first new line after $finger and return its position
499     *
500     * @param resource $fp filepointer
501     * @param int $finger a pointer
502     * @return int pointer
503     */
504    protected function getNewlinepointer($fp, $finger)
505    {
506        fseek($fp, $finger);
507        $nl = $finger;
508        if ($finger > 0) {
509            fgets($fp); // slip the finger forward to a new line
510            $nl = ftell($fp);
511        }
512        return $nl;
513    }
514
515    /**
516     * Check whether given revision is the current page
517     *
518     * @param int $rev timestamp of current page
519     * @return bool true if $rev is current revision, otherwise false
520     */
521    public function isCurrentRevision($rev)
522    {
523        return $rev == @filemtime($this->getFilename());
524    }
525
526    /**
527     * Return an existing revision for a specific date which is
528     * the current one or younger or equal then the date
529     *
530     * @param number $date_at timestamp
531     * @return string revision ('' for current)
532     */
533    public function getLastRevisionAt($date_at)
534    {
535        //requested date_at(timestamp) younger or equal then modified_time($this->id) => load current
536        if (file_exists($this->getFilename()) && $date_at >= @filemtime($this->getFilename())) {
537            return '';
538        } else {
539            if ($rev = $this->getRelativeRevision($date_at + 1, -1)) { //+1 to get also the requested date revision
540                return $rev;
541            } else {
542                return false;
543            }
544        }
545    }
546
547    /**
548     * Returns the next lines of the changelog  of the chunck before head or after tail
549     *
550     * @param resource $fp filepointer
551     * @param int $head position head of last chunk
552     * @param int $tail position tail of last chunk
553     * @param int $direction positive forward, negative backward
554     * @return array with entries:
555     *    - $lines: changelog lines of readed chunk
556     *    - $head: head of chunk
557     *    - $tail: tail of chunk
558     */
559    protected function readAdjacentChunk($fp, $head, $tail, $direction)
560    {
561        if (!$fp) return array(array(), $head, $tail);
562
563        if ($direction > 0) {
564            //read forward
565            $head = $tail;
566            $tail = $head + floor($this->chunk_size * (2 / 3));
567            $tail = $this->getNewlinepointer($fp, $tail);
568        } else {
569            //read backward
570            $tail = $head;
571            $head = max($tail - $this->chunk_size, 0);
572            while (true) {
573                $nl = $this->getNewlinepointer($fp, $head);
574                // was the chunk big enough? if not, take another bite
575                if ($nl > 0 && $tail <= $nl) {
576                    $head = max($head - $this->chunk_size, 0);
577                } else {
578                    $head = $nl;
579                    break;
580                }
581            }
582        }
583
584        //load next chunck
585        $lines = $this->readChunk($fp, $head, $tail);
586        return array($lines, $head, $tail);
587    }
588
589    /**
590     * Collect the $max revisions near to the timestamp $rev
591     *
592     * @param int $rev revision timestamp
593     * @param int $max maximum number of revisions to be returned
594     * @return bool|array
595     *     return array with entries:
596     *       - $requestedrevs: array of with $max revision timestamps
597     *       - $revs: all parsed revision timestamps
598     *       - $fp: filepointer only defined for chuck reading, needs closing.
599     *       - $lines: non-parsed changelog lines before the parsed revisions
600     *       - $head: position of first readed changelogline
601     *       - $lasttail: position of end of last readed changelogline
602     *     otherwise false
603     */
604    protected function retrieveRevisionsAround($rev, $max)
605    {
606        //get lines from changelog
607        list($fp, $lines, $starthead, $starttail, /* $eof */) = $this->readloglines($rev);
608        if (empty($lines)) return false;
609
610        //parse chunk containing $rev, and read forward more chunks until $max/2 is reached
611        $head = $starthead;
612        $tail = $starttail;
613        $revs = array();
614        $aftercount = $beforecount = 0;
615        while (count($lines) > 0) {
616            foreach ($lines as $line) {
617                $tmp = parseChangelogLine($line);
618                if ($tmp !== false) {
619                    $this->cache[$this->id][$tmp['date']] = $tmp;
620                    $revs[] = $tmp['date'];
621                    if ($tmp['date'] >= $rev) {
622                        //count revs after reference $rev
623                        $aftercount++;
624                        if ($aftercount == 1) $beforecount = count($revs);
625                    }
626                    //enough revs after reference $rev?
627                    if ($aftercount > floor($max / 2)) break 2;
628                }
629            }
630            //retrieve next chunk
631            list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, 1);
632        }
633        if ($aftercount == 0) return false;
634
635        $lasttail = $tail;
636
637        //read additional chuncks backward until $max/2 is reached and total number of revs is equal to $max
638        $lines = array();
639        $i = 0;
640        if ($aftercount > 0) {
641            $head = $starthead;
642            $tail = $starttail;
643            while ($head > 0) {
644                list($lines, $head, $tail) = $this->readAdjacentChunk($fp, $head, $tail, -1);
645
646                for ($i = count($lines) - 1; $i >= 0; $i--) {
647                    $tmp = parseChangelogLine($lines[$i]);
648                    if ($tmp !== false) {
649                        $this->cache[$this->id][$tmp['date']] = $tmp;
650                        $revs[] = $tmp['date'];
651                        $beforecount++;
652                        //enough revs before reference $rev?
653                        if ($beforecount > max(floor($max / 2), $max - $aftercount)) break 2;
654                    }
655                }
656            }
657        }
658        sort($revs);
659
660        //keep only non-parsed lines
661        $lines = array_slice($lines, 0, $i);
662        //trunk desired selection
663        $requestedrevs = array_slice($revs, -$max, $max);
664
665        return array($requestedrevs, $revs, $fp, $lines, $head, $lasttail);
666    }
667}
668