xref: /dokuwiki/inc/DifferenceEngine.php (revision 988c134016f0557947bd6811e22086919f98fa8e)
1<?php
2/**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Additions by Axel Boldt for MediaWiki
6 *
7 * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 * @license  You may copy this code freely under the conditions of the GPL.
9 */
10define('USE_ASSERTS', function_exists('assert'));
11
12class _DiffOp {
13    var $type;
14    var $orig;
15    var $closing;
16
17    function reverse() {
18        trigger_error("pure virtual", E_USER_ERROR);
19    }
20
21    function norig() {
22        return $this->orig ? count($this->orig) : 0;
23    }
24
25    function nclosing() {
26        return $this->closing ? count($this->closing) : 0;
27    }
28}
29
30class _DiffOp_Copy extends _DiffOp {
31    var $type = 'copy';
32    /**
33     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
34     */
35    function _DiffOp_Copy($orig, $closing = false) {
36        $this->__construct($orig, $closing);
37    }
38
39    function __construct($orig, $closing = false) {
40        if (!is_array($closing))
41            $closing = $orig;
42        $this->orig = $orig;
43        $this->closing = $closing;
44    }
45
46    function reverse() {
47        return new _DiffOp_Copy($this->closing, $this->orig);
48    }
49}
50
51class _DiffOp_Delete extends _DiffOp {
52    var $type = 'delete';
53    /**
54     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
55     */
56    function _DiffOp_Delete($lines) {
57        $this->__construct($lines);
58    }
59
60    function __construct($lines) {
61        $this->orig = $lines;
62        $this->closing = false;
63    }
64
65    function reverse() {
66        return new _DiffOp_Add($this->orig);
67    }
68}
69
70class _DiffOp_Add extends _DiffOp {
71    var $type = 'add';
72    /**
73     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
74     */
75    function _DiffOp_Add($lines) {
76        $this->__construct($lines);
77    }
78
79    function __construct($lines) {
80        $this->closing = $lines;
81        $this->orig = false;
82    }
83
84    function reverse() {
85        return new _DiffOp_Delete($this->closing);
86    }
87}
88
89class _DiffOp_Change extends _DiffOp {
90    var $type = 'change';
91    /**
92     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
93     */
94    function _DiffOp_Change($orig, $closing) {
95        $this->__construct($orig, $closing);
96    }
97
98    function __construct($orig, $closing) {
99        $this->orig = $orig;
100        $this->closing = $closing;
101    }
102
103    function reverse() {
104        return new _DiffOp_Change($this->closing, $this->orig);
105    }
106}
107
108
109/**
110 * Class used internally by Diff to actually compute the diffs.
111 *
112 * The algorithm used here is mostly lifted from the perl module
113 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
114 *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
115 *
116 * More ideas are taken from:
117 *   http://www.ics.uci.edu/~eppstein/161/960229.html
118 *
119 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
120 * diffutils-2.7, which can be found at:
121 *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
122 *
123 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
124 * are my own.
125 *
126 * @author Geoffrey T. Dairiki
127 * @access private
128 */
129class _DiffEngine {
130
131    function diff($from_lines, $to_lines) {
132        $n_from = count($from_lines);
133        $n_to = count($to_lines);
134
135        $this->xchanged = $this->ychanged = array();
136        $this->xv = $this->yv = array();
137        $this->xind = $this->yind = array();
138        unset($this->seq);
139        unset($this->in_seq);
140        unset($this->lcs);
141
142        // Skip leading common lines.
143        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
144            if ($from_lines[$skip] != $to_lines[$skip])
145                break;
146            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
147        }
148        // Skip trailing common lines.
149        $xi = $n_from;
150        $yi = $n_to;
151        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
152            if ($from_lines[$xi] != $to_lines[$yi])
153                break;
154            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
155        }
156
157        // Ignore lines which do not exist in both files.
158        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
159            $xhash[$from_lines[$xi]] = 1;
160        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
161            $line = $to_lines[$yi];
162            if (($this->ychanged[$yi] = empty($xhash[$line])))
163                continue;
164            $yhash[$line] = 1;
165            $this->yv[] = $line;
166            $this->yind[] = $yi;
167        }
168        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
169            $line = $from_lines[$xi];
170            if (($this->xchanged[$xi] = empty($yhash[$line])))
171                continue;
172            $this->xv[] = $line;
173            $this->xind[] = $xi;
174        }
175
176        // Find the LCS.
177        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
178
179        // Merge edits when possible
180        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
181        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
182
183        // Compute the edit operations.
184        $edits = array();
185        $xi = $yi = 0;
186        while ($xi < $n_from || $yi < $n_to) {
187            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
188            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
189
190            // Skip matching "snake".
191            $copy = array();
192            while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
193                $copy[] = $from_lines[$xi++];
194                ++$yi;
195            }
196            if ($copy)
197                $edits[] = new _DiffOp_Copy($copy);
198
199            // Find deletes & adds.
200            $delete = array();
201            while ($xi < $n_from && $this->xchanged[$xi])
202                $delete[] = $from_lines[$xi++];
203
204            $add = array();
205            while ($yi < $n_to && $this->ychanged[$yi])
206                $add[] = $to_lines[$yi++];
207
208            if ($delete && $add)
209                $edits[] = new _DiffOp_Change($delete, $add);
210            elseif ($delete)
211                $edits[] = new _DiffOp_Delete($delete);
212            elseif ($add)
213                $edits[] = new _DiffOp_Add($add);
214        }
215        return $edits;
216    }
217
218
219    /**
220     * Divide the Largest Common Subsequence (LCS) of the sequences
221     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
222     * sized segments.
223     *
224     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
225     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
226     * sub sequences.  The first sub-sequence is contained in [X0, X1),
227     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
228     * that (X0, Y0) == (XOFF, YOFF) and
229     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
230     *
231     * This function assumes that the first lines of the specified portions
232     * of the two files do not match, and likewise that the last lines do not
233     * match.  The caller must trim matching lines from the beginning and end
234     * of the portions it is going to specify.
235     */
236    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
237        $flip = false;
238
239        if ($xlim - $xoff > $ylim - $yoff) {
240            // Things seems faster (I'm not sure I understand why)
241            // when the shortest sequence in X.
242            $flip = true;
243            list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
244        }
245
246        if ($flip)
247            for ($i = $ylim - 1; $i >= $yoff; $i--)
248                $ymatches[$this->xv[$i]][] = $i;
249        else
250            for ($i = $ylim - 1; $i >= $yoff; $i--)
251                $ymatches[$this->yv[$i]][] = $i;
252
253        $this->lcs = 0;
254        $this->seq[0]= $yoff - 1;
255        $this->in_seq = array();
256        $ymids[0] = array();
257
258        $numer = $xlim - $xoff + $nchunks - 1;
259        $x = $xoff;
260        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
261            if ($chunk > 0)
262                for ($i = 0; $i <= $this->lcs; $i++)
263                    $ymids[$i][$chunk-1] = $this->seq[$i];
264
265            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
266            for ( ; $x < $x1; $x++) {
267                $line = $flip ? $this->yv[$x] : $this->xv[$x];
268                if (empty($ymatches[$line]))
269                    continue;
270                $matches = $ymatches[$line];
271                reset($matches);
272                while (list ($junk, $y) = each($matches))
273                    if (empty($this->in_seq[$y])) {
274                        $k = $this->_lcs_pos($y);
275                        USE_ASSERTS && assert($k > 0);
276                        $ymids[$k] = $ymids[$k-1];
277                        break;
278                    }
279                while (list ($junk, $y) = each($matches)) {
280                    if ($y > $this->seq[$k-1]) {
281                        USE_ASSERTS && assert($y < $this->seq[$k]);
282                        // Optimization: this is a common case:
283                        //  next match is just replacing previous match.
284                        $this->in_seq[$this->seq[$k]] = false;
285                        $this->seq[$k] = $y;
286                        $this->in_seq[$y] = 1;
287                    }
288                    else if (empty($this->in_seq[$y])) {
289                        $k = $this->_lcs_pos($y);
290                        USE_ASSERTS && assert($k > 0);
291                        $ymids[$k] = $ymids[$k-1];
292                    }
293                }
294            }
295        }
296
297        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
298        $ymid = $ymids[$this->lcs];
299        for ($n = 0; $n < $nchunks - 1; $n++) {
300            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
301            $y1 = $ymid[$n] + 1;
302            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
303        }
304        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
305
306        return array($this->lcs, $seps);
307    }
308
309    function _lcs_pos($ypos) {
310        $end = $this->lcs;
311        if ($end == 0 || $ypos > $this->seq[$end]) {
312            $this->seq[++$this->lcs] = $ypos;
313            $this->in_seq[$ypos] = 1;
314            return $this->lcs;
315        }
316
317        $beg = 1;
318        while ($beg < $end) {
319            $mid = (int)(($beg + $end) / 2);
320            if ($ypos > $this->seq[$mid])
321                $beg = $mid + 1;
322            else
323                $end = $mid;
324        }
325
326        USE_ASSERTS && assert($ypos != $this->seq[$end]);
327
328        $this->in_seq[$this->seq[$end]] = false;
329        $this->seq[$end] = $ypos;
330        $this->in_seq[$ypos] = 1;
331        return $end;
332    }
333
334    /**
335     * Find LCS of two sequences.
336     *
337     * The results are recorded in the vectors $this->{x,y}changed[], by
338     * storing a 1 in the element for each line that is an insertion
339     * or deletion (ie. is not in the LCS).
340     *
341     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
342     *
343     * Note that XLIM, YLIM are exclusive bounds.
344     * All line numbers are origin-0 and discarded lines are not counted.
345     */
346    function _compareseq($xoff, $xlim, $yoff, $ylim) {
347        // Slide down the bottom initial diagonal.
348        while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
349            ++$xoff;
350            ++$yoff;
351        }
352
353        // Slide up the top initial diagonal.
354        while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
355            --$xlim;
356            --$ylim;
357        }
358
359        if ($xoff == $xlim || $yoff == $ylim)
360            $lcs = 0;
361        else {
362            // This is ad hoc but seems to work well.
363            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
364            //$nchunks = max(2,min(8,(int)$nchunks));
365            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
366            list ($lcs, $seps)
367                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
368        }
369
370        if ($lcs == 0) {
371            // X and Y sequences have no common subsequence:
372            // mark all changed.
373            while ($yoff < $ylim)
374                $this->ychanged[$this->yind[$yoff++]] = 1;
375            while ($xoff < $xlim)
376                $this->xchanged[$this->xind[$xoff++]] = 1;
377        }
378        else {
379            // Use the partitions to split this problem into subproblems.
380            reset($seps);
381            $pt1 = $seps[0];
382            while ($pt2 = next($seps)) {
383                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
384                $pt1 = $pt2;
385            }
386        }
387    }
388
389    /**
390     * Adjust inserts/deletes of identical lines to join changes
391     * as much as possible.
392     *
393     * We do something when a run of changed lines include a
394     * line at one end and has an excluded, identical line at the other.
395     * We are free to choose which identical line is included.
396     * `compareseq' usually chooses the one at the beginning,
397     * but usually it is cleaner to consider the following identical line
398     * to be the "change".
399     *
400     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
401     */
402    function _shift_boundaries($lines, &$changed, $other_changed) {
403        $i = 0;
404        $j = 0;
405
406        USE_ASSERTS && assert('count($lines) == count($changed)');
407        $len = count($lines);
408        $other_len = count($other_changed);
409
410        while (1) {
411            /*
412             * Scan forwards to find beginning of another run of changes.
413             * Also keep track of the corresponding point in the other file.
414             *
415             * Throughout this code, $i and $j are adjusted together so that
416             * the first $i elements of $changed and the first $j elements
417             * of $other_changed both contain the same number of zeros
418             * (unchanged lines).
419             * Furthermore, $j is always kept so that $j == $other_len or
420             * $other_changed[$j] == false.
421             */
422            while ($j < $other_len && $other_changed[$j])
423                $j++;
424
425            while ($i < $len && ! $changed[$i]) {
426                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
427                $i++;
428                $j++;
429                while ($j < $other_len && $other_changed[$j])
430                    $j++;
431            }
432
433            if ($i == $len)
434                break;
435
436            $start = $i;
437
438            // Find the end of this run of changes.
439            while (++$i < $len && $changed[$i])
440                continue;
441
442            do {
443                /*
444                 * Record the length of this run of changes, so that
445                 * we can later determine whether the run has grown.
446                 */
447                $runlength = $i - $start;
448
449                /*
450                 * Move the changed region back, so long as the
451                 * previous unchanged line matches the last changed one.
452                 * This merges with previous changed regions.
453                 */
454                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
455                    $changed[--$start] = 1;
456                    $changed[--$i] = false;
457                    while ($start > 0 && $changed[$start - 1])
458                        $start--;
459                    USE_ASSERTS && assert('$j > 0');
460                    while ($other_changed[--$j])
461                        continue;
462                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
463                }
464
465                /*
466                 * Set CORRESPONDING to the end of the changed run, at the last
467                 * point where it corresponds to a changed run in the other file.
468                 * CORRESPONDING == LEN means no such point has been found.
469                 */
470                $corresponding = $j < $other_len ? $i : $len;
471
472                /*
473                 * Move the changed region forward, so long as the
474                 * first changed line matches the following unchanged one.
475                 * This merges with following changed regions.
476                 * Do this second, so that if there are no merges,
477                 * the changed region is moved forward as far as possible.
478                 */
479                while ($i < $len && $lines[$start] == $lines[$i]) {
480                    $changed[$start++] = false;
481                    $changed[$i++] = 1;
482                    while ($i < $len && $changed[$i])
483                        $i++;
484
485                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
486                    $j++;
487                    if ($j < $other_len && $other_changed[$j]) {
488                        $corresponding = $i;
489                        while ($j < $other_len && $other_changed[$j])
490                            $j++;
491                    }
492                }
493            } while ($runlength != $i - $start);
494
495            /*
496             * If possible, move the fully-merged run of changes
497             * back to a corresponding run in the other file.
498             */
499            while ($corresponding < $i) {
500                $changed[--$start] = 1;
501                $changed[--$i] = 0;
502                USE_ASSERTS && assert('$j > 0');
503                while ($other_changed[--$j])
504                    continue;
505                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
506            }
507        }
508    }
509}
510
511/**
512 * Class representing a 'diff' between two sequences of strings.
513 */
514class Diff {
515
516    var $edits;
517
518    /**
519     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
520     */
521    function Diff($from_lines, $to_lines) {
522        $this->__construct($from_lines, $to_lines);
523    }
524
525    /**
526     * Constructor.
527     * Computes diff between sequences of strings.
528     *
529     * @param $from_lines array An array of strings.
530     *      (Typically these are lines from a file.)
531     * @param $to_lines array An array of strings.
532     */
533    function __construct($from_lines, $to_lines) {
534        $eng = new _DiffEngine;
535        $this->edits = $eng->diff($from_lines, $to_lines);
536        //$this->_check($from_lines, $to_lines);
537    }
538
539    /**
540     * Compute reversed Diff.
541     *
542     * SYNOPSIS:
543     *
544     *  $diff = new Diff($lines1, $lines2);
545     *  $rev = $diff->reverse();
546     * @return object A Diff object representing the inverse of the
547     *          original diff.
548     */
549    function reverse() {
550        $rev = $this;
551        $rev->edits = array();
552        foreach ($this->edits as $edit) {
553            $rev->edits[] = $edit->reverse();
554        }
555        return $rev;
556    }
557
558    /**
559     * Check for empty diff.
560     *
561     * @return bool True iff two sequences were identical.
562     */
563    function isEmpty() {
564        foreach ($this->edits as $edit) {
565            if ($edit->type != 'copy')
566                return false;
567        }
568        return true;
569    }
570
571    /**
572     * Compute the length of the Longest Common Subsequence (LCS).
573     *
574     * This is mostly for diagnostic purposed.
575     *
576     * @return int The length of the LCS.
577     */
578    function lcs() {
579        $lcs = 0;
580        foreach ($this->edits as $edit) {
581            if ($edit->type == 'copy')
582                $lcs += count($edit->orig);
583        }
584        return $lcs;
585    }
586
587    /**
588     * Get the original set of lines.
589     *
590     * This reconstructs the $from_lines parameter passed to the
591     * constructor.
592     *
593     * @return array The original sequence of strings.
594     */
595    function orig() {
596        $lines = array();
597
598        foreach ($this->edits as $edit) {
599            if ($edit->orig)
600                array_splice($lines, count($lines), 0, $edit->orig);
601        }
602        return $lines;
603    }
604
605    /**
606     * Get the closing set of lines.
607     *
608     * This reconstructs the $to_lines parameter passed to the
609     * constructor.
610     *
611     * @return array The sequence of strings.
612     */
613    function closing() {
614        $lines = array();
615
616        foreach ($this->edits as $edit) {
617            if ($edit->closing)
618                array_splice($lines, count($lines), 0, $edit->closing);
619        }
620        return $lines;
621    }
622
623    /**
624     * Check a Diff for validity.
625     *
626     * This is here only for debugging purposes.
627     */
628    function _check($from_lines, $to_lines) {
629        if (serialize($from_lines) != serialize($this->orig()))
630            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
631        if (serialize($to_lines) != serialize($this->closing()))
632            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
633
634        $rev = $this->reverse();
635        if (serialize($to_lines) != serialize($rev->orig()))
636            trigger_error("Reversed original doesn't match", E_USER_ERROR);
637        if (serialize($from_lines) != serialize($rev->closing()))
638            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
639
640        $prevtype = 'none';
641        foreach ($this->edits as $edit) {
642            if ($prevtype == $edit->type)
643                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
644            $prevtype = $edit->type;
645        }
646
647        $lcs = $this->lcs();
648        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
649    }
650}
651
652/**
653 * FIXME: bad name.
654 */
655class MappedDiff extends Diff {
656    /**
657     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
658     */
659    function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
660        $this->__construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines);
661    }
662
663    /**
664     * Constructor.
665     *
666     * Computes diff between sequences of strings.
667     *
668     * This can be used to compute things like
669     * case-insensitve diffs, or diffs which ignore
670     * changes in white-space.
671     *
672     * @param $from_lines array An array of strings.
673     *  (Typically these are lines from a file.)
674     *
675     * @param $to_lines array An array of strings.
676     *
677     * @param $mapped_from_lines array This array should
678     *  have the same size number of elements as $from_lines.
679     *  The elements in $mapped_from_lines and
680     *  $mapped_to_lines are what is actually compared
681     *  when computing the diff.
682     *
683     * @param $mapped_to_lines array This array should
684     *  have the same number of elements as $to_lines.
685     */
686    function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
687
688        assert(count($from_lines) == count($mapped_from_lines));
689        assert(count($to_lines) == count($mapped_to_lines));
690
691        parent::__construct($mapped_from_lines, $mapped_to_lines);
692
693        $xi = $yi = 0;
694        $ecnt = count($this->edits);
695        for ($i = 0; $i < $ecnt; $i++) {
696            $orig = &$this->edits[$i]->orig;
697            if (is_array($orig)) {
698                $orig = array_slice($from_lines, $xi, count($orig));
699                $xi += count($orig);
700            }
701
702            $closing = &$this->edits[$i]->closing;
703            if (is_array($closing)) {
704                $closing = array_slice($to_lines, $yi, count($closing));
705                $yi += count($closing);
706            }
707        }
708    }
709}
710
711/**
712 * A class to format Diffs
713 *
714 * This class formats the diff in classic diff format.
715 * It is intended that this class be customized via inheritance,
716 * to obtain fancier outputs.
717 */
718class DiffFormatter {
719    /**
720     * Number of leading context "lines" to preserve.
721     *
722     * This should be left at zero for this class, but subclasses
723     * may want to set this to other values.
724     */
725    var $leading_context_lines = 0;
726
727    /**
728     * Number of trailing context "lines" to preserve.
729     *
730     * This should be left at zero for this class, but subclasses
731     * may want to set this to other values.
732     */
733    var $trailing_context_lines = 0;
734
735    /**
736     * Format a diff.
737     *
738     * @param $diff object A Diff object.
739     * @return string The formatted output.
740     */
741    function format($diff) {
742
743        $xi = $yi = 1;
744        $block = false;
745        $context = array();
746
747        $nlead = $this->leading_context_lines;
748        $ntrail = $this->trailing_context_lines;
749
750        $this->_start_diff();
751
752        foreach ($diff->edits as $edit) {
753            if ($edit->type == 'copy') {
754                if (is_array($block)) {
755                    if (count($edit->orig) <= $nlead + $ntrail) {
756                        $block[] = $edit;
757                    }
758                    else{
759                        if ($ntrail) {
760                            $context = array_slice($edit->orig, 0, $ntrail);
761                            $block[] = new _DiffOp_Copy($context);
762                        }
763                        $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
764                        $block = false;
765                    }
766                }
767                $context = $edit->orig;
768            }
769            else {
770                if (! is_array($block)) {
771                    $context = array_slice($context, count($context) - $nlead);
772                    $x0 = $xi - count($context);
773                    $y0 = $yi - count($context);
774                    $block = array();
775                    if ($context)
776                        $block[] = new _DiffOp_Copy($context);
777                }
778                $block[] = $edit;
779            }
780
781            if ($edit->orig)
782                $xi += count($edit->orig);
783            if ($edit->closing)
784                $yi += count($edit->closing);
785        }
786
787        if (is_array($block))
788            $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
789
790        return $this->_end_diff();
791    }
792
793    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
794        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
795        foreach ($edits as $edit) {
796            if ($edit->type == 'copy')
797                $this->_context($edit->orig);
798            elseif ($edit->type == 'add')
799                $this->_added($edit->closing);
800            elseif ($edit->type == 'delete')
801                $this->_deleted($edit->orig);
802            elseif ($edit->type == 'change')
803                $this->_changed($edit->orig, $edit->closing);
804            else
805                trigger_error("Unknown edit type", E_USER_ERROR);
806        }
807        $this->_end_block();
808    }
809
810    function _start_diff() {
811        ob_start();
812    }
813
814    function _end_diff() {
815        $val = ob_get_contents();
816        ob_end_clean();
817        return $val;
818    }
819
820    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
821        if ($xlen > 1)
822            $xbeg .= "," . ($xbeg + $xlen - 1);
823        if ($ylen > 1)
824            $ybeg .= "," . ($ybeg + $ylen - 1);
825
826        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
827    }
828
829    function _start_block($header) {
830        echo $header;
831    }
832
833    function _end_block() {
834    }
835
836    function _lines($lines, $prefix = ' ') {
837        foreach ($lines as $line)
838            echo "$prefix $line\n";
839    }
840
841    function _context($lines) {
842        $this->_lines($lines);
843    }
844
845    function _added($lines) {
846        $this->_lines($lines, ">");
847    }
848    function _deleted($lines) {
849        $this->_lines($lines, "<");
850    }
851
852    function _changed($orig, $closing) {
853        $this->_deleted($orig);
854        echo "---\n";
855        $this->_added($closing);
856    }
857}
858
859
860/**
861 *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
862 *
863 */
864
865define('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
866
867class _HWLDF_WordAccumulator {
868    /**
869     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
870     */
871    function _HWLDF_WordAccumulator() {
872        $this->__construct();
873    }
874
875    function __construct() {
876        $this->_lines = array();
877        $this->_line = '';
878        $this->_group = '';
879        $this->_tag = '';
880    }
881
882    function _flushGroup($new_tag) {
883        if ($this->_group !== '') {
884            if ($this->_tag == 'mark')
885                $this->_line .= '<strong>'.$this->_group.'</strong>';
886            elseif ($this->_tag == 'add')
887                $this->_line .= '<span class="diff-addedline">'.$this->_group.'</span>';
888            elseif ($this->_tag == 'del')
889                $this->_line .= '<span class="diff-deletedline"><del>'.$this->_group.'</del></span>';
890            else
891                $this->_line .= $this->_group;
892        }
893        $this->_group = '';
894        $this->_tag = $new_tag;
895    }
896
897    function _flushLine($new_tag) {
898        $this->_flushGroup($new_tag);
899        if ($this->_line != '')
900            $this->_lines[] = $this->_line;
901        $this->_line = '';
902    }
903
904    function addWords($words, $tag = '') {
905        if ($tag != $this->_tag)
906            $this->_flushGroup($tag);
907
908        foreach ($words as $word) {
909            // new-line should only come as first char of word.
910            if ($word == '')
911                continue;
912            if ($word[0] == "\n") {
913                $this->_group .= NBSP;
914                $this->_flushLine($tag);
915                $word = substr($word, 1);
916            }
917            assert(!strstr($word, "\n"));
918            $this->_group .= $word;
919        }
920    }
921
922    function getLines() {
923        $this->_flushLine('~done');
924        return $this->_lines;
925    }
926}
927
928class WordLevelDiff extends MappedDiff {
929
930    /**
931     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
932     */
933    function WordLevelDiff($orig_lines, $closing_lines) {
934        $this->__construct($orig_lines, $closing_lines);
935    }
936
937    function __construct($orig_lines, $closing_lines) {
938        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
939        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
940
941        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
942    }
943
944    function _split($lines) {
945        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
946             implode("\n", $lines), $m)) {
947            return array(array(''), array(''));
948        }
949        return array($m[0], $m[1]);
950    }
951
952    function orig() {
953        $orig = new _HWLDF_WordAccumulator;
954
955        foreach ($this->edits as $edit) {
956            if ($edit->type == 'copy')
957                $orig->addWords($edit->orig);
958            elseif ($edit->orig)
959                $orig->addWords($edit->orig, 'mark');
960        }
961        return $orig->getLines();
962    }
963
964    function closing() {
965        $closing = new _HWLDF_WordAccumulator;
966
967        foreach ($this->edits as $edit) {
968            if ($edit->type == 'copy')
969                $closing->addWords($edit->closing);
970            elseif ($edit->closing)
971                $closing->addWords($edit->closing, 'mark');
972        }
973        return $closing->getLines();
974    }
975}
976
977class InlineWordLevelDiff extends MappedDiff {
978
979    /**
980     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
981     */
982    function InlineWordLevelDiff($orig_lines, $closing_lines) {
983        $this->__construct($orig_lines, $closing_lines);
984    }
985
986    function __construct($orig_lines, $closing_lines) {
987        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
988        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
989
990        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
991    }
992
993    function _split($lines) {
994        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
995             implode("\n", $lines), $m)) {
996            return array(array(''), array(''));
997        }
998        return array($m[0], $m[1]);
999    }
1000
1001    function inline() {
1002        $orig = new _HWLDF_WordAccumulator;
1003        foreach ($this->edits as $edit) {
1004            if ($edit->type == 'copy')
1005                $orig->addWords($edit->closing);
1006            elseif ($edit->type == 'change'){
1007                $orig->addWords($edit->orig, 'del');
1008                $orig->addWords($edit->closing, 'add');
1009            } elseif ($edit->type == 'delete')
1010                $orig->addWords($edit->orig, 'del');
1011            elseif ($edit->type == 'add')
1012                $orig->addWords($edit->closing, 'add');
1013            elseif ($edit->orig)
1014                $orig->addWords($edit->orig, 'del');
1015        }
1016        return $orig->getLines();
1017    }
1018}
1019
1020/**
1021 * "Unified" diff formatter.
1022 *
1023 * This class formats the diff in classic "unified diff" format.
1024 */
1025class UnifiedDiffFormatter extends DiffFormatter {
1026
1027    /**
1028     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
1029     */
1030    function UnifiedDiffFormatter($context_lines = 4) {
1031        $this->__construct($context_lines);
1032    }
1033
1034    function __construct($context_lines = 4) {
1035        $this->leading_context_lines = $context_lines;
1036        $this->trailing_context_lines = $context_lines;
1037    }
1038
1039    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1040        if ($xlen != 1)
1041            $xbeg .= "," . $xlen;
1042        if ($ylen != 1)
1043            $ybeg .= "," . $ylen;
1044        return "@@ -$xbeg +$ybeg @@\n";
1045    }
1046
1047    function _added($lines) {
1048        $this->_lines($lines, "+");
1049    }
1050    function _deleted($lines) {
1051        $this->_lines($lines, "-");
1052    }
1053    function _changed($orig, $final) {
1054        $this->_deleted($orig);
1055        $this->_added($final);
1056    }
1057}
1058
1059/**
1060 *  Wikipedia Table style diff formatter.
1061 *
1062 */
1063class TableDiffFormatter extends DiffFormatter {
1064
1065    /**
1066     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
1067     */
1068    function TableDiffFormatter() {
1069        $this->__construct();
1070    }
1071
1072    function __construct() {
1073        $this->leading_context_lines = 2;
1074        $this->trailing_context_lines = 2;
1075    }
1076
1077    function format($diff) {
1078        // Preserve whitespaces by converting some to non-breaking spaces.
1079        // Do not convert all of them to allow word-wrap.
1080        $val = parent::format($diff);
1081        $val = str_replace('  ','&nbsp; ', $val);
1082        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
1083        return $val;
1084    }
1085
1086    function _pre($text){
1087        $text = htmlspecialchars($text);
1088        return $text;
1089    }
1090
1091    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1092        global $lang;
1093        $l1 = $lang['line'].' '.$xbeg;
1094        $l2 = $lang['line'].' '.$ybeg;
1095        $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n".
1096             '    <td class="diff-blockheader" colspan="2">'.$l2.":</td>\n".
1097             "</tr>\n";
1098        return $r;
1099    }
1100
1101    function _start_block($header) {
1102        print($header);
1103    }
1104
1105    function _end_block() {
1106    }
1107
1108    function _lines($lines, $prefix=' ', $color="white") {
1109    }
1110
1111    function addedLine($line) {
1112        return '<td>+</td><td class="diff-addedline">' .  $line.'</td>';
1113    }
1114
1115    function deletedLine($line) {
1116        return '<td>-</td><td class="diff-deletedline">' .  $line.'</td>';
1117    }
1118
1119    function emptyLine() {
1120        return '<td colspan="2">&nbsp;</td>';
1121    }
1122
1123    function contextLine($line) {
1124        return '<td> </td><td class="diff-context">'.$line.'</td>';
1125    }
1126
1127    function _added($lines) {
1128        foreach ($lines as $line) {
1129            print('<tr>' . $this->emptyLine() . $this->addedLine($line) . "</tr>\n");
1130        }
1131    }
1132
1133    function _deleted($lines) {
1134        foreach ($lines as $line) {
1135            print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n");
1136        }
1137    }
1138
1139    function _context($lines) {
1140        foreach ($lines as $line) {
1141            print('<tr>' . $this->contextLine($line) .  $this->contextLine($line) . "</tr>\n");
1142        }
1143    }
1144
1145    function _changed($orig, $closing) {
1146        $diff = new WordLevelDiff($orig, $closing);
1147        $del = $diff->orig();
1148        $add = $diff->closing();
1149
1150        while ($line = array_shift($del)) {
1151            $aline = array_shift($add);
1152            print('<tr>' . $this->deletedLine($line) . $this->addedLine($aline) . "</tr>\n");
1153        }
1154        $this->_added($add); # If any leftovers
1155    }
1156}
1157
1158/**
1159 *  Inline style diff formatter.
1160 *
1161 */
1162class InlineDiffFormatter extends DiffFormatter {
1163    var $colspan = 4;
1164    /**
1165     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
1166     */
1167    function InlineDiffFormatter() {
1168        $this->__construct();
1169    }
1170
1171    function __construct() {
1172        $this->leading_context_lines = 2;
1173        $this->trailing_context_lines = 2;
1174    }
1175
1176    function format($diff) {
1177        // Preserve whitespaces by converting some to non-breaking spaces.
1178        // Do not convert all of them to allow word-wrap.
1179        $val = parent::format($diff);
1180        $val = str_replace('  ','&nbsp; ', $val);
1181        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
1182        return $val;
1183    }
1184
1185    function _pre($text){
1186        $text = htmlspecialchars($text);
1187        return $text;
1188    }
1189
1190    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1191        global $lang;
1192        if ($xlen != 1)
1193            $xbeg .= "," . $xlen;
1194        if ($ylen != 1)
1195            $ybeg .= "," . $ylen;
1196        $r = '<tr><td colspan="'.$this->colspan.'" class="diff-blockheader">@@ '.$lang['line']." -$xbeg +$ybeg @@";
1197        $r .= ' <span class="diff-deletedline"><del>'.$lang['deleted'].'</del></span>';
1198        $r .= ' <span class="diff-addedline">'.$lang['created'].'</span>';
1199        $r .= "</td></tr>\n";
1200        return $r;
1201    }
1202
1203    function _start_block($header) {
1204        print($header."\n");
1205    }
1206
1207    function _end_block() {
1208    }
1209
1210    function _lines($lines, $prefix=' ', $color="white") {
1211    }
1212
1213    function _added($lines) {
1214        foreach ($lines as $line) {
1215            print('<tr><td colspan="'.$this->colspan.'" class="diff-addedline">'. $line . "</td></tr>\n");
1216        }
1217    }
1218
1219    function _deleted($lines) {
1220        foreach ($lines as $line) {
1221            print('<tr><td colspan="'.$this->colspan.'" class="diff-deletedline"><del>' . $line . "</del></td></tr>\n");
1222        }
1223    }
1224
1225    function _context($lines) {
1226        foreach ($lines as $line) {
1227            print('<tr><td colspan="'.$this->colspan.'" class="diff-context">'.$line."</td></tr>\n");
1228        }
1229    }
1230
1231    function _changed($orig, $closing) {
1232        $diff = new InlineWordLevelDiff($orig, $closing);
1233        $add = $diff->inline();
1234
1235        foreach ($add as $line)
1236            print('<tr><td colspan="'.$this->colspan.'">'.$line."</td></tr>\n");
1237    }
1238}
1239
1240
1241//Setup VIM: ex: et ts=4 :
1242