xref: /dokuwiki/inc/DifferenceEngine.php (revision 34df7cb037a3bc9c3b7337a01bc8615be1b6ab8d)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi    var $type;
14f3f0262cSandi    var $orig;
15f3f0262cSandi    var $closing;
16f3f0262cSandi
1742ea7f44SGerrit Uitslag    /**
1842ea7f44SGerrit Uitslag     * @return _DiffOp
1942ea7f44SGerrit Uitslag     */
20f3f0262cSandi    function reverse() {
21f3f0262cSandi        trigger_error("pure virtual", E_USER_ERROR);
22f3f0262cSandi    }
23f3f0262cSandi
24f3f0262cSandi    function norig() {
25f5b78785SAndreas Gohr        return $this->orig ? count($this->orig) : 0;
26f3f0262cSandi    }
27f3f0262cSandi
28f3f0262cSandi    function nclosing() {
29f5b78785SAndreas Gohr        return $this->closing ? count($this->closing) : 0;
30f3f0262cSandi    }
31f3f0262cSandi}
32f3f0262cSandi
33f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
34f3f0262cSandi    var $type = 'copy';
35988c1340SPiyush Mishra
36988c1340SPiyush Mishra    function __construct($orig, $closing = false) {
37f3f0262cSandi        if (!is_array($closing))
38f3f0262cSandi            $closing = $orig;
39f3f0262cSandi        $this->orig = $orig;
40f3f0262cSandi        $this->closing = $closing;
41f3f0262cSandi    }
42f3f0262cSandi
43f3f0262cSandi    function reverse() {
44f3f0262cSandi        return new _DiffOp_Copy($this->closing, $this->orig);
45f3f0262cSandi    }
46f3f0262cSandi}
47f3f0262cSandi
48f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
49f3f0262cSandi    var $type = 'delete';
50988c1340SPiyush Mishra
51988c1340SPiyush Mishra    function __construct($lines) {
52f3f0262cSandi        $this->orig = $lines;
53f3f0262cSandi        $this->closing = false;
54f3f0262cSandi    }
55f3f0262cSandi
56f3f0262cSandi    function reverse() {
57f3f0262cSandi        return new _DiffOp_Add($this->orig);
58f3f0262cSandi    }
59f3f0262cSandi}
60f3f0262cSandi
61f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
62f3f0262cSandi    var $type = 'add';
63988c1340SPiyush Mishra
64988c1340SPiyush Mishra    function __construct($lines) {
65f3f0262cSandi        $this->closing = $lines;
66f3f0262cSandi        $this->orig = false;
67f3f0262cSandi    }
68f3f0262cSandi
69f3f0262cSandi    function reverse() {
70f3f0262cSandi        return new _DiffOp_Delete($this->closing);
71f3f0262cSandi    }
72f3f0262cSandi}
73f3f0262cSandi
74f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
75f3f0262cSandi    var $type = 'change';
76988c1340SPiyush Mishra
77988c1340SPiyush Mishra    function __construct($orig, $closing) {
78f3f0262cSandi        $this->orig = $orig;
79f3f0262cSandi        $this->closing = $closing;
80f3f0262cSandi    }
81f3f0262cSandi
82f3f0262cSandi    function reverse() {
83f3f0262cSandi        return new _DiffOp_Change($this->closing, $this->orig);
84f3f0262cSandi    }
85f3f0262cSandi}
86f3f0262cSandi
87f3f0262cSandi
88f3f0262cSandi/**
89f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
90f3f0262cSandi *
91f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
92f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
93f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
94f3f0262cSandi *
95f3f0262cSandi * More ideas are taken from:
96f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
97f3f0262cSandi *
98f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
99f3f0262cSandi * diffutils-2.7, which can be found at:
100f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
101f3f0262cSandi *
102f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
103f3f0262cSandi * are my own.
104f3f0262cSandi *
105f3f0262cSandi * @author Geoffrey T. Dairiki
106f3f0262cSandi * @access private
107f3f0262cSandi */
108f5b78785SAndreas Gohrclass _DiffEngine {
109f5b78785SAndreas Gohr
11042ea7f44SGerrit Uitslag    var $xchanged = array();
11142ea7f44SGerrit Uitslag    var $ychanged = array();
11242ea7f44SGerrit Uitslag    var $xv = array();
11342ea7f44SGerrit Uitslag    var $yv = array();
11442ea7f44SGerrit Uitslag    var $xind = array();
11542ea7f44SGerrit Uitslag    var $yind = array();
11642ea7f44SGerrit Uitslag    var $seq;
11742ea7f44SGerrit Uitslag    var $in_seq;
11842ea7f44SGerrit Uitslag    var $lcs;
11942ea7f44SGerrit Uitslag
12042ea7f44SGerrit Uitslag    /**
12142ea7f44SGerrit Uitslag     * @param array $from_lines
12242ea7f44SGerrit Uitslag     * @param array $to_lines
12342ea7f44SGerrit Uitslag     * @return _DiffOp[]
12442ea7f44SGerrit Uitslag     */
125f3f0262cSandi    function diff($from_lines, $to_lines) {
126f5b78785SAndreas Gohr        $n_from = count($from_lines);
127f5b78785SAndreas Gohr        $n_to = count($to_lines);
128f3f0262cSandi
129f3f0262cSandi        $this->xchanged = $this->ychanged = array();
130f3f0262cSandi        $this->xv = $this->yv = array();
131f3f0262cSandi        $this->xind = $this->yind = array();
132f3f0262cSandi        unset($this->seq);
133f3f0262cSandi        unset($this->in_seq);
134f3f0262cSandi        unset($this->lcs);
135f3f0262cSandi
136f3f0262cSandi        // Skip leading common lines.
137f3f0262cSandi        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
138f3f0262cSandi            if ($from_lines[$skip] != $to_lines[$skip])
139f3f0262cSandi                break;
140f3f0262cSandi            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
141f3f0262cSandi        }
142f3f0262cSandi        // Skip trailing common lines.
143f5b78785SAndreas Gohr        $xi = $n_from;
144f5b78785SAndreas Gohr        $yi = $n_to;
145f3f0262cSandi        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
146f3f0262cSandi            if ($from_lines[$xi] != $to_lines[$yi])
147f3f0262cSandi                break;
148f3f0262cSandi            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
149f3f0262cSandi        }
150f3f0262cSandi
151f3f0262cSandi        // Ignore lines which do not exist in both files.
152f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
153f3f0262cSandi            $xhash[$from_lines[$xi]] = 1;
154f3f0262cSandi        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
155f3f0262cSandi            $line = $to_lines[$yi];
156f3f0262cSandi            if (($this->ychanged[$yi] = empty($xhash[$line])))
157f3f0262cSandi                continue;
158f3f0262cSandi            $yhash[$line] = 1;
159f3f0262cSandi            $this->yv[] = $line;
160f3f0262cSandi            $this->yind[] = $yi;
161f3f0262cSandi        }
162f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
163f3f0262cSandi            $line = $from_lines[$xi];
164f3f0262cSandi            if (($this->xchanged[$xi] = empty($yhash[$line])))
165f3f0262cSandi                continue;
166f3f0262cSandi            $this->xv[] = $line;
167f3f0262cSandi            $this->xind[] = $xi;
168f3f0262cSandi        }
169f3f0262cSandi
170f3f0262cSandi        // Find the LCS.
171f5b78785SAndreas Gohr        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
172f3f0262cSandi
173f3f0262cSandi        // Merge edits when possible
174f3f0262cSandi        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
175f3f0262cSandi        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
176f3f0262cSandi
177f3f0262cSandi        // Compute the edit operations.
178f3f0262cSandi        $edits = array();
179f3f0262cSandi        $xi = $yi = 0;
180f3f0262cSandi        while ($xi < $n_from || $yi < $n_to) {
181f3f0262cSandi            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
182f3f0262cSandi            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
183f3f0262cSandi
184f3f0262cSandi            // Skip matching "snake".
185f3f0262cSandi            $copy = array();
1867deca91bSRobin Getz            while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
187f3f0262cSandi                $copy[] = $from_lines[$xi++];
188f3f0262cSandi                ++$yi;
189f3f0262cSandi            }
190f3f0262cSandi            if ($copy)
191f3f0262cSandi                $edits[] = new _DiffOp_Copy($copy);
192f3f0262cSandi
193f3f0262cSandi            // Find deletes & adds.
194f3f0262cSandi            $delete = array();
195f3f0262cSandi            while ($xi < $n_from && $this->xchanged[$xi])
196f3f0262cSandi                $delete[] = $from_lines[$xi++];
197f3f0262cSandi
198f3f0262cSandi            $add = array();
199f3f0262cSandi            while ($yi < $n_to && $this->ychanged[$yi])
200f3f0262cSandi                $add[] = $to_lines[$yi++];
201f3f0262cSandi
202f3f0262cSandi            if ($delete && $add)
203f3f0262cSandi                $edits[] = new _DiffOp_Change($delete, $add);
204f3f0262cSandi            elseif ($delete)
205f3f0262cSandi                $edits[] = new _DiffOp_Delete($delete);
206f3f0262cSandi            elseif ($add)
207f3f0262cSandi                $edits[] = new _DiffOp_Add($add);
208f3f0262cSandi        }
209f3f0262cSandi        return $edits;
210f3f0262cSandi    }
211f3f0262cSandi
212f3f0262cSandi
21315fae107Sandi    /**
21415fae107Sandi     * Divide the Largest Common Subsequence (LCS) of the sequences
215f3f0262cSandi     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
216f3f0262cSandi     * sized segments.
217f3f0262cSandi     *
218f3f0262cSandi     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
219f3f0262cSandi     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
220f3f0262cSandi     * sub sequences.  The first sub-sequence is contained in [X0, X1),
221f3f0262cSandi     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
222f3f0262cSandi     * that (X0, Y0) == (XOFF, YOFF) and
223f3f0262cSandi     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
224f3f0262cSandi     *
225f3f0262cSandi     * This function assumes that the first lines of the specified portions
226f3f0262cSandi     * of the two files do not match, and likewise that the last lines do not
227f3f0262cSandi     * match.  The caller must trim matching lines from the beginning and end
228f3f0262cSandi     * of the portions it is going to specify.
229f3f0262cSandi     */
230f3f0262cSandi    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
231f3f0262cSandi        $flip = false;
232f3f0262cSandi
233f3f0262cSandi        if ($xlim - $xoff > $ylim - $yoff) {
234f3f0262cSandi            // Things seems faster (I'm not sure I understand why)
235f3f0262cSandi            // when the shortest sequence in X.
236f3f0262cSandi            $flip = true;
2377deca91bSRobin Getz            list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
238f3f0262cSandi        }
239f3f0262cSandi
240f3f0262cSandi        if ($flip)
241f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
242f3f0262cSandi                $ymatches[$this->xv[$i]][] = $i;
243f3f0262cSandi        else
244f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
245f3f0262cSandi                $ymatches[$this->yv[$i]][] = $i;
246f3f0262cSandi
247f3f0262cSandi        $this->lcs = 0;
248f3f0262cSandi        $this->seq[0]= $yoff - 1;
249f3f0262cSandi        $this->in_seq = array();
250f3f0262cSandi        $ymids[0] = array();
251f3f0262cSandi
252f3f0262cSandi        $numer = $xlim - $xoff + $nchunks - 1;
253f3f0262cSandi        $x = $xoff;
254f3f0262cSandi        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
255f3f0262cSandi            if ($chunk > 0)
256f3f0262cSandi                for ($i = 0; $i <= $this->lcs; $i++)
257f3f0262cSandi                    $ymids[$i][$chunk-1] = $this->seq[$i];
258f3f0262cSandi
259f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
260f3f0262cSandi            for ( ; $x < $x1; $x++) {
261f3f0262cSandi                $line = $flip ? $this->yv[$x] : $this->xv[$x];
262f3f0262cSandi                if (empty($ymatches[$line]))
263f3f0262cSandi                    continue;
264f3f0262cSandi                $matches = $ymatches[$line];
265f3f0262cSandi                reset($matches);
266f3f0262cSandi                while (list ($junk, $y) = each($matches))
267f3f0262cSandi                    if (empty($this->in_seq[$y])) {
268f3f0262cSandi                        $k = $this->_lcs_pos($y);
269f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
270f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
271f3f0262cSandi                        break;
272f3f0262cSandi                    }
273f3f0262cSandi                while (list ($junk, $y) = each($matches)) {
274f3f0262cSandi                    if ($y > $this->seq[$k-1]) {
275f3f0262cSandi                        USE_ASSERTS && assert($y < $this->seq[$k]);
276f3f0262cSandi                        // Optimization: this is a common case:
277f3f0262cSandi                        //  next match is just replacing previous match.
278f3f0262cSandi                        $this->in_seq[$this->seq[$k]] = false;
279f3f0262cSandi                        $this->seq[$k] = $y;
280f3f0262cSandi                        $this->in_seq[$y] = 1;
281f3f0262cSandi                    }
282f3f0262cSandi                    else if (empty($this->in_seq[$y])) {
283f3f0262cSandi                        $k = $this->_lcs_pos($y);
284f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
285f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
286f3f0262cSandi                    }
287f3f0262cSandi                }
288f3f0262cSandi            }
289f3f0262cSandi        }
290f3f0262cSandi
291f3f0262cSandi        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
292f3f0262cSandi        $ymid = $ymids[$this->lcs];
293f3f0262cSandi        for ($n = 0; $n < $nchunks - 1; $n++) {
294f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
295f3f0262cSandi            $y1 = $ymid[$n] + 1;
296f3f0262cSandi            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
297f3f0262cSandi        }
298f3f0262cSandi        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
299f3f0262cSandi
300f3f0262cSandi        return array($this->lcs, $seps);
301f3f0262cSandi    }
302f3f0262cSandi
303f3f0262cSandi    function _lcs_pos($ypos) {
304f3f0262cSandi        $end = $this->lcs;
305f3f0262cSandi        if ($end == 0 || $ypos > $this->seq[$end]) {
306f3f0262cSandi            $this->seq[++$this->lcs] = $ypos;
307f3f0262cSandi            $this->in_seq[$ypos] = 1;
308f3f0262cSandi            return $this->lcs;
309f3f0262cSandi        }
310f3f0262cSandi
311f3f0262cSandi        $beg = 1;
312f3f0262cSandi        while ($beg < $end) {
313f3f0262cSandi            $mid = (int)(($beg + $end) / 2);
314f3f0262cSandi            if ($ypos > $this->seq[$mid])
315f3f0262cSandi                $beg = $mid + 1;
316f3f0262cSandi            else
317f3f0262cSandi                $end = $mid;
318f3f0262cSandi        }
319f3f0262cSandi
320f3f0262cSandi        USE_ASSERTS && assert($ypos != $this->seq[$end]);
321f3f0262cSandi
322f3f0262cSandi        $this->in_seq[$this->seq[$end]] = false;
323f3f0262cSandi        $this->seq[$end] = $ypos;
324f3f0262cSandi        $this->in_seq[$ypos] = 1;
325f3f0262cSandi        return $end;
326f3f0262cSandi    }
327f3f0262cSandi
32815fae107Sandi    /**
32915fae107Sandi     * Find LCS of two sequences.
330f3f0262cSandi     *
331f3f0262cSandi     * The results are recorded in the vectors $this->{x,y}changed[], by
332f3f0262cSandi     * storing a 1 in the element for each line that is an insertion
333f3f0262cSandi     * or deletion (ie. is not in the LCS).
334f3f0262cSandi     *
335f3f0262cSandi     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
336f3f0262cSandi     *
337f3f0262cSandi     * Note that XLIM, YLIM are exclusive bounds.
338f3f0262cSandi     * All line numbers are origin-0 and discarded lines are not counted.
339f3f0262cSandi     */
340f3f0262cSandi    function _compareseq($xoff, $xlim, $yoff, $ylim) {
341f3f0262cSandi        // Slide down the bottom initial diagonal.
3427deca91bSRobin Getz        while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
343f3f0262cSandi            ++$xoff;
344f3f0262cSandi            ++$yoff;
345f3f0262cSandi        }
346f3f0262cSandi
347f3f0262cSandi        // Slide up the top initial diagonal.
3487deca91bSRobin Getz        while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
349f3f0262cSandi            --$xlim;
350f3f0262cSandi            --$ylim;
351f3f0262cSandi        }
352f3f0262cSandi
353f3f0262cSandi        if ($xoff == $xlim || $yoff == $ylim)
354f3f0262cSandi            $lcs = 0;
355f3f0262cSandi        else {
356f3f0262cSandi            // This is ad hoc but seems to work well.
357f3f0262cSandi            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
358f3f0262cSandi            //$nchunks = max(2,min(8,(int)$nchunks));
359f3f0262cSandi            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
360f3f0262cSandi            list ($lcs, $seps)
361f3f0262cSandi                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
362f3f0262cSandi        }
363f3f0262cSandi
364f3f0262cSandi        if ($lcs == 0) {
365f3f0262cSandi            // X and Y sequences have no common subsequence:
366f3f0262cSandi            // mark all changed.
367f3f0262cSandi            while ($yoff < $ylim)
368f3f0262cSandi                $this->ychanged[$this->yind[$yoff++]] = 1;
369f3f0262cSandi            while ($xoff < $xlim)
370f3f0262cSandi                $this->xchanged[$this->xind[$xoff++]] = 1;
371f3f0262cSandi        }
372f3f0262cSandi        else {
373f3f0262cSandi            // Use the partitions to split this problem into subproblems.
374f3f0262cSandi            reset($seps);
375f3f0262cSandi            $pt1 = $seps[0];
376f3f0262cSandi            while ($pt2 = next($seps)) {
377f3f0262cSandi                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
378f3f0262cSandi                $pt1 = $pt2;
379f3f0262cSandi            }
380f3f0262cSandi        }
381f3f0262cSandi    }
382f3f0262cSandi
38315fae107Sandi    /**
38415fae107Sandi     * Adjust inserts/deletes of identical lines to join changes
385f3f0262cSandi     * as much as possible.
386f3f0262cSandi     *
387f3f0262cSandi     * We do something when a run of changed lines include a
388f3f0262cSandi     * line at one end and has an excluded, identical line at the other.
389f3f0262cSandi     * We are free to choose which identical line is included.
390f3f0262cSandi     * `compareseq' usually chooses the one at the beginning,
391f3f0262cSandi     * but usually it is cleaner to consider the following identical line
392f3f0262cSandi     * to be the "change".
393f3f0262cSandi     *
394f3f0262cSandi     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
395f3f0262cSandi     */
396f3f0262cSandi    function _shift_boundaries($lines, &$changed, $other_changed) {
397f3f0262cSandi        $i = 0;
398f3f0262cSandi        $j = 0;
399f3f0262cSandi
400f5b78785SAndreas Gohr        USE_ASSERTS && assert('count($lines) == count($changed)');
401f5b78785SAndreas Gohr        $len = count($lines);
402f5b78785SAndreas Gohr        $other_len = count($other_changed);
403f3f0262cSandi
404f3f0262cSandi        while (1) {
405f3f0262cSandi            /*
406f3f0262cSandi             * Scan forwards to find beginning of another run of changes.
407f3f0262cSandi             * Also keep track of the corresponding point in the other file.
408f3f0262cSandi             *
409f3f0262cSandi             * Throughout this code, $i and $j are adjusted together so that
410f3f0262cSandi             * the first $i elements of $changed and the first $j elements
411f3f0262cSandi             * of $other_changed both contain the same number of zeros
412f3f0262cSandi             * (unchanged lines).
413f3f0262cSandi             * Furthermore, $j is always kept so that $j == $other_len or
414f3f0262cSandi             * $other_changed[$j] == false.
415f3f0262cSandi             */
416f3f0262cSandi            while ($j < $other_len && $other_changed[$j])
417f3f0262cSandi                $j++;
418f3f0262cSandi
419f3f0262cSandi            while ($i < $len && ! $changed[$i]) {
420f3f0262cSandi                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
421f5b78785SAndreas Gohr                $i++;
422f5b78785SAndreas Gohr                $j++;
423f3f0262cSandi                while ($j < $other_len && $other_changed[$j])
424f3f0262cSandi                    $j++;
425f3f0262cSandi            }
426f3f0262cSandi
427f3f0262cSandi            if ($i == $len)
428f3f0262cSandi                break;
429f3f0262cSandi
430f3f0262cSandi            $start = $i;
431f3f0262cSandi
432f3f0262cSandi            // Find the end of this run of changes.
433f3f0262cSandi            while (++$i < $len && $changed[$i])
434f3f0262cSandi                continue;
435f3f0262cSandi
436f3f0262cSandi            do {
437f3f0262cSandi                /*
438f3f0262cSandi                 * Record the length of this run of changes, so that
439f3f0262cSandi                 * we can later determine whether the run has grown.
440f3f0262cSandi                 */
441f3f0262cSandi                $runlength = $i - $start;
442f3f0262cSandi
443f3f0262cSandi                /*
444f3f0262cSandi                 * Move the changed region back, so long as the
445f3f0262cSandi                 * previous unchanged line matches the last changed one.
446f3f0262cSandi                 * This merges with previous changed regions.
447f3f0262cSandi                 */
448f3f0262cSandi                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
449f3f0262cSandi                    $changed[--$start] = 1;
450f3f0262cSandi                    $changed[--$i] = false;
451f3f0262cSandi                    while ($start > 0 && $changed[$start - 1])
452f3f0262cSandi                        $start--;
453f3f0262cSandi                    USE_ASSERTS && assert('$j > 0');
454f3f0262cSandi                    while ($other_changed[--$j])
455f3f0262cSandi                        continue;
456f3f0262cSandi                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
457f3f0262cSandi                }
458f3f0262cSandi
459f3f0262cSandi                /*
460f3f0262cSandi                 * Set CORRESPONDING to the end of the changed run, at the last
461f3f0262cSandi                 * point where it corresponds to a changed run in the other file.
462f3f0262cSandi                 * CORRESPONDING == LEN means no such point has been found.
463f3f0262cSandi                 */
464f3f0262cSandi                $corresponding = $j < $other_len ? $i : $len;
465f3f0262cSandi
466f3f0262cSandi                /*
467f3f0262cSandi                 * Move the changed region forward, so long as the
468f3f0262cSandi                 * first changed line matches the following unchanged one.
469f3f0262cSandi                 * This merges with following changed regions.
470f3f0262cSandi                 * Do this second, so that if there are no merges,
471f3f0262cSandi                 * the changed region is moved forward as far as possible.
472f3f0262cSandi                 */
473f3f0262cSandi                while ($i < $len && $lines[$start] == $lines[$i]) {
474f3f0262cSandi                    $changed[$start++] = false;
475f3f0262cSandi                    $changed[$i++] = 1;
476f3f0262cSandi                    while ($i < $len && $changed[$i])
477f3f0262cSandi                        $i++;
478f3f0262cSandi
479f3f0262cSandi                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
480f3f0262cSandi                    $j++;
481f3f0262cSandi                    if ($j < $other_len && $other_changed[$j]) {
482f3f0262cSandi                        $corresponding = $i;
483f3f0262cSandi                        while ($j < $other_len && $other_changed[$j])
484f3f0262cSandi                            $j++;
485f3f0262cSandi                    }
486f3f0262cSandi                }
487f3f0262cSandi            } while ($runlength != $i - $start);
488f3f0262cSandi
489f3f0262cSandi            /*
490f3f0262cSandi             * If possible, move the fully-merged run of changes
491f3f0262cSandi             * back to a corresponding run in the other file.
492f3f0262cSandi             */
493f3f0262cSandi            while ($corresponding < $i) {
494f3f0262cSandi                $changed[--$start] = 1;
495f3f0262cSandi                $changed[--$i] = 0;
496f3f0262cSandi                USE_ASSERTS && assert('$j > 0');
497f3f0262cSandi                while ($other_changed[--$j])
498f3f0262cSandi                    continue;
499f3f0262cSandi                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
500f3f0262cSandi            }
501f3f0262cSandi        }
502f3f0262cSandi    }
503f3f0262cSandi}
504f3f0262cSandi
505f3f0262cSandi/**
506f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
507f3f0262cSandi */
508f5b78785SAndreas Gohrclass Diff {
509f5b78785SAndreas Gohr
510f3f0262cSandi    var $edits;
511f3f0262cSandi
512f3f0262cSandi    /**
513f3f0262cSandi     * Constructor.
514f3f0262cSandi     * Computes diff between sequences of strings.
515f3f0262cSandi     *
51642ea7f44SGerrit Uitslag     * @param array $from_lines An array of strings.
517f3f0262cSandi     *                          (Typically these are lines from a file.)
51842ea7f44SGerrit Uitslag     * @param array $to_lines   An array of strings.
519f3f0262cSandi     */
520988c1340SPiyush Mishra    function __construct($from_lines, $to_lines) {
521f3f0262cSandi        $eng = new _DiffEngine;
522f3f0262cSandi        $this->edits = $eng->diff($from_lines, $to_lines);
523f3f0262cSandi        //$this->_check($from_lines, $to_lines);
524f3f0262cSandi    }
525f3f0262cSandi
526f3f0262cSandi    /**
527f3f0262cSandi     * Compute reversed Diff.
528f3f0262cSandi     *
529f3f0262cSandi     * SYNOPSIS:
530f3f0262cSandi     *
531f3f0262cSandi     *  $diff = new Diff($lines1, $lines2);
532f3f0262cSandi     *  $rev = $diff->reverse();
53342ea7f44SGerrit Uitslag     *
53442ea7f44SGerrit Uitslag     * @return Diff  A Diff object representing the inverse of the
535f3f0262cSandi     *               original diff.
536f3f0262cSandi     */
537f3f0262cSandi    function reverse() {
538f3f0262cSandi        $rev = $this;
539f3f0262cSandi        $rev->edits = array();
540f3f0262cSandi        foreach ($this->edits as $edit) {
541f3f0262cSandi            $rev->edits[] = $edit->reverse();
542f3f0262cSandi        }
543f3f0262cSandi        return $rev;
544f3f0262cSandi    }
545f3f0262cSandi
546f3f0262cSandi    /**
547f3f0262cSandi     * Check for empty diff.
548f3f0262cSandi     *
549f3f0262cSandi     * @return bool True iff two sequences were identical.
550f3f0262cSandi     */
551f3f0262cSandi    function isEmpty() {
552f3f0262cSandi        foreach ($this->edits as $edit) {
553f3f0262cSandi            if ($edit->type != 'copy')
554f3f0262cSandi                return false;
555f3f0262cSandi        }
556f3f0262cSandi        return true;
557f3f0262cSandi    }
558f3f0262cSandi
559f3f0262cSandi    /**
560f3f0262cSandi     * Compute the length of the Longest Common Subsequence (LCS).
561f3f0262cSandi     *
562f3f0262cSandi     * This is mostly for diagnostic purposed.
563f3f0262cSandi     *
564f3f0262cSandi     * @return int The length of the LCS.
565f3f0262cSandi     */
566f3f0262cSandi    function lcs() {
567f3f0262cSandi        $lcs = 0;
568f3f0262cSandi        foreach ($this->edits as $edit) {
569f3f0262cSandi            if ($edit->type == 'copy')
570f5b78785SAndreas Gohr                $lcs += count($edit->orig);
571f3f0262cSandi        }
572f3f0262cSandi        return $lcs;
573f3f0262cSandi    }
574f3f0262cSandi
575f3f0262cSandi    /**
576f3f0262cSandi     * Get the original set of lines.
577f3f0262cSandi     *
578f3f0262cSandi     * This reconstructs the $from_lines parameter passed to the
579f3f0262cSandi     * constructor.
580f3f0262cSandi     *
581f3f0262cSandi     * @return array The original sequence of strings.
582f3f0262cSandi     */
583f3f0262cSandi    function orig() {
584f3f0262cSandi        $lines = array();
585f3f0262cSandi
586f3f0262cSandi        foreach ($this->edits as $edit) {
587f3f0262cSandi            if ($edit->orig)
588f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->orig);
589f3f0262cSandi        }
590f3f0262cSandi        return $lines;
591f3f0262cSandi    }
592f3f0262cSandi
593f3f0262cSandi    /**
594f3f0262cSandi     * Get the closing set of lines.
595f3f0262cSandi     *
596f3f0262cSandi     * This reconstructs the $to_lines parameter passed to the
597f3f0262cSandi     * constructor.
598f3f0262cSandi     *
599f3f0262cSandi     * @return array The sequence of strings.
600f3f0262cSandi     */
601f3f0262cSandi    function closing() {
602f3f0262cSandi        $lines = array();
603f3f0262cSandi
604f3f0262cSandi        foreach ($this->edits as $edit) {
605f3f0262cSandi            if ($edit->closing)
606f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->closing);
607f3f0262cSandi        }
608f3f0262cSandi        return $lines;
609f3f0262cSandi    }
610f3f0262cSandi
611f3f0262cSandi    /**
612f3f0262cSandi     * Check a Diff for validity.
613f3f0262cSandi     *
614f3f0262cSandi     * This is here only for debugging purposes.
615f3f0262cSandi     */
616f3f0262cSandi    function _check($from_lines, $to_lines) {
617f3f0262cSandi        if (serialize($from_lines) != serialize($this->orig()))
618f3f0262cSandi            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
619f3f0262cSandi        if (serialize($to_lines) != serialize($this->closing()))
620f3f0262cSandi            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
621f3f0262cSandi
622f3f0262cSandi        $rev = $this->reverse();
623f3f0262cSandi        if (serialize($to_lines) != serialize($rev->orig()))
624f3f0262cSandi            trigger_error("Reversed original doesn't match", E_USER_ERROR);
625f3f0262cSandi        if (serialize($from_lines) != serialize($rev->closing()))
626f3f0262cSandi            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
627f3f0262cSandi
628f3f0262cSandi        $prevtype = 'none';
629f3f0262cSandi        foreach ($this->edits as $edit) {
630f3f0262cSandi            if ($prevtype == $edit->type)
631f3f0262cSandi                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
632f3f0262cSandi            $prevtype = $edit->type;
633f3f0262cSandi        }
634f3f0262cSandi
635f3f0262cSandi        $lcs = $this->lcs();
636f3f0262cSandi        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
637f3f0262cSandi    }
638f3f0262cSandi}
639f3f0262cSandi
640f3f0262cSandi/**
641f3f0262cSandi * FIXME: bad name.
642f3f0262cSandi */
643f5b78785SAndreas Gohrclass MappedDiff extends Diff {
644f3f0262cSandi    /**
645f3f0262cSandi     * Constructor.
646f3f0262cSandi     *
647f3f0262cSandi     * Computes diff between sequences of strings.
648f3f0262cSandi     *
649f3f0262cSandi     * This can be used to compute things like
650f3f0262cSandi     * case-insensitve diffs, or diffs which ignore
651f3f0262cSandi     * changes in white-space.
652f3f0262cSandi     *
65342ea7f44SGerrit Uitslag     * @param string[] $from_lines         An array of strings.
654f3f0262cSandi     *                                     (Typically these are lines from a file.)
655f3f0262cSandi     *
65642ea7f44SGerrit Uitslag     * @param string[] $to_lines           An array of strings.
657f3f0262cSandi     *
65842ea7f44SGerrit Uitslag     * @param string[] $mapped_from_lines  This array should
659f3f0262cSandi     *                                     have the same size number of elements as $from_lines.
660f3f0262cSandi     *                                     The elements in $mapped_from_lines and
661f3f0262cSandi     *                                     $mapped_to_lines are what is actually compared
662f3f0262cSandi     *                                     when computing the diff.
663f3f0262cSandi     *
66442ea7f44SGerrit Uitslag     * @param string[] $mapped_to_lines    This array should
665f3f0262cSandi     *                                     have the same number of elements as $to_lines.
666f3f0262cSandi     */
667988c1340SPiyush Mishra    function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
668f3f0262cSandi
669f5b78785SAndreas Gohr        assert(count($from_lines) == count($mapped_from_lines));
670f5b78785SAndreas Gohr        assert(count($to_lines) == count($mapped_to_lines));
671f3f0262cSandi
672988c1340SPiyush Mishra        parent::__construct($mapped_from_lines, $mapped_to_lines);
673f3f0262cSandi
674f3f0262cSandi        $xi = $yi = 0;
675f5b78785SAndreas Gohr        $ecnt = count($this->edits);
676f5b78785SAndreas Gohr        for ($i = 0; $i < $ecnt; $i++) {
677f3f0262cSandi            $orig = &$this->edits[$i]->orig;
678f3f0262cSandi            if (is_array($orig)) {
679f5b78785SAndreas Gohr                $orig = array_slice($from_lines, $xi, count($orig));
680f5b78785SAndreas Gohr                $xi += count($orig);
681f3f0262cSandi            }
682f3f0262cSandi
683f3f0262cSandi            $closing = &$this->edits[$i]->closing;
684f3f0262cSandi            if (is_array($closing)) {
685f5b78785SAndreas Gohr                $closing = array_slice($to_lines, $yi, count($closing));
686f5b78785SAndreas Gohr                $yi += count($closing);
687f3f0262cSandi            }
688f3f0262cSandi        }
689f3f0262cSandi    }
690f3f0262cSandi}
691f3f0262cSandi
692f3f0262cSandi/**
693f3f0262cSandi * A class to format Diffs
694f3f0262cSandi *
695f3f0262cSandi * This class formats the diff in classic diff format.
696f3f0262cSandi * It is intended that this class be customized via inheritance,
697f3f0262cSandi * to obtain fancier outputs.
698f3f0262cSandi */
699f5b78785SAndreas Gohrclass DiffFormatter {
700f3f0262cSandi    /**
701f3f0262cSandi     * Number of leading context "lines" to preserve.
702f3f0262cSandi     *
703f3f0262cSandi     * This should be left at zero for this class, but subclasses
704f3f0262cSandi     * may want to set this to other values.
705f3f0262cSandi     */
706f3f0262cSandi    var $leading_context_lines = 0;
707f3f0262cSandi
708f3f0262cSandi    /**
709f3f0262cSandi     * Number of trailing context "lines" to preserve.
710f3f0262cSandi     *
711f3f0262cSandi     * This should be left at zero for this class, but subclasses
712f3f0262cSandi     * may want to set this to other values.
713f3f0262cSandi     */
714f3f0262cSandi    var $trailing_context_lines = 0;
715f3f0262cSandi
716f3f0262cSandi    /**
717f3f0262cSandi     * Format a diff.
718f3f0262cSandi     *
71942ea7f44SGerrit Uitslag     * @param Diff $diff A Diff object.
720f3f0262cSandi     * @return string The formatted output.
721f3f0262cSandi     */
722f3f0262cSandi    function format($diff) {
723f3f0262cSandi
724f3f0262cSandi        $xi = $yi = 1;
72542ea7f44SGerrit Uitslag        $x0 = $y0 = 0;
726f3f0262cSandi        $block = false;
727f3f0262cSandi        $context = array();
728f3f0262cSandi
729f3f0262cSandi        $nlead = $this->leading_context_lines;
730f3f0262cSandi        $ntrail = $this->trailing_context_lines;
731f3f0262cSandi
732f3f0262cSandi        $this->_start_diff();
733f3f0262cSandi
734f3f0262cSandi        foreach ($diff->edits as $edit) {
735f3f0262cSandi            if ($edit->type == 'copy') {
736f3f0262cSandi                if (is_array($block)) {
737f5b78785SAndreas Gohr                    if (count($edit->orig) <= $nlead + $ntrail) {
738f3f0262cSandi                        $block[] = $edit;
739f3f0262cSandi                    }
740f3f0262cSandi                    else{
741f3f0262cSandi                        if ($ntrail) {
742f3f0262cSandi                            $context = array_slice($edit->orig, 0, $ntrail);
743f3f0262cSandi                            $block[] = new _DiffOp_Copy($context);
744f3f0262cSandi                        }
7457deca91bSRobin Getz                        $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
746f3f0262cSandi                        $block = false;
747f3f0262cSandi                    }
748f3f0262cSandi                }
749f3f0262cSandi                $context = $edit->orig;
750f3f0262cSandi            }
751f3f0262cSandi            else {
752f3f0262cSandi                if (! is_array($block)) {
753f5b78785SAndreas Gohr                    $context = array_slice($context, count($context) - $nlead);
754f5b78785SAndreas Gohr                    $x0 = $xi - count($context);
755f5b78785SAndreas Gohr                    $y0 = $yi - count($context);
756f3f0262cSandi                    $block = array();
757f3f0262cSandi                    if ($context)
758f3f0262cSandi                        $block[] = new _DiffOp_Copy($context);
759f3f0262cSandi                }
760f3f0262cSandi                $block[] = $edit;
761f3f0262cSandi            }
762f3f0262cSandi
763f3f0262cSandi            if ($edit->orig)
764f5b78785SAndreas Gohr                $xi += count($edit->orig);
765f3f0262cSandi            if ($edit->closing)
766f5b78785SAndreas Gohr                $yi += count($edit->closing);
767f3f0262cSandi        }
768f3f0262cSandi
769f3f0262cSandi        if (is_array($block))
7707deca91bSRobin Getz            $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
771f3f0262cSandi
772f3f0262cSandi        return $this->_end_diff();
773f3f0262cSandi    }
774f3f0262cSandi
77542ea7f44SGerrit Uitslag    /**
77642ea7f44SGerrit Uitslag     * @param int $xbeg
77742ea7f44SGerrit Uitslag     * @param int $xlen
77842ea7f44SGerrit Uitslag     * @param int $ybeg
77942ea7f44SGerrit Uitslag     * @param int $ylen
78042ea7f44SGerrit Uitslag     * @param array $edits
78142ea7f44SGerrit Uitslag     */
782f3f0262cSandi    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
783f3f0262cSandi        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
784f3f0262cSandi        foreach ($edits as $edit) {
785f3f0262cSandi            if ($edit->type == 'copy')
786f3f0262cSandi                $this->_context($edit->orig);
787f3f0262cSandi            elseif ($edit->type == 'add')
788f3f0262cSandi                $this->_added($edit->closing);
789f3f0262cSandi            elseif ($edit->type == 'delete')
790f3f0262cSandi                $this->_deleted($edit->orig);
791f3f0262cSandi            elseif ($edit->type == 'change')
792f3f0262cSandi                $this->_changed($edit->orig, $edit->closing);
793f3f0262cSandi            else
794f3f0262cSandi                trigger_error("Unknown edit type", E_USER_ERROR);
795f3f0262cSandi        }
796f3f0262cSandi        $this->_end_block();
797f3f0262cSandi    }
798f3f0262cSandi
799f3f0262cSandi    function _start_diff() {
800f3f0262cSandi        ob_start();
801f3f0262cSandi    }
802f3f0262cSandi
803f3f0262cSandi    function _end_diff() {
804f3f0262cSandi        $val = ob_get_contents();
805f3f0262cSandi        ob_end_clean();
806f3f0262cSandi        return $val;
807f3f0262cSandi    }
808f3f0262cSandi
80942ea7f44SGerrit Uitslag    /**
81042ea7f44SGerrit Uitslag     * @param int $xbeg
81142ea7f44SGerrit Uitslag     * @param int $xlen
81242ea7f44SGerrit Uitslag     * @param int $ybeg
81342ea7f44SGerrit Uitslag     * @param int $ylen
81442ea7f44SGerrit Uitslag     * @return string
81542ea7f44SGerrit Uitslag     */
816f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
817f3f0262cSandi        if ($xlen > 1)
818f3f0262cSandi            $xbeg .= "," . ($xbeg + $xlen - 1);
819f3f0262cSandi        if ($ylen > 1)
820f3f0262cSandi            $ybeg .= "," . ($ybeg + $ylen - 1);
821f3f0262cSandi
822f3f0262cSandi        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
823f3f0262cSandi    }
824f3f0262cSandi
82542ea7f44SGerrit Uitslag    /**
82642ea7f44SGerrit Uitslag     * @param string $header
82742ea7f44SGerrit Uitslag     */
828f3f0262cSandi    function _start_block($header) {
829f3f0262cSandi        echo $header;
830f3f0262cSandi    }
831f3f0262cSandi
832f3f0262cSandi    function _end_block() {
833f3f0262cSandi    }
834f3f0262cSandi
835f3f0262cSandi    function _lines($lines, $prefix = ' ') {
836f3f0262cSandi        foreach ($lines as $line)
83760056e69SChristopher Smith            echo "$prefix ".$this->_escape($line)."\n";
838f3f0262cSandi    }
839f3f0262cSandi
840f3f0262cSandi    function _context($lines) {
841f3f0262cSandi        $this->_lines($lines);
842f3f0262cSandi    }
843f3f0262cSandi
844f3f0262cSandi    function _added($lines) {
845f3f0262cSandi        $this->_lines($lines, ">");
846f3f0262cSandi    }
847f3f0262cSandi    function _deleted($lines) {
848f3f0262cSandi        $this->_lines($lines, "<");
849f3f0262cSandi    }
850f3f0262cSandi
851f3f0262cSandi    function _changed($orig, $closing) {
852f3f0262cSandi        $this->_deleted($orig);
853f3f0262cSandi        echo "---\n";
854f3f0262cSandi        $this->_added($closing);
855f3f0262cSandi    }
85660056e69SChristopher Smith
857bfd197d2ShArpanet    /**
858bfd197d2ShArpanet     * Escape string
859bfd197d2ShArpanet     *
860bfd197d2ShArpanet     * Override this method within other formatters if escaping required.
861bfd197d2ShArpanet     * Base class requires $str to be returned WITHOUT escaping.
862bfd197d2ShArpanet     *
863bfd197d2ShArpanet     * @param $str string Text string to escape
864bfd197d2ShArpanet     * @return string The escaped string.
865bfd197d2ShArpanet     */
86660056e69SChristopher Smith    function _escape($str){
86760056e69SChristopher Smith        return $str;
86860056e69SChristopher Smith    }
869f3f0262cSandi}
870f3f0262cSandi
87147a906eaSAndreas Gohr/**
87247a906eaSAndreas Gohr * Utilityclass for styling HTML formatted diffs
87347a906eaSAndreas Gohr *
87447a906eaSAndreas Gohr * Depends on global var $DIFF_INLINESTYLES, if true some minimal predefined
87547a906eaSAndreas Gohr * inline styles are used. Useful for HTML mails and RSS feeds
87647a906eaSAndreas Gohr *
87747a906eaSAndreas Gohr * @author Andreas Gohr <andi@splitbrain.org>
87847a906eaSAndreas Gohr */
87947a906eaSAndreas Gohrclass HTMLDiff {
88047a906eaSAndreas Gohr    /**
88147a906eaSAndreas Gohr     * Holds the style names and basic CSS
88247a906eaSAndreas Gohr     */
88347a906eaSAndreas Gohr    static public $styles = array(
88447a906eaSAndreas Gohr            'diff-addedline'    => 'background-color: #ddffdd;',
88547a906eaSAndreas Gohr            'diff-deletedline'  => 'background-color: #ffdddd;',
88647a906eaSAndreas Gohr            'diff-context'      => 'background-color: #f5f5f5;',
88747a906eaSAndreas Gohr            'diff-mark'         => 'color: #ff0000;',
88847a906eaSAndreas Gohr        );
88947a906eaSAndreas Gohr
89047a906eaSAndreas Gohr    /**
89147a906eaSAndreas Gohr     * Return a class or style parameter
89247a906eaSAndreas Gohr     */
89347a906eaSAndreas Gohr    static function css($classname){
89447a906eaSAndreas Gohr        global $DIFF_INLINESTYLES;
89547a906eaSAndreas Gohr
89647a906eaSAndreas Gohr        if($DIFF_INLINESTYLES){
89747a906eaSAndreas Gohr            if(!isset(self::$styles[$classname])) return '';
89847a906eaSAndreas Gohr            return 'style="'.self::$styles[$classname].'"';
89947a906eaSAndreas Gohr        }else{
90047a906eaSAndreas Gohr            return 'class="'.$classname.'"';
90147a906eaSAndreas Gohr        }
90247a906eaSAndreas Gohr    }
90347a906eaSAndreas Gohr}
904f3f0262cSandi
905f3f0262cSandi/**
906f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
907f3f0262cSandi *
908f3f0262cSandi */
909f3f0262cSandi
9109b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
911f3f0262cSandi
912f3f0262cSandiclass _HWLDF_WordAccumulator {
913988c1340SPiyush Mishra
914988c1340SPiyush Mishra    function __construct() {
915f3f0262cSandi        $this->_lines = array();
916f3f0262cSandi        $this->_line = '';
917f3f0262cSandi        $this->_group = '';
918f3f0262cSandi        $this->_tag = '';
919f3f0262cSandi    }
920f3f0262cSandi
921f3f0262cSandi    function _flushGroup($new_tag) {
922f3f0262cSandi        if ($this->_group !== '') {
923f3f0262cSandi            if ($this->_tag == 'mark')
92460056e69SChristopher Smith                $this->_line .= '<strong '.HTMLDiff::css('diff-mark').'>'.$this->_escape($this->_group).'</strong>';
925812bb04eSRobin Getz            elseif ($this->_tag == 'add')
92660056e69SChristopher Smith                $this->_line .= '<span '.HTMLDiff::css('diff-addedline').'>'.$this->_escape($this->_group).'</span>';
927812bb04eSRobin Getz            elseif ($this->_tag == 'del')
92860056e69SChristopher Smith                $this->_line .= '<span '.HTMLDiff::css('diff-deletedline').'><del>'.$this->_escape($this->_group).'</del></span>';
929f3f0262cSandi            else
93060056e69SChristopher Smith                $this->_line .= $this->_escape($this->_group);
931f3f0262cSandi        }
932f3f0262cSandi        $this->_group = '';
933f3f0262cSandi        $this->_tag = $new_tag;
934f3f0262cSandi    }
935f3f0262cSandi
93642ea7f44SGerrit Uitslag    /**
93742ea7f44SGerrit Uitslag     * @param string $new_tag
93842ea7f44SGerrit Uitslag     */
939f3f0262cSandi    function _flushLine($new_tag) {
940f3f0262cSandi        $this->_flushGroup($new_tag);
941f3f0262cSandi        if ($this->_line != '')
942f3f0262cSandi            $this->_lines[] = $this->_line;
943f3f0262cSandi        $this->_line = '';
944f3f0262cSandi    }
945f3f0262cSandi
946f3f0262cSandi    function addWords($words, $tag = '') {
947f3f0262cSandi        if ($tag != $this->_tag)
948f3f0262cSandi            $this->_flushGroup($tag);
949f3f0262cSandi
950f3f0262cSandi        foreach ($words as $word) {
951f3f0262cSandi            // new-line should only come as first char of word.
952f3f0262cSandi            if ($word == '')
953f3f0262cSandi                continue;
954f3f0262cSandi            if ($word[0] == "\n") {
955f3f0262cSandi                $this->_group .= NBSP;
956f3f0262cSandi                $this->_flushLine($tag);
957f3f0262cSandi                $word = substr($word, 1);
958f3f0262cSandi            }
959f3f0262cSandi            assert(!strstr($word, "\n"));
960f3f0262cSandi            $this->_group .= $word;
961f3f0262cSandi        }
962f3f0262cSandi    }
963f3f0262cSandi
964f3f0262cSandi    function getLines() {
965f3f0262cSandi        $this->_flushLine('~done');
966f3f0262cSandi        return $this->_lines;
967f3f0262cSandi    }
96860056e69SChristopher Smith
96960056e69SChristopher Smith    function _escape($str){
97060056e69SChristopher Smith        return hsc($str);
97160056e69SChristopher Smith    }
972f3f0262cSandi}
973f3f0262cSandi
974f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff {
975f5b78785SAndreas Gohr
976988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
977f3f0262cSandi        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
978f3f0262cSandi        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
979f3f0262cSandi
980988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
981f3f0262cSandi    }
982f3f0262cSandi
983f3f0262cSandi    function _split($lines) {
9845e1ee188SXin LI        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
9857deca91bSRobin Getz             implode("\n", $lines), $m)) {
986f3f0262cSandi            return array(array(''), array(''));
987f3f0262cSandi        }
988f3f0262cSandi        return array($m[0], $m[1]);
989f3f0262cSandi    }
990f3f0262cSandi
991f3f0262cSandi    function orig() {
992f3f0262cSandi        $orig = new _HWLDF_WordAccumulator;
993f3f0262cSandi
994f3f0262cSandi        foreach ($this->edits as $edit) {
995f3f0262cSandi            if ($edit->type == 'copy')
996f3f0262cSandi                $orig->addWords($edit->orig);
997f3f0262cSandi            elseif ($edit->orig)
998f3f0262cSandi                $orig->addWords($edit->orig, 'mark');
999f3f0262cSandi        }
1000f3f0262cSandi        return $orig->getLines();
1001f3f0262cSandi    }
1002f3f0262cSandi
1003f3f0262cSandi    function closing() {
1004f3f0262cSandi        $closing = new _HWLDF_WordAccumulator;
1005f3f0262cSandi
1006f3f0262cSandi        foreach ($this->edits as $edit) {
1007f3f0262cSandi            if ($edit->type == 'copy')
1008f3f0262cSandi                $closing->addWords($edit->closing);
1009f3f0262cSandi            elseif ($edit->closing)
1010f3f0262cSandi                $closing->addWords($edit->closing, 'mark');
1011f3f0262cSandi        }
1012f3f0262cSandi        return $closing->getLines();
1013f3f0262cSandi    }
1014f3f0262cSandi}
1015f3f0262cSandi
1016812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff {
1017812bb04eSRobin Getz
1018988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
1019812bb04eSRobin Getz        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
1020812bb04eSRobin Getz        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
1021812bb04eSRobin Getz
1022988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
1023812bb04eSRobin Getz    }
1024812bb04eSRobin Getz
1025812bb04eSRobin Getz    function _split($lines) {
1026c5982caaSDanny Lin        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
1027812bb04eSRobin Getz             implode("\n", $lines), $m)) {
1028812bb04eSRobin Getz            return array(array(''), array(''));
1029812bb04eSRobin Getz        }
1030812bb04eSRobin Getz        return array($m[0], $m[1]);
1031812bb04eSRobin Getz    }
1032812bb04eSRobin Getz
1033812bb04eSRobin Getz    function inline() {
1034812bb04eSRobin Getz        $orig = new _HWLDF_WordAccumulator;
1035812bb04eSRobin Getz        foreach ($this->edits as $edit) {
1036812bb04eSRobin Getz            if ($edit->type == 'copy')
10374f2305cbSAdrian Lang                $orig->addWords($edit->closing);
1038812bb04eSRobin Getz            elseif ($edit->type == 'change'){
1039812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1040812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1041812bb04eSRobin Getz            } elseif ($edit->type == 'delete')
1042812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1043812bb04eSRobin Getz            elseif ($edit->type == 'add')
1044812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1045812bb04eSRobin Getz            elseif ($edit->orig)
1046812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1047812bb04eSRobin Getz        }
1048812bb04eSRobin Getz        return $orig->getLines();
1049812bb04eSRobin Getz    }
1050812bb04eSRobin Getz}
1051812bb04eSRobin Getz
1052f3f0262cSandi/**
1053f3f0262cSandi * "Unified" diff formatter.
1054f3f0262cSandi *
1055f3f0262cSandi * This class formats the diff in classic "unified diff" format.
1056df9752e9SChristopher Smith *
1057df9752e9SChristopher Smith * NOTE: output is plain text and unsafe for use in HTML without escaping.
1058f3f0262cSandi */
1059f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter {
1060f5b78785SAndreas Gohr
1061988c1340SPiyush Mishra    function __construct($context_lines = 4) {
1062f3f0262cSandi        $this->leading_context_lines = $context_lines;
1063f3f0262cSandi        $this->trailing_context_lines = $context_lines;
1064f3f0262cSandi    }
1065f3f0262cSandi
1066f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1067f3f0262cSandi        if ($xlen != 1)
1068f3f0262cSandi            $xbeg .= "," . $xlen;
1069f3f0262cSandi        if ($ylen != 1)
1070f3f0262cSandi            $ybeg .= "," . $ylen;
1071f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
1072f3f0262cSandi    }
1073f3f0262cSandi
1074f3f0262cSandi    function _added($lines) {
1075f3f0262cSandi        $this->_lines($lines, "+");
1076f3f0262cSandi    }
1077f3f0262cSandi    function _deleted($lines) {
1078f3f0262cSandi        $this->_lines($lines, "-");
1079f3f0262cSandi    }
1080f3f0262cSandi    function _changed($orig, $final) {
1081f3f0262cSandi        $this->_deleted($orig);
1082f3f0262cSandi        $this->_added($final);
1083f3f0262cSandi    }
1084f3f0262cSandi}
1085f3f0262cSandi
1086f3f0262cSandi/**
1087f3f0262cSandi *  Wikipedia Table style diff formatter.
1088f3f0262cSandi *
1089f3f0262cSandi */
1090f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter {
10913a4ea35cSChristopher Smith    var $colspan = 2;
1092f5b78785SAndreas Gohr
1093988c1340SPiyush Mishra    function __construct() {
1094f3f0262cSandi        $this->leading_context_lines = 2;
1095f3f0262cSandi        $this->trailing_context_lines = 2;
1096f3f0262cSandi    }
1097f3f0262cSandi
109842ea7f44SGerrit Uitslag    /**
109942ea7f44SGerrit Uitslag     * @param Diff $diff
110042ea7f44SGerrit Uitslag     * @return string
110142ea7f44SGerrit Uitslag     */
11022d880650SAdrian Lang    function format($diff) {
11032d880650SAdrian Lang        // Preserve whitespaces by converting some to non-breaking spaces.
11042d880650SAdrian Lang        // Do not convert all of them to allow word-wrap.
11052d880650SAdrian Lang        $val = parent::format($diff);
1106e260f93bSAnika Henke        $val = str_replace('  ','&#160; ', $val);
1107e260f93bSAnika Henke        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&#160;', $val);
11082d880650SAdrian Lang        return $val;
11092d880650SAdrian Lang    }
11102d880650SAdrian Lang
1111f3f0262cSandi    function _pre($text){
1112f3f0262cSandi        $text = htmlspecialchars($text);
1113f3f0262cSandi        return $text;
1114f3f0262cSandi    }
1115f3f0262cSandi
1116f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1117f3f0262cSandi        global $lang;
1118f3f0262cSandi        $l1 = $lang['line'].' '.$xbeg;
1119f3f0262cSandi        $l2 = $lang['line'].' '.$ybeg;
11203a4ea35cSChristopher Smith        $r = '<tr><td '.HTMLDiff::css('diff-blockheader').' colspan="'.$this->colspan.'">'.$l1.":</td>\n".
11213a4ea35cSChristopher Smith             '<td '.HTMLDiff::css('diff-blockheader').' colspan="'.$this->colspan.'">'.$l2.":</td>\n".
11225048c277SAnika Henke             "</tr>\n";
1123f3f0262cSandi        return $r;
1124f3f0262cSandi    }
1125f3f0262cSandi
1126f3f0262cSandi    function _start_block($header) {
1127f3f0262cSandi        print($header);
1128f3f0262cSandi    }
1129f3f0262cSandi
1130f3f0262cSandi    function _end_block() {
1131f3f0262cSandi    }
1132f3f0262cSandi
1133f3f0262cSandi    function _lines($lines, $prefix=' ', $color="white") {
1134f3f0262cSandi    }
1135f3f0262cSandi
113660056e69SChristopher Smith    function addedLine($line,$escaped=false) {
113760056e69SChristopher Smith        if (!$escaped){
113860056e69SChristopher Smith            $line = $this->_escape($line);
113960056e69SChristopher Smith        }
1140f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>+</td>'.
1141f76724a4STom N Harris               '<td '.HTMLDiff::css('diff-addedline').'>' .  $line.'</td>';
1142f3f0262cSandi    }
1143f3f0262cSandi
114460056e69SChristopher Smith    function deletedLine($line,$escaped=false) {
114560056e69SChristopher Smith        if (!$escaped){
114660056e69SChristopher Smith            $line = $this->_escape($line);
114760056e69SChristopher Smith        }
1148f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>-</td>'.
1149f76724a4STom N Harris               '<td '.HTMLDiff::css('diff-deletedline').'>' .  $line.'</td>';
1150f3f0262cSandi    }
1151f3f0262cSandi
1152f3f0262cSandi    function emptyLine() {
11533a4ea35cSChristopher Smith        return '<td colspan="'.$this->colspan.'">&#160;</td>';
1154f3f0262cSandi    }
1155f3f0262cSandi
1156f3f0262cSandi    function contextLine($line) {
1157f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>&#160;</td>'.
1158333ef4f3SChristopher Smith               '<td '.HTMLDiff::css('diff-context').'>'.$this->_escape($line).'</td>';
1159f3f0262cSandi    }
1160f3f0262cSandi
1161f3f0262cSandi    function _added($lines) {
116260056e69SChristopher Smith        $this->_addedLines($lines,false);
116360056e69SChristopher Smith    }
116460056e69SChristopher Smith
116560056e69SChristopher Smith    function _addedLines($lines,$escaped=false){
1166f3f0262cSandi        foreach ($lines as $line) {
116760056e69SChristopher Smith            print('<tr>' . $this->emptyLine() . $this->addedLine($line,$escaped) . "</tr>\n");
1168f3f0262cSandi        }
1169f3f0262cSandi    }
1170f3f0262cSandi
1171f3f0262cSandi    function _deleted($lines) {
1172f3f0262cSandi        foreach ($lines as $line) {
11737deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n");
1174f3f0262cSandi        }
1175f3f0262cSandi    }
1176f3f0262cSandi
1177f3f0262cSandi    function _context($lines) {
1178f3f0262cSandi        foreach ($lines as $line) {
11797deca91bSRobin Getz            print('<tr>' . $this->contextLine($line) .  $this->contextLine($line) . "</tr>\n");
1180f3f0262cSandi        }
1181f3f0262cSandi    }
1182f3f0262cSandi
1183f3f0262cSandi    function _changed($orig, $closing) {
118460056e69SChristopher Smith        $diff = new WordLevelDiff($orig, $closing);  // this escapes the diff data
1185f3f0262cSandi        $del = $diff->orig();
1186f3f0262cSandi        $add = $diff->closing();
1187f3f0262cSandi
1188f3f0262cSandi        while ($line = array_shift($del)) {
1189f3f0262cSandi            $aline = array_shift($add);
119060056e69SChristopher Smith            print('<tr>' . $this->deletedLine($line,true) . $this->addedLine($aline,true) . "</tr>\n");
1191f3f0262cSandi        }
119260056e69SChristopher Smith        $this->_addedLines($add,true); # If any leftovers
119360056e69SChristopher Smith    }
119460056e69SChristopher Smith
119560056e69SChristopher Smith    function _escape($str) {
119660056e69SChristopher Smith        return hsc($str);
1197f3f0262cSandi    }
1198f3f0262cSandi}
1199f3f0262cSandi
1200812bb04eSRobin Getz/**
1201812bb04eSRobin Getz *  Inline style diff formatter.
1202812bb04eSRobin Getz *
1203812bb04eSRobin Getz */
1204812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter {
1205f76724a4STom N Harris    var $colspan = 2;
1206988c1340SPiyush Mishra
1207988c1340SPiyush Mishra    function __construct() {
1208812bb04eSRobin Getz        $this->leading_context_lines = 2;
1209812bb04eSRobin Getz        $this->trailing_context_lines = 2;
1210812bb04eSRobin Getz    }
1211812bb04eSRobin Getz
121242ea7f44SGerrit Uitslag    /**
121342ea7f44SGerrit Uitslag     * @param Diff $diff
121442ea7f44SGerrit Uitslag     * @return string
121542ea7f44SGerrit Uitslag     */
1216812bb04eSRobin Getz    function format($diff) {
1217812bb04eSRobin Getz        // Preserve whitespaces by converting some to non-breaking spaces.
1218812bb04eSRobin Getz        // Do not convert all of them to allow word-wrap.
1219812bb04eSRobin Getz        $val = parent::format($diff);
1220e260f93bSAnika Henke        $val = str_replace('  ','&#160; ', $val);
1221e260f93bSAnika Henke        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&#160;', $val);
1222812bb04eSRobin Getz        return $val;
1223812bb04eSRobin Getz    }
1224812bb04eSRobin Getz
1225812bb04eSRobin Getz    function _pre($text){
1226812bb04eSRobin Getz        $text = htmlspecialchars($text);
1227812bb04eSRobin Getz        return $text;
1228812bb04eSRobin Getz    }
1229812bb04eSRobin Getz
1230812bb04eSRobin Getz    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1231812bb04eSRobin Getz        global $lang;
1232812bb04eSRobin Getz        if ($xlen != 1)
1233812bb04eSRobin Getz            $xbeg .= "," . $xlen;
1234812bb04eSRobin Getz        if ($ylen != 1)
1235812bb04eSRobin Getz            $ybeg .= "," . $ylen;
12363a4ea35cSChristopher Smith        $r = '<tr><td colspan="'.$this->colspan.'" '.HTMLDiff::css('diff-blockheader').'>@@ '.$lang['line']." -$xbeg +$ybeg @@";
123747a906eaSAndreas Gohr        $r .= ' <span '.HTMLDiff::css('diff-deletedline').'><del>'.$lang['deleted'].'</del></span>';
123847a906eaSAndreas Gohr        $r .= ' <span '.HTMLDiff::css('diff-addedline').'>'.$lang['created'].'</span>';
1239812bb04eSRobin Getz        $r .= "</td></tr>\n";
1240812bb04eSRobin Getz        return $r;
1241812bb04eSRobin Getz    }
1242812bb04eSRobin Getz
1243812bb04eSRobin Getz    function _start_block($header) {
1244812bb04eSRobin Getz        print($header."\n");
1245812bb04eSRobin Getz    }
1246812bb04eSRobin Getz
1247812bb04eSRobin Getz    function _end_block() {
1248812bb04eSRobin Getz    }
1249812bb04eSRobin Getz
1250812bb04eSRobin Getz    function _lines($lines, $prefix=' ', $color="white") {
1251812bb04eSRobin Getz    }
1252812bb04eSRobin Getz
1253812bb04eSRobin Getz    function _added($lines) {
1254812bb04eSRobin Getz        foreach ($lines as $line) {
1255333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-addedline').'>'. $this->_escape($line) . "</td></tr>\n");
1256812bb04eSRobin Getz        }
1257812bb04eSRobin Getz    }
1258812bb04eSRobin Getz
1259812bb04eSRobin Getz    function _deleted($lines) {
1260812bb04eSRobin Getz        foreach ($lines as $line) {
1261333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-deletedline').'><del>' . $this->_escape($line) . "</del></td></tr>\n");
1262812bb04eSRobin Getz        }
1263812bb04eSRobin Getz    }
1264812bb04eSRobin Getz
1265812bb04eSRobin Getz    function _context($lines) {
1266812bb04eSRobin Getz        foreach ($lines as $line) {
1267333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-context').'>'. $this->_escape($line) ."</td></tr>\n");
1268812bb04eSRobin Getz        }
1269812bb04eSRobin Getz    }
1270812bb04eSRobin Getz
1271812bb04eSRobin Getz    function _changed($orig, $closing) {
127260056e69SChristopher Smith        $diff = new InlineWordLevelDiff($orig, $closing);  // this escapes the diff data
1273812bb04eSRobin Getz        $add = $diff->inline();
1274812bb04eSRobin Getz
1275812bb04eSRobin Getz        foreach ($add as $line)
1276a69506c5STom N Harris            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td>'.$line."</td></tr>\n");
1277812bb04eSRobin Getz    }
127860056e69SChristopher Smith
127960056e69SChristopher Smith    function _escape($str) {
128060056e69SChristopher Smith        return hsc($str);
128160056e69SChristopher Smith    }
1282812bb04eSRobin Getz}
1283812bb04eSRobin Getz
1284a297e675SAndreas Gohr/**
1285a297e675SAndreas Gohr * A class for computing three way diffs.
1286a297e675SAndreas Gohr *
1287a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1288a297e675SAndreas Gohr */
1289a297e675SAndreas Gohrclass Diff3 extends Diff {
1290a297e675SAndreas Gohr
1291a297e675SAndreas Gohr    /**
1292a297e675SAndreas Gohr     * Conflict counter.
1293a297e675SAndreas Gohr     *
1294a297e675SAndreas Gohr     * @var integer
1295a297e675SAndreas Gohr     */
1296a297e675SAndreas Gohr    var $_conflictingBlocks = 0;
1297a297e675SAndreas Gohr
1298a297e675SAndreas Gohr    /**
1299a297e675SAndreas Gohr     * Computes diff between 3 sequences of strings.
1300a297e675SAndreas Gohr     *
1301a297e675SAndreas Gohr     * @param array $orig    The original lines to use.
1302a297e675SAndreas Gohr     * @param array $final1  The first version to compare to.
1303a297e675SAndreas Gohr     * @param array $final2  The second version to compare to.
1304a297e675SAndreas Gohr     */
1305a297e675SAndreas Gohr    function __construct($orig, $final1, $final2) {
1306a297e675SAndreas Gohr        $engine = new _DiffEngine();
1307a297e675SAndreas Gohr
1308a297e675SAndreas Gohr        $this->_edits = $this->_diff3($engine->diff($orig, $final1),
1309a297e675SAndreas Gohr                                      $engine->diff($orig, $final2));
1310a297e675SAndreas Gohr    }
1311a297e675SAndreas Gohr
1312a297e675SAndreas Gohr    /**
1313a297e675SAndreas Gohr     * Returns the merged lines
1314a297e675SAndreas Gohr     *
1315a297e675SAndreas Gohr     * @param string $label1  label for first version
1316a297e675SAndreas Gohr     * @param string $label2  label for second version
1317*34df7cb0SAndreas Gohr     * @param string $label3  separator between versions
1318a297e675SAndreas Gohr     * @return array          lines of the merged text
1319a297e675SAndreas Gohr     */
1320*34df7cb0SAndreas Gohr    function mergedOutput($label1='<<<<<<<',$label2='>>>>>>>',$label3='=======') {
1321a297e675SAndreas Gohr        $lines = array();
1322a297e675SAndreas Gohr        foreach ($this->_edits as $edit) {
1323a297e675SAndreas Gohr            if ($edit->isConflict()) {
1324a297e675SAndreas Gohr                /* FIXME: this should probably be moved somewhere else. */
1325a297e675SAndreas Gohr                $lines = array_merge($lines,
1326*34df7cb0SAndreas Gohr                                     array($label1),
1327a297e675SAndreas Gohr                                     $edit->final1,
1328*34df7cb0SAndreas Gohr                                     array($label3),
1329a297e675SAndreas Gohr                                     $edit->final2,
1330*34df7cb0SAndreas Gohr                                     array($label2));
1331a297e675SAndreas Gohr                $this->_conflictingBlocks++;
1332a297e675SAndreas Gohr            } else {
1333a297e675SAndreas Gohr                $lines = array_merge($lines, $edit->merged());
1334a297e675SAndreas Gohr            }
1335a297e675SAndreas Gohr        }
1336a297e675SAndreas Gohr
1337a297e675SAndreas Gohr        return $lines;
1338a297e675SAndreas Gohr    }
1339a297e675SAndreas Gohr
1340a297e675SAndreas Gohr    /**
1341a297e675SAndreas Gohr     * @access private
1342a297e675SAndreas Gohr     */
1343a297e675SAndreas Gohr    function _diff3($edits1, $edits2) {
1344a297e675SAndreas Gohr        $edits = array();
1345a297e675SAndreas Gohr        $bb = new _Diff3_BlockBuilder();
1346a297e675SAndreas Gohr
1347a297e675SAndreas Gohr        $e1 = current($edits1);
1348a297e675SAndreas Gohr        $e2 = current($edits2);
1349a297e675SAndreas Gohr        while ($e1 || $e2) {
1350a297e675SAndreas Gohr            if ($e1 && $e2 && is_a($e1, '_DiffOp_copy') && is_a($e2, '_DiffOp_copy')) {
1351a297e675SAndreas Gohr                /* We have copy blocks from both diffs. This is the (only)
1352a297e675SAndreas Gohr                 * time we want to emit a diff3 copy block.  Flush current
1353a297e675SAndreas Gohr                 * diff3 diff block, if any. */
1354a297e675SAndreas Gohr                if ($edit = $bb->finish()) {
1355a297e675SAndreas Gohr                    $edits[] = $edit;
1356a297e675SAndreas Gohr                }
1357a297e675SAndreas Gohr
1358a297e675SAndreas Gohr                $ncopy = min($e1->norig(), $e2->norig());
1359a297e675SAndreas Gohr                assert($ncopy > 0);
1360a297e675SAndreas Gohr                $edits[] = new _Diff3_Op_copy(array_slice($e1->orig, 0, $ncopy));
1361a297e675SAndreas Gohr
1362a297e675SAndreas Gohr                if ($e1->norig() > $ncopy) {
1363a297e675SAndreas Gohr                    array_splice($e1->orig, 0, $ncopy);
1364a297e675SAndreas Gohr                    array_splice($e1->closing, 0, $ncopy);
1365a297e675SAndreas Gohr                } else {
1366a297e675SAndreas Gohr                    $e1 = next($edits1);
1367a297e675SAndreas Gohr                }
1368a297e675SAndreas Gohr
1369a297e675SAndreas Gohr                if ($e2->norig() > $ncopy) {
1370a297e675SAndreas Gohr                    array_splice($e2->orig, 0, $ncopy);
1371a297e675SAndreas Gohr                    array_splice($e2->closing, 0, $ncopy);
1372a297e675SAndreas Gohr                } else {
1373a297e675SAndreas Gohr                    $e2 = next($edits2);
1374a297e675SAndreas Gohr                }
1375a297e675SAndreas Gohr            } else {
1376a297e675SAndreas Gohr                if ($e1 && $e2) {
1377a297e675SAndreas Gohr                    if ($e1->orig && $e2->orig) {
1378a297e675SAndreas Gohr                        $norig = min($e1->norig(), $e2->norig());
1379a297e675SAndreas Gohr                        $orig = array_splice($e1->orig, 0, $norig);
1380a297e675SAndreas Gohr                        array_splice($e2->orig, 0, $norig);
1381a297e675SAndreas Gohr                        $bb->input($orig);
1382a297e675SAndreas Gohr                    }
1383a297e675SAndreas Gohr
1384a297e675SAndreas Gohr                    if (is_a($e1, '_DiffOp_copy')) {
1385a297e675SAndreas Gohr                        $bb->out1(array_splice($e1->closing, 0, $norig));
1386a297e675SAndreas Gohr                    }
1387a297e675SAndreas Gohr
1388a297e675SAndreas Gohr                    if (is_a($e2, '_DiffOp_copy')) {
1389a297e675SAndreas Gohr                        $bb->out2(array_splice($e2->closing, 0, $norig));
1390a297e675SAndreas Gohr                    }
1391a297e675SAndreas Gohr                }
1392a297e675SAndreas Gohr
1393a297e675SAndreas Gohr                if ($e1 && ! $e1->orig) {
1394a297e675SAndreas Gohr                    $bb->out1($e1->closing);
1395a297e675SAndreas Gohr                    $e1 = next($edits1);
1396a297e675SAndreas Gohr                }
1397a297e675SAndreas Gohr                if ($e2 && ! $e2->orig) {
1398a297e675SAndreas Gohr                    $bb->out2($e2->closing);
1399a297e675SAndreas Gohr                    $e2 = next($edits2);
1400a297e675SAndreas Gohr                }
1401a297e675SAndreas Gohr            }
1402a297e675SAndreas Gohr        }
1403a297e675SAndreas Gohr
1404a297e675SAndreas Gohr        if ($edit = $bb->finish()) {
1405a297e675SAndreas Gohr            $edits[] = $edit;
1406a297e675SAndreas Gohr        }
1407a297e675SAndreas Gohr
1408a297e675SAndreas Gohr        return $edits;
1409a297e675SAndreas Gohr    }
1410a297e675SAndreas Gohr}
1411a297e675SAndreas Gohr
1412a297e675SAndreas Gohr/**
1413a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1414a297e675SAndreas Gohr *
1415a297e675SAndreas Gohr * @access private
1416a297e675SAndreas Gohr */
1417a297e675SAndreas Gohrclass _Diff3_Op {
1418a297e675SAndreas Gohr
1419a297e675SAndreas Gohr    function __construct($orig = false, $final1 = false, $final2 = false) {
1420a297e675SAndreas Gohr        $this->orig = $orig ? $orig : array();
1421a297e675SAndreas Gohr        $this->final1 = $final1 ? $final1 : array();
1422a297e675SAndreas Gohr        $this->final2 = $final2 ? $final2 : array();
1423a297e675SAndreas Gohr    }
1424a297e675SAndreas Gohr
1425a297e675SAndreas Gohr    function merged() {
1426a297e675SAndreas Gohr        if (!isset($this->_merged)) {
1427a297e675SAndreas Gohr            if ($this->final1 === $this->final2) {
1428a297e675SAndreas Gohr                $this->_merged = &$this->final1;
1429a297e675SAndreas Gohr            } elseif ($this->final1 === $this->orig) {
1430a297e675SAndreas Gohr                $this->_merged = &$this->final2;
1431a297e675SAndreas Gohr            } elseif ($this->final2 === $this->orig) {
1432a297e675SAndreas Gohr                $this->_merged = &$this->final1;
1433a297e675SAndreas Gohr            } else {
1434a297e675SAndreas Gohr                $this->_merged = false;
1435a297e675SAndreas Gohr            }
1436a297e675SAndreas Gohr        }
1437a297e675SAndreas Gohr
1438a297e675SAndreas Gohr        return $this->_merged;
1439a297e675SAndreas Gohr    }
1440a297e675SAndreas Gohr
1441a297e675SAndreas Gohr    function isConflict() {
1442a297e675SAndreas Gohr        return $this->merged() === false;
1443a297e675SAndreas Gohr    }
1444a297e675SAndreas Gohr
1445a297e675SAndreas Gohr}
1446a297e675SAndreas Gohr
1447a297e675SAndreas Gohr/**
1448a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1449a297e675SAndreas Gohr *
1450a297e675SAndreas Gohr * @access private
1451a297e675SAndreas Gohr */
1452a297e675SAndreas Gohrclass _Diff3_Op_copy extends _Diff3_Op {
1453a297e675SAndreas Gohr
1454a297e675SAndreas Gohr    function __construct($lines = false) {
1455a297e675SAndreas Gohr        $this->orig = $lines ? $lines : array();
1456a297e675SAndreas Gohr        $this->final1 = &$this->orig;
1457a297e675SAndreas Gohr        $this->final2 = &$this->orig;
1458a297e675SAndreas Gohr    }
1459a297e675SAndreas Gohr
1460a297e675SAndreas Gohr    function merged() {
1461a297e675SAndreas Gohr        return $this->orig;
1462a297e675SAndreas Gohr    }
1463a297e675SAndreas Gohr
1464a297e675SAndreas Gohr    function isConflict() {
1465a297e675SAndreas Gohr        return false;
1466a297e675SAndreas Gohr    }
1467a297e675SAndreas Gohr}
1468a297e675SAndreas Gohr
1469a297e675SAndreas Gohr/**
1470a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1471a297e675SAndreas Gohr *
1472a297e675SAndreas Gohr * @access private
1473a297e675SAndreas Gohr */
1474a297e675SAndreas Gohrclass _Diff3_BlockBuilder {
1475a297e675SAndreas Gohr
1476a297e675SAndreas Gohr    function __construct() {
1477a297e675SAndreas Gohr        $this->_init();
1478a297e675SAndreas Gohr    }
1479a297e675SAndreas Gohr
1480a297e675SAndreas Gohr    function input($lines) {
1481a297e675SAndreas Gohr        if ($lines) {
1482a297e675SAndreas Gohr            $this->_append($this->orig, $lines);
1483a297e675SAndreas Gohr        }
1484a297e675SAndreas Gohr    }
1485a297e675SAndreas Gohr
1486a297e675SAndreas Gohr    function out1($lines) {
1487a297e675SAndreas Gohr        if ($lines) {
1488a297e675SAndreas Gohr            $this->_append($this->final1, $lines);
1489a297e675SAndreas Gohr        }
1490a297e675SAndreas Gohr    }
1491a297e675SAndreas Gohr
1492a297e675SAndreas Gohr    function out2($lines) {
1493a297e675SAndreas Gohr        if ($lines) {
1494a297e675SAndreas Gohr            $this->_append($this->final2, $lines);
1495a297e675SAndreas Gohr        }
1496a297e675SAndreas Gohr    }
1497a297e675SAndreas Gohr
1498a297e675SAndreas Gohr    function isEmpty() {
1499a297e675SAndreas Gohr        return !$this->orig && !$this->final1 && !$this->final2;
1500a297e675SAndreas Gohr    }
1501a297e675SAndreas Gohr
1502a297e675SAndreas Gohr    function finish() {
1503a297e675SAndreas Gohr        if ($this->isEmpty()) {
1504a297e675SAndreas Gohr            return false;
1505a297e675SAndreas Gohr        } else {
1506a297e675SAndreas Gohr            $edit = new _Diff3_Op($this->orig, $this->final1, $this->final2);
1507a297e675SAndreas Gohr            $this->_init();
1508a297e675SAndreas Gohr            return $edit;
1509a297e675SAndreas Gohr        }
1510a297e675SAndreas Gohr    }
1511a297e675SAndreas Gohr
1512a297e675SAndreas Gohr    function _init() {
1513a297e675SAndreas Gohr        $this->orig = $this->final1 = $this->final2 = array();
1514a297e675SAndreas Gohr    }
1515a297e675SAndreas Gohr
1516a297e675SAndreas Gohr    function _append(&$array, $lines) {
1517a297e675SAndreas Gohr        array_splice($array, sizeof($array), 0, $lines);
1518a297e675SAndreas Gohr    }
1519a297e675SAndreas Gohr}
1520340756e4Sandi
1521e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 :
1522