xref: /dokuwiki/inc/DifferenceEngine.php (revision fc55d0b2e6b8eb4249d184d2b332f6771347bdfd)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi    var $type;
14f3f0262cSandi    var $orig;
15f3f0262cSandi    var $closing;
16f3f0262cSandi
1742ea7f44SGerrit Uitslag    /**
1842ea7f44SGerrit Uitslag     * @return _DiffOp
1942ea7f44SGerrit Uitslag     */
20f3f0262cSandi    function reverse() {
21f3f0262cSandi        trigger_error("pure virtual", E_USER_ERROR);
22f3f0262cSandi    }
23f3f0262cSandi
24f3f0262cSandi    function norig() {
25f5b78785SAndreas Gohr        return $this->orig ? count($this->orig) : 0;
26f3f0262cSandi    }
27f3f0262cSandi
28f3f0262cSandi    function nclosing() {
29f5b78785SAndreas Gohr        return $this->closing ? count($this->closing) : 0;
30f3f0262cSandi    }
31f3f0262cSandi}
32f3f0262cSandi
33f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
34f3f0262cSandi    var $type = 'copy';
35988c1340SPiyush Mishra
36988c1340SPiyush Mishra    function __construct($orig, $closing = false) {
37f3f0262cSandi        if (!is_array($closing))
38f3f0262cSandi            $closing = $orig;
39f3f0262cSandi        $this->orig = $orig;
40f3f0262cSandi        $this->closing = $closing;
41f3f0262cSandi    }
42f3f0262cSandi
43f3f0262cSandi    function reverse() {
44f3f0262cSandi        return new _DiffOp_Copy($this->closing, $this->orig);
45f3f0262cSandi    }
46f3f0262cSandi}
47f3f0262cSandi
48f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
49f3f0262cSandi    var $type = 'delete';
50988c1340SPiyush Mishra
51988c1340SPiyush Mishra    function __construct($lines) {
52f3f0262cSandi        $this->orig = $lines;
53f3f0262cSandi        $this->closing = false;
54f3f0262cSandi    }
55f3f0262cSandi
56f3f0262cSandi    function reverse() {
57f3f0262cSandi        return new _DiffOp_Add($this->orig);
58f3f0262cSandi    }
59f3f0262cSandi}
60f3f0262cSandi
61f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
62f3f0262cSandi    var $type = 'add';
63988c1340SPiyush Mishra
64988c1340SPiyush Mishra    function __construct($lines) {
65f3f0262cSandi        $this->closing = $lines;
66f3f0262cSandi        $this->orig = false;
67f3f0262cSandi    }
68f3f0262cSandi
69f3f0262cSandi    function reverse() {
70f3f0262cSandi        return new _DiffOp_Delete($this->closing);
71f3f0262cSandi    }
72f3f0262cSandi}
73f3f0262cSandi
74f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
75f3f0262cSandi    var $type = 'change';
76988c1340SPiyush Mishra
77988c1340SPiyush Mishra    function __construct($orig, $closing) {
78f3f0262cSandi        $this->orig = $orig;
79f3f0262cSandi        $this->closing = $closing;
80f3f0262cSandi    }
81f3f0262cSandi
82f3f0262cSandi    function reverse() {
83f3f0262cSandi        return new _DiffOp_Change($this->closing, $this->orig);
84f3f0262cSandi    }
85f3f0262cSandi}
86f3f0262cSandi
87f3f0262cSandi
88f3f0262cSandi/**
89f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
90f3f0262cSandi *
91f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
92f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
93f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
94f3f0262cSandi *
95f3f0262cSandi * More ideas are taken from:
96f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
97f3f0262cSandi *
98f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
99f3f0262cSandi * diffutils-2.7, which can be found at:
100f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
101f3f0262cSandi *
102f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
103f3f0262cSandi * are my own.
104f3f0262cSandi *
105f3f0262cSandi * @author Geoffrey T. Dairiki
106f3f0262cSandi * @access private
107f3f0262cSandi */
108f5b78785SAndreas Gohrclass _DiffEngine {
109f5b78785SAndreas Gohr
11042ea7f44SGerrit Uitslag    var $xchanged = array();
11142ea7f44SGerrit Uitslag    var $ychanged = array();
11242ea7f44SGerrit Uitslag    var $xv = array();
11342ea7f44SGerrit Uitslag    var $yv = array();
11442ea7f44SGerrit Uitslag    var $xind = array();
11542ea7f44SGerrit Uitslag    var $yind = array();
11642ea7f44SGerrit Uitslag    var $seq;
11742ea7f44SGerrit Uitslag    var $in_seq;
11842ea7f44SGerrit Uitslag    var $lcs;
11942ea7f44SGerrit Uitslag
12042ea7f44SGerrit Uitslag    /**
12142ea7f44SGerrit Uitslag     * @param array $from_lines
12242ea7f44SGerrit Uitslag     * @param array $to_lines
12342ea7f44SGerrit Uitslag     * @return _DiffOp[]
12442ea7f44SGerrit Uitslag     */
125f3f0262cSandi    function diff($from_lines, $to_lines) {
126f5b78785SAndreas Gohr        $n_from = count($from_lines);
127f5b78785SAndreas Gohr        $n_to = count($to_lines);
128f3f0262cSandi
129f3f0262cSandi        $this->xchanged = $this->ychanged = array();
130f3f0262cSandi        $this->xv = $this->yv = array();
131f3f0262cSandi        $this->xind = $this->yind = array();
132f3f0262cSandi        unset($this->seq);
133f3f0262cSandi        unset($this->in_seq);
134f3f0262cSandi        unset($this->lcs);
135f3f0262cSandi
136f3f0262cSandi        // Skip leading common lines.
137f3f0262cSandi        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
138f3f0262cSandi            if ($from_lines[$skip] != $to_lines[$skip])
139f3f0262cSandi                break;
140f3f0262cSandi            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
141f3f0262cSandi        }
142f3f0262cSandi        // Skip trailing common lines.
143f5b78785SAndreas Gohr        $xi = $n_from;
144f5b78785SAndreas Gohr        $yi = $n_to;
145f3f0262cSandi        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
146f3f0262cSandi            if ($from_lines[$xi] != $to_lines[$yi])
147f3f0262cSandi                break;
148f3f0262cSandi            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
149f3f0262cSandi        }
150f3f0262cSandi
151f3f0262cSandi        // Ignore lines which do not exist in both files.
152f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
153f3f0262cSandi            $xhash[$from_lines[$xi]] = 1;
154f3f0262cSandi        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
155f3f0262cSandi            $line = $to_lines[$yi];
156f3f0262cSandi            if (($this->ychanged[$yi] = empty($xhash[$line])))
157f3f0262cSandi                continue;
158f3f0262cSandi            $yhash[$line] = 1;
159f3f0262cSandi            $this->yv[] = $line;
160f3f0262cSandi            $this->yind[] = $yi;
161f3f0262cSandi        }
162f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
163f3f0262cSandi            $line = $from_lines[$xi];
164f3f0262cSandi            if (($this->xchanged[$xi] = empty($yhash[$line])))
165f3f0262cSandi                continue;
166f3f0262cSandi            $this->xv[] = $line;
167f3f0262cSandi            $this->xind[] = $xi;
168f3f0262cSandi        }
169f3f0262cSandi
170f3f0262cSandi        // Find the LCS.
171f5b78785SAndreas Gohr        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
172f3f0262cSandi
173f3f0262cSandi        // Merge edits when possible
174f3f0262cSandi        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
175f3f0262cSandi        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
176f3f0262cSandi
177f3f0262cSandi        // Compute the edit operations.
178f3f0262cSandi        $edits = array();
179f3f0262cSandi        $xi = $yi = 0;
180f3f0262cSandi        while ($xi < $n_from || $yi < $n_to) {
181f3f0262cSandi            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
182f3f0262cSandi            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
183f3f0262cSandi
184f3f0262cSandi            // Skip matching "snake".
185f3f0262cSandi            $copy = array();
1867deca91bSRobin Getz            while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
187f3f0262cSandi                $copy[] = $from_lines[$xi++];
188f3f0262cSandi                ++$yi;
189f3f0262cSandi            }
190f3f0262cSandi            if ($copy)
191f3f0262cSandi                $edits[] = new _DiffOp_Copy($copy);
192f3f0262cSandi
193f3f0262cSandi            // Find deletes & adds.
194f3f0262cSandi            $delete = array();
195f3f0262cSandi            while ($xi < $n_from && $this->xchanged[$xi])
196f3f0262cSandi                $delete[] = $from_lines[$xi++];
197f3f0262cSandi
198f3f0262cSandi            $add = array();
199f3f0262cSandi            while ($yi < $n_to && $this->ychanged[$yi])
200f3f0262cSandi                $add[] = $to_lines[$yi++];
201f3f0262cSandi
202f3f0262cSandi            if ($delete && $add)
203f3f0262cSandi                $edits[] = new _DiffOp_Change($delete, $add);
204f3f0262cSandi            elseif ($delete)
205f3f0262cSandi                $edits[] = new _DiffOp_Delete($delete);
206f3f0262cSandi            elseif ($add)
207f3f0262cSandi                $edits[] = new _DiffOp_Add($add);
208f3f0262cSandi        }
209f3f0262cSandi        return $edits;
210f3f0262cSandi    }
211f3f0262cSandi
212f3f0262cSandi
21315fae107Sandi    /**
21415fae107Sandi     * Divide the Largest Common Subsequence (LCS) of the sequences
215f3f0262cSandi     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
216f3f0262cSandi     * sized segments.
217f3f0262cSandi     *
218f3f0262cSandi     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
219f3f0262cSandi     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
220f3f0262cSandi     * sub sequences.  The first sub-sequence is contained in [X0, X1),
221f3f0262cSandi     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
222f3f0262cSandi     * that (X0, Y0) == (XOFF, YOFF) and
223f3f0262cSandi     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
224f3f0262cSandi     *
225f3f0262cSandi     * This function assumes that the first lines of the specified portions
226f3f0262cSandi     * of the two files do not match, and likewise that the last lines do not
227f3f0262cSandi     * match.  The caller must trim matching lines from the beginning and end
228f3f0262cSandi     * of the portions it is going to specify.
229f50a239bSTakamura     *
230f50a239bSTakamura     * @param integer $xoff
231f50a239bSTakamura     * @param integer $xlim
232f50a239bSTakamura     * @param integer $yoff
233f50a239bSTakamura     * @param integer $ylim
234f50a239bSTakamura     * @param integer $nchunks
235f50a239bSTakamura     *
236f50a239bSTakamura     * @return array
237f3f0262cSandi     */
238f3f0262cSandi    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
239f3f0262cSandi        $flip = false;
240f3f0262cSandi
241f3f0262cSandi        if ($xlim - $xoff > $ylim - $yoff) {
242f3f0262cSandi            // Things seems faster (I'm not sure I understand why)
243f3f0262cSandi            // when the shortest sequence in X.
244f3f0262cSandi            $flip = true;
2457deca91bSRobin Getz            list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
246f3f0262cSandi        }
247f3f0262cSandi
248f3f0262cSandi        if ($flip)
249f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
250f3f0262cSandi                $ymatches[$this->xv[$i]][] = $i;
251f3f0262cSandi        else
252f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
253f3f0262cSandi                $ymatches[$this->yv[$i]][] = $i;
254f3f0262cSandi
255f3f0262cSandi        $this->lcs = 0;
256f3f0262cSandi        $this->seq[0]= $yoff - 1;
257f3f0262cSandi        $this->in_seq = array();
258f3f0262cSandi        $ymids[0] = array();
259f3f0262cSandi
260f3f0262cSandi        $numer = $xlim - $xoff + $nchunks - 1;
261f3f0262cSandi        $x = $xoff;
262f3f0262cSandi        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
263f3f0262cSandi            if ($chunk > 0)
264f3f0262cSandi                for ($i = 0; $i <= $this->lcs; $i++)
265f3f0262cSandi                    $ymids[$i][$chunk-1] = $this->seq[$i];
266f3f0262cSandi
267f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
268f3f0262cSandi            for ( ; $x < $x1; $x++) {
269f3f0262cSandi                $line = $flip ? $this->yv[$x] : $this->xv[$x];
270f3f0262cSandi                if (empty($ymatches[$line]))
271f3f0262cSandi                    continue;
272f3f0262cSandi                $matches = $ymatches[$line];
273dd5c3e2bSPhy                $switch = false;
274dd5c3e2bSPhy                foreach ($matches as $y) {
275d3b71b9dSPhy                    if ($switch && $y > $this->seq[$k-1]) {
276f3f0262cSandi                        USE_ASSERTS && assert($y < $this->seq[$k]);
277f3f0262cSandi                        // Optimization: this is a common case:
278f3f0262cSandi                        //  next match is just replacing previous match.
279f3f0262cSandi                        $this->in_seq[$this->seq[$k]] = false;
280f3f0262cSandi                        $this->seq[$k] = $y;
281f3f0262cSandi                        $this->in_seq[$y] = 1;
282f3f0262cSandi                    }
283f3f0262cSandi                    else if (empty($this->in_seq[$y])) {
284f3f0262cSandi                        $k = $this->_lcs_pos($y);
285f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
286f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
287d3b71b9dSPhy                        $switch = true;
288f3f0262cSandi                    }
289f3f0262cSandi                }
290f3f0262cSandi            }
291dd5c3e2bSPhy        }
292f3f0262cSandi
293f3f0262cSandi        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
294f3f0262cSandi        $ymid = $ymids[$this->lcs];
295f3f0262cSandi        for ($n = 0; $n < $nchunks - 1; $n++) {
296f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
297f3f0262cSandi            $y1 = $ymid[$n] + 1;
298f3f0262cSandi            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
299f3f0262cSandi        }
300f3f0262cSandi        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
301f3f0262cSandi
302f3f0262cSandi        return array($this->lcs, $seps);
303f3f0262cSandi    }
304f3f0262cSandi
305f3f0262cSandi    function _lcs_pos($ypos) {
306f3f0262cSandi        $end = $this->lcs;
307f3f0262cSandi        if ($end == 0 || $ypos > $this->seq[$end]) {
308f3f0262cSandi            $this->seq[++$this->lcs] = $ypos;
309f3f0262cSandi            $this->in_seq[$ypos] = 1;
310f3f0262cSandi            return $this->lcs;
311f3f0262cSandi        }
312f3f0262cSandi
313f3f0262cSandi        $beg = 1;
314f3f0262cSandi        while ($beg < $end) {
315f3f0262cSandi            $mid = (int)(($beg + $end) / 2);
316f3f0262cSandi            if ($ypos > $this->seq[$mid])
317f3f0262cSandi                $beg = $mid + 1;
318f3f0262cSandi            else
319f3f0262cSandi                $end = $mid;
320f3f0262cSandi        }
321f3f0262cSandi
322f3f0262cSandi        USE_ASSERTS && assert($ypos != $this->seq[$end]);
323f3f0262cSandi
324f3f0262cSandi        $this->in_seq[$this->seq[$end]] = false;
325f3f0262cSandi        $this->seq[$end] = $ypos;
326f3f0262cSandi        $this->in_seq[$ypos] = 1;
327f3f0262cSandi        return $end;
328f3f0262cSandi    }
329f3f0262cSandi
33015fae107Sandi    /**
33115fae107Sandi     * Find LCS of two sequences.
332f3f0262cSandi     *
333f3f0262cSandi     * The results are recorded in the vectors $this->{x,y}changed[], by
334f3f0262cSandi     * storing a 1 in the element for each line that is an insertion
335f3f0262cSandi     * or deletion (ie. is not in the LCS).
336f3f0262cSandi     *
337f3f0262cSandi     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
338f3f0262cSandi     *
339f3f0262cSandi     * Note that XLIM, YLIM are exclusive bounds.
340f3f0262cSandi     * All line numbers are origin-0 and discarded lines are not counted.
341f50a239bSTakamura     *
342f50a239bSTakamura     * @param integer $xoff
343f50a239bSTakamura     * @param integer $xlim
344f50a239bSTakamura     * @param integer $yoff
345f50a239bSTakamura     * @param integer $ylim
346f3f0262cSandi     */
347f3f0262cSandi    function _compareseq($xoff, $xlim, $yoff, $ylim) {
348f3f0262cSandi        // Slide down the bottom initial diagonal.
3497deca91bSRobin Getz        while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
350f3f0262cSandi            ++$xoff;
351f3f0262cSandi            ++$yoff;
352f3f0262cSandi        }
353f3f0262cSandi
354f3f0262cSandi        // Slide up the top initial diagonal.
3557deca91bSRobin Getz        while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
356f3f0262cSandi            --$xlim;
357f3f0262cSandi            --$ylim;
358f3f0262cSandi        }
359f3f0262cSandi
360f3f0262cSandi        if ($xoff == $xlim || $yoff == $ylim)
361f3f0262cSandi            $lcs = 0;
362f3f0262cSandi        else {
363f3f0262cSandi            // This is ad hoc but seems to work well.
364f3f0262cSandi            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
365f3f0262cSandi            //$nchunks = max(2,min(8,(int)$nchunks));
366f3f0262cSandi            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
367f3f0262cSandi            list ($lcs, $seps)
368f3f0262cSandi                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
369f3f0262cSandi        }
370f3f0262cSandi
371f3f0262cSandi        if ($lcs == 0) {
372f3f0262cSandi            // X and Y sequences have no common subsequence:
373f3f0262cSandi            // mark all changed.
374f3f0262cSandi            while ($yoff < $ylim)
375f3f0262cSandi                $this->ychanged[$this->yind[$yoff++]] = 1;
376f3f0262cSandi            while ($xoff < $xlim)
377f3f0262cSandi                $this->xchanged[$this->xind[$xoff++]] = 1;
378f3f0262cSandi        }
379f3f0262cSandi        else {
380f3f0262cSandi            // Use the partitions to split this problem into subproblems.
381f3f0262cSandi            reset($seps);
382f3f0262cSandi            $pt1 = $seps[0];
383f3f0262cSandi            while ($pt2 = next($seps)) {
384f3f0262cSandi                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
385f3f0262cSandi                $pt1 = $pt2;
386f3f0262cSandi            }
387f3f0262cSandi        }
388f3f0262cSandi    }
389f3f0262cSandi
39015fae107Sandi    /**
39115fae107Sandi     * Adjust inserts/deletes of identical lines to join changes
392f3f0262cSandi     * as much as possible.
393f3f0262cSandi     *
394f3f0262cSandi     * We do something when a run of changed lines include a
395f3f0262cSandi     * line at one end and has an excluded, identical line at the other.
396f3f0262cSandi     * We are free to choose which identical line is included.
397f3f0262cSandi     * `compareseq' usually chooses the one at the beginning,
398f3f0262cSandi     * but usually it is cleaner to consider the following identical line
399f3f0262cSandi     * to be the "change".
400f3f0262cSandi     *
401f3f0262cSandi     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
402f50a239bSTakamura     *
403f50a239bSTakamura     * @param array $lines
404f50a239bSTakamura     * @param array $changed
405f50a239bSTakamura     * @param array $other_changed
406f3f0262cSandi     */
407f3f0262cSandi    function _shift_boundaries($lines, &$changed, $other_changed) {
408f3f0262cSandi        $i = 0;
409f3f0262cSandi        $j = 0;
410f3f0262cSandi
411*fc55d0b2SPhy        USE_ASSERTS && assert(count($lines) == count($changed));
412f5b78785SAndreas Gohr        $len = count($lines);
413f5b78785SAndreas Gohr        $other_len = count($other_changed);
414f3f0262cSandi
415f3f0262cSandi        while (1) {
416f3f0262cSandi            /*
417f3f0262cSandi             * Scan forwards to find beginning of another run of changes.
418f3f0262cSandi             * Also keep track of the corresponding point in the other file.
419f3f0262cSandi             *
420f3f0262cSandi             * Throughout this code, $i and $j are adjusted together so that
421f3f0262cSandi             * the first $i elements of $changed and the first $j elements
422f3f0262cSandi             * of $other_changed both contain the same number of zeros
423f3f0262cSandi             * (unchanged lines).
424f3f0262cSandi             * Furthermore, $j is always kept so that $j == $other_len or
425f3f0262cSandi             * $other_changed[$j] == false.
426f3f0262cSandi             */
427f3f0262cSandi            while ($j < $other_len && $other_changed[$j])
428f3f0262cSandi                $j++;
429f3f0262cSandi
430f3f0262cSandi            while ($i < $len && ! $changed[$i]) {
431*fc55d0b2SPhy                USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
432f5b78785SAndreas Gohr                $i++;
433f5b78785SAndreas Gohr                $j++;
434f3f0262cSandi                while ($j < $other_len && $other_changed[$j])
435f3f0262cSandi                    $j++;
436f3f0262cSandi            }
437f3f0262cSandi
438f3f0262cSandi            if ($i == $len)
439f3f0262cSandi                break;
440f3f0262cSandi
441f3f0262cSandi            $start = $i;
442f3f0262cSandi
443f3f0262cSandi            // Find the end of this run of changes.
444f3f0262cSandi            while (++$i < $len && $changed[$i])
445f3f0262cSandi                continue;
446f3f0262cSandi
447f3f0262cSandi            do {
448f3f0262cSandi                /*
449f3f0262cSandi                 * Record the length of this run of changes, so that
450f3f0262cSandi                 * we can later determine whether the run has grown.
451f3f0262cSandi                 */
452f3f0262cSandi                $runlength = $i - $start;
453f3f0262cSandi
454f3f0262cSandi                /*
455f3f0262cSandi                 * Move the changed region back, so long as the
456f3f0262cSandi                 * previous unchanged line matches the last changed one.
457f3f0262cSandi                 * This merges with previous changed regions.
458f3f0262cSandi                 */
459f3f0262cSandi                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
460f3f0262cSandi                    $changed[--$start] = 1;
461f3f0262cSandi                    $changed[--$i] = false;
462f3f0262cSandi                    while ($start > 0 && $changed[$start - 1])
463f3f0262cSandi                        $start--;
464*fc55d0b2SPhy                    USE_ASSERTS && assert($j > 0);
465f3f0262cSandi                    while ($other_changed[--$j])
466f3f0262cSandi                        continue;
467*fc55d0b2SPhy                    USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
468f3f0262cSandi                }
469f3f0262cSandi
470f3f0262cSandi                /*
471f3f0262cSandi                 * Set CORRESPONDING to the end of the changed run, at the last
472f3f0262cSandi                 * point where it corresponds to a changed run in the other file.
473f3f0262cSandi                 * CORRESPONDING == LEN means no such point has been found.
474f3f0262cSandi                 */
475f3f0262cSandi                $corresponding = $j < $other_len ? $i : $len;
476f3f0262cSandi
477f3f0262cSandi                /*
478f3f0262cSandi                 * Move the changed region forward, so long as the
479f3f0262cSandi                 * first changed line matches the following unchanged one.
480f3f0262cSandi                 * This merges with following changed regions.
481f3f0262cSandi                 * Do this second, so that if there are no merges,
482f3f0262cSandi                 * the changed region is moved forward as far as possible.
483f3f0262cSandi                 */
484f3f0262cSandi                while ($i < $len && $lines[$start] == $lines[$i]) {
485f3f0262cSandi                    $changed[$start++] = false;
486f3f0262cSandi                    $changed[$i++] = 1;
487f3f0262cSandi                    while ($i < $len && $changed[$i])
488f3f0262cSandi                        $i++;
489f3f0262cSandi
490*fc55d0b2SPhy                    USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
491f3f0262cSandi                    $j++;
492f3f0262cSandi                    if ($j < $other_len && $other_changed[$j]) {
493f3f0262cSandi                        $corresponding = $i;
494f3f0262cSandi                        while ($j < $other_len && $other_changed[$j])
495f3f0262cSandi                            $j++;
496f3f0262cSandi                    }
497f3f0262cSandi                }
498f3f0262cSandi            } while ($runlength != $i - $start);
499f3f0262cSandi
500f3f0262cSandi            /*
501f3f0262cSandi             * If possible, move the fully-merged run of changes
502f3f0262cSandi             * back to a corresponding run in the other file.
503f3f0262cSandi             */
504f3f0262cSandi            while ($corresponding < $i) {
505f3f0262cSandi                $changed[--$start] = 1;
506f3f0262cSandi                $changed[--$i] = 0;
507*fc55d0b2SPhy                USE_ASSERTS && assert($j > 0);
508f3f0262cSandi                while ($other_changed[--$j])
509f3f0262cSandi                    continue;
510*fc55d0b2SPhy                USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
511f3f0262cSandi            }
512f3f0262cSandi        }
513f3f0262cSandi    }
514f3f0262cSandi}
515f3f0262cSandi
516f3f0262cSandi/**
517f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
518f3f0262cSandi */
519f5b78785SAndreas Gohrclass Diff {
520f5b78785SAndreas Gohr
521f3f0262cSandi    var $edits;
522f3f0262cSandi
523f3f0262cSandi    /**
524f3f0262cSandi     * Constructor.
525f3f0262cSandi     * Computes diff between sequences of strings.
526f3f0262cSandi     *
52742ea7f44SGerrit Uitslag     * @param array $from_lines An array of strings.
528f3f0262cSandi     *                          (Typically these are lines from a file.)
52942ea7f44SGerrit Uitslag     * @param array $to_lines   An array of strings.
530f3f0262cSandi     */
531988c1340SPiyush Mishra    function __construct($from_lines, $to_lines) {
532f3f0262cSandi        $eng = new _DiffEngine;
533f3f0262cSandi        $this->edits = $eng->diff($from_lines, $to_lines);
534f3f0262cSandi        //$this->_check($from_lines, $to_lines);
535f3f0262cSandi    }
536f3f0262cSandi
537f3f0262cSandi    /**
538f3f0262cSandi     * Compute reversed Diff.
539f3f0262cSandi     *
540f3f0262cSandi     * SYNOPSIS:
541f3f0262cSandi     *
542f3f0262cSandi     *  $diff = new Diff($lines1, $lines2);
543f3f0262cSandi     *  $rev = $diff->reverse();
54442ea7f44SGerrit Uitslag     *
54542ea7f44SGerrit Uitslag     * @return Diff  A Diff object representing the inverse of the
546f3f0262cSandi     *               original diff.
547f3f0262cSandi     */
548f3f0262cSandi    function reverse() {
549f3f0262cSandi        $rev = $this;
550f3f0262cSandi        $rev->edits = array();
551f3f0262cSandi        foreach ($this->edits as $edit) {
552f3f0262cSandi            $rev->edits[] = $edit->reverse();
553f3f0262cSandi        }
554f3f0262cSandi        return $rev;
555f3f0262cSandi    }
556f3f0262cSandi
557f3f0262cSandi    /**
558f3f0262cSandi     * Check for empty diff.
559f3f0262cSandi     *
560f3f0262cSandi     * @return bool True iff two sequences were identical.
561f3f0262cSandi     */
562f3f0262cSandi    function isEmpty() {
563f3f0262cSandi        foreach ($this->edits as $edit) {
564f3f0262cSandi            if ($edit->type != 'copy')
565f3f0262cSandi                return false;
566f3f0262cSandi        }
567f3f0262cSandi        return true;
568f3f0262cSandi    }
569f3f0262cSandi
570f3f0262cSandi    /**
571f3f0262cSandi     * Compute the length of the Longest Common Subsequence (LCS).
572f3f0262cSandi     *
573f3f0262cSandi     * This is mostly for diagnostic purposed.
574f3f0262cSandi     *
575f3f0262cSandi     * @return int The length of the LCS.
576f3f0262cSandi     */
577f3f0262cSandi    function lcs() {
578f3f0262cSandi        $lcs = 0;
579f3f0262cSandi        foreach ($this->edits as $edit) {
580f3f0262cSandi            if ($edit->type == 'copy')
581f5b78785SAndreas Gohr                $lcs += count($edit->orig);
582f3f0262cSandi        }
583f3f0262cSandi        return $lcs;
584f3f0262cSandi    }
585f3f0262cSandi
586f3f0262cSandi    /**
587f3f0262cSandi     * Get the original set of lines.
588f3f0262cSandi     *
589f3f0262cSandi     * This reconstructs the $from_lines parameter passed to the
590f3f0262cSandi     * constructor.
591f3f0262cSandi     *
592f3f0262cSandi     * @return array The original sequence of strings.
593f3f0262cSandi     */
594f3f0262cSandi    function orig() {
595f3f0262cSandi        $lines = array();
596f3f0262cSandi
597f3f0262cSandi        foreach ($this->edits as $edit) {
598f3f0262cSandi            if ($edit->orig)
599f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->orig);
600f3f0262cSandi        }
601f3f0262cSandi        return $lines;
602f3f0262cSandi    }
603f3f0262cSandi
604f3f0262cSandi    /**
605f3f0262cSandi     * Get the closing set of lines.
606f3f0262cSandi     *
607f3f0262cSandi     * This reconstructs the $to_lines parameter passed to the
608f3f0262cSandi     * constructor.
609f3f0262cSandi     *
610f3f0262cSandi     * @return array The sequence of strings.
611f3f0262cSandi     */
612f3f0262cSandi    function closing() {
613f3f0262cSandi        $lines = array();
614f3f0262cSandi
615f3f0262cSandi        foreach ($this->edits as $edit) {
616f3f0262cSandi            if ($edit->closing)
617f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->closing);
618f3f0262cSandi        }
619f3f0262cSandi        return $lines;
620f3f0262cSandi    }
621f3f0262cSandi
622f3f0262cSandi    /**
623f3f0262cSandi     * Check a Diff for validity.
624f3f0262cSandi     *
625f3f0262cSandi     * This is here only for debugging purposes.
626f50a239bSTakamura     *
627f50a239bSTakamura     * @param mixed $from_lines
628f50a239bSTakamura     * @param mixed $to_lines
629f3f0262cSandi     */
630f3f0262cSandi    function _check($from_lines, $to_lines) {
631f3f0262cSandi        if (serialize($from_lines) != serialize($this->orig()))
632f3f0262cSandi            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
633f3f0262cSandi        if (serialize($to_lines) != serialize($this->closing()))
634f3f0262cSandi            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
635f3f0262cSandi
636f3f0262cSandi        $rev = $this->reverse();
637f3f0262cSandi        if (serialize($to_lines) != serialize($rev->orig()))
638f3f0262cSandi            trigger_error("Reversed original doesn't match", E_USER_ERROR);
639f3f0262cSandi        if (serialize($from_lines) != serialize($rev->closing()))
640f3f0262cSandi            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
641f3f0262cSandi
642f3f0262cSandi        $prevtype = 'none';
643f3f0262cSandi        foreach ($this->edits as $edit) {
644f3f0262cSandi            if ($prevtype == $edit->type)
645f3f0262cSandi                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
646f3f0262cSandi            $prevtype = $edit->type;
647f3f0262cSandi        }
648f3f0262cSandi
649f3f0262cSandi        $lcs = $this->lcs();
650f3f0262cSandi        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
651f3f0262cSandi    }
652f3f0262cSandi}
653f3f0262cSandi
654f3f0262cSandi/**
655f3f0262cSandi * FIXME: bad name.
656f3f0262cSandi */
657f5b78785SAndreas Gohrclass MappedDiff extends Diff {
658f3f0262cSandi    /**
659f3f0262cSandi     * Constructor.
660f3f0262cSandi     *
661f3f0262cSandi     * Computes diff between sequences of strings.
662f3f0262cSandi     *
663f3f0262cSandi     * This can be used to compute things like
664f3f0262cSandi     * case-insensitve diffs, or diffs which ignore
665f3f0262cSandi     * changes in white-space.
666f3f0262cSandi     *
66742ea7f44SGerrit Uitslag     * @param string[] $from_lines         An array of strings.
668f3f0262cSandi     *                                     (Typically these are lines from a file.)
669f3f0262cSandi     *
67042ea7f44SGerrit Uitslag     * @param string[] $to_lines           An array of strings.
671f3f0262cSandi     *
67242ea7f44SGerrit Uitslag     * @param string[] $mapped_from_lines  This array should
673f3f0262cSandi     *                                     have the same size number of elements as $from_lines.
674f3f0262cSandi     *                                     The elements in $mapped_from_lines and
675f3f0262cSandi     *                                     $mapped_to_lines are what is actually compared
676f3f0262cSandi     *                                     when computing the diff.
677f3f0262cSandi     *
67842ea7f44SGerrit Uitslag     * @param string[] $mapped_to_lines    This array should
679f3f0262cSandi     *                                     have the same number of elements as $to_lines.
680f3f0262cSandi     */
681988c1340SPiyush Mishra    function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
682f3f0262cSandi
683f5b78785SAndreas Gohr        assert(count($from_lines) == count($mapped_from_lines));
684f5b78785SAndreas Gohr        assert(count($to_lines) == count($mapped_to_lines));
685f3f0262cSandi
686988c1340SPiyush Mishra        parent::__construct($mapped_from_lines, $mapped_to_lines);
687f3f0262cSandi
688f3f0262cSandi        $xi = $yi = 0;
689f5b78785SAndreas Gohr        $ecnt = count($this->edits);
690f5b78785SAndreas Gohr        for ($i = 0; $i < $ecnt; $i++) {
691f3f0262cSandi            $orig = &$this->edits[$i]->orig;
692f3f0262cSandi            if (is_array($orig)) {
693f5b78785SAndreas Gohr                $orig = array_slice($from_lines, $xi, count($orig));
694f5b78785SAndreas Gohr                $xi += count($orig);
695f3f0262cSandi            }
696f3f0262cSandi
697f3f0262cSandi            $closing = &$this->edits[$i]->closing;
698f3f0262cSandi            if (is_array($closing)) {
699f5b78785SAndreas Gohr                $closing = array_slice($to_lines, $yi, count($closing));
700f5b78785SAndreas Gohr                $yi += count($closing);
701f3f0262cSandi            }
702f3f0262cSandi        }
703f3f0262cSandi    }
704f3f0262cSandi}
705f3f0262cSandi
706f3f0262cSandi/**
707f3f0262cSandi * A class to format Diffs
708f3f0262cSandi *
709f3f0262cSandi * This class formats the diff in classic diff format.
710f3f0262cSandi * It is intended that this class be customized via inheritance,
711f3f0262cSandi * to obtain fancier outputs.
712f3f0262cSandi */
713f5b78785SAndreas Gohrclass DiffFormatter {
714f3f0262cSandi    /**
715f3f0262cSandi     * Number of leading context "lines" to preserve.
716f3f0262cSandi     *
717f3f0262cSandi     * This should be left at zero for this class, but subclasses
718f3f0262cSandi     * may want to set this to other values.
719f3f0262cSandi     */
720f3f0262cSandi    var $leading_context_lines = 0;
721f3f0262cSandi
722f3f0262cSandi    /**
723f3f0262cSandi     * Number of trailing context "lines" to preserve.
724f3f0262cSandi     *
725f3f0262cSandi     * This should be left at zero for this class, but subclasses
726f3f0262cSandi     * may want to set this to other values.
727f3f0262cSandi     */
728f3f0262cSandi    var $trailing_context_lines = 0;
729f3f0262cSandi
730f3f0262cSandi    /**
731f3f0262cSandi     * Format a diff.
732f3f0262cSandi     *
73342ea7f44SGerrit Uitslag     * @param Diff $diff A Diff object.
734f3f0262cSandi     * @return string The formatted output.
735f3f0262cSandi     */
736f3f0262cSandi    function format($diff) {
737f3f0262cSandi
738f3f0262cSandi        $xi = $yi = 1;
73942ea7f44SGerrit Uitslag        $x0 = $y0 = 0;
740f3f0262cSandi        $block = false;
741f3f0262cSandi        $context = array();
742f3f0262cSandi
743f3f0262cSandi        $nlead = $this->leading_context_lines;
744f3f0262cSandi        $ntrail = $this->trailing_context_lines;
745f3f0262cSandi
746f3f0262cSandi        $this->_start_diff();
747f3f0262cSandi
748f3f0262cSandi        foreach ($diff->edits as $edit) {
749f3f0262cSandi            if ($edit->type == 'copy') {
750f3f0262cSandi                if (is_array($block)) {
751f5b78785SAndreas Gohr                    if (count($edit->orig) <= $nlead + $ntrail) {
752f3f0262cSandi                        $block[] = $edit;
753f3f0262cSandi                    }
754f3f0262cSandi                    else{
755f3f0262cSandi                        if ($ntrail) {
756f3f0262cSandi                            $context = array_slice($edit->orig, 0, $ntrail);
757f3f0262cSandi                            $block[] = new _DiffOp_Copy($context);
758f3f0262cSandi                        }
7597deca91bSRobin Getz                        $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
760f3f0262cSandi                        $block = false;
761f3f0262cSandi                    }
762f3f0262cSandi                }
763f3f0262cSandi                $context = $edit->orig;
764f3f0262cSandi            }
765f3f0262cSandi            else {
766f3f0262cSandi                if (! is_array($block)) {
767f5b78785SAndreas Gohr                    $context = array_slice($context, count($context) - $nlead);
768f5b78785SAndreas Gohr                    $x0 = $xi - count($context);
769f5b78785SAndreas Gohr                    $y0 = $yi - count($context);
770f3f0262cSandi                    $block = array();
771f3f0262cSandi                    if ($context)
772f3f0262cSandi                        $block[] = new _DiffOp_Copy($context);
773f3f0262cSandi                }
774f3f0262cSandi                $block[] = $edit;
775f3f0262cSandi            }
776f3f0262cSandi
777f3f0262cSandi            if ($edit->orig)
778f5b78785SAndreas Gohr                $xi += count($edit->orig);
779f3f0262cSandi            if ($edit->closing)
780f5b78785SAndreas Gohr                $yi += count($edit->closing);
781f3f0262cSandi        }
782f3f0262cSandi
783f3f0262cSandi        if (is_array($block))
7847deca91bSRobin Getz            $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
785f3f0262cSandi
786f3f0262cSandi        return $this->_end_diff();
787f3f0262cSandi    }
788f3f0262cSandi
78942ea7f44SGerrit Uitslag    /**
79042ea7f44SGerrit Uitslag     * @param int $xbeg
79142ea7f44SGerrit Uitslag     * @param int $xlen
79242ea7f44SGerrit Uitslag     * @param int $ybeg
79342ea7f44SGerrit Uitslag     * @param int $ylen
79442ea7f44SGerrit Uitslag     * @param array $edits
79542ea7f44SGerrit Uitslag     */
796f3f0262cSandi    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
797f3f0262cSandi        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
798f3f0262cSandi        foreach ($edits as $edit) {
799f3f0262cSandi            if ($edit->type == 'copy')
800f3f0262cSandi                $this->_context($edit->orig);
801f3f0262cSandi            elseif ($edit->type == 'add')
802f3f0262cSandi                $this->_added($edit->closing);
803f3f0262cSandi            elseif ($edit->type == 'delete')
804f3f0262cSandi                $this->_deleted($edit->orig);
805f3f0262cSandi            elseif ($edit->type == 'change')
806f3f0262cSandi                $this->_changed($edit->orig, $edit->closing);
807f3f0262cSandi            else
808f3f0262cSandi                trigger_error("Unknown edit type", E_USER_ERROR);
809f3f0262cSandi        }
810f3f0262cSandi        $this->_end_block();
811f3f0262cSandi    }
812f3f0262cSandi
813f3f0262cSandi    function _start_diff() {
814f3f0262cSandi        ob_start();
815f3f0262cSandi    }
816f3f0262cSandi
817f3f0262cSandi    function _end_diff() {
818f3f0262cSandi        $val = ob_get_contents();
819f3f0262cSandi        ob_end_clean();
820f3f0262cSandi        return $val;
821f3f0262cSandi    }
822f3f0262cSandi
82342ea7f44SGerrit Uitslag    /**
82442ea7f44SGerrit Uitslag     * @param int $xbeg
82542ea7f44SGerrit Uitslag     * @param int $xlen
82642ea7f44SGerrit Uitslag     * @param int $ybeg
82742ea7f44SGerrit Uitslag     * @param int $ylen
82842ea7f44SGerrit Uitslag     * @return string
82942ea7f44SGerrit Uitslag     */
830f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
831f3f0262cSandi        if ($xlen > 1)
832f3f0262cSandi            $xbeg .= "," . ($xbeg + $xlen - 1);
833f3f0262cSandi        if ($ylen > 1)
834f3f0262cSandi            $ybeg .= "," . ($ybeg + $ylen - 1);
835f3f0262cSandi
836f3f0262cSandi        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
837f3f0262cSandi    }
838f3f0262cSandi
83942ea7f44SGerrit Uitslag    /**
84042ea7f44SGerrit Uitslag     * @param string $header
84142ea7f44SGerrit Uitslag     */
842f3f0262cSandi    function _start_block($header) {
843f3f0262cSandi        echo $header;
844f3f0262cSandi    }
845f3f0262cSandi
846f3f0262cSandi    function _end_block() {
847f3f0262cSandi    }
848f3f0262cSandi
849f3f0262cSandi    function _lines($lines, $prefix = ' ') {
850f3f0262cSandi        foreach ($lines as $line)
85160056e69SChristopher Smith            echo "$prefix ".$this->_escape($line)."\n";
852f3f0262cSandi    }
853f3f0262cSandi
854f3f0262cSandi    function _context($lines) {
855f3f0262cSandi        $this->_lines($lines);
856f3f0262cSandi    }
857f3f0262cSandi
858f3f0262cSandi    function _added($lines) {
859f3f0262cSandi        $this->_lines($lines, ">");
860f3f0262cSandi    }
861f3f0262cSandi    function _deleted($lines) {
862f3f0262cSandi        $this->_lines($lines, "<");
863f3f0262cSandi    }
864f3f0262cSandi
865f3f0262cSandi    function _changed($orig, $closing) {
866f3f0262cSandi        $this->_deleted($orig);
867f3f0262cSandi        echo "---\n";
868f3f0262cSandi        $this->_added($closing);
869f3f0262cSandi    }
87060056e69SChristopher Smith
871bfd197d2ShArpanet    /**
872bfd197d2ShArpanet     * Escape string
873bfd197d2ShArpanet     *
874bfd197d2ShArpanet     * Override this method within other formatters if escaping required.
875bfd197d2ShArpanet     * Base class requires $str to be returned WITHOUT escaping.
876bfd197d2ShArpanet     *
877bfd197d2ShArpanet     * @param $str string Text string to escape
878bfd197d2ShArpanet     * @return string The escaped string.
879bfd197d2ShArpanet     */
88060056e69SChristopher Smith    function _escape($str){
88160056e69SChristopher Smith        return $str;
88260056e69SChristopher Smith    }
883f3f0262cSandi}
884f3f0262cSandi
88547a906eaSAndreas Gohr/**
88647a906eaSAndreas Gohr * Utilityclass for styling HTML formatted diffs
88747a906eaSAndreas Gohr *
88847a906eaSAndreas Gohr * Depends on global var $DIFF_INLINESTYLES, if true some minimal predefined
88947a906eaSAndreas Gohr * inline styles are used. Useful for HTML mails and RSS feeds
89047a906eaSAndreas Gohr *
89147a906eaSAndreas Gohr * @author Andreas Gohr <andi@splitbrain.org>
89247a906eaSAndreas Gohr */
89347a906eaSAndreas Gohrclass HTMLDiff {
89447a906eaSAndreas Gohr    /**
89547a906eaSAndreas Gohr     * Holds the style names and basic CSS
89647a906eaSAndreas Gohr     */
89747a906eaSAndreas Gohr    static public $styles = array(
89847a906eaSAndreas Gohr            'diff-addedline'    => 'background-color: #ddffdd;',
89947a906eaSAndreas Gohr            'diff-deletedline'  => 'background-color: #ffdddd;',
90047a906eaSAndreas Gohr            'diff-context'      => 'background-color: #f5f5f5;',
90147a906eaSAndreas Gohr            'diff-mark'         => 'color: #ff0000;',
90247a906eaSAndreas Gohr        );
90347a906eaSAndreas Gohr
90447a906eaSAndreas Gohr    /**
90547a906eaSAndreas Gohr     * Return a class or style parameter
906f50a239bSTakamura     *
907f50a239bSTakamura     * @param string $classname
908f50a239bSTakamura     *
909f50a239bSTakamura     * @return string
91047a906eaSAndreas Gohr     */
91147a906eaSAndreas Gohr    static function css($classname){
91247a906eaSAndreas Gohr        global $DIFF_INLINESTYLES;
91347a906eaSAndreas Gohr
91447a906eaSAndreas Gohr        if($DIFF_INLINESTYLES){
91547a906eaSAndreas Gohr            if(!isset(self::$styles[$classname])) return '';
91647a906eaSAndreas Gohr            return 'style="'.self::$styles[$classname].'"';
91747a906eaSAndreas Gohr        }else{
91847a906eaSAndreas Gohr            return 'class="'.$classname.'"';
91947a906eaSAndreas Gohr        }
92047a906eaSAndreas Gohr    }
92147a906eaSAndreas Gohr}
922f3f0262cSandi
923f3f0262cSandi/**
924f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
925f3f0262cSandi *
926f3f0262cSandi */
927f3f0262cSandi
9289b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
929f3f0262cSandi
930f3f0262cSandiclass _HWLDF_WordAccumulator {
931988c1340SPiyush Mishra
932988c1340SPiyush Mishra    function __construct() {
933f3f0262cSandi        $this->_lines = array();
934f3f0262cSandi        $this->_line = '';
935f3f0262cSandi        $this->_group = '';
936f3f0262cSandi        $this->_tag = '';
937f3f0262cSandi    }
938f3f0262cSandi
939f3f0262cSandi    function _flushGroup($new_tag) {
940f3f0262cSandi        if ($this->_group !== '') {
941f3f0262cSandi            if ($this->_tag == 'mark')
94260056e69SChristopher Smith                $this->_line .= '<strong '.HTMLDiff::css('diff-mark').'>'.$this->_escape($this->_group).'</strong>';
943812bb04eSRobin Getz            elseif ($this->_tag == 'add')
94460056e69SChristopher Smith                $this->_line .= '<span '.HTMLDiff::css('diff-addedline').'>'.$this->_escape($this->_group).'</span>';
945812bb04eSRobin Getz            elseif ($this->_tag == 'del')
94660056e69SChristopher Smith                $this->_line .= '<span '.HTMLDiff::css('diff-deletedline').'><del>'.$this->_escape($this->_group).'</del></span>';
947f3f0262cSandi            else
94860056e69SChristopher Smith                $this->_line .= $this->_escape($this->_group);
949f3f0262cSandi        }
950f3f0262cSandi        $this->_group = '';
951f3f0262cSandi        $this->_tag = $new_tag;
952f3f0262cSandi    }
953f3f0262cSandi
95442ea7f44SGerrit Uitslag    /**
95542ea7f44SGerrit Uitslag     * @param string $new_tag
95642ea7f44SGerrit Uitslag     */
957f3f0262cSandi    function _flushLine($new_tag) {
958f3f0262cSandi        $this->_flushGroup($new_tag);
959f3f0262cSandi        if ($this->_line != '')
960f3f0262cSandi            $this->_lines[] = $this->_line;
961f3f0262cSandi        $this->_line = '';
962f3f0262cSandi    }
963f3f0262cSandi
964f3f0262cSandi    function addWords($words, $tag = '') {
965f3f0262cSandi        if ($tag != $this->_tag)
966f3f0262cSandi            $this->_flushGroup($tag);
967f3f0262cSandi
968f3f0262cSandi        foreach ($words as $word) {
969f3f0262cSandi            // new-line should only come as first char of word.
970f3f0262cSandi            if ($word == '')
971f3f0262cSandi                continue;
972f3f0262cSandi            if ($word[0] == "\n") {
973f3f0262cSandi                $this->_group .= NBSP;
974f3f0262cSandi                $this->_flushLine($tag);
975f3f0262cSandi                $word = substr($word, 1);
976f3f0262cSandi            }
977f3f0262cSandi            assert(!strstr($word, "\n"));
978f3f0262cSandi            $this->_group .= $word;
979f3f0262cSandi        }
980f3f0262cSandi    }
981f3f0262cSandi
982f3f0262cSandi    function getLines() {
983f3f0262cSandi        $this->_flushLine('~done');
984f3f0262cSandi        return $this->_lines;
985f3f0262cSandi    }
98660056e69SChristopher Smith
98760056e69SChristopher Smith    function _escape($str){
98860056e69SChristopher Smith        return hsc($str);
98960056e69SChristopher Smith    }
990f3f0262cSandi}
991f3f0262cSandi
992f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff {
993f5b78785SAndreas Gohr
994988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
995f3f0262cSandi        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
996f3f0262cSandi        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
997f3f0262cSandi
998988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
999f3f0262cSandi    }
1000f3f0262cSandi
1001f3f0262cSandi    function _split($lines) {
10025e1ee188SXin LI        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
10037deca91bSRobin Getz             implode("\n", $lines), $m)) {
1004f3f0262cSandi            return array(array(''), array(''));
1005f3f0262cSandi        }
1006f3f0262cSandi        return array($m[0], $m[1]);
1007f3f0262cSandi    }
1008f3f0262cSandi
1009f3f0262cSandi    function orig() {
1010f3f0262cSandi        $orig = new _HWLDF_WordAccumulator;
1011f3f0262cSandi
1012f3f0262cSandi        foreach ($this->edits as $edit) {
1013f3f0262cSandi            if ($edit->type == 'copy')
1014f3f0262cSandi                $orig->addWords($edit->orig);
1015f3f0262cSandi            elseif ($edit->orig)
1016f3f0262cSandi                $orig->addWords($edit->orig, 'mark');
1017f3f0262cSandi        }
1018f3f0262cSandi        return $orig->getLines();
1019f3f0262cSandi    }
1020f3f0262cSandi
1021f3f0262cSandi    function closing() {
1022f3f0262cSandi        $closing = new _HWLDF_WordAccumulator;
1023f3f0262cSandi
1024f3f0262cSandi        foreach ($this->edits as $edit) {
1025f3f0262cSandi            if ($edit->type == 'copy')
1026f3f0262cSandi                $closing->addWords($edit->closing);
1027f3f0262cSandi            elseif ($edit->closing)
1028f3f0262cSandi                $closing->addWords($edit->closing, 'mark');
1029f3f0262cSandi        }
1030f3f0262cSandi        return $closing->getLines();
1031f3f0262cSandi    }
1032f3f0262cSandi}
1033f3f0262cSandi
1034812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff {
1035812bb04eSRobin Getz
1036988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
1037812bb04eSRobin Getz        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
1038812bb04eSRobin Getz        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
1039812bb04eSRobin Getz
1040988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
1041812bb04eSRobin Getz    }
1042812bb04eSRobin Getz
1043812bb04eSRobin Getz    function _split($lines) {
1044c5982caaSDanny Lin        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
1045812bb04eSRobin Getz             implode("\n", $lines), $m)) {
1046812bb04eSRobin Getz            return array(array(''), array(''));
1047812bb04eSRobin Getz        }
1048812bb04eSRobin Getz        return array($m[0], $m[1]);
1049812bb04eSRobin Getz    }
1050812bb04eSRobin Getz
1051812bb04eSRobin Getz    function inline() {
1052812bb04eSRobin Getz        $orig = new _HWLDF_WordAccumulator;
1053812bb04eSRobin Getz        foreach ($this->edits as $edit) {
1054812bb04eSRobin Getz            if ($edit->type == 'copy')
10554f2305cbSAdrian Lang                $orig->addWords($edit->closing);
1056812bb04eSRobin Getz            elseif ($edit->type == 'change'){
1057812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1058812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1059812bb04eSRobin Getz            } elseif ($edit->type == 'delete')
1060812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1061812bb04eSRobin Getz            elseif ($edit->type == 'add')
1062812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1063812bb04eSRobin Getz            elseif ($edit->orig)
1064812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1065812bb04eSRobin Getz        }
1066812bb04eSRobin Getz        return $orig->getLines();
1067812bb04eSRobin Getz    }
1068812bb04eSRobin Getz}
1069812bb04eSRobin Getz
1070f3f0262cSandi/**
1071f3f0262cSandi * "Unified" diff formatter.
1072f3f0262cSandi *
1073f3f0262cSandi * This class formats the diff in classic "unified diff" format.
1074df9752e9SChristopher Smith *
1075df9752e9SChristopher Smith * NOTE: output is plain text and unsafe for use in HTML without escaping.
1076f3f0262cSandi */
1077f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter {
1078f5b78785SAndreas Gohr
1079988c1340SPiyush Mishra    function __construct($context_lines = 4) {
1080f3f0262cSandi        $this->leading_context_lines = $context_lines;
1081f3f0262cSandi        $this->trailing_context_lines = $context_lines;
1082f3f0262cSandi    }
1083f3f0262cSandi
1084f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1085f3f0262cSandi        if ($xlen != 1)
1086f3f0262cSandi            $xbeg .= "," . $xlen;
1087f3f0262cSandi        if ($ylen != 1)
1088f3f0262cSandi            $ybeg .= "," . $ylen;
1089f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
1090f3f0262cSandi    }
1091f3f0262cSandi
1092f3f0262cSandi    function _added($lines) {
1093f3f0262cSandi        $this->_lines($lines, "+");
1094f3f0262cSandi    }
1095f3f0262cSandi    function _deleted($lines) {
1096f3f0262cSandi        $this->_lines($lines, "-");
1097f3f0262cSandi    }
1098f3f0262cSandi    function _changed($orig, $final) {
1099f3f0262cSandi        $this->_deleted($orig);
1100f3f0262cSandi        $this->_added($final);
1101f3f0262cSandi    }
1102f3f0262cSandi}
1103f3f0262cSandi
1104f3f0262cSandi/**
1105f3f0262cSandi *  Wikipedia Table style diff formatter.
1106f3f0262cSandi *
1107f3f0262cSandi */
1108f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter {
11093a4ea35cSChristopher Smith    var $colspan = 2;
1110f5b78785SAndreas Gohr
1111988c1340SPiyush Mishra    function __construct() {
1112f3f0262cSandi        $this->leading_context_lines = 2;
1113f3f0262cSandi        $this->trailing_context_lines = 2;
1114f3f0262cSandi    }
1115f3f0262cSandi
111642ea7f44SGerrit Uitslag    /**
111742ea7f44SGerrit Uitslag     * @param Diff $diff
111842ea7f44SGerrit Uitslag     * @return string
111942ea7f44SGerrit Uitslag     */
11202d880650SAdrian Lang    function format($diff) {
11212d880650SAdrian Lang        // Preserve whitespaces by converting some to non-breaking spaces.
11222d880650SAdrian Lang        // Do not convert all of them to allow word-wrap.
11232d880650SAdrian Lang        $val = parent::format($diff);
1124e260f93bSAnika Henke        $val = str_replace('  ','&#160; ', $val);
1125e260f93bSAnika Henke        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&#160;', $val);
11262d880650SAdrian Lang        return $val;
11272d880650SAdrian Lang    }
11282d880650SAdrian Lang
1129f3f0262cSandi    function _pre($text){
1130f3f0262cSandi        $text = htmlspecialchars($text);
1131f3f0262cSandi        return $text;
1132f3f0262cSandi    }
1133f3f0262cSandi
1134f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1135f3f0262cSandi        global $lang;
1136f3f0262cSandi        $l1 = $lang['line'].' '.$xbeg;
1137f3f0262cSandi        $l2 = $lang['line'].' '.$ybeg;
11383a4ea35cSChristopher Smith        $r = '<tr><td '.HTMLDiff::css('diff-blockheader').' colspan="'.$this->colspan.'">'.$l1.":</td>\n".
11393a4ea35cSChristopher Smith             '<td '.HTMLDiff::css('diff-blockheader').' colspan="'.$this->colspan.'">'.$l2.":</td>\n".
11405048c277SAnika Henke             "</tr>\n";
1141f3f0262cSandi        return $r;
1142f3f0262cSandi    }
1143f3f0262cSandi
1144f3f0262cSandi    function _start_block($header) {
1145f3f0262cSandi        print($header);
1146f3f0262cSandi    }
1147f3f0262cSandi
1148f3f0262cSandi    function _end_block() {
1149f3f0262cSandi    }
1150f3f0262cSandi
1151f3f0262cSandi    function _lines($lines, $prefix=' ', $color="white") {
1152f3f0262cSandi    }
1153f3f0262cSandi
115460056e69SChristopher Smith    function addedLine($line,$escaped=false) {
115560056e69SChristopher Smith        if (!$escaped){
115660056e69SChristopher Smith            $line = $this->_escape($line);
115760056e69SChristopher Smith        }
1158f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>+</td>'.
1159f76724a4STom N Harris               '<td '.HTMLDiff::css('diff-addedline').'>' .  $line.'</td>';
1160f3f0262cSandi    }
1161f3f0262cSandi
116260056e69SChristopher Smith    function deletedLine($line,$escaped=false) {
116360056e69SChristopher Smith        if (!$escaped){
116460056e69SChristopher Smith            $line = $this->_escape($line);
116560056e69SChristopher Smith        }
1166f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>-</td>'.
1167f76724a4STom N Harris               '<td '.HTMLDiff::css('diff-deletedline').'>' .  $line.'</td>';
1168f3f0262cSandi    }
1169f3f0262cSandi
1170f3f0262cSandi    function emptyLine() {
11713a4ea35cSChristopher Smith        return '<td colspan="'.$this->colspan.'">&#160;</td>';
1172f3f0262cSandi    }
1173f3f0262cSandi
1174f3f0262cSandi    function contextLine($line) {
1175f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>&#160;</td>'.
1176333ef4f3SChristopher Smith               '<td '.HTMLDiff::css('diff-context').'>'.$this->_escape($line).'</td>';
1177f3f0262cSandi    }
1178f3f0262cSandi
1179f3f0262cSandi    function _added($lines) {
118060056e69SChristopher Smith        $this->_addedLines($lines,false);
118160056e69SChristopher Smith    }
118260056e69SChristopher Smith
118360056e69SChristopher Smith    function _addedLines($lines,$escaped=false){
1184f3f0262cSandi        foreach ($lines as $line) {
118560056e69SChristopher Smith            print('<tr>' . $this->emptyLine() . $this->addedLine($line,$escaped) . "</tr>\n");
1186f3f0262cSandi        }
1187f3f0262cSandi    }
1188f3f0262cSandi
1189f3f0262cSandi    function _deleted($lines) {
1190f3f0262cSandi        foreach ($lines as $line) {
11917deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n");
1192f3f0262cSandi        }
1193f3f0262cSandi    }
1194f3f0262cSandi
1195f3f0262cSandi    function _context($lines) {
1196f3f0262cSandi        foreach ($lines as $line) {
11977deca91bSRobin Getz            print('<tr>' . $this->contextLine($line) .  $this->contextLine($line) . "</tr>\n");
1198f3f0262cSandi        }
1199f3f0262cSandi    }
1200f3f0262cSandi
1201f3f0262cSandi    function _changed($orig, $closing) {
120260056e69SChristopher Smith        $diff = new WordLevelDiff($orig, $closing);  // this escapes the diff data
1203f3f0262cSandi        $del = $diff->orig();
1204f3f0262cSandi        $add = $diff->closing();
1205f3f0262cSandi
1206f3f0262cSandi        while ($line = array_shift($del)) {
1207f3f0262cSandi            $aline = array_shift($add);
120860056e69SChristopher Smith            print('<tr>' . $this->deletedLine($line,true) . $this->addedLine($aline,true) . "</tr>\n");
1209f3f0262cSandi        }
121060056e69SChristopher Smith        $this->_addedLines($add,true); # If any leftovers
121160056e69SChristopher Smith    }
121260056e69SChristopher Smith
121360056e69SChristopher Smith    function _escape($str) {
121460056e69SChristopher Smith        return hsc($str);
1215f3f0262cSandi    }
1216f3f0262cSandi}
1217f3f0262cSandi
1218812bb04eSRobin Getz/**
1219812bb04eSRobin Getz *  Inline style diff formatter.
1220812bb04eSRobin Getz *
1221812bb04eSRobin Getz */
1222812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter {
1223f76724a4STom N Harris    var $colspan = 2;
1224988c1340SPiyush Mishra
1225988c1340SPiyush Mishra    function __construct() {
1226812bb04eSRobin Getz        $this->leading_context_lines = 2;
1227812bb04eSRobin Getz        $this->trailing_context_lines = 2;
1228812bb04eSRobin Getz    }
1229812bb04eSRobin Getz
123042ea7f44SGerrit Uitslag    /**
123142ea7f44SGerrit Uitslag     * @param Diff $diff
123242ea7f44SGerrit Uitslag     * @return string
123342ea7f44SGerrit Uitslag     */
1234812bb04eSRobin Getz    function format($diff) {
1235812bb04eSRobin Getz        // Preserve whitespaces by converting some to non-breaking spaces.
1236812bb04eSRobin Getz        // Do not convert all of them to allow word-wrap.
1237812bb04eSRobin Getz        $val = parent::format($diff);
1238e260f93bSAnika Henke        $val = str_replace('  ','&#160; ', $val);
1239e260f93bSAnika Henke        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&#160;', $val);
1240812bb04eSRobin Getz        return $val;
1241812bb04eSRobin Getz    }
1242812bb04eSRobin Getz
1243812bb04eSRobin Getz    function _pre($text){
1244812bb04eSRobin Getz        $text = htmlspecialchars($text);
1245812bb04eSRobin Getz        return $text;
1246812bb04eSRobin Getz    }
1247812bb04eSRobin Getz
1248812bb04eSRobin Getz    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1249812bb04eSRobin Getz        global $lang;
1250812bb04eSRobin Getz        if ($xlen != 1)
1251812bb04eSRobin Getz            $xbeg .= "," . $xlen;
1252812bb04eSRobin Getz        if ($ylen != 1)
1253812bb04eSRobin Getz            $ybeg .= "," . $ylen;
12543a4ea35cSChristopher Smith        $r = '<tr><td colspan="'.$this->colspan.'" '.HTMLDiff::css('diff-blockheader').'>@@ '.$lang['line']." -$xbeg +$ybeg @@";
125547a906eaSAndreas Gohr        $r .= ' <span '.HTMLDiff::css('diff-deletedline').'><del>'.$lang['deleted'].'</del></span>';
125647a906eaSAndreas Gohr        $r .= ' <span '.HTMLDiff::css('diff-addedline').'>'.$lang['created'].'</span>';
1257812bb04eSRobin Getz        $r .= "</td></tr>\n";
1258812bb04eSRobin Getz        return $r;
1259812bb04eSRobin Getz    }
1260812bb04eSRobin Getz
1261812bb04eSRobin Getz    function _start_block($header) {
1262812bb04eSRobin Getz        print($header."\n");
1263812bb04eSRobin Getz    }
1264812bb04eSRobin Getz
1265812bb04eSRobin Getz    function _end_block() {
1266812bb04eSRobin Getz    }
1267812bb04eSRobin Getz
1268812bb04eSRobin Getz    function _lines($lines, $prefix=' ', $color="white") {
1269812bb04eSRobin Getz    }
1270812bb04eSRobin Getz
1271812bb04eSRobin Getz    function _added($lines) {
1272812bb04eSRobin Getz        foreach ($lines as $line) {
1273333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-addedline').'>'. $this->_escape($line) . "</td></tr>\n");
1274812bb04eSRobin Getz        }
1275812bb04eSRobin Getz    }
1276812bb04eSRobin Getz
1277812bb04eSRobin Getz    function _deleted($lines) {
1278812bb04eSRobin Getz        foreach ($lines as $line) {
1279333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-deletedline').'><del>' . $this->_escape($line) . "</del></td></tr>\n");
1280812bb04eSRobin Getz        }
1281812bb04eSRobin Getz    }
1282812bb04eSRobin Getz
1283812bb04eSRobin Getz    function _context($lines) {
1284812bb04eSRobin Getz        foreach ($lines as $line) {
1285333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-context').'>'. $this->_escape($line) ."</td></tr>\n");
1286812bb04eSRobin Getz        }
1287812bb04eSRobin Getz    }
1288812bb04eSRobin Getz
1289812bb04eSRobin Getz    function _changed($orig, $closing) {
129060056e69SChristopher Smith        $diff = new InlineWordLevelDiff($orig, $closing);  // this escapes the diff data
1291812bb04eSRobin Getz        $add = $diff->inline();
1292812bb04eSRobin Getz
1293812bb04eSRobin Getz        foreach ($add as $line)
1294a69506c5STom N Harris            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td>'.$line."</td></tr>\n");
1295812bb04eSRobin Getz    }
129660056e69SChristopher Smith
129760056e69SChristopher Smith    function _escape($str) {
129860056e69SChristopher Smith        return hsc($str);
129960056e69SChristopher Smith    }
1300812bb04eSRobin Getz}
1301812bb04eSRobin Getz
1302a297e675SAndreas Gohr/**
1303a297e675SAndreas Gohr * A class for computing three way diffs.
1304a297e675SAndreas Gohr *
1305a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1306a297e675SAndreas Gohr */
1307a297e675SAndreas Gohrclass Diff3 extends Diff {
1308a297e675SAndreas Gohr
1309a297e675SAndreas Gohr    /**
1310a297e675SAndreas Gohr     * Conflict counter.
1311a297e675SAndreas Gohr     *
1312a297e675SAndreas Gohr     * @var integer
1313a297e675SAndreas Gohr     */
1314a297e675SAndreas Gohr    var $_conflictingBlocks = 0;
1315a297e675SAndreas Gohr
1316a297e675SAndreas Gohr    /**
1317a297e675SAndreas Gohr     * Computes diff between 3 sequences of strings.
1318a297e675SAndreas Gohr     *
1319a297e675SAndreas Gohr     * @param array $orig    The original lines to use.
1320a297e675SAndreas Gohr     * @param array $final1  The first version to compare to.
1321a297e675SAndreas Gohr     * @param array $final2  The second version to compare to.
1322a297e675SAndreas Gohr     */
1323a297e675SAndreas Gohr    function __construct($orig, $final1, $final2) {
1324a297e675SAndreas Gohr        $engine = new _DiffEngine();
1325a297e675SAndreas Gohr
1326a297e675SAndreas Gohr        $this->_edits = $this->_diff3($engine->diff($orig, $final1),
1327a297e675SAndreas Gohr                                      $engine->diff($orig, $final2));
1328a297e675SAndreas Gohr    }
1329a297e675SAndreas Gohr
1330a297e675SAndreas Gohr    /**
1331a297e675SAndreas Gohr     * Returns the merged lines
1332a297e675SAndreas Gohr     *
1333a297e675SAndreas Gohr     * @param string $label1  label for first version
1334a297e675SAndreas Gohr     * @param string $label2  label for second version
133534df7cb0SAndreas Gohr     * @param string $label3  separator between versions
1336a297e675SAndreas Gohr     * @return array          lines of the merged text
1337a297e675SAndreas Gohr     */
133834df7cb0SAndreas Gohr    function mergedOutput($label1='<<<<<<<',$label2='>>>>>>>',$label3='=======') {
1339a297e675SAndreas Gohr        $lines = array();
1340a297e675SAndreas Gohr        foreach ($this->_edits as $edit) {
1341a297e675SAndreas Gohr            if ($edit->isConflict()) {
1342a297e675SAndreas Gohr                /* FIXME: this should probably be moved somewhere else. */
1343a297e675SAndreas Gohr                $lines = array_merge($lines,
134434df7cb0SAndreas Gohr                                     array($label1),
1345a297e675SAndreas Gohr                                     $edit->final1,
134634df7cb0SAndreas Gohr                                     array($label3),
1347a297e675SAndreas Gohr                                     $edit->final2,
134834df7cb0SAndreas Gohr                                     array($label2));
1349a297e675SAndreas Gohr                $this->_conflictingBlocks++;
1350a297e675SAndreas Gohr            } else {
1351a297e675SAndreas Gohr                $lines = array_merge($lines, $edit->merged());
1352a297e675SAndreas Gohr            }
1353a297e675SAndreas Gohr        }
1354a297e675SAndreas Gohr
1355a297e675SAndreas Gohr        return $lines;
1356a297e675SAndreas Gohr    }
1357a297e675SAndreas Gohr
1358a297e675SAndreas Gohr    /**
1359a297e675SAndreas Gohr     * @access private
1360f50a239bSTakamura     *
1361f50a239bSTakamura     * @param array $edits1
1362f50a239bSTakamura     * @param array $edits2
1363f50a239bSTakamura     *
1364f50a239bSTakamura     * @return array
1365a297e675SAndreas Gohr     */
1366a297e675SAndreas Gohr    function _diff3($edits1, $edits2) {
1367a297e675SAndreas Gohr        $edits = array();
1368a297e675SAndreas Gohr        $bb = new _Diff3_BlockBuilder();
1369a297e675SAndreas Gohr
1370a297e675SAndreas Gohr        $e1 = current($edits1);
1371a297e675SAndreas Gohr        $e2 = current($edits2);
1372a297e675SAndreas Gohr        while ($e1 || $e2) {
1373a297e675SAndreas Gohr            if ($e1 && $e2 && is_a($e1, '_DiffOp_copy') && is_a($e2, '_DiffOp_copy')) {
1374a297e675SAndreas Gohr                /* We have copy blocks from both diffs. This is the (only)
1375a297e675SAndreas Gohr                 * time we want to emit a diff3 copy block.  Flush current
1376a297e675SAndreas Gohr                 * diff3 diff block, if any. */
1377a297e675SAndreas Gohr                if ($edit = $bb->finish()) {
1378a297e675SAndreas Gohr                    $edits[] = $edit;
1379a297e675SAndreas Gohr                }
1380a297e675SAndreas Gohr
1381a297e675SAndreas Gohr                $ncopy = min($e1->norig(), $e2->norig());
1382a297e675SAndreas Gohr                assert($ncopy > 0);
1383a297e675SAndreas Gohr                $edits[] = new _Diff3_Op_copy(array_slice($e1->orig, 0, $ncopy));
1384a297e675SAndreas Gohr
1385a297e675SAndreas Gohr                if ($e1->norig() > $ncopy) {
1386a297e675SAndreas Gohr                    array_splice($e1->orig, 0, $ncopy);
1387a297e675SAndreas Gohr                    array_splice($e1->closing, 0, $ncopy);
1388a297e675SAndreas Gohr                } else {
1389a297e675SAndreas Gohr                    $e1 = next($edits1);
1390a297e675SAndreas Gohr                }
1391a297e675SAndreas Gohr
1392a297e675SAndreas Gohr                if ($e2->norig() > $ncopy) {
1393a297e675SAndreas Gohr                    array_splice($e2->orig, 0, $ncopy);
1394a297e675SAndreas Gohr                    array_splice($e2->closing, 0, $ncopy);
1395a297e675SAndreas Gohr                } else {
1396a297e675SAndreas Gohr                    $e2 = next($edits2);
1397a297e675SAndreas Gohr                }
1398a297e675SAndreas Gohr            } else {
1399a297e675SAndreas Gohr                if ($e1 && $e2) {
1400a297e675SAndreas Gohr                    if ($e1->orig && $e2->orig) {
1401a297e675SAndreas Gohr                        $norig = min($e1->norig(), $e2->norig());
1402a297e675SAndreas Gohr                        $orig = array_splice($e1->orig, 0, $norig);
1403a297e675SAndreas Gohr                        array_splice($e2->orig, 0, $norig);
1404a297e675SAndreas Gohr                        $bb->input($orig);
1405a297e675SAndreas Gohr                    }
1406a297e675SAndreas Gohr
1407a297e675SAndreas Gohr                    if (is_a($e1, '_DiffOp_copy')) {
1408a297e675SAndreas Gohr                        $bb->out1(array_splice($e1->closing, 0, $norig));
1409a297e675SAndreas Gohr                    }
1410a297e675SAndreas Gohr
1411a297e675SAndreas Gohr                    if (is_a($e2, '_DiffOp_copy')) {
1412a297e675SAndreas Gohr                        $bb->out2(array_splice($e2->closing, 0, $norig));
1413a297e675SAndreas Gohr                    }
1414a297e675SAndreas Gohr                }
1415a297e675SAndreas Gohr
1416a297e675SAndreas Gohr                if ($e1 && ! $e1->orig) {
1417a297e675SAndreas Gohr                    $bb->out1($e1->closing);
1418a297e675SAndreas Gohr                    $e1 = next($edits1);
1419a297e675SAndreas Gohr                }
1420a297e675SAndreas Gohr                if ($e2 && ! $e2->orig) {
1421a297e675SAndreas Gohr                    $bb->out2($e2->closing);
1422a297e675SAndreas Gohr                    $e2 = next($edits2);
1423a297e675SAndreas Gohr                }
1424a297e675SAndreas Gohr            }
1425a297e675SAndreas Gohr        }
1426a297e675SAndreas Gohr
1427a297e675SAndreas Gohr        if ($edit = $bb->finish()) {
1428a297e675SAndreas Gohr            $edits[] = $edit;
1429a297e675SAndreas Gohr        }
1430a297e675SAndreas Gohr
1431a297e675SAndreas Gohr        return $edits;
1432a297e675SAndreas Gohr    }
1433a297e675SAndreas Gohr}
1434a297e675SAndreas Gohr
1435a297e675SAndreas Gohr/**
1436a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1437a297e675SAndreas Gohr *
1438a297e675SAndreas Gohr * @access private
1439a297e675SAndreas Gohr */
1440a297e675SAndreas Gohrclass _Diff3_Op {
1441a297e675SAndreas Gohr
1442a297e675SAndreas Gohr    function __construct($orig = false, $final1 = false, $final2 = false) {
1443a297e675SAndreas Gohr        $this->orig = $orig ? $orig : array();
1444a297e675SAndreas Gohr        $this->final1 = $final1 ? $final1 : array();
1445a297e675SAndreas Gohr        $this->final2 = $final2 ? $final2 : array();
1446a297e675SAndreas Gohr    }
1447a297e675SAndreas Gohr
1448a297e675SAndreas Gohr    function merged() {
1449a297e675SAndreas Gohr        if (!isset($this->_merged)) {
1450a297e675SAndreas Gohr            if ($this->final1 === $this->final2) {
1451a297e675SAndreas Gohr                $this->_merged = &$this->final1;
1452a297e675SAndreas Gohr            } elseif ($this->final1 === $this->orig) {
1453a297e675SAndreas Gohr                $this->_merged = &$this->final2;
1454a297e675SAndreas Gohr            } elseif ($this->final2 === $this->orig) {
1455a297e675SAndreas Gohr                $this->_merged = &$this->final1;
1456a297e675SAndreas Gohr            } else {
1457a297e675SAndreas Gohr                $this->_merged = false;
1458a297e675SAndreas Gohr            }
1459a297e675SAndreas Gohr        }
1460a297e675SAndreas Gohr
1461a297e675SAndreas Gohr        return $this->_merged;
1462a297e675SAndreas Gohr    }
1463a297e675SAndreas Gohr
1464a297e675SAndreas Gohr    function isConflict() {
1465a297e675SAndreas Gohr        return $this->merged() === false;
1466a297e675SAndreas Gohr    }
1467a297e675SAndreas Gohr
1468a297e675SAndreas Gohr}
1469a297e675SAndreas Gohr
1470a297e675SAndreas Gohr/**
1471a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1472a297e675SAndreas Gohr *
1473a297e675SAndreas Gohr * @access private
1474a297e675SAndreas Gohr */
1475a297e675SAndreas Gohrclass _Diff3_Op_copy extends _Diff3_Op {
1476a297e675SAndreas Gohr
1477a297e675SAndreas Gohr    function __construct($lines = false) {
1478a297e675SAndreas Gohr        $this->orig = $lines ? $lines : array();
1479a297e675SAndreas Gohr        $this->final1 = &$this->orig;
1480a297e675SAndreas Gohr        $this->final2 = &$this->orig;
1481a297e675SAndreas Gohr    }
1482a297e675SAndreas Gohr
1483a297e675SAndreas Gohr    function merged() {
1484a297e675SAndreas Gohr        return $this->orig;
1485a297e675SAndreas Gohr    }
1486a297e675SAndreas Gohr
1487a297e675SAndreas Gohr    function isConflict() {
1488a297e675SAndreas Gohr        return false;
1489a297e675SAndreas Gohr    }
1490a297e675SAndreas Gohr}
1491a297e675SAndreas Gohr
1492a297e675SAndreas Gohr/**
1493a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1494a297e675SAndreas Gohr *
1495a297e675SAndreas Gohr * @access private
1496a297e675SAndreas Gohr */
1497a297e675SAndreas Gohrclass _Diff3_BlockBuilder {
1498a297e675SAndreas Gohr
1499a297e675SAndreas Gohr    function __construct() {
1500a297e675SAndreas Gohr        $this->_init();
1501a297e675SAndreas Gohr    }
1502a297e675SAndreas Gohr
1503a297e675SAndreas Gohr    function input($lines) {
1504a297e675SAndreas Gohr        if ($lines) {
1505a297e675SAndreas Gohr            $this->_append($this->orig, $lines);
1506a297e675SAndreas Gohr        }
1507a297e675SAndreas Gohr    }
1508a297e675SAndreas Gohr
1509a297e675SAndreas Gohr    function out1($lines) {
1510a297e675SAndreas Gohr        if ($lines) {
1511a297e675SAndreas Gohr            $this->_append($this->final1, $lines);
1512a297e675SAndreas Gohr        }
1513a297e675SAndreas Gohr    }
1514a297e675SAndreas Gohr
1515a297e675SAndreas Gohr    function out2($lines) {
1516a297e675SAndreas Gohr        if ($lines) {
1517a297e675SAndreas Gohr            $this->_append($this->final2, $lines);
1518a297e675SAndreas Gohr        }
1519a297e675SAndreas Gohr    }
1520a297e675SAndreas Gohr
1521a297e675SAndreas Gohr    function isEmpty() {
1522a297e675SAndreas Gohr        return !$this->orig && !$this->final1 && !$this->final2;
1523a297e675SAndreas Gohr    }
1524a297e675SAndreas Gohr
1525a297e675SAndreas Gohr    function finish() {
1526a297e675SAndreas Gohr        if ($this->isEmpty()) {
1527a297e675SAndreas Gohr            return false;
1528a297e675SAndreas Gohr        } else {
1529a297e675SAndreas Gohr            $edit = new _Diff3_Op($this->orig, $this->final1, $this->final2);
1530a297e675SAndreas Gohr            $this->_init();
1531a297e675SAndreas Gohr            return $edit;
1532a297e675SAndreas Gohr        }
1533a297e675SAndreas Gohr    }
1534a297e675SAndreas Gohr
1535a297e675SAndreas Gohr    function _init() {
1536a297e675SAndreas Gohr        $this->orig = $this->final1 = $this->final2 = array();
1537a297e675SAndreas Gohr    }
1538a297e675SAndreas Gohr
1539a297e675SAndreas Gohr    function _append(&$array, $lines) {
1540a297e675SAndreas Gohr        array_splice($array, sizeof($array), 0, $lines);
1541a297e675SAndreas Gohr    }
1542a297e675SAndreas Gohr}
1543340756e4Sandi
1544e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 :
1545