xref: /dokuwiki/inc/DifferenceEngine.php (revision f50a239b3b819527445d240746b09a05fb76d103)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi    var $type;
14f3f0262cSandi    var $orig;
15f3f0262cSandi    var $closing;
16f3f0262cSandi
1742ea7f44SGerrit Uitslag    /**
1842ea7f44SGerrit Uitslag     * @return _DiffOp
1942ea7f44SGerrit Uitslag     */
20f3f0262cSandi    function reverse() {
21f3f0262cSandi        trigger_error("pure virtual", E_USER_ERROR);
22f3f0262cSandi    }
23f3f0262cSandi
24f3f0262cSandi    function norig() {
25f5b78785SAndreas Gohr        return $this->orig ? count($this->orig) : 0;
26f3f0262cSandi    }
27f3f0262cSandi
28f3f0262cSandi    function nclosing() {
29f5b78785SAndreas Gohr        return $this->closing ? count($this->closing) : 0;
30f3f0262cSandi    }
31f3f0262cSandi}
32f3f0262cSandi
33f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
34f3f0262cSandi    var $type = 'copy';
35988c1340SPiyush Mishra
36988c1340SPiyush Mishra    function __construct($orig, $closing = false) {
37f3f0262cSandi        if (!is_array($closing))
38f3f0262cSandi            $closing = $orig;
39f3f0262cSandi        $this->orig = $orig;
40f3f0262cSandi        $this->closing = $closing;
41f3f0262cSandi    }
42f3f0262cSandi
43f3f0262cSandi    function reverse() {
44f3f0262cSandi        return new _DiffOp_Copy($this->closing, $this->orig);
45f3f0262cSandi    }
46f3f0262cSandi}
47f3f0262cSandi
48f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
49f3f0262cSandi    var $type = 'delete';
50988c1340SPiyush Mishra
51988c1340SPiyush Mishra    function __construct($lines) {
52f3f0262cSandi        $this->orig = $lines;
53f3f0262cSandi        $this->closing = false;
54f3f0262cSandi    }
55f3f0262cSandi
56f3f0262cSandi    function reverse() {
57f3f0262cSandi        return new _DiffOp_Add($this->orig);
58f3f0262cSandi    }
59f3f0262cSandi}
60f3f0262cSandi
61f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
62f3f0262cSandi    var $type = 'add';
63988c1340SPiyush Mishra
64988c1340SPiyush Mishra    function __construct($lines) {
65f3f0262cSandi        $this->closing = $lines;
66f3f0262cSandi        $this->orig = false;
67f3f0262cSandi    }
68f3f0262cSandi
69f3f0262cSandi    function reverse() {
70f3f0262cSandi        return new _DiffOp_Delete($this->closing);
71f3f0262cSandi    }
72f3f0262cSandi}
73f3f0262cSandi
74f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
75f3f0262cSandi    var $type = 'change';
76988c1340SPiyush Mishra
77988c1340SPiyush Mishra    function __construct($orig, $closing) {
78f3f0262cSandi        $this->orig = $orig;
79f3f0262cSandi        $this->closing = $closing;
80f3f0262cSandi    }
81f3f0262cSandi
82f3f0262cSandi    function reverse() {
83f3f0262cSandi        return new _DiffOp_Change($this->closing, $this->orig);
84f3f0262cSandi    }
85f3f0262cSandi}
86f3f0262cSandi
87f3f0262cSandi
88f3f0262cSandi/**
89f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
90f3f0262cSandi *
91f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
92f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
93f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
94f3f0262cSandi *
95f3f0262cSandi * More ideas are taken from:
96f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
97f3f0262cSandi *
98f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
99f3f0262cSandi * diffutils-2.7, which can be found at:
100f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
101f3f0262cSandi *
102f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
103f3f0262cSandi * are my own.
104f3f0262cSandi *
105f3f0262cSandi * @author Geoffrey T. Dairiki
106f3f0262cSandi * @access private
107f3f0262cSandi */
108f5b78785SAndreas Gohrclass _DiffEngine {
109f5b78785SAndreas Gohr
11042ea7f44SGerrit Uitslag    var $xchanged = array();
11142ea7f44SGerrit Uitslag    var $ychanged = array();
11242ea7f44SGerrit Uitslag    var $xv = array();
11342ea7f44SGerrit Uitslag    var $yv = array();
11442ea7f44SGerrit Uitslag    var $xind = array();
11542ea7f44SGerrit Uitslag    var $yind = array();
11642ea7f44SGerrit Uitslag    var $seq;
11742ea7f44SGerrit Uitslag    var $in_seq;
11842ea7f44SGerrit Uitslag    var $lcs;
11942ea7f44SGerrit Uitslag
12042ea7f44SGerrit Uitslag    /**
12142ea7f44SGerrit Uitslag     * @param array $from_lines
12242ea7f44SGerrit Uitslag     * @param array $to_lines
12342ea7f44SGerrit Uitslag     * @return _DiffOp[]
12442ea7f44SGerrit Uitslag     */
125f3f0262cSandi    function diff($from_lines, $to_lines) {
126f5b78785SAndreas Gohr        $n_from = count($from_lines);
127f5b78785SAndreas Gohr        $n_to = count($to_lines);
128f3f0262cSandi
129f3f0262cSandi        $this->xchanged = $this->ychanged = array();
130f3f0262cSandi        $this->xv = $this->yv = array();
131f3f0262cSandi        $this->xind = $this->yind = array();
132f3f0262cSandi        unset($this->seq);
133f3f0262cSandi        unset($this->in_seq);
134f3f0262cSandi        unset($this->lcs);
135f3f0262cSandi
136f3f0262cSandi        // Skip leading common lines.
137f3f0262cSandi        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
138f3f0262cSandi            if ($from_lines[$skip] != $to_lines[$skip])
139f3f0262cSandi                break;
140f3f0262cSandi            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
141f3f0262cSandi        }
142f3f0262cSandi        // Skip trailing common lines.
143f5b78785SAndreas Gohr        $xi = $n_from;
144f5b78785SAndreas Gohr        $yi = $n_to;
145f3f0262cSandi        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
146f3f0262cSandi            if ($from_lines[$xi] != $to_lines[$yi])
147f3f0262cSandi                break;
148f3f0262cSandi            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
149f3f0262cSandi        }
150f3f0262cSandi
151f3f0262cSandi        // Ignore lines which do not exist in both files.
152f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
153f3f0262cSandi            $xhash[$from_lines[$xi]] = 1;
154f3f0262cSandi        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
155f3f0262cSandi            $line = $to_lines[$yi];
156f3f0262cSandi            if (($this->ychanged[$yi] = empty($xhash[$line])))
157f3f0262cSandi                continue;
158f3f0262cSandi            $yhash[$line] = 1;
159f3f0262cSandi            $this->yv[] = $line;
160f3f0262cSandi            $this->yind[] = $yi;
161f3f0262cSandi        }
162f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
163f3f0262cSandi            $line = $from_lines[$xi];
164f3f0262cSandi            if (($this->xchanged[$xi] = empty($yhash[$line])))
165f3f0262cSandi                continue;
166f3f0262cSandi            $this->xv[] = $line;
167f3f0262cSandi            $this->xind[] = $xi;
168f3f0262cSandi        }
169f3f0262cSandi
170f3f0262cSandi        // Find the LCS.
171f5b78785SAndreas Gohr        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
172f3f0262cSandi
173f3f0262cSandi        // Merge edits when possible
174f3f0262cSandi        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
175f3f0262cSandi        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
176f3f0262cSandi
177f3f0262cSandi        // Compute the edit operations.
178f3f0262cSandi        $edits = array();
179f3f0262cSandi        $xi = $yi = 0;
180f3f0262cSandi        while ($xi < $n_from || $yi < $n_to) {
181f3f0262cSandi            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
182f3f0262cSandi            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
183f3f0262cSandi
184f3f0262cSandi            // Skip matching "snake".
185f3f0262cSandi            $copy = array();
1867deca91bSRobin Getz            while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
187f3f0262cSandi                $copy[] = $from_lines[$xi++];
188f3f0262cSandi                ++$yi;
189f3f0262cSandi            }
190f3f0262cSandi            if ($copy)
191f3f0262cSandi                $edits[] = new _DiffOp_Copy($copy);
192f3f0262cSandi
193f3f0262cSandi            // Find deletes & adds.
194f3f0262cSandi            $delete = array();
195f3f0262cSandi            while ($xi < $n_from && $this->xchanged[$xi])
196f3f0262cSandi                $delete[] = $from_lines[$xi++];
197f3f0262cSandi
198f3f0262cSandi            $add = array();
199f3f0262cSandi            while ($yi < $n_to && $this->ychanged[$yi])
200f3f0262cSandi                $add[] = $to_lines[$yi++];
201f3f0262cSandi
202f3f0262cSandi            if ($delete && $add)
203f3f0262cSandi                $edits[] = new _DiffOp_Change($delete, $add);
204f3f0262cSandi            elseif ($delete)
205f3f0262cSandi                $edits[] = new _DiffOp_Delete($delete);
206f3f0262cSandi            elseif ($add)
207f3f0262cSandi                $edits[] = new _DiffOp_Add($add);
208f3f0262cSandi        }
209f3f0262cSandi        return $edits;
210f3f0262cSandi    }
211f3f0262cSandi
212f3f0262cSandi
21315fae107Sandi    /**
21415fae107Sandi     * Divide the Largest Common Subsequence (LCS) of the sequences
215f3f0262cSandi     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
216f3f0262cSandi     * sized segments.
217f3f0262cSandi     *
218f3f0262cSandi     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
219f3f0262cSandi     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
220f3f0262cSandi     * sub sequences.  The first sub-sequence is contained in [X0, X1),
221f3f0262cSandi     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
222f3f0262cSandi     * that (X0, Y0) == (XOFF, YOFF) and
223f3f0262cSandi     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
224f3f0262cSandi     *
225f3f0262cSandi     * This function assumes that the first lines of the specified portions
226f3f0262cSandi     * of the two files do not match, and likewise that the last lines do not
227f3f0262cSandi     * match.  The caller must trim matching lines from the beginning and end
228f3f0262cSandi     * of the portions it is going to specify.
229*f50a239bSTakamura     *
230*f50a239bSTakamura     * @param integer $xoff
231*f50a239bSTakamura     * @param integer $xlim
232*f50a239bSTakamura     * @param integer $yoff
233*f50a239bSTakamura     * @param integer $ylim
234*f50a239bSTakamura     * @param integer $nchunks
235*f50a239bSTakamura     *
236*f50a239bSTakamura     * @return array
237f3f0262cSandi     */
238f3f0262cSandi    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
239f3f0262cSandi        $flip = false;
240f3f0262cSandi
241f3f0262cSandi        if ($xlim - $xoff > $ylim - $yoff) {
242f3f0262cSandi            // Things seems faster (I'm not sure I understand why)
243f3f0262cSandi            // when the shortest sequence in X.
244f3f0262cSandi            $flip = true;
2457deca91bSRobin Getz            list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
246f3f0262cSandi        }
247f3f0262cSandi
248f3f0262cSandi        if ($flip)
249f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
250f3f0262cSandi                $ymatches[$this->xv[$i]][] = $i;
251f3f0262cSandi        else
252f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
253f3f0262cSandi                $ymatches[$this->yv[$i]][] = $i;
254f3f0262cSandi
255f3f0262cSandi        $this->lcs = 0;
256f3f0262cSandi        $this->seq[0]= $yoff - 1;
257f3f0262cSandi        $this->in_seq = array();
258f3f0262cSandi        $ymids[0] = array();
259f3f0262cSandi
260f3f0262cSandi        $numer = $xlim - $xoff + $nchunks - 1;
261f3f0262cSandi        $x = $xoff;
262f3f0262cSandi        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
263f3f0262cSandi            if ($chunk > 0)
264f3f0262cSandi                for ($i = 0; $i <= $this->lcs; $i++)
265f3f0262cSandi                    $ymids[$i][$chunk-1] = $this->seq[$i];
266f3f0262cSandi
267f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
268f3f0262cSandi            for ( ; $x < $x1; $x++) {
269f3f0262cSandi                $line = $flip ? $this->yv[$x] : $this->xv[$x];
270f3f0262cSandi                if (empty($ymatches[$line]))
271f3f0262cSandi                    continue;
272f3f0262cSandi                $matches = $ymatches[$line];
273f3f0262cSandi                reset($matches);
274f3f0262cSandi                while (list ($junk, $y) = each($matches))
275f3f0262cSandi                    if (empty($this->in_seq[$y])) {
276f3f0262cSandi                        $k = $this->_lcs_pos($y);
277f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
278f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
279f3f0262cSandi                        break;
280f3f0262cSandi                    }
281f3f0262cSandi                while (list ($junk, $y) = each($matches)) {
282f3f0262cSandi                    if ($y > $this->seq[$k-1]) {
283f3f0262cSandi                        USE_ASSERTS && assert($y < $this->seq[$k]);
284f3f0262cSandi                        // Optimization: this is a common case:
285f3f0262cSandi                        //  next match is just replacing previous match.
286f3f0262cSandi                        $this->in_seq[$this->seq[$k]] = false;
287f3f0262cSandi                        $this->seq[$k] = $y;
288f3f0262cSandi                        $this->in_seq[$y] = 1;
289f3f0262cSandi                    }
290f3f0262cSandi                    else if (empty($this->in_seq[$y])) {
291f3f0262cSandi                        $k = $this->_lcs_pos($y);
292f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
293f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
294f3f0262cSandi                    }
295f3f0262cSandi                }
296f3f0262cSandi            }
297f3f0262cSandi        }
298f3f0262cSandi
299f3f0262cSandi        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
300f3f0262cSandi        $ymid = $ymids[$this->lcs];
301f3f0262cSandi        for ($n = 0; $n < $nchunks - 1; $n++) {
302f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
303f3f0262cSandi            $y1 = $ymid[$n] + 1;
304f3f0262cSandi            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
305f3f0262cSandi        }
306f3f0262cSandi        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
307f3f0262cSandi
308f3f0262cSandi        return array($this->lcs, $seps);
309f3f0262cSandi    }
310f3f0262cSandi
311f3f0262cSandi    function _lcs_pos($ypos) {
312f3f0262cSandi        $end = $this->lcs;
313f3f0262cSandi        if ($end == 0 || $ypos > $this->seq[$end]) {
314f3f0262cSandi            $this->seq[++$this->lcs] = $ypos;
315f3f0262cSandi            $this->in_seq[$ypos] = 1;
316f3f0262cSandi            return $this->lcs;
317f3f0262cSandi        }
318f3f0262cSandi
319f3f0262cSandi        $beg = 1;
320f3f0262cSandi        while ($beg < $end) {
321f3f0262cSandi            $mid = (int)(($beg + $end) / 2);
322f3f0262cSandi            if ($ypos > $this->seq[$mid])
323f3f0262cSandi                $beg = $mid + 1;
324f3f0262cSandi            else
325f3f0262cSandi                $end = $mid;
326f3f0262cSandi        }
327f3f0262cSandi
328f3f0262cSandi        USE_ASSERTS && assert($ypos != $this->seq[$end]);
329f3f0262cSandi
330f3f0262cSandi        $this->in_seq[$this->seq[$end]] = false;
331f3f0262cSandi        $this->seq[$end] = $ypos;
332f3f0262cSandi        $this->in_seq[$ypos] = 1;
333f3f0262cSandi        return $end;
334f3f0262cSandi    }
335f3f0262cSandi
33615fae107Sandi    /**
33715fae107Sandi     * Find LCS of two sequences.
338f3f0262cSandi     *
339f3f0262cSandi     * The results are recorded in the vectors $this->{x,y}changed[], by
340f3f0262cSandi     * storing a 1 in the element for each line that is an insertion
341f3f0262cSandi     * or deletion (ie. is not in the LCS).
342f3f0262cSandi     *
343f3f0262cSandi     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
344f3f0262cSandi     *
345f3f0262cSandi     * Note that XLIM, YLIM are exclusive bounds.
346f3f0262cSandi     * All line numbers are origin-0 and discarded lines are not counted.
347*f50a239bSTakamura     *
348*f50a239bSTakamura     * @param integer $xoff
349*f50a239bSTakamura     * @param integer $xlim
350*f50a239bSTakamura     * @param integer $yoff
351*f50a239bSTakamura     * @param integer $ylim
352f3f0262cSandi     */
353f3f0262cSandi    function _compareseq($xoff, $xlim, $yoff, $ylim) {
354f3f0262cSandi        // Slide down the bottom initial diagonal.
3557deca91bSRobin Getz        while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
356f3f0262cSandi            ++$xoff;
357f3f0262cSandi            ++$yoff;
358f3f0262cSandi        }
359f3f0262cSandi
360f3f0262cSandi        // Slide up the top initial diagonal.
3617deca91bSRobin Getz        while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
362f3f0262cSandi            --$xlim;
363f3f0262cSandi            --$ylim;
364f3f0262cSandi        }
365f3f0262cSandi
366f3f0262cSandi        if ($xoff == $xlim || $yoff == $ylim)
367f3f0262cSandi            $lcs = 0;
368f3f0262cSandi        else {
369f3f0262cSandi            // This is ad hoc but seems to work well.
370f3f0262cSandi            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
371f3f0262cSandi            //$nchunks = max(2,min(8,(int)$nchunks));
372f3f0262cSandi            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
373f3f0262cSandi            list ($lcs, $seps)
374f3f0262cSandi                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
375f3f0262cSandi        }
376f3f0262cSandi
377f3f0262cSandi        if ($lcs == 0) {
378f3f0262cSandi            // X and Y sequences have no common subsequence:
379f3f0262cSandi            // mark all changed.
380f3f0262cSandi            while ($yoff < $ylim)
381f3f0262cSandi                $this->ychanged[$this->yind[$yoff++]] = 1;
382f3f0262cSandi            while ($xoff < $xlim)
383f3f0262cSandi                $this->xchanged[$this->xind[$xoff++]] = 1;
384f3f0262cSandi        }
385f3f0262cSandi        else {
386f3f0262cSandi            // Use the partitions to split this problem into subproblems.
387f3f0262cSandi            reset($seps);
388f3f0262cSandi            $pt1 = $seps[0];
389f3f0262cSandi            while ($pt2 = next($seps)) {
390f3f0262cSandi                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
391f3f0262cSandi                $pt1 = $pt2;
392f3f0262cSandi            }
393f3f0262cSandi        }
394f3f0262cSandi    }
395f3f0262cSandi
39615fae107Sandi    /**
39715fae107Sandi     * Adjust inserts/deletes of identical lines to join changes
398f3f0262cSandi     * as much as possible.
399f3f0262cSandi     *
400f3f0262cSandi     * We do something when a run of changed lines include a
401f3f0262cSandi     * line at one end and has an excluded, identical line at the other.
402f3f0262cSandi     * We are free to choose which identical line is included.
403f3f0262cSandi     * `compareseq' usually chooses the one at the beginning,
404f3f0262cSandi     * but usually it is cleaner to consider the following identical line
405f3f0262cSandi     * to be the "change".
406f3f0262cSandi     *
407f3f0262cSandi     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
408*f50a239bSTakamura     *
409*f50a239bSTakamura     * @param array $lines
410*f50a239bSTakamura     * @param array $changed
411*f50a239bSTakamura     * @param array $other_changed
412f3f0262cSandi     */
413f3f0262cSandi    function _shift_boundaries($lines, &$changed, $other_changed) {
414f3f0262cSandi        $i = 0;
415f3f0262cSandi        $j = 0;
416f3f0262cSandi
417f5b78785SAndreas Gohr        USE_ASSERTS && assert('count($lines) == count($changed)');
418f5b78785SAndreas Gohr        $len = count($lines);
419f5b78785SAndreas Gohr        $other_len = count($other_changed);
420f3f0262cSandi
421f3f0262cSandi        while (1) {
422f3f0262cSandi            /*
423f3f0262cSandi             * Scan forwards to find beginning of another run of changes.
424f3f0262cSandi             * Also keep track of the corresponding point in the other file.
425f3f0262cSandi             *
426f3f0262cSandi             * Throughout this code, $i and $j are adjusted together so that
427f3f0262cSandi             * the first $i elements of $changed and the first $j elements
428f3f0262cSandi             * of $other_changed both contain the same number of zeros
429f3f0262cSandi             * (unchanged lines).
430f3f0262cSandi             * Furthermore, $j is always kept so that $j == $other_len or
431f3f0262cSandi             * $other_changed[$j] == false.
432f3f0262cSandi             */
433f3f0262cSandi            while ($j < $other_len && $other_changed[$j])
434f3f0262cSandi                $j++;
435f3f0262cSandi
436f3f0262cSandi            while ($i < $len && ! $changed[$i]) {
437f3f0262cSandi                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
438f5b78785SAndreas Gohr                $i++;
439f5b78785SAndreas Gohr                $j++;
440f3f0262cSandi                while ($j < $other_len && $other_changed[$j])
441f3f0262cSandi                    $j++;
442f3f0262cSandi            }
443f3f0262cSandi
444f3f0262cSandi            if ($i == $len)
445f3f0262cSandi                break;
446f3f0262cSandi
447f3f0262cSandi            $start = $i;
448f3f0262cSandi
449f3f0262cSandi            // Find the end of this run of changes.
450f3f0262cSandi            while (++$i < $len && $changed[$i])
451f3f0262cSandi                continue;
452f3f0262cSandi
453f3f0262cSandi            do {
454f3f0262cSandi                /*
455f3f0262cSandi                 * Record the length of this run of changes, so that
456f3f0262cSandi                 * we can later determine whether the run has grown.
457f3f0262cSandi                 */
458f3f0262cSandi                $runlength = $i - $start;
459f3f0262cSandi
460f3f0262cSandi                /*
461f3f0262cSandi                 * Move the changed region back, so long as the
462f3f0262cSandi                 * previous unchanged line matches the last changed one.
463f3f0262cSandi                 * This merges with previous changed regions.
464f3f0262cSandi                 */
465f3f0262cSandi                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
466f3f0262cSandi                    $changed[--$start] = 1;
467f3f0262cSandi                    $changed[--$i] = false;
468f3f0262cSandi                    while ($start > 0 && $changed[$start - 1])
469f3f0262cSandi                        $start--;
470f3f0262cSandi                    USE_ASSERTS && assert('$j > 0');
471f3f0262cSandi                    while ($other_changed[--$j])
472f3f0262cSandi                        continue;
473f3f0262cSandi                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
474f3f0262cSandi                }
475f3f0262cSandi
476f3f0262cSandi                /*
477f3f0262cSandi                 * Set CORRESPONDING to the end of the changed run, at the last
478f3f0262cSandi                 * point where it corresponds to a changed run in the other file.
479f3f0262cSandi                 * CORRESPONDING == LEN means no such point has been found.
480f3f0262cSandi                 */
481f3f0262cSandi                $corresponding = $j < $other_len ? $i : $len;
482f3f0262cSandi
483f3f0262cSandi                /*
484f3f0262cSandi                 * Move the changed region forward, so long as the
485f3f0262cSandi                 * first changed line matches the following unchanged one.
486f3f0262cSandi                 * This merges with following changed regions.
487f3f0262cSandi                 * Do this second, so that if there are no merges,
488f3f0262cSandi                 * the changed region is moved forward as far as possible.
489f3f0262cSandi                 */
490f3f0262cSandi                while ($i < $len && $lines[$start] == $lines[$i]) {
491f3f0262cSandi                    $changed[$start++] = false;
492f3f0262cSandi                    $changed[$i++] = 1;
493f3f0262cSandi                    while ($i < $len && $changed[$i])
494f3f0262cSandi                        $i++;
495f3f0262cSandi
496f3f0262cSandi                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
497f3f0262cSandi                    $j++;
498f3f0262cSandi                    if ($j < $other_len && $other_changed[$j]) {
499f3f0262cSandi                        $corresponding = $i;
500f3f0262cSandi                        while ($j < $other_len && $other_changed[$j])
501f3f0262cSandi                            $j++;
502f3f0262cSandi                    }
503f3f0262cSandi                }
504f3f0262cSandi            } while ($runlength != $i - $start);
505f3f0262cSandi
506f3f0262cSandi            /*
507f3f0262cSandi             * If possible, move the fully-merged run of changes
508f3f0262cSandi             * back to a corresponding run in the other file.
509f3f0262cSandi             */
510f3f0262cSandi            while ($corresponding < $i) {
511f3f0262cSandi                $changed[--$start] = 1;
512f3f0262cSandi                $changed[--$i] = 0;
513f3f0262cSandi                USE_ASSERTS && assert('$j > 0');
514f3f0262cSandi                while ($other_changed[--$j])
515f3f0262cSandi                    continue;
516f3f0262cSandi                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
517f3f0262cSandi            }
518f3f0262cSandi        }
519f3f0262cSandi    }
520f3f0262cSandi}
521f3f0262cSandi
522f3f0262cSandi/**
523f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
524f3f0262cSandi */
525f5b78785SAndreas Gohrclass Diff {
526f5b78785SAndreas Gohr
527f3f0262cSandi    var $edits;
528f3f0262cSandi
529f3f0262cSandi    /**
530f3f0262cSandi     * Constructor.
531f3f0262cSandi     * Computes diff between sequences of strings.
532f3f0262cSandi     *
53342ea7f44SGerrit Uitslag     * @param array $from_lines An array of strings.
534f3f0262cSandi     *                          (Typically these are lines from a file.)
53542ea7f44SGerrit Uitslag     * @param array $to_lines   An array of strings.
536f3f0262cSandi     */
537988c1340SPiyush Mishra    function __construct($from_lines, $to_lines) {
538f3f0262cSandi        $eng = new _DiffEngine;
539f3f0262cSandi        $this->edits = $eng->diff($from_lines, $to_lines);
540f3f0262cSandi        //$this->_check($from_lines, $to_lines);
541f3f0262cSandi    }
542f3f0262cSandi
543f3f0262cSandi    /**
544f3f0262cSandi     * Compute reversed Diff.
545f3f0262cSandi     *
546f3f0262cSandi     * SYNOPSIS:
547f3f0262cSandi     *
548f3f0262cSandi     *  $diff = new Diff($lines1, $lines2);
549f3f0262cSandi     *  $rev = $diff->reverse();
55042ea7f44SGerrit Uitslag     *
55142ea7f44SGerrit Uitslag     * @return Diff  A Diff object representing the inverse of the
552f3f0262cSandi     *               original diff.
553f3f0262cSandi     */
554f3f0262cSandi    function reverse() {
555f3f0262cSandi        $rev = $this;
556f3f0262cSandi        $rev->edits = array();
557f3f0262cSandi        foreach ($this->edits as $edit) {
558f3f0262cSandi            $rev->edits[] = $edit->reverse();
559f3f0262cSandi        }
560f3f0262cSandi        return $rev;
561f3f0262cSandi    }
562f3f0262cSandi
563f3f0262cSandi    /**
564f3f0262cSandi     * Check for empty diff.
565f3f0262cSandi     *
566f3f0262cSandi     * @return bool True iff two sequences were identical.
567f3f0262cSandi     */
568f3f0262cSandi    function isEmpty() {
569f3f0262cSandi        foreach ($this->edits as $edit) {
570f3f0262cSandi            if ($edit->type != 'copy')
571f3f0262cSandi                return false;
572f3f0262cSandi        }
573f3f0262cSandi        return true;
574f3f0262cSandi    }
575f3f0262cSandi
576f3f0262cSandi    /**
577f3f0262cSandi     * Compute the length of the Longest Common Subsequence (LCS).
578f3f0262cSandi     *
579f3f0262cSandi     * This is mostly for diagnostic purposed.
580f3f0262cSandi     *
581f3f0262cSandi     * @return int The length of the LCS.
582f3f0262cSandi     */
583f3f0262cSandi    function lcs() {
584f3f0262cSandi        $lcs = 0;
585f3f0262cSandi        foreach ($this->edits as $edit) {
586f3f0262cSandi            if ($edit->type == 'copy')
587f5b78785SAndreas Gohr                $lcs += count($edit->orig);
588f3f0262cSandi        }
589f3f0262cSandi        return $lcs;
590f3f0262cSandi    }
591f3f0262cSandi
592f3f0262cSandi    /**
593f3f0262cSandi     * Get the original set of lines.
594f3f0262cSandi     *
595f3f0262cSandi     * This reconstructs the $from_lines parameter passed to the
596f3f0262cSandi     * constructor.
597f3f0262cSandi     *
598f3f0262cSandi     * @return array The original sequence of strings.
599f3f0262cSandi     */
600f3f0262cSandi    function orig() {
601f3f0262cSandi        $lines = array();
602f3f0262cSandi
603f3f0262cSandi        foreach ($this->edits as $edit) {
604f3f0262cSandi            if ($edit->orig)
605f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->orig);
606f3f0262cSandi        }
607f3f0262cSandi        return $lines;
608f3f0262cSandi    }
609f3f0262cSandi
610f3f0262cSandi    /**
611f3f0262cSandi     * Get the closing set of lines.
612f3f0262cSandi     *
613f3f0262cSandi     * This reconstructs the $to_lines parameter passed to the
614f3f0262cSandi     * constructor.
615f3f0262cSandi     *
616f3f0262cSandi     * @return array The sequence of strings.
617f3f0262cSandi     */
618f3f0262cSandi    function closing() {
619f3f0262cSandi        $lines = array();
620f3f0262cSandi
621f3f0262cSandi        foreach ($this->edits as $edit) {
622f3f0262cSandi            if ($edit->closing)
623f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->closing);
624f3f0262cSandi        }
625f3f0262cSandi        return $lines;
626f3f0262cSandi    }
627f3f0262cSandi
628f3f0262cSandi    /**
629f3f0262cSandi     * Check a Diff for validity.
630f3f0262cSandi     *
631f3f0262cSandi     * This is here only for debugging purposes.
632*f50a239bSTakamura     *
633*f50a239bSTakamura     * @param mixed $from_lines
634*f50a239bSTakamura     * @param mixed $to_lines
635f3f0262cSandi     */
636f3f0262cSandi    function _check($from_lines, $to_lines) {
637f3f0262cSandi        if (serialize($from_lines) != serialize($this->orig()))
638f3f0262cSandi            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
639f3f0262cSandi        if (serialize($to_lines) != serialize($this->closing()))
640f3f0262cSandi            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
641f3f0262cSandi
642f3f0262cSandi        $rev = $this->reverse();
643f3f0262cSandi        if (serialize($to_lines) != serialize($rev->orig()))
644f3f0262cSandi            trigger_error("Reversed original doesn't match", E_USER_ERROR);
645f3f0262cSandi        if (serialize($from_lines) != serialize($rev->closing()))
646f3f0262cSandi            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
647f3f0262cSandi
648f3f0262cSandi        $prevtype = 'none';
649f3f0262cSandi        foreach ($this->edits as $edit) {
650f3f0262cSandi            if ($prevtype == $edit->type)
651f3f0262cSandi                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
652f3f0262cSandi            $prevtype = $edit->type;
653f3f0262cSandi        }
654f3f0262cSandi
655f3f0262cSandi        $lcs = $this->lcs();
656f3f0262cSandi        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
657f3f0262cSandi    }
658f3f0262cSandi}
659f3f0262cSandi
660f3f0262cSandi/**
661f3f0262cSandi * FIXME: bad name.
662f3f0262cSandi */
663f5b78785SAndreas Gohrclass MappedDiff extends Diff {
664f3f0262cSandi    /**
665f3f0262cSandi     * Constructor.
666f3f0262cSandi     *
667f3f0262cSandi     * Computes diff between sequences of strings.
668f3f0262cSandi     *
669f3f0262cSandi     * This can be used to compute things like
670f3f0262cSandi     * case-insensitve diffs, or diffs which ignore
671f3f0262cSandi     * changes in white-space.
672f3f0262cSandi     *
67342ea7f44SGerrit Uitslag     * @param string[] $from_lines         An array of strings.
674f3f0262cSandi     *                                     (Typically these are lines from a file.)
675f3f0262cSandi     *
67642ea7f44SGerrit Uitslag     * @param string[] $to_lines           An array of strings.
677f3f0262cSandi     *
67842ea7f44SGerrit Uitslag     * @param string[] $mapped_from_lines  This array should
679f3f0262cSandi     *                                     have the same size number of elements as $from_lines.
680f3f0262cSandi     *                                     The elements in $mapped_from_lines and
681f3f0262cSandi     *                                     $mapped_to_lines are what is actually compared
682f3f0262cSandi     *                                     when computing the diff.
683f3f0262cSandi     *
68442ea7f44SGerrit Uitslag     * @param string[] $mapped_to_lines    This array should
685f3f0262cSandi     *                                     have the same number of elements as $to_lines.
686f3f0262cSandi     */
687988c1340SPiyush Mishra    function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
688f3f0262cSandi
689f5b78785SAndreas Gohr        assert(count($from_lines) == count($mapped_from_lines));
690f5b78785SAndreas Gohr        assert(count($to_lines) == count($mapped_to_lines));
691f3f0262cSandi
692988c1340SPiyush Mishra        parent::__construct($mapped_from_lines, $mapped_to_lines);
693f3f0262cSandi
694f3f0262cSandi        $xi = $yi = 0;
695f5b78785SAndreas Gohr        $ecnt = count($this->edits);
696f5b78785SAndreas Gohr        for ($i = 0; $i < $ecnt; $i++) {
697f3f0262cSandi            $orig = &$this->edits[$i]->orig;
698f3f0262cSandi            if (is_array($orig)) {
699f5b78785SAndreas Gohr                $orig = array_slice($from_lines, $xi, count($orig));
700f5b78785SAndreas Gohr                $xi += count($orig);
701f3f0262cSandi            }
702f3f0262cSandi
703f3f0262cSandi            $closing = &$this->edits[$i]->closing;
704f3f0262cSandi            if (is_array($closing)) {
705f5b78785SAndreas Gohr                $closing = array_slice($to_lines, $yi, count($closing));
706f5b78785SAndreas Gohr                $yi += count($closing);
707f3f0262cSandi            }
708f3f0262cSandi        }
709f3f0262cSandi    }
710f3f0262cSandi}
711f3f0262cSandi
712f3f0262cSandi/**
713f3f0262cSandi * A class to format Diffs
714f3f0262cSandi *
715f3f0262cSandi * This class formats the diff in classic diff format.
716f3f0262cSandi * It is intended that this class be customized via inheritance,
717f3f0262cSandi * to obtain fancier outputs.
718f3f0262cSandi */
719f5b78785SAndreas Gohrclass DiffFormatter {
720f3f0262cSandi    /**
721f3f0262cSandi     * Number of leading context "lines" to preserve.
722f3f0262cSandi     *
723f3f0262cSandi     * This should be left at zero for this class, but subclasses
724f3f0262cSandi     * may want to set this to other values.
725f3f0262cSandi     */
726f3f0262cSandi    var $leading_context_lines = 0;
727f3f0262cSandi
728f3f0262cSandi    /**
729f3f0262cSandi     * Number of trailing context "lines" to preserve.
730f3f0262cSandi     *
731f3f0262cSandi     * This should be left at zero for this class, but subclasses
732f3f0262cSandi     * may want to set this to other values.
733f3f0262cSandi     */
734f3f0262cSandi    var $trailing_context_lines = 0;
735f3f0262cSandi
736f3f0262cSandi    /**
737f3f0262cSandi     * Format a diff.
738f3f0262cSandi     *
73942ea7f44SGerrit Uitslag     * @param Diff $diff A Diff object.
740f3f0262cSandi     * @return string The formatted output.
741f3f0262cSandi     */
742f3f0262cSandi    function format($diff) {
743f3f0262cSandi
744f3f0262cSandi        $xi = $yi = 1;
74542ea7f44SGerrit Uitslag        $x0 = $y0 = 0;
746f3f0262cSandi        $block = false;
747f3f0262cSandi        $context = array();
748f3f0262cSandi
749f3f0262cSandi        $nlead = $this->leading_context_lines;
750f3f0262cSandi        $ntrail = $this->trailing_context_lines;
751f3f0262cSandi
752f3f0262cSandi        $this->_start_diff();
753f3f0262cSandi
754f3f0262cSandi        foreach ($diff->edits as $edit) {
755f3f0262cSandi            if ($edit->type == 'copy') {
756f3f0262cSandi                if (is_array($block)) {
757f5b78785SAndreas Gohr                    if (count($edit->orig) <= $nlead + $ntrail) {
758f3f0262cSandi                        $block[] = $edit;
759f3f0262cSandi                    }
760f3f0262cSandi                    else{
761f3f0262cSandi                        if ($ntrail) {
762f3f0262cSandi                            $context = array_slice($edit->orig, 0, $ntrail);
763f3f0262cSandi                            $block[] = new _DiffOp_Copy($context);
764f3f0262cSandi                        }
7657deca91bSRobin Getz                        $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
766f3f0262cSandi                        $block = false;
767f3f0262cSandi                    }
768f3f0262cSandi                }
769f3f0262cSandi                $context = $edit->orig;
770f3f0262cSandi            }
771f3f0262cSandi            else {
772f3f0262cSandi                if (! is_array($block)) {
773f5b78785SAndreas Gohr                    $context = array_slice($context, count($context) - $nlead);
774f5b78785SAndreas Gohr                    $x0 = $xi - count($context);
775f5b78785SAndreas Gohr                    $y0 = $yi - count($context);
776f3f0262cSandi                    $block = array();
777f3f0262cSandi                    if ($context)
778f3f0262cSandi                        $block[] = new _DiffOp_Copy($context);
779f3f0262cSandi                }
780f3f0262cSandi                $block[] = $edit;
781f3f0262cSandi            }
782f3f0262cSandi
783f3f0262cSandi            if ($edit->orig)
784f5b78785SAndreas Gohr                $xi += count($edit->orig);
785f3f0262cSandi            if ($edit->closing)
786f5b78785SAndreas Gohr                $yi += count($edit->closing);
787f3f0262cSandi        }
788f3f0262cSandi
789f3f0262cSandi        if (is_array($block))
7907deca91bSRobin Getz            $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
791f3f0262cSandi
792f3f0262cSandi        return $this->_end_diff();
793f3f0262cSandi    }
794f3f0262cSandi
79542ea7f44SGerrit Uitslag    /**
79642ea7f44SGerrit Uitslag     * @param int $xbeg
79742ea7f44SGerrit Uitslag     * @param int $xlen
79842ea7f44SGerrit Uitslag     * @param int $ybeg
79942ea7f44SGerrit Uitslag     * @param int $ylen
80042ea7f44SGerrit Uitslag     * @param array $edits
80142ea7f44SGerrit Uitslag     */
802f3f0262cSandi    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
803f3f0262cSandi        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
804f3f0262cSandi        foreach ($edits as $edit) {
805f3f0262cSandi            if ($edit->type == 'copy')
806f3f0262cSandi                $this->_context($edit->orig);
807f3f0262cSandi            elseif ($edit->type == 'add')
808f3f0262cSandi                $this->_added($edit->closing);
809f3f0262cSandi            elseif ($edit->type == 'delete')
810f3f0262cSandi                $this->_deleted($edit->orig);
811f3f0262cSandi            elseif ($edit->type == 'change')
812f3f0262cSandi                $this->_changed($edit->orig, $edit->closing);
813f3f0262cSandi            else
814f3f0262cSandi                trigger_error("Unknown edit type", E_USER_ERROR);
815f3f0262cSandi        }
816f3f0262cSandi        $this->_end_block();
817f3f0262cSandi    }
818f3f0262cSandi
819f3f0262cSandi    function _start_diff() {
820f3f0262cSandi        ob_start();
821f3f0262cSandi    }
822f3f0262cSandi
823f3f0262cSandi    function _end_diff() {
824f3f0262cSandi        $val = ob_get_contents();
825f3f0262cSandi        ob_end_clean();
826f3f0262cSandi        return $val;
827f3f0262cSandi    }
828f3f0262cSandi
82942ea7f44SGerrit Uitslag    /**
83042ea7f44SGerrit Uitslag     * @param int $xbeg
83142ea7f44SGerrit Uitslag     * @param int $xlen
83242ea7f44SGerrit Uitslag     * @param int $ybeg
83342ea7f44SGerrit Uitslag     * @param int $ylen
83442ea7f44SGerrit Uitslag     * @return string
83542ea7f44SGerrit Uitslag     */
836f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
837f3f0262cSandi        if ($xlen > 1)
838f3f0262cSandi            $xbeg .= "," . ($xbeg + $xlen - 1);
839f3f0262cSandi        if ($ylen > 1)
840f3f0262cSandi            $ybeg .= "," . ($ybeg + $ylen - 1);
841f3f0262cSandi
842f3f0262cSandi        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
843f3f0262cSandi    }
844f3f0262cSandi
84542ea7f44SGerrit Uitslag    /**
84642ea7f44SGerrit Uitslag     * @param string $header
84742ea7f44SGerrit Uitslag     */
848f3f0262cSandi    function _start_block($header) {
849f3f0262cSandi        echo $header;
850f3f0262cSandi    }
851f3f0262cSandi
852f3f0262cSandi    function _end_block() {
853f3f0262cSandi    }
854f3f0262cSandi
855f3f0262cSandi    function _lines($lines, $prefix = ' ') {
856f3f0262cSandi        foreach ($lines as $line)
85760056e69SChristopher Smith            echo "$prefix ".$this->_escape($line)."\n";
858f3f0262cSandi    }
859f3f0262cSandi
860f3f0262cSandi    function _context($lines) {
861f3f0262cSandi        $this->_lines($lines);
862f3f0262cSandi    }
863f3f0262cSandi
864f3f0262cSandi    function _added($lines) {
865f3f0262cSandi        $this->_lines($lines, ">");
866f3f0262cSandi    }
867f3f0262cSandi    function _deleted($lines) {
868f3f0262cSandi        $this->_lines($lines, "<");
869f3f0262cSandi    }
870f3f0262cSandi
871f3f0262cSandi    function _changed($orig, $closing) {
872f3f0262cSandi        $this->_deleted($orig);
873f3f0262cSandi        echo "---\n";
874f3f0262cSandi        $this->_added($closing);
875f3f0262cSandi    }
87660056e69SChristopher Smith
877bfd197d2ShArpanet    /**
878bfd197d2ShArpanet     * Escape string
879bfd197d2ShArpanet     *
880bfd197d2ShArpanet     * Override this method within other formatters if escaping required.
881bfd197d2ShArpanet     * Base class requires $str to be returned WITHOUT escaping.
882bfd197d2ShArpanet     *
883bfd197d2ShArpanet     * @param $str string Text string to escape
884bfd197d2ShArpanet     * @return string The escaped string.
885bfd197d2ShArpanet     */
88660056e69SChristopher Smith    function _escape($str){
88760056e69SChristopher Smith        return $str;
88860056e69SChristopher Smith    }
889f3f0262cSandi}
890f3f0262cSandi
89147a906eaSAndreas Gohr/**
89247a906eaSAndreas Gohr * Utilityclass for styling HTML formatted diffs
89347a906eaSAndreas Gohr *
89447a906eaSAndreas Gohr * Depends on global var $DIFF_INLINESTYLES, if true some minimal predefined
89547a906eaSAndreas Gohr * inline styles are used. Useful for HTML mails and RSS feeds
89647a906eaSAndreas Gohr *
89747a906eaSAndreas Gohr * @author Andreas Gohr <andi@splitbrain.org>
89847a906eaSAndreas Gohr */
89947a906eaSAndreas Gohrclass HTMLDiff {
90047a906eaSAndreas Gohr    /**
90147a906eaSAndreas Gohr     * Holds the style names and basic CSS
90247a906eaSAndreas Gohr     */
90347a906eaSAndreas Gohr    static public $styles = array(
90447a906eaSAndreas Gohr            'diff-addedline'    => 'background-color: #ddffdd;',
90547a906eaSAndreas Gohr            'diff-deletedline'  => 'background-color: #ffdddd;',
90647a906eaSAndreas Gohr            'diff-context'      => 'background-color: #f5f5f5;',
90747a906eaSAndreas Gohr            'diff-mark'         => 'color: #ff0000;',
90847a906eaSAndreas Gohr        );
90947a906eaSAndreas Gohr
91047a906eaSAndreas Gohr    /**
91147a906eaSAndreas Gohr     * Return a class or style parameter
912*f50a239bSTakamura     *
913*f50a239bSTakamura     * @param string $classname
914*f50a239bSTakamura     *
915*f50a239bSTakamura     * @return string
91647a906eaSAndreas Gohr     */
91747a906eaSAndreas Gohr    static function css($classname){
91847a906eaSAndreas Gohr        global $DIFF_INLINESTYLES;
91947a906eaSAndreas Gohr
92047a906eaSAndreas Gohr        if($DIFF_INLINESTYLES){
92147a906eaSAndreas Gohr            if(!isset(self::$styles[$classname])) return '';
92247a906eaSAndreas Gohr            return 'style="'.self::$styles[$classname].'"';
92347a906eaSAndreas Gohr        }else{
92447a906eaSAndreas Gohr            return 'class="'.$classname.'"';
92547a906eaSAndreas Gohr        }
92647a906eaSAndreas Gohr    }
92747a906eaSAndreas Gohr}
928f3f0262cSandi
929f3f0262cSandi/**
930f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
931f3f0262cSandi *
932f3f0262cSandi */
933f3f0262cSandi
9349b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
935f3f0262cSandi
936f3f0262cSandiclass _HWLDF_WordAccumulator {
937988c1340SPiyush Mishra
938988c1340SPiyush Mishra    function __construct() {
939f3f0262cSandi        $this->_lines = array();
940f3f0262cSandi        $this->_line = '';
941f3f0262cSandi        $this->_group = '';
942f3f0262cSandi        $this->_tag = '';
943f3f0262cSandi    }
944f3f0262cSandi
945f3f0262cSandi    function _flushGroup($new_tag) {
946f3f0262cSandi        if ($this->_group !== '') {
947f3f0262cSandi            if ($this->_tag == 'mark')
94860056e69SChristopher Smith                $this->_line .= '<strong '.HTMLDiff::css('diff-mark').'>'.$this->_escape($this->_group).'</strong>';
949812bb04eSRobin Getz            elseif ($this->_tag == 'add')
95060056e69SChristopher Smith                $this->_line .= '<span '.HTMLDiff::css('diff-addedline').'>'.$this->_escape($this->_group).'</span>';
951812bb04eSRobin Getz            elseif ($this->_tag == 'del')
95260056e69SChristopher Smith                $this->_line .= '<span '.HTMLDiff::css('diff-deletedline').'><del>'.$this->_escape($this->_group).'</del></span>';
953f3f0262cSandi            else
95460056e69SChristopher Smith                $this->_line .= $this->_escape($this->_group);
955f3f0262cSandi        }
956f3f0262cSandi        $this->_group = '';
957f3f0262cSandi        $this->_tag = $new_tag;
958f3f0262cSandi    }
959f3f0262cSandi
96042ea7f44SGerrit Uitslag    /**
96142ea7f44SGerrit Uitslag     * @param string $new_tag
96242ea7f44SGerrit Uitslag     */
963f3f0262cSandi    function _flushLine($new_tag) {
964f3f0262cSandi        $this->_flushGroup($new_tag);
965f3f0262cSandi        if ($this->_line != '')
966f3f0262cSandi            $this->_lines[] = $this->_line;
967f3f0262cSandi        $this->_line = '';
968f3f0262cSandi    }
969f3f0262cSandi
970f3f0262cSandi    function addWords($words, $tag = '') {
971f3f0262cSandi        if ($tag != $this->_tag)
972f3f0262cSandi            $this->_flushGroup($tag);
973f3f0262cSandi
974f3f0262cSandi        foreach ($words as $word) {
975f3f0262cSandi            // new-line should only come as first char of word.
976f3f0262cSandi            if ($word == '')
977f3f0262cSandi                continue;
978f3f0262cSandi            if ($word[0] == "\n") {
979f3f0262cSandi                $this->_group .= NBSP;
980f3f0262cSandi                $this->_flushLine($tag);
981f3f0262cSandi                $word = substr($word, 1);
982f3f0262cSandi            }
983f3f0262cSandi            assert(!strstr($word, "\n"));
984f3f0262cSandi            $this->_group .= $word;
985f3f0262cSandi        }
986f3f0262cSandi    }
987f3f0262cSandi
988f3f0262cSandi    function getLines() {
989f3f0262cSandi        $this->_flushLine('~done');
990f3f0262cSandi        return $this->_lines;
991f3f0262cSandi    }
99260056e69SChristopher Smith
99360056e69SChristopher Smith    function _escape($str){
99460056e69SChristopher Smith        return hsc($str);
99560056e69SChristopher Smith    }
996f3f0262cSandi}
997f3f0262cSandi
998f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff {
999f5b78785SAndreas Gohr
1000988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
1001f3f0262cSandi        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
1002f3f0262cSandi        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
1003f3f0262cSandi
1004988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
1005f3f0262cSandi    }
1006f3f0262cSandi
1007f3f0262cSandi    function _split($lines) {
10085e1ee188SXin LI        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
10097deca91bSRobin Getz             implode("\n", $lines), $m)) {
1010f3f0262cSandi            return array(array(''), array(''));
1011f3f0262cSandi        }
1012f3f0262cSandi        return array($m[0], $m[1]);
1013f3f0262cSandi    }
1014f3f0262cSandi
1015f3f0262cSandi    function orig() {
1016f3f0262cSandi        $orig = new _HWLDF_WordAccumulator;
1017f3f0262cSandi
1018f3f0262cSandi        foreach ($this->edits as $edit) {
1019f3f0262cSandi            if ($edit->type == 'copy')
1020f3f0262cSandi                $orig->addWords($edit->orig);
1021f3f0262cSandi            elseif ($edit->orig)
1022f3f0262cSandi                $orig->addWords($edit->orig, 'mark');
1023f3f0262cSandi        }
1024f3f0262cSandi        return $orig->getLines();
1025f3f0262cSandi    }
1026f3f0262cSandi
1027f3f0262cSandi    function closing() {
1028f3f0262cSandi        $closing = new _HWLDF_WordAccumulator;
1029f3f0262cSandi
1030f3f0262cSandi        foreach ($this->edits as $edit) {
1031f3f0262cSandi            if ($edit->type == 'copy')
1032f3f0262cSandi                $closing->addWords($edit->closing);
1033f3f0262cSandi            elseif ($edit->closing)
1034f3f0262cSandi                $closing->addWords($edit->closing, 'mark');
1035f3f0262cSandi        }
1036f3f0262cSandi        return $closing->getLines();
1037f3f0262cSandi    }
1038f3f0262cSandi}
1039f3f0262cSandi
1040812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff {
1041812bb04eSRobin Getz
1042988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
1043812bb04eSRobin Getz        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
1044812bb04eSRobin Getz        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
1045812bb04eSRobin Getz
1046988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
1047812bb04eSRobin Getz    }
1048812bb04eSRobin Getz
1049812bb04eSRobin Getz    function _split($lines) {
1050c5982caaSDanny Lin        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
1051812bb04eSRobin Getz             implode("\n", $lines), $m)) {
1052812bb04eSRobin Getz            return array(array(''), array(''));
1053812bb04eSRobin Getz        }
1054812bb04eSRobin Getz        return array($m[0], $m[1]);
1055812bb04eSRobin Getz    }
1056812bb04eSRobin Getz
1057812bb04eSRobin Getz    function inline() {
1058812bb04eSRobin Getz        $orig = new _HWLDF_WordAccumulator;
1059812bb04eSRobin Getz        foreach ($this->edits as $edit) {
1060812bb04eSRobin Getz            if ($edit->type == 'copy')
10614f2305cbSAdrian Lang                $orig->addWords($edit->closing);
1062812bb04eSRobin Getz            elseif ($edit->type == 'change'){
1063812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1064812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1065812bb04eSRobin Getz            } elseif ($edit->type == 'delete')
1066812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1067812bb04eSRobin Getz            elseif ($edit->type == 'add')
1068812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1069812bb04eSRobin Getz            elseif ($edit->orig)
1070812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1071812bb04eSRobin Getz        }
1072812bb04eSRobin Getz        return $orig->getLines();
1073812bb04eSRobin Getz    }
1074812bb04eSRobin Getz}
1075812bb04eSRobin Getz
1076f3f0262cSandi/**
1077f3f0262cSandi * "Unified" diff formatter.
1078f3f0262cSandi *
1079f3f0262cSandi * This class formats the diff in classic "unified diff" format.
1080df9752e9SChristopher Smith *
1081df9752e9SChristopher Smith * NOTE: output is plain text and unsafe for use in HTML without escaping.
1082f3f0262cSandi */
1083f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter {
1084f5b78785SAndreas Gohr
1085988c1340SPiyush Mishra    function __construct($context_lines = 4) {
1086f3f0262cSandi        $this->leading_context_lines = $context_lines;
1087f3f0262cSandi        $this->trailing_context_lines = $context_lines;
1088f3f0262cSandi    }
1089f3f0262cSandi
1090f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1091f3f0262cSandi        if ($xlen != 1)
1092f3f0262cSandi            $xbeg .= "," . $xlen;
1093f3f0262cSandi        if ($ylen != 1)
1094f3f0262cSandi            $ybeg .= "," . $ylen;
1095f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
1096f3f0262cSandi    }
1097f3f0262cSandi
1098f3f0262cSandi    function _added($lines) {
1099f3f0262cSandi        $this->_lines($lines, "+");
1100f3f0262cSandi    }
1101f3f0262cSandi    function _deleted($lines) {
1102f3f0262cSandi        $this->_lines($lines, "-");
1103f3f0262cSandi    }
1104f3f0262cSandi    function _changed($orig, $final) {
1105f3f0262cSandi        $this->_deleted($orig);
1106f3f0262cSandi        $this->_added($final);
1107f3f0262cSandi    }
1108f3f0262cSandi}
1109f3f0262cSandi
1110f3f0262cSandi/**
1111f3f0262cSandi *  Wikipedia Table style diff formatter.
1112f3f0262cSandi *
1113f3f0262cSandi */
1114f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter {
11153a4ea35cSChristopher Smith    var $colspan = 2;
1116f5b78785SAndreas Gohr
1117988c1340SPiyush Mishra    function __construct() {
1118f3f0262cSandi        $this->leading_context_lines = 2;
1119f3f0262cSandi        $this->trailing_context_lines = 2;
1120f3f0262cSandi    }
1121f3f0262cSandi
112242ea7f44SGerrit Uitslag    /**
112342ea7f44SGerrit Uitslag     * @param Diff $diff
112442ea7f44SGerrit Uitslag     * @return string
112542ea7f44SGerrit Uitslag     */
11262d880650SAdrian Lang    function format($diff) {
11272d880650SAdrian Lang        // Preserve whitespaces by converting some to non-breaking spaces.
11282d880650SAdrian Lang        // Do not convert all of them to allow word-wrap.
11292d880650SAdrian Lang        $val = parent::format($diff);
1130e260f93bSAnika Henke        $val = str_replace('  ','&#160; ', $val);
1131e260f93bSAnika Henke        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&#160;', $val);
11322d880650SAdrian Lang        return $val;
11332d880650SAdrian Lang    }
11342d880650SAdrian Lang
1135f3f0262cSandi    function _pre($text){
1136f3f0262cSandi        $text = htmlspecialchars($text);
1137f3f0262cSandi        return $text;
1138f3f0262cSandi    }
1139f3f0262cSandi
1140f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1141f3f0262cSandi        global $lang;
1142f3f0262cSandi        $l1 = $lang['line'].' '.$xbeg;
1143f3f0262cSandi        $l2 = $lang['line'].' '.$ybeg;
11443a4ea35cSChristopher Smith        $r = '<tr><td '.HTMLDiff::css('diff-blockheader').' colspan="'.$this->colspan.'">'.$l1.":</td>\n".
11453a4ea35cSChristopher Smith             '<td '.HTMLDiff::css('diff-blockheader').' colspan="'.$this->colspan.'">'.$l2.":</td>\n".
11465048c277SAnika Henke             "</tr>\n";
1147f3f0262cSandi        return $r;
1148f3f0262cSandi    }
1149f3f0262cSandi
1150f3f0262cSandi    function _start_block($header) {
1151f3f0262cSandi        print($header);
1152f3f0262cSandi    }
1153f3f0262cSandi
1154f3f0262cSandi    function _end_block() {
1155f3f0262cSandi    }
1156f3f0262cSandi
1157f3f0262cSandi    function _lines($lines, $prefix=' ', $color="white") {
1158f3f0262cSandi    }
1159f3f0262cSandi
116060056e69SChristopher Smith    function addedLine($line,$escaped=false) {
116160056e69SChristopher Smith        if (!$escaped){
116260056e69SChristopher Smith            $line = $this->_escape($line);
116360056e69SChristopher Smith        }
1164f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>+</td>'.
1165f76724a4STom N Harris               '<td '.HTMLDiff::css('diff-addedline').'>' .  $line.'</td>';
1166f3f0262cSandi    }
1167f3f0262cSandi
116860056e69SChristopher Smith    function deletedLine($line,$escaped=false) {
116960056e69SChristopher Smith        if (!$escaped){
117060056e69SChristopher Smith            $line = $this->_escape($line);
117160056e69SChristopher Smith        }
1172f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>-</td>'.
1173f76724a4STom N Harris               '<td '.HTMLDiff::css('diff-deletedline').'>' .  $line.'</td>';
1174f3f0262cSandi    }
1175f3f0262cSandi
1176f3f0262cSandi    function emptyLine() {
11773a4ea35cSChristopher Smith        return '<td colspan="'.$this->colspan.'">&#160;</td>';
1178f3f0262cSandi    }
1179f3f0262cSandi
1180f3f0262cSandi    function contextLine($line) {
1181f76724a4STom N Harris        return '<td '.HTMLDiff::css('diff-lineheader').'>&#160;</td>'.
1182333ef4f3SChristopher Smith               '<td '.HTMLDiff::css('diff-context').'>'.$this->_escape($line).'</td>';
1183f3f0262cSandi    }
1184f3f0262cSandi
1185f3f0262cSandi    function _added($lines) {
118660056e69SChristopher Smith        $this->_addedLines($lines,false);
118760056e69SChristopher Smith    }
118860056e69SChristopher Smith
118960056e69SChristopher Smith    function _addedLines($lines,$escaped=false){
1190f3f0262cSandi        foreach ($lines as $line) {
119160056e69SChristopher Smith            print('<tr>' . $this->emptyLine() . $this->addedLine($line,$escaped) . "</tr>\n");
1192f3f0262cSandi        }
1193f3f0262cSandi    }
1194f3f0262cSandi
1195f3f0262cSandi    function _deleted($lines) {
1196f3f0262cSandi        foreach ($lines as $line) {
11977deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n");
1198f3f0262cSandi        }
1199f3f0262cSandi    }
1200f3f0262cSandi
1201f3f0262cSandi    function _context($lines) {
1202f3f0262cSandi        foreach ($lines as $line) {
12037deca91bSRobin Getz            print('<tr>' . $this->contextLine($line) .  $this->contextLine($line) . "</tr>\n");
1204f3f0262cSandi        }
1205f3f0262cSandi    }
1206f3f0262cSandi
1207f3f0262cSandi    function _changed($orig, $closing) {
120860056e69SChristopher Smith        $diff = new WordLevelDiff($orig, $closing);  // this escapes the diff data
1209f3f0262cSandi        $del = $diff->orig();
1210f3f0262cSandi        $add = $diff->closing();
1211f3f0262cSandi
1212f3f0262cSandi        while ($line = array_shift($del)) {
1213f3f0262cSandi            $aline = array_shift($add);
121460056e69SChristopher Smith            print('<tr>' . $this->deletedLine($line,true) . $this->addedLine($aline,true) . "</tr>\n");
1215f3f0262cSandi        }
121660056e69SChristopher Smith        $this->_addedLines($add,true); # If any leftovers
121760056e69SChristopher Smith    }
121860056e69SChristopher Smith
121960056e69SChristopher Smith    function _escape($str) {
122060056e69SChristopher Smith        return hsc($str);
1221f3f0262cSandi    }
1222f3f0262cSandi}
1223f3f0262cSandi
1224812bb04eSRobin Getz/**
1225812bb04eSRobin Getz *  Inline style diff formatter.
1226812bb04eSRobin Getz *
1227812bb04eSRobin Getz */
1228812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter {
1229f76724a4STom N Harris    var $colspan = 2;
1230988c1340SPiyush Mishra
1231988c1340SPiyush Mishra    function __construct() {
1232812bb04eSRobin Getz        $this->leading_context_lines = 2;
1233812bb04eSRobin Getz        $this->trailing_context_lines = 2;
1234812bb04eSRobin Getz    }
1235812bb04eSRobin Getz
123642ea7f44SGerrit Uitslag    /**
123742ea7f44SGerrit Uitslag     * @param Diff $diff
123842ea7f44SGerrit Uitslag     * @return string
123942ea7f44SGerrit Uitslag     */
1240812bb04eSRobin Getz    function format($diff) {
1241812bb04eSRobin Getz        // Preserve whitespaces by converting some to non-breaking spaces.
1242812bb04eSRobin Getz        // Do not convert all of them to allow word-wrap.
1243812bb04eSRobin Getz        $val = parent::format($diff);
1244e260f93bSAnika Henke        $val = str_replace('  ','&#160; ', $val);
1245e260f93bSAnika Henke        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&#160;', $val);
1246812bb04eSRobin Getz        return $val;
1247812bb04eSRobin Getz    }
1248812bb04eSRobin Getz
1249812bb04eSRobin Getz    function _pre($text){
1250812bb04eSRobin Getz        $text = htmlspecialchars($text);
1251812bb04eSRobin Getz        return $text;
1252812bb04eSRobin Getz    }
1253812bb04eSRobin Getz
1254812bb04eSRobin Getz    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1255812bb04eSRobin Getz        global $lang;
1256812bb04eSRobin Getz        if ($xlen != 1)
1257812bb04eSRobin Getz            $xbeg .= "," . $xlen;
1258812bb04eSRobin Getz        if ($ylen != 1)
1259812bb04eSRobin Getz            $ybeg .= "," . $ylen;
12603a4ea35cSChristopher Smith        $r = '<tr><td colspan="'.$this->colspan.'" '.HTMLDiff::css('diff-blockheader').'>@@ '.$lang['line']." -$xbeg +$ybeg @@";
126147a906eaSAndreas Gohr        $r .= ' <span '.HTMLDiff::css('diff-deletedline').'><del>'.$lang['deleted'].'</del></span>';
126247a906eaSAndreas Gohr        $r .= ' <span '.HTMLDiff::css('diff-addedline').'>'.$lang['created'].'</span>';
1263812bb04eSRobin Getz        $r .= "</td></tr>\n";
1264812bb04eSRobin Getz        return $r;
1265812bb04eSRobin Getz    }
1266812bb04eSRobin Getz
1267812bb04eSRobin Getz    function _start_block($header) {
1268812bb04eSRobin Getz        print($header."\n");
1269812bb04eSRobin Getz    }
1270812bb04eSRobin Getz
1271812bb04eSRobin Getz    function _end_block() {
1272812bb04eSRobin Getz    }
1273812bb04eSRobin Getz
1274812bb04eSRobin Getz    function _lines($lines, $prefix=' ', $color="white") {
1275812bb04eSRobin Getz    }
1276812bb04eSRobin Getz
1277812bb04eSRobin Getz    function _added($lines) {
1278812bb04eSRobin Getz        foreach ($lines as $line) {
1279333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-addedline').'>'. $this->_escape($line) . "</td></tr>\n");
1280812bb04eSRobin Getz        }
1281812bb04eSRobin Getz    }
1282812bb04eSRobin Getz
1283812bb04eSRobin Getz    function _deleted($lines) {
1284812bb04eSRobin Getz        foreach ($lines as $line) {
1285333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-deletedline').'><del>' . $this->_escape($line) . "</del></td></tr>\n");
1286812bb04eSRobin Getz        }
1287812bb04eSRobin Getz    }
1288812bb04eSRobin Getz
1289812bb04eSRobin Getz    function _context($lines) {
1290812bb04eSRobin Getz        foreach ($lines as $line) {
1291333ef4f3SChristopher Smith            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td '.HTMLDiff::css('diff-context').'>'. $this->_escape($line) ."</td></tr>\n");
1292812bb04eSRobin Getz        }
1293812bb04eSRobin Getz    }
1294812bb04eSRobin Getz
1295812bb04eSRobin Getz    function _changed($orig, $closing) {
129660056e69SChristopher Smith        $diff = new InlineWordLevelDiff($orig, $closing);  // this escapes the diff data
1297812bb04eSRobin Getz        $add = $diff->inline();
1298812bb04eSRobin Getz
1299812bb04eSRobin Getz        foreach ($add as $line)
1300a69506c5STom N Harris            print('<tr><td '.HTMLDiff::css('diff-lineheader').'>&#160;</td><td>'.$line."</td></tr>\n");
1301812bb04eSRobin Getz    }
130260056e69SChristopher Smith
130360056e69SChristopher Smith    function _escape($str) {
130460056e69SChristopher Smith        return hsc($str);
130560056e69SChristopher Smith    }
1306812bb04eSRobin Getz}
1307812bb04eSRobin Getz
1308a297e675SAndreas Gohr/**
1309a297e675SAndreas Gohr * A class for computing three way diffs.
1310a297e675SAndreas Gohr *
1311a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1312a297e675SAndreas Gohr */
1313a297e675SAndreas Gohrclass Diff3 extends Diff {
1314a297e675SAndreas Gohr
1315a297e675SAndreas Gohr    /**
1316a297e675SAndreas Gohr     * Conflict counter.
1317a297e675SAndreas Gohr     *
1318a297e675SAndreas Gohr     * @var integer
1319a297e675SAndreas Gohr     */
1320a297e675SAndreas Gohr    var $_conflictingBlocks = 0;
1321a297e675SAndreas Gohr
1322a297e675SAndreas Gohr    /**
1323a297e675SAndreas Gohr     * Computes diff between 3 sequences of strings.
1324a297e675SAndreas Gohr     *
1325a297e675SAndreas Gohr     * @param array $orig    The original lines to use.
1326a297e675SAndreas Gohr     * @param array $final1  The first version to compare to.
1327a297e675SAndreas Gohr     * @param array $final2  The second version to compare to.
1328a297e675SAndreas Gohr     */
1329a297e675SAndreas Gohr    function __construct($orig, $final1, $final2) {
1330a297e675SAndreas Gohr        $engine = new _DiffEngine();
1331a297e675SAndreas Gohr
1332a297e675SAndreas Gohr        $this->_edits = $this->_diff3($engine->diff($orig, $final1),
1333a297e675SAndreas Gohr                                      $engine->diff($orig, $final2));
1334a297e675SAndreas Gohr    }
1335a297e675SAndreas Gohr
1336a297e675SAndreas Gohr    /**
1337a297e675SAndreas Gohr     * Returns the merged lines
1338a297e675SAndreas Gohr     *
1339a297e675SAndreas Gohr     * @param string $label1  label for first version
1340a297e675SAndreas Gohr     * @param string $label2  label for second version
134134df7cb0SAndreas Gohr     * @param string $label3  separator between versions
1342a297e675SAndreas Gohr     * @return array          lines of the merged text
1343a297e675SAndreas Gohr     */
134434df7cb0SAndreas Gohr    function mergedOutput($label1='<<<<<<<',$label2='>>>>>>>',$label3='=======') {
1345a297e675SAndreas Gohr        $lines = array();
1346a297e675SAndreas Gohr        foreach ($this->_edits as $edit) {
1347a297e675SAndreas Gohr            if ($edit->isConflict()) {
1348a297e675SAndreas Gohr                /* FIXME: this should probably be moved somewhere else. */
1349a297e675SAndreas Gohr                $lines = array_merge($lines,
135034df7cb0SAndreas Gohr                                     array($label1),
1351a297e675SAndreas Gohr                                     $edit->final1,
135234df7cb0SAndreas Gohr                                     array($label3),
1353a297e675SAndreas Gohr                                     $edit->final2,
135434df7cb0SAndreas Gohr                                     array($label2));
1355a297e675SAndreas Gohr                $this->_conflictingBlocks++;
1356a297e675SAndreas Gohr            } else {
1357a297e675SAndreas Gohr                $lines = array_merge($lines, $edit->merged());
1358a297e675SAndreas Gohr            }
1359a297e675SAndreas Gohr        }
1360a297e675SAndreas Gohr
1361a297e675SAndreas Gohr        return $lines;
1362a297e675SAndreas Gohr    }
1363a297e675SAndreas Gohr
1364a297e675SAndreas Gohr    /**
1365a297e675SAndreas Gohr     * @access private
1366*f50a239bSTakamura     *
1367*f50a239bSTakamura     * @param array $edits1
1368*f50a239bSTakamura     * @param array $edits2
1369*f50a239bSTakamura     *
1370*f50a239bSTakamura     * @return array
1371a297e675SAndreas Gohr     */
1372a297e675SAndreas Gohr    function _diff3($edits1, $edits2) {
1373a297e675SAndreas Gohr        $edits = array();
1374a297e675SAndreas Gohr        $bb = new _Diff3_BlockBuilder();
1375a297e675SAndreas Gohr
1376a297e675SAndreas Gohr        $e1 = current($edits1);
1377a297e675SAndreas Gohr        $e2 = current($edits2);
1378a297e675SAndreas Gohr        while ($e1 || $e2) {
1379a297e675SAndreas Gohr            if ($e1 && $e2 && is_a($e1, '_DiffOp_copy') && is_a($e2, '_DiffOp_copy')) {
1380a297e675SAndreas Gohr                /* We have copy blocks from both diffs. This is the (only)
1381a297e675SAndreas Gohr                 * time we want to emit a diff3 copy block.  Flush current
1382a297e675SAndreas Gohr                 * diff3 diff block, if any. */
1383a297e675SAndreas Gohr                if ($edit = $bb->finish()) {
1384a297e675SAndreas Gohr                    $edits[] = $edit;
1385a297e675SAndreas Gohr                }
1386a297e675SAndreas Gohr
1387a297e675SAndreas Gohr                $ncopy = min($e1->norig(), $e2->norig());
1388a297e675SAndreas Gohr                assert($ncopy > 0);
1389a297e675SAndreas Gohr                $edits[] = new _Diff3_Op_copy(array_slice($e1->orig, 0, $ncopy));
1390a297e675SAndreas Gohr
1391a297e675SAndreas Gohr                if ($e1->norig() > $ncopy) {
1392a297e675SAndreas Gohr                    array_splice($e1->orig, 0, $ncopy);
1393a297e675SAndreas Gohr                    array_splice($e1->closing, 0, $ncopy);
1394a297e675SAndreas Gohr                } else {
1395a297e675SAndreas Gohr                    $e1 = next($edits1);
1396a297e675SAndreas Gohr                }
1397a297e675SAndreas Gohr
1398a297e675SAndreas Gohr                if ($e2->norig() > $ncopy) {
1399a297e675SAndreas Gohr                    array_splice($e2->orig, 0, $ncopy);
1400a297e675SAndreas Gohr                    array_splice($e2->closing, 0, $ncopy);
1401a297e675SAndreas Gohr                } else {
1402a297e675SAndreas Gohr                    $e2 = next($edits2);
1403a297e675SAndreas Gohr                }
1404a297e675SAndreas Gohr            } else {
1405a297e675SAndreas Gohr                if ($e1 && $e2) {
1406a297e675SAndreas Gohr                    if ($e1->orig && $e2->orig) {
1407a297e675SAndreas Gohr                        $norig = min($e1->norig(), $e2->norig());
1408a297e675SAndreas Gohr                        $orig = array_splice($e1->orig, 0, $norig);
1409a297e675SAndreas Gohr                        array_splice($e2->orig, 0, $norig);
1410a297e675SAndreas Gohr                        $bb->input($orig);
1411a297e675SAndreas Gohr                    }
1412a297e675SAndreas Gohr
1413a297e675SAndreas Gohr                    if (is_a($e1, '_DiffOp_copy')) {
1414a297e675SAndreas Gohr                        $bb->out1(array_splice($e1->closing, 0, $norig));
1415a297e675SAndreas Gohr                    }
1416a297e675SAndreas Gohr
1417a297e675SAndreas Gohr                    if (is_a($e2, '_DiffOp_copy')) {
1418a297e675SAndreas Gohr                        $bb->out2(array_splice($e2->closing, 0, $norig));
1419a297e675SAndreas Gohr                    }
1420a297e675SAndreas Gohr                }
1421a297e675SAndreas Gohr
1422a297e675SAndreas Gohr                if ($e1 && ! $e1->orig) {
1423a297e675SAndreas Gohr                    $bb->out1($e1->closing);
1424a297e675SAndreas Gohr                    $e1 = next($edits1);
1425a297e675SAndreas Gohr                }
1426a297e675SAndreas Gohr                if ($e2 && ! $e2->orig) {
1427a297e675SAndreas Gohr                    $bb->out2($e2->closing);
1428a297e675SAndreas Gohr                    $e2 = next($edits2);
1429a297e675SAndreas Gohr                }
1430a297e675SAndreas Gohr            }
1431a297e675SAndreas Gohr        }
1432a297e675SAndreas Gohr
1433a297e675SAndreas Gohr        if ($edit = $bb->finish()) {
1434a297e675SAndreas Gohr            $edits[] = $edit;
1435a297e675SAndreas Gohr        }
1436a297e675SAndreas Gohr
1437a297e675SAndreas Gohr        return $edits;
1438a297e675SAndreas Gohr    }
1439a297e675SAndreas Gohr}
1440a297e675SAndreas Gohr
1441a297e675SAndreas Gohr/**
1442a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1443a297e675SAndreas Gohr *
1444a297e675SAndreas Gohr * @access private
1445a297e675SAndreas Gohr */
1446a297e675SAndreas Gohrclass _Diff3_Op {
1447a297e675SAndreas Gohr
1448a297e675SAndreas Gohr    function __construct($orig = false, $final1 = false, $final2 = false) {
1449a297e675SAndreas Gohr        $this->orig = $orig ? $orig : array();
1450a297e675SAndreas Gohr        $this->final1 = $final1 ? $final1 : array();
1451a297e675SAndreas Gohr        $this->final2 = $final2 ? $final2 : array();
1452a297e675SAndreas Gohr    }
1453a297e675SAndreas Gohr
1454a297e675SAndreas Gohr    function merged() {
1455a297e675SAndreas Gohr        if (!isset($this->_merged)) {
1456a297e675SAndreas Gohr            if ($this->final1 === $this->final2) {
1457a297e675SAndreas Gohr                $this->_merged = &$this->final1;
1458a297e675SAndreas Gohr            } elseif ($this->final1 === $this->orig) {
1459a297e675SAndreas Gohr                $this->_merged = &$this->final2;
1460a297e675SAndreas Gohr            } elseif ($this->final2 === $this->orig) {
1461a297e675SAndreas Gohr                $this->_merged = &$this->final1;
1462a297e675SAndreas Gohr            } else {
1463a297e675SAndreas Gohr                $this->_merged = false;
1464a297e675SAndreas Gohr            }
1465a297e675SAndreas Gohr        }
1466a297e675SAndreas Gohr
1467a297e675SAndreas Gohr        return $this->_merged;
1468a297e675SAndreas Gohr    }
1469a297e675SAndreas Gohr
1470a297e675SAndreas Gohr    function isConflict() {
1471a297e675SAndreas Gohr        return $this->merged() === false;
1472a297e675SAndreas Gohr    }
1473a297e675SAndreas Gohr
1474a297e675SAndreas Gohr}
1475a297e675SAndreas Gohr
1476a297e675SAndreas Gohr/**
1477a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1478a297e675SAndreas Gohr *
1479a297e675SAndreas Gohr * @access private
1480a297e675SAndreas Gohr */
1481a297e675SAndreas Gohrclass _Diff3_Op_copy extends _Diff3_Op {
1482a297e675SAndreas Gohr
1483a297e675SAndreas Gohr    function __construct($lines = false) {
1484a297e675SAndreas Gohr        $this->orig = $lines ? $lines : array();
1485a297e675SAndreas Gohr        $this->final1 = &$this->orig;
1486a297e675SAndreas Gohr        $this->final2 = &$this->orig;
1487a297e675SAndreas Gohr    }
1488a297e675SAndreas Gohr
1489a297e675SAndreas Gohr    function merged() {
1490a297e675SAndreas Gohr        return $this->orig;
1491a297e675SAndreas Gohr    }
1492a297e675SAndreas Gohr
1493a297e675SAndreas Gohr    function isConflict() {
1494a297e675SAndreas Gohr        return false;
1495a297e675SAndreas Gohr    }
1496a297e675SAndreas Gohr}
1497a297e675SAndreas Gohr
1498a297e675SAndreas Gohr/**
1499a297e675SAndreas Gohr * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
1500a297e675SAndreas Gohr *
1501a297e675SAndreas Gohr * @access private
1502a297e675SAndreas Gohr */
1503a297e675SAndreas Gohrclass _Diff3_BlockBuilder {
1504a297e675SAndreas Gohr
1505a297e675SAndreas Gohr    function __construct() {
1506a297e675SAndreas Gohr        $this->_init();
1507a297e675SAndreas Gohr    }
1508a297e675SAndreas Gohr
1509a297e675SAndreas Gohr    function input($lines) {
1510a297e675SAndreas Gohr        if ($lines) {
1511a297e675SAndreas Gohr            $this->_append($this->orig, $lines);
1512a297e675SAndreas Gohr        }
1513a297e675SAndreas Gohr    }
1514a297e675SAndreas Gohr
1515a297e675SAndreas Gohr    function out1($lines) {
1516a297e675SAndreas Gohr        if ($lines) {
1517a297e675SAndreas Gohr            $this->_append($this->final1, $lines);
1518a297e675SAndreas Gohr        }
1519a297e675SAndreas Gohr    }
1520a297e675SAndreas Gohr
1521a297e675SAndreas Gohr    function out2($lines) {
1522a297e675SAndreas Gohr        if ($lines) {
1523a297e675SAndreas Gohr            $this->_append($this->final2, $lines);
1524a297e675SAndreas Gohr        }
1525a297e675SAndreas Gohr    }
1526a297e675SAndreas Gohr
1527a297e675SAndreas Gohr    function isEmpty() {
1528a297e675SAndreas Gohr        return !$this->orig && !$this->final1 && !$this->final2;
1529a297e675SAndreas Gohr    }
1530a297e675SAndreas Gohr
1531a297e675SAndreas Gohr    function finish() {
1532a297e675SAndreas Gohr        if ($this->isEmpty()) {
1533a297e675SAndreas Gohr            return false;
1534a297e675SAndreas Gohr        } else {
1535a297e675SAndreas Gohr            $edit = new _Diff3_Op($this->orig, $this->final1, $this->final2);
1536a297e675SAndreas Gohr            $this->_init();
1537a297e675SAndreas Gohr            return $edit;
1538a297e675SAndreas Gohr        }
1539a297e675SAndreas Gohr    }
1540a297e675SAndreas Gohr
1541a297e675SAndreas Gohr    function _init() {
1542a297e675SAndreas Gohr        $this->orig = $this->final1 = $this->final2 = array();
1543a297e675SAndreas Gohr    }
1544a297e675SAndreas Gohr
1545a297e675SAndreas Gohr    function _append(&$array, $lines) {
1546a297e675SAndreas Gohr        array_splice($array, sizeof($array), 0, $lines);
1547a297e675SAndreas Gohr    }
1548a297e675SAndreas Gohr}
1549340756e4Sandi
1550e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 :
1551