xref: /dokuwiki/inc/DifferenceEngine.php (revision 5048c277bbf430c50bf87cd17ef1e5e21fcb095a)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi    var $type;
14f3f0262cSandi    var $orig;
15f3f0262cSandi    var $closing;
16f3f0262cSandi
17f3f0262cSandi    function reverse() {
18f3f0262cSandi        trigger_error("pure virtual", E_USER_ERROR);
19f3f0262cSandi    }
20f3f0262cSandi
21f3f0262cSandi    function norig() {
22f5b78785SAndreas Gohr        return $this->orig ? count($this->orig) : 0;
23f3f0262cSandi    }
24f3f0262cSandi
25f3f0262cSandi    function nclosing() {
26f5b78785SAndreas Gohr        return $this->closing ? count($this->closing) : 0;
27f3f0262cSandi    }
28f3f0262cSandi}
29f3f0262cSandi
30f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
31f3f0262cSandi    var $type = 'copy';
32f3f0262cSandi
33f3f0262cSandi    function _DiffOp_Copy($orig, $closing = false) {
34f3f0262cSandi        if (!is_array($closing))
35f3f0262cSandi            $closing = $orig;
36f3f0262cSandi        $this->orig = $orig;
37f3f0262cSandi        $this->closing = $closing;
38f3f0262cSandi    }
39f3f0262cSandi
40f3f0262cSandi    function reverse() {
41f3f0262cSandi        return new _DiffOp_Copy($this->closing, $this->orig);
42f3f0262cSandi    }
43f3f0262cSandi}
44f3f0262cSandi
45f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
46f3f0262cSandi    var $type = 'delete';
47f3f0262cSandi
48f3f0262cSandi    function _DiffOp_Delete($lines) {
49f3f0262cSandi        $this->orig = $lines;
50f3f0262cSandi        $this->closing = false;
51f3f0262cSandi    }
52f3f0262cSandi
53f3f0262cSandi    function reverse() {
54f3f0262cSandi        return new _DiffOp_Add($this->orig);
55f3f0262cSandi    }
56f3f0262cSandi}
57f3f0262cSandi
58f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
59f3f0262cSandi    var $type = 'add';
60f3f0262cSandi
61f3f0262cSandi    function _DiffOp_Add($lines) {
62f3f0262cSandi        $this->closing = $lines;
63f3f0262cSandi        $this->orig = false;
64f3f0262cSandi    }
65f3f0262cSandi
66f3f0262cSandi    function reverse() {
67f3f0262cSandi        return new _DiffOp_Delete($this->closing);
68f3f0262cSandi    }
69f3f0262cSandi}
70f3f0262cSandi
71f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
72f3f0262cSandi    var $type = 'change';
73f3f0262cSandi
74f3f0262cSandi    function _DiffOp_Change($orig, $closing) {
75f3f0262cSandi        $this->orig = $orig;
76f3f0262cSandi        $this->closing = $closing;
77f3f0262cSandi    }
78f3f0262cSandi
79f3f0262cSandi    function reverse() {
80f3f0262cSandi        return new _DiffOp_Change($this->closing, $this->orig);
81f3f0262cSandi    }
82f3f0262cSandi}
83f3f0262cSandi
84f3f0262cSandi
85f3f0262cSandi/**
86f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
87f3f0262cSandi *
88f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
89f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
90f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
91f3f0262cSandi *
92f3f0262cSandi * More ideas are taken from:
93f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
94f3f0262cSandi *
95f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
96f3f0262cSandi * diffutils-2.7, which can be found at:
97f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
98f3f0262cSandi *
99f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
100f3f0262cSandi * are my own.
101f3f0262cSandi *
102f3f0262cSandi * @author Geoffrey T. Dairiki
103f3f0262cSandi * @access private
104f3f0262cSandi */
105f5b78785SAndreas Gohrclass _DiffEngine {
106f5b78785SAndreas Gohr
107f3f0262cSandi    function diff($from_lines, $to_lines) {
108f5b78785SAndreas Gohr        $n_from = count($from_lines);
109f5b78785SAndreas Gohr        $n_to = count($to_lines);
110f3f0262cSandi
111f3f0262cSandi        $this->xchanged = $this->ychanged = array();
112f3f0262cSandi        $this->xv = $this->yv = array();
113f3f0262cSandi        $this->xind = $this->yind = array();
114f3f0262cSandi        unset($this->seq);
115f3f0262cSandi        unset($this->in_seq);
116f3f0262cSandi        unset($this->lcs);
117f3f0262cSandi
118f3f0262cSandi        // Skip leading common lines.
119f3f0262cSandi        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
120f3f0262cSandi            if ($from_lines[$skip] != $to_lines[$skip])
121f3f0262cSandi                break;
122f3f0262cSandi            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
123f3f0262cSandi        }
124f3f0262cSandi        // Skip trailing common lines.
125f5b78785SAndreas Gohr        $xi = $n_from;
126f5b78785SAndreas Gohr        $yi = $n_to;
127f3f0262cSandi        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
128f3f0262cSandi            if ($from_lines[$xi] != $to_lines[$yi])
129f3f0262cSandi                break;
130f3f0262cSandi            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
131f3f0262cSandi        }
132f3f0262cSandi
133f3f0262cSandi        // Ignore lines which do not exist in both files.
134f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
135f3f0262cSandi            $xhash[$from_lines[$xi]] = 1;
136f3f0262cSandi        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
137f3f0262cSandi            $line = $to_lines[$yi];
138f3f0262cSandi            if (($this->ychanged[$yi] = empty($xhash[$line])))
139f3f0262cSandi                continue;
140f3f0262cSandi            $yhash[$line] = 1;
141f3f0262cSandi            $this->yv[] = $line;
142f3f0262cSandi            $this->yind[] = $yi;
143f3f0262cSandi        }
144f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
145f3f0262cSandi            $line = $from_lines[$xi];
146f3f0262cSandi            if (($this->xchanged[$xi] = empty($yhash[$line])))
147f3f0262cSandi                continue;
148f3f0262cSandi            $this->xv[] = $line;
149f3f0262cSandi            $this->xind[] = $xi;
150f3f0262cSandi        }
151f3f0262cSandi
152f3f0262cSandi        // Find the LCS.
153f5b78785SAndreas Gohr        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
154f3f0262cSandi
155f3f0262cSandi        // Merge edits when possible
156f3f0262cSandi        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
157f3f0262cSandi        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
158f3f0262cSandi
159f3f0262cSandi        // Compute the edit operations.
160f3f0262cSandi        $edits = array();
161f3f0262cSandi        $xi = $yi = 0;
162f3f0262cSandi        while ($xi < $n_from || $yi < $n_to) {
163f3f0262cSandi            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
164f3f0262cSandi            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
165f3f0262cSandi
166f3f0262cSandi            // Skip matching "snake".
167f3f0262cSandi            $copy = array();
1687deca91bSRobin Getz            while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
169f3f0262cSandi                $copy[] = $from_lines[$xi++];
170f3f0262cSandi                ++$yi;
171f3f0262cSandi            }
172f3f0262cSandi            if ($copy)
173f3f0262cSandi                $edits[] = new _DiffOp_Copy($copy);
174f3f0262cSandi
175f3f0262cSandi            // Find deletes & adds.
176f3f0262cSandi            $delete = array();
177f3f0262cSandi            while ($xi < $n_from && $this->xchanged[$xi])
178f3f0262cSandi                $delete[] = $from_lines[$xi++];
179f3f0262cSandi
180f3f0262cSandi            $add = array();
181f3f0262cSandi            while ($yi < $n_to && $this->ychanged[$yi])
182f3f0262cSandi                $add[] = $to_lines[$yi++];
183f3f0262cSandi
184f3f0262cSandi            if ($delete && $add)
185f3f0262cSandi                $edits[] = new _DiffOp_Change($delete, $add);
186f3f0262cSandi            elseif ($delete)
187f3f0262cSandi                $edits[] = new _DiffOp_Delete($delete);
188f3f0262cSandi            elseif ($add)
189f3f0262cSandi                $edits[] = new _DiffOp_Add($add);
190f3f0262cSandi        }
191f3f0262cSandi        return $edits;
192f3f0262cSandi    }
193f3f0262cSandi
194f3f0262cSandi
19515fae107Sandi    /**
19615fae107Sandi     * Divide the Largest Common Subsequence (LCS) of the sequences
197f3f0262cSandi     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
198f3f0262cSandi     * sized segments.
199f3f0262cSandi     *
200f3f0262cSandi     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
201f3f0262cSandi     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
202f3f0262cSandi     * sub sequences.  The first sub-sequence is contained in [X0, X1),
203f3f0262cSandi     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
204f3f0262cSandi     * that (X0, Y0) == (XOFF, YOFF) and
205f3f0262cSandi     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
206f3f0262cSandi     *
207f3f0262cSandi     * This function assumes that the first lines of the specified portions
208f3f0262cSandi     * of the two files do not match, and likewise that the last lines do not
209f3f0262cSandi     * match.  The caller must trim matching lines from the beginning and end
210f3f0262cSandi     * of the portions it is going to specify.
211f3f0262cSandi     */
212f3f0262cSandi    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
213f3f0262cSandi        $flip = false;
214f3f0262cSandi
215f3f0262cSandi        if ($xlim - $xoff > $ylim - $yoff) {
216f3f0262cSandi            // Things seems faster (I'm not sure I understand why)
217f3f0262cSandi            // when the shortest sequence in X.
218f3f0262cSandi            $flip = true;
2197deca91bSRobin Getz            list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
220f3f0262cSandi        }
221f3f0262cSandi
222f3f0262cSandi        if ($flip)
223f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
224f3f0262cSandi                $ymatches[$this->xv[$i]][] = $i;
225f3f0262cSandi        else
226f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
227f3f0262cSandi                $ymatches[$this->yv[$i]][] = $i;
228f3f0262cSandi
229f3f0262cSandi        $this->lcs = 0;
230f3f0262cSandi        $this->seq[0]= $yoff - 1;
231f3f0262cSandi        $this->in_seq = array();
232f3f0262cSandi        $ymids[0] = array();
233f3f0262cSandi
234f3f0262cSandi        $numer = $xlim - $xoff + $nchunks - 1;
235f3f0262cSandi        $x = $xoff;
236f3f0262cSandi        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
237f3f0262cSandi            if ($chunk > 0)
238f3f0262cSandi                for ($i = 0; $i <= $this->lcs; $i++)
239f3f0262cSandi                    $ymids[$i][$chunk-1] = $this->seq[$i];
240f3f0262cSandi
241f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
242f3f0262cSandi            for ( ; $x < $x1; $x++) {
243f3f0262cSandi                $line = $flip ? $this->yv[$x] : $this->xv[$x];
244f3f0262cSandi                if (empty($ymatches[$line]))
245f3f0262cSandi                    continue;
246f3f0262cSandi                $matches = $ymatches[$line];
247f3f0262cSandi                reset($matches);
248f3f0262cSandi                while (list ($junk, $y) = each($matches))
249f3f0262cSandi                    if (empty($this->in_seq[$y])) {
250f3f0262cSandi                        $k = $this->_lcs_pos($y);
251f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
252f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
253f3f0262cSandi                        break;
254f3f0262cSandi                    }
255f3f0262cSandi                while (list ($junk, $y) = each($matches)) {
256f3f0262cSandi                    if ($y > $this->seq[$k-1]) {
257f3f0262cSandi                        USE_ASSERTS && assert($y < $this->seq[$k]);
258f3f0262cSandi                        // Optimization: this is a common case:
259f3f0262cSandi                        //  next match is just replacing previous match.
260f3f0262cSandi                        $this->in_seq[$this->seq[$k]] = false;
261f3f0262cSandi                        $this->seq[$k] = $y;
262f3f0262cSandi                        $this->in_seq[$y] = 1;
263f3f0262cSandi                    }
264f3f0262cSandi                    else if (empty($this->in_seq[$y])) {
265f3f0262cSandi                        $k = $this->_lcs_pos($y);
266f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
267f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
268f3f0262cSandi                    }
269f3f0262cSandi                }
270f3f0262cSandi            }
271f3f0262cSandi        }
272f3f0262cSandi
273f3f0262cSandi        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
274f3f0262cSandi        $ymid = $ymids[$this->lcs];
275f3f0262cSandi        for ($n = 0; $n < $nchunks - 1; $n++) {
276f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
277f3f0262cSandi            $y1 = $ymid[$n] + 1;
278f3f0262cSandi            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
279f3f0262cSandi        }
280f3f0262cSandi        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
281f3f0262cSandi
282f3f0262cSandi        return array($this->lcs, $seps);
283f3f0262cSandi    }
284f3f0262cSandi
285f3f0262cSandi    function _lcs_pos($ypos) {
286f3f0262cSandi        $end = $this->lcs;
287f3f0262cSandi        if ($end == 0 || $ypos > $this->seq[$end]) {
288f3f0262cSandi            $this->seq[++$this->lcs] = $ypos;
289f3f0262cSandi            $this->in_seq[$ypos] = 1;
290f3f0262cSandi            return $this->lcs;
291f3f0262cSandi        }
292f3f0262cSandi
293f3f0262cSandi        $beg = 1;
294f3f0262cSandi        while ($beg < $end) {
295f3f0262cSandi            $mid = (int)(($beg + $end) / 2);
296f3f0262cSandi            if ($ypos > $this->seq[$mid])
297f3f0262cSandi                $beg = $mid + 1;
298f3f0262cSandi            else
299f3f0262cSandi                $end = $mid;
300f3f0262cSandi        }
301f3f0262cSandi
302f3f0262cSandi        USE_ASSERTS && assert($ypos != $this->seq[$end]);
303f3f0262cSandi
304f3f0262cSandi        $this->in_seq[$this->seq[$end]] = false;
305f3f0262cSandi        $this->seq[$end] = $ypos;
306f3f0262cSandi        $this->in_seq[$ypos] = 1;
307f3f0262cSandi        return $end;
308f3f0262cSandi    }
309f3f0262cSandi
31015fae107Sandi    /**
31115fae107Sandi     * Find LCS of two sequences.
312f3f0262cSandi     *
313f3f0262cSandi     * The results are recorded in the vectors $this->{x,y}changed[], by
314f3f0262cSandi     * storing a 1 in the element for each line that is an insertion
315f3f0262cSandi     * or deletion (ie. is not in the LCS).
316f3f0262cSandi     *
317f3f0262cSandi     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
318f3f0262cSandi     *
319f3f0262cSandi     * Note that XLIM, YLIM are exclusive bounds.
320f3f0262cSandi     * All line numbers are origin-0 and discarded lines are not counted.
321f3f0262cSandi     */
322f3f0262cSandi    function _compareseq($xoff, $xlim, $yoff, $ylim) {
323f3f0262cSandi        // Slide down the bottom initial diagonal.
3247deca91bSRobin Getz        while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
325f3f0262cSandi            ++$xoff;
326f3f0262cSandi            ++$yoff;
327f3f0262cSandi        }
328f3f0262cSandi
329f3f0262cSandi        // Slide up the top initial diagonal.
3307deca91bSRobin Getz        while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
331f3f0262cSandi            --$xlim;
332f3f0262cSandi            --$ylim;
333f3f0262cSandi        }
334f3f0262cSandi
335f3f0262cSandi        if ($xoff == $xlim || $yoff == $ylim)
336f3f0262cSandi            $lcs = 0;
337f3f0262cSandi        else {
338f3f0262cSandi            // This is ad hoc but seems to work well.
339f3f0262cSandi            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
340f3f0262cSandi            //$nchunks = max(2,min(8,(int)$nchunks));
341f3f0262cSandi            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
342f3f0262cSandi            list ($lcs, $seps)
343f3f0262cSandi                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
344f3f0262cSandi        }
345f3f0262cSandi
346f3f0262cSandi        if ($lcs == 0) {
347f3f0262cSandi            // X and Y sequences have no common subsequence:
348f3f0262cSandi            // mark all changed.
349f3f0262cSandi            while ($yoff < $ylim)
350f3f0262cSandi                $this->ychanged[$this->yind[$yoff++]] = 1;
351f3f0262cSandi            while ($xoff < $xlim)
352f3f0262cSandi                $this->xchanged[$this->xind[$xoff++]] = 1;
353f3f0262cSandi        }
354f3f0262cSandi        else {
355f3f0262cSandi            // Use the partitions to split this problem into subproblems.
356f3f0262cSandi            reset($seps);
357f3f0262cSandi            $pt1 = $seps[0];
358f3f0262cSandi            while ($pt2 = next($seps)) {
359f3f0262cSandi                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
360f3f0262cSandi                $pt1 = $pt2;
361f3f0262cSandi            }
362f3f0262cSandi        }
363f3f0262cSandi    }
364f3f0262cSandi
36515fae107Sandi    /**
36615fae107Sandi     * Adjust inserts/deletes of identical lines to join changes
367f3f0262cSandi     * as much as possible.
368f3f0262cSandi     *
369f3f0262cSandi     * We do something when a run of changed lines include a
370f3f0262cSandi     * line at one end and has an excluded, identical line at the other.
371f3f0262cSandi     * We are free to choose which identical line is included.
372f3f0262cSandi     * `compareseq' usually chooses the one at the beginning,
373f3f0262cSandi     * but usually it is cleaner to consider the following identical line
374f3f0262cSandi     * to be the "change".
375f3f0262cSandi     *
376f3f0262cSandi     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
377f3f0262cSandi     */
378f3f0262cSandi    function _shift_boundaries($lines, &$changed, $other_changed) {
379f3f0262cSandi        $i = 0;
380f3f0262cSandi        $j = 0;
381f3f0262cSandi
382f5b78785SAndreas Gohr        USE_ASSERTS && assert('count($lines) == count($changed)');
383f5b78785SAndreas Gohr        $len = count($lines);
384f5b78785SAndreas Gohr        $other_len = count($other_changed);
385f3f0262cSandi
386f3f0262cSandi        while (1) {
387f3f0262cSandi            /*
388f3f0262cSandi             * Scan forwards to find beginning of another run of changes.
389f3f0262cSandi             * Also keep track of the corresponding point in the other file.
390f3f0262cSandi             *
391f3f0262cSandi             * Throughout this code, $i and $j are adjusted together so that
392f3f0262cSandi             * the first $i elements of $changed and the first $j elements
393f3f0262cSandi             * of $other_changed both contain the same number of zeros
394f3f0262cSandi             * (unchanged lines).
395f3f0262cSandi             * Furthermore, $j is always kept so that $j == $other_len or
396f3f0262cSandi             * $other_changed[$j] == false.
397f3f0262cSandi             */
398f3f0262cSandi            while ($j < $other_len && $other_changed[$j])
399f3f0262cSandi                $j++;
400f3f0262cSandi
401f3f0262cSandi            while ($i < $len && ! $changed[$i]) {
402f3f0262cSandi                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
403f5b78785SAndreas Gohr                $i++;
404f5b78785SAndreas Gohr                $j++;
405f3f0262cSandi                while ($j < $other_len && $other_changed[$j])
406f3f0262cSandi                    $j++;
407f3f0262cSandi            }
408f3f0262cSandi
409f3f0262cSandi            if ($i == $len)
410f3f0262cSandi                break;
411f3f0262cSandi
412f3f0262cSandi            $start = $i;
413f3f0262cSandi
414f3f0262cSandi            // Find the end of this run of changes.
415f3f0262cSandi            while (++$i < $len && $changed[$i])
416f3f0262cSandi                continue;
417f3f0262cSandi
418f3f0262cSandi            do {
419f3f0262cSandi                /*
420f3f0262cSandi                 * Record the length of this run of changes, so that
421f3f0262cSandi                 * we can later determine whether the run has grown.
422f3f0262cSandi                 */
423f3f0262cSandi                $runlength = $i - $start;
424f3f0262cSandi
425f3f0262cSandi                /*
426f3f0262cSandi                 * Move the changed region back, so long as the
427f3f0262cSandi                 * previous unchanged line matches the last changed one.
428f3f0262cSandi                 * This merges with previous changed regions.
429f3f0262cSandi                 */
430f3f0262cSandi                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
431f3f0262cSandi                    $changed[--$start] = 1;
432f3f0262cSandi                    $changed[--$i] = false;
433f3f0262cSandi                    while ($start > 0 && $changed[$start - 1])
434f3f0262cSandi                        $start--;
435f3f0262cSandi                    USE_ASSERTS && assert('$j > 0');
436f3f0262cSandi                    while ($other_changed[--$j])
437f3f0262cSandi                        continue;
438f3f0262cSandi                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
439f3f0262cSandi                }
440f3f0262cSandi
441f3f0262cSandi                /*
442f3f0262cSandi                 * Set CORRESPONDING to the end of the changed run, at the last
443f3f0262cSandi                 * point where it corresponds to a changed run in the other file.
444f3f0262cSandi                 * CORRESPONDING == LEN means no such point has been found.
445f3f0262cSandi                 */
446f3f0262cSandi                $corresponding = $j < $other_len ? $i : $len;
447f3f0262cSandi
448f3f0262cSandi                /*
449f3f0262cSandi                 * Move the changed region forward, so long as the
450f3f0262cSandi                 * first changed line matches the following unchanged one.
451f3f0262cSandi                 * This merges with following changed regions.
452f3f0262cSandi                 * Do this second, so that if there are no merges,
453f3f0262cSandi                 * the changed region is moved forward as far as possible.
454f3f0262cSandi                 */
455f3f0262cSandi                while ($i < $len && $lines[$start] == $lines[$i]) {
456f3f0262cSandi                    $changed[$start++] = false;
457f3f0262cSandi                    $changed[$i++] = 1;
458f3f0262cSandi                    while ($i < $len && $changed[$i])
459f3f0262cSandi                        $i++;
460f3f0262cSandi
461f3f0262cSandi                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
462f3f0262cSandi                    $j++;
463f3f0262cSandi                    if ($j < $other_len && $other_changed[$j]) {
464f3f0262cSandi                        $corresponding = $i;
465f3f0262cSandi                        while ($j < $other_len && $other_changed[$j])
466f3f0262cSandi                            $j++;
467f3f0262cSandi                    }
468f3f0262cSandi                }
469f3f0262cSandi            } while ($runlength != $i - $start);
470f3f0262cSandi
471f3f0262cSandi            /*
472f3f0262cSandi             * If possible, move the fully-merged run of changes
473f3f0262cSandi             * back to a corresponding run in the other file.
474f3f0262cSandi             */
475f3f0262cSandi            while ($corresponding < $i) {
476f3f0262cSandi                $changed[--$start] = 1;
477f3f0262cSandi                $changed[--$i] = 0;
478f3f0262cSandi                USE_ASSERTS && assert('$j > 0');
479f3f0262cSandi                while ($other_changed[--$j])
480f3f0262cSandi                    continue;
481f3f0262cSandi                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
482f3f0262cSandi            }
483f3f0262cSandi        }
484f3f0262cSandi    }
485f3f0262cSandi}
486f3f0262cSandi
487f3f0262cSandi/**
488f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
489f3f0262cSandi */
490f5b78785SAndreas Gohrclass Diff {
491f5b78785SAndreas Gohr
492f3f0262cSandi    var $edits;
493f3f0262cSandi
494f3f0262cSandi    /**
495f3f0262cSandi     * Constructor.
496f3f0262cSandi     * Computes diff between sequences of strings.
497f3f0262cSandi     *
498f3f0262cSandi     * @param $from_lines array An array of strings.
499f3f0262cSandi     *      (Typically these are lines from a file.)
500f3f0262cSandi     * @param $to_lines array An array of strings.
501f3f0262cSandi     */
502f3f0262cSandi    function Diff($from_lines, $to_lines) {
503f3f0262cSandi        $eng = new _DiffEngine;
504f3f0262cSandi        $this->edits = $eng->diff($from_lines, $to_lines);
505f3f0262cSandi        //$this->_check($from_lines, $to_lines);
506f3f0262cSandi    }
507f3f0262cSandi
508f3f0262cSandi    /**
509f3f0262cSandi     * Compute reversed Diff.
510f3f0262cSandi     *
511f3f0262cSandi     * SYNOPSIS:
512f3f0262cSandi     *
513f3f0262cSandi     *  $diff = new Diff($lines1, $lines2);
514f3f0262cSandi     *  $rev = $diff->reverse();
515f3f0262cSandi     * @return object A Diff object representing the inverse of the
516f3f0262cSandi     *          original diff.
517f3f0262cSandi     */
518f3f0262cSandi    function reverse() {
519f3f0262cSandi        $rev = $this;
520f3f0262cSandi        $rev->edits = array();
521f3f0262cSandi        foreach ($this->edits as $edit) {
522f3f0262cSandi            $rev->edits[] = $edit->reverse();
523f3f0262cSandi        }
524f3f0262cSandi        return $rev;
525f3f0262cSandi    }
526f3f0262cSandi
527f3f0262cSandi    /**
528f3f0262cSandi     * Check for empty diff.
529f3f0262cSandi     *
530f3f0262cSandi     * @return bool True iff two sequences were identical.
531f3f0262cSandi     */
532f3f0262cSandi    function isEmpty() {
533f3f0262cSandi        foreach ($this->edits as $edit) {
534f3f0262cSandi            if ($edit->type != 'copy')
535f3f0262cSandi                return false;
536f3f0262cSandi        }
537f3f0262cSandi        return true;
538f3f0262cSandi    }
539f3f0262cSandi
540f3f0262cSandi    /**
541f3f0262cSandi     * Compute the length of the Longest Common Subsequence (LCS).
542f3f0262cSandi     *
543f3f0262cSandi     * This is mostly for diagnostic purposed.
544f3f0262cSandi     *
545f3f0262cSandi     * @return int The length of the LCS.
546f3f0262cSandi     */
547f3f0262cSandi    function lcs() {
548f3f0262cSandi        $lcs = 0;
549f3f0262cSandi        foreach ($this->edits as $edit) {
550f3f0262cSandi            if ($edit->type == 'copy')
551f5b78785SAndreas Gohr                $lcs += count($edit->orig);
552f3f0262cSandi        }
553f3f0262cSandi        return $lcs;
554f3f0262cSandi    }
555f3f0262cSandi
556f3f0262cSandi    /**
557f3f0262cSandi     * Get the original set of lines.
558f3f0262cSandi     *
559f3f0262cSandi     * This reconstructs the $from_lines parameter passed to the
560f3f0262cSandi     * constructor.
561f3f0262cSandi     *
562f3f0262cSandi     * @return array The original sequence of strings.
563f3f0262cSandi     */
564f3f0262cSandi    function orig() {
565f3f0262cSandi        $lines = array();
566f3f0262cSandi
567f3f0262cSandi        foreach ($this->edits as $edit) {
568f3f0262cSandi            if ($edit->orig)
569f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->orig);
570f3f0262cSandi        }
571f3f0262cSandi        return $lines;
572f3f0262cSandi    }
573f3f0262cSandi
574f3f0262cSandi    /**
575f3f0262cSandi     * Get the closing set of lines.
576f3f0262cSandi     *
577f3f0262cSandi     * This reconstructs the $to_lines parameter passed to the
578f3f0262cSandi     * constructor.
579f3f0262cSandi     *
580f3f0262cSandi     * @return array The sequence of strings.
581f3f0262cSandi     */
582f3f0262cSandi    function closing() {
583f3f0262cSandi        $lines = array();
584f3f0262cSandi
585f3f0262cSandi        foreach ($this->edits as $edit) {
586f3f0262cSandi            if ($edit->closing)
587f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->closing);
588f3f0262cSandi        }
589f3f0262cSandi        return $lines;
590f3f0262cSandi    }
591f3f0262cSandi
592f3f0262cSandi    /**
593f3f0262cSandi     * Check a Diff for validity.
594f3f0262cSandi     *
595f3f0262cSandi     * This is here only for debugging purposes.
596f3f0262cSandi     */
597f3f0262cSandi    function _check($from_lines, $to_lines) {
598f3f0262cSandi        if (serialize($from_lines) != serialize($this->orig()))
599f3f0262cSandi            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
600f3f0262cSandi        if (serialize($to_lines) != serialize($this->closing()))
601f3f0262cSandi            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
602f3f0262cSandi
603f3f0262cSandi        $rev = $this->reverse();
604f3f0262cSandi        if (serialize($to_lines) != serialize($rev->orig()))
605f3f0262cSandi            trigger_error("Reversed original doesn't match", E_USER_ERROR);
606f3f0262cSandi        if (serialize($from_lines) != serialize($rev->closing()))
607f3f0262cSandi            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
608f3f0262cSandi
609f3f0262cSandi        $prevtype = 'none';
610f3f0262cSandi        foreach ($this->edits as $edit) {
611f3f0262cSandi            if ($prevtype == $edit->type)
612f3f0262cSandi                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
613f3f0262cSandi            $prevtype = $edit->type;
614f3f0262cSandi        }
615f3f0262cSandi
616f3f0262cSandi        $lcs = $this->lcs();
617f3f0262cSandi        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
618f3f0262cSandi    }
619f3f0262cSandi}
620f3f0262cSandi
621f3f0262cSandi/**
622f3f0262cSandi * FIXME: bad name.
623f3f0262cSandi */
624f5b78785SAndreas Gohrclass MappedDiff extends Diff {
625f3f0262cSandi    /**
626f3f0262cSandi     * Constructor.
627f3f0262cSandi     *
628f3f0262cSandi     * Computes diff between sequences of strings.
629f3f0262cSandi     *
630f3f0262cSandi     * This can be used to compute things like
631f3f0262cSandi     * case-insensitve diffs, or diffs which ignore
632f3f0262cSandi     * changes in white-space.
633f3f0262cSandi     *
634f3f0262cSandi     * @param $from_lines array An array of strings.
635f3f0262cSandi     *  (Typically these are lines from a file.)
636f3f0262cSandi     *
637f3f0262cSandi     * @param $to_lines array An array of strings.
638f3f0262cSandi     *
639f3f0262cSandi     * @param $mapped_from_lines array This array should
640f3f0262cSandi     *  have the same size number of elements as $from_lines.
641f3f0262cSandi     *  The elements in $mapped_from_lines and
642f3f0262cSandi     *  $mapped_to_lines are what is actually compared
643f3f0262cSandi     *  when computing the diff.
644f3f0262cSandi     *
645f3f0262cSandi     * @param $mapped_to_lines array This array should
646f3f0262cSandi     *  have the same number of elements as $to_lines.
647f3f0262cSandi     */
6487deca91bSRobin Getz    function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
649f3f0262cSandi
650f5b78785SAndreas Gohr        assert(count($from_lines) == count($mapped_from_lines));
651f5b78785SAndreas Gohr        assert(count($to_lines) == count($mapped_to_lines));
652f3f0262cSandi
653f3f0262cSandi        $this->Diff($mapped_from_lines, $mapped_to_lines);
654f3f0262cSandi
655f3f0262cSandi        $xi = $yi = 0;
656f5b78785SAndreas Gohr        $ecnt = count($this->edits);
657f5b78785SAndreas Gohr        for ($i = 0; $i < $ecnt; $i++) {
658f3f0262cSandi            $orig = &$this->edits[$i]->orig;
659f3f0262cSandi            if (is_array($orig)) {
660f5b78785SAndreas Gohr                $orig = array_slice($from_lines, $xi, count($orig));
661f5b78785SAndreas Gohr                $xi += count($orig);
662f3f0262cSandi            }
663f3f0262cSandi
664f3f0262cSandi            $closing = &$this->edits[$i]->closing;
665f3f0262cSandi            if (is_array($closing)) {
666f5b78785SAndreas Gohr                $closing = array_slice($to_lines, $yi, count($closing));
667f5b78785SAndreas Gohr                $yi += count($closing);
668f3f0262cSandi            }
669f3f0262cSandi        }
670f3f0262cSandi    }
671f3f0262cSandi}
672f3f0262cSandi
673f3f0262cSandi/**
674f3f0262cSandi * A class to format Diffs
675f3f0262cSandi *
676f3f0262cSandi * This class formats the diff in classic diff format.
677f3f0262cSandi * It is intended that this class be customized via inheritance,
678f3f0262cSandi * to obtain fancier outputs.
679f3f0262cSandi */
680f5b78785SAndreas Gohrclass DiffFormatter {
681f3f0262cSandi    /**
682f3f0262cSandi     * Number of leading context "lines" to preserve.
683f3f0262cSandi     *
684f3f0262cSandi     * This should be left at zero for this class, but subclasses
685f3f0262cSandi     * may want to set this to other values.
686f3f0262cSandi     */
687f3f0262cSandi    var $leading_context_lines = 0;
688f3f0262cSandi
689f3f0262cSandi    /**
690f3f0262cSandi     * Number of trailing context "lines" to preserve.
691f3f0262cSandi     *
692f3f0262cSandi     * This should be left at zero for this class, but subclasses
693f3f0262cSandi     * may want to set this to other values.
694f3f0262cSandi     */
695f3f0262cSandi    var $trailing_context_lines = 0;
696f3f0262cSandi
697f3f0262cSandi    /**
698f3f0262cSandi     * Format a diff.
699f3f0262cSandi     *
700f3f0262cSandi     * @param $diff object A Diff object.
701f3f0262cSandi     * @return string The formatted output.
702f3f0262cSandi     */
703f3f0262cSandi    function format($diff) {
704f3f0262cSandi
705f3f0262cSandi        $xi = $yi = 1;
706f3f0262cSandi        $block = false;
707f3f0262cSandi        $context = array();
708f3f0262cSandi
709f3f0262cSandi        $nlead = $this->leading_context_lines;
710f3f0262cSandi        $ntrail = $this->trailing_context_lines;
711f3f0262cSandi
712f3f0262cSandi        $this->_start_diff();
713f3f0262cSandi
714f3f0262cSandi        foreach ($diff->edits as $edit) {
715f3f0262cSandi            if ($edit->type == 'copy') {
716f3f0262cSandi                if (is_array($block)) {
717f5b78785SAndreas Gohr                    if (count($edit->orig) <= $nlead + $ntrail) {
718f3f0262cSandi                        $block[] = $edit;
719f3f0262cSandi                    }
720f3f0262cSandi                    else{
721f3f0262cSandi                        if ($ntrail) {
722f3f0262cSandi                            $context = array_slice($edit->orig, 0, $ntrail);
723f3f0262cSandi                            $block[] = new _DiffOp_Copy($context);
724f3f0262cSandi                        }
7257deca91bSRobin Getz                        $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
726f3f0262cSandi                        $block = false;
727f3f0262cSandi                    }
728f3f0262cSandi                }
729f3f0262cSandi                $context = $edit->orig;
730f3f0262cSandi            }
731f3f0262cSandi            else {
732f3f0262cSandi                if (! is_array($block)) {
733f5b78785SAndreas Gohr                    $context = array_slice($context, count($context) - $nlead);
734f5b78785SAndreas Gohr                    $x0 = $xi - count($context);
735f5b78785SAndreas Gohr                    $y0 = $yi - count($context);
736f3f0262cSandi                    $block = array();
737f3f0262cSandi                    if ($context)
738f3f0262cSandi                        $block[] = new _DiffOp_Copy($context);
739f3f0262cSandi                }
740f3f0262cSandi                $block[] = $edit;
741f3f0262cSandi            }
742f3f0262cSandi
743f3f0262cSandi            if ($edit->orig)
744f5b78785SAndreas Gohr                $xi += count($edit->orig);
745f3f0262cSandi            if ($edit->closing)
746f5b78785SAndreas Gohr                $yi += count($edit->closing);
747f3f0262cSandi        }
748f3f0262cSandi
749f3f0262cSandi        if (is_array($block))
7507deca91bSRobin Getz            $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
751f3f0262cSandi
752f3f0262cSandi        return $this->_end_diff();
753f3f0262cSandi    }
754f3f0262cSandi
755f3f0262cSandi    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
756f3f0262cSandi        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
757f3f0262cSandi        foreach ($edits as $edit) {
758f3f0262cSandi            if ($edit->type == 'copy')
759f3f0262cSandi                $this->_context($edit->orig);
760f3f0262cSandi            elseif ($edit->type == 'add')
761f3f0262cSandi                $this->_added($edit->closing);
762f3f0262cSandi            elseif ($edit->type == 'delete')
763f3f0262cSandi                $this->_deleted($edit->orig);
764f3f0262cSandi            elseif ($edit->type == 'change')
765f3f0262cSandi                $this->_changed($edit->orig, $edit->closing);
766f3f0262cSandi            else
767f3f0262cSandi                trigger_error("Unknown edit type", E_USER_ERROR);
768f3f0262cSandi        }
769f3f0262cSandi        $this->_end_block();
770f3f0262cSandi    }
771f3f0262cSandi
772f3f0262cSandi    function _start_diff() {
773f3f0262cSandi        ob_start();
774f3f0262cSandi    }
775f3f0262cSandi
776f3f0262cSandi    function _end_diff() {
777f3f0262cSandi        $val = ob_get_contents();
778f3f0262cSandi        ob_end_clean();
779f3f0262cSandi        return $val;
780f3f0262cSandi    }
781f3f0262cSandi
782f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
783f3f0262cSandi        if ($xlen > 1)
784f3f0262cSandi            $xbeg .= "," . ($xbeg + $xlen - 1);
785f3f0262cSandi        if ($ylen > 1)
786f3f0262cSandi            $ybeg .= "," . ($ybeg + $ylen - 1);
787f3f0262cSandi
788f3f0262cSandi        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
789f3f0262cSandi    }
790f3f0262cSandi
791f3f0262cSandi    function _start_block($header) {
792f3f0262cSandi        echo $header;
793f3f0262cSandi    }
794f3f0262cSandi
795f3f0262cSandi    function _end_block() {
796f3f0262cSandi    }
797f3f0262cSandi
798f3f0262cSandi    function _lines($lines, $prefix = ' ') {
799f3f0262cSandi        foreach ($lines as $line)
800f3f0262cSandi            echo "$prefix $line\n";
801f3f0262cSandi    }
802f3f0262cSandi
803f3f0262cSandi    function _context($lines) {
804f3f0262cSandi        $this->_lines($lines);
805f3f0262cSandi    }
806f3f0262cSandi
807f3f0262cSandi    function _added($lines) {
808f3f0262cSandi        $this->_lines($lines, ">");
809f3f0262cSandi    }
810f3f0262cSandi    function _deleted($lines) {
811f3f0262cSandi        $this->_lines($lines, "<");
812f3f0262cSandi    }
813f3f0262cSandi
814f3f0262cSandi    function _changed($orig, $closing) {
815f3f0262cSandi        $this->_deleted($orig);
816f3f0262cSandi        echo "---\n";
817f3f0262cSandi        $this->_added($closing);
818f3f0262cSandi    }
819f3f0262cSandi}
820f3f0262cSandi
821f3f0262cSandi
822f3f0262cSandi/**
823f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
824f3f0262cSandi *
825f3f0262cSandi */
826f3f0262cSandi
8279b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
828f3f0262cSandi
829f3f0262cSandiclass _HWLDF_WordAccumulator {
830f3f0262cSandi    function _HWLDF_WordAccumulator() {
831f3f0262cSandi        $this->_lines = array();
832f3f0262cSandi        $this->_line = '';
833f3f0262cSandi        $this->_group = '';
834f3f0262cSandi        $this->_tag = '';
835f3f0262cSandi    }
836f3f0262cSandi
837f3f0262cSandi    function _flushGroup($new_tag) {
838f3f0262cSandi        if ($this->_group !== '') {
839f3f0262cSandi            if ($this->_tag == 'mark')
840ed7ecb79SAnika Henke                $this->_line .= '<strong>'.$this->_group.'</strong>';
841812bb04eSRobin Getz            elseif ($this->_tag == 'add')
842812bb04eSRobin Getz                $this->_line .= '<span class="diff-addedline">'.$this->_group.'</span>';
843812bb04eSRobin Getz            elseif ($this->_tag == 'del')
844812bb04eSRobin Getz                $this->_line .= '<span class="diff-deletedline"><del>'.$this->_group.'</del></span>';
845f3f0262cSandi            else
846f3f0262cSandi                $this->_line .= $this->_group;
847f3f0262cSandi        }
848f3f0262cSandi        $this->_group = '';
849f3f0262cSandi        $this->_tag = $new_tag;
850f3f0262cSandi    }
851f3f0262cSandi
852f3f0262cSandi    function _flushLine($new_tag) {
853f3f0262cSandi        $this->_flushGroup($new_tag);
854f3f0262cSandi        if ($this->_line != '')
855f3f0262cSandi            $this->_lines[] = $this->_line;
856f3f0262cSandi        $this->_line = '';
857f3f0262cSandi    }
858f3f0262cSandi
859f3f0262cSandi    function addWords($words, $tag = '') {
860f3f0262cSandi        if ($tag != $this->_tag)
861f3f0262cSandi            $this->_flushGroup($tag);
862f3f0262cSandi
863f3f0262cSandi        foreach ($words as $word) {
864f3f0262cSandi            // new-line should only come as first char of word.
865f3f0262cSandi            if ($word == '')
866f3f0262cSandi                continue;
867f3f0262cSandi            if ($word[0] == "\n") {
868f3f0262cSandi                $this->_group .= NBSP;
869f3f0262cSandi                $this->_flushLine($tag);
870f3f0262cSandi                $word = substr($word, 1);
871f3f0262cSandi            }
872f3f0262cSandi            assert(!strstr($word, "\n"));
873f3f0262cSandi            $this->_group .= $word;
874f3f0262cSandi        }
875f3f0262cSandi    }
876f3f0262cSandi
877f3f0262cSandi    function getLines() {
878f3f0262cSandi        $this->_flushLine('~done');
879f3f0262cSandi        return $this->_lines;
880f3f0262cSandi    }
881f3f0262cSandi}
882f3f0262cSandi
883f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff {
884f5b78785SAndreas Gohr
885f3f0262cSandi    function WordLevelDiff($orig_lines, $closing_lines) {
886f3f0262cSandi        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
887f3f0262cSandi        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
888f3f0262cSandi
8897deca91bSRobin Getz        $this->MappedDiff($orig_words, $closing_words, $orig_stripped, $closing_stripped);
890f3f0262cSandi    }
891f3f0262cSandi
892f3f0262cSandi    function _split($lines) {
8935e1ee188SXin LI        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
8947deca91bSRobin Getz             implode("\n", $lines), $m)) {
895f3f0262cSandi            return array(array(''), array(''));
896f3f0262cSandi        }
897f3f0262cSandi        return array($m[0], $m[1]);
898f3f0262cSandi    }
899f3f0262cSandi
900f3f0262cSandi    function orig() {
901f3f0262cSandi        $orig = new _HWLDF_WordAccumulator;
902f3f0262cSandi
903f3f0262cSandi        foreach ($this->edits as $edit) {
904f3f0262cSandi            if ($edit->type == 'copy')
905f3f0262cSandi                $orig->addWords($edit->orig);
906f3f0262cSandi            elseif ($edit->orig)
907f3f0262cSandi                $orig->addWords($edit->orig, 'mark');
908f3f0262cSandi        }
909f3f0262cSandi        return $orig->getLines();
910f3f0262cSandi    }
911f3f0262cSandi
912f3f0262cSandi    function closing() {
913f3f0262cSandi        $closing = new _HWLDF_WordAccumulator;
914f3f0262cSandi
915f3f0262cSandi        foreach ($this->edits as $edit) {
916f3f0262cSandi            if ($edit->type == 'copy')
917f3f0262cSandi                $closing->addWords($edit->closing);
918f3f0262cSandi            elseif ($edit->closing)
919f3f0262cSandi                $closing->addWords($edit->closing, 'mark');
920f3f0262cSandi        }
921f3f0262cSandi        return $closing->getLines();
922f3f0262cSandi    }
923f3f0262cSandi}
924f3f0262cSandi
925812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff {
926812bb04eSRobin Getz
927812bb04eSRobin Getz    function InlineWordLevelDiff($orig_lines, $closing_lines) {
928812bb04eSRobin Getz        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
929812bb04eSRobin Getz        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
930812bb04eSRobin Getz
931812bb04eSRobin Getz        $this->MappedDiff($orig_words, $closing_words, $orig_stripped, $closing_stripped);
932812bb04eSRobin Getz    }
933812bb04eSRobin Getz
934812bb04eSRobin Getz    function _split($lines) {
935812bb04eSRobin Getz        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
936812bb04eSRobin Getz             implode("\n", $lines), $m)) {
937812bb04eSRobin Getz            return array(array(''), array(''));
938812bb04eSRobin Getz        }
939812bb04eSRobin Getz        return array($m[0], $m[1]);
940812bb04eSRobin Getz    }
941812bb04eSRobin Getz
942812bb04eSRobin Getz    function inline() {
943812bb04eSRobin Getz        $orig = new _HWLDF_WordAccumulator;
944812bb04eSRobin Getz        foreach ($this->edits as $edit) {
945812bb04eSRobin Getz            if ($edit->type == 'copy')
946812bb04eSRobin Getz                $orig->addWords($edit->orig);
947812bb04eSRobin Getz            elseif ($edit->type == 'change'){
948812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
949812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
950812bb04eSRobin Getz            } elseif ($edit->type == 'delete')
951812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
952812bb04eSRobin Getz            elseif ($edit->type == 'add')
953812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
954812bb04eSRobin Getz            elseif ($edit->orig)
955812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
956812bb04eSRobin Getz        }
957812bb04eSRobin Getz        return $orig->getLines();
958812bb04eSRobin Getz    }
959812bb04eSRobin Getz}
960812bb04eSRobin Getz
961f3f0262cSandi/**
962f3f0262cSandi * "Unified" diff formatter.
963f3f0262cSandi *
964f3f0262cSandi * This class formats the diff in classic "unified diff" format.
965f3f0262cSandi */
966f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter {
967f5b78785SAndreas Gohr
968f3f0262cSandi    function UnifiedDiffFormatter($context_lines = 4) {
969f3f0262cSandi        $this->leading_context_lines = $context_lines;
970f3f0262cSandi        $this->trailing_context_lines = $context_lines;
971f3f0262cSandi    }
972f3f0262cSandi
973f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
974f3f0262cSandi        if ($xlen != 1)
975f3f0262cSandi            $xbeg .= "," . $xlen;
976f3f0262cSandi        if ($ylen != 1)
977f3f0262cSandi            $ybeg .= "," . $ylen;
978f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
979f3f0262cSandi    }
980f3f0262cSandi
981f3f0262cSandi    function _added($lines) {
982f3f0262cSandi        $this->_lines($lines, "+");
983f3f0262cSandi    }
984f3f0262cSandi    function _deleted($lines) {
985f3f0262cSandi        $this->_lines($lines, "-");
986f3f0262cSandi    }
987f3f0262cSandi    function _changed($orig, $final) {
988f3f0262cSandi        $this->_deleted($orig);
989f3f0262cSandi        $this->_added($final);
990f3f0262cSandi    }
991f3f0262cSandi}
992f3f0262cSandi
993f3f0262cSandi/**
994f3f0262cSandi *  Wikipedia Table style diff formatter.
995f3f0262cSandi *
996f3f0262cSandi */
997f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter {
998f5b78785SAndreas Gohr
999f3f0262cSandi    function TableDiffFormatter() {
1000f3f0262cSandi        $this->leading_context_lines = 2;
1001f3f0262cSandi        $this->trailing_context_lines = 2;
1002f3f0262cSandi    }
1003f3f0262cSandi
10042d880650SAdrian Lang    function format($diff) {
10052d880650SAdrian Lang        // Preserve whitespaces by converting some to non-breaking spaces.
10062d880650SAdrian Lang        // Do not convert all of them to allow word-wrap.
10072d880650SAdrian Lang        $val = parent::format($diff);
10082d880650SAdrian Lang        $val = str_replace('  ','&nbsp; ', $val);
10092d880650SAdrian Lang        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
10102d880650SAdrian Lang        return $val;
10112d880650SAdrian Lang    }
10122d880650SAdrian Lang
1013f3f0262cSandi    function _pre($text){
1014f3f0262cSandi        $text = htmlspecialchars($text);
1015f3f0262cSandi        return $text;
1016f3f0262cSandi    }
1017f3f0262cSandi
1018f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1019f3f0262cSandi        global $lang;
1020f3f0262cSandi        $l1 = $lang['line'].' '.$xbeg;
1021f3f0262cSandi        $l2 = $lang['line'].' '.$ybeg;
1022*5048c277SAnika Henke        $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n".
1023*5048c277SAnika Henke             '    <td class="diff-blockheader" colspan="2">'.$l2.":</td>\n".
1024*5048c277SAnika Henke             "</tr>\n";
1025f3f0262cSandi        return $r;
1026f3f0262cSandi    }
1027f3f0262cSandi
1028f3f0262cSandi    function _start_block($header) {
1029f3f0262cSandi        print($header);
1030f3f0262cSandi    }
1031f3f0262cSandi
1032f3f0262cSandi    function _end_block() {
1033f3f0262cSandi    }
1034f3f0262cSandi
1035f3f0262cSandi    function _lines($lines, $prefix=' ', $color="white") {
1036f3f0262cSandi    }
1037f3f0262cSandi
1038f3f0262cSandi    function addedLine($line) {
10397deca91bSRobin Getz        return '<td>+</td><td class="diff-addedline">' .  $line.'</td>';
1040f3f0262cSandi    }
1041f3f0262cSandi
1042f3f0262cSandi    function deletedLine($line) {
10437deca91bSRobin Getz        return '<td>-</td><td class="diff-deletedline">' .  $line.'</td>';
1044f3f0262cSandi    }
1045f3f0262cSandi
1046f3f0262cSandi    function emptyLine() {
1047f3f0262cSandi        return '<td colspan="2">&nbsp;</td>';
1048f3f0262cSandi    }
1049f3f0262cSandi
1050f3f0262cSandi    function contextLine($line) {
1051f3f0262cSandi        return '<td> </td><td class="diff-context">'.$line.'</td>';
1052f3f0262cSandi    }
1053f3f0262cSandi
1054f3f0262cSandi    function _added($lines) {
1055f3f0262cSandi        foreach ($lines as $line) {
10567deca91bSRobin Getz            print('<tr>' . $this->emptyLine() . $this->addedLine($line) . "</tr>\n");
1057f3f0262cSandi        }
1058f3f0262cSandi    }
1059f3f0262cSandi
1060f3f0262cSandi    function _deleted($lines) {
1061f3f0262cSandi        foreach ($lines as $line) {
10627deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n");
1063f3f0262cSandi        }
1064f3f0262cSandi    }
1065f3f0262cSandi
1066f3f0262cSandi    function _context($lines) {
1067f3f0262cSandi        foreach ($lines as $line) {
10687deca91bSRobin Getz            print('<tr>' . $this->contextLine($line) .  $this->contextLine($line) . "</tr>\n");
1069f3f0262cSandi        }
1070f3f0262cSandi    }
1071f3f0262cSandi
1072f3f0262cSandi    function _changed($orig, $closing) {
1073f3f0262cSandi        $diff = new WordLevelDiff($orig, $closing);
1074f3f0262cSandi        $del = $diff->orig();
1075f3f0262cSandi        $add = $diff->closing();
1076f3f0262cSandi
1077f3f0262cSandi        while ($line = array_shift($del)) {
1078f3f0262cSandi            $aline = array_shift($add);
10797deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->addedLine($aline) . "</tr>\n");
1080f3f0262cSandi        }
1081f3f0262cSandi        $this->_added($add); # If any leftovers
1082f3f0262cSandi    }
1083f3f0262cSandi}
1084f3f0262cSandi
1085812bb04eSRobin Getz/**
1086812bb04eSRobin Getz *  Inline style diff formatter.
1087812bb04eSRobin Getz *
1088812bb04eSRobin Getz */
1089812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter {
1090812bb04eSRobin Getz
1091812bb04eSRobin Getz    function InlineDiffFormatter() {
1092812bb04eSRobin Getz        $this->leading_context_lines = 2;
1093812bb04eSRobin Getz        $this->trailing_context_lines = 2;
1094812bb04eSRobin Getz    }
1095812bb04eSRobin Getz
1096812bb04eSRobin Getz    function format($diff) {
1097812bb04eSRobin Getz        // Preserve whitespaces by converting some to non-breaking spaces.
1098812bb04eSRobin Getz        // Do not convert all of them to allow word-wrap.
1099812bb04eSRobin Getz        $val = parent::format($diff);
1100812bb04eSRobin Getz        $val = str_replace('  ','&nbsp; ', $val);
1101812bb04eSRobin Getz        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
1102812bb04eSRobin Getz        return $val;
1103812bb04eSRobin Getz    }
1104812bb04eSRobin Getz
1105812bb04eSRobin Getz    function _pre($text){
1106812bb04eSRobin Getz        $text = htmlspecialchars($text);
1107812bb04eSRobin Getz        return $text;
1108812bb04eSRobin Getz    }
1109812bb04eSRobin Getz
1110812bb04eSRobin Getz    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1111812bb04eSRobin Getz        global $lang;
1112812bb04eSRobin Getz        if ($xlen != 1)
1113812bb04eSRobin Getz            $xbeg .= "," . $xlen;
1114812bb04eSRobin Getz        if ($ylen != 1)
1115812bb04eSRobin Getz            $ybeg .= "," . $ylen;
1116812bb04eSRobin Getz        $r = '<tr><td class="diff-blockheader">@@ '.$lang['line']." -$xbeg +$ybeg @@";
1117812bb04eSRobin Getz        $r .= ' <span class="diff-deletedline"><del>'.$lang['deleted'].'</del></span>';
1118812bb04eSRobin Getz        $r .= ' <span class="diff-addedline">'.$lang['created'].'</span>';
1119812bb04eSRobin Getz        $r .= "</td></tr>\n";
1120812bb04eSRobin Getz        return $r;
1121812bb04eSRobin Getz    }
1122812bb04eSRobin Getz
1123812bb04eSRobin Getz    function _start_block($header) {
1124812bb04eSRobin Getz        print($header."\n");
1125812bb04eSRobin Getz    }
1126812bb04eSRobin Getz
1127812bb04eSRobin Getz    function _end_block() {
1128812bb04eSRobin Getz    }
1129812bb04eSRobin Getz
1130812bb04eSRobin Getz    function _lines($lines, $prefix=' ', $color="white") {
1131812bb04eSRobin Getz    }
1132812bb04eSRobin Getz
1133812bb04eSRobin Getz    function _added($lines) {
1134812bb04eSRobin Getz        foreach ($lines as $line) {
1135812bb04eSRobin Getz            print('<tr><td class="diff-addedline">'. $line . "</td></tr>\n");
1136812bb04eSRobin Getz        }
1137812bb04eSRobin Getz    }
1138812bb04eSRobin Getz
1139812bb04eSRobin Getz    function _deleted($lines) {
1140812bb04eSRobin Getz        foreach ($lines as $line) {
1141812bb04eSRobin Getz            print('<tr><td class="diff-deletedline"><del>' . $line . "</del></td></tr>\n");
1142812bb04eSRobin Getz        }
1143812bb04eSRobin Getz    }
1144812bb04eSRobin Getz
1145812bb04eSRobin Getz    function _context($lines) {
1146812bb04eSRobin Getz        foreach ($lines as $line) {
1147812bb04eSRobin Getz            print('<tr><td class="diff-context">'.$line."</td></tr>\n");
1148812bb04eSRobin Getz        }
1149812bb04eSRobin Getz    }
1150812bb04eSRobin Getz
1151812bb04eSRobin Getz    function _changed($orig, $closing) {
1152812bb04eSRobin Getz        $diff = new InlineWordLevelDiff($orig, $closing);
1153812bb04eSRobin Getz        $add = $diff->inline();
1154812bb04eSRobin Getz
1155812bb04eSRobin Getz        foreach ($add as $line)
1156812bb04eSRobin Getz            print('<tr><td>'.$line."</td></tr>\n");
1157812bb04eSRobin Getz    }
1158812bb04eSRobin Getz}
1159812bb04eSRobin Getz
1160340756e4Sandi
1161e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 :
1162