xref: /dokuwiki/inc/DifferenceEngine.php (revision 988c134016f0557947bd6811e22086919f98fa8e)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi    var $type;
14f3f0262cSandi    var $orig;
15f3f0262cSandi    var $closing;
16f3f0262cSandi
17f3f0262cSandi    function reverse() {
18f3f0262cSandi        trigger_error("pure virtual", E_USER_ERROR);
19f3f0262cSandi    }
20f3f0262cSandi
21f3f0262cSandi    function norig() {
22f5b78785SAndreas Gohr        return $this->orig ? count($this->orig) : 0;
23f3f0262cSandi    }
24f3f0262cSandi
25f3f0262cSandi    function nclosing() {
26f5b78785SAndreas Gohr        return $this->closing ? count($this->closing) : 0;
27f3f0262cSandi    }
28f3f0262cSandi}
29f3f0262cSandi
30f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
31f3f0262cSandi    var $type = 'copy';
32*988c1340SPiyush Mishra    /**
33*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
34*988c1340SPiyush Mishra     */
35f3f0262cSandi    function _DiffOp_Copy($orig, $closing = false) {
36*988c1340SPiyush Mishra        $this->__construct($orig, $closing);
37*988c1340SPiyush Mishra    }
38*988c1340SPiyush Mishra
39*988c1340SPiyush Mishra    function __construct($orig, $closing = false) {
40f3f0262cSandi        if (!is_array($closing))
41f3f0262cSandi            $closing = $orig;
42f3f0262cSandi        $this->orig = $orig;
43f3f0262cSandi        $this->closing = $closing;
44f3f0262cSandi    }
45f3f0262cSandi
46f3f0262cSandi    function reverse() {
47f3f0262cSandi        return new _DiffOp_Copy($this->closing, $this->orig);
48f3f0262cSandi    }
49f3f0262cSandi}
50f3f0262cSandi
51f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
52f3f0262cSandi    var $type = 'delete';
53*988c1340SPiyush Mishra    /**
54*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
55*988c1340SPiyush Mishra     */
56f3f0262cSandi    function _DiffOp_Delete($lines) {
57*988c1340SPiyush Mishra        $this->__construct($lines);
58*988c1340SPiyush Mishra    }
59*988c1340SPiyush Mishra
60*988c1340SPiyush Mishra    function __construct($lines) {
61f3f0262cSandi        $this->orig = $lines;
62f3f0262cSandi        $this->closing = false;
63f3f0262cSandi    }
64f3f0262cSandi
65f3f0262cSandi    function reverse() {
66f3f0262cSandi        return new _DiffOp_Add($this->orig);
67f3f0262cSandi    }
68f3f0262cSandi}
69f3f0262cSandi
70f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
71f3f0262cSandi    var $type = 'add';
72*988c1340SPiyush Mishra    /**
73*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
74*988c1340SPiyush Mishra     */
75f3f0262cSandi    function _DiffOp_Add($lines) {
76*988c1340SPiyush Mishra        $this->__construct($lines);
77*988c1340SPiyush Mishra    }
78*988c1340SPiyush Mishra
79*988c1340SPiyush Mishra    function __construct($lines) {
80f3f0262cSandi        $this->closing = $lines;
81f3f0262cSandi        $this->orig = false;
82f3f0262cSandi    }
83f3f0262cSandi
84f3f0262cSandi    function reverse() {
85f3f0262cSandi        return new _DiffOp_Delete($this->closing);
86f3f0262cSandi    }
87f3f0262cSandi}
88f3f0262cSandi
89f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
90f3f0262cSandi    var $type = 'change';
91*988c1340SPiyush Mishra    /**
92*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
93*988c1340SPiyush Mishra     */
94f3f0262cSandi    function _DiffOp_Change($orig, $closing) {
95*988c1340SPiyush Mishra        $this->__construct($orig, $closing);
96*988c1340SPiyush Mishra    }
97*988c1340SPiyush Mishra
98*988c1340SPiyush Mishra    function __construct($orig, $closing) {
99f3f0262cSandi        $this->orig = $orig;
100f3f0262cSandi        $this->closing = $closing;
101f3f0262cSandi    }
102f3f0262cSandi
103f3f0262cSandi    function reverse() {
104f3f0262cSandi        return new _DiffOp_Change($this->closing, $this->orig);
105f3f0262cSandi    }
106f3f0262cSandi}
107f3f0262cSandi
108f3f0262cSandi
109f3f0262cSandi/**
110f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
111f3f0262cSandi *
112f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
113f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
114f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
115f3f0262cSandi *
116f3f0262cSandi * More ideas are taken from:
117f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
118f3f0262cSandi *
119f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
120f3f0262cSandi * diffutils-2.7, which can be found at:
121f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
122f3f0262cSandi *
123f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
124f3f0262cSandi * are my own.
125f3f0262cSandi *
126f3f0262cSandi * @author Geoffrey T. Dairiki
127f3f0262cSandi * @access private
128f3f0262cSandi */
129f5b78785SAndreas Gohrclass _DiffEngine {
130f5b78785SAndreas Gohr
131f3f0262cSandi    function diff($from_lines, $to_lines) {
132f5b78785SAndreas Gohr        $n_from = count($from_lines);
133f5b78785SAndreas Gohr        $n_to = count($to_lines);
134f3f0262cSandi
135f3f0262cSandi        $this->xchanged = $this->ychanged = array();
136f3f0262cSandi        $this->xv = $this->yv = array();
137f3f0262cSandi        $this->xind = $this->yind = array();
138f3f0262cSandi        unset($this->seq);
139f3f0262cSandi        unset($this->in_seq);
140f3f0262cSandi        unset($this->lcs);
141f3f0262cSandi
142f3f0262cSandi        // Skip leading common lines.
143f3f0262cSandi        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
144f3f0262cSandi            if ($from_lines[$skip] != $to_lines[$skip])
145f3f0262cSandi                break;
146f3f0262cSandi            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
147f3f0262cSandi        }
148f3f0262cSandi        // Skip trailing common lines.
149f5b78785SAndreas Gohr        $xi = $n_from;
150f5b78785SAndreas Gohr        $yi = $n_to;
151f3f0262cSandi        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
152f3f0262cSandi            if ($from_lines[$xi] != $to_lines[$yi])
153f3f0262cSandi                break;
154f3f0262cSandi            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
155f3f0262cSandi        }
156f3f0262cSandi
157f3f0262cSandi        // Ignore lines which do not exist in both files.
158f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
159f3f0262cSandi            $xhash[$from_lines[$xi]] = 1;
160f3f0262cSandi        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
161f3f0262cSandi            $line = $to_lines[$yi];
162f3f0262cSandi            if (($this->ychanged[$yi] = empty($xhash[$line])))
163f3f0262cSandi                continue;
164f3f0262cSandi            $yhash[$line] = 1;
165f3f0262cSandi            $this->yv[] = $line;
166f3f0262cSandi            $this->yind[] = $yi;
167f3f0262cSandi        }
168f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
169f3f0262cSandi            $line = $from_lines[$xi];
170f3f0262cSandi            if (($this->xchanged[$xi] = empty($yhash[$line])))
171f3f0262cSandi                continue;
172f3f0262cSandi            $this->xv[] = $line;
173f3f0262cSandi            $this->xind[] = $xi;
174f3f0262cSandi        }
175f3f0262cSandi
176f3f0262cSandi        // Find the LCS.
177f5b78785SAndreas Gohr        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
178f3f0262cSandi
179f3f0262cSandi        // Merge edits when possible
180f3f0262cSandi        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
181f3f0262cSandi        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
182f3f0262cSandi
183f3f0262cSandi        // Compute the edit operations.
184f3f0262cSandi        $edits = array();
185f3f0262cSandi        $xi = $yi = 0;
186f3f0262cSandi        while ($xi < $n_from || $yi < $n_to) {
187f3f0262cSandi            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
188f3f0262cSandi            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
189f3f0262cSandi
190f3f0262cSandi            // Skip matching "snake".
191f3f0262cSandi            $copy = array();
1927deca91bSRobin Getz            while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
193f3f0262cSandi                $copy[] = $from_lines[$xi++];
194f3f0262cSandi                ++$yi;
195f3f0262cSandi            }
196f3f0262cSandi            if ($copy)
197f3f0262cSandi                $edits[] = new _DiffOp_Copy($copy);
198f3f0262cSandi
199f3f0262cSandi            // Find deletes & adds.
200f3f0262cSandi            $delete = array();
201f3f0262cSandi            while ($xi < $n_from && $this->xchanged[$xi])
202f3f0262cSandi                $delete[] = $from_lines[$xi++];
203f3f0262cSandi
204f3f0262cSandi            $add = array();
205f3f0262cSandi            while ($yi < $n_to && $this->ychanged[$yi])
206f3f0262cSandi                $add[] = $to_lines[$yi++];
207f3f0262cSandi
208f3f0262cSandi            if ($delete && $add)
209f3f0262cSandi                $edits[] = new _DiffOp_Change($delete, $add);
210f3f0262cSandi            elseif ($delete)
211f3f0262cSandi                $edits[] = new _DiffOp_Delete($delete);
212f3f0262cSandi            elseif ($add)
213f3f0262cSandi                $edits[] = new _DiffOp_Add($add);
214f3f0262cSandi        }
215f3f0262cSandi        return $edits;
216f3f0262cSandi    }
217f3f0262cSandi
218f3f0262cSandi
21915fae107Sandi    /**
22015fae107Sandi     * Divide the Largest Common Subsequence (LCS) of the sequences
221f3f0262cSandi     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
222f3f0262cSandi     * sized segments.
223f3f0262cSandi     *
224f3f0262cSandi     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
225f3f0262cSandi     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
226f3f0262cSandi     * sub sequences.  The first sub-sequence is contained in [X0, X1),
227f3f0262cSandi     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
228f3f0262cSandi     * that (X0, Y0) == (XOFF, YOFF) and
229f3f0262cSandi     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
230f3f0262cSandi     *
231f3f0262cSandi     * This function assumes that the first lines of the specified portions
232f3f0262cSandi     * of the two files do not match, and likewise that the last lines do not
233f3f0262cSandi     * match.  The caller must trim matching lines from the beginning and end
234f3f0262cSandi     * of the portions it is going to specify.
235f3f0262cSandi     */
236f3f0262cSandi    function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
237f3f0262cSandi        $flip = false;
238f3f0262cSandi
239f3f0262cSandi        if ($xlim - $xoff > $ylim - $yoff) {
240f3f0262cSandi            // Things seems faster (I'm not sure I understand why)
241f3f0262cSandi            // when the shortest sequence in X.
242f3f0262cSandi            $flip = true;
2437deca91bSRobin Getz            list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
244f3f0262cSandi        }
245f3f0262cSandi
246f3f0262cSandi        if ($flip)
247f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
248f3f0262cSandi                $ymatches[$this->xv[$i]][] = $i;
249f3f0262cSandi        else
250f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
251f3f0262cSandi                $ymatches[$this->yv[$i]][] = $i;
252f3f0262cSandi
253f3f0262cSandi        $this->lcs = 0;
254f3f0262cSandi        $this->seq[0]= $yoff - 1;
255f3f0262cSandi        $this->in_seq = array();
256f3f0262cSandi        $ymids[0] = array();
257f3f0262cSandi
258f3f0262cSandi        $numer = $xlim - $xoff + $nchunks - 1;
259f3f0262cSandi        $x = $xoff;
260f3f0262cSandi        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
261f3f0262cSandi            if ($chunk > 0)
262f3f0262cSandi                for ($i = 0; $i <= $this->lcs; $i++)
263f3f0262cSandi                    $ymids[$i][$chunk-1] = $this->seq[$i];
264f3f0262cSandi
265f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
266f3f0262cSandi            for ( ; $x < $x1; $x++) {
267f3f0262cSandi                $line = $flip ? $this->yv[$x] : $this->xv[$x];
268f3f0262cSandi                if (empty($ymatches[$line]))
269f3f0262cSandi                    continue;
270f3f0262cSandi                $matches = $ymatches[$line];
271f3f0262cSandi                reset($matches);
272f3f0262cSandi                while (list ($junk, $y) = each($matches))
273f3f0262cSandi                    if (empty($this->in_seq[$y])) {
274f3f0262cSandi                        $k = $this->_lcs_pos($y);
275f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
276f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
277f3f0262cSandi                        break;
278f3f0262cSandi                    }
279f3f0262cSandi                while (list ($junk, $y) = each($matches)) {
280f3f0262cSandi                    if ($y > $this->seq[$k-1]) {
281f3f0262cSandi                        USE_ASSERTS && assert($y < $this->seq[$k]);
282f3f0262cSandi                        // Optimization: this is a common case:
283f3f0262cSandi                        //  next match is just replacing previous match.
284f3f0262cSandi                        $this->in_seq[$this->seq[$k]] = false;
285f3f0262cSandi                        $this->seq[$k] = $y;
286f3f0262cSandi                        $this->in_seq[$y] = 1;
287f3f0262cSandi                    }
288f3f0262cSandi                    else if (empty($this->in_seq[$y])) {
289f3f0262cSandi                        $k = $this->_lcs_pos($y);
290f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
291f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
292f3f0262cSandi                    }
293f3f0262cSandi                }
294f3f0262cSandi            }
295f3f0262cSandi        }
296f3f0262cSandi
297f3f0262cSandi        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
298f3f0262cSandi        $ymid = $ymids[$this->lcs];
299f3f0262cSandi        for ($n = 0; $n < $nchunks - 1; $n++) {
300f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
301f3f0262cSandi            $y1 = $ymid[$n] + 1;
302f3f0262cSandi            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
303f3f0262cSandi        }
304f3f0262cSandi        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
305f3f0262cSandi
306f3f0262cSandi        return array($this->lcs, $seps);
307f3f0262cSandi    }
308f3f0262cSandi
309f3f0262cSandi    function _lcs_pos($ypos) {
310f3f0262cSandi        $end = $this->lcs;
311f3f0262cSandi        if ($end == 0 || $ypos > $this->seq[$end]) {
312f3f0262cSandi            $this->seq[++$this->lcs] = $ypos;
313f3f0262cSandi            $this->in_seq[$ypos] = 1;
314f3f0262cSandi            return $this->lcs;
315f3f0262cSandi        }
316f3f0262cSandi
317f3f0262cSandi        $beg = 1;
318f3f0262cSandi        while ($beg < $end) {
319f3f0262cSandi            $mid = (int)(($beg + $end) / 2);
320f3f0262cSandi            if ($ypos > $this->seq[$mid])
321f3f0262cSandi                $beg = $mid + 1;
322f3f0262cSandi            else
323f3f0262cSandi                $end = $mid;
324f3f0262cSandi        }
325f3f0262cSandi
326f3f0262cSandi        USE_ASSERTS && assert($ypos != $this->seq[$end]);
327f3f0262cSandi
328f3f0262cSandi        $this->in_seq[$this->seq[$end]] = false;
329f3f0262cSandi        $this->seq[$end] = $ypos;
330f3f0262cSandi        $this->in_seq[$ypos] = 1;
331f3f0262cSandi        return $end;
332f3f0262cSandi    }
333f3f0262cSandi
33415fae107Sandi    /**
33515fae107Sandi     * Find LCS of two sequences.
336f3f0262cSandi     *
337f3f0262cSandi     * The results are recorded in the vectors $this->{x,y}changed[], by
338f3f0262cSandi     * storing a 1 in the element for each line that is an insertion
339f3f0262cSandi     * or deletion (ie. is not in the LCS).
340f3f0262cSandi     *
341f3f0262cSandi     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
342f3f0262cSandi     *
343f3f0262cSandi     * Note that XLIM, YLIM are exclusive bounds.
344f3f0262cSandi     * All line numbers are origin-0 and discarded lines are not counted.
345f3f0262cSandi     */
346f3f0262cSandi    function _compareseq($xoff, $xlim, $yoff, $ylim) {
347f3f0262cSandi        // Slide down the bottom initial diagonal.
3487deca91bSRobin Getz        while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
349f3f0262cSandi            ++$xoff;
350f3f0262cSandi            ++$yoff;
351f3f0262cSandi        }
352f3f0262cSandi
353f3f0262cSandi        // Slide up the top initial diagonal.
3547deca91bSRobin Getz        while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
355f3f0262cSandi            --$xlim;
356f3f0262cSandi            --$ylim;
357f3f0262cSandi        }
358f3f0262cSandi
359f3f0262cSandi        if ($xoff == $xlim || $yoff == $ylim)
360f3f0262cSandi            $lcs = 0;
361f3f0262cSandi        else {
362f3f0262cSandi            // This is ad hoc but seems to work well.
363f3f0262cSandi            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
364f3f0262cSandi            //$nchunks = max(2,min(8,(int)$nchunks));
365f3f0262cSandi            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
366f3f0262cSandi            list ($lcs, $seps)
367f3f0262cSandi                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
368f3f0262cSandi        }
369f3f0262cSandi
370f3f0262cSandi        if ($lcs == 0) {
371f3f0262cSandi            // X and Y sequences have no common subsequence:
372f3f0262cSandi            // mark all changed.
373f3f0262cSandi            while ($yoff < $ylim)
374f3f0262cSandi                $this->ychanged[$this->yind[$yoff++]] = 1;
375f3f0262cSandi            while ($xoff < $xlim)
376f3f0262cSandi                $this->xchanged[$this->xind[$xoff++]] = 1;
377f3f0262cSandi        }
378f3f0262cSandi        else {
379f3f0262cSandi            // Use the partitions to split this problem into subproblems.
380f3f0262cSandi            reset($seps);
381f3f0262cSandi            $pt1 = $seps[0];
382f3f0262cSandi            while ($pt2 = next($seps)) {
383f3f0262cSandi                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
384f3f0262cSandi                $pt1 = $pt2;
385f3f0262cSandi            }
386f3f0262cSandi        }
387f3f0262cSandi    }
388f3f0262cSandi
38915fae107Sandi    /**
39015fae107Sandi     * Adjust inserts/deletes of identical lines to join changes
391f3f0262cSandi     * as much as possible.
392f3f0262cSandi     *
393f3f0262cSandi     * We do something when a run of changed lines include a
394f3f0262cSandi     * line at one end and has an excluded, identical line at the other.
395f3f0262cSandi     * We are free to choose which identical line is included.
396f3f0262cSandi     * `compareseq' usually chooses the one at the beginning,
397f3f0262cSandi     * but usually it is cleaner to consider the following identical line
398f3f0262cSandi     * to be the "change".
399f3f0262cSandi     *
400f3f0262cSandi     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
401f3f0262cSandi     */
402f3f0262cSandi    function _shift_boundaries($lines, &$changed, $other_changed) {
403f3f0262cSandi        $i = 0;
404f3f0262cSandi        $j = 0;
405f3f0262cSandi
406f5b78785SAndreas Gohr        USE_ASSERTS && assert('count($lines) == count($changed)');
407f5b78785SAndreas Gohr        $len = count($lines);
408f5b78785SAndreas Gohr        $other_len = count($other_changed);
409f3f0262cSandi
410f3f0262cSandi        while (1) {
411f3f0262cSandi            /*
412f3f0262cSandi             * Scan forwards to find beginning of another run of changes.
413f3f0262cSandi             * Also keep track of the corresponding point in the other file.
414f3f0262cSandi             *
415f3f0262cSandi             * Throughout this code, $i and $j are adjusted together so that
416f3f0262cSandi             * the first $i elements of $changed and the first $j elements
417f3f0262cSandi             * of $other_changed both contain the same number of zeros
418f3f0262cSandi             * (unchanged lines).
419f3f0262cSandi             * Furthermore, $j is always kept so that $j == $other_len or
420f3f0262cSandi             * $other_changed[$j] == false.
421f3f0262cSandi             */
422f3f0262cSandi            while ($j < $other_len && $other_changed[$j])
423f3f0262cSandi                $j++;
424f3f0262cSandi
425f3f0262cSandi            while ($i < $len && ! $changed[$i]) {
426f3f0262cSandi                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
427f5b78785SAndreas Gohr                $i++;
428f5b78785SAndreas Gohr                $j++;
429f3f0262cSandi                while ($j < $other_len && $other_changed[$j])
430f3f0262cSandi                    $j++;
431f3f0262cSandi            }
432f3f0262cSandi
433f3f0262cSandi            if ($i == $len)
434f3f0262cSandi                break;
435f3f0262cSandi
436f3f0262cSandi            $start = $i;
437f3f0262cSandi
438f3f0262cSandi            // Find the end of this run of changes.
439f3f0262cSandi            while (++$i < $len && $changed[$i])
440f3f0262cSandi                continue;
441f3f0262cSandi
442f3f0262cSandi            do {
443f3f0262cSandi                /*
444f3f0262cSandi                 * Record the length of this run of changes, so that
445f3f0262cSandi                 * we can later determine whether the run has grown.
446f3f0262cSandi                 */
447f3f0262cSandi                $runlength = $i - $start;
448f3f0262cSandi
449f3f0262cSandi                /*
450f3f0262cSandi                 * Move the changed region back, so long as the
451f3f0262cSandi                 * previous unchanged line matches the last changed one.
452f3f0262cSandi                 * This merges with previous changed regions.
453f3f0262cSandi                 */
454f3f0262cSandi                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
455f3f0262cSandi                    $changed[--$start] = 1;
456f3f0262cSandi                    $changed[--$i] = false;
457f3f0262cSandi                    while ($start > 0 && $changed[$start - 1])
458f3f0262cSandi                        $start--;
459f3f0262cSandi                    USE_ASSERTS && assert('$j > 0');
460f3f0262cSandi                    while ($other_changed[--$j])
461f3f0262cSandi                        continue;
462f3f0262cSandi                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
463f3f0262cSandi                }
464f3f0262cSandi
465f3f0262cSandi                /*
466f3f0262cSandi                 * Set CORRESPONDING to the end of the changed run, at the last
467f3f0262cSandi                 * point where it corresponds to a changed run in the other file.
468f3f0262cSandi                 * CORRESPONDING == LEN means no such point has been found.
469f3f0262cSandi                 */
470f3f0262cSandi                $corresponding = $j < $other_len ? $i : $len;
471f3f0262cSandi
472f3f0262cSandi                /*
473f3f0262cSandi                 * Move the changed region forward, so long as the
474f3f0262cSandi                 * first changed line matches the following unchanged one.
475f3f0262cSandi                 * This merges with following changed regions.
476f3f0262cSandi                 * Do this second, so that if there are no merges,
477f3f0262cSandi                 * the changed region is moved forward as far as possible.
478f3f0262cSandi                 */
479f3f0262cSandi                while ($i < $len && $lines[$start] == $lines[$i]) {
480f3f0262cSandi                    $changed[$start++] = false;
481f3f0262cSandi                    $changed[$i++] = 1;
482f3f0262cSandi                    while ($i < $len && $changed[$i])
483f3f0262cSandi                        $i++;
484f3f0262cSandi
485f3f0262cSandi                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
486f3f0262cSandi                    $j++;
487f3f0262cSandi                    if ($j < $other_len && $other_changed[$j]) {
488f3f0262cSandi                        $corresponding = $i;
489f3f0262cSandi                        while ($j < $other_len && $other_changed[$j])
490f3f0262cSandi                            $j++;
491f3f0262cSandi                    }
492f3f0262cSandi                }
493f3f0262cSandi            } while ($runlength != $i - $start);
494f3f0262cSandi
495f3f0262cSandi            /*
496f3f0262cSandi             * If possible, move the fully-merged run of changes
497f3f0262cSandi             * back to a corresponding run in the other file.
498f3f0262cSandi             */
499f3f0262cSandi            while ($corresponding < $i) {
500f3f0262cSandi                $changed[--$start] = 1;
501f3f0262cSandi                $changed[--$i] = 0;
502f3f0262cSandi                USE_ASSERTS && assert('$j > 0');
503f3f0262cSandi                while ($other_changed[--$j])
504f3f0262cSandi                    continue;
505f3f0262cSandi                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
506f3f0262cSandi            }
507f3f0262cSandi        }
508f3f0262cSandi    }
509f3f0262cSandi}
510f3f0262cSandi
511f3f0262cSandi/**
512f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
513f3f0262cSandi */
514f5b78785SAndreas Gohrclass Diff {
515f5b78785SAndreas Gohr
516f3f0262cSandi    var $edits;
517f3f0262cSandi
518f3f0262cSandi    /**
519*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
520*988c1340SPiyush Mishra     */
521*988c1340SPiyush Mishra    function Diff($from_lines, $to_lines) {
522*988c1340SPiyush Mishra        $this->__construct($from_lines, $to_lines);
523*988c1340SPiyush Mishra    }
524*988c1340SPiyush Mishra
525*988c1340SPiyush Mishra    /**
526f3f0262cSandi     * Constructor.
527f3f0262cSandi     * Computes diff between sequences of strings.
528f3f0262cSandi     *
529f3f0262cSandi     * @param $from_lines array An array of strings.
530f3f0262cSandi     *      (Typically these are lines from a file.)
531f3f0262cSandi     * @param $to_lines array An array of strings.
532f3f0262cSandi     */
533*988c1340SPiyush Mishra    function __construct($from_lines, $to_lines) {
534f3f0262cSandi        $eng = new _DiffEngine;
535f3f0262cSandi        $this->edits = $eng->diff($from_lines, $to_lines);
536f3f0262cSandi        //$this->_check($from_lines, $to_lines);
537f3f0262cSandi    }
538f3f0262cSandi
539f3f0262cSandi    /**
540f3f0262cSandi     * Compute reversed Diff.
541f3f0262cSandi     *
542f3f0262cSandi     * SYNOPSIS:
543f3f0262cSandi     *
544f3f0262cSandi     *  $diff = new Diff($lines1, $lines2);
545f3f0262cSandi     *  $rev = $diff->reverse();
546f3f0262cSandi     * @return object A Diff object representing the inverse of the
547f3f0262cSandi     *          original diff.
548f3f0262cSandi     */
549f3f0262cSandi    function reverse() {
550f3f0262cSandi        $rev = $this;
551f3f0262cSandi        $rev->edits = array();
552f3f0262cSandi        foreach ($this->edits as $edit) {
553f3f0262cSandi            $rev->edits[] = $edit->reverse();
554f3f0262cSandi        }
555f3f0262cSandi        return $rev;
556f3f0262cSandi    }
557f3f0262cSandi
558f3f0262cSandi    /**
559f3f0262cSandi     * Check for empty diff.
560f3f0262cSandi     *
561f3f0262cSandi     * @return bool True iff two sequences were identical.
562f3f0262cSandi     */
563f3f0262cSandi    function isEmpty() {
564f3f0262cSandi        foreach ($this->edits as $edit) {
565f3f0262cSandi            if ($edit->type != 'copy')
566f3f0262cSandi                return false;
567f3f0262cSandi        }
568f3f0262cSandi        return true;
569f3f0262cSandi    }
570f3f0262cSandi
571f3f0262cSandi    /**
572f3f0262cSandi     * Compute the length of the Longest Common Subsequence (LCS).
573f3f0262cSandi     *
574f3f0262cSandi     * This is mostly for diagnostic purposed.
575f3f0262cSandi     *
576f3f0262cSandi     * @return int The length of the LCS.
577f3f0262cSandi     */
578f3f0262cSandi    function lcs() {
579f3f0262cSandi        $lcs = 0;
580f3f0262cSandi        foreach ($this->edits as $edit) {
581f3f0262cSandi            if ($edit->type == 'copy')
582f5b78785SAndreas Gohr                $lcs += count($edit->orig);
583f3f0262cSandi        }
584f3f0262cSandi        return $lcs;
585f3f0262cSandi    }
586f3f0262cSandi
587f3f0262cSandi    /**
588f3f0262cSandi     * Get the original set of lines.
589f3f0262cSandi     *
590f3f0262cSandi     * This reconstructs the $from_lines parameter passed to the
591f3f0262cSandi     * constructor.
592f3f0262cSandi     *
593f3f0262cSandi     * @return array The original sequence of strings.
594f3f0262cSandi     */
595f3f0262cSandi    function orig() {
596f3f0262cSandi        $lines = array();
597f3f0262cSandi
598f3f0262cSandi        foreach ($this->edits as $edit) {
599f3f0262cSandi            if ($edit->orig)
600f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->orig);
601f3f0262cSandi        }
602f3f0262cSandi        return $lines;
603f3f0262cSandi    }
604f3f0262cSandi
605f3f0262cSandi    /**
606f3f0262cSandi     * Get the closing set of lines.
607f3f0262cSandi     *
608f3f0262cSandi     * This reconstructs the $to_lines parameter passed to the
609f3f0262cSandi     * constructor.
610f3f0262cSandi     *
611f3f0262cSandi     * @return array The sequence of strings.
612f3f0262cSandi     */
613f3f0262cSandi    function closing() {
614f3f0262cSandi        $lines = array();
615f3f0262cSandi
616f3f0262cSandi        foreach ($this->edits as $edit) {
617f3f0262cSandi            if ($edit->closing)
618f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->closing);
619f3f0262cSandi        }
620f3f0262cSandi        return $lines;
621f3f0262cSandi    }
622f3f0262cSandi
623f3f0262cSandi    /**
624f3f0262cSandi     * Check a Diff for validity.
625f3f0262cSandi     *
626f3f0262cSandi     * This is here only for debugging purposes.
627f3f0262cSandi     */
628f3f0262cSandi    function _check($from_lines, $to_lines) {
629f3f0262cSandi        if (serialize($from_lines) != serialize($this->orig()))
630f3f0262cSandi            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
631f3f0262cSandi        if (serialize($to_lines) != serialize($this->closing()))
632f3f0262cSandi            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
633f3f0262cSandi
634f3f0262cSandi        $rev = $this->reverse();
635f3f0262cSandi        if (serialize($to_lines) != serialize($rev->orig()))
636f3f0262cSandi            trigger_error("Reversed original doesn't match", E_USER_ERROR);
637f3f0262cSandi        if (serialize($from_lines) != serialize($rev->closing()))
638f3f0262cSandi            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
639f3f0262cSandi
640f3f0262cSandi        $prevtype = 'none';
641f3f0262cSandi        foreach ($this->edits as $edit) {
642f3f0262cSandi            if ($prevtype == $edit->type)
643f3f0262cSandi                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
644f3f0262cSandi            $prevtype = $edit->type;
645f3f0262cSandi        }
646f3f0262cSandi
647f3f0262cSandi        $lcs = $this->lcs();
648f3f0262cSandi        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
649f3f0262cSandi    }
650f3f0262cSandi}
651f3f0262cSandi
652f3f0262cSandi/**
653f3f0262cSandi * FIXME: bad name.
654f3f0262cSandi */
655f5b78785SAndreas Gohrclass MappedDiff extends Diff {
656f3f0262cSandi    /**
657*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
658*988c1340SPiyush Mishra     */
659*988c1340SPiyush Mishra    function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
660*988c1340SPiyush Mishra        $this->__construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines);
661*988c1340SPiyush Mishra    }
662*988c1340SPiyush Mishra
663*988c1340SPiyush Mishra    /**
664f3f0262cSandi     * Constructor.
665f3f0262cSandi     *
666f3f0262cSandi     * Computes diff between sequences of strings.
667f3f0262cSandi     *
668f3f0262cSandi     * This can be used to compute things like
669f3f0262cSandi     * case-insensitve diffs, or diffs which ignore
670f3f0262cSandi     * changes in white-space.
671f3f0262cSandi     *
672f3f0262cSandi     * @param $from_lines array An array of strings.
673f3f0262cSandi     *  (Typically these are lines from a file.)
674f3f0262cSandi     *
675f3f0262cSandi     * @param $to_lines array An array of strings.
676f3f0262cSandi     *
677f3f0262cSandi     * @param $mapped_from_lines array This array should
678f3f0262cSandi     *  have the same size number of elements as $from_lines.
679f3f0262cSandi     *  The elements in $mapped_from_lines and
680f3f0262cSandi     *  $mapped_to_lines are what is actually compared
681f3f0262cSandi     *  when computing the diff.
682f3f0262cSandi     *
683f3f0262cSandi     * @param $mapped_to_lines array This array should
684f3f0262cSandi     *  have the same number of elements as $to_lines.
685f3f0262cSandi     */
686*988c1340SPiyush Mishra    function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
687f3f0262cSandi
688f5b78785SAndreas Gohr        assert(count($from_lines) == count($mapped_from_lines));
689f5b78785SAndreas Gohr        assert(count($to_lines) == count($mapped_to_lines));
690f3f0262cSandi
691*988c1340SPiyush Mishra        parent::__construct($mapped_from_lines, $mapped_to_lines);
692f3f0262cSandi
693f3f0262cSandi        $xi = $yi = 0;
694f5b78785SAndreas Gohr        $ecnt = count($this->edits);
695f5b78785SAndreas Gohr        for ($i = 0; $i < $ecnt; $i++) {
696f3f0262cSandi            $orig = &$this->edits[$i]->orig;
697f3f0262cSandi            if (is_array($orig)) {
698f5b78785SAndreas Gohr                $orig = array_slice($from_lines, $xi, count($orig));
699f5b78785SAndreas Gohr                $xi += count($orig);
700f3f0262cSandi            }
701f3f0262cSandi
702f3f0262cSandi            $closing = &$this->edits[$i]->closing;
703f3f0262cSandi            if (is_array($closing)) {
704f5b78785SAndreas Gohr                $closing = array_slice($to_lines, $yi, count($closing));
705f5b78785SAndreas Gohr                $yi += count($closing);
706f3f0262cSandi            }
707f3f0262cSandi        }
708f3f0262cSandi    }
709f3f0262cSandi}
710f3f0262cSandi
711f3f0262cSandi/**
712f3f0262cSandi * A class to format Diffs
713f3f0262cSandi *
714f3f0262cSandi * This class formats the diff in classic diff format.
715f3f0262cSandi * It is intended that this class be customized via inheritance,
716f3f0262cSandi * to obtain fancier outputs.
717f3f0262cSandi */
718f5b78785SAndreas Gohrclass DiffFormatter {
719f3f0262cSandi    /**
720f3f0262cSandi     * Number of leading context "lines" to preserve.
721f3f0262cSandi     *
722f3f0262cSandi     * This should be left at zero for this class, but subclasses
723f3f0262cSandi     * may want to set this to other values.
724f3f0262cSandi     */
725f3f0262cSandi    var $leading_context_lines = 0;
726f3f0262cSandi
727f3f0262cSandi    /**
728f3f0262cSandi     * Number of trailing context "lines" to preserve.
729f3f0262cSandi     *
730f3f0262cSandi     * This should be left at zero for this class, but subclasses
731f3f0262cSandi     * may want to set this to other values.
732f3f0262cSandi     */
733f3f0262cSandi    var $trailing_context_lines = 0;
734f3f0262cSandi
735f3f0262cSandi    /**
736f3f0262cSandi     * Format a diff.
737f3f0262cSandi     *
738f3f0262cSandi     * @param $diff object A Diff object.
739f3f0262cSandi     * @return string The formatted output.
740f3f0262cSandi     */
741f3f0262cSandi    function format($diff) {
742f3f0262cSandi
743f3f0262cSandi        $xi = $yi = 1;
744f3f0262cSandi        $block = false;
745f3f0262cSandi        $context = array();
746f3f0262cSandi
747f3f0262cSandi        $nlead = $this->leading_context_lines;
748f3f0262cSandi        $ntrail = $this->trailing_context_lines;
749f3f0262cSandi
750f3f0262cSandi        $this->_start_diff();
751f3f0262cSandi
752f3f0262cSandi        foreach ($diff->edits as $edit) {
753f3f0262cSandi            if ($edit->type == 'copy') {
754f3f0262cSandi                if (is_array($block)) {
755f5b78785SAndreas Gohr                    if (count($edit->orig) <= $nlead + $ntrail) {
756f3f0262cSandi                        $block[] = $edit;
757f3f0262cSandi                    }
758f3f0262cSandi                    else{
759f3f0262cSandi                        if ($ntrail) {
760f3f0262cSandi                            $context = array_slice($edit->orig, 0, $ntrail);
761f3f0262cSandi                            $block[] = new _DiffOp_Copy($context);
762f3f0262cSandi                        }
7637deca91bSRobin Getz                        $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
764f3f0262cSandi                        $block = false;
765f3f0262cSandi                    }
766f3f0262cSandi                }
767f3f0262cSandi                $context = $edit->orig;
768f3f0262cSandi            }
769f3f0262cSandi            else {
770f3f0262cSandi                if (! is_array($block)) {
771f5b78785SAndreas Gohr                    $context = array_slice($context, count($context) - $nlead);
772f5b78785SAndreas Gohr                    $x0 = $xi - count($context);
773f5b78785SAndreas Gohr                    $y0 = $yi - count($context);
774f3f0262cSandi                    $block = array();
775f3f0262cSandi                    if ($context)
776f3f0262cSandi                        $block[] = new _DiffOp_Copy($context);
777f3f0262cSandi                }
778f3f0262cSandi                $block[] = $edit;
779f3f0262cSandi            }
780f3f0262cSandi
781f3f0262cSandi            if ($edit->orig)
782f5b78785SAndreas Gohr                $xi += count($edit->orig);
783f3f0262cSandi            if ($edit->closing)
784f5b78785SAndreas Gohr                $yi += count($edit->closing);
785f3f0262cSandi        }
786f3f0262cSandi
787f3f0262cSandi        if (is_array($block))
7887deca91bSRobin Getz            $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
789f3f0262cSandi
790f3f0262cSandi        return $this->_end_diff();
791f3f0262cSandi    }
792f3f0262cSandi
793f3f0262cSandi    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
794f3f0262cSandi        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
795f3f0262cSandi        foreach ($edits as $edit) {
796f3f0262cSandi            if ($edit->type == 'copy')
797f3f0262cSandi                $this->_context($edit->orig);
798f3f0262cSandi            elseif ($edit->type == 'add')
799f3f0262cSandi                $this->_added($edit->closing);
800f3f0262cSandi            elseif ($edit->type == 'delete')
801f3f0262cSandi                $this->_deleted($edit->orig);
802f3f0262cSandi            elseif ($edit->type == 'change')
803f3f0262cSandi                $this->_changed($edit->orig, $edit->closing);
804f3f0262cSandi            else
805f3f0262cSandi                trigger_error("Unknown edit type", E_USER_ERROR);
806f3f0262cSandi        }
807f3f0262cSandi        $this->_end_block();
808f3f0262cSandi    }
809f3f0262cSandi
810f3f0262cSandi    function _start_diff() {
811f3f0262cSandi        ob_start();
812f3f0262cSandi    }
813f3f0262cSandi
814f3f0262cSandi    function _end_diff() {
815f3f0262cSandi        $val = ob_get_contents();
816f3f0262cSandi        ob_end_clean();
817f3f0262cSandi        return $val;
818f3f0262cSandi    }
819f3f0262cSandi
820f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
821f3f0262cSandi        if ($xlen > 1)
822f3f0262cSandi            $xbeg .= "," . ($xbeg + $xlen - 1);
823f3f0262cSandi        if ($ylen > 1)
824f3f0262cSandi            $ybeg .= "," . ($ybeg + $ylen - 1);
825f3f0262cSandi
826f3f0262cSandi        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
827f3f0262cSandi    }
828f3f0262cSandi
829f3f0262cSandi    function _start_block($header) {
830f3f0262cSandi        echo $header;
831f3f0262cSandi    }
832f3f0262cSandi
833f3f0262cSandi    function _end_block() {
834f3f0262cSandi    }
835f3f0262cSandi
836f3f0262cSandi    function _lines($lines, $prefix = ' ') {
837f3f0262cSandi        foreach ($lines as $line)
838f3f0262cSandi            echo "$prefix $line\n";
839f3f0262cSandi    }
840f3f0262cSandi
841f3f0262cSandi    function _context($lines) {
842f3f0262cSandi        $this->_lines($lines);
843f3f0262cSandi    }
844f3f0262cSandi
845f3f0262cSandi    function _added($lines) {
846f3f0262cSandi        $this->_lines($lines, ">");
847f3f0262cSandi    }
848f3f0262cSandi    function _deleted($lines) {
849f3f0262cSandi        $this->_lines($lines, "<");
850f3f0262cSandi    }
851f3f0262cSandi
852f3f0262cSandi    function _changed($orig, $closing) {
853f3f0262cSandi        $this->_deleted($orig);
854f3f0262cSandi        echo "---\n";
855f3f0262cSandi        $this->_added($closing);
856f3f0262cSandi    }
857f3f0262cSandi}
858f3f0262cSandi
859f3f0262cSandi
860f3f0262cSandi/**
861f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
862f3f0262cSandi *
863f3f0262cSandi */
864f3f0262cSandi
8659b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
866f3f0262cSandi
867f3f0262cSandiclass _HWLDF_WordAccumulator {
868*988c1340SPiyush Mishra    /**
869*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
870*988c1340SPiyush Mishra     */
871f3f0262cSandi    function _HWLDF_WordAccumulator() {
872*988c1340SPiyush Mishra        $this->__construct();
873*988c1340SPiyush Mishra    }
874*988c1340SPiyush Mishra
875*988c1340SPiyush Mishra    function __construct() {
876f3f0262cSandi        $this->_lines = array();
877f3f0262cSandi        $this->_line = '';
878f3f0262cSandi        $this->_group = '';
879f3f0262cSandi        $this->_tag = '';
880f3f0262cSandi    }
881f3f0262cSandi
882f3f0262cSandi    function _flushGroup($new_tag) {
883f3f0262cSandi        if ($this->_group !== '') {
884f3f0262cSandi            if ($this->_tag == 'mark')
885ed7ecb79SAnika Henke                $this->_line .= '<strong>'.$this->_group.'</strong>';
886812bb04eSRobin Getz            elseif ($this->_tag == 'add')
887812bb04eSRobin Getz                $this->_line .= '<span class="diff-addedline">'.$this->_group.'</span>';
888812bb04eSRobin Getz            elseif ($this->_tag == 'del')
889812bb04eSRobin Getz                $this->_line .= '<span class="diff-deletedline"><del>'.$this->_group.'</del></span>';
890f3f0262cSandi            else
891f3f0262cSandi                $this->_line .= $this->_group;
892f3f0262cSandi        }
893f3f0262cSandi        $this->_group = '';
894f3f0262cSandi        $this->_tag = $new_tag;
895f3f0262cSandi    }
896f3f0262cSandi
897f3f0262cSandi    function _flushLine($new_tag) {
898f3f0262cSandi        $this->_flushGroup($new_tag);
899f3f0262cSandi        if ($this->_line != '')
900f3f0262cSandi            $this->_lines[] = $this->_line;
901f3f0262cSandi        $this->_line = '';
902f3f0262cSandi    }
903f3f0262cSandi
904f3f0262cSandi    function addWords($words, $tag = '') {
905f3f0262cSandi        if ($tag != $this->_tag)
906f3f0262cSandi            $this->_flushGroup($tag);
907f3f0262cSandi
908f3f0262cSandi        foreach ($words as $word) {
909f3f0262cSandi            // new-line should only come as first char of word.
910f3f0262cSandi            if ($word == '')
911f3f0262cSandi                continue;
912f3f0262cSandi            if ($word[0] == "\n") {
913f3f0262cSandi                $this->_group .= NBSP;
914f3f0262cSandi                $this->_flushLine($tag);
915f3f0262cSandi                $word = substr($word, 1);
916f3f0262cSandi            }
917f3f0262cSandi            assert(!strstr($word, "\n"));
918f3f0262cSandi            $this->_group .= $word;
919f3f0262cSandi        }
920f3f0262cSandi    }
921f3f0262cSandi
922f3f0262cSandi    function getLines() {
923f3f0262cSandi        $this->_flushLine('~done');
924f3f0262cSandi        return $this->_lines;
925f3f0262cSandi    }
926f3f0262cSandi}
927f3f0262cSandi
928f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff {
929f5b78785SAndreas Gohr
930*988c1340SPiyush Mishra    /**
931*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
932*988c1340SPiyush Mishra     */
933f3f0262cSandi    function WordLevelDiff($orig_lines, $closing_lines) {
934*988c1340SPiyush Mishra        $this->__construct($orig_lines, $closing_lines);
935*988c1340SPiyush Mishra    }
936*988c1340SPiyush Mishra
937*988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
938f3f0262cSandi        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
939f3f0262cSandi        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
940f3f0262cSandi
941*988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
942f3f0262cSandi    }
943f3f0262cSandi
944f3f0262cSandi    function _split($lines) {
9455e1ee188SXin LI        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
9467deca91bSRobin Getz             implode("\n", $lines), $m)) {
947f3f0262cSandi            return array(array(''), array(''));
948f3f0262cSandi        }
949f3f0262cSandi        return array($m[0], $m[1]);
950f3f0262cSandi    }
951f3f0262cSandi
952f3f0262cSandi    function orig() {
953f3f0262cSandi        $orig = new _HWLDF_WordAccumulator;
954f3f0262cSandi
955f3f0262cSandi        foreach ($this->edits as $edit) {
956f3f0262cSandi            if ($edit->type == 'copy')
957f3f0262cSandi                $orig->addWords($edit->orig);
958f3f0262cSandi            elseif ($edit->orig)
959f3f0262cSandi                $orig->addWords($edit->orig, 'mark');
960f3f0262cSandi        }
961f3f0262cSandi        return $orig->getLines();
962f3f0262cSandi    }
963f3f0262cSandi
964f3f0262cSandi    function closing() {
965f3f0262cSandi        $closing = new _HWLDF_WordAccumulator;
966f3f0262cSandi
967f3f0262cSandi        foreach ($this->edits as $edit) {
968f3f0262cSandi            if ($edit->type == 'copy')
969f3f0262cSandi                $closing->addWords($edit->closing);
970f3f0262cSandi            elseif ($edit->closing)
971f3f0262cSandi                $closing->addWords($edit->closing, 'mark');
972f3f0262cSandi        }
973f3f0262cSandi        return $closing->getLines();
974f3f0262cSandi    }
975f3f0262cSandi}
976f3f0262cSandi
977812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff {
978812bb04eSRobin Getz
979*988c1340SPiyush Mishra    /**
980*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
981*988c1340SPiyush Mishra     */
982812bb04eSRobin Getz    function InlineWordLevelDiff($orig_lines, $closing_lines) {
983*988c1340SPiyush Mishra        $this->__construct($orig_lines, $closing_lines);
984*988c1340SPiyush Mishra    }
985*988c1340SPiyush Mishra
986*988c1340SPiyush Mishra    function __construct($orig_lines, $closing_lines) {
987812bb04eSRobin Getz        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
988812bb04eSRobin Getz        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
989812bb04eSRobin Getz
990*988c1340SPiyush Mishra        parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
991812bb04eSRobin Getz    }
992812bb04eSRobin Getz
993812bb04eSRobin Getz    function _split($lines) {
994c5982caaSDanny Lin        if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu',
995812bb04eSRobin Getz             implode("\n", $lines), $m)) {
996812bb04eSRobin Getz            return array(array(''), array(''));
997812bb04eSRobin Getz        }
998812bb04eSRobin Getz        return array($m[0], $m[1]);
999812bb04eSRobin Getz    }
1000812bb04eSRobin Getz
1001812bb04eSRobin Getz    function inline() {
1002812bb04eSRobin Getz        $orig = new _HWLDF_WordAccumulator;
1003812bb04eSRobin Getz        foreach ($this->edits as $edit) {
1004812bb04eSRobin Getz            if ($edit->type == 'copy')
10054f2305cbSAdrian Lang                $orig->addWords($edit->closing);
1006812bb04eSRobin Getz            elseif ($edit->type == 'change'){
1007812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1008812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1009812bb04eSRobin Getz            } elseif ($edit->type == 'delete')
1010812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1011812bb04eSRobin Getz            elseif ($edit->type == 'add')
1012812bb04eSRobin Getz                $orig->addWords($edit->closing, 'add');
1013812bb04eSRobin Getz            elseif ($edit->orig)
1014812bb04eSRobin Getz                $orig->addWords($edit->orig, 'del');
1015812bb04eSRobin Getz        }
1016812bb04eSRobin Getz        return $orig->getLines();
1017812bb04eSRobin Getz    }
1018812bb04eSRobin Getz}
1019812bb04eSRobin Getz
1020f3f0262cSandi/**
1021f3f0262cSandi * "Unified" diff formatter.
1022f3f0262cSandi *
1023f3f0262cSandi * This class formats the diff in classic "unified diff" format.
1024f3f0262cSandi */
1025f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter {
1026f5b78785SAndreas Gohr
1027*988c1340SPiyush Mishra    /**
1028*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
1029*988c1340SPiyush Mishra     */
1030f3f0262cSandi    function UnifiedDiffFormatter($context_lines = 4) {
1031*988c1340SPiyush Mishra        $this->__construct($context_lines);
1032*988c1340SPiyush Mishra    }
1033*988c1340SPiyush Mishra
1034*988c1340SPiyush Mishra    function __construct($context_lines = 4) {
1035f3f0262cSandi        $this->leading_context_lines = $context_lines;
1036f3f0262cSandi        $this->trailing_context_lines = $context_lines;
1037f3f0262cSandi    }
1038f3f0262cSandi
1039f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1040f3f0262cSandi        if ($xlen != 1)
1041f3f0262cSandi            $xbeg .= "," . $xlen;
1042f3f0262cSandi        if ($ylen != 1)
1043f3f0262cSandi            $ybeg .= "," . $ylen;
1044f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
1045f3f0262cSandi    }
1046f3f0262cSandi
1047f3f0262cSandi    function _added($lines) {
1048f3f0262cSandi        $this->_lines($lines, "+");
1049f3f0262cSandi    }
1050f3f0262cSandi    function _deleted($lines) {
1051f3f0262cSandi        $this->_lines($lines, "-");
1052f3f0262cSandi    }
1053f3f0262cSandi    function _changed($orig, $final) {
1054f3f0262cSandi        $this->_deleted($orig);
1055f3f0262cSandi        $this->_added($final);
1056f3f0262cSandi    }
1057f3f0262cSandi}
1058f3f0262cSandi
1059f3f0262cSandi/**
1060f3f0262cSandi *  Wikipedia Table style diff formatter.
1061f3f0262cSandi *
1062f3f0262cSandi */
1063f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter {
1064f5b78785SAndreas Gohr
1065*988c1340SPiyush Mishra    /**
1066*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
1067*988c1340SPiyush Mishra     */
1068f3f0262cSandi    function TableDiffFormatter() {
1069*988c1340SPiyush Mishra        $this->__construct();
1070*988c1340SPiyush Mishra    }
1071*988c1340SPiyush Mishra
1072*988c1340SPiyush Mishra    function __construct() {
1073f3f0262cSandi        $this->leading_context_lines = 2;
1074f3f0262cSandi        $this->trailing_context_lines = 2;
1075f3f0262cSandi    }
1076f3f0262cSandi
10772d880650SAdrian Lang    function format($diff) {
10782d880650SAdrian Lang        // Preserve whitespaces by converting some to non-breaking spaces.
10792d880650SAdrian Lang        // Do not convert all of them to allow word-wrap.
10802d880650SAdrian Lang        $val = parent::format($diff);
10812d880650SAdrian Lang        $val = str_replace('  ','&nbsp; ', $val);
10822d880650SAdrian Lang        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
10832d880650SAdrian Lang        return $val;
10842d880650SAdrian Lang    }
10852d880650SAdrian Lang
1086f3f0262cSandi    function _pre($text){
1087f3f0262cSandi        $text = htmlspecialchars($text);
1088f3f0262cSandi        return $text;
1089f3f0262cSandi    }
1090f3f0262cSandi
1091f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1092f3f0262cSandi        global $lang;
1093f3f0262cSandi        $l1 = $lang['line'].' '.$xbeg;
1094f3f0262cSandi        $l2 = $lang['line'].' '.$ybeg;
10955048c277SAnika Henke        $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n".
10965048c277SAnika Henke             '    <td class="diff-blockheader" colspan="2">'.$l2.":</td>\n".
10975048c277SAnika Henke             "</tr>\n";
1098f3f0262cSandi        return $r;
1099f3f0262cSandi    }
1100f3f0262cSandi
1101f3f0262cSandi    function _start_block($header) {
1102f3f0262cSandi        print($header);
1103f3f0262cSandi    }
1104f3f0262cSandi
1105f3f0262cSandi    function _end_block() {
1106f3f0262cSandi    }
1107f3f0262cSandi
1108f3f0262cSandi    function _lines($lines, $prefix=' ', $color="white") {
1109f3f0262cSandi    }
1110f3f0262cSandi
1111f3f0262cSandi    function addedLine($line) {
11127deca91bSRobin Getz        return '<td>+</td><td class="diff-addedline">' .  $line.'</td>';
1113f3f0262cSandi    }
1114f3f0262cSandi
1115f3f0262cSandi    function deletedLine($line) {
11167deca91bSRobin Getz        return '<td>-</td><td class="diff-deletedline">' .  $line.'</td>';
1117f3f0262cSandi    }
1118f3f0262cSandi
1119f3f0262cSandi    function emptyLine() {
1120f3f0262cSandi        return '<td colspan="2">&nbsp;</td>';
1121f3f0262cSandi    }
1122f3f0262cSandi
1123f3f0262cSandi    function contextLine($line) {
1124f3f0262cSandi        return '<td> </td><td class="diff-context">'.$line.'</td>';
1125f3f0262cSandi    }
1126f3f0262cSandi
1127f3f0262cSandi    function _added($lines) {
1128f3f0262cSandi        foreach ($lines as $line) {
11297deca91bSRobin Getz            print('<tr>' . $this->emptyLine() . $this->addedLine($line) . "</tr>\n");
1130f3f0262cSandi        }
1131f3f0262cSandi    }
1132f3f0262cSandi
1133f3f0262cSandi    function _deleted($lines) {
1134f3f0262cSandi        foreach ($lines as $line) {
11357deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n");
1136f3f0262cSandi        }
1137f3f0262cSandi    }
1138f3f0262cSandi
1139f3f0262cSandi    function _context($lines) {
1140f3f0262cSandi        foreach ($lines as $line) {
11417deca91bSRobin Getz            print('<tr>' . $this->contextLine($line) .  $this->contextLine($line) . "</tr>\n");
1142f3f0262cSandi        }
1143f3f0262cSandi    }
1144f3f0262cSandi
1145f3f0262cSandi    function _changed($orig, $closing) {
1146f3f0262cSandi        $diff = new WordLevelDiff($orig, $closing);
1147f3f0262cSandi        $del = $diff->orig();
1148f3f0262cSandi        $add = $diff->closing();
1149f3f0262cSandi
1150f3f0262cSandi        while ($line = array_shift($del)) {
1151f3f0262cSandi            $aline = array_shift($add);
11527deca91bSRobin Getz            print('<tr>' . $this->deletedLine($line) . $this->addedLine($aline) . "</tr>\n");
1153f3f0262cSandi        }
1154f3f0262cSandi        $this->_added($add); # If any leftovers
1155f3f0262cSandi    }
1156f3f0262cSandi}
1157f3f0262cSandi
1158812bb04eSRobin Getz/**
1159812bb04eSRobin Getz *  Inline style diff formatter.
1160812bb04eSRobin Getz *
1161812bb04eSRobin Getz */
1162812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter {
1163c495dc45SAndreas Gohr    var $colspan = 4;
1164*988c1340SPiyush Mishra    /**
1165*988c1340SPiyush Mishra     * DONOT USE THIS. Its just to make sure nothing breaks because of the name change.
1166*988c1340SPiyush Mishra     */
1167812bb04eSRobin Getz    function InlineDiffFormatter() {
1168*988c1340SPiyush Mishra        $this->__construct();
1169*988c1340SPiyush Mishra    }
1170*988c1340SPiyush Mishra
1171*988c1340SPiyush Mishra    function __construct() {
1172812bb04eSRobin Getz        $this->leading_context_lines = 2;
1173812bb04eSRobin Getz        $this->trailing_context_lines = 2;
1174812bb04eSRobin Getz    }
1175812bb04eSRobin Getz
1176812bb04eSRobin Getz    function format($diff) {
1177812bb04eSRobin Getz        // Preserve whitespaces by converting some to non-breaking spaces.
1178812bb04eSRobin Getz        // Do not convert all of them to allow word-wrap.
1179812bb04eSRobin Getz        $val = parent::format($diff);
1180812bb04eSRobin Getz        $val = str_replace('  ','&nbsp; ', $val);
1181812bb04eSRobin Getz        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
1182812bb04eSRobin Getz        return $val;
1183812bb04eSRobin Getz    }
1184812bb04eSRobin Getz
1185812bb04eSRobin Getz    function _pre($text){
1186812bb04eSRobin Getz        $text = htmlspecialchars($text);
1187812bb04eSRobin Getz        return $text;
1188812bb04eSRobin Getz    }
1189812bb04eSRobin Getz
1190812bb04eSRobin Getz    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
1191812bb04eSRobin Getz        global $lang;
1192812bb04eSRobin Getz        if ($xlen != 1)
1193812bb04eSRobin Getz            $xbeg .= "," . $xlen;
1194812bb04eSRobin Getz        if ($ylen != 1)
1195812bb04eSRobin Getz            $ybeg .= "," . $ylen;
1196c495dc45SAndreas Gohr        $r = '<tr><td colspan="'.$this->colspan.'" class="diff-blockheader">@@ '.$lang['line']." -$xbeg +$ybeg @@";
1197812bb04eSRobin Getz        $r .= ' <span class="diff-deletedline"><del>'.$lang['deleted'].'</del></span>';
1198812bb04eSRobin Getz        $r .= ' <span class="diff-addedline">'.$lang['created'].'</span>';
1199812bb04eSRobin Getz        $r .= "</td></tr>\n";
1200812bb04eSRobin Getz        return $r;
1201812bb04eSRobin Getz    }
1202812bb04eSRobin Getz
1203812bb04eSRobin Getz    function _start_block($header) {
1204812bb04eSRobin Getz        print($header."\n");
1205812bb04eSRobin Getz    }
1206812bb04eSRobin Getz
1207812bb04eSRobin Getz    function _end_block() {
1208812bb04eSRobin Getz    }
1209812bb04eSRobin Getz
1210812bb04eSRobin Getz    function _lines($lines, $prefix=' ', $color="white") {
1211812bb04eSRobin Getz    }
1212812bb04eSRobin Getz
1213812bb04eSRobin Getz    function _added($lines) {
1214812bb04eSRobin Getz        foreach ($lines as $line) {
1215c495dc45SAndreas Gohr            print('<tr><td colspan="'.$this->colspan.'" class="diff-addedline">'. $line . "</td></tr>\n");
1216812bb04eSRobin Getz        }
1217812bb04eSRobin Getz    }
1218812bb04eSRobin Getz
1219812bb04eSRobin Getz    function _deleted($lines) {
1220812bb04eSRobin Getz        foreach ($lines as $line) {
1221c495dc45SAndreas Gohr            print('<tr><td colspan="'.$this->colspan.'" class="diff-deletedline"><del>' . $line . "</del></td></tr>\n");
1222812bb04eSRobin Getz        }
1223812bb04eSRobin Getz    }
1224812bb04eSRobin Getz
1225812bb04eSRobin Getz    function _context($lines) {
1226812bb04eSRobin Getz        foreach ($lines as $line) {
1227c495dc45SAndreas Gohr            print('<tr><td colspan="'.$this->colspan.'" class="diff-context">'.$line."</td></tr>\n");
1228812bb04eSRobin Getz        }
1229812bb04eSRobin Getz    }
1230812bb04eSRobin Getz
1231812bb04eSRobin Getz    function _changed($orig, $closing) {
1232812bb04eSRobin Getz        $diff = new InlineWordLevelDiff($orig, $closing);
1233812bb04eSRobin Getz        $add = $diff->inline();
1234812bb04eSRobin Getz
1235812bb04eSRobin Getz        foreach ($add as $line)
1236c495dc45SAndreas Gohr            print('<tr><td colspan="'.$this->colspan.'">'.$line."</td></tr>\n");
1237812bb04eSRobin Getz    }
1238812bb04eSRobin Getz}
1239812bb04eSRobin Getz
1240340756e4Sandi
1241e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 :
1242