xref: /dokuwiki/inc/DifferenceEngine.php (revision f5b787850258b8f9d343ed55434dbec1631f3092)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi    var $type;
14f3f0262cSandi    var $orig;
15f3f0262cSandi    var $closing;
16f3f0262cSandi
17f3f0262cSandi    function reverse() {
18f3f0262cSandi        trigger_error("pure virtual", E_USER_ERROR);
19f3f0262cSandi    }
20f3f0262cSandi
21f3f0262cSandi    function norig() {
22*f5b78785SAndreas Gohr        return $this->orig ? count($this->orig) : 0;
23f3f0262cSandi    }
24f3f0262cSandi
25f3f0262cSandi    function nclosing() {
26*f5b78785SAndreas Gohr        return $this->closing ? count($this->closing) : 0;
27f3f0262cSandi    }
28f3f0262cSandi}
29f3f0262cSandi
30f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
31f3f0262cSandi    var $type = 'copy';
32f3f0262cSandi
33f3f0262cSandi    function _DiffOp_Copy ($orig, $closing = false) {
34f3f0262cSandi        if (!is_array($closing))
35f3f0262cSandi            $closing = $orig;
36f3f0262cSandi        $this->orig = $orig;
37f3f0262cSandi        $this->closing = $closing;
38f3f0262cSandi    }
39f3f0262cSandi
40f3f0262cSandi    function reverse() {
41f3f0262cSandi        return new _DiffOp_Copy($this->closing, $this->orig);
42f3f0262cSandi    }
43f3f0262cSandi}
44f3f0262cSandi
45f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
46f3f0262cSandi    var $type = 'delete';
47f3f0262cSandi
48f3f0262cSandi    function _DiffOp_Delete ($lines) {
49f3f0262cSandi        $this->orig = $lines;
50f3f0262cSandi        $this->closing = false;
51f3f0262cSandi    }
52f3f0262cSandi
53f3f0262cSandi    function reverse() {
54f3f0262cSandi        return new _DiffOp_Add($this->orig);
55f3f0262cSandi    }
56f3f0262cSandi}
57f3f0262cSandi
58f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
59f3f0262cSandi    var $type = 'add';
60f3f0262cSandi
61f3f0262cSandi    function _DiffOp_Add ($lines) {
62f3f0262cSandi        $this->closing = $lines;
63f3f0262cSandi        $this->orig = false;
64f3f0262cSandi    }
65f3f0262cSandi
66f3f0262cSandi    function reverse() {
67f3f0262cSandi        return new _DiffOp_Delete($this->closing);
68f3f0262cSandi    }
69f3f0262cSandi}
70f3f0262cSandi
71f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
72f3f0262cSandi    var $type = 'change';
73f3f0262cSandi
74f3f0262cSandi    function _DiffOp_Change ($orig, $closing) {
75f3f0262cSandi        $this->orig = $orig;
76f3f0262cSandi        $this->closing = $closing;
77f3f0262cSandi    }
78f3f0262cSandi
79f3f0262cSandi    function reverse() {
80f3f0262cSandi        return new _DiffOp_Change($this->closing, $this->orig);
81f3f0262cSandi    }
82f3f0262cSandi}
83f3f0262cSandi
84f3f0262cSandi
85f3f0262cSandi/**
86f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
87f3f0262cSandi *
88f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
89f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
90f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
91f3f0262cSandi *
92f3f0262cSandi * More ideas are taken from:
93f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
94f3f0262cSandi *
95f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
96f3f0262cSandi * diffutils-2.7, which can be found at:
97f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
98f3f0262cSandi *
99f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
100f3f0262cSandi * are my own.
101f3f0262cSandi *
102f3f0262cSandi * @author Geoffrey T. Dairiki
103f3f0262cSandi * @access private
104f3f0262cSandi */
105*f5b78785SAndreas Gohrclass _DiffEngine {
106*f5b78785SAndreas Gohr
107f3f0262cSandi    function diff ($from_lines, $to_lines) {
108*f5b78785SAndreas Gohr        $n_from = count($from_lines);
109*f5b78785SAndreas Gohr        $n_to = count($to_lines);
110f3f0262cSandi
111f3f0262cSandi        $this->xchanged = $this->ychanged = array();
112f3f0262cSandi        $this->xv = $this->yv = array();
113f3f0262cSandi        $this->xind = $this->yind = array();
114f3f0262cSandi        unset($this->seq);
115f3f0262cSandi        unset($this->in_seq);
116f3f0262cSandi        unset($this->lcs);
117f3f0262cSandi
118f3f0262cSandi        // Skip leading common lines.
119f3f0262cSandi        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
120f3f0262cSandi            if ($from_lines[$skip] != $to_lines[$skip])
121f3f0262cSandi                break;
122f3f0262cSandi            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
123f3f0262cSandi        }
124f3f0262cSandi        // Skip trailing common lines.
125*f5b78785SAndreas Gohr        $xi = $n_from;
126*f5b78785SAndreas Gohr        $yi = $n_to;
127f3f0262cSandi        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
128f3f0262cSandi            if ($from_lines[$xi] != $to_lines[$yi])
129f3f0262cSandi                break;
130f3f0262cSandi            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
131f3f0262cSandi        }
132f3f0262cSandi
133f3f0262cSandi        // Ignore lines which do not exist in both files.
134f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
135f3f0262cSandi            $xhash[$from_lines[$xi]] = 1;
136f3f0262cSandi        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
137f3f0262cSandi            $line = $to_lines[$yi];
138f3f0262cSandi            if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
139f3f0262cSandi                continue;
140f3f0262cSandi            $yhash[$line] = 1;
141f3f0262cSandi            $this->yv[] = $line;
142f3f0262cSandi            $this->yind[] = $yi;
143f3f0262cSandi        }
144f3f0262cSandi        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
145f3f0262cSandi            $line = $from_lines[$xi];
146f3f0262cSandi            if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
147f3f0262cSandi                continue;
148f3f0262cSandi            $this->xv[] = $line;
149f3f0262cSandi            $this->xind[] = $xi;
150f3f0262cSandi        }
151f3f0262cSandi
152f3f0262cSandi        // Find the LCS.
153*f5b78785SAndreas Gohr        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
154f3f0262cSandi
155f3f0262cSandi        // Merge edits when possible
156f3f0262cSandi        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
157f3f0262cSandi        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
158f3f0262cSandi
159f3f0262cSandi        // Compute the edit operations.
160f3f0262cSandi        $edits = array();
161f3f0262cSandi        $xi = $yi = 0;
162f3f0262cSandi        while ($xi < $n_from || $yi < $n_to) {
163f3f0262cSandi            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
164f3f0262cSandi            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
165f3f0262cSandi
166f3f0262cSandi            // Skip matching "snake".
167f3f0262cSandi            $copy = array();
168f3f0262cSandi            while ( $xi < $n_from && $yi < $n_to
169f3f0262cSandi                    && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
170f3f0262cSandi                $copy[] = $from_lines[$xi++];
171f3f0262cSandi                ++$yi;
172f3f0262cSandi            }
173f3f0262cSandi            if ($copy)
174f3f0262cSandi                $edits[] = new _DiffOp_Copy($copy);
175f3f0262cSandi
176f3f0262cSandi            // Find deletes & adds.
177f3f0262cSandi            $delete = array();
178f3f0262cSandi            while ($xi < $n_from && $this->xchanged[$xi])
179f3f0262cSandi                $delete[] = $from_lines[$xi++];
180f3f0262cSandi
181f3f0262cSandi            $add = array();
182f3f0262cSandi            while ($yi < $n_to && $this->ychanged[$yi])
183f3f0262cSandi                $add[] = $to_lines[$yi++];
184f3f0262cSandi
185f3f0262cSandi            if ($delete && $add)
186f3f0262cSandi                $edits[] = new _DiffOp_Change($delete, $add);
187f3f0262cSandi            elseif ($delete)
188f3f0262cSandi                $edits[] = new _DiffOp_Delete($delete);
189f3f0262cSandi            elseif ($add)
190f3f0262cSandi                $edits[] = new _DiffOp_Add($add);
191f3f0262cSandi        }
192f3f0262cSandi        return $edits;
193f3f0262cSandi    }
194f3f0262cSandi
195f3f0262cSandi
19615fae107Sandi    /**
19715fae107Sandi     * Divide the Largest Common Subsequence (LCS) of the sequences
198f3f0262cSandi     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
199f3f0262cSandi     * sized segments.
200f3f0262cSandi     *
201f3f0262cSandi     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
202f3f0262cSandi     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
203f3f0262cSandi     * sub sequences.  The first sub-sequence is contained in [X0, X1),
204f3f0262cSandi     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
205f3f0262cSandi     * that (X0, Y0) == (XOFF, YOFF) and
206f3f0262cSandi     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
207f3f0262cSandi     *
208f3f0262cSandi     * This function assumes that the first lines of the specified portions
209f3f0262cSandi     * of the two files do not match, and likewise that the last lines do not
210f3f0262cSandi     * match.  The caller must trim matching lines from the beginning and end
211f3f0262cSandi     * of the portions it is going to specify.
212f3f0262cSandi     */
213f3f0262cSandi    function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
214f3f0262cSandi        $flip = false;
215f3f0262cSandi
216f3f0262cSandi        if ($xlim - $xoff > $ylim - $yoff) {
217f3f0262cSandi            // Things seems faster (I'm not sure I understand why)
218f3f0262cSandi            // when the shortest sequence in X.
219f3f0262cSandi            $flip = true;
220f3f0262cSandi            list ($xoff, $xlim, $yoff, $ylim)
221f3f0262cSandi                = array( $yoff, $ylim, $xoff, $xlim);
222f3f0262cSandi        }
223f3f0262cSandi
224f3f0262cSandi        if ($flip)
225f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
226f3f0262cSandi                $ymatches[$this->xv[$i]][] = $i;
227f3f0262cSandi        else
228f3f0262cSandi            for ($i = $ylim - 1; $i >= $yoff; $i--)
229f3f0262cSandi                $ymatches[$this->yv[$i]][] = $i;
230f3f0262cSandi
231f3f0262cSandi        $this->lcs = 0;
232f3f0262cSandi        $this->seq[0]= $yoff - 1;
233f3f0262cSandi        $this->in_seq = array();
234f3f0262cSandi        $ymids[0] = array();
235f3f0262cSandi
236f3f0262cSandi        $numer = $xlim - $xoff + $nchunks - 1;
237f3f0262cSandi        $x = $xoff;
238f3f0262cSandi        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
239f3f0262cSandi            if ($chunk > 0)
240f3f0262cSandi                for ($i = 0; $i <= $this->lcs; $i++)
241f3f0262cSandi                    $ymids[$i][$chunk-1] = $this->seq[$i];
242f3f0262cSandi
243f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
244f3f0262cSandi            for ( ; $x < $x1; $x++) {
245f3f0262cSandi                $line = $flip ? $this->yv[$x] : $this->xv[$x];
246f3f0262cSandi                if (empty($ymatches[$line]))
247f3f0262cSandi                    continue;
248f3f0262cSandi                $matches = $ymatches[$line];
249f3f0262cSandi                reset($matches);
250f3f0262cSandi                while (list ($junk, $y) = each($matches))
251f3f0262cSandi                    if (empty($this->in_seq[$y])) {
252f3f0262cSandi                        $k = $this->_lcs_pos($y);
253f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
254f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
255f3f0262cSandi                        break;
256f3f0262cSandi                    }
257f3f0262cSandi                while (list ($junk, $y) = each($matches)) {
258f3f0262cSandi                    if ($y > $this->seq[$k-1]) {
259f3f0262cSandi                        USE_ASSERTS && assert($y < $this->seq[$k]);
260f3f0262cSandi                        // Optimization: this is a common case:
261f3f0262cSandi                        //  next match is just replacing previous match.
262f3f0262cSandi                        $this->in_seq[$this->seq[$k]] = false;
263f3f0262cSandi                        $this->seq[$k] = $y;
264f3f0262cSandi                        $this->in_seq[$y] = 1;
265f3f0262cSandi                    }
266f3f0262cSandi                    else if (empty($this->in_seq[$y])) {
267f3f0262cSandi                        $k = $this->_lcs_pos($y);
268f3f0262cSandi                        USE_ASSERTS && assert($k > 0);
269f3f0262cSandi                        $ymids[$k] = $ymids[$k-1];
270f3f0262cSandi                    }
271f3f0262cSandi                }
272f3f0262cSandi            }
273f3f0262cSandi        }
274f3f0262cSandi
275f3f0262cSandi        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
276f3f0262cSandi        $ymid = $ymids[$this->lcs];
277f3f0262cSandi        for ($n = 0; $n < $nchunks - 1; $n++) {
278f3f0262cSandi            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
279f3f0262cSandi            $y1 = $ymid[$n] + 1;
280f3f0262cSandi            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
281f3f0262cSandi        }
282f3f0262cSandi        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
283f3f0262cSandi
284f3f0262cSandi        return array($this->lcs, $seps);
285f3f0262cSandi    }
286f3f0262cSandi
287f3f0262cSandi    function _lcs_pos ($ypos) {
288f3f0262cSandi        $end = $this->lcs;
289f3f0262cSandi        if ($end == 0 || $ypos > $this->seq[$end]) {
290f3f0262cSandi            $this->seq[++$this->lcs] = $ypos;
291f3f0262cSandi            $this->in_seq[$ypos] = 1;
292f3f0262cSandi            return $this->lcs;
293f3f0262cSandi        }
294f3f0262cSandi
295f3f0262cSandi        $beg = 1;
296f3f0262cSandi        while ($beg < $end) {
297f3f0262cSandi            $mid = (int)(($beg + $end) / 2);
298f3f0262cSandi            if ( $ypos > $this->seq[$mid] )
299f3f0262cSandi                $beg = $mid + 1;
300f3f0262cSandi            else
301f3f0262cSandi                $end = $mid;
302f3f0262cSandi        }
303f3f0262cSandi
304f3f0262cSandi        USE_ASSERTS && assert($ypos != $this->seq[$end]);
305f3f0262cSandi
306f3f0262cSandi        $this->in_seq[$this->seq[$end]] = false;
307f3f0262cSandi        $this->seq[$end] = $ypos;
308f3f0262cSandi        $this->in_seq[$ypos] = 1;
309f3f0262cSandi        return $end;
310f3f0262cSandi    }
311f3f0262cSandi
31215fae107Sandi    /**
31315fae107Sandi     * Find LCS of two sequences.
314f3f0262cSandi     *
315f3f0262cSandi     * The results are recorded in the vectors $this->{x,y}changed[], by
316f3f0262cSandi     * storing a 1 in the element for each line that is an insertion
317f3f0262cSandi     * or deletion (ie. is not in the LCS).
318f3f0262cSandi     *
319f3f0262cSandi     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
320f3f0262cSandi     *
321f3f0262cSandi     * Note that XLIM, YLIM are exclusive bounds.
322f3f0262cSandi     * All line numbers are origin-0 and discarded lines are not counted.
323f3f0262cSandi     */
324f3f0262cSandi    function _compareseq ($xoff, $xlim, $yoff, $ylim) {
325f3f0262cSandi        // Slide down the bottom initial diagonal.
326f3f0262cSandi        while ($xoff < $xlim && $yoff < $ylim
327f3f0262cSandi                && $this->xv[$xoff] == $this->yv[$yoff]) {
328f3f0262cSandi            ++$xoff;
329f3f0262cSandi            ++$yoff;
330f3f0262cSandi        }
331f3f0262cSandi
332f3f0262cSandi        // Slide up the top initial diagonal.
333f3f0262cSandi        while ($xlim > $xoff && $ylim > $yoff
334f3f0262cSandi                && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
335f3f0262cSandi            --$xlim;
336f3f0262cSandi            --$ylim;
337f3f0262cSandi        }
338f3f0262cSandi
339f3f0262cSandi        if ($xoff == $xlim || $yoff == $ylim)
340f3f0262cSandi            $lcs = 0;
341f3f0262cSandi        else {
342f3f0262cSandi            // This is ad hoc but seems to work well.
343f3f0262cSandi            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
344f3f0262cSandi            //$nchunks = max(2,min(8,(int)$nchunks));
345f3f0262cSandi            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
346f3f0262cSandi            list ($lcs, $seps)
347f3f0262cSandi                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
348f3f0262cSandi        }
349f3f0262cSandi
350f3f0262cSandi        if ($lcs == 0) {
351f3f0262cSandi            // X and Y sequences have no common subsequence:
352f3f0262cSandi            // mark all changed.
353f3f0262cSandi            while ($yoff < $ylim)
354f3f0262cSandi                $this->ychanged[$this->yind[$yoff++]] = 1;
355f3f0262cSandi            while ($xoff < $xlim)
356f3f0262cSandi                $this->xchanged[$this->xind[$xoff++]] = 1;
357f3f0262cSandi        }
358f3f0262cSandi        else {
359f3f0262cSandi            // Use the partitions to split this problem into subproblems.
360f3f0262cSandi            reset($seps);
361f3f0262cSandi            $pt1 = $seps[0];
362f3f0262cSandi            while ($pt2 = next($seps)) {
363f3f0262cSandi                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
364f3f0262cSandi                $pt1 = $pt2;
365f3f0262cSandi            }
366f3f0262cSandi        }
367f3f0262cSandi    }
368f3f0262cSandi
36915fae107Sandi    /**
37015fae107Sandi     * Adjust inserts/deletes of identical lines to join changes
371f3f0262cSandi     * as much as possible.
372f3f0262cSandi     *
373f3f0262cSandi     * We do something when a run of changed lines include a
374f3f0262cSandi     * line at one end and has an excluded, identical line at the other.
375f3f0262cSandi     * We are free to choose which identical line is included.
376f3f0262cSandi     * `compareseq' usually chooses the one at the beginning,
377f3f0262cSandi     * but usually it is cleaner to consider the following identical line
378f3f0262cSandi     * to be the "change".
379f3f0262cSandi     *
380f3f0262cSandi     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
381f3f0262cSandi     */
382f3f0262cSandi    function _shift_boundaries ($lines, &$changed, $other_changed) {
383f3f0262cSandi        $i = 0;
384f3f0262cSandi        $j = 0;
385f3f0262cSandi
386*f5b78785SAndreas Gohr        USE_ASSERTS && assert('count($lines) == count($changed)');
387*f5b78785SAndreas Gohr        $len = count($lines);
388*f5b78785SAndreas Gohr        $other_len = count($other_changed);
389f3f0262cSandi
390f3f0262cSandi        while (1) {
391f3f0262cSandi            /*
392f3f0262cSandi             * Scan forwards to find beginning of another run of changes.
393f3f0262cSandi             * Also keep track of the corresponding point in the other file.
394f3f0262cSandi             *
395f3f0262cSandi             * Throughout this code, $i and $j are adjusted together so that
396f3f0262cSandi             * the first $i elements of $changed and the first $j elements
397f3f0262cSandi             * of $other_changed both contain the same number of zeros
398f3f0262cSandi             * (unchanged lines).
399f3f0262cSandi             * Furthermore, $j is always kept so that $j == $other_len or
400f3f0262cSandi             * $other_changed[$j] == false.
401f3f0262cSandi             */
402f3f0262cSandi            while ($j < $other_len && $other_changed[$j])
403f3f0262cSandi                $j++;
404f3f0262cSandi
405f3f0262cSandi            while ($i < $len && ! $changed[$i]) {
406f3f0262cSandi                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
407*f5b78785SAndreas Gohr                $i++;
408*f5b78785SAndreas Gohr                $j++;
409f3f0262cSandi                while ($j < $other_len && $other_changed[$j])
410f3f0262cSandi                    $j++;
411f3f0262cSandi            }
412f3f0262cSandi
413f3f0262cSandi            if ($i == $len)
414f3f0262cSandi                break;
415f3f0262cSandi
416f3f0262cSandi            $start = $i;
417f3f0262cSandi
418f3f0262cSandi            // Find the end of this run of changes.
419f3f0262cSandi            while (++$i < $len && $changed[$i])
420f3f0262cSandi                continue;
421f3f0262cSandi
422f3f0262cSandi            do {
423f3f0262cSandi                /*
424f3f0262cSandi                 * Record the length of this run of changes, so that
425f3f0262cSandi                 * we can later determine whether the run has grown.
426f3f0262cSandi                 */
427f3f0262cSandi                $runlength = $i - $start;
428f3f0262cSandi
429f3f0262cSandi                /*
430f3f0262cSandi                 * Move the changed region back, so long as the
431f3f0262cSandi                 * previous unchanged line matches the last changed one.
432f3f0262cSandi                 * This merges with previous changed regions.
433f3f0262cSandi                 */
434f3f0262cSandi                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
435f3f0262cSandi                    $changed[--$start] = 1;
436f3f0262cSandi                    $changed[--$i] = false;
437f3f0262cSandi                    while ($start > 0 && $changed[$start - 1])
438f3f0262cSandi                        $start--;
439f3f0262cSandi                    USE_ASSERTS && assert('$j > 0');
440f3f0262cSandi                    while ($other_changed[--$j])
441f3f0262cSandi                        continue;
442f3f0262cSandi                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
443f3f0262cSandi                }
444f3f0262cSandi
445f3f0262cSandi                /*
446f3f0262cSandi                 * Set CORRESPONDING to the end of the changed run, at the last
447f3f0262cSandi                 * point where it corresponds to a changed run in the other file.
448f3f0262cSandi                 * CORRESPONDING == LEN means no such point has been found.
449f3f0262cSandi                 */
450f3f0262cSandi                $corresponding = $j < $other_len ? $i : $len;
451f3f0262cSandi
452f3f0262cSandi                /*
453f3f0262cSandi                 * Move the changed region forward, so long as the
454f3f0262cSandi                 * first changed line matches the following unchanged one.
455f3f0262cSandi                 * This merges with following changed regions.
456f3f0262cSandi                 * Do this second, so that if there are no merges,
457f3f0262cSandi                 * the changed region is moved forward as far as possible.
458f3f0262cSandi                 */
459f3f0262cSandi                while ($i < $len && $lines[$start] == $lines[$i]) {
460f3f0262cSandi                    $changed[$start++] = false;
461f3f0262cSandi                    $changed[$i++] = 1;
462f3f0262cSandi                    while ($i < $len && $changed[$i])
463f3f0262cSandi                        $i++;
464f3f0262cSandi
465f3f0262cSandi                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
466f3f0262cSandi                    $j++;
467f3f0262cSandi                    if ($j < $other_len && $other_changed[$j]) {
468f3f0262cSandi                        $corresponding = $i;
469f3f0262cSandi                        while ($j < $other_len && $other_changed[$j])
470f3f0262cSandi                            $j++;
471f3f0262cSandi                    }
472f3f0262cSandi                }
473f3f0262cSandi            } while ($runlength != $i - $start);
474f3f0262cSandi
475f3f0262cSandi            /*
476f3f0262cSandi             * If possible, move the fully-merged run of changes
477f3f0262cSandi             * back to a corresponding run in the other file.
478f3f0262cSandi             */
479f3f0262cSandi            while ($corresponding < $i) {
480f3f0262cSandi                $changed[--$start] = 1;
481f3f0262cSandi                $changed[--$i] = 0;
482f3f0262cSandi                USE_ASSERTS && assert('$j > 0');
483f3f0262cSandi                while ($other_changed[--$j])
484f3f0262cSandi                    continue;
485f3f0262cSandi                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
486f3f0262cSandi            }
487f3f0262cSandi        }
488f3f0262cSandi    }
489f3f0262cSandi}
490f3f0262cSandi
491f3f0262cSandi/**
492f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
493f3f0262cSandi */
494*f5b78785SAndreas Gohrclass Diff {
495*f5b78785SAndreas Gohr
496f3f0262cSandi    var $edits;
497f3f0262cSandi
498f3f0262cSandi    /**
499f3f0262cSandi     * Constructor.
500f3f0262cSandi     * Computes diff between sequences of strings.
501f3f0262cSandi     *
502f3f0262cSandi     * @param $from_lines array An array of strings.
503f3f0262cSandi     *      (Typically these are lines from a file.)
504f3f0262cSandi     * @param $to_lines array An array of strings.
505f3f0262cSandi     */
506f3f0262cSandi    function Diff($from_lines, $to_lines) {
507f3f0262cSandi        $eng = new _DiffEngine;
508f3f0262cSandi        $this->edits = $eng->diff($from_lines, $to_lines);
509f3f0262cSandi        //$this->_check($from_lines, $to_lines);
510f3f0262cSandi    }
511f3f0262cSandi
512f3f0262cSandi    /**
513f3f0262cSandi     * Compute reversed Diff.
514f3f0262cSandi     *
515f3f0262cSandi     * SYNOPSIS:
516f3f0262cSandi     *
517f3f0262cSandi     *  $diff = new Diff($lines1, $lines2);
518f3f0262cSandi     *  $rev = $diff->reverse();
519f3f0262cSandi     * @return object A Diff object representing the inverse of the
520f3f0262cSandi     *          original diff.
521f3f0262cSandi     */
522f3f0262cSandi    function reverse () {
523f3f0262cSandi        $rev = $this;
524f3f0262cSandi        $rev->edits = array();
525f3f0262cSandi        foreach ($this->edits as $edit) {
526f3f0262cSandi            $rev->edits[] = $edit->reverse();
527f3f0262cSandi        }
528f3f0262cSandi        return $rev;
529f3f0262cSandi    }
530f3f0262cSandi
531f3f0262cSandi    /**
532f3f0262cSandi     * Check for empty diff.
533f3f0262cSandi     *
534f3f0262cSandi     * @return bool True iff two sequences were identical.
535f3f0262cSandi     */
536f3f0262cSandi    function isEmpty () {
537f3f0262cSandi        foreach ($this->edits as $edit) {
538f3f0262cSandi            if ($edit->type != 'copy')
539f3f0262cSandi                return false;
540f3f0262cSandi        }
541f3f0262cSandi        return true;
542f3f0262cSandi    }
543f3f0262cSandi
544f3f0262cSandi    /**
545f3f0262cSandi     * Compute the length of the Longest Common Subsequence (LCS).
546f3f0262cSandi     *
547f3f0262cSandi     * This is mostly for diagnostic purposed.
548f3f0262cSandi     *
549f3f0262cSandi     * @return int The length of the LCS.
550f3f0262cSandi     */
551f3f0262cSandi    function lcs () {
552f3f0262cSandi        $lcs = 0;
553f3f0262cSandi        foreach ($this->edits as $edit) {
554f3f0262cSandi            if ($edit->type == 'copy')
555*f5b78785SAndreas Gohr                $lcs += count($edit->orig);
556f3f0262cSandi        }
557f3f0262cSandi        return $lcs;
558f3f0262cSandi    }
559f3f0262cSandi
560f3f0262cSandi    /**
561f3f0262cSandi     * Get the original set of lines.
562f3f0262cSandi     *
563f3f0262cSandi     * This reconstructs the $from_lines parameter passed to the
564f3f0262cSandi     * constructor.
565f3f0262cSandi     *
566f3f0262cSandi     * @return array The original sequence of strings.
567f3f0262cSandi     */
568f3f0262cSandi    function orig() {
569f3f0262cSandi        $lines = array();
570f3f0262cSandi
571f3f0262cSandi        foreach ($this->edits as $edit) {
572f3f0262cSandi            if ($edit->orig)
573*f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->orig);
574f3f0262cSandi        }
575f3f0262cSandi        return $lines;
576f3f0262cSandi    }
577f3f0262cSandi
578f3f0262cSandi    /**
579f3f0262cSandi     * Get the closing set of lines.
580f3f0262cSandi     *
581f3f0262cSandi     * This reconstructs the $to_lines parameter passed to the
582f3f0262cSandi     * constructor.
583f3f0262cSandi     *
584f3f0262cSandi     * @return array The sequence of strings.
585f3f0262cSandi     */
586f3f0262cSandi    function closing() {
587f3f0262cSandi        $lines = array();
588f3f0262cSandi
589f3f0262cSandi        foreach ($this->edits as $edit) {
590f3f0262cSandi            if ($edit->closing)
591*f5b78785SAndreas Gohr                array_splice($lines, count($lines), 0, $edit->closing);
592f3f0262cSandi        }
593f3f0262cSandi        return $lines;
594f3f0262cSandi    }
595f3f0262cSandi
596f3f0262cSandi    /**
597f3f0262cSandi     * Check a Diff for validity.
598f3f0262cSandi     *
599f3f0262cSandi     * This is here only for debugging purposes.
600f3f0262cSandi     */
601f3f0262cSandi    function _check ($from_lines, $to_lines) {
602f3f0262cSandi        if (serialize($from_lines) != serialize($this->orig()))
603f3f0262cSandi            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
604f3f0262cSandi        if (serialize($to_lines) != serialize($this->closing()))
605f3f0262cSandi            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
606f3f0262cSandi
607f3f0262cSandi        $rev = $this->reverse();
608f3f0262cSandi        if (serialize($to_lines) != serialize($rev->orig()))
609f3f0262cSandi            trigger_error("Reversed original doesn't match", E_USER_ERROR);
610f3f0262cSandi        if (serialize($from_lines) != serialize($rev->closing()))
611f3f0262cSandi            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
612f3f0262cSandi
613f3f0262cSandi        $prevtype = 'none';
614f3f0262cSandi        foreach ($this->edits as $edit) {
615f3f0262cSandi            if ( $prevtype == $edit->type )
616f3f0262cSandi                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
617f3f0262cSandi            $prevtype = $edit->type;
618f3f0262cSandi        }
619f3f0262cSandi
620f3f0262cSandi        $lcs = $this->lcs();
621f3f0262cSandi        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
622f3f0262cSandi    }
623f3f0262cSandi}
624f3f0262cSandi
625f3f0262cSandi/**
626f3f0262cSandi * FIXME: bad name.
627f3f0262cSandi */
628*f5b78785SAndreas Gohrclass MappedDiff extends Diff {
629f3f0262cSandi    /**
630f3f0262cSandi     * Constructor.
631f3f0262cSandi     *
632f3f0262cSandi     * Computes diff between sequences of strings.
633f3f0262cSandi     *
634f3f0262cSandi     * This can be used to compute things like
635f3f0262cSandi     * case-insensitve diffs, or diffs which ignore
636f3f0262cSandi     * changes in white-space.
637f3f0262cSandi     *
638f3f0262cSandi     * @param $from_lines array An array of strings.
639f3f0262cSandi     *  (Typically these are lines from a file.)
640f3f0262cSandi     *
641f3f0262cSandi     * @param $to_lines array An array of strings.
642f3f0262cSandi     *
643f3f0262cSandi     * @param $mapped_from_lines array This array should
644f3f0262cSandi     *  have the same size number of elements as $from_lines.
645f3f0262cSandi     *  The elements in $mapped_from_lines and
646f3f0262cSandi     *  $mapped_to_lines are what is actually compared
647f3f0262cSandi     *  when computing the diff.
648f3f0262cSandi     *
649f3f0262cSandi     * @param $mapped_to_lines array This array should
650f3f0262cSandi     *  have the same number of elements as $to_lines.
651f3f0262cSandi     */
652f3f0262cSandi    function MappedDiff($from_lines, $to_lines,
653f3f0262cSandi            $mapped_from_lines, $mapped_to_lines) {
654f3f0262cSandi
655*f5b78785SAndreas Gohr        assert(count($from_lines) == count($mapped_from_lines));
656*f5b78785SAndreas Gohr        assert(count($to_lines) == count($mapped_to_lines));
657f3f0262cSandi
658f3f0262cSandi        $this->Diff($mapped_from_lines, $mapped_to_lines);
659f3f0262cSandi
660f3f0262cSandi        $xi = $yi = 0;
661*f5b78785SAndreas Gohr        $ecnt = count($this->edits);
662*f5b78785SAndreas Gohr        for ($i = 0; $i < $ecnt; $i++) {
663f3f0262cSandi            $orig = &$this->edits[$i]->orig;
664f3f0262cSandi            if (is_array($orig)) {
665*f5b78785SAndreas Gohr                $orig = array_slice($from_lines, $xi, count($orig));
666*f5b78785SAndreas Gohr                $xi += count($orig);
667f3f0262cSandi            }
668f3f0262cSandi
669f3f0262cSandi            $closing = &$this->edits[$i]->closing;
670f3f0262cSandi            if (is_array($closing)) {
671*f5b78785SAndreas Gohr                $closing = array_slice($to_lines, $yi, count($closing));
672*f5b78785SAndreas Gohr                $yi += count($closing);
673f3f0262cSandi            }
674f3f0262cSandi        }
675f3f0262cSandi    }
676f3f0262cSandi}
677f3f0262cSandi
678f3f0262cSandi/**
679f3f0262cSandi * A class to format Diffs
680f3f0262cSandi *
681f3f0262cSandi * This class formats the diff in classic diff format.
682f3f0262cSandi * It is intended that this class be customized via inheritance,
683f3f0262cSandi * to obtain fancier outputs.
684f3f0262cSandi */
685*f5b78785SAndreas Gohrclass DiffFormatter {
686f3f0262cSandi    /**
687f3f0262cSandi     * Number of leading context "lines" to preserve.
688f3f0262cSandi     *
689f3f0262cSandi     * This should be left at zero for this class, but subclasses
690f3f0262cSandi     * may want to set this to other values.
691f3f0262cSandi     */
692f3f0262cSandi    var $leading_context_lines = 0;
693f3f0262cSandi
694f3f0262cSandi    /**
695f3f0262cSandi     * Number of trailing context "lines" to preserve.
696f3f0262cSandi     *
697f3f0262cSandi     * This should be left at zero for this class, but subclasses
698f3f0262cSandi     * may want to set this to other values.
699f3f0262cSandi     */
700f3f0262cSandi    var $trailing_context_lines = 0;
701f3f0262cSandi
702f3f0262cSandi    /**
703f3f0262cSandi     * Format a diff.
704f3f0262cSandi     *
705f3f0262cSandi     * @param $diff object A Diff object.
706f3f0262cSandi     * @return string The formatted output.
707f3f0262cSandi     */
708f3f0262cSandi    function format($diff) {
709f3f0262cSandi
710f3f0262cSandi        $xi = $yi = 1;
711f3f0262cSandi        $block = false;
712f3f0262cSandi        $context = array();
713f3f0262cSandi
714f3f0262cSandi        $nlead = $this->leading_context_lines;
715f3f0262cSandi        $ntrail = $this->trailing_context_lines;
716f3f0262cSandi
717f3f0262cSandi        $this->_start_diff();
718f3f0262cSandi
719f3f0262cSandi        foreach ($diff->edits as $edit) {
720f3f0262cSandi            if ($edit->type == 'copy') {
721f3f0262cSandi                if (is_array($block)) {
722*f5b78785SAndreas Gohr                    if (count($edit->orig) <= $nlead + $ntrail) {
723f3f0262cSandi                        $block[] = $edit;
724f3f0262cSandi                    }
725f3f0262cSandi                    else{
726f3f0262cSandi                        if ($ntrail) {
727f3f0262cSandi                            $context = array_slice($edit->orig, 0, $ntrail);
728f3f0262cSandi                            $block[] = new _DiffOp_Copy($context);
729f3f0262cSandi                        }
730f3f0262cSandi                        $this->_block($x0, $ntrail + $xi - $x0,
731f3f0262cSandi                                $y0, $ntrail + $yi - $y0,
732f3f0262cSandi                                $block);
733f3f0262cSandi                        $block = false;
734f3f0262cSandi                    }
735f3f0262cSandi                }
736f3f0262cSandi                $context = $edit->orig;
737f3f0262cSandi            }
738f3f0262cSandi            else {
739f3f0262cSandi                if (! is_array($block)) {
740*f5b78785SAndreas Gohr                    $context = array_slice($context, count($context) - $nlead);
741*f5b78785SAndreas Gohr                    $x0 = $xi - count($context);
742*f5b78785SAndreas Gohr                    $y0 = $yi - count($context);
743f3f0262cSandi                    $block = array();
744f3f0262cSandi                    if ($context)
745f3f0262cSandi                        $block[] = new _DiffOp_Copy($context);
746f3f0262cSandi                }
747f3f0262cSandi                $block[] = $edit;
748f3f0262cSandi            }
749f3f0262cSandi
750f3f0262cSandi            if ($edit->orig)
751*f5b78785SAndreas Gohr                $xi += count($edit->orig);
752f3f0262cSandi            if ($edit->closing)
753*f5b78785SAndreas Gohr                $yi += count($edit->closing);
754f3f0262cSandi        }
755f3f0262cSandi
756f3f0262cSandi        if (is_array($block))
757f3f0262cSandi            $this->_block($x0, $xi - $x0,
758f3f0262cSandi                    $y0, $yi - $y0,
759f3f0262cSandi                    $block);
760f3f0262cSandi
761f3f0262cSandi        return $this->_end_diff();
762f3f0262cSandi    }
763f3f0262cSandi
764f3f0262cSandi    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
765f3f0262cSandi        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
766f3f0262cSandi        foreach ($edits as $edit) {
767f3f0262cSandi            if ($edit->type == 'copy')
768f3f0262cSandi                $this->_context($edit->orig);
769f3f0262cSandi            elseif ($edit->type == 'add')
770f3f0262cSandi                $this->_added($edit->closing);
771f3f0262cSandi            elseif ($edit->type == 'delete')
772f3f0262cSandi                $this->_deleted($edit->orig);
773f3f0262cSandi            elseif ($edit->type == 'change')
774f3f0262cSandi                $this->_changed($edit->orig, $edit->closing);
775f3f0262cSandi            else
776f3f0262cSandi                trigger_error("Unknown edit type", E_USER_ERROR);
777f3f0262cSandi        }
778f3f0262cSandi        $this->_end_block();
779f3f0262cSandi    }
780f3f0262cSandi
781f3f0262cSandi    function _start_diff() {
782f3f0262cSandi        ob_start();
783f3f0262cSandi    }
784f3f0262cSandi
785f3f0262cSandi    function _end_diff() {
786f3f0262cSandi        $val = ob_get_contents();
787f3f0262cSandi        ob_end_clean();
788f3f0262cSandi        return $val;
789f3f0262cSandi    }
790f3f0262cSandi
791f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
792f3f0262cSandi        if ($xlen > 1)
793f3f0262cSandi            $xbeg .= "," . ($xbeg + $xlen - 1);
794f3f0262cSandi        if ($ylen > 1)
795f3f0262cSandi            $ybeg .= "," . ($ybeg + $ylen - 1);
796f3f0262cSandi
797f3f0262cSandi        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
798f3f0262cSandi    }
799f3f0262cSandi
800f3f0262cSandi    function _start_block($header) {
801f3f0262cSandi        echo $header;
802f3f0262cSandi    }
803f3f0262cSandi
804f3f0262cSandi    function _end_block() {
805f3f0262cSandi    }
806f3f0262cSandi
807f3f0262cSandi    function _lines($lines, $prefix = ' ') {
808f3f0262cSandi        foreach ($lines as $line)
809f3f0262cSandi            echo "$prefix $line\n";
810f3f0262cSandi    }
811f3f0262cSandi
812f3f0262cSandi    function _context($lines) {
813f3f0262cSandi        $this->_lines($lines);
814f3f0262cSandi    }
815f3f0262cSandi
816f3f0262cSandi    function _added($lines) {
817f3f0262cSandi        $this->_lines($lines, ">");
818f3f0262cSandi    }
819f3f0262cSandi    function _deleted($lines) {
820f3f0262cSandi        $this->_lines($lines, "<");
821f3f0262cSandi    }
822f3f0262cSandi
823f3f0262cSandi    function _changed($orig, $closing) {
824f3f0262cSandi        $this->_deleted($orig);
825f3f0262cSandi        echo "---\n";
826f3f0262cSandi        $this->_added($closing);
827f3f0262cSandi    }
828f3f0262cSandi}
829f3f0262cSandi
830f3f0262cSandi
831f3f0262cSandi/**
832f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
833f3f0262cSandi *
834f3f0262cSandi */
835f3f0262cSandi
8369b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
837f3f0262cSandi
838f3f0262cSandiclass _HWLDF_WordAccumulator {
839f3f0262cSandi    function _HWLDF_WordAccumulator () {
840f3f0262cSandi        $this->_lines = array();
841f3f0262cSandi        $this->_line = '';
842f3f0262cSandi        $this->_group = '';
843f3f0262cSandi        $this->_tag = '';
844f3f0262cSandi    }
845f3f0262cSandi
846f3f0262cSandi    function _flushGroup ($new_tag) {
847f3f0262cSandi        if ($this->_group !== '') {
848f3f0262cSandi            if ($this->_tag == 'mark')
849ed7ecb79SAnika Henke                $this->_line .= '<strong>'.$this->_group.'</strong>';
850f3f0262cSandi            else
851f3f0262cSandi                $this->_line .= $this->_group;
852f3f0262cSandi        }
853f3f0262cSandi        $this->_group = '';
854f3f0262cSandi        $this->_tag = $new_tag;
855f3f0262cSandi    }
856f3f0262cSandi
857f3f0262cSandi    function _flushLine ($new_tag) {
858f3f0262cSandi        $this->_flushGroup($new_tag);
859f3f0262cSandi        if ($this->_line != '')
860f3f0262cSandi            $this->_lines[] = $this->_line;
861f3f0262cSandi        $this->_line = '';
862f3f0262cSandi    }
863f3f0262cSandi
864f3f0262cSandi    function addWords ($words, $tag = '') {
865f3f0262cSandi        if ($tag != $this->_tag)
866f3f0262cSandi            $this->_flushGroup($tag);
867f3f0262cSandi
868f3f0262cSandi        foreach ($words as $word) {
869f3f0262cSandi            // new-line should only come as first char of word.
870f3f0262cSandi            if ($word == '')
871f3f0262cSandi                continue;
872f3f0262cSandi            if ($word[0] == "\n") {
873f3f0262cSandi                $this->_group .= NBSP;
874f3f0262cSandi                $this->_flushLine($tag);
875f3f0262cSandi                $word = substr($word, 1);
876f3f0262cSandi            }
877f3f0262cSandi            assert(!strstr($word, "\n"));
878f3f0262cSandi            $this->_group .= $word;
879f3f0262cSandi        }
880f3f0262cSandi    }
881f3f0262cSandi
882f3f0262cSandi    function getLines() {
883f3f0262cSandi        $this->_flushLine('~done');
884f3f0262cSandi        return $this->_lines;
885f3f0262cSandi    }
886f3f0262cSandi}
887f3f0262cSandi
888*f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff {
889*f5b78785SAndreas Gohr
890f3f0262cSandi    function WordLevelDiff ($orig_lines, $closing_lines) {
891f3f0262cSandi        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
892f3f0262cSandi        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
893f3f0262cSandi
894f3f0262cSandi        $this->MappedDiff($orig_words, $closing_words,
895f3f0262cSandi                $orig_stripped, $closing_stripped);
896f3f0262cSandi    }
897f3f0262cSandi
898f3f0262cSandi    function _split($lines) {
899f3f0262cSandi            if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
900f3f0262cSandi                    implode("\n", $lines),
901f3f0262cSandi                    $m)) {
902f3f0262cSandi            return array(array(''), array(''));
903f3f0262cSandi            }
904f3f0262cSandi            return array($m[0], $m[1]);
905f3f0262cSandi            }
906f3f0262cSandi
907f3f0262cSandi            function orig () {
908f3f0262cSandi            $orig = new _HWLDF_WordAccumulator;
909f3f0262cSandi
910f3f0262cSandi            foreach ($this->edits as $edit) {
911f3f0262cSandi            if ($edit->type == 'copy')
912f3f0262cSandi            $orig->addWords($edit->orig);
913f3f0262cSandi            elseif ($edit->orig)
914f3f0262cSandi            $orig->addWords($edit->orig, 'mark');
915f3f0262cSandi            }
916f3f0262cSandi            return $orig->getLines();
917f3f0262cSandi            }
918f3f0262cSandi
919f3f0262cSandi            function closing () {
920f3f0262cSandi                $closing = new _HWLDF_WordAccumulator;
921f3f0262cSandi
922f3f0262cSandi                foreach ($this->edits as $edit) {
923f3f0262cSandi                    if ($edit->type == 'copy')
924f3f0262cSandi                        $closing->addWords($edit->closing);
925f3f0262cSandi                    elseif ($edit->closing)
926f3f0262cSandi                        $closing->addWords($edit->closing, 'mark');
927f3f0262cSandi                }
928f3f0262cSandi                return $closing->getLines();
929f3f0262cSandi            }
930f3f0262cSandi}
931f3f0262cSandi
932f3f0262cSandi/**
933f3f0262cSandi * "Unified" diff formatter.
934f3f0262cSandi *
935f3f0262cSandi * This class formats the diff in classic "unified diff" format.
936f3f0262cSandi */
937*f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter {
938*f5b78785SAndreas Gohr
939f3f0262cSandi    function UnifiedDiffFormatter($context_lines = 4) {
940f3f0262cSandi        $this->leading_context_lines = $context_lines;
941f3f0262cSandi        $this->trailing_context_lines = $context_lines;
942f3f0262cSandi    }
943f3f0262cSandi
944f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
945f3f0262cSandi        if ($xlen != 1)
946f3f0262cSandi            $xbeg .= "," . $xlen;
947f3f0262cSandi        if ($ylen != 1)
948f3f0262cSandi            $ybeg .= "," . $ylen;
949f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
950f3f0262cSandi    }
951f3f0262cSandi
952f3f0262cSandi    function _added($lines) {
953f3f0262cSandi        $this->_lines($lines, "+");
954f3f0262cSandi    }
955f3f0262cSandi    function _deleted($lines) {
956f3f0262cSandi        $this->_lines($lines, "-");
957f3f0262cSandi    }
958f3f0262cSandi    function _changed($orig, $final) {
959f3f0262cSandi        $this->_deleted($orig);
960f3f0262cSandi        $this->_added($final);
961f3f0262cSandi    }
962f3f0262cSandi}
963f3f0262cSandi
964f3f0262cSandi/**
965f3f0262cSandi *  Wikipedia Table style diff formatter.
966f3f0262cSandi *
967f3f0262cSandi */
968*f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter {
969*f5b78785SAndreas Gohr
970f3f0262cSandi    function TableDiffFormatter() {
971f3f0262cSandi        $this->leading_context_lines = 2;
972f3f0262cSandi        $this->trailing_context_lines = 2;
973f3f0262cSandi    }
974f3f0262cSandi
9752d880650SAdrian Lang    function format($diff) {
9762d880650SAdrian Lang        // Preserve whitespaces by converting some to non-breaking spaces.
9772d880650SAdrian Lang        // Do not convert all of them to allow word-wrap.
9782d880650SAdrian Lang        $val = parent::format($diff);
9792d880650SAdrian Lang        $val = str_replace('  ','&nbsp; ', $val);
9802d880650SAdrian Lang        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
9812d880650SAdrian Lang        return $val;
9822d880650SAdrian Lang    }
9832d880650SAdrian Lang
984f3f0262cSandi    function _pre($text){
985f3f0262cSandi        $text = htmlspecialchars($text);
986f3f0262cSandi        return $text;
987f3f0262cSandi    }
988f3f0262cSandi
989f3f0262cSandi    function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
990f3f0262cSandi        global $lang;
991f3f0262cSandi        $l1 = $lang['line'].' '.$xbeg;
992f3f0262cSandi        $l2 = $lang['line'].' '.$ybeg;
993f3f0262cSandi        $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
994f3f0262cSandi            '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
995f3f0262cSandi        return $r;
996f3f0262cSandi    }
997f3f0262cSandi
998f3f0262cSandi    function _start_block( $header ) {
999f3f0262cSandi        print( $header );
1000f3f0262cSandi    }
1001f3f0262cSandi
1002f3f0262cSandi    function _end_block() {
1003f3f0262cSandi    }
1004f3f0262cSandi
1005f3f0262cSandi    function _lines( $lines, $prefix=' ', $color="white" ) {
1006f3f0262cSandi    }
1007f3f0262cSandi
1008f3f0262cSandi    function addedLine( $line ) {
1009f3f0262cSandi        return '<td>+</td><td class="diff-addedline">' .
1010f3f0262cSandi            $line.'</td>';
10112d880650SAdrian Lang
1012f3f0262cSandi    }
1013f3f0262cSandi
1014f3f0262cSandi    function deletedLine( $line ) {
1015f3f0262cSandi        return '<td>-</td><td class="diff-deletedline">' .
1016f3f0262cSandi            $line.'</td>';
1017f3f0262cSandi    }
1018f3f0262cSandi
1019f3f0262cSandi    function emptyLine() {
1020f3f0262cSandi        return '<td colspan="2">&nbsp;</td>';
1021f3f0262cSandi    }
1022f3f0262cSandi
1023f3f0262cSandi    function contextLine( $line ) {
1024f3f0262cSandi        return '<td> </td><td class="diff-context">'.$line.'</td>';
1025f3f0262cSandi    }
1026f3f0262cSandi
1027f3f0262cSandi    function _added($lines) {
1028f3f0262cSandi        foreach ($lines as $line) {
1029f3f0262cSandi            print( '<tr>' . $this->emptyLine() .
1030f3f0262cSandi                    $this->addedLine( $line ) . "</tr>\n" );
1031f3f0262cSandi        }
1032f3f0262cSandi    }
1033f3f0262cSandi
1034f3f0262cSandi    function _deleted($lines) {
1035f3f0262cSandi        foreach ($lines as $line) {
1036f3f0262cSandi            print( '<tr>' . $this->deletedLine( $line ) .
1037f3f0262cSandi                    $this->emptyLine() . "</tr>\n" );
1038f3f0262cSandi        }
1039f3f0262cSandi    }
1040f3f0262cSandi
1041f3f0262cSandi    function _context( $lines ) {
1042f3f0262cSandi        foreach ($lines as $line) {
1043f3f0262cSandi            print( '<tr>' . $this->contextLine( $line ) .
1044f3f0262cSandi                    $this->contextLine( $line ) . "</tr>\n" );
1045f3f0262cSandi        }
1046f3f0262cSandi    }
1047f3f0262cSandi
1048f3f0262cSandi    function _changed( $orig, $closing ) {
1049f3f0262cSandi        $diff = new WordLevelDiff( $orig, $closing );
1050f3f0262cSandi        $del = $diff->orig();
1051f3f0262cSandi        $add = $diff->closing();
1052f3f0262cSandi
1053f3f0262cSandi        while ( $line = array_shift( $del ) ) {
1054f3f0262cSandi            $aline = array_shift( $add );
1055f3f0262cSandi            print( '<tr>' . $this->deletedLine( $line ) .
1056f3f0262cSandi                    $this->addedLine( $aline ) . "</tr>\n" );
1057f3f0262cSandi        }
1058f3f0262cSandi        $this->_added( $add ); # If any leftovers
1059f3f0262cSandi    }
1060f3f0262cSandi}
1061f3f0262cSandi
1062340756e4Sandi
1063*f5b78785SAndreas Gohr//Setup VIM: ex: et ts=4 enc=utf-8 :
1064