xref: /dokuwiki/inc/DifferenceEngine.php (revision ed7ecb791be76223682b576af6242134cdca9496)
1f3f0262cSandi<?php
215fae107Sandi/**
315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
415fae107Sandi *
515fae107Sandi * Additions by Axel Boldt for MediaWiki
615fae107Sandi *
715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
815fae107Sandi * @license  You may copy this code freely under the conditions of the GPL.
915fae107Sandi */
10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
11f3f0262cSandi
12f3f0262cSandiclass _DiffOp {
13f3f0262cSandi  var $type;
14f3f0262cSandi  var $orig;
15f3f0262cSandi  var $closing;
16f3f0262cSandi
17f3f0262cSandi  function reverse() {
18f3f0262cSandi    trigger_error("pure virtual", E_USER_ERROR);
19f3f0262cSandi  }
20f3f0262cSandi
21f3f0262cSandi  function norig() {
22f3f0262cSandi    return $this->orig ? sizeof($this->orig) : 0;
23f3f0262cSandi  }
24f3f0262cSandi
25f3f0262cSandi  function nclosing() {
26f3f0262cSandi    return $this->closing ? sizeof($this->closing) : 0;
27f3f0262cSandi  }
28f3f0262cSandi}
29f3f0262cSandi
30f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
31f3f0262cSandi  var $type = 'copy';
32f3f0262cSandi
33f3f0262cSandi  function _DiffOp_Copy ($orig, $closing = false) {
34f3f0262cSandi    if (!is_array($closing))
35f3f0262cSandi      $closing = $orig;
36f3f0262cSandi    $this->orig = $orig;
37f3f0262cSandi    $this->closing = $closing;
38f3f0262cSandi  }
39f3f0262cSandi
40f3f0262cSandi  function reverse() {
41f3f0262cSandi    return new _DiffOp_Copy($this->closing, $this->orig);
42f3f0262cSandi  }
43f3f0262cSandi}
44f3f0262cSandi
45f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
46f3f0262cSandi  var $type = 'delete';
47f3f0262cSandi
48f3f0262cSandi  function _DiffOp_Delete ($lines) {
49f3f0262cSandi    $this->orig = $lines;
50f3f0262cSandi    $this->closing = false;
51f3f0262cSandi  }
52f3f0262cSandi
53f3f0262cSandi  function reverse() {
54f3f0262cSandi    return new _DiffOp_Add($this->orig);
55f3f0262cSandi  }
56f3f0262cSandi}
57f3f0262cSandi
58f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
59f3f0262cSandi  var $type = 'add';
60f3f0262cSandi
61f3f0262cSandi  function _DiffOp_Add ($lines) {
62f3f0262cSandi    $this->closing = $lines;
63f3f0262cSandi    $this->orig = false;
64f3f0262cSandi  }
65f3f0262cSandi
66f3f0262cSandi  function reverse() {
67f3f0262cSandi    return new _DiffOp_Delete($this->closing);
68f3f0262cSandi  }
69f3f0262cSandi}
70f3f0262cSandi
71f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
72f3f0262cSandi  var $type = 'change';
73f3f0262cSandi
74f3f0262cSandi  function _DiffOp_Change ($orig, $closing) {
75f3f0262cSandi    $this->orig = $orig;
76f3f0262cSandi    $this->closing = $closing;
77f3f0262cSandi  }
78f3f0262cSandi
79f3f0262cSandi  function reverse() {
80f3f0262cSandi    return new _DiffOp_Change($this->closing, $this->orig);
81f3f0262cSandi  }
82f3f0262cSandi}
83f3f0262cSandi
84f3f0262cSandi
85f3f0262cSandi/**
86f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
87f3f0262cSandi *
88f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
89f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
90f3f0262cSandi *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
91f3f0262cSandi *
92f3f0262cSandi * More ideas are taken from:
93f3f0262cSandi *   http://www.ics.uci.edu/~eppstein/161/960229.html
94f3f0262cSandi *
95f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
96f3f0262cSandi * diffutils-2.7, which can be found at:
97f3f0262cSandi *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
98f3f0262cSandi *
99f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
100f3f0262cSandi * are my own.
101f3f0262cSandi *
102f3f0262cSandi * @author Geoffrey T. Dairiki
103f3f0262cSandi * @access private
104f3f0262cSandi */
105f3f0262cSandiclass _DiffEngine
106f3f0262cSandi{
107f3f0262cSandi  function diff ($from_lines, $to_lines) {
108f3f0262cSandi    $n_from = sizeof($from_lines);
109f3f0262cSandi    $n_to = sizeof($to_lines);
110f3f0262cSandi
111f3f0262cSandi    $this->xchanged = $this->ychanged = array();
112f3f0262cSandi    $this->xv = $this->yv = array();
113f3f0262cSandi    $this->xind = $this->yind = array();
114f3f0262cSandi    unset($this->seq);
115f3f0262cSandi    unset($this->in_seq);
116f3f0262cSandi    unset($this->lcs);
117f3f0262cSandi
118f3f0262cSandi    // Skip leading common lines.
119f3f0262cSandi    for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
120f3f0262cSandi      if ($from_lines[$skip] != $to_lines[$skip])
121f3f0262cSandi        break;
122f3f0262cSandi      $this->xchanged[$skip] = $this->ychanged[$skip] = false;
123f3f0262cSandi    }
124f3f0262cSandi    // Skip trailing common lines.
125f3f0262cSandi    $xi = $n_from; $yi = $n_to;
126f3f0262cSandi    for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
127f3f0262cSandi      if ($from_lines[$xi] != $to_lines[$yi])
128f3f0262cSandi        break;
129f3f0262cSandi      $this->xchanged[$xi] = $this->ychanged[$yi] = false;
130f3f0262cSandi    }
131f3f0262cSandi
132f3f0262cSandi    // Ignore lines which do not exist in both files.
133f3f0262cSandi    for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
134f3f0262cSandi      $xhash[$from_lines[$xi]] = 1;
135f3f0262cSandi    for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
136f3f0262cSandi      $line = $to_lines[$yi];
137f3f0262cSandi      if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
138f3f0262cSandi        continue;
139f3f0262cSandi      $yhash[$line] = 1;
140f3f0262cSandi      $this->yv[] = $line;
141f3f0262cSandi      $this->yind[] = $yi;
142f3f0262cSandi    }
143f3f0262cSandi    for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
144f3f0262cSandi      $line = $from_lines[$xi];
145f3f0262cSandi      if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
146f3f0262cSandi        continue;
147f3f0262cSandi      $this->xv[] = $line;
148f3f0262cSandi      $this->xind[] = $xi;
149f3f0262cSandi    }
150f3f0262cSandi
151f3f0262cSandi    // Find the LCS.
152f3f0262cSandi    $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
153f3f0262cSandi
154f3f0262cSandi    // Merge edits when possible
155f3f0262cSandi    $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
156f3f0262cSandi    $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
157f3f0262cSandi
158f3f0262cSandi    // Compute the edit operations.
159f3f0262cSandi    $edits = array();
160f3f0262cSandi    $xi = $yi = 0;
161f3f0262cSandi    while ($xi < $n_from || $yi < $n_to) {
162f3f0262cSandi      USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
163f3f0262cSandi      USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
164f3f0262cSandi
165f3f0262cSandi      // Skip matching "snake".
166f3f0262cSandi      $copy = array();
167f3f0262cSandi      while ( $xi < $n_from && $yi < $n_to
168f3f0262cSandi          && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
169f3f0262cSandi        $copy[] = $from_lines[$xi++];
170f3f0262cSandi        ++$yi;
171f3f0262cSandi      }
172f3f0262cSandi      if ($copy)
173f3f0262cSandi        $edits[] = new _DiffOp_Copy($copy);
174f3f0262cSandi
175f3f0262cSandi      // Find deletes & adds.
176f3f0262cSandi      $delete = array();
177f3f0262cSandi      while ($xi < $n_from && $this->xchanged[$xi])
178f3f0262cSandi        $delete[] = $from_lines[$xi++];
179f3f0262cSandi
180f3f0262cSandi      $add = array();
181f3f0262cSandi      while ($yi < $n_to && $this->ychanged[$yi])
182f3f0262cSandi        $add[] = $to_lines[$yi++];
183f3f0262cSandi
184f3f0262cSandi      if ($delete && $add)
185f3f0262cSandi        $edits[] = new _DiffOp_Change($delete, $add);
186f3f0262cSandi      elseif ($delete)
187f3f0262cSandi        $edits[] = new _DiffOp_Delete($delete);
188f3f0262cSandi      elseif ($add)
189f3f0262cSandi        $edits[] = new _DiffOp_Add($add);
190f3f0262cSandi    }
191f3f0262cSandi    return $edits;
192f3f0262cSandi  }
193f3f0262cSandi
194f3f0262cSandi
19515fae107Sandi  /**
19615fae107Sandi   * Divide the Largest Common Subsequence (LCS) of the sequences
197f3f0262cSandi   * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
198f3f0262cSandi   * sized segments.
199f3f0262cSandi   *
200f3f0262cSandi   * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
201f3f0262cSandi   * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
202f3f0262cSandi   * sub sequences.  The first sub-sequence is contained in [X0, X1),
203f3f0262cSandi   * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
204f3f0262cSandi   * that (X0, Y0) == (XOFF, YOFF) and
205f3f0262cSandi   * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
206f3f0262cSandi   *
207f3f0262cSandi   * This function assumes that the first lines of the specified portions
208f3f0262cSandi   * of the two files do not match, and likewise that the last lines do not
209f3f0262cSandi   * match.  The caller must trim matching lines from the beginning and end
210f3f0262cSandi   * of the portions it is going to specify.
211f3f0262cSandi   */
212f3f0262cSandi  function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
213f3f0262cSandi  $flip = false;
214f3f0262cSandi
215f3f0262cSandi  if ($xlim - $xoff > $ylim - $yoff) {
216f3f0262cSandi    // Things seems faster (I'm not sure I understand why)
217f3f0262cSandi      // when the shortest sequence in X.
218f3f0262cSandi      $flip = true;
219f3f0262cSandi    list ($xoff, $xlim, $yoff, $ylim)
220f3f0262cSandi    = array( $yoff, $ylim, $xoff, $xlim);
221f3f0262cSandi    }
222f3f0262cSandi
223f3f0262cSandi  if ($flip)
224f3f0262cSandi    for ($i = $ylim - 1; $i >= $yoff; $i--)
225f3f0262cSandi    $ymatches[$this->xv[$i]][] = $i;
226f3f0262cSandi  else
227f3f0262cSandi    for ($i = $ylim - 1; $i >= $yoff; $i--)
228f3f0262cSandi    $ymatches[$this->yv[$i]][] = $i;
229f3f0262cSandi
230f3f0262cSandi  $this->lcs = 0;
231f3f0262cSandi  $this->seq[0]= $yoff - 1;
232f3f0262cSandi  $this->in_seq = array();
233f3f0262cSandi  $ymids[0] = array();
234f3f0262cSandi
235f3f0262cSandi  $numer = $xlim - $xoff + $nchunks - 1;
236f3f0262cSandi  $x = $xoff;
237f3f0262cSandi  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
238f3f0262cSandi    if ($chunk > 0)
239f3f0262cSandi    for ($i = 0; $i <= $this->lcs; $i++)
240f3f0262cSandi      $ymids[$i][$chunk-1] = $this->seq[$i];
241f3f0262cSandi
242f3f0262cSandi    $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
243f3f0262cSandi    for ( ; $x < $x1; $x++) {
244f3f0262cSandi        $line = $flip ? $this->yv[$x] : $this->xv[$x];
245f3f0262cSandi        if (empty($ymatches[$line]))
246f3f0262cSandi      continue;
247f3f0262cSandi    $matches = $ymatches[$line];
248f3f0262cSandi        reset($matches);
249f3f0262cSandi    while (list ($junk, $y) = each($matches))
250f3f0262cSandi      if (empty($this->in_seq[$y])) {
251f3f0262cSandi      $k = $this->_lcs_pos($y);
252f3f0262cSandi      USE_ASSERTS && assert($k > 0);
253f3f0262cSandi      $ymids[$k] = $ymids[$k-1];
254f3f0262cSandi      break;
255f3f0262cSandi          }
256f3f0262cSandi    while (list ($junk, $y) = each($matches)) {
257f3f0262cSandi      if ($y > $this->seq[$k-1]) {
258f3f0262cSandi      USE_ASSERTS && assert($y < $this->seq[$k]);
259f3f0262cSandi      // Optimization: this is a common case:
260f3f0262cSandi      //  next match is just replacing previous match.
261f3f0262cSandi      $this->in_seq[$this->seq[$k]] = false;
262f3f0262cSandi      $this->seq[$k] = $y;
263f3f0262cSandi      $this->in_seq[$y] = 1;
264f3f0262cSandi          }
265f3f0262cSandi      else if (empty($this->in_seq[$y])) {
266f3f0262cSandi      $k = $this->_lcs_pos($y);
267f3f0262cSandi      USE_ASSERTS && assert($k > 0);
268f3f0262cSandi      $ymids[$k] = $ymids[$k-1];
269f3f0262cSandi          }
270f3f0262cSandi        }
271f3f0262cSandi      }
272f3f0262cSandi    }
273f3f0262cSandi
274f3f0262cSandi  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
275f3f0262cSandi  $ymid = $ymids[$this->lcs];
276f3f0262cSandi  for ($n = 0; $n < $nchunks - 1; $n++) {
277f3f0262cSandi    $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
278f3f0262cSandi    $y1 = $ymid[$n] + 1;
279f3f0262cSandi    $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
280f3f0262cSandi    }
281f3f0262cSandi  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
282f3f0262cSandi
283f3f0262cSandi  return array($this->lcs, $seps);
284f3f0262cSandi  }
285f3f0262cSandi
286f3f0262cSandi  function _lcs_pos ($ypos) {
287f3f0262cSandi  $end = $this->lcs;
288f3f0262cSandi  if ($end == 0 || $ypos > $this->seq[$end]) {
289f3f0262cSandi    $this->seq[++$this->lcs] = $ypos;
290f3f0262cSandi    $this->in_seq[$ypos] = 1;
291f3f0262cSandi    return $this->lcs;
292f3f0262cSandi    }
293f3f0262cSandi
294f3f0262cSandi  $beg = 1;
295f3f0262cSandi  while ($beg < $end) {
296f3f0262cSandi    $mid = (int)(($beg + $end) / 2);
297f3f0262cSandi    if ( $ypos > $this->seq[$mid] )
298f3f0262cSandi    $beg = $mid + 1;
299f3f0262cSandi    else
300f3f0262cSandi    $end = $mid;
301f3f0262cSandi    }
302f3f0262cSandi
303f3f0262cSandi  USE_ASSERTS && assert($ypos != $this->seq[$end]);
304f3f0262cSandi
305f3f0262cSandi  $this->in_seq[$this->seq[$end]] = false;
306f3f0262cSandi  $this->seq[$end] = $ypos;
307f3f0262cSandi  $this->in_seq[$ypos] = 1;
308f3f0262cSandi  return $end;
309f3f0262cSandi  }
310f3f0262cSandi
31115fae107Sandi  /**
31215fae107Sandi   * Find LCS of two sequences.
313f3f0262cSandi   *
314f3f0262cSandi   * The results are recorded in the vectors $this->{x,y}changed[], by
315f3f0262cSandi   * storing a 1 in the element for each line that is an insertion
316f3f0262cSandi   * or deletion (ie. is not in the LCS).
317f3f0262cSandi   *
318f3f0262cSandi   * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
319f3f0262cSandi   *
320f3f0262cSandi   * Note that XLIM, YLIM are exclusive bounds.
321f3f0262cSandi   * All line numbers are origin-0 and discarded lines are not counted.
322f3f0262cSandi   */
323f3f0262cSandi  function _compareseq ($xoff, $xlim, $yoff, $ylim) {
324f3f0262cSandi  // Slide down the bottom initial diagonal.
325f3f0262cSandi  while ($xoff < $xlim && $yoff < $ylim
326f3f0262cSandi         && $this->xv[$xoff] == $this->yv[$yoff]) {
327f3f0262cSandi    ++$xoff;
328f3f0262cSandi    ++$yoff;
329f3f0262cSandi    }
330f3f0262cSandi
331f3f0262cSandi  // Slide up the top initial diagonal.
332f3f0262cSandi  while ($xlim > $xoff && $ylim > $yoff
333f3f0262cSandi         && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
334f3f0262cSandi    --$xlim;
335f3f0262cSandi    --$ylim;
336f3f0262cSandi    }
337f3f0262cSandi
338f3f0262cSandi  if ($xoff == $xlim || $yoff == $ylim)
339f3f0262cSandi    $lcs = 0;
340f3f0262cSandi  else {
341f3f0262cSandi    // This is ad hoc but seems to work well.
342f3f0262cSandi    //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
343f3f0262cSandi    //$nchunks = max(2,min(8,(int)$nchunks));
344f3f0262cSandi    $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
345f3f0262cSandi    list ($lcs, $seps)
346f3f0262cSandi    = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
347f3f0262cSandi    }
348f3f0262cSandi
349f3f0262cSandi  if ($lcs == 0) {
350f3f0262cSandi    // X and Y sequences have no common subsequence:
351f3f0262cSandi    // mark all changed.
352f3f0262cSandi    while ($yoff < $ylim)
353f3f0262cSandi    $this->ychanged[$this->yind[$yoff++]] = 1;
354f3f0262cSandi    while ($xoff < $xlim)
355f3f0262cSandi    $this->xchanged[$this->xind[$xoff++]] = 1;
356f3f0262cSandi    }
357f3f0262cSandi  else {
358f3f0262cSandi    // Use the partitions to split this problem into subproblems.
359f3f0262cSandi    reset($seps);
360f3f0262cSandi    $pt1 = $seps[0];
361f3f0262cSandi    while ($pt2 = next($seps)) {
362f3f0262cSandi    $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
363f3f0262cSandi    $pt1 = $pt2;
364f3f0262cSandi      }
365f3f0262cSandi    }
366f3f0262cSandi  }
367f3f0262cSandi
36815fae107Sandi  /**
36915fae107Sandi   * Adjust inserts/deletes of identical lines to join changes
370f3f0262cSandi   * as much as possible.
371f3f0262cSandi   *
372f3f0262cSandi   * We do something when a run of changed lines include a
373f3f0262cSandi   * line at one end and has an excluded, identical line at the other.
374f3f0262cSandi   * We are free to choose which identical line is included.
375f3f0262cSandi   * `compareseq' usually chooses the one at the beginning,
376f3f0262cSandi   * but usually it is cleaner to consider the following identical line
377f3f0262cSandi   * to be the "change".
378f3f0262cSandi   *
379f3f0262cSandi   * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
380f3f0262cSandi   */
381f3f0262cSandi  function _shift_boundaries ($lines, &$changed, $other_changed) {
382f3f0262cSandi  $i = 0;
383f3f0262cSandi  $j = 0;
384f3f0262cSandi
385f3f0262cSandi  USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
386f3f0262cSandi  $len = sizeof($lines);
387f3f0262cSandi  $other_len = sizeof($other_changed);
388f3f0262cSandi
389f3f0262cSandi  while (1) {
390f3f0262cSandi    /*
391f3f0262cSandi     * Scan forwards to find beginning of another run of changes.
392f3f0262cSandi     * Also keep track of the corresponding point in the other file.
393f3f0262cSandi     *
394f3f0262cSandi     * Throughout this code, $i and $j are adjusted together so that
395f3f0262cSandi     * the first $i elements of $changed and the first $j elements
396f3f0262cSandi     * of $other_changed both contain the same number of zeros
397f3f0262cSandi     * (unchanged lines).
398f3f0262cSandi     * Furthermore, $j is always kept so that $j == $other_len or
399f3f0262cSandi     * $other_changed[$j] == false.
400f3f0262cSandi     */
401f3f0262cSandi    while ($j < $other_len && $other_changed[$j])
402f3f0262cSandi    $j++;
403f3f0262cSandi
404f3f0262cSandi    while ($i < $len && ! $changed[$i]) {
405f3f0262cSandi    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
406f3f0262cSandi    $i++; $j++;
407f3f0262cSandi    while ($j < $other_len && $other_changed[$j])
408f3f0262cSandi      $j++;
409f3f0262cSandi      }
410f3f0262cSandi
411f3f0262cSandi    if ($i == $len)
412f3f0262cSandi    break;
413f3f0262cSandi
414f3f0262cSandi    $start = $i;
415f3f0262cSandi
416f3f0262cSandi    // Find the end of this run of changes.
417f3f0262cSandi    while (++$i < $len && $changed[$i])
418f3f0262cSandi    continue;
419f3f0262cSandi
420f3f0262cSandi    do {
421f3f0262cSandi    /*
422f3f0262cSandi     * Record the length of this run of changes, so that
423f3f0262cSandi     * we can later determine whether the run has grown.
424f3f0262cSandi     */
425f3f0262cSandi    $runlength = $i - $start;
426f3f0262cSandi
427f3f0262cSandi    /*
428f3f0262cSandi     * Move the changed region back, so long as the
429f3f0262cSandi     * previous unchanged line matches the last changed one.
430f3f0262cSandi     * This merges with previous changed regions.
431f3f0262cSandi     */
432f3f0262cSandi    while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
433f3f0262cSandi      $changed[--$start] = 1;
434f3f0262cSandi      $changed[--$i] = false;
435f3f0262cSandi      while ($start > 0 && $changed[$start - 1])
436f3f0262cSandi      $start--;
437f3f0262cSandi      USE_ASSERTS && assert('$j > 0');
438f3f0262cSandi      while ($other_changed[--$j])
439f3f0262cSandi      continue;
440f3f0262cSandi      USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
441f3f0262cSandi        }
442f3f0262cSandi
443f3f0262cSandi    /*
444f3f0262cSandi     * Set CORRESPONDING to the end of the changed run, at the last
445f3f0262cSandi     * point where it corresponds to a changed run in the other file.
446f3f0262cSandi     * CORRESPONDING == LEN means no such point has been found.
447f3f0262cSandi     */
448f3f0262cSandi    $corresponding = $j < $other_len ? $i : $len;
449f3f0262cSandi
450f3f0262cSandi    /*
451f3f0262cSandi     * Move the changed region forward, so long as the
452f3f0262cSandi     * first changed line matches the following unchanged one.
453f3f0262cSandi     * This merges with following changed regions.
454f3f0262cSandi     * Do this second, so that if there are no merges,
455f3f0262cSandi     * the changed region is moved forward as far as possible.
456f3f0262cSandi     */
457f3f0262cSandi    while ($i < $len && $lines[$start] == $lines[$i]) {
458f3f0262cSandi      $changed[$start++] = false;
459f3f0262cSandi      $changed[$i++] = 1;
460f3f0262cSandi      while ($i < $len && $changed[$i])
461f3f0262cSandi      $i++;
462f3f0262cSandi
463f3f0262cSandi      USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
464f3f0262cSandi      $j++;
465f3f0262cSandi      if ($j < $other_len && $other_changed[$j]) {
466f3f0262cSandi      $corresponding = $i;
467f3f0262cSandi      while ($j < $other_len && $other_changed[$j])
468f3f0262cSandi        $j++;
469f3f0262cSandi          }
470f3f0262cSandi        }
471f3f0262cSandi      } while ($runlength != $i - $start);
472f3f0262cSandi
473f3f0262cSandi    /*
474f3f0262cSandi     * If possible, move the fully-merged run of changes
475f3f0262cSandi     * back to a corresponding run in the other file.
476f3f0262cSandi     */
477f3f0262cSandi    while ($corresponding < $i) {
478f3f0262cSandi    $changed[--$start] = 1;
479f3f0262cSandi    $changed[--$i] = 0;
480f3f0262cSandi    USE_ASSERTS && assert('$j > 0');
481f3f0262cSandi    while ($other_changed[--$j])
482f3f0262cSandi      continue;
483f3f0262cSandi    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
484f3f0262cSandi      }
485f3f0262cSandi    }
486f3f0262cSandi  }
487f3f0262cSandi}
488f3f0262cSandi
489f3f0262cSandi/**
490f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
491f3f0262cSandi */
492f3f0262cSandiclass Diff
493f3f0262cSandi{
494f3f0262cSandi  var $edits;
495f3f0262cSandi
496f3f0262cSandi  /**
497f3f0262cSandi   * Constructor.
498f3f0262cSandi   * Computes diff between sequences of strings.
499f3f0262cSandi   *
500f3f0262cSandi   * @param $from_lines array An array of strings.
501f3f0262cSandi   *      (Typically these are lines from a file.)
502f3f0262cSandi   * @param $to_lines array An array of strings.
503f3f0262cSandi   */
504f3f0262cSandi  function Diff($from_lines, $to_lines) {
505f3f0262cSandi    $eng = new _DiffEngine;
506f3f0262cSandi    $this->edits = $eng->diff($from_lines, $to_lines);
507f3f0262cSandi    //$this->_check($from_lines, $to_lines);
508f3f0262cSandi  }
509f3f0262cSandi
510f3f0262cSandi  /**
511f3f0262cSandi   * Compute reversed Diff.
512f3f0262cSandi   *
513f3f0262cSandi   * SYNOPSIS:
514f3f0262cSandi   *
515f3f0262cSandi   *  $diff = new Diff($lines1, $lines2);
516f3f0262cSandi   *  $rev = $diff->reverse();
517f3f0262cSandi   * @return object A Diff object representing the inverse of the
518f3f0262cSandi   *          original diff.
519f3f0262cSandi   */
520f3f0262cSandi  function reverse () {
521f3f0262cSandi  $rev = $this;
522f3f0262cSandi    $rev->edits = array();
523f3f0262cSandi    foreach ($this->edits as $edit) {
524f3f0262cSandi      $rev->edits[] = $edit->reverse();
525f3f0262cSandi    }
526f3f0262cSandi  return $rev;
527f3f0262cSandi  }
528f3f0262cSandi
529f3f0262cSandi  /**
530f3f0262cSandi   * Check for empty diff.
531f3f0262cSandi   *
532f3f0262cSandi   * @return bool True iff two sequences were identical.
533f3f0262cSandi   */
534f3f0262cSandi  function isEmpty () {
535f3f0262cSandi    foreach ($this->edits as $edit) {
536f3f0262cSandi      if ($edit->type != 'copy')
537f3f0262cSandi        return false;
538f3f0262cSandi    }
539f3f0262cSandi    return true;
540f3f0262cSandi  }
541f3f0262cSandi
542f3f0262cSandi  /**
543f3f0262cSandi   * Compute the length of the Longest Common Subsequence (LCS).
544f3f0262cSandi   *
545f3f0262cSandi   * This is mostly for diagnostic purposed.
546f3f0262cSandi   *
547f3f0262cSandi   * @return int The length of the LCS.
548f3f0262cSandi   */
549f3f0262cSandi  function lcs () {
550f3f0262cSandi  $lcs = 0;
551f3f0262cSandi    foreach ($this->edits as $edit) {
552f3f0262cSandi      if ($edit->type == 'copy')
553f3f0262cSandi        $lcs += sizeof($edit->orig);
554f3f0262cSandi    }
555f3f0262cSandi  return $lcs;
556f3f0262cSandi  }
557f3f0262cSandi
558f3f0262cSandi  /**
559f3f0262cSandi   * Get the original set of lines.
560f3f0262cSandi   *
561f3f0262cSandi   * This reconstructs the $from_lines parameter passed to the
562f3f0262cSandi   * constructor.
563f3f0262cSandi   *
564f3f0262cSandi   * @return array The original sequence of strings.
565f3f0262cSandi   */
566f3f0262cSandi  function orig() {
567f3f0262cSandi    $lines = array();
568f3f0262cSandi
569f3f0262cSandi    foreach ($this->edits as $edit) {
570f3f0262cSandi      if ($edit->orig)
571f3f0262cSandi        array_splice($lines, sizeof($lines), 0, $edit->orig);
572f3f0262cSandi    }
573f3f0262cSandi    return $lines;
574f3f0262cSandi  }
575f3f0262cSandi
576f3f0262cSandi  /**
577f3f0262cSandi   * Get the closing set of lines.
578f3f0262cSandi   *
579f3f0262cSandi   * This reconstructs the $to_lines parameter passed to the
580f3f0262cSandi   * constructor.
581f3f0262cSandi   *
582f3f0262cSandi   * @return array The sequence of strings.
583f3f0262cSandi   */
584f3f0262cSandi  function closing() {
585f3f0262cSandi    $lines = array();
586f3f0262cSandi
587f3f0262cSandi    foreach ($this->edits as $edit) {
588f3f0262cSandi      if ($edit->closing)
589f3f0262cSandi        array_splice($lines, sizeof($lines), 0, $edit->closing);
590f3f0262cSandi    }
591f3f0262cSandi    return $lines;
592f3f0262cSandi  }
593f3f0262cSandi
594f3f0262cSandi  /**
595f3f0262cSandi   * Check a Diff for validity.
596f3f0262cSandi   *
597f3f0262cSandi   * This is here only for debugging purposes.
598f3f0262cSandi   */
599f3f0262cSandi  function _check ($from_lines, $to_lines) {
600f3f0262cSandi    if (serialize($from_lines) != serialize($this->orig()))
601f3f0262cSandi      trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
602f3f0262cSandi    if (serialize($to_lines) != serialize($this->closing()))
603f3f0262cSandi      trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
604f3f0262cSandi
605f3f0262cSandi    $rev = $this->reverse();
606f3f0262cSandi    if (serialize($to_lines) != serialize($rev->orig()))
607f3f0262cSandi      trigger_error("Reversed original doesn't match", E_USER_ERROR);
608f3f0262cSandi    if (serialize($from_lines) != serialize($rev->closing()))
609f3f0262cSandi      trigger_error("Reversed closing doesn't match", E_USER_ERROR);
610f3f0262cSandi
611f3f0262cSandi
612f3f0262cSandi    $prevtype = 'none';
613f3f0262cSandi    foreach ($this->edits as $edit) {
614f3f0262cSandi      if ( $prevtype == $edit->type )
615f3f0262cSandi        trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
616f3f0262cSandi      $prevtype = $edit->type;
617f3f0262cSandi    }
618f3f0262cSandi
619f3f0262cSandi    $lcs = $this->lcs();
620f3f0262cSandi    trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
621f3f0262cSandi  }
622f3f0262cSandi}
623f3f0262cSandi
624f3f0262cSandi/**
625f3f0262cSandi * FIXME: bad name.
626f3f0262cSandi */
627f3f0262cSandiclass MappedDiff
628f3f0262cSandiextends Diff
629f3f0262cSandi{
630f3f0262cSandi  /**
631f3f0262cSandi   * Constructor.
632f3f0262cSandi   *
633f3f0262cSandi   * Computes diff between sequences of strings.
634f3f0262cSandi   *
635f3f0262cSandi   * This can be used to compute things like
636f3f0262cSandi   * case-insensitve diffs, or diffs which ignore
637f3f0262cSandi   * changes in white-space.
638f3f0262cSandi   *
639f3f0262cSandi   * @param $from_lines array An array of strings.
640f3f0262cSandi   *  (Typically these are lines from a file.)
641f3f0262cSandi   *
642f3f0262cSandi   * @param $to_lines array An array of strings.
643f3f0262cSandi   *
644f3f0262cSandi   * @param $mapped_from_lines array This array should
645f3f0262cSandi   *  have the same size number of elements as $from_lines.
646f3f0262cSandi   *  The elements in $mapped_from_lines and
647f3f0262cSandi   *  $mapped_to_lines are what is actually compared
648f3f0262cSandi   *  when computing the diff.
649f3f0262cSandi   *
650f3f0262cSandi   * @param $mapped_to_lines array This array should
651f3f0262cSandi   *  have the same number of elements as $to_lines.
652f3f0262cSandi   */
653f3f0262cSandi  function MappedDiff($from_lines, $to_lines,
654f3f0262cSandi            $mapped_from_lines, $mapped_to_lines) {
655f3f0262cSandi
656f3f0262cSandi    assert(sizeof($from_lines) == sizeof($mapped_from_lines));
657f3f0262cSandi    assert(sizeof($to_lines) == sizeof($mapped_to_lines));
658f3f0262cSandi
659f3f0262cSandi    $this->Diff($mapped_from_lines, $mapped_to_lines);
660f3f0262cSandi
661f3f0262cSandi    $xi = $yi = 0;
662f3f0262cSandi    for ($i = 0; $i < sizeof($this->edits); $i++) {
663f3f0262cSandi      $orig = &$this->edits[$i]->orig;
664f3f0262cSandi      if (is_array($orig)) {
665f3f0262cSandi        $orig = array_slice($from_lines, $xi, sizeof($orig));
666f3f0262cSandi        $xi += sizeof($orig);
667f3f0262cSandi      }
668f3f0262cSandi
669f3f0262cSandi      $closing = &$this->edits[$i]->closing;
670f3f0262cSandi      if (is_array($closing)) {
671f3f0262cSandi        $closing = array_slice($to_lines, $yi, sizeof($closing));
672f3f0262cSandi        $yi += sizeof($closing);
673f3f0262cSandi      }
674f3f0262cSandi    }
675f3f0262cSandi  }
676f3f0262cSandi}
677f3f0262cSandi
678f3f0262cSandi/**
679f3f0262cSandi * A class to format Diffs
680f3f0262cSandi *
681f3f0262cSandi * This class formats the diff in classic diff format.
682f3f0262cSandi * It is intended that this class be customized via inheritance,
683f3f0262cSandi * to obtain fancier outputs.
684f3f0262cSandi */
685f3f0262cSandiclass DiffFormatter
686f3f0262cSandi{
687f3f0262cSandi  /**
688f3f0262cSandi   * Number of leading context "lines" to preserve.
689f3f0262cSandi   *
690f3f0262cSandi   * This should be left at zero for this class, but subclasses
691f3f0262cSandi   * may want to set this to other values.
692f3f0262cSandi   */
693f3f0262cSandi  var $leading_context_lines = 0;
694f3f0262cSandi
695f3f0262cSandi  /**
696f3f0262cSandi   * Number of trailing context "lines" to preserve.
697f3f0262cSandi   *
698f3f0262cSandi   * This should be left at zero for this class, but subclasses
699f3f0262cSandi   * may want to set this to other values.
700f3f0262cSandi   */
701f3f0262cSandi  var $trailing_context_lines = 0;
702f3f0262cSandi
703f3f0262cSandi  /**
704f3f0262cSandi   * Format a diff.
705f3f0262cSandi   *
706f3f0262cSandi   * @param $diff object A Diff object.
707f3f0262cSandi   * @return string The formatted output.
708f3f0262cSandi   */
709f3f0262cSandi  function format($diff) {
710f3f0262cSandi
711f3f0262cSandi    $xi = $yi = 1;
712f3f0262cSandi    $block = false;
713f3f0262cSandi    $context = array();
714f3f0262cSandi
715f3f0262cSandi    $nlead = $this->leading_context_lines;
716f3f0262cSandi    $ntrail = $this->trailing_context_lines;
717f3f0262cSandi
718f3f0262cSandi    $this->_start_diff();
719f3f0262cSandi
720f3f0262cSandi    foreach ($diff->edits as $edit) {
721f3f0262cSandi      if ($edit->type == 'copy') {
722f3f0262cSandi        if (is_array($block)) {
723f3f0262cSandi          if (sizeof($edit->orig) <= $nlead + $ntrail) {
724f3f0262cSandi            $block[] = $edit;
725f3f0262cSandi          }
726f3f0262cSandi          else{
727f3f0262cSandi            if ($ntrail) {
728f3f0262cSandi              $context = array_slice($edit->orig, 0, $ntrail);
729f3f0262cSandi              $block[] = new _DiffOp_Copy($context);
730f3f0262cSandi            }
731f3f0262cSandi            $this->_block($x0, $ntrail + $xi - $x0,
732f3f0262cSandi                    $y0, $ntrail + $yi - $y0,
733f3f0262cSandi                    $block);
734f3f0262cSandi            $block = false;
735f3f0262cSandi          }
736f3f0262cSandi        }
737f3f0262cSandi        $context = $edit->orig;
738f3f0262cSandi      }
739f3f0262cSandi      else {
740f3f0262cSandi        if (! is_array($block)) {
741f3f0262cSandi          $context = array_slice($context, sizeof($context) - $nlead);
742f3f0262cSandi          $x0 = $xi - sizeof($context);
743f3f0262cSandi          $y0 = $yi - sizeof($context);
744f3f0262cSandi          $block = array();
745f3f0262cSandi          if ($context)
746f3f0262cSandi            $block[] = new _DiffOp_Copy($context);
747f3f0262cSandi        }
748f3f0262cSandi        $block[] = $edit;
749f3f0262cSandi      }
750f3f0262cSandi
751f3f0262cSandi      if ($edit->orig)
752f3f0262cSandi        $xi += sizeof($edit->orig);
753f3f0262cSandi      if ($edit->closing)
754f3f0262cSandi        $yi += sizeof($edit->closing);
755f3f0262cSandi    }
756f3f0262cSandi
757f3f0262cSandi    if (is_array($block))
758f3f0262cSandi      $this->_block($x0, $xi - $x0,
759f3f0262cSandi              $y0, $yi - $y0,
760f3f0262cSandi              $block);
761f3f0262cSandi
762f3f0262cSandi    return $this->_end_diff();
763f3f0262cSandi  }
764f3f0262cSandi
765f3f0262cSandi  function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
766f3f0262cSandi    $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
767f3f0262cSandi    foreach ($edits as $edit) {
768f3f0262cSandi      if ($edit->type == 'copy')
769f3f0262cSandi        $this->_context($edit->orig);
770f3f0262cSandi      elseif ($edit->type == 'add')
771f3f0262cSandi        $this->_added($edit->closing);
772f3f0262cSandi      elseif ($edit->type == 'delete')
773f3f0262cSandi        $this->_deleted($edit->orig);
774f3f0262cSandi      elseif ($edit->type == 'change')
775f3f0262cSandi        $this->_changed($edit->orig, $edit->closing);
776f3f0262cSandi      else
777f3f0262cSandi        trigger_error("Unknown edit type", E_USER_ERROR);
778f3f0262cSandi    }
779f3f0262cSandi    $this->_end_block();
780f3f0262cSandi  }
781f3f0262cSandi
782f3f0262cSandi  function _start_diff() {
783f3f0262cSandi    ob_start();
784f3f0262cSandi  }
785f3f0262cSandi
786f3f0262cSandi  function _end_diff() {
787f3f0262cSandi    $val = ob_get_contents();
788f3f0262cSandi    ob_end_clean();
789f3f0262cSandi    return $val;
790f3f0262cSandi  }
791f3f0262cSandi
792f3f0262cSandi  function _block_header($xbeg, $xlen, $ybeg, $ylen) {
793f3f0262cSandi    if ($xlen > 1)
794f3f0262cSandi      $xbeg .= "," . ($xbeg + $xlen - 1);
795f3f0262cSandi    if ($ylen > 1)
796f3f0262cSandi      $ybeg .= "," . ($ybeg + $ylen - 1);
797f3f0262cSandi
798f3f0262cSandi    return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
799f3f0262cSandi  }
800f3f0262cSandi
801f3f0262cSandi  function _start_block($header) {
802f3f0262cSandi    echo $header;
803f3f0262cSandi  }
804f3f0262cSandi
805f3f0262cSandi  function _end_block() {
806f3f0262cSandi  }
807f3f0262cSandi
808f3f0262cSandi  function _lines($lines, $prefix = ' ') {
809f3f0262cSandi    foreach ($lines as $line)
810f3f0262cSandi      echo "$prefix $line\n";
811f3f0262cSandi  }
812f3f0262cSandi
813f3f0262cSandi  function _context($lines) {
814f3f0262cSandi    $this->_lines($lines);
815f3f0262cSandi  }
816f3f0262cSandi
817f3f0262cSandi  function _added($lines) {
818f3f0262cSandi    $this->_lines($lines, ">");
819f3f0262cSandi  }
820f3f0262cSandi  function _deleted($lines) {
821f3f0262cSandi    $this->_lines($lines, "<");
822f3f0262cSandi  }
823f3f0262cSandi
824f3f0262cSandi  function _changed($orig, $closing) {
825f3f0262cSandi    $this->_deleted($orig);
826f3f0262cSandi    echo "---\n";
827f3f0262cSandi    $this->_added($closing);
828f3f0262cSandi  }
829f3f0262cSandi}
830f3f0262cSandi
831f3f0262cSandi
832f3f0262cSandi/**
833f3f0262cSandi *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
834f3f0262cSandi *
835f3f0262cSandi */
836f3f0262cSandi
8379b3a5b24Schrisdefine('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
838f3f0262cSandi
839f3f0262cSandiclass _HWLDF_WordAccumulator {
840f3f0262cSandi  function _HWLDF_WordAccumulator () {
841f3f0262cSandi    $this->_lines = array();
842f3f0262cSandi    $this->_line = '';
843f3f0262cSandi    $this->_group = '';
844f3f0262cSandi    $this->_tag = '';
845f3f0262cSandi  }
846f3f0262cSandi
847f3f0262cSandi  function _flushGroup ($new_tag) {
848f3f0262cSandi    if ($this->_group !== '') {
849f3f0262cSandi    if ($this->_tag == 'mark')
850*ed7ecb79SAnika Henke      $this->_line .= '<strong>'.$this->_group.'</strong>';
851f3f0262cSandi    else
852f3f0262cSandi    $this->_line .= $this->_group;
853f3f0262cSandi  }
854f3f0262cSandi    $this->_group = '';
855f3f0262cSandi    $this->_tag = $new_tag;
856f3f0262cSandi  }
857f3f0262cSandi
858f3f0262cSandi  function _flushLine ($new_tag) {
859f3f0262cSandi    $this->_flushGroup($new_tag);
860f3f0262cSandi    if ($this->_line != '')
861f3f0262cSandi      $this->_lines[] = $this->_line;
862f3f0262cSandi    $this->_line = '';
863f3f0262cSandi  }
864f3f0262cSandi
865f3f0262cSandi  function addWords ($words, $tag = '') {
866f3f0262cSandi    if ($tag != $this->_tag)
867f3f0262cSandi      $this->_flushGroup($tag);
868f3f0262cSandi
869f3f0262cSandi    foreach ($words as $word) {
870f3f0262cSandi      // new-line should only come as first char of word.
871f3f0262cSandi      if ($word == '')
872f3f0262cSandi        continue;
873f3f0262cSandi      if ($word[0] == "\n") {
874f3f0262cSandi        $this->_group .= NBSP;
875f3f0262cSandi        $this->_flushLine($tag);
876f3f0262cSandi        $word = substr($word, 1);
877f3f0262cSandi      }
878f3f0262cSandi      assert(!strstr($word, "\n"));
879f3f0262cSandi      $this->_group .= $word;
880f3f0262cSandi    }
881f3f0262cSandi  }
882f3f0262cSandi
883f3f0262cSandi  function getLines() {
884f3f0262cSandi    $this->_flushLine('~done');
885f3f0262cSandi    return $this->_lines;
886f3f0262cSandi  }
887f3f0262cSandi}
888f3f0262cSandi
889f3f0262cSandiclass WordLevelDiff extends MappedDiff
890f3f0262cSandi{
891f3f0262cSandi  function WordLevelDiff ($orig_lines, $closing_lines) {
892f3f0262cSandi    list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
893f3f0262cSandi    list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
894f3f0262cSandi
895f3f0262cSandi
896f3f0262cSandi    $this->MappedDiff($orig_words, $closing_words,
897f3f0262cSandi              $orig_stripped, $closing_stripped);
898f3f0262cSandi  }
899f3f0262cSandi
900f3f0262cSandi  function _split($lines) {
901f3f0262cSandi    // FIXME: fix POSIX char class.
902f3f0262cSandi#    if (!preg_match_all('/ ( [^\S\n]+ | [[:alnum:]]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
903f3f0262cSandi    if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
904f3f0262cSandi              implode("\n", $lines),
905f3f0262cSandi              $m)) {
906f3f0262cSandi      return array(array(''), array(''));
907f3f0262cSandi    }
908f3f0262cSandi    return array($m[0], $m[1]);
909f3f0262cSandi  }
910f3f0262cSandi
911f3f0262cSandi  function orig () {
912f3f0262cSandi    $orig = new _HWLDF_WordAccumulator;
913f3f0262cSandi
914f3f0262cSandi    foreach ($this->edits as $edit) {
915f3f0262cSandi      if ($edit->type == 'copy')
916f3f0262cSandi        $orig->addWords($edit->orig);
917f3f0262cSandi      elseif ($edit->orig)
918f3f0262cSandi        $orig->addWords($edit->orig, 'mark');
919f3f0262cSandi    }
920f3f0262cSandi    return $orig->getLines();
921f3f0262cSandi  }
922f3f0262cSandi
923f3f0262cSandi  function closing () {
924f3f0262cSandi    $closing = new _HWLDF_WordAccumulator;
925f3f0262cSandi
926f3f0262cSandi    foreach ($this->edits as $edit) {
927f3f0262cSandi      if ($edit->type == 'copy')
928f3f0262cSandi        $closing->addWords($edit->closing);
929f3f0262cSandi      elseif ($edit->closing)
930f3f0262cSandi        $closing->addWords($edit->closing, 'mark');
931f3f0262cSandi    }
932f3f0262cSandi    return $closing->getLines();
933f3f0262cSandi  }
934f3f0262cSandi}
935f3f0262cSandi
936f3f0262cSandi/**
937f3f0262cSandi * "Unified" diff formatter.
938f3f0262cSandi *
939f3f0262cSandi * This class formats the diff in classic "unified diff" format.
940f3f0262cSandi */
941f3f0262cSandiclass UnifiedDiffFormatter extends DiffFormatter
942f3f0262cSandi{
943f3f0262cSandi    function UnifiedDiffFormatter($context_lines = 4) {
944f3f0262cSandi        $this->leading_context_lines = $context_lines;
945f3f0262cSandi        $this->trailing_context_lines = $context_lines;
946f3f0262cSandi    }
947f3f0262cSandi
948f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
949f3f0262cSandi        if ($xlen != 1)
950f3f0262cSandi            $xbeg .= "," . $xlen;
951f3f0262cSandi        if ($ylen != 1)
952f3f0262cSandi            $ybeg .= "," . $ylen;
953f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
954f3f0262cSandi    }
955f3f0262cSandi
956f3f0262cSandi    function _added($lines) {
957f3f0262cSandi        $this->_lines($lines, "+");
958f3f0262cSandi    }
959f3f0262cSandi    function _deleted($lines) {
960f3f0262cSandi        $this->_lines($lines, "-");
961f3f0262cSandi    }
962f3f0262cSandi    function _changed($orig, $final) {
963f3f0262cSandi        $this->_deleted($orig);
964f3f0262cSandi        $this->_added($final);
965f3f0262cSandi    }
966f3f0262cSandi}
967f3f0262cSandi
968f3f0262cSandi/**
969f3f0262cSandi *  Wikipedia Table style diff formatter.
970f3f0262cSandi *
971f3f0262cSandi */
972f3f0262cSandiclass TableDiffFormatter extends DiffFormatter
973f3f0262cSandi{
974f3f0262cSandi  function TableDiffFormatter() {
975f3f0262cSandi    $this->leading_context_lines = 2;
976f3f0262cSandi    $this->trailing_context_lines = 2;
977f3f0262cSandi  }
978f3f0262cSandi
979f3f0262cSandi  function _pre($text){
980f3f0262cSandi    $text = htmlspecialchars($text);
981f3f0262cSandi    $text = str_replace('  ',' &nbsp;',$text);
982f3f0262cSandi    return $text;
983f3f0262cSandi  }
984f3f0262cSandi
985f3f0262cSandi  function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
986f3f0262cSandi    global $lang;
987f3f0262cSandi    $l1 = $lang['line'].' '.$xbeg;
988f3f0262cSandi    $l2 = $lang['line'].' '.$ybeg;
989f3f0262cSandi    $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
990f3f0262cSandi      '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
991f3f0262cSandi    return $r;
992f3f0262cSandi  }
993f3f0262cSandi
994f3f0262cSandi  function _start_block( $header ) {
995f3f0262cSandi    print( $header );
996f3f0262cSandi  }
997f3f0262cSandi
998f3f0262cSandi  function _end_block() {
999f3f0262cSandi  }
1000f3f0262cSandi
1001f3f0262cSandi  function _lines( $lines, $prefix=' ', $color="white" ) {
1002f3f0262cSandi  }
1003f3f0262cSandi
1004f3f0262cSandi  function addedLine( $line ) {
1005f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1006f3f0262cSandi    return '<td>+</td><td class="diff-addedline">' .
1007f3f0262cSandi      $line.'</td>';
1008f3f0262cSandi  }
1009f3f0262cSandi
1010f3f0262cSandi  function deletedLine( $line ) {
1011f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1012f3f0262cSandi    return '<td>-</td><td class="diff-deletedline">' .
1013f3f0262cSandi      $line.'</td>';
1014f3f0262cSandi  }
1015f3f0262cSandi
1016f3f0262cSandi  function emptyLine() {
1017039bebbaSBen Coburn    //$line = str_replace('  ','&nbsp; ',$line);
1018f3f0262cSandi    return '<td colspan="2">&nbsp;</td>';
1019f3f0262cSandi  }
1020f3f0262cSandi
1021f3f0262cSandi  function contextLine( $line ) {
1022f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1023f3f0262cSandi    return '<td> </td><td class="diff-context">'.$line.'</td>';
1024f3f0262cSandi  }
1025f3f0262cSandi
1026f3f0262cSandi  function _added($lines) {
1027f3f0262cSandi    foreach ($lines as $line) {
1028f3f0262cSandi      print( '<tr>' . $this->emptyLine() .
1029f3f0262cSandi        $this->addedLine( $line ) . "</tr>\n" );
1030f3f0262cSandi    }
1031f3f0262cSandi  }
1032f3f0262cSandi
1033f3f0262cSandi  function _deleted($lines) {
1034f3f0262cSandi    foreach ($lines as $line) {
1035f3f0262cSandi      print( '<tr>' . $this->deletedLine( $line ) .
1036f3f0262cSandi        $this->emptyLine() . "</tr>\n" );
1037f3f0262cSandi    }
1038f3f0262cSandi  }
1039f3f0262cSandi
1040f3f0262cSandi  function _context( $lines ) {
1041f3f0262cSandi    foreach ($lines as $line) {
1042f3f0262cSandi      print( '<tr>' . $this->contextLine( $line ) .
1043f3f0262cSandi        $this->contextLine( $line ) . "</tr>\n" );
1044f3f0262cSandi    }
1045f3f0262cSandi  }
1046f3f0262cSandi
1047f3f0262cSandi  function _changed( $orig, $closing ) {
1048f3f0262cSandi    $diff = new WordLevelDiff( $orig, $closing );
1049f3f0262cSandi    $del = $diff->orig();
1050f3f0262cSandi    $add = $diff->closing();
1051f3f0262cSandi
1052f3f0262cSandi    while ( $line = array_shift( $del ) ) {
1053f3f0262cSandi      $aline = array_shift( $add );
1054f3f0262cSandi      print( '<tr>' . $this->deletedLine( $line ) .
1055f3f0262cSandi        $this->addedLine( $aline ) . "</tr>\n" );
1056f3f0262cSandi    }
1057f3f0262cSandi    $this->_added( $add ); # If any leftovers
1058f3f0262cSandi  }
1059f3f0262cSandi}
1060f3f0262cSandi
1061340756e4Sandi
1062340756e4Sandi//Setup VIM: ex: et ts=2 enc=utf-8 :
1063