xref: /dokuwiki/inc/DifferenceEngine.php (revision 1c64eaeaa3295062b0a4e9b20b772274cd86ca42)
1<?php
2/**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Additions by Axel Boldt for MediaWiki
6 *
7 * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 * @license  You may copy this code freely under the conditions of the GPL.
9 */
10define('USE_ASSERTS', function_exists('assert'));
11
12class _DiffOp {
13  var $type;
14  var $orig;
15  var $closing;
16
17  function reverse() {
18    trigger_error("pure virtual", E_USER_ERROR);
19  }
20
21  function norig() {
22    return $this->orig ? sizeof($this->orig) : 0;
23  }
24
25  function nclosing() {
26    return $this->closing ? sizeof($this->closing) : 0;
27  }
28}
29
30class _DiffOp_Copy extends _DiffOp {
31  var $type = 'copy';
32
33  function _DiffOp_Copy ($orig, $closing = false) {
34    if (!is_array($closing))
35      $closing = $orig;
36    $this->orig = $orig;
37    $this->closing = $closing;
38  }
39
40  function reverse() {
41    return new _DiffOp_Copy($this->closing, $this->orig);
42  }
43}
44
45class _DiffOp_Delete extends _DiffOp {
46  var $type = 'delete';
47
48  function _DiffOp_Delete ($lines) {
49    $this->orig = $lines;
50    $this->closing = false;
51  }
52
53  function reverse() {
54    return new _DiffOp_Add($this->orig);
55  }
56}
57
58class _DiffOp_Add extends _DiffOp {
59  var $type = 'add';
60
61  function _DiffOp_Add ($lines) {
62    $this->closing = $lines;
63    $this->orig = false;
64  }
65
66  function reverse() {
67    return new _DiffOp_Delete($this->closing);
68  }
69}
70
71class _DiffOp_Change extends _DiffOp {
72  var $type = 'change';
73
74  function _DiffOp_Change ($orig, $closing) {
75    $this->orig = $orig;
76    $this->closing = $closing;
77  }
78
79  function reverse() {
80    return new _DiffOp_Change($this->closing, $this->orig);
81  }
82}
83
84
85/**
86 * Class used internally by Diff to actually compute the diffs.
87 *
88 * The algorithm used here is mostly lifted from the perl module
89 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
90 *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
91 *
92 * More ideas are taken from:
93 *   http://www.ics.uci.edu/~eppstein/161/960229.html
94 *
95 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
96 * diffutils-2.7, which can be found at:
97 *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
98 *
99 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
100 * are my own.
101 *
102 * @author Geoffrey T. Dairiki
103 * @access private
104 */
105class _DiffEngine
106{
107  function diff ($from_lines, $to_lines) {
108    $n_from = sizeof($from_lines);
109    $n_to = sizeof($to_lines);
110
111    $this->xchanged = $this->ychanged = array();
112    $this->xv = $this->yv = array();
113    $this->xind = $this->yind = array();
114    unset($this->seq);
115    unset($this->in_seq);
116    unset($this->lcs);
117
118    // Skip leading common lines.
119    for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
120      if ($from_lines[$skip] != $to_lines[$skip])
121        break;
122      $this->xchanged[$skip] = $this->ychanged[$skip] = false;
123    }
124    // Skip trailing common lines.
125    $xi = $n_from; $yi = $n_to;
126    for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
127      if ($from_lines[$xi] != $to_lines[$yi])
128        break;
129      $this->xchanged[$xi] = $this->ychanged[$yi] = false;
130    }
131
132    // Ignore lines which do not exist in both files.
133    for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
134      $xhash[$from_lines[$xi]] = 1;
135    for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
136      $line = $to_lines[$yi];
137      if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
138        continue;
139      $yhash[$line] = 1;
140      $this->yv[] = $line;
141      $this->yind[] = $yi;
142    }
143    for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
144      $line = $from_lines[$xi];
145      if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
146        continue;
147      $this->xv[] = $line;
148      $this->xind[] = $xi;
149    }
150
151    // Find the LCS.
152    $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
153
154    // Merge edits when possible
155    $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
156    $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
157
158    // Compute the edit operations.
159    $edits = array();
160    $xi = $yi = 0;
161    while ($xi < $n_from || $yi < $n_to) {
162      USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
163      USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
164
165      // Skip matching "snake".
166      $copy = array();
167      while ( $xi < $n_from && $yi < $n_to
168          && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
169        $copy[] = $from_lines[$xi++];
170        ++$yi;
171      }
172      if ($copy)
173        $edits[] = new _DiffOp_Copy($copy);
174
175      // Find deletes & adds.
176      $delete = array();
177      while ($xi < $n_from && $this->xchanged[$xi])
178        $delete[] = $from_lines[$xi++];
179
180      $add = array();
181      while ($yi < $n_to && $this->ychanged[$yi])
182        $add[] = $to_lines[$yi++];
183
184      if ($delete && $add)
185        $edits[] = new _DiffOp_Change($delete, $add);
186      elseif ($delete)
187        $edits[] = new _DiffOp_Delete($delete);
188      elseif ($add)
189        $edits[] = new _DiffOp_Add($add);
190    }
191    return $edits;
192  }
193
194
195  /**
196   * Divide the Largest Common Subsequence (LCS) of the sequences
197   * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
198   * sized segments.
199   *
200   * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
201   * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
202   * sub sequences.  The first sub-sequence is contained in [X0, X1),
203   * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
204   * that (X0, Y0) == (XOFF, YOFF) and
205   * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
206   *
207   * This function assumes that the first lines of the specified portions
208   * of the two files do not match, and likewise that the last lines do not
209   * match.  The caller must trim matching lines from the beginning and end
210   * of the portions it is going to specify.
211   */
212  function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
213  $flip = false;
214
215  if ($xlim - $xoff > $ylim - $yoff) {
216    // Things seems faster (I'm not sure I understand why)
217      // when the shortest sequence in X.
218      $flip = true;
219    list ($xoff, $xlim, $yoff, $ylim)
220    = array( $yoff, $ylim, $xoff, $xlim);
221    }
222
223  if ($flip)
224    for ($i = $ylim - 1; $i >= $yoff; $i--)
225    $ymatches[$this->xv[$i]][] = $i;
226  else
227    for ($i = $ylim - 1; $i >= $yoff; $i--)
228    $ymatches[$this->yv[$i]][] = $i;
229
230  $this->lcs = 0;
231  $this->seq[0]= $yoff - 1;
232  $this->in_seq = array();
233  $ymids[0] = array();
234
235  $numer = $xlim - $xoff + $nchunks - 1;
236  $x = $xoff;
237  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
238    if ($chunk > 0)
239    for ($i = 0; $i <= $this->lcs; $i++)
240      $ymids[$i][$chunk-1] = $this->seq[$i];
241
242    $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
243    for ( ; $x < $x1; $x++) {
244        $line = $flip ? $this->yv[$x] : $this->xv[$x];
245        if (empty($ymatches[$line]))
246      continue;
247    $matches = $ymatches[$line];
248        reset($matches);
249    while (list ($junk, $y) = each($matches))
250      if (empty($this->in_seq[$y])) {
251      $k = $this->_lcs_pos($y);
252      USE_ASSERTS && assert($k > 0);
253      $ymids[$k] = $ymids[$k-1];
254      break;
255          }
256    while (list ($junk, $y) = each($matches)) {
257      if ($y > $this->seq[$k-1]) {
258      USE_ASSERTS && assert($y < $this->seq[$k]);
259      // Optimization: this is a common case:
260      //  next match is just replacing previous match.
261      $this->in_seq[$this->seq[$k]] = false;
262      $this->seq[$k] = $y;
263      $this->in_seq[$y] = 1;
264          }
265      else if (empty($this->in_seq[$y])) {
266      $k = $this->_lcs_pos($y);
267      USE_ASSERTS && assert($k > 0);
268      $ymids[$k] = $ymids[$k-1];
269          }
270        }
271      }
272    }
273
274  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
275  $ymid = $ymids[$this->lcs];
276  for ($n = 0; $n < $nchunks - 1; $n++) {
277    $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
278    $y1 = $ymid[$n] + 1;
279    $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
280    }
281  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
282
283  return array($this->lcs, $seps);
284  }
285
286  function _lcs_pos ($ypos) {
287  $end = $this->lcs;
288  if ($end == 0 || $ypos > $this->seq[$end]) {
289    $this->seq[++$this->lcs] = $ypos;
290    $this->in_seq[$ypos] = 1;
291    return $this->lcs;
292    }
293
294  $beg = 1;
295  while ($beg < $end) {
296    $mid = (int)(($beg + $end) / 2);
297    if ( $ypos > $this->seq[$mid] )
298    $beg = $mid + 1;
299    else
300    $end = $mid;
301    }
302
303  USE_ASSERTS && assert($ypos != $this->seq[$end]);
304
305  $this->in_seq[$this->seq[$end]] = false;
306  $this->seq[$end] = $ypos;
307  $this->in_seq[$ypos] = 1;
308  return $end;
309  }
310
311  /**
312   * Find LCS of two sequences.
313   *
314   * The results are recorded in the vectors $this->{x,y}changed[], by
315   * storing a 1 in the element for each line that is an insertion
316   * or deletion (ie. is not in the LCS).
317   *
318   * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
319   *
320   * Note that XLIM, YLIM are exclusive bounds.
321   * All line numbers are origin-0 and discarded lines are not counted.
322   */
323  function _compareseq ($xoff, $xlim, $yoff, $ylim) {
324  // Slide down the bottom initial diagonal.
325  while ($xoff < $xlim && $yoff < $ylim
326         && $this->xv[$xoff] == $this->yv[$yoff]) {
327    ++$xoff;
328    ++$yoff;
329    }
330
331  // Slide up the top initial diagonal.
332  while ($xlim > $xoff && $ylim > $yoff
333         && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
334    --$xlim;
335    --$ylim;
336    }
337
338  if ($xoff == $xlim || $yoff == $ylim)
339    $lcs = 0;
340  else {
341    // This is ad hoc but seems to work well.
342    //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
343    //$nchunks = max(2,min(8,(int)$nchunks));
344    $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
345    list ($lcs, $seps)
346    = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
347    }
348
349  if ($lcs == 0) {
350    // X and Y sequences have no common subsequence:
351    // mark all changed.
352    while ($yoff < $ylim)
353    $this->ychanged[$this->yind[$yoff++]] = 1;
354    while ($xoff < $xlim)
355    $this->xchanged[$this->xind[$xoff++]] = 1;
356    }
357  else {
358    // Use the partitions to split this problem into subproblems.
359    reset($seps);
360    $pt1 = $seps[0];
361    while ($pt2 = next($seps)) {
362    $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
363    $pt1 = $pt2;
364      }
365    }
366  }
367
368  /**
369   * Adjust inserts/deletes of identical lines to join changes
370   * as much as possible.
371   *
372   * We do something when a run of changed lines include a
373   * line at one end and has an excluded, identical line at the other.
374   * We are free to choose which identical line is included.
375   * `compareseq' usually chooses the one at the beginning,
376   * but usually it is cleaner to consider the following identical line
377   * to be the "change".
378   *
379   * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
380   */
381  function _shift_boundaries ($lines, &$changed, $other_changed) {
382  $i = 0;
383  $j = 0;
384
385  USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
386  $len = sizeof($lines);
387  $other_len = sizeof($other_changed);
388
389  while (1) {
390    /*
391     * Scan forwards to find beginning of another run of changes.
392     * Also keep track of the corresponding point in the other file.
393     *
394     * Throughout this code, $i and $j are adjusted together so that
395     * the first $i elements of $changed and the first $j elements
396     * of $other_changed both contain the same number of zeros
397     * (unchanged lines).
398     * Furthermore, $j is always kept so that $j == $other_len or
399     * $other_changed[$j] == false.
400     */
401    while ($j < $other_len && $other_changed[$j])
402    $j++;
403
404    while ($i < $len && ! $changed[$i]) {
405    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
406    $i++; $j++;
407    while ($j < $other_len && $other_changed[$j])
408      $j++;
409      }
410
411    if ($i == $len)
412    break;
413
414    $start = $i;
415
416    // Find the end of this run of changes.
417    while (++$i < $len && $changed[$i])
418    continue;
419
420    do {
421    /*
422     * Record the length of this run of changes, so that
423     * we can later determine whether the run has grown.
424     */
425    $runlength = $i - $start;
426
427    /*
428     * Move the changed region back, so long as the
429     * previous unchanged line matches the last changed one.
430     * This merges with previous changed regions.
431     */
432    while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
433      $changed[--$start] = 1;
434      $changed[--$i] = false;
435      while ($start > 0 && $changed[$start - 1])
436      $start--;
437      USE_ASSERTS && assert('$j > 0');
438      while ($other_changed[--$j])
439      continue;
440      USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
441        }
442
443    /*
444     * Set CORRESPONDING to the end of the changed run, at the last
445     * point where it corresponds to a changed run in the other file.
446     * CORRESPONDING == LEN means no such point has been found.
447     */
448    $corresponding = $j < $other_len ? $i : $len;
449
450    /*
451     * Move the changed region forward, so long as the
452     * first changed line matches the following unchanged one.
453     * This merges with following changed regions.
454     * Do this second, so that if there are no merges,
455     * the changed region is moved forward as far as possible.
456     */
457    while ($i < $len && $lines[$start] == $lines[$i]) {
458      $changed[$start++] = false;
459      $changed[$i++] = 1;
460      while ($i < $len && $changed[$i])
461      $i++;
462
463      USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
464      $j++;
465      if ($j < $other_len && $other_changed[$j]) {
466      $corresponding = $i;
467      while ($j < $other_len && $other_changed[$j])
468        $j++;
469          }
470        }
471      } while ($runlength != $i - $start);
472
473    /*
474     * If possible, move the fully-merged run of changes
475     * back to a corresponding run in the other file.
476     */
477    while ($corresponding < $i) {
478    $changed[--$start] = 1;
479    $changed[--$i] = 0;
480    USE_ASSERTS && assert('$j > 0');
481    while ($other_changed[--$j])
482      continue;
483    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
484      }
485    }
486  }
487}
488
489/**
490 * Class representing a 'diff' between two sequences of strings.
491 */
492class Diff
493{
494  var $edits;
495
496  /**
497   * Constructor.
498   * Computes diff between sequences of strings.
499   *
500   * @param $from_lines array An array of strings.
501   *      (Typically these are lines from a file.)
502   * @param $to_lines array An array of strings.
503   */
504  function Diff($from_lines, $to_lines) {
505    $eng = new _DiffEngine;
506    $this->edits = $eng->diff($from_lines, $to_lines);
507    //$this->_check($from_lines, $to_lines);
508  }
509
510  /**
511   * Compute reversed Diff.
512   *
513   * SYNOPSIS:
514   *
515   *  $diff = new Diff($lines1, $lines2);
516   *  $rev = $diff->reverse();
517   * @return object A Diff object representing the inverse of the
518   *          original diff.
519   */
520  function reverse () {
521  $rev = $this;
522    $rev->edits = array();
523    foreach ($this->edits as $edit) {
524      $rev->edits[] = $edit->reverse();
525    }
526  return $rev;
527  }
528
529  /**
530   * Check for empty diff.
531   *
532   * @return bool True iff two sequences were identical.
533   */
534  function isEmpty () {
535    foreach ($this->edits as $edit) {
536      if ($edit->type != 'copy')
537        return false;
538    }
539    return true;
540  }
541
542  /**
543   * Compute the length of the Longest Common Subsequence (LCS).
544   *
545   * This is mostly for diagnostic purposed.
546   *
547   * @return int The length of the LCS.
548   */
549  function lcs () {
550  $lcs = 0;
551    foreach ($this->edits as $edit) {
552      if ($edit->type == 'copy')
553        $lcs += sizeof($edit->orig);
554    }
555  return $lcs;
556  }
557
558  /**
559   * Get the original set of lines.
560   *
561   * This reconstructs the $from_lines parameter passed to the
562   * constructor.
563   *
564   * @return array The original sequence of strings.
565   */
566  function orig() {
567    $lines = array();
568
569    foreach ($this->edits as $edit) {
570      if ($edit->orig)
571        array_splice($lines, sizeof($lines), 0, $edit->orig);
572    }
573    return $lines;
574  }
575
576  /**
577   * Get the closing set of lines.
578   *
579   * This reconstructs the $to_lines parameter passed to the
580   * constructor.
581   *
582   * @return array The sequence of strings.
583   */
584  function closing() {
585    $lines = array();
586
587    foreach ($this->edits as $edit) {
588      if ($edit->closing)
589        array_splice($lines, sizeof($lines), 0, $edit->closing);
590    }
591    return $lines;
592  }
593
594  /**
595   * Check a Diff for validity.
596   *
597   * This is here only for debugging purposes.
598   */
599  function _check ($from_lines, $to_lines) {
600    if (serialize($from_lines) != serialize($this->orig()))
601      trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
602    if (serialize($to_lines) != serialize($this->closing()))
603      trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
604
605    $rev = $this->reverse();
606    if (serialize($to_lines) != serialize($rev->orig()))
607      trigger_error("Reversed original doesn't match", E_USER_ERROR);
608    if (serialize($from_lines) != serialize($rev->closing()))
609      trigger_error("Reversed closing doesn't match", E_USER_ERROR);
610
611
612    $prevtype = 'none';
613    foreach ($this->edits as $edit) {
614      if ( $prevtype == $edit->type )
615        trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
616      $prevtype = $edit->type;
617    }
618
619    $lcs = $this->lcs();
620    trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
621  }
622}
623
624/**
625 * FIXME: bad name.
626 */
627class MappedDiff
628extends Diff
629{
630  /**
631   * Constructor.
632   *
633   * Computes diff between sequences of strings.
634   *
635   * This can be used to compute things like
636   * case-insensitve diffs, or diffs which ignore
637   * changes in white-space.
638   *
639   * @param $from_lines array An array of strings.
640   *  (Typically these are lines from a file.)
641   *
642   * @param $to_lines array An array of strings.
643   *
644   * @param $mapped_from_lines array This array should
645   *  have the same size number of elements as $from_lines.
646   *  The elements in $mapped_from_lines and
647   *  $mapped_to_lines are what is actually compared
648   *  when computing the diff.
649   *
650   * @param $mapped_to_lines array This array should
651   *  have the same number of elements as $to_lines.
652   */
653  function MappedDiff($from_lines, $to_lines,
654            $mapped_from_lines, $mapped_to_lines) {
655
656    assert(sizeof($from_lines) == sizeof($mapped_from_lines));
657    assert(sizeof($to_lines) == sizeof($mapped_to_lines));
658
659    $this->Diff($mapped_from_lines, $mapped_to_lines);
660
661    $xi = $yi = 0;
662    for ($i = 0; $i < sizeof($this->edits); $i++) {
663      $orig = &$this->edits[$i]->orig;
664      if (is_array($orig)) {
665        $orig = array_slice($from_lines, $xi, sizeof($orig));
666        $xi += sizeof($orig);
667      }
668
669      $closing = &$this->edits[$i]->closing;
670      if (is_array($closing)) {
671        $closing = array_slice($to_lines, $yi, sizeof($closing));
672        $yi += sizeof($closing);
673      }
674    }
675  }
676}
677
678/**
679 * A class to format Diffs
680 *
681 * This class formats the diff in classic diff format.
682 * It is intended that this class be customized via inheritance,
683 * to obtain fancier outputs.
684 */
685class DiffFormatter
686{
687  /**
688   * Number of leading context "lines" to preserve.
689   *
690   * This should be left at zero for this class, but subclasses
691   * may want to set this to other values.
692   */
693  var $leading_context_lines = 0;
694
695  /**
696   * Number of trailing context "lines" to preserve.
697   *
698   * This should be left at zero for this class, but subclasses
699   * may want to set this to other values.
700   */
701  var $trailing_context_lines = 0;
702
703  /**
704   * Format a diff.
705   *
706   * @param $diff object A Diff object.
707   * @return string The formatted output.
708   */
709  function format($diff) {
710
711    $xi = $yi = 1;
712    $block = false;
713    $context = array();
714
715    $nlead = $this->leading_context_lines;
716    $ntrail = $this->trailing_context_lines;
717
718    $this->_start_diff();
719
720    foreach ($diff->edits as $edit) {
721      if ($edit->type == 'copy') {
722        if (is_array($block)) {
723          if (sizeof($edit->orig) <= $nlead + $ntrail) {
724            $block[] = $edit;
725          }
726          else{
727            if ($ntrail) {
728              $context = array_slice($edit->orig, 0, $ntrail);
729              $block[] = new _DiffOp_Copy($context);
730            }
731            $this->_block($x0, $ntrail + $xi - $x0,
732                    $y0, $ntrail + $yi - $y0,
733                    $block);
734            $block = false;
735          }
736        }
737        $context = $edit->orig;
738      }
739      else {
740        if (! is_array($block)) {
741          $context = array_slice($context, sizeof($context) - $nlead);
742          $x0 = $xi - sizeof($context);
743          $y0 = $yi - sizeof($context);
744          $block = array();
745          if ($context)
746            $block[] = new _DiffOp_Copy($context);
747        }
748        $block[] = $edit;
749      }
750
751      if ($edit->orig)
752        $xi += sizeof($edit->orig);
753      if ($edit->closing)
754        $yi += sizeof($edit->closing);
755    }
756
757    if (is_array($block))
758      $this->_block($x0, $xi - $x0,
759              $y0, $yi - $y0,
760              $block);
761
762    return $this->_end_diff();
763  }
764
765  function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
766    $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
767    foreach ($edits as $edit) {
768      if ($edit->type == 'copy')
769        $this->_context($edit->orig);
770      elseif ($edit->type == 'add')
771        $this->_added($edit->closing);
772      elseif ($edit->type == 'delete')
773        $this->_deleted($edit->orig);
774      elseif ($edit->type == 'change')
775        $this->_changed($edit->orig, $edit->closing);
776      else
777        trigger_error("Unknown edit type", E_USER_ERROR);
778    }
779    $this->_end_block();
780  }
781
782  function _start_diff() {
783    ob_start();
784  }
785
786  function _end_diff() {
787    $val = ob_get_contents();
788    ob_end_clean();
789    return $val;
790  }
791
792  function _block_header($xbeg, $xlen, $ybeg, $ylen) {
793    if ($xlen > 1)
794      $xbeg .= "," . ($xbeg + $xlen - 1);
795    if ($ylen > 1)
796      $ybeg .= "," . ($ybeg + $ylen - 1);
797
798    return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
799  }
800
801  function _start_block($header) {
802    echo $header;
803  }
804
805  function _end_block() {
806  }
807
808  function _lines($lines, $prefix = ' ') {
809    foreach ($lines as $line)
810      echo "$prefix $line\n";
811  }
812
813  function _context($lines) {
814    $this->_lines($lines);
815  }
816
817  function _added($lines) {
818    $this->_lines($lines, ">");
819  }
820  function _deleted($lines) {
821    $this->_lines($lines, "<");
822  }
823
824  function _changed($orig, $closing) {
825    $this->_deleted($orig);
826    echo "---\n";
827    $this->_added($closing);
828  }
829}
830
831
832/**
833 *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
834 *
835 */
836
837define('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
838
839class _HWLDF_WordAccumulator {
840  function _HWLDF_WordAccumulator () {
841    $this->_lines = array();
842    $this->_line = '';
843    $this->_group = '';
844    $this->_tag = '';
845  }
846
847  function _flushGroup ($new_tag) {
848    if ($this->_group !== '') {
849    if ($this->_tag == 'mark')
850      $this->_line .= '<strong>'.$this->_group.'</strong>';
851    else
852    $this->_line .= $this->_group;
853  }
854    $this->_group = '';
855    $this->_tag = $new_tag;
856  }
857
858  function _flushLine ($new_tag) {
859    $this->_flushGroup($new_tag);
860    if ($this->_line != '')
861      $this->_lines[] = $this->_line;
862    $this->_line = '';
863  }
864
865  function addWords ($words, $tag = '') {
866    if ($tag != $this->_tag)
867      $this->_flushGroup($tag);
868
869    foreach ($words as $word) {
870      // new-line should only come as first char of word.
871      if ($word == '')
872        continue;
873      if ($word[0] == "\n") {
874        $this->_group .= NBSP;
875        $this->_flushLine($tag);
876        $word = substr($word, 1);
877      }
878      assert(!strstr($word, "\n"));
879      $this->_group .= $word;
880    }
881  }
882
883  function getLines() {
884    $this->_flushLine('~done');
885    return $this->_lines;
886  }
887}
888
889class WordLevelDiff extends MappedDiff
890{
891  function WordLevelDiff ($orig_lines, $closing_lines) {
892    list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
893    list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
894
895
896    $this->MappedDiff($orig_words, $closing_words,
897              $orig_stripped, $closing_stripped);
898  }
899
900  function _split($lines) {
901    // FIXME: fix POSIX char class.
902#    if (!preg_match_all('/ ( [^\S\n]+ | [[:alnum:]]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
903    if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
904              implode("\n", $lines),
905              $m)) {
906      return array(array(''), array(''));
907    }
908    return array($m[0], $m[1]);
909  }
910
911  function orig () {
912    $orig = new _HWLDF_WordAccumulator;
913
914    foreach ($this->edits as $edit) {
915      if ($edit->type == 'copy')
916        $orig->addWords($edit->orig);
917      elseif ($edit->orig)
918        $orig->addWords($edit->orig, 'mark');
919    }
920    return $orig->getLines();
921  }
922
923  function closing () {
924    $closing = new _HWLDF_WordAccumulator;
925
926    foreach ($this->edits as $edit) {
927      if ($edit->type == 'copy')
928        $closing->addWords($edit->closing);
929      elseif ($edit->closing)
930        $closing->addWords($edit->closing, 'mark');
931    }
932    return $closing->getLines();
933  }
934}
935
936/**
937 * "Unified" diff formatter.
938 *
939 * This class formats the diff in classic "unified diff" format.
940 */
941class UnifiedDiffFormatter extends DiffFormatter
942{
943    function UnifiedDiffFormatter($context_lines = 4) {
944        $this->leading_context_lines = $context_lines;
945        $this->trailing_context_lines = $context_lines;
946    }
947
948    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
949        if ($xlen != 1)
950            $xbeg .= "," . $xlen;
951        if ($ylen != 1)
952            $ybeg .= "," . $ylen;
953        return "@@ -$xbeg +$ybeg @@\n";
954    }
955
956    function _added($lines) {
957        $this->_lines($lines, "+");
958    }
959    function _deleted($lines) {
960        $this->_lines($lines, "-");
961    }
962    function _changed($orig, $final) {
963        $this->_deleted($orig);
964        $this->_added($final);
965    }
966}
967
968/**
969 *  Wikipedia Table style diff formatter.
970 *
971 */
972class TableDiffFormatter extends DiffFormatter
973{
974  function TableDiffFormatter() {
975    $this->leading_context_lines = 2;
976    $this->trailing_context_lines = 2;
977  }
978
979    function format($diff) {
980        // Preserve whitespaces by converting some to non-breaking spaces.
981        // Do not convert all of them to allow word-wrap.
982        $val = parent::format($diff);
983        $val = str_replace('  ','&nbsp; ', $val);
984        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
985        return $val;
986    }
987
988  function _pre($text){
989    $text = htmlspecialchars($text);
990    return $text;
991  }
992
993  function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
994    global $lang;
995    $l1 = $lang['line'].' '.$xbeg;
996    $l2 = $lang['line'].' '.$ybeg;
997    $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
998      '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
999    return $r;
1000  }
1001
1002  function _start_block( $header ) {
1003    print( $header );
1004  }
1005
1006  function _end_block() {
1007  }
1008
1009  function _lines( $lines, $prefix=' ', $color="white" ) {
1010  }
1011
1012  function addedLine( $line ) {
1013    return '<td>+</td><td class="diff-addedline">' .
1014      $line.'</td>';
1015
1016  }
1017
1018  function deletedLine( $line ) {
1019    return '<td>-</td><td class="diff-deletedline">' .
1020      $line.'</td>';
1021  }
1022
1023  function emptyLine() {
1024    return '<td colspan="2">&nbsp;</td>';
1025  }
1026
1027  function contextLine( $line ) {
1028    return '<td> </td><td class="diff-context">'.$line.'</td>';
1029  }
1030
1031  function _added($lines) {
1032    foreach ($lines as $line) {
1033      print( '<tr>' . $this->emptyLine() .
1034        $this->addedLine( $line ) . "</tr>\n" );
1035    }
1036  }
1037
1038  function _deleted($lines) {
1039    foreach ($lines as $line) {
1040      print( '<tr>' . $this->deletedLine( $line ) .
1041        $this->emptyLine() . "</tr>\n" );
1042    }
1043  }
1044
1045  function _context( $lines ) {
1046    foreach ($lines as $line) {
1047      print( '<tr>' . $this->contextLine( $line ) .
1048        $this->contextLine( $line ) . "</tr>\n" );
1049    }
1050  }
1051
1052  function _changed( $orig, $closing ) {
1053    $diff = new WordLevelDiff( $orig, $closing );
1054    $del = $diff->orig();
1055    $add = $diff->closing();
1056
1057    while ( $line = array_shift( $del ) ) {
1058      $aline = array_shift( $add );
1059      print( '<tr>' . $this->deletedLine( $line ) .
1060        $this->addedLine( $aline ) . "</tr>\n" );
1061    }
1062    $this->_added( $add ); # If any leftovers
1063  }
1064}
1065
1066
1067//Setup VIM: ex: et ts=2 enc=utf-8 :
1068