xref: /dokuwiki/inc/DifferenceEngine.php (revision f77ccdf72d9702956edd4917d261b908636fd690)
1<?php
2/**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Additions by Axel Boldt for MediaWiki
6 *
7 * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 * @license  You may copy this code freely under the conditions of the GPL.
9 */
10define('USE_ASSERTS', function_exists('assert'));
11
12class _DiffOp {
13    var $type;
14    var $orig;
15    var $closing;
16
17    function reverse() {
18        trigger_error("pure virtual", E_USER_ERROR);
19    }
20
21    function norig() {
22        return $this->orig ? count($this->orig) : 0;
23    }
24
25    function nclosing() {
26        return $this->closing ? count($this->closing) : 0;
27    }
28}
29
30class _DiffOp_Copy extends _DiffOp {
31    var $type = 'copy';
32
33    function _DiffOp_Copy ($orig, $closing = false) {
34        if (!is_array($closing))
35            $closing = $orig;
36        $this->orig = $orig;
37        $this->closing = $closing;
38    }
39
40    function reverse() {
41        return new _DiffOp_Copy($this->closing, $this->orig);
42    }
43}
44
45class _DiffOp_Delete extends _DiffOp {
46    var $type = 'delete';
47
48    function _DiffOp_Delete ($lines) {
49        $this->orig = $lines;
50        $this->closing = false;
51    }
52
53    function reverse() {
54        return new _DiffOp_Add($this->orig);
55    }
56}
57
58class _DiffOp_Add extends _DiffOp {
59    var $type = 'add';
60
61    function _DiffOp_Add ($lines) {
62        $this->closing = $lines;
63        $this->orig = false;
64    }
65
66    function reverse() {
67        return new _DiffOp_Delete($this->closing);
68    }
69}
70
71class _DiffOp_Change extends _DiffOp {
72    var $type = 'change';
73
74    function _DiffOp_Change ($orig, $closing) {
75        $this->orig = $orig;
76        $this->closing = $closing;
77    }
78
79    function reverse() {
80        return new _DiffOp_Change($this->closing, $this->orig);
81    }
82}
83
84
85/**
86 * Class used internally by Diff to actually compute the diffs.
87 *
88 * The algorithm used here is mostly lifted from the perl module
89 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
90 *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
91 *
92 * More ideas are taken from:
93 *   http://www.ics.uci.edu/~eppstein/161/960229.html
94 *
95 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
96 * diffutils-2.7, which can be found at:
97 *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
98 *
99 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
100 * are my own.
101 *
102 * @author Geoffrey T. Dairiki
103 * @access private
104 */
105class _DiffEngine {
106
107    function diff ($from_lines, $to_lines) {
108        $n_from = count($from_lines);
109        $n_to = count($to_lines);
110
111        $this->xchanged = $this->ychanged = array();
112        $this->xv = $this->yv = array();
113        $this->xind = $this->yind = array();
114        unset($this->seq);
115        unset($this->in_seq);
116        unset($this->lcs);
117
118        // Skip leading common lines.
119        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
120            if ($from_lines[$skip] != $to_lines[$skip])
121                break;
122            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
123        }
124        // Skip trailing common lines.
125        $xi = $n_from;
126        $yi = $n_to;
127        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
128            if ($from_lines[$xi] != $to_lines[$yi])
129                break;
130            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
131        }
132
133        // Ignore lines which do not exist in both files.
134        for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
135            $xhash[$from_lines[$xi]] = 1;
136        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
137            $line = $to_lines[$yi];
138            if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
139                continue;
140            $yhash[$line] = 1;
141            $this->yv[] = $line;
142            $this->yind[] = $yi;
143        }
144        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
145            $line = $from_lines[$xi];
146            if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
147                continue;
148            $this->xv[] = $line;
149            $this->xind[] = $xi;
150        }
151
152        // Find the LCS.
153        $this->_compareseq(0, count($this->xv), 0, count($this->yv));
154
155        // Merge edits when possible
156        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
157        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
158
159        // Compute the edit operations.
160        $edits = array();
161        $xi = $yi = 0;
162        while ($xi < $n_from || $yi < $n_to) {
163            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
164            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
165
166            // Skip matching "snake".
167            $copy = array();
168            while ( $xi < $n_from && $yi < $n_to
169                    && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
170                $copy[] = $from_lines[$xi++];
171                ++$yi;
172            }
173            if ($copy)
174                $edits[] = new _DiffOp_Copy($copy);
175
176            // Find deletes & adds.
177            $delete = array();
178            while ($xi < $n_from && $this->xchanged[$xi])
179                $delete[] = $from_lines[$xi++];
180
181            $add = array();
182            while ($yi < $n_to && $this->ychanged[$yi])
183                $add[] = $to_lines[$yi++];
184
185            if ($delete && $add)
186                $edits[] = new _DiffOp_Change($delete, $add);
187            elseif ($delete)
188                $edits[] = new _DiffOp_Delete($delete);
189            elseif ($add)
190                $edits[] = new _DiffOp_Add($add);
191        }
192        return $edits;
193    }
194
195
196    /**
197     * Divide the Largest Common Subsequence (LCS) of the sequences
198     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
199     * sized segments.
200     *
201     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
202     * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
203     * sub sequences.  The first sub-sequence is contained in [X0, X1),
204     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
205     * that (X0, Y0) == (XOFF, YOFF) and
206     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
207     *
208     * This function assumes that the first lines of the specified portions
209     * of the two files do not match, and likewise that the last lines do not
210     * match.  The caller must trim matching lines from the beginning and end
211     * of the portions it is going to specify.
212     */
213    function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
214        $flip = false;
215
216        if ($xlim - $xoff > $ylim - $yoff) {
217            // Things seems faster (I'm not sure I understand why)
218            // when the shortest sequence in X.
219            $flip = true;
220            list ($xoff, $xlim, $yoff, $ylim)
221                = array( $yoff, $ylim, $xoff, $xlim);
222        }
223
224        if ($flip)
225            for ($i = $ylim - 1; $i >= $yoff; $i--)
226                $ymatches[$this->xv[$i]][] = $i;
227        else
228            for ($i = $ylim - 1; $i >= $yoff; $i--)
229                $ymatches[$this->yv[$i]][] = $i;
230
231        $this->lcs = 0;
232        $this->seq[0]= $yoff - 1;
233        $this->in_seq = array();
234        $ymids[0] = array();
235
236        $numer = $xlim - $xoff + $nchunks - 1;
237        $x = $xoff;
238        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
239            if ($chunk > 0)
240                for ($i = 0; $i <= $this->lcs; $i++)
241                    $ymids[$i][$chunk-1] = $this->seq[$i];
242
243            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
244            for ( ; $x < $x1; $x++) {
245                $line = $flip ? $this->yv[$x] : $this->xv[$x];
246                if (empty($ymatches[$line]))
247                    continue;
248                $matches = $ymatches[$line];
249                reset($matches);
250                while (list ($junk, $y) = each($matches))
251                    if (empty($this->in_seq[$y])) {
252                        $k = $this->_lcs_pos($y);
253                        USE_ASSERTS && assert($k > 0);
254                        $ymids[$k] = $ymids[$k-1];
255                        break;
256                    }
257                while (list ($junk, $y) = each($matches)) {
258                    if ($y > $this->seq[$k-1]) {
259                        USE_ASSERTS && assert($y < $this->seq[$k]);
260                        // Optimization: this is a common case:
261                        //  next match is just replacing previous match.
262                        $this->in_seq[$this->seq[$k]] = false;
263                        $this->seq[$k] = $y;
264                        $this->in_seq[$y] = 1;
265                    }
266                    else if (empty($this->in_seq[$y])) {
267                        $k = $this->_lcs_pos($y);
268                        USE_ASSERTS && assert($k > 0);
269                        $ymids[$k] = $ymids[$k-1];
270                    }
271                }
272            }
273        }
274
275        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
276        $ymid = $ymids[$this->lcs];
277        for ($n = 0; $n < $nchunks - 1; $n++) {
278            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
279            $y1 = $ymid[$n] + 1;
280            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
281        }
282        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
283
284        return array($this->lcs, $seps);
285    }
286
287    function _lcs_pos ($ypos) {
288        $end = $this->lcs;
289        if ($end == 0 || $ypos > $this->seq[$end]) {
290            $this->seq[++$this->lcs] = $ypos;
291            $this->in_seq[$ypos] = 1;
292            return $this->lcs;
293        }
294
295        $beg = 1;
296        while ($beg < $end) {
297            $mid = (int)(($beg + $end) / 2);
298            if ( $ypos > $this->seq[$mid] )
299                $beg = $mid + 1;
300            else
301                $end = $mid;
302        }
303
304        USE_ASSERTS && assert($ypos != $this->seq[$end]);
305
306        $this->in_seq[$this->seq[$end]] = false;
307        $this->seq[$end] = $ypos;
308        $this->in_seq[$ypos] = 1;
309        return $end;
310    }
311
312    /**
313     * Find LCS of two sequences.
314     *
315     * The results are recorded in the vectors $this->{x,y}changed[], by
316     * storing a 1 in the element for each line that is an insertion
317     * or deletion (ie. is not in the LCS).
318     *
319     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
320     *
321     * Note that XLIM, YLIM are exclusive bounds.
322     * All line numbers are origin-0 and discarded lines are not counted.
323     */
324    function _compareseq ($xoff, $xlim, $yoff, $ylim) {
325        // Slide down the bottom initial diagonal.
326        while ($xoff < $xlim && $yoff < $ylim
327                && $this->xv[$xoff] == $this->yv[$yoff]) {
328            ++$xoff;
329            ++$yoff;
330        }
331
332        // Slide up the top initial diagonal.
333        while ($xlim > $xoff && $ylim > $yoff
334                && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
335            --$xlim;
336            --$ylim;
337        }
338
339        if ($xoff == $xlim || $yoff == $ylim)
340            $lcs = 0;
341        else {
342            // This is ad hoc but seems to work well.
343            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
344            //$nchunks = max(2,min(8,(int)$nchunks));
345            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
346            list ($lcs, $seps)
347                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
348        }
349
350        if ($lcs == 0) {
351            // X and Y sequences have no common subsequence:
352            // mark all changed.
353            while ($yoff < $ylim)
354                $this->ychanged[$this->yind[$yoff++]] = 1;
355            while ($xoff < $xlim)
356                $this->xchanged[$this->xind[$xoff++]] = 1;
357        }
358        else {
359            // Use the partitions to split this problem into subproblems.
360            reset($seps);
361            $pt1 = $seps[0];
362            while ($pt2 = next($seps)) {
363                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
364                $pt1 = $pt2;
365            }
366        }
367    }
368
369    /**
370     * Adjust inserts/deletes of identical lines to join changes
371     * as much as possible.
372     *
373     * We do something when a run of changed lines include a
374     * line at one end and has an excluded, identical line at the other.
375     * We are free to choose which identical line is included.
376     * `compareseq' usually chooses the one at the beginning,
377     * but usually it is cleaner to consider the following identical line
378     * to be the "change".
379     *
380     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
381     */
382    function _shift_boundaries ($lines, &$changed, $other_changed) {
383        $i = 0;
384        $j = 0;
385
386        USE_ASSERTS && assert('count($lines) == count($changed)');
387        $len = count($lines);
388        $other_len = count($other_changed);
389
390        while (1) {
391            /*
392             * Scan forwards to find beginning of another run of changes.
393             * Also keep track of the corresponding point in the other file.
394             *
395             * Throughout this code, $i and $j are adjusted together so that
396             * the first $i elements of $changed and the first $j elements
397             * of $other_changed both contain the same number of zeros
398             * (unchanged lines).
399             * Furthermore, $j is always kept so that $j == $other_len or
400             * $other_changed[$j] == false.
401             */
402            while ($j < $other_len && $other_changed[$j])
403                $j++;
404
405            while ($i < $len && ! $changed[$i]) {
406                USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
407                $i++;
408                $j++;
409                while ($j < $other_len && $other_changed[$j])
410                    $j++;
411            }
412
413            if ($i == $len)
414                break;
415
416            $start = $i;
417
418            // Find the end of this run of changes.
419            while (++$i < $len && $changed[$i])
420                continue;
421
422            do {
423                /*
424                 * Record the length of this run of changes, so that
425                 * we can later determine whether the run has grown.
426                 */
427                $runlength = $i - $start;
428
429                /*
430                 * Move the changed region back, so long as the
431                 * previous unchanged line matches the last changed one.
432                 * This merges with previous changed regions.
433                 */
434                while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
435                    $changed[--$start] = 1;
436                    $changed[--$i] = false;
437                    while ($start > 0 && $changed[$start - 1])
438                        $start--;
439                    USE_ASSERTS && assert('$j > 0');
440                    while ($other_changed[--$j])
441                        continue;
442                    USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
443                }
444
445                /*
446                 * Set CORRESPONDING to the end of the changed run, at the last
447                 * point where it corresponds to a changed run in the other file.
448                 * CORRESPONDING == LEN means no such point has been found.
449                 */
450                $corresponding = $j < $other_len ? $i : $len;
451
452                /*
453                 * Move the changed region forward, so long as the
454                 * first changed line matches the following unchanged one.
455                 * This merges with following changed regions.
456                 * Do this second, so that if there are no merges,
457                 * the changed region is moved forward as far as possible.
458                 */
459                while ($i < $len && $lines[$start] == $lines[$i]) {
460                    $changed[$start++] = false;
461                    $changed[$i++] = 1;
462                    while ($i < $len && $changed[$i])
463                        $i++;
464
465                    USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
466                    $j++;
467                    if ($j < $other_len && $other_changed[$j]) {
468                        $corresponding = $i;
469                        while ($j < $other_len && $other_changed[$j])
470                            $j++;
471                    }
472                }
473            } while ($runlength != $i - $start);
474
475            /*
476             * If possible, move the fully-merged run of changes
477             * back to a corresponding run in the other file.
478             */
479            while ($corresponding < $i) {
480                $changed[--$start] = 1;
481                $changed[--$i] = 0;
482                USE_ASSERTS && assert('$j > 0');
483                while ($other_changed[--$j])
484                    continue;
485                USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
486            }
487        }
488    }
489}
490
491/**
492 * Class representing a 'diff' between two sequences of strings.
493 */
494class Diff {
495
496    var $edits;
497
498    /**
499     * Constructor.
500     * Computes diff between sequences of strings.
501     *
502     * @param $from_lines array An array of strings.
503     *      (Typically these are lines from a file.)
504     * @param $to_lines array An array of strings.
505     */
506    function Diff($from_lines, $to_lines) {
507        $eng = new _DiffEngine;
508        $this->edits = $eng->diff($from_lines, $to_lines);
509        //$this->_check($from_lines, $to_lines);
510    }
511
512    /**
513     * Compute reversed Diff.
514     *
515     * SYNOPSIS:
516     *
517     *  $diff = new Diff($lines1, $lines2);
518     *  $rev = $diff->reverse();
519     * @return object A Diff object representing the inverse of the
520     *          original diff.
521     */
522    function reverse () {
523        $rev = $this;
524        $rev->edits = array();
525        foreach ($this->edits as $edit) {
526            $rev->edits[] = $edit->reverse();
527        }
528        return $rev;
529    }
530
531    /**
532     * Check for empty diff.
533     *
534     * @return bool True iff two sequences were identical.
535     */
536    function isEmpty () {
537        foreach ($this->edits as $edit) {
538            if ($edit->type != 'copy')
539                return false;
540        }
541        return true;
542    }
543
544    /**
545     * Compute the length of the Longest Common Subsequence (LCS).
546     *
547     * This is mostly for diagnostic purposed.
548     *
549     * @return int The length of the LCS.
550     */
551    function lcs () {
552        $lcs = 0;
553        foreach ($this->edits as $edit) {
554            if ($edit->type == 'copy')
555                $lcs += count($edit->orig);
556        }
557        return $lcs;
558    }
559
560    /**
561     * Get the original set of lines.
562     *
563     * This reconstructs the $from_lines parameter passed to the
564     * constructor.
565     *
566     * @return array The original sequence of strings.
567     */
568    function orig() {
569        $lines = array();
570
571        foreach ($this->edits as $edit) {
572            if ($edit->orig)
573                array_splice($lines, count($lines), 0, $edit->orig);
574        }
575        return $lines;
576    }
577
578    /**
579     * Get the closing set of lines.
580     *
581     * This reconstructs the $to_lines parameter passed to the
582     * constructor.
583     *
584     * @return array The sequence of strings.
585     */
586    function closing() {
587        $lines = array();
588
589        foreach ($this->edits as $edit) {
590            if ($edit->closing)
591                array_splice($lines, count($lines), 0, $edit->closing);
592        }
593        return $lines;
594    }
595
596    /**
597     * Check a Diff for validity.
598     *
599     * This is here only for debugging purposes.
600     */
601    function _check ($from_lines, $to_lines) {
602        if (serialize($from_lines) != serialize($this->orig()))
603            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
604        if (serialize($to_lines) != serialize($this->closing()))
605            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
606
607        $rev = $this->reverse();
608        if (serialize($to_lines) != serialize($rev->orig()))
609            trigger_error("Reversed original doesn't match", E_USER_ERROR);
610        if (serialize($from_lines) != serialize($rev->closing()))
611            trigger_error("Reversed closing doesn't match", E_USER_ERROR);
612
613        $prevtype = 'none';
614        foreach ($this->edits as $edit) {
615            if ( $prevtype == $edit->type )
616                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
617            $prevtype = $edit->type;
618        }
619
620        $lcs = $this->lcs();
621        trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
622    }
623}
624
625/**
626 * FIXME: bad name.
627 */
628class MappedDiff extends Diff {
629    /**
630     * Constructor.
631     *
632     * Computes diff between sequences of strings.
633     *
634     * This can be used to compute things like
635     * case-insensitve diffs, or diffs which ignore
636     * changes in white-space.
637     *
638     * @param $from_lines array An array of strings.
639     *  (Typically these are lines from a file.)
640     *
641     * @param $to_lines array An array of strings.
642     *
643     * @param $mapped_from_lines array This array should
644     *  have the same size number of elements as $from_lines.
645     *  The elements in $mapped_from_lines and
646     *  $mapped_to_lines are what is actually compared
647     *  when computing the diff.
648     *
649     * @param $mapped_to_lines array This array should
650     *  have the same number of elements as $to_lines.
651     */
652    function MappedDiff($from_lines, $to_lines,
653            $mapped_from_lines, $mapped_to_lines) {
654
655        assert(count($from_lines) == count($mapped_from_lines));
656        assert(count($to_lines) == count($mapped_to_lines));
657
658        $this->Diff($mapped_from_lines, $mapped_to_lines);
659
660        $xi = $yi = 0;
661        $ecnt = count($this->edits);
662        for ($i = 0; $i < $ecnt; $i++) {
663            $orig = &$this->edits[$i]->orig;
664            if (is_array($orig)) {
665                $orig = array_slice($from_lines, $xi, count($orig));
666                $xi += count($orig);
667            }
668
669            $closing = &$this->edits[$i]->closing;
670            if (is_array($closing)) {
671                $closing = array_slice($to_lines, $yi, count($closing));
672                $yi += count($closing);
673            }
674        }
675    }
676}
677
678/**
679 * A class to format Diffs
680 *
681 * This class formats the diff in classic diff format.
682 * It is intended that this class be customized via inheritance,
683 * to obtain fancier outputs.
684 */
685class DiffFormatter {
686    /**
687     * Number of leading context "lines" to preserve.
688     *
689     * This should be left at zero for this class, but subclasses
690     * may want to set this to other values.
691     */
692    var $leading_context_lines = 0;
693
694    /**
695     * Number of trailing context "lines" to preserve.
696     *
697     * This should be left at zero for this class, but subclasses
698     * may want to set this to other values.
699     */
700    var $trailing_context_lines = 0;
701
702    /**
703     * Format a diff.
704     *
705     * @param $diff object A Diff object.
706     * @return string The formatted output.
707     */
708    function format($diff) {
709
710        $xi = $yi = 1;
711        $block = false;
712        $context = array();
713
714        $nlead = $this->leading_context_lines;
715        $ntrail = $this->trailing_context_lines;
716
717        $this->_start_diff();
718
719        foreach ($diff->edits as $edit) {
720            if ($edit->type == 'copy') {
721                if (is_array($block)) {
722                    if (count($edit->orig) <= $nlead + $ntrail) {
723                        $block[] = $edit;
724                    }
725                    else{
726                        if ($ntrail) {
727                            $context = array_slice($edit->orig, 0, $ntrail);
728                            $block[] = new _DiffOp_Copy($context);
729                        }
730                        $this->_block($x0, $ntrail + $xi - $x0,
731                                $y0, $ntrail + $yi - $y0,
732                                $block);
733                        $block = false;
734                    }
735                }
736                $context = $edit->orig;
737            }
738            else {
739                if (! is_array($block)) {
740                    $context = array_slice($context, count($context) - $nlead);
741                    $x0 = $xi - count($context);
742                    $y0 = $yi - count($context);
743                    $block = array();
744                    if ($context)
745                        $block[] = new _DiffOp_Copy($context);
746                }
747                $block[] = $edit;
748            }
749
750            if ($edit->orig)
751                $xi += count($edit->orig);
752            if ($edit->closing)
753                $yi += count($edit->closing);
754        }
755
756        if (is_array($block))
757            $this->_block($x0, $xi - $x0,
758                    $y0, $yi - $y0,
759                    $block);
760
761        return $this->_end_diff();
762    }
763
764    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
765        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
766        foreach ($edits as $edit) {
767            if ($edit->type == 'copy')
768                $this->_context($edit->orig);
769            elseif ($edit->type == 'add')
770                $this->_added($edit->closing);
771            elseif ($edit->type == 'delete')
772                $this->_deleted($edit->orig);
773            elseif ($edit->type == 'change')
774                $this->_changed($edit->orig, $edit->closing);
775            else
776                trigger_error("Unknown edit type", E_USER_ERROR);
777        }
778        $this->_end_block();
779    }
780
781    function _start_diff() {
782        ob_start();
783    }
784
785    function _end_diff() {
786        $val = ob_get_contents();
787        ob_end_clean();
788        return $val;
789    }
790
791    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
792        if ($xlen > 1)
793            $xbeg .= "," . ($xbeg + $xlen - 1);
794        if ($ylen > 1)
795            $ybeg .= "," . ($ybeg + $ylen - 1);
796
797        return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
798    }
799
800    function _start_block($header) {
801        echo $header;
802    }
803
804    function _end_block() {
805    }
806
807    function _lines($lines, $prefix = ' ') {
808        foreach ($lines as $line)
809            echo "$prefix $line\n";
810    }
811
812    function _context($lines) {
813        $this->_lines($lines);
814    }
815
816    function _added($lines) {
817        $this->_lines($lines, ">");
818    }
819    function _deleted($lines) {
820        $this->_lines($lines, "<");
821    }
822
823    function _changed($orig, $closing) {
824        $this->_deleted($orig);
825        echo "---\n";
826        $this->_added($closing);
827    }
828}
829
830
831/**
832 *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
833 *
834 */
835
836define('NBSP', "\xC2\xA0");     // utf-8 non-breaking space.
837
838class _HWLDF_WordAccumulator {
839    function _HWLDF_WordAccumulator () {
840        $this->_lines = array();
841        $this->_line = '';
842        $this->_group = '';
843        $this->_tag = '';
844    }
845
846    function _flushGroup ($new_tag) {
847        if ($this->_group !== '') {
848            if ($this->_tag == 'mark')
849                $this->_line .= '<strong>'.$this->_group.'</strong>';
850            else
851                $this->_line .= $this->_group;
852        }
853        $this->_group = '';
854        $this->_tag = $new_tag;
855    }
856
857    function _flushLine ($new_tag) {
858        $this->_flushGroup($new_tag);
859        if ($this->_line != '')
860            $this->_lines[] = $this->_line;
861        $this->_line = '';
862    }
863
864    function addWords ($words, $tag = '') {
865        if ($tag != $this->_tag)
866            $this->_flushGroup($tag);
867
868        foreach ($words as $word) {
869            // new-line should only come as first char of word.
870            if ($word == '')
871                continue;
872            if ($word[0] == "\n") {
873                $this->_group .= NBSP;
874                $this->_flushLine($tag);
875                $word = substr($word, 1);
876            }
877            assert(!strstr($word, "\n"));
878            $this->_group .= $word;
879        }
880    }
881
882    function getLines() {
883        $this->_flushLine('~done');
884        return $this->_lines;
885    }
886}
887
888class WordLevelDiff extends MappedDiff {
889
890    function WordLevelDiff ($orig_lines, $closing_lines) {
891        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
892        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
893
894        $this->MappedDiff($orig_words, $closing_words,
895                $orig_stripped, $closing_stripped);
896    }
897
898    function _split($lines) {
899            if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
900                    implode("\n", $lines),
901                    $m)) {
902            return array(array(''), array(''));
903            }
904            return array($m[0], $m[1]);
905            }
906
907            function orig () {
908            $orig = new _HWLDF_WordAccumulator;
909
910            foreach ($this->edits as $edit) {
911            if ($edit->type == 'copy')
912            $orig->addWords($edit->orig);
913            elseif ($edit->orig)
914            $orig->addWords($edit->orig, 'mark');
915            }
916            return $orig->getLines();
917            }
918
919            function closing () {
920                $closing = new _HWLDF_WordAccumulator;
921
922                foreach ($this->edits as $edit) {
923                    if ($edit->type == 'copy')
924                        $closing->addWords($edit->closing);
925                    elseif ($edit->closing)
926                        $closing->addWords($edit->closing, 'mark');
927                }
928                return $closing->getLines();
929            }
930}
931
932/**
933 * "Unified" diff formatter.
934 *
935 * This class formats the diff in classic "unified diff" format.
936 */
937class UnifiedDiffFormatter extends DiffFormatter {
938
939    function UnifiedDiffFormatter($context_lines = 4) {
940        $this->leading_context_lines = $context_lines;
941        $this->trailing_context_lines = $context_lines;
942    }
943
944    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
945        if ($xlen != 1)
946            $xbeg .= "," . $xlen;
947        if ($ylen != 1)
948            $ybeg .= "," . $ylen;
949        return "@@ -$xbeg +$ybeg @@\n";
950    }
951
952    function _added($lines) {
953        $this->_lines($lines, "+");
954    }
955    function _deleted($lines) {
956        $this->_lines($lines, "-");
957    }
958    function _changed($orig, $final) {
959        $this->_deleted($orig);
960        $this->_added($final);
961    }
962}
963
964/**
965 *  Wikipedia Table style diff formatter.
966 *
967 */
968class TableDiffFormatter extends DiffFormatter {
969
970    function TableDiffFormatter() {
971        $this->leading_context_lines = 2;
972        $this->trailing_context_lines = 2;
973    }
974
975    function format($diff) {
976        // Preserve whitespaces by converting some to non-breaking spaces.
977        // Do not convert all of them to allow word-wrap.
978        $val = parent::format($diff);
979        $val = str_replace('  ','&nbsp; ', $val);
980        $val = preg_replace('/ (?=<)|(?<=[ >]) /', '&nbsp;', $val);
981        return $val;
982    }
983
984    function _pre($text){
985        $text = htmlspecialchars($text);
986        return $text;
987    }
988
989    function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
990        global $lang;
991        $l1 = $lang['line'].' '.$xbeg;
992        $l2 = $lang['line'].' '.$ybeg;
993        $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
994            '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
995        return $r;
996    }
997
998    function _start_block( $header ) {
999        print( $header );
1000    }
1001
1002    function _end_block() {
1003    }
1004
1005    function _lines( $lines, $prefix=' ', $color="white" ) {
1006    }
1007
1008    function addedLine( $line ) {
1009        return '<td>+</td><td class="diff-addedline">' .
1010            $line.'</td>';
1011
1012    }
1013
1014    function deletedLine( $line ) {
1015        return '<td>-</td><td class="diff-deletedline">' .
1016            $line.'</td>';
1017    }
1018
1019    function emptyLine() {
1020        return '<td colspan="2">&nbsp;</td>';
1021    }
1022
1023    function contextLine( $line ) {
1024        return '<td> </td><td class="diff-context">'.$line.'</td>';
1025    }
1026
1027    function _added($lines) {
1028        foreach ($lines as $line) {
1029            print( '<tr>' . $this->emptyLine() .
1030                    $this->addedLine( $line ) . "</tr>\n" );
1031        }
1032    }
1033
1034    function _deleted($lines) {
1035        foreach ($lines as $line) {
1036            print( '<tr>' . $this->deletedLine( $line ) .
1037                    $this->emptyLine() . "</tr>\n" );
1038        }
1039    }
1040
1041    function _context( $lines ) {
1042        foreach ($lines as $line) {
1043            print( '<tr>' . $this->contextLine( $line ) .
1044                    $this->contextLine( $line ) . "</tr>\n" );
1045        }
1046    }
1047
1048    function _changed( $orig, $closing ) {
1049        $diff = new WordLevelDiff( $orig, $closing );
1050        $del = $diff->orig();
1051        $add = $diff->closing();
1052
1053        while ( $line = array_shift( $del ) ) {
1054            $aline = array_shift( $add );
1055            print( '<tr>' . $this->deletedLine( $line ) .
1056                    $this->addedLine( $aline ) . "</tr>\n" );
1057        }
1058        $this->_added( $add ); # If any leftovers
1059    }
1060}
1061
1062
1063//Setup VIM: ex: et ts=4 enc=utf-8 :
1064