xref: /dokuwiki/inc/DifferenceEngine.php (revision f3f0262c480d7e509b008d37c90aed884532bba8)
1<?php
2
3// A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4//
5// Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6// You may copy this code freely under the conditions of the GPL.
7//
8
// True when the assert() function is available. Internal sanity checks in
// this file are written as `USE_ASSERTS && assert(...)`, so they are skipped
// entirely when this constant is false.
define('USE_ASSERTS', function_exists('assert'));
10
/**
 * Base class for a single edit operation of a diff.
 *
 * $type names the kind of operation, $orig holds the affected lines of the
 * original sequence and $closing the affected lines of the resulting
 * sequence (either may be false when the operation has no lines on that side).
 */
class _DiffOp {
	var $type;
	var $orig;
	var $closing;

	/**
	 * Build the inverse operation. Subclasses are expected to override
	 * this; the base implementation only raises a user error.
	 */
	function reverse() {
		trigger_error("pure virtual", E_USER_ERROR);
	}

	/**
	 * @return int Number of lines on the original side (0 if none).
	 */
	function norig() {
		if (!$this->orig) {
			return 0;
		}
		return count($this->orig);
	}

	/**
	 * @return int Number of lines on the closing side (0 if none).
	 */
	function nclosing() {
		if (!$this->closing) {
			return 0;
		}
		return count($this->closing);
	}
}
28
/**
 * Edit operation for a run of lines present in both sequences.
 */
class _DiffOp_Copy extends _DiffOp {
	var $type = 'copy';

	/**
	 * @param $orig array Lines as they appear in the original sequence.
	 * @param $closing array|false Lines as they appear in the closing
	 *        sequence; anything that is not an array means "same as $orig".
	 */
	function __construct($orig, $closing = false) {
		if (!is_array($closing))
			$closing = $orig;
		$this->orig = $orig;
		$this->closing = $closing;
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function _DiffOp_Copy ($orig, $closing = false) {
		self::__construct($orig, $closing);
	}

	/**
	 * @return object A copy operation with both sides swapped.
	 */
	function reverse() {
		return new _DiffOp_Copy($this->closing, $this->orig);
	}
}
43
/**
 * Edit operation for lines that exist only in the original sequence.
 */
class _DiffOp_Delete extends _DiffOp {
	var $type = 'delete';

	/**
	 * @param $lines array The deleted lines.
	 */
	function __construct($lines) {
		$this->orig = $lines;
		$this->closing = false;
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function _DiffOp_Delete ($lines) {
		self::__construct($lines);
	}

	/**
	 * @return object The inverse operation: an add of the same lines.
	 */
	function reverse() {
		return new _DiffOp_Add($this->orig);
	}
}
56
/**
 * Edit operation for lines that exist only in the closing sequence.
 */
class _DiffOp_Add extends _DiffOp {
	var $type = 'add';

	/**
	 * @param $lines array The added lines.
	 */
	function __construct($lines) {
		$this->closing = $lines;
		$this->orig = false;
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function _DiffOp_Add ($lines) {
		self::__construct($lines);
	}

	/**
	 * @return object The inverse operation: a delete of the same lines.
	 */
	function reverse() {
		return new _DiffOp_Delete($this->closing);
	}
}
69
/**
 * Edit operation replacing a run of original lines with a run of closing lines.
 */
class _DiffOp_Change extends _DiffOp {
	var $type = 'change';

	/**
	 * @param $orig array The lines being replaced.
	 * @param $closing array The lines replacing them.
	 */
	function __construct($orig, $closing) {
		$this->orig = $orig;
		$this->closing = $closing;
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function _DiffOp_Change ($orig, $closing) {
		self::__construct($orig, $closing);
	}

	/**
	 * @return object A change operation with both sides swapped.
	 */
	function reverse() {
		return new _DiffOp_Change($this->closing, $this->orig);
	}
}
82
83
84/**
85 * Class used internally by Diff to actually compute the diffs.
86 *
87 * The algorithm used here is mostly lifted from the perl module
88 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
89 *	 http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
90 *
91 * More ideas are taken from:
92 *	 http://www.ics.uci.edu/~eppstein/161/960229.html
93 *
 * Some ideas (and a bit of code) are from analyze.c, from GNU
95 * diffutils-2.7, which can be found at:
96 *	 ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
97 *
 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
99 * are my own.
100 *
101 * @author Geoffrey T. Dairiki
102 * @access private
103 */
104class _DiffEngine
105{
106	function diff ($from_lines, $to_lines) {
107		$n_from = sizeof($from_lines);
108		$n_to = sizeof($to_lines);
109
110		$this->xchanged = $this->ychanged = array();
111		$this->xv = $this->yv = array();
112		$this->xind = $this->yind = array();
113		unset($this->seq);
114		unset($this->in_seq);
115		unset($this->lcs);
116
117		// Skip leading common lines.
118		for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
119			if ($from_lines[$skip] != $to_lines[$skip])
120				break;
121			$this->xchanged[$skip] = $this->ychanged[$skip] = false;
122		}
123		// Skip trailing common lines.
124		$xi = $n_from; $yi = $n_to;
125		for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
126			if ($from_lines[$xi] != $to_lines[$yi])
127				break;
128			$this->xchanged[$xi] = $this->ychanged[$yi] = false;
129		}
130
131		// Ignore lines which do not exist in both files.
132		for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
133			$xhash[$from_lines[$xi]] = 1;
134		for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
135			$line = $to_lines[$yi];
136			if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
137				continue;
138			$yhash[$line] = 1;
139			$this->yv[] = $line;
140			$this->yind[] = $yi;
141		}
142		for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
143			$line = $from_lines[$xi];
144			if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
145				continue;
146			$this->xv[] = $line;
147			$this->xind[] = $xi;
148		}
149
150		// Find the LCS.
151		$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
152
153		// Merge edits when possible
154		$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
155		$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
156
157		// Compute the edit operations.
158		$edits = array();
159		$xi = $yi = 0;
160		while ($xi < $n_from || $yi < $n_to) {
161			USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
162			USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
163
164			// Skip matching "snake".
165			$copy = array();
166			while ( $xi < $n_from && $yi < $n_to
167					&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
168				$copy[] = $from_lines[$xi++];
169				++$yi;
170			}
171			if ($copy)
172				$edits[] = new _DiffOp_Copy($copy);
173
174			// Find deletes & adds.
175			$delete = array();
176			while ($xi < $n_from && $this->xchanged[$xi])
177				$delete[] = $from_lines[$xi++];
178
179			$add = array();
180			while ($yi < $n_to && $this->ychanged[$yi])
181				$add[] = $to_lines[$yi++];
182
183			if ($delete && $add)
184				$edits[] = new _DiffOp_Change($delete, $add);
185			elseif ($delete)
186				$edits[] = new _DiffOp_Delete($delete);
187			elseif ($add)
188				$edits[] = new _DiffOp_Add($add);
189		}
190		return $edits;
191	}
192
193
194	/* Divide the Largest Common Subsequence (LCS) of the sequences
195	 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
196	 * sized segments.
197	 *
198	 * Returns (LCS, PTS).	LCS is the length of the LCS. PTS is an
	 * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
200	 * sub sequences.  The first sub-sequence is contained in [X0, X1),
201	 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
202	 * that (X0, Y0) == (XOFF, YOFF) and
203	 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
204	 *
205	 * This function assumes that the first lines of the specified portions
206	 * of the two files do not match, and likewise that the last lines do not
207	 * match.  The caller must trim matching lines from the beginning and end
208	 * of the portions it is going to specify.
209	 */
210	function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
211	$flip = false;
212
213	if ($xlim - $xoff > $ylim - $yoff) {
214		// Things seems faster (I'm not sure I understand why)
215			// when the shortest sequence in X.
216			$flip = true;
217		list ($xoff, $xlim, $yoff, $ylim)
218		= array( $yoff, $ylim, $xoff, $xlim);
219		}
220
221	if ($flip)
222		for ($i = $ylim - 1; $i >= $yoff; $i--)
223		$ymatches[$this->xv[$i]][] = $i;
224	else
225		for ($i = $ylim - 1; $i >= $yoff; $i--)
226		$ymatches[$this->yv[$i]][] = $i;
227
228	$this->lcs = 0;
229	$this->seq[0]= $yoff - 1;
230	$this->in_seq = array();
231	$ymids[0] = array();
232
233	$numer = $xlim - $xoff + $nchunks - 1;
234	$x = $xoff;
235	for ($chunk = 0; $chunk < $nchunks; $chunk++) {
236		if ($chunk > 0)
237		for ($i = 0; $i <= $this->lcs; $i++)
238			$ymids[$i][$chunk-1] = $this->seq[$i];
239
240		$x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
241		for ( ; $x < $x1; $x++) {
242				$line = $flip ? $this->yv[$x] : $this->xv[$x];
243				if (empty($ymatches[$line]))
244			continue;
245		$matches = $ymatches[$line];
246				reset($matches);
247		while (list ($junk, $y) = each($matches))
248			if (empty($this->in_seq[$y])) {
249			$k = $this->_lcs_pos($y);
250			USE_ASSERTS && assert($k > 0);
251			$ymids[$k] = $ymids[$k-1];
252			break;
253					}
254		while (list ($junk, $y) = each($matches)) {
255			if ($y > $this->seq[$k-1]) {
256			USE_ASSERTS && assert($y < $this->seq[$k]);
257			// Optimization: this is a common case:
258			//	next match is just replacing previous match.
259			$this->in_seq[$this->seq[$k]] = false;
260			$this->seq[$k] = $y;
261			$this->in_seq[$y] = 1;
262					}
263			else if (empty($this->in_seq[$y])) {
264			$k = $this->_lcs_pos($y);
265			USE_ASSERTS && assert($k > 0);
266			$ymids[$k] = $ymids[$k-1];
267					}
268				}
269			}
270		}
271
272	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
273	$ymid = $ymids[$this->lcs];
274	for ($n = 0; $n < $nchunks - 1; $n++) {
275		$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
276		$y1 = $ymid[$n] + 1;
277		$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
278		}
279	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
280
281	return array($this->lcs, $seps);
282	}
283
284	function _lcs_pos ($ypos) {
285	$end = $this->lcs;
286	if ($end == 0 || $ypos > $this->seq[$end]) {
287		$this->seq[++$this->lcs] = $ypos;
288		$this->in_seq[$ypos] = 1;
289		return $this->lcs;
290		}
291
292	$beg = 1;
293	while ($beg < $end) {
294		$mid = (int)(($beg + $end) / 2);
295		if ( $ypos > $this->seq[$mid] )
296		$beg = $mid + 1;
297		else
298		$end = $mid;
299		}
300
301	USE_ASSERTS && assert($ypos != $this->seq[$end]);
302
303	$this->in_seq[$this->seq[$end]] = false;
304	$this->seq[$end] = $ypos;
305	$this->in_seq[$ypos] = 1;
306	return $end;
307	}
308
309	/* Find LCS of two sequences.
310	 *
311	 * The results are recorded in the vectors $this->{x,y}changed[], by
312	 * storing a 1 in the element for each line that is an insertion
313	 * or deletion (ie. is not in the LCS).
314	 *
315	 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
316	 *
317	 * Note that XLIM, YLIM are exclusive bounds.
318	 * All line numbers are origin-0 and discarded lines are not counted.
319	 */
320	function _compareseq ($xoff, $xlim, $yoff, $ylim) {
321	// Slide down the bottom initial diagonal.
322	while ($xoff < $xlim && $yoff < $ylim
323			   && $this->xv[$xoff] == $this->yv[$yoff]) {
324		++$xoff;
325		++$yoff;
326		}
327
328	// Slide up the top initial diagonal.
329	while ($xlim > $xoff && $ylim > $yoff
330			   && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
331		--$xlim;
332		--$ylim;
333		}
334
335	if ($xoff == $xlim || $yoff == $ylim)
336		$lcs = 0;
337	else {
338		// This is ad hoc but seems to work well.
339		//$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
340		//$nchunks = max(2,min(8,(int)$nchunks));
341		$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
342		list ($lcs, $seps)
343		= $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
344		}
345
346	if ($lcs == 0) {
347		// X and Y sequences have no common subsequence:
348		// mark all changed.
349		while ($yoff < $ylim)
350		$this->ychanged[$this->yind[$yoff++]] = 1;
351		while ($xoff < $xlim)
352		$this->xchanged[$this->xind[$xoff++]] = 1;
353		}
354	else {
355		// Use the partitions to split this problem into subproblems.
356		reset($seps);
357		$pt1 = $seps[0];
358		while ($pt2 = next($seps)) {
359		$this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
360		$pt1 = $pt2;
361			}
362		}
363	}
364
365	/* Adjust inserts/deletes of identical lines to join changes
366	 * as much as possible.
367	 *
368	 * We do something when a run of changed lines include a
369	 * line at one end and has an excluded, identical line at the other.
370	 * We are free to choose which identical line is included.
371	 * `compareseq' usually chooses the one at the beginning,
372	 * but usually it is cleaner to consider the following identical line
373	 * to be the "change".
374	 *
375	 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
376	 */
377	function _shift_boundaries ($lines, &$changed, $other_changed) {
378	$i = 0;
379	$j = 0;
380
381	USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
382	$len = sizeof($lines);
383	$other_len = sizeof($other_changed);
384
385	while (1) {
386		/*
387		 * Scan forwards to find beginning of another run of changes.
388		 * Also keep track of the corresponding point in the other file.
389		 *
390		 * Throughout this code, $i and $j are adjusted together so that
391		 * the first $i elements of $changed and the first $j elements
392		 * of $other_changed both contain the same number of zeros
393		 * (unchanged lines).
394		 * Furthermore, $j is always kept so that $j == $other_len or
395		 * $other_changed[$j] == false.
396		 */
397		while ($j < $other_len && $other_changed[$j])
398		$j++;
399
400		while ($i < $len && ! $changed[$i]) {
401		USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
402		$i++; $j++;
403		while ($j < $other_len && $other_changed[$j])
404			$j++;
405			}
406
407		if ($i == $len)
408		break;
409
410		$start = $i;
411
412		// Find the end of this run of changes.
413		while (++$i < $len && $changed[$i])
414		continue;
415
416		do {
417		/*
418		 * Record the length of this run of changes, so that
419		 * we can later determine whether the run has grown.
420		 */
421		$runlength = $i - $start;
422
423		/*
424		 * Move the changed region back, so long as the
425		 * previous unchanged line matches the last changed one.
426		 * This merges with previous changed regions.
427		 */
428		while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
429			$changed[--$start] = 1;
430			$changed[--$i] = false;
431			while ($start > 0 && $changed[$start - 1])
432			$start--;
433			USE_ASSERTS && assert('$j > 0');
434			while ($other_changed[--$j])
435			continue;
436			USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
437				}
438
439		/*
440		 * Set CORRESPONDING to the end of the changed run, at the last
441		 * point where it corresponds to a changed run in the other file.
442		 * CORRESPONDING == LEN means no such point has been found.
443		 */
444		$corresponding = $j < $other_len ? $i : $len;
445
446		/*
447		 * Move the changed region forward, so long as the
448		 * first changed line matches the following unchanged one.
449		 * This merges with following changed regions.
450		 * Do this second, so that if there are no merges,
451		 * the changed region is moved forward as far as possible.
452		 */
453		while ($i < $len && $lines[$start] == $lines[$i]) {
454			$changed[$start++] = false;
455			$changed[$i++] = 1;
456			while ($i < $len && $changed[$i])
457			$i++;
458
459			USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
460			$j++;
461			if ($j < $other_len && $other_changed[$j]) {
462			$corresponding = $i;
463			while ($j < $other_len && $other_changed[$j])
464				$j++;
465					}
466				}
467			} while ($runlength != $i - $start);
468
469		/*
470		 * If possible, move the fully-merged run of changes
471		 * back to a corresponding run in the other file.
472		 */
473		while ($corresponding < $i) {
474		$changed[--$start] = 1;
475		$changed[--$i] = 0;
476		USE_ASSERTS && assert('$j > 0');
477		while ($other_changed[--$j])
478			continue;
479		USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
480			}
481		}
482	}
483}
484
485/**
486 * Class representing a 'diff' between two sequences of strings.
487 */
class Diff
{
	var $edits;

	/**
	 * Constructor.
	 * Computes diff between sequences of strings.
	 *
	 * @param $from_lines array An array of strings.
	 *		  (Typically these are lines from a file.)
	 * @param $to_lines array An array of strings.
	 */
	function __construct($from_lines, $to_lines) {
		$eng = new _DiffEngine;
		$this->edits = $eng->diff($from_lines, $to_lines);
		//$this->_check($from_lines, $to_lines);
	}

	/**
	 * Legacy class-named constructor.
	 *
	 * Kept because subclasses and older callers invoke it explicitly as
	 * $this->Diff(...). PHP 8 no longer treats it as the constructor, so
	 * the real work lives in __construct(). self:: is used on purpose so
	 * that a subclass's own __construct() is not re-entered.
	 */
	function Diff($from_lines, $to_lines) {
		self::__construct($from_lines, $to_lines);
	}

	/**
	 * Compute reversed Diff.
	 *
	 * SYNOPSIS:
	 *
	 *	$diff = new Diff($lines1, $lines2);
	 *	$rev = $diff->reverse();
	 * @return object A Diff object representing the inverse of the
	 *				  original diff.
	 */
	function reverse () {
		// Objects are handles since PHP 5: a plain assignment would alias
		// $this, so clearing the edit list would wipe this diff and leave
		// nothing to reverse. Work on a clone instead.
		$rev = clone $this;
		$rev->edits = array();
		foreach ($this->edits as $edit) {
			$rev->edits[] = $edit->reverse();
		}
		return $rev;
	}

	/**
	 * Check for empty diff.
	 *
	 * @return bool True iff two sequences were identical.
	 */
	function isEmpty () {
		foreach ($this->edits as $edit) {
			if ($edit->type != 'copy')
				return false;
		}
		return true;
	}

	/**
	 * Compute the length of the Longest Common Subsequence (LCS).
	 *
	 * This is mostly for diagnostic purposes.
	 *
	 * @return int The length of the LCS.
	 */
	function lcs () {
		$lcs = 0;
		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy')
				$lcs += sizeof($edit->orig);
		}
		return $lcs;
	}

	/**
	 * Get the original set of lines.
	 *
	 * This reconstructs the $from_lines parameter passed to the
	 * constructor.
	 *
	 * @return array The original sequence of strings.
	 */
	function orig() {
		$lines = array();

		foreach ($this->edits as $edit) {
			if ($edit->orig)
				array_splice($lines, sizeof($lines), 0, $edit->orig);
		}
		return $lines;
	}

	/**
	 * Get the closing set of lines.
	 *
	 * This reconstructs the $to_lines parameter passed to the
	 * constructor.
	 *
	 * @return array The sequence of strings.
	 */
	function closing() {
		$lines = array();

		foreach ($this->edits as $edit) {
			if ($edit->closing)
				array_splice($lines, sizeof($lines), 0, $edit->closing);
		}
		return $lines;
	}

	/**
	 * Check a Diff for validity.
	 *
	 * This is here only for debugging purposes. Every failed check raises
	 * an E_USER_ERROR; success is reported with an E_USER_NOTICE.
	 */
	function _check ($from_lines, $to_lines) {
		if (serialize($from_lines) != serialize($this->orig()))
			trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
		if (serialize($to_lines) != serialize($this->closing()))
			trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);

		$rev = $this->reverse();
		if (serialize($to_lines) != serialize($rev->orig()))
			trigger_error("Reversed original doesn't match", E_USER_ERROR);
		if (serialize($from_lines) != serialize($rev->closing()))
			trigger_error("Reversed closing doesn't match", E_USER_ERROR);


		// Two neighbouring edits of the same type could have been merged.
		$prevtype = 'none';
		foreach ($this->edits as $edit) {
			if ( $prevtype == $edit->type )
				trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
			$prevtype = $edit->type;
		}

		$lcs = $this->lcs();
		trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
	}
}
619
620/**
621 * FIXME: bad name.
622 */
class MappedDiff
extends Diff
{
	/**
	 * Constructor.
	 *
	 * Computes diff between sequences of strings.
	 *
	 * This can be used to compute things like
	 * case-insensitive diffs, or diffs which ignore
	 * changes in white-space.
	 *
	 * @param $from_lines array An array of strings.
	 *	(Typically these are lines from a file.)
	 *
	 * @param $to_lines array An array of strings.
	 *
	 * @param $mapped_from_lines array This array should
	 *	have the same number of elements as $from_lines.
	 *	The elements in $mapped_from_lines and
	 *	$mapped_to_lines are what is actually compared
	 *	when computing the diff.
	 *
	 * @param $mapped_to_lines array This array should
	 *	have the same number of elements as $to_lines.
	 */
	function __construct($from_lines, $to_lines,
						$mapped_from_lines, $mapped_to_lines) {

		assert(sizeof($from_lines) == sizeof($mapped_from_lines));
		assert(sizeof($to_lines) == sizeof($mapped_to_lines));

		// Diff the mapped lines through the parent's class-named initializer.
		$this->Diff($mapped_from_lines, $mapped_to_lines);

		// Swap the mapped lines stored in each edit for the real lines at
		// the same positions; $xi/$yi track how far we are in each input.
		$xi = $yi = 0;
		foreach ($this->edits as $edit) {
			if (is_array($edit->orig)) {
				$edit->orig = array_slice($from_lines, $xi, sizeof($edit->orig));
				$xi += sizeof($edit->orig);
			}

			if (is_array($edit->closing)) {
				$edit->closing = array_slice($to_lines, $yi, sizeof($edit->closing));
				$yi += sizeof($edit->closing);
			}
		}
	}

	/**
	 * Legacy class-named constructor.
	 *
	 * Kept because subclasses and older callers invoke it explicitly as
	 * $this->MappedDiff(...). PHP 8 no longer treats it as the
	 * constructor, so the real work lives in __construct(). self:: is used
	 * on purpose so that a subclass's own __construct() is not re-entered.
	 */
	function MappedDiff($from_lines, $to_lines,
						$mapped_from_lines, $mapped_to_lines) {
		self::__construct($from_lines, $to_lines,
						  $mapped_from_lines, $mapped_to_lines);
	}
}
673
674/**
675 * A class to format Diffs
676 *
677 * This class formats the diff in classic diff format.
678 * It is intended that this class be customized via inheritance,
679 * to obtain fancier outputs.
680 */
class DiffFormatter
{
	/**
	 * Number of leading context "lines" to preserve.
	 *
	 * This should be left at zero for this class, but subclasses
	 * may want to set this to other values.
	 */
	var $leading_context_lines = 0;

	/**
	 * Number of trailing context "lines" to preserve.
	 *
	 * This should be left at zero for this class, but subclasses
	 * may want to set this to other values.
	 */
	var $trailing_context_lines = 0;

	/**
	 * Format a diff.
	 *
	 * Consecutive edits are grouped into blocks (with the configured amount
	 * of surrounding context) and each block is handed to _block().
	 *
	 * @param $diff object A Diff object.
	 * @return string The formatted output.
	 */
	function format($diff) {

		$xi = $yi = 1;
		$block = false;
		$context = array();

		$nlead = $this->leading_context_lines;
		$ntrail = $this->trailing_context_lines;

		$this->_start_diff();

		foreach ($diff->edits as $edit) {
			if ($edit->type == 'copy') {
				if (is_array($block)) {
					if (sizeof($edit->orig) <= $nlead + $ntrail) {
						// Short enough to bridge two changes: keep it in
						// the current block.
						$block[] = $edit;
					}
					else{
						if ($ntrail) {
							$context = array_slice($edit->orig, 0, $ntrail);
							$block[] = new _DiffOp_Copy($context);
						}
						$this->_block($x0, $ntrail + $xi - $x0,
									  $y0, $ntrail + $yi - $y0,
									  $block);
						$block = false;
					}
				}
				$context = $edit->orig;
			}
			else {
				if (! is_array($block)) {
					// Keep at most $nlead lines of leading context. The
					// offset is clamped at 0: a negative offset would make
					// array_slice() count from the end and drop lines when
					// fewer than $nlead context lines are available.
					$context = array_slice($context, max(0, sizeof($context) - $nlead));
					$x0 = $xi - sizeof($context);
					$y0 = $yi - sizeof($context);
					$block = array();
					if ($context)
						$block[] = new _DiffOp_Copy($context);
				}
				$block[] = $edit;
			}

			if ($edit->orig)
				$xi += sizeof($edit->orig);
			if ($edit->closing)
				$yi += sizeof($edit->closing);
		}

		if (is_array($block))
			$this->_block($x0, $xi - $x0,
						  $y0, $yi - $y0,
						  $block);

		return $this->_end_diff();
	}

	/**
	 * Emit one block: its header followed by every edit it contains.
	 */
	function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
		$this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
		foreach ($edits as $edit) {
			if ($edit->type == 'copy')
				$this->_context($edit->orig);
			elseif ($edit->type == 'add')
				$this->_added($edit->closing);
			elseif ($edit->type == 'delete')
				$this->_deleted($edit->orig);
			elseif ($edit->type == 'change')
				$this->_changed($edit->orig, $edit->closing);
			else
				trigger_error("Unknown edit type", E_USER_ERROR);
		}
		$this->_end_block();
	}

	/**
	 * Start capturing everything the formatter echoes.
	 */
	function _start_diff() {
		ob_start();
	}

	/**
	 * Stop capturing and return what was echoed since _start_diff().
	 */
	function _end_diff() {
		$val = ob_get_contents();
		ob_end_clean();
		return $val;
	}

	/**
	 * Build the change command line of the classic ("normal") diff format,
	 * e.g. "3c3", "5,7d4" or "2a3,4", terminated by a newline.
	 *
	 * @param $xbeg int First affected line in the original (1-based).
	 * @param $xlen int Number of affected lines in the original.
	 * @param $ybeg int First affected line in the closing text (1-based).
	 * @param $ylen int Number of affected lines in the closing text.
	 * @return string The header line.
	 */
	function _block_header($xbeg, $xlen, $ybeg, $ylen) {
		// Match GNU diff: the side with no lines names the line *after
		// which* the text was added (append) or would have been (delete).
		if ($xlen && !$ylen)
			$ybeg--;
		elseif (!$xlen)
			$xbeg--;

		if ($xlen > 1)
			$xbeg .= "," . ($xbeg + $xlen - 1);
		if ($ylen > 1)
			$ybeg .= "," . ($ybeg + $ylen - 1);

		// The newline keeps the command on its own line; without it the
		// first "<"/">" line would be glued to the header.
		return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg . "\n";
	}

	function _start_block($header) {
		echo $header;
	}

	function _end_block() {
	}

	/**
	 * Echo each line preceded by $prefix and a space.
	 */
	function _lines($lines, $prefix = ' ') {
		foreach ($lines as $line)
			echo "$prefix $line\n";
	}

	function _context($lines) {
		$this->_lines($lines);
	}

	function _added($lines) {
		$this->_lines($lines, ">");
	}
	function _deleted($lines) {
		$this->_lines($lines, "<");
	}

	function _changed($orig, $closing) {
		$this->_deleted($orig);
		echo "---\n";
		$this->_added($closing);
	}
}
826
827
828/**
829 *	Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
830 *
831 */
832
// Appended by _HWLDF_WordAccumulator::addWords() to the current group just
// before a line is flushed at a newline.
// NOTE(review): a lone 0xA0 byte is not a valid UTF-8 sequence; confirm the
// encoding of the pages this output ends up in.
define('NBSP', "\xA0");			// iso-8859-x non-breaking space.
834
/**
 * Collects words into output lines, wrapping runs of words tagged 'mark'
 * in <span class="diffchange">…</span>.
 *
 * NOTE(review): words are concatenated into the markup as-is, with no HTML
 * escaping here; presumably callers escape the text beforehand — confirm.
 */
class _HWLDF_WordAccumulator {
	// Finished lines.
	var $_lines = array();
	// Line currently being assembled.
	var $_line = '';
	// Run of consecutive words sharing the current tag.
	var $_group = '';
	// Tag of the current group ('' or 'mark').
	var $_tag = '';

	function __construct() {
		$this->_lines = array();
		$this->_line = '';
		$this->_group = '';
		$this->_tag = '';
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function _HWLDF_WordAccumulator () {
		self::__construct();
	}

	/**
	 * Append the pending group to the current line (wrapped in the
	 * highlight span when its tag is 'mark') and start a new group
	 * tagged $new_tag.
	 */
	function _flushGroup ($new_tag) {
		if ($this->_group !== '') {
			if ($this->_tag == 'mark')
				$this->_line .= '<span class="diffchange">'.$this->_group.'</span>';
			else
				$this->_line .= $this->_group;
		}
		$this->_group = '';
		$this->_tag = $new_tag;
	}

	/**
	 * Flush the pending group, then store the current line (if it is not
	 * empty) and start a new one.
	 */
	function _flushLine ($new_tag) {
		$this->_flushGroup($new_tag);
		if ($this->_line != '')
			$this->_lines[] = $this->_line;
		$this->_line = '';
	}

	/**
	 * Add a run of words that all carry the same tag.
	 *
	 * @param $words array The words; a newline may only appear as the
	 *        first character of a word and ends the current line.
	 * @param $tag string '' for unchanged text, 'mark' for changed text.
	 */
	function addWords ($words, $tag = '') {
		if ($tag != $this->_tag)
			$this->_flushGroup($tag);

		foreach ($words as $word) {
			// new-line should only come as first char of word.
			if ($word == '')
				continue;
			if ($word[0] == "\n") {
				// NBSP guarantees the flushed line is never empty, so
				// _flushLine() does not drop it.
				$this->_group .= NBSP;
				$this->_flushLine($tag);
				$word = substr($word, 1);
			}
			assert(!strstr($word, "\n"));
			$this->_group .= $word;
		}
	}

	/**
	 * @return array All accumulated lines, including the one in progress.
	 */
	function getLines() {
		$this->_flushLine('~done');
		return $this->_lines;
	}
}
884
/**
 * Word-by-word diff of two blocks of lines; orig() and closing() return the
 * two sides as lines with the changed words marked up.
 */
class WordLevelDiff extends MappedDiff
{
	/**
	 * @param $orig_lines array Lines of the original text.
	 * @param $closing_lines array Lines of the closing text.
	 */
	function __construct($orig_lines, $closing_lines) {
		list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
		list ($closing_words, $closing_stripped) = $this->_split($closing_lines);

		// Diff the stripped words through the parent's class-named
		// initializer, reporting the unstripped words.
		$this->MappedDiff($orig_words, $closing_words,
						  $orig_stripped, $closing_stripped);
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function WordLevelDiff ($orig_lines, $closing_lines) {
		self::__construct($orig_lines, $closing_lines);
	}

	/**
	 * Split lines into words.
	 *
	 * @param $lines array Lines of text (joined with "\n" before splitting).
	 * @return array Two parallel lists: the full matches and the first
	 *         capture group of each match.
	 */
	function _split($lines) {
		// FIXME: fix POSIX char class.
		// NOTE(review): under the /x flag "(?!< \n)" is a negative
		// lookahead for "<\n"; a lookbehind "(?<!\n)" may have been
		// intended - left untouched because it changes tokenization.
#		 if (!preg_match_all('/ ( [^\S\n]+ | [[:alnum:]]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
		if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
							implode("\n", $lines),
							$m)) {
			return array(array(''), array(''));
		}
		return array($m[0], $m[1]);
	}

	/**
	 * @return array Lines of the original side, changed words marked.
	 */
	function orig () {
		$orig = new _HWLDF_WordAccumulator;

		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy')
				$orig->addWords($edit->orig);
			elseif ($edit->orig)
				$orig->addWords($edit->orig, 'mark');
		}
		return $orig->getLines();
	}

	/**
	 * @return array Lines of the closing side, changed words marked.
	 */
	function closing () {
		$closing = new _HWLDF_WordAccumulator;

		foreach ($this->edits as $edit) {
			if ($edit->type == 'copy')
				$closing->addWords($edit->closing);
			elseif ($edit->closing)
				$closing->addWords($edit->closing, 'mark');
		}
		return $closing->getLines();
	}
}
931
932/**
933 * "Unified" diff formatter.
934 *
935 * This class formats the diff in classic "unified diff" format.
936 */
class UnifiedDiffFormatter extends DiffFormatter
{
    /**
     * @param $context_lines int Number of context lines kept before and
     *        after each change.
     */
    function __construct($context_lines = 4) {
        $this->leading_context_lines = $context_lines;
        $this->trailing_context_lines = $context_lines;
    }

    /**
     * Legacy class-named constructor, kept so existing explicit calls keep
     * working. PHP 8 no longer treats such a method as the constructor,
     * which is why the real work lives in __construct().
     */
    function UnifiedDiffFormatter($context_lines = 4) {
        self::__construct($context_lines);
    }

    /**
     * Build the "@@ -start,len +start,len @@" hunk header; a length of
     * exactly 1 is left out.
     */
    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
        if ($xlen != 1)
            $xbeg .= "," . $xlen;
        if ($ylen != 1)
            $ybeg .= "," . $ylen;
        return "@@ -$xbeg +$ybeg @@\n";
    }

    function _added($lines) {
        $this->_lines($lines, "+");
    }
    function _deleted($lines) {
        $this->_lines($lines, "-");
    }
    // A change is printed as its deleted lines followed by its added lines.
    function _changed($orig, $final) {
        $this->_deleted($orig);
        $this->_added($final);
    }
}
963
964/**
965 *	Wikipedia Table style diff formatter.
966 *
967 */
class TableDiffFormatter extends DiffFormatter
{
	function __construct() {
		$this->leading_context_lines = 2;
		$this->trailing_context_lines = 2;
	}

	/**
	 * Legacy class-named constructor, kept so existing explicit calls keep
	 * working. PHP 8 no longer treats such a method as the constructor,
	 * which is why the real work lives in __construct().
	 */
	function TableDiffFormatter() {
		self::__construct();
	}

	/**
	 * HTML-escape $text and turn double spaces into ' &nbsp;'.
	 */
	function _pre($text){
		$text = htmlspecialchars($text);
		$text = str_replace('  ',' &nbsp;',$text);
		return $text;
	}

	/**
	 * Header row naming the first line of the block on each side, using
	 * the 'line' entry of the global $lang array as the label.
	 */
	function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
		global $lang;
		$l1 = $lang['line'].' '.$xbeg;
		$l2 = $lang['line'].' '.$ybeg;
		$r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
		  '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
		return $r;
	}

	function _start_block( $header ) {
		print( $header );
	}

	function _end_block() {
	}

	// Intentionally empty: rows are produced by the *Line() helpers below.
	function _lines( $lines, $prefix=' ', $color="white" ) {
	}

	// NOTE(review): the *Line() helpers insert $line into the markup without
	// HTML escaping; presumably the caller escapes the text - confirm.

	function addedLine( $line ) {
		$line = str_replace('  ','&nbsp; ',$line);
		return '<td>+</td><td class="diff-addedline">' .
		  $line.'</td>';
	}

	function deletedLine( $line ) {
		$line = str_replace('  ','&nbsp; ',$line);
		return '<td>-</td><td class="diff-deletedline">' .
		  $line.'</td>';
	}

	/**
	 * Placeholder cell pair for the side of a row that has no line.
	 */
	function emptyLine() {
		return '<td colspan="2">&nbsp;</td>';
	}

	function contextLine( $line ) {
		$line = str_replace('  ','&nbsp; ',$line);
		return '<td> </td><td class="diff-context">'.$line.'</td>';
	}

	function _added($lines) {
		foreach ($lines as $line) {
			print( '<tr>' . $this->emptyLine() .
			  $this->addedLine( $line ) . "</tr>\n" );
		}
	}

	function _deleted($lines) {
		foreach ($lines as $line) {
			print( '<tr>' . $this->deletedLine( $line ) .
			  $this->emptyLine() . "</tr>\n" );
		}
	}

	function _context( $lines ) {
		foreach ($lines as $line) {
			print( '<tr>' . $this->contextLine( $line ) .
			  $this->contextLine( $line ) . "</tr>\n" );
		}
	}

	/**
	 * Print a changed region side by side with word-level highlighting.
	 */
	function _changed( $orig, $closing ) {
		$diff = new WordLevelDiff( $orig, $closing );
		$del = $diff->orig();
		$add = $diff->closing();

		// Compare against null (array_shift() on an empty array) rather
		// than relying on truthiness: a line consisting of "0" is falsy
		// and used to end the loop early, dropping the remaining lines.
		while ( ($line = array_shift( $del )) !== null ) {
			$aline = array_shift( $add );
			// When the added side has run out, pair the deleted line with
			// an empty cell, exactly as _deleted() does.
			$right = ($aline === null) ? $this->emptyLine()
									   : $this->addedLine( $aline );
			print( '<tr>' . $this->deletedLine( $line ) .
			  $right . "</tr>\n" );
		}
		$this->_added( $add ); # If any leftovers
	}
}
1056
1057?>
1058