xref: /dokuwiki/inc/DifferenceEngine.php (revision f62ea8a1d1cf10eddeae777b11420624e111b7ea)
1<?php
2/**
3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4 *
5 * Additions by Axel Boldt for MediaWiki
6 *
7 * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 * @license  You may copy this code freely under the conditions of the GPL.
9 */
10define('USE_ASSERTS', function_exists('assert'));
11
12class _DiffOp {
13	var $type;
14	var $orig;
15	var $closing;
16
17	function reverse() {
18		trigger_error("pure virtual", E_USER_ERROR);
19	}
20
21	function norig() {
22		return $this->orig ? sizeof($this->orig) : 0;
23	}
24
25	function nclosing() {
26		return $this->closing ? sizeof($this->closing) : 0;
27	}
28}
29
30class _DiffOp_Copy extends _DiffOp {
31	var $type = 'copy';
32
33	function _DiffOp_Copy ($orig, $closing = false) {
34		if (!is_array($closing))
35			$closing = $orig;
36		$this->orig = $orig;
37		$this->closing = $closing;
38	}
39
40	function reverse() {
41		return new _DiffOp_Copy($this->closing, $this->orig);
42	}
43}
44
45class _DiffOp_Delete extends _DiffOp {
46	var $type = 'delete';
47
48	function _DiffOp_Delete ($lines) {
49		$this->orig = $lines;
50		$this->closing = false;
51	}
52
53	function reverse() {
54		return new _DiffOp_Add($this->orig);
55	}
56}
57
58class _DiffOp_Add extends _DiffOp {
59	var $type = 'add';
60
61	function _DiffOp_Add ($lines) {
62		$this->closing = $lines;
63		$this->orig = false;
64	}
65
66	function reverse() {
67		return new _DiffOp_Delete($this->closing);
68	}
69}
70
71class _DiffOp_Change extends _DiffOp {
72	var $type = 'change';
73
74	function _DiffOp_Change ($orig, $closing) {
75		$this->orig = $orig;
76		$this->closing = $closing;
77	}
78
79	function reverse() {
80		return new _DiffOp_Change($this->closing, $this->orig);
81	}
82}
83
84
85/**
86 * Class used internally by Diff to actually compute the diffs.
87 *
88 * The algorithm used here is mostly lifted from the perl module
89 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
90 *	 http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
91 *
92 * More ideas are taken from:
93 *	 http://www.ics.uci.edu/~eppstein/161/960229.html
94 *
95 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
96 * diffutils-2.7, which can be found at:
97 *	 ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
98 *
99 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
100 * are my own.
101 *
102 * @author Geoffrey T. Dairiki
103 * @access private
104 */
105class _DiffEngine
106{
107	function diff ($from_lines, $to_lines) {
108		$n_from = sizeof($from_lines);
109		$n_to = sizeof($to_lines);
110
111		$this->xchanged = $this->ychanged = array();
112		$this->xv = $this->yv = array();
113		$this->xind = $this->yind = array();
114		unset($this->seq);
115		unset($this->in_seq);
116		unset($this->lcs);
117
118		// Skip leading common lines.
119		for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
120			if ($from_lines[$skip] != $to_lines[$skip])
121				break;
122			$this->xchanged[$skip] = $this->ychanged[$skip] = false;
123		}
124		// Skip trailing common lines.
125		$xi = $n_from; $yi = $n_to;
126		for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
127			if ($from_lines[$xi] != $to_lines[$yi])
128				break;
129			$this->xchanged[$xi] = $this->ychanged[$yi] = false;
130		}
131
132		// Ignore lines which do not exist in both files.
133		for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
134			$xhash[$from_lines[$xi]] = 1;
135		for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
136			$line = $to_lines[$yi];
137			if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
138				continue;
139			$yhash[$line] = 1;
140			$this->yv[] = $line;
141			$this->yind[] = $yi;
142		}
143		for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
144			$line = $from_lines[$xi];
145			if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
146				continue;
147			$this->xv[] = $line;
148			$this->xind[] = $xi;
149		}
150
151		// Find the LCS.
152		$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
153
154		// Merge edits when possible
155		$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
156		$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
157
158		// Compute the edit operations.
159		$edits = array();
160		$xi = $yi = 0;
161		while ($xi < $n_from || $yi < $n_to) {
162			USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
163			USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
164
165			// Skip matching "snake".
166			$copy = array();
167			while ( $xi < $n_from && $yi < $n_to
168					&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
169				$copy[] = $from_lines[$xi++];
170				++$yi;
171			}
172			if ($copy)
173				$edits[] = new _DiffOp_Copy($copy);
174
175			// Find deletes & adds.
176			$delete = array();
177			while ($xi < $n_from && $this->xchanged[$xi])
178				$delete[] = $from_lines[$xi++];
179
180			$add = array();
181			while ($yi < $n_to && $this->ychanged[$yi])
182				$add[] = $to_lines[$yi++];
183
184			if ($delete && $add)
185				$edits[] = new _DiffOp_Change($delete, $add);
186			elseif ($delete)
187				$edits[] = new _DiffOp_Delete($delete);
188			elseif ($add)
189				$edits[] = new _DiffOp_Add($add);
190		}
191		return $edits;
192	}
193
194
195	/**
196   * Divide the Largest Common Subsequence (LCS) of the sequences
197	 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
198	 * sized segments.
199	 *
200	 * Returns (LCS, PTS).	LCS is the length of the LCS. PTS is an
201	 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
202	 * sub sequences.  The first sub-sequence is contained in [X0, X1),
203	 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
204	 * that (X0, Y0) == (XOFF, YOFF) and
205	 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
206	 *
207	 * This function assumes that the first lines of the specified portions
208	 * of the two files do not match, and likewise that the last lines do not
209	 * match.  The caller must trim matching lines from the beginning and end
210	 * of the portions it is going to specify.
211	 */
212	function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
213	$flip = false;
214
215	if ($xlim - $xoff > $ylim - $yoff) {
216		// Things seems faster (I'm not sure I understand why)
217			// when the shortest sequence in X.
218			$flip = true;
219		list ($xoff, $xlim, $yoff, $ylim)
220		= array( $yoff, $ylim, $xoff, $xlim);
221		}
222
223	if ($flip)
224		for ($i = $ylim - 1; $i >= $yoff; $i--)
225		$ymatches[$this->xv[$i]][] = $i;
226	else
227		for ($i = $ylim - 1; $i >= $yoff; $i--)
228		$ymatches[$this->yv[$i]][] = $i;
229
230	$this->lcs = 0;
231	$this->seq[0]= $yoff - 1;
232	$this->in_seq = array();
233	$ymids[0] = array();
234
235	$numer = $xlim - $xoff + $nchunks - 1;
236	$x = $xoff;
237	for ($chunk = 0; $chunk < $nchunks; $chunk++) {
238		if ($chunk > 0)
239		for ($i = 0; $i <= $this->lcs; $i++)
240			$ymids[$i][$chunk-1] = $this->seq[$i];
241
242		$x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
243		for ( ; $x < $x1; $x++) {
244				$line = $flip ? $this->yv[$x] : $this->xv[$x];
245				if (empty($ymatches[$line]))
246			continue;
247		$matches = $ymatches[$line];
248				reset($matches);
249		while (list ($junk, $y) = each($matches))
250			if (empty($this->in_seq[$y])) {
251			$k = $this->_lcs_pos($y);
252			USE_ASSERTS && assert($k > 0);
253			$ymids[$k] = $ymids[$k-1];
254			break;
255					}
256		while (list ($junk, $y) = each($matches)) {
257			if ($y > $this->seq[$k-1]) {
258			USE_ASSERTS && assert($y < $this->seq[$k]);
259			// Optimization: this is a common case:
260			//	next match is just replacing previous match.
261			$this->in_seq[$this->seq[$k]] = false;
262			$this->seq[$k] = $y;
263			$this->in_seq[$y] = 1;
264					}
265			else if (empty($this->in_seq[$y])) {
266			$k = $this->_lcs_pos($y);
267			USE_ASSERTS && assert($k > 0);
268			$ymids[$k] = $ymids[$k-1];
269					}
270				}
271			}
272		}
273
274	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
275	$ymid = $ymids[$this->lcs];
276	for ($n = 0; $n < $nchunks - 1; $n++) {
277		$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
278		$y1 = $ymid[$n] + 1;
279		$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
280		}
281	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
282
283	return array($this->lcs, $seps);
284	}
285
286	function _lcs_pos ($ypos) {
287	$end = $this->lcs;
288	if ($end == 0 || $ypos > $this->seq[$end]) {
289		$this->seq[++$this->lcs] = $ypos;
290		$this->in_seq[$ypos] = 1;
291		return $this->lcs;
292		}
293
294	$beg = 1;
295	while ($beg < $end) {
296		$mid = (int)(($beg + $end) / 2);
297		if ( $ypos > $this->seq[$mid] )
298		$beg = $mid + 1;
299		else
300		$end = $mid;
301		}
302
303	USE_ASSERTS && assert($ypos != $this->seq[$end]);
304
305	$this->in_seq[$this->seq[$end]] = false;
306	$this->seq[$end] = $ypos;
307	$this->in_seq[$ypos] = 1;
308	return $end;
309	}
310
311	/**
312   * Find LCS of two sequences.
313	 *
314	 * The results are recorded in the vectors $this->{x,y}changed[], by
315	 * storing a 1 in the element for each line that is an insertion
316	 * or deletion (ie. is not in the LCS).
317	 *
318	 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
319	 *
320	 * Note that XLIM, YLIM are exclusive bounds.
321	 * All line numbers are origin-0 and discarded lines are not counted.
322	 */
323	function _compareseq ($xoff, $xlim, $yoff, $ylim) {
324	// Slide down the bottom initial diagonal.
325	while ($xoff < $xlim && $yoff < $ylim
326			   && $this->xv[$xoff] == $this->yv[$yoff]) {
327		++$xoff;
328		++$yoff;
329		}
330
331	// Slide up the top initial diagonal.
332	while ($xlim > $xoff && $ylim > $yoff
333			   && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
334		--$xlim;
335		--$ylim;
336		}
337
338	if ($xoff == $xlim || $yoff == $ylim)
339		$lcs = 0;
340	else {
341		// This is ad hoc but seems to work well.
342		//$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
343		//$nchunks = max(2,min(8,(int)$nchunks));
344		$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
345		list ($lcs, $seps)
346		= $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
347		}
348
349	if ($lcs == 0) {
350		// X and Y sequences have no common subsequence:
351		// mark all changed.
352		while ($yoff < $ylim)
353		$this->ychanged[$this->yind[$yoff++]] = 1;
354		while ($xoff < $xlim)
355		$this->xchanged[$this->xind[$xoff++]] = 1;
356		}
357	else {
358		// Use the partitions to split this problem into subproblems.
359		reset($seps);
360		$pt1 = $seps[0];
361		while ($pt2 = next($seps)) {
362		$this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
363		$pt1 = $pt2;
364			}
365		}
366	}
367
368	/**
369   * Adjust inserts/deletes of identical lines to join changes
370	 * as much as possible.
371	 *
372	 * We do something when a run of changed lines include a
373	 * line at one end and has an excluded, identical line at the other.
374	 * We are free to choose which identical line is included.
375	 * `compareseq' usually chooses the one at the beginning,
376	 * but usually it is cleaner to consider the following identical line
377	 * to be the "change".
378	 *
379	 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
380	 */
381	function _shift_boundaries ($lines, &$changed, $other_changed) {
382	$i = 0;
383	$j = 0;
384
385	USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
386	$len = sizeof($lines);
387	$other_len = sizeof($other_changed);
388
389	while (1) {
390		/*
391		 * Scan forwards to find beginning of another run of changes.
392		 * Also keep track of the corresponding point in the other file.
393		 *
394		 * Throughout this code, $i and $j are adjusted together so that
395		 * the first $i elements of $changed and the first $j elements
396		 * of $other_changed both contain the same number of zeros
397		 * (unchanged lines).
398		 * Furthermore, $j is always kept so that $j == $other_len or
399		 * $other_changed[$j] == false.
400		 */
401		while ($j < $other_len && $other_changed[$j])
402		$j++;
403
404		while ($i < $len && ! $changed[$i]) {
405		USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
406		$i++; $j++;
407		while ($j < $other_len && $other_changed[$j])
408			$j++;
409			}
410
411		if ($i == $len)
412		break;
413
414		$start = $i;
415
416		// Find the end of this run of changes.
417		while (++$i < $len && $changed[$i])
418		continue;
419
420		do {
421		/*
422		 * Record the length of this run of changes, so that
423		 * we can later determine whether the run has grown.
424		 */
425		$runlength = $i - $start;
426
427		/*
428		 * Move the changed region back, so long as the
429		 * previous unchanged line matches the last changed one.
430		 * This merges with previous changed regions.
431		 */
432		while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
433			$changed[--$start] = 1;
434			$changed[--$i] = false;
435			while ($start > 0 && $changed[$start - 1])
436			$start--;
437			USE_ASSERTS && assert('$j > 0');
438			while ($other_changed[--$j])
439			continue;
440			USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
441				}
442
443		/*
444		 * Set CORRESPONDING to the end of the changed run, at the last
445		 * point where it corresponds to a changed run in the other file.
446		 * CORRESPONDING == LEN means no such point has been found.
447		 */
448		$corresponding = $j < $other_len ? $i : $len;
449
450		/*
451		 * Move the changed region forward, so long as the
452		 * first changed line matches the following unchanged one.
453		 * This merges with following changed regions.
454		 * Do this second, so that if there are no merges,
455		 * the changed region is moved forward as far as possible.
456		 */
457		while ($i < $len && $lines[$start] == $lines[$i]) {
458			$changed[$start++] = false;
459			$changed[$i++] = 1;
460			while ($i < $len && $changed[$i])
461			$i++;
462
463			USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
464			$j++;
465			if ($j < $other_len && $other_changed[$j]) {
466			$corresponding = $i;
467			while ($j < $other_len && $other_changed[$j])
468				$j++;
469					}
470				}
471			} while ($runlength != $i - $start);
472
473		/*
474		 * If possible, move the fully-merged run of changes
475		 * back to a corresponding run in the other file.
476		 */
477		while ($corresponding < $i) {
478		$changed[--$start] = 1;
479		$changed[--$i] = 0;
480		USE_ASSERTS && assert('$j > 0');
481		while ($other_changed[--$j])
482			continue;
483		USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
484			}
485		}
486	}
487}
488
489/**
490 * Class representing a 'diff' between two sequences of strings.
491 */
492class Diff
493{
494	var $edits;
495
496	/**
497	 * Constructor.
498	 * Computes diff between sequences of strings.
499	 *
500	 * @param $from_lines array An array of strings.
501	 *		  (Typically these are lines from a file.)
502	 * @param $to_lines array An array of strings.
503	 */
504	function Diff($from_lines, $to_lines) {
505		$eng = new _DiffEngine;
506		$this->edits = $eng->diff($from_lines, $to_lines);
507		//$this->_check($from_lines, $to_lines);
508	}
509
510	/**
511	 * Compute reversed Diff.
512	 *
513	 * SYNOPSIS:
514	 *
515	 *	$diff = new Diff($lines1, $lines2);
516	 *	$rev = $diff->reverse();
517	 * @return object A Diff object representing the inverse of the
518	 *				  original diff.
519	 */
520	function reverse () {
521	$rev = $this;
522		$rev->edits = array();
523		foreach ($this->edits as $edit) {
524			$rev->edits[] = $edit->reverse();
525		}
526	return $rev;
527	}
528
529	/**
530	 * Check for empty diff.
531	 *
532	 * @return bool True iff two sequences were identical.
533	 */
534	function isEmpty () {
535		foreach ($this->edits as $edit) {
536			if ($edit->type != 'copy')
537				return false;
538		}
539		return true;
540	}
541
542	/**
543	 * Compute the length of the Longest Common Subsequence (LCS).
544	 *
545	 * This is mostly for diagnostic purposed.
546	 *
547	 * @return int The length of the LCS.
548	 */
549	function lcs () {
550	$lcs = 0;
551		foreach ($this->edits as $edit) {
552			if ($edit->type == 'copy')
553				$lcs += sizeof($edit->orig);
554		}
555	return $lcs;
556	}
557
558	/**
559	 * Get the original set of lines.
560	 *
561	 * This reconstructs the $from_lines parameter passed to the
562	 * constructor.
563	 *
564	 * @return array The original sequence of strings.
565	 */
566	function orig() {
567		$lines = array();
568
569		foreach ($this->edits as $edit) {
570			if ($edit->orig)
571				array_splice($lines, sizeof($lines), 0, $edit->orig);
572		}
573		return $lines;
574	}
575
576	/**
577	 * Get the closing set of lines.
578	 *
579	 * This reconstructs the $to_lines parameter passed to the
580	 * constructor.
581	 *
582	 * @return array The sequence of strings.
583	 */
584	function closing() {
585		$lines = array();
586
587		foreach ($this->edits as $edit) {
588			if ($edit->closing)
589				array_splice($lines, sizeof($lines), 0, $edit->closing);
590		}
591		return $lines;
592	}
593
594	/**
595	 * Check a Diff for validity.
596	 *
597	 * This is here only for debugging purposes.
598	 */
599	function _check ($from_lines, $to_lines) {
600		if (serialize($from_lines) != serialize($this->orig()))
601			trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
602		if (serialize($to_lines) != serialize($this->closing()))
603			trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
604
605		$rev = $this->reverse();
606		if (serialize($to_lines) != serialize($rev->orig()))
607			trigger_error("Reversed original doesn't match", E_USER_ERROR);
608		if (serialize($from_lines) != serialize($rev->closing()))
609			trigger_error("Reversed closing doesn't match", E_USER_ERROR);
610
611
612		$prevtype = 'none';
613		foreach ($this->edits as $edit) {
614			if ( $prevtype == $edit->type )
615				trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
616			$prevtype = $edit->type;
617		}
618
619		$lcs = $this->lcs();
620		trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
621	}
622}
623
624/**
625 * FIXME: bad name.
626 */
627class MappedDiff
628extends Diff
629{
630	/**
631	 * Constructor.
632	 *
633	 * Computes diff between sequences of strings.
634	 *
635	 * This can be used to compute things like
636	 * case-insensitve diffs, or diffs which ignore
637	 * changes in white-space.
638	 *
639	 * @param $from_lines array An array of strings.
640	 *	(Typically these are lines from a file.)
641	 *
642	 * @param $to_lines array An array of strings.
643	 *
644	 * @param $mapped_from_lines array This array should
645	 *	have the same size number of elements as $from_lines.
646	 *	The elements in $mapped_from_lines and
647	 *	$mapped_to_lines are what is actually compared
648	 *	when computing the diff.
649	 *
650	 * @param $mapped_to_lines array This array should
651	 *	have the same number of elements as $to_lines.
652	 */
653	function MappedDiff($from_lines, $to_lines,
654						$mapped_from_lines, $mapped_to_lines) {
655
656		assert(sizeof($from_lines) == sizeof($mapped_from_lines));
657		assert(sizeof($to_lines) == sizeof($mapped_to_lines));
658
659		$this->Diff($mapped_from_lines, $mapped_to_lines);
660
661		$xi = $yi = 0;
662		for ($i = 0; $i < sizeof($this->edits); $i++) {
663			$orig = &$this->edits[$i]->orig;
664			if (is_array($orig)) {
665				$orig = array_slice($from_lines, $xi, sizeof($orig));
666				$xi += sizeof($orig);
667			}
668
669			$closing = &$this->edits[$i]->closing;
670			if (is_array($closing)) {
671				$closing = array_slice($to_lines, $yi, sizeof($closing));
672				$yi += sizeof($closing);
673			}
674		}
675	}
676}
677
678/**
679 * A class to format Diffs
680 *
681 * This class formats the diff in classic diff format.
682 * It is intended that this class be customized via inheritance,
683 * to obtain fancier outputs.
684 */
685class DiffFormatter
686{
687	/**
688	 * Number of leading context "lines" to preserve.
689	 *
690	 * This should be left at zero for this class, but subclasses
691	 * may want to set this to other values.
692	 */
693	var $leading_context_lines = 0;
694
695	/**
696	 * Number of trailing context "lines" to preserve.
697	 *
698	 * This should be left at zero for this class, but subclasses
699	 * may want to set this to other values.
700	 */
701	var $trailing_context_lines = 0;
702
703	/**
704	 * Format a diff.
705	 *
706	 * @param $diff object A Diff object.
707	 * @return string The formatted output.
708	 */
709	function format($diff) {
710
711		$xi = $yi = 1;
712		$block = false;
713		$context = array();
714
715		$nlead = $this->leading_context_lines;
716		$ntrail = $this->trailing_context_lines;
717
718		$this->_start_diff();
719
720		foreach ($diff->edits as $edit) {
721			if ($edit->type == 'copy') {
722				if (is_array($block)) {
723					if (sizeof($edit->orig) <= $nlead + $ntrail) {
724						$block[] = $edit;
725					}
726					else{
727						if ($ntrail) {
728							$context = array_slice($edit->orig, 0, $ntrail);
729							$block[] = new _DiffOp_Copy($context);
730						}
731						$this->_block($x0, $ntrail + $xi - $x0,
732									  $y0, $ntrail + $yi - $y0,
733									  $block);
734						$block = false;
735					}
736				}
737				$context = $edit->orig;
738			}
739			else {
740				if (! is_array($block)) {
741					$context = array_slice($context, sizeof($context) - $nlead);
742					$x0 = $xi - sizeof($context);
743					$y0 = $yi - sizeof($context);
744					$block = array();
745					if ($context)
746						$block[] = new _DiffOp_Copy($context);
747				}
748				$block[] = $edit;
749			}
750
751			if ($edit->orig)
752				$xi += sizeof($edit->orig);
753			if ($edit->closing)
754				$yi += sizeof($edit->closing);
755		}
756
757		if (is_array($block))
758			$this->_block($x0, $xi - $x0,
759						  $y0, $yi - $y0,
760						  $block);
761
762		return $this->_end_diff();
763	}
764
765	function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
766		$this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
767		foreach ($edits as $edit) {
768			if ($edit->type == 'copy')
769				$this->_context($edit->orig);
770			elseif ($edit->type == 'add')
771				$this->_added($edit->closing);
772			elseif ($edit->type == 'delete')
773				$this->_deleted($edit->orig);
774			elseif ($edit->type == 'change')
775				$this->_changed($edit->orig, $edit->closing);
776			else
777				trigger_error("Unknown edit type", E_USER_ERROR);
778		}
779		$this->_end_block();
780	}
781
782	function _start_diff() {
783		ob_start();
784	}
785
786	function _end_diff() {
787		$val = ob_get_contents();
788		ob_end_clean();
789		return $val;
790	}
791
792	function _block_header($xbeg, $xlen, $ybeg, $ylen) {
793		if ($xlen > 1)
794			$xbeg .= "," . ($xbeg + $xlen - 1);
795		if ($ylen > 1)
796			$ybeg .= "," . ($ybeg + $ylen - 1);
797
798		return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
799	}
800
801	function _start_block($header) {
802		echo $header;
803	}
804
805	function _end_block() {
806	}
807
808	function _lines($lines, $prefix = ' ') {
809		foreach ($lines as $line)
810			echo "$prefix $line\n";
811	}
812
813	function _context($lines) {
814		$this->_lines($lines);
815	}
816
817	function _added($lines) {
818		$this->_lines($lines, ">");
819	}
820	function _deleted($lines) {
821		$this->_lines($lines, "<");
822	}
823
824	function _changed($orig, $closing) {
825		$this->_deleted($orig);
826		echo "---\n";
827		$this->_added($closing);
828	}
829}
830
831
832/**
833 *	Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
834 *
835 */
836
837define('NBSP', "\xA0");			// iso-8859-x non-breaking space.
838
839class _HWLDF_WordAccumulator {
840	function _HWLDF_WordAccumulator () {
841		$this->_lines = array();
842		$this->_line = '';
843		$this->_group = '';
844		$this->_tag = '';
845	}
846
847	function _flushGroup ($new_tag) {
848		if ($this->_group !== '') {
849	  if ($this->_tag == 'mark')
850			$this->_line .= '<span class="diffchange">'.$this->_group.'</span>';
851	  else
852		$this->_line .= $this->_group;
853	}
854		$this->_group = '';
855		$this->_tag = $new_tag;
856	}
857
858	function _flushLine ($new_tag) {
859		$this->_flushGroup($new_tag);
860		if ($this->_line != '')
861			$this->_lines[] = $this->_line;
862		$this->_line = '';
863	}
864
865	function addWords ($words, $tag = '') {
866		if ($tag != $this->_tag)
867			$this->_flushGroup($tag);
868
869		foreach ($words as $word) {
870			// new-line should only come as first char of word.
871			if ($word == '')
872				continue;
873			if ($word[0] == "\n") {
874				$this->_group .= NBSP;
875				$this->_flushLine($tag);
876				$word = substr($word, 1);
877			}
878			assert(!strstr($word, "\n"));
879			$this->_group .= $word;
880		}
881	}
882
883	function getLines() {
884		$this->_flushLine('~done');
885		return $this->_lines;
886	}
887}
888
889class WordLevelDiff extends MappedDiff
890{
891	function WordLevelDiff ($orig_lines, $closing_lines) {
892		list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
893		list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
894
895
896		$this->MappedDiff($orig_words, $closing_words,
897						  $orig_stripped, $closing_stripped);
898	}
899
900	function _split($lines) {
901		// FIXME: fix POSIX char class.
902#		 if (!preg_match_all('/ ( [^\S\n]+ | [[:alnum:]]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
903		if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
904							implode("\n", $lines),
905							$m)) {
906			return array(array(''), array(''));
907		}
908		return array($m[0], $m[1]);
909	}
910
911	function orig () {
912		$orig = new _HWLDF_WordAccumulator;
913
914		foreach ($this->edits as $edit) {
915			if ($edit->type == 'copy')
916				$orig->addWords($edit->orig);
917			elseif ($edit->orig)
918				$orig->addWords($edit->orig, 'mark');
919		}
920		return $orig->getLines();
921	}
922
923	function closing () {
924		$closing = new _HWLDF_WordAccumulator;
925
926		foreach ($this->edits as $edit) {
927			if ($edit->type == 'copy')
928				$closing->addWords($edit->closing);
929			elseif ($edit->closing)
930				$closing->addWords($edit->closing, 'mark');
931		}
932		return $closing->getLines();
933	}
934}
935
936/**
937 * "Unified" diff formatter.
938 *
939 * This class formats the diff in classic "unified diff" format.
940 */
941class UnifiedDiffFormatter extends DiffFormatter
942{
943    function UnifiedDiffFormatter($context_lines = 4) {
944        $this->leading_context_lines = $context_lines;
945        $this->trailing_context_lines = $context_lines;
946    }
947
948    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
949        if ($xlen != 1)
950            $xbeg .= "," . $xlen;
951        if ($ylen != 1)
952            $ybeg .= "," . $ylen;
953        return "@@ -$xbeg +$ybeg @@\n";
954    }
955
956    function _added($lines) {
957        $this->_lines($lines, "+");
958    }
959    function _deleted($lines) {
960        $this->_lines($lines, "-");
961    }
962    function _changed($orig, $final) {
963        $this->_deleted($orig);
964        $this->_added($final);
965    }
966}
967
968/**
969 *	Wikipedia Table style diff formatter.
970 *
971 */
972class TableDiffFormatter extends DiffFormatter
973{
974	function TableDiffFormatter() {
975		$this->leading_context_lines = 2;
976		$this->trailing_context_lines = 2;
977	}
978
979  function _pre($text){
980    $text = htmlspecialchars($text);
981    $text = str_replace('  ',' &nbsp;',$text);
982    return $text;
983  }
984
985  function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
986    global $lang;
987    $l1 = $lang['line'].' '.$xbeg;
988    $l2 = $lang['line'].' '.$ybeg;
989		$r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
990		  '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
991		return $r;
992	}
993
994	function _start_block( $header ) {
995		print( $header );
996	}
997
998	function _end_block() {
999	}
1000
1001	function _lines( $lines, $prefix=' ', $color="white" ) {
1002	}
1003
1004	function addedLine( $line ) {
1005    $line = str_replace('  ','&nbsp; ',$line);
1006		return '<td>+</td><td class="diff-addedline">' .
1007		  $line.'</td>';
1008	}
1009
1010	function deletedLine( $line ) {
1011    $line = str_replace('  ','&nbsp; ',$line);
1012		return '<td>-</td><td class="diff-deletedline">' .
1013		  $line.'</td>';
1014	}
1015
1016	function emptyLine() {
1017    $line = str_replace('  ','&nbsp; ',$line);
1018		return '<td colspan="2">&nbsp;</td>';
1019	}
1020
1021	function contextLine( $line ) {
1022    $line = str_replace('  ','&nbsp; ',$line);
1023		return '<td> </td><td class="diff-context">'.$line.'</td>';
1024	}
1025
1026	function _added($lines) {
1027		foreach ($lines as $line) {
1028			print( '<tr>' . $this->emptyLine() .
1029			  $this->addedLine( $line ) . "</tr>\n" );
1030		}
1031	}
1032
1033	function _deleted($lines) {
1034		foreach ($lines as $line) {
1035			print( '<tr>' . $this->deletedLine( $line ) .
1036			  $this->emptyLine() . "</tr>\n" );
1037		}
1038	}
1039
1040	function _context( $lines ) {
1041		foreach ($lines as $line) {
1042			print( '<tr>' . $this->contextLine( $line ) .
1043			  $this->contextLine( $line ) . "</tr>\n" );
1044		}
1045	}
1046
1047	function _changed( $orig, $closing ) {
1048		$diff = new WordLevelDiff( $orig, $closing );
1049		$del = $diff->orig();
1050		$add = $diff->closing();
1051
1052		while ( $line = array_shift( $del ) ) {
1053			$aline = array_shift( $add );
1054			print( '<tr>' . $this->deletedLine( $line ) .
1055			  $this->addedLine( $aline ) . "</tr>\n" );
1056		}
1057		$this->_added( $add ); # If any leftovers
1058	}
1059}
1060
1061
1062//Setup VIM: ex: et ts=2 enc=utf-8 :
1063