xref: /dokuwiki/inc/DifferenceEngine.php (revision f3f0262c480d7e509b008d37c90aed884532bba8)
1*f3f0262cSandi<?php
2*f3f0262cSandi
3*f3f0262cSandi// A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
4*f3f0262cSandi//
5*f3f0262cSandi// Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
6*f3f0262cSandi// You may copy this code freely under the conditions of the GPL.
7*f3f0262cSandi//
8*f3f0262cSandi
9*f3f0262cSandidefine('USE_ASSERTS', function_exists('assert'));
10*f3f0262cSandi
11*f3f0262cSandiclass _DiffOp {
12*f3f0262cSandi	var $type;
13*f3f0262cSandi	var $orig;
14*f3f0262cSandi	var $closing;
15*f3f0262cSandi
16*f3f0262cSandi	function reverse() {
17*f3f0262cSandi		trigger_error("pure virtual", E_USER_ERROR);
18*f3f0262cSandi	}
19*f3f0262cSandi
20*f3f0262cSandi	function norig() {
21*f3f0262cSandi		return $this->orig ? sizeof($this->orig) : 0;
22*f3f0262cSandi	}
23*f3f0262cSandi
24*f3f0262cSandi	function nclosing() {
25*f3f0262cSandi		return $this->closing ? sizeof($this->closing) : 0;
26*f3f0262cSandi	}
27*f3f0262cSandi}
28*f3f0262cSandi
29*f3f0262cSandiclass _DiffOp_Copy extends _DiffOp {
30*f3f0262cSandi	var $type = 'copy';
31*f3f0262cSandi
32*f3f0262cSandi	function _DiffOp_Copy ($orig, $closing = false) {
33*f3f0262cSandi		if (!is_array($closing))
34*f3f0262cSandi			$closing = $orig;
35*f3f0262cSandi		$this->orig = $orig;
36*f3f0262cSandi		$this->closing = $closing;
37*f3f0262cSandi	}
38*f3f0262cSandi
39*f3f0262cSandi	function reverse() {
40*f3f0262cSandi		return new _DiffOp_Copy($this->closing, $this->orig);
41*f3f0262cSandi	}
42*f3f0262cSandi}
43*f3f0262cSandi
44*f3f0262cSandiclass _DiffOp_Delete extends _DiffOp {
45*f3f0262cSandi	var $type = 'delete';
46*f3f0262cSandi
47*f3f0262cSandi	function _DiffOp_Delete ($lines) {
48*f3f0262cSandi		$this->orig = $lines;
49*f3f0262cSandi		$this->closing = false;
50*f3f0262cSandi	}
51*f3f0262cSandi
52*f3f0262cSandi	function reverse() {
53*f3f0262cSandi		return new _DiffOp_Add($this->orig);
54*f3f0262cSandi	}
55*f3f0262cSandi}
56*f3f0262cSandi
57*f3f0262cSandiclass _DiffOp_Add extends _DiffOp {
58*f3f0262cSandi	var $type = 'add';
59*f3f0262cSandi
60*f3f0262cSandi	function _DiffOp_Add ($lines) {
61*f3f0262cSandi		$this->closing = $lines;
62*f3f0262cSandi		$this->orig = false;
63*f3f0262cSandi	}
64*f3f0262cSandi
65*f3f0262cSandi	function reverse() {
66*f3f0262cSandi		return new _DiffOp_Delete($this->closing);
67*f3f0262cSandi	}
68*f3f0262cSandi}
69*f3f0262cSandi
70*f3f0262cSandiclass _DiffOp_Change extends _DiffOp {
71*f3f0262cSandi	var $type = 'change';
72*f3f0262cSandi
73*f3f0262cSandi	function _DiffOp_Change ($orig, $closing) {
74*f3f0262cSandi		$this->orig = $orig;
75*f3f0262cSandi		$this->closing = $closing;
76*f3f0262cSandi	}
77*f3f0262cSandi
78*f3f0262cSandi	function reverse() {
79*f3f0262cSandi		return new _DiffOp_Change($this->closing, $this->orig);
80*f3f0262cSandi	}
81*f3f0262cSandi}
82*f3f0262cSandi
83*f3f0262cSandi
84*f3f0262cSandi/**
85*f3f0262cSandi * Class used internally by Diff to actually compute the diffs.
86*f3f0262cSandi *
87*f3f0262cSandi * The algorithm used here is mostly lifted from the perl module
88*f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
89*f3f0262cSandi *	 http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
90*f3f0262cSandi *
91*f3f0262cSandi * More ideas are taken from:
92*f3f0262cSandi *	 http://www.ics.uci.edu/~eppstein/161/960229.html
93*f3f0262cSandi *
94*f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU
95*f3f0262cSandi * diffutils-2.7, which can be found at:
96*f3f0262cSandi *	 ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
97*f3f0262cSandi *
98*f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
99*f3f0262cSandi * are my own.
100*f3f0262cSandi *
101*f3f0262cSandi * @author Geoffrey T. Dairiki
102*f3f0262cSandi * @access private
103*f3f0262cSandi */
104*f3f0262cSandiclass _DiffEngine
105*f3f0262cSandi{
106*f3f0262cSandi	function diff ($from_lines, $to_lines) {
107*f3f0262cSandi		$n_from = sizeof($from_lines);
108*f3f0262cSandi		$n_to = sizeof($to_lines);
109*f3f0262cSandi
110*f3f0262cSandi		$this->xchanged = $this->ychanged = array();
111*f3f0262cSandi		$this->xv = $this->yv = array();
112*f3f0262cSandi		$this->xind = $this->yind = array();
113*f3f0262cSandi		unset($this->seq);
114*f3f0262cSandi		unset($this->in_seq);
115*f3f0262cSandi		unset($this->lcs);
116*f3f0262cSandi
117*f3f0262cSandi		// Skip leading common lines.
118*f3f0262cSandi		for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
119*f3f0262cSandi			if ($from_lines[$skip] != $to_lines[$skip])
120*f3f0262cSandi				break;
121*f3f0262cSandi			$this->xchanged[$skip] = $this->ychanged[$skip] = false;
122*f3f0262cSandi		}
123*f3f0262cSandi		// Skip trailing common lines.
124*f3f0262cSandi		$xi = $n_from; $yi = $n_to;
125*f3f0262cSandi		for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
126*f3f0262cSandi			if ($from_lines[$xi] != $to_lines[$yi])
127*f3f0262cSandi				break;
128*f3f0262cSandi			$this->xchanged[$xi] = $this->ychanged[$yi] = false;
129*f3f0262cSandi		}
130*f3f0262cSandi
131*f3f0262cSandi		// Ignore lines which do not exist in both files.
132*f3f0262cSandi		for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
133*f3f0262cSandi			$xhash[$from_lines[$xi]] = 1;
134*f3f0262cSandi		for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
135*f3f0262cSandi			$line = $to_lines[$yi];
136*f3f0262cSandi			if ( ($this->ychanged[$yi] = empty($xhash[$line])) )
137*f3f0262cSandi				continue;
138*f3f0262cSandi			$yhash[$line] = 1;
139*f3f0262cSandi			$this->yv[] = $line;
140*f3f0262cSandi			$this->yind[] = $yi;
141*f3f0262cSandi		}
142*f3f0262cSandi		for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
143*f3f0262cSandi			$line = $from_lines[$xi];
144*f3f0262cSandi			if ( ($this->xchanged[$xi] = empty($yhash[$line])) )
145*f3f0262cSandi				continue;
146*f3f0262cSandi			$this->xv[] = $line;
147*f3f0262cSandi			$this->xind[] = $xi;
148*f3f0262cSandi		}
149*f3f0262cSandi
150*f3f0262cSandi		// Find the LCS.
151*f3f0262cSandi		$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
152*f3f0262cSandi
153*f3f0262cSandi		// Merge edits when possible
154*f3f0262cSandi		$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
155*f3f0262cSandi		$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
156*f3f0262cSandi
157*f3f0262cSandi		// Compute the edit operations.
158*f3f0262cSandi		$edits = array();
159*f3f0262cSandi		$xi = $yi = 0;
160*f3f0262cSandi		while ($xi < $n_from || $yi < $n_to) {
161*f3f0262cSandi			USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
162*f3f0262cSandi			USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
163*f3f0262cSandi
164*f3f0262cSandi			// Skip matching "snake".
165*f3f0262cSandi			$copy = array();
166*f3f0262cSandi			while ( $xi < $n_from && $yi < $n_to
167*f3f0262cSandi					&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
168*f3f0262cSandi				$copy[] = $from_lines[$xi++];
169*f3f0262cSandi				++$yi;
170*f3f0262cSandi			}
171*f3f0262cSandi			if ($copy)
172*f3f0262cSandi				$edits[] = new _DiffOp_Copy($copy);
173*f3f0262cSandi
174*f3f0262cSandi			// Find deletes & adds.
175*f3f0262cSandi			$delete = array();
176*f3f0262cSandi			while ($xi < $n_from && $this->xchanged[$xi])
177*f3f0262cSandi				$delete[] = $from_lines[$xi++];
178*f3f0262cSandi
179*f3f0262cSandi			$add = array();
180*f3f0262cSandi			while ($yi < $n_to && $this->ychanged[$yi])
181*f3f0262cSandi				$add[] = $to_lines[$yi++];
182*f3f0262cSandi
183*f3f0262cSandi			if ($delete && $add)
184*f3f0262cSandi				$edits[] = new _DiffOp_Change($delete, $add);
185*f3f0262cSandi			elseif ($delete)
186*f3f0262cSandi				$edits[] = new _DiffOp_Delete($delete);
187*f3f0262cSandi			elseif ($add)
188*f3f0262cSandi				$edits[] = new _DiffOp_Add($add);
189*f3f0262cSandi		}
190*f3f0262cSandi		return $edits;
191*f3f0262cSandi	}
192*f3f0262cSandi
193*f3f0262cSandi
194*f3f0262cSandi	/* Divide the Largest Common Subsequence (LCS) of the sequences
195*f3f0262cSandi	 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
196*f3f0262cSandi	 * sized segments.
197*f3f0262cSandi	 *
198*f3f0262cSandi	 * Returns (LCS, PTS).	LCS is the length of the LCS. PTS is an
199*f3f0262cSandi	 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
200*f3f0262cSandi	 * sub sequences.  The first sub-sequence is contained in [X0, X1),
201*f3f0262cSandi	 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
202*f3f0262cSandi	 * that (X0, Y0) == (XOFF, YOFF) and
203*f3f0262cSandi	 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
204*f3f0262cSandi	 *
205*f3f0262cSandi	 * This function assumes that the first lines of the specified portions
206*f3f0262cSandi	 * of the two files do not match, and likewise that the last lines do not
207*f3f0262cSandi	 * match.  The caller must trim matching lines from the beginning and end
208*f3f0262cSandi	 * of the portions it is going to specify.
209*f3f0262cSandi	 */
210*f3f0262cSandi	function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
211*f3f0262cSandi	$flip = false;
212*f3f0262cSandi
213*f3f0262cSandi	if ($xlim - $xoff > $ylim - $yoff) {
214*f3f0262cSandi		// Things seems faster (I'm not sure I understand why)
215*f3f0262cSandi			// when the shortest sequence in X.
216*f3f0262cSandi			$flip = true;
217*f3f0262cSandi		list ($xoff, $xlim, $yoff, $ylim)
218*f3f0262cSandi		= array( $yoff, $ylim, $xoff, $xlim);
219*f3f0262cSandi		}
220*f3f0262cSandi
221*f3f0262cSandi	if ($flip)
222*f3f0262cSandi		for ($i = $ylim - 1; $i >= $yoff; $i--)
223*f3f0262cSandi		$ymatches[$this->xv[$i]][] = $i;
224*f3f0262cSandi	else
225*f3f0262cSandi		for ($i = $ylim - 1; $i >= $yoff; $i--)
226*f3f0262cSandi		$ymatches[$this->yv[$i]][] = $i;
227*f3f0262cSandi
228*f3f0262cSandi	$this->lcs = 0;
229*f3f0262cSandi	$this->seq[0]= $yoff - 1;
230*f3f0262cSandi	$this->in_seq = array();
231*f3f0262cSandi	$ymids[0] = array();
232*f3f0262cSandi
233*f3f0262cSandi	$numer = $xlim - $xoff + $nchunks - 1;
234*f3f0262cSandi	$x = $xoff;
235*f3f0262cSandi	for ($chunk = 0; $chunk < $nchunks; $chunk++) {
236*f3f0262cSandi		if ($chunk > 0)
237*f3f0262cSandi		for ($i = 0; $i <= $this->lcs; $i++)
238*f3f0262cSandi			$ymids[$i][$chunk-1] = $this->seq[$i];
239*f3f0262cSandi
240*f3f0262cSandi		$x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
241*f3f0262cSandi		for ( ; $x < $x1; $x++) {
242*f3f0262cSandi				$line = $flip ? $this->yv[$x] : $this->xv[$x];
243*f3f0262cSandi				if (empty($ymatches[$line]))
244*f3f0262cSandi			continue;
245*f3f0262cSandi		$matches = $ymatches[$line];
246*f3f0262cSandi				reset($matches);
247*f3f0262cSandi		while (list ($junk, $y) = each($matches))
248*f3f0262cSandi			if (empty($this->in_seq[$y])) {
249*f3f0262cSandi			$k = $this->_lcs_pos($y);
250*f3f0262cSandi			USE_ASSERTS && assert($k > 0);
251*f3f0262cSandi			$ymids[$k] = $ymids[$k-1];
252*f3f0262cSandi			break;
253*f3f0262cSandi					}
254*f3f0262cSandi		while (list ($junk, $y) = each($matches)) {
255*f3f0262cSandi			if ($y > $this->seq[$k-1]) {
256*f3f0262cSandi			USE_ASSERTS && assert($y < $this->seq[$k]);
257*f3f0262cSandi			// Optimization: this is a common case:
258*f3f0262cSandi			//	next match is just replacing previous match.
259*f3f0262cSandi			$this->in_seq[$this->seq[$k]] = false;
260*f3f0262cSandi			$this->seq[$k] = $y;
261*f3f0262cSandi			$this->in_seq[$y] = 1;
262*f3f0262cSandi					}
263*f3f0262cSandi			else if (empty($this->in_seq[$y])) {
264*f3f0262cSandi			$k = $this->_lcs_pos($y);
265*f3f0262cSandi			USE_ASSERTS && assert($k > 0);
266*f3f0262cSandi			$ymids[$k] = $ymids[$k-1];
267*f3f0262cSandi					}
268*f3f0262cSandi				}
269*f3f0262cSandi			}
270*f3f0262cSandi		}
271*f3f0262cSandi
272*f3f0262cSandi	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
273*f3f0262cSandi	$ymid = $ymids[$this->lcs];
274*f3f0262cSandi	for ($n = 0; $n < $nchunks - 1; $n++) {
275*f3f0262cSandi		$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
276*f3f0262cSandi		$y1 = $ymid[$n] + 1;
277*f3f0262cSandi		$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
278*f3f0262cSandi		}
279*f3f0262cSandi	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
280*f3f0262cSandi
281*f3f0262cSandi	return array($this->lcs, $seps);
282*f3f0262cSandi	}
283*f3f0262cSandi
284*f3f0262cSandi	function _lcs_pos ($ypos) {
285*f3f0262cSandi	$end = $this->lcs;
286*f3f0262cSandi	if ($end == 0 || $ypos > $this->seq[$end]) {
287*f3f0262cSandi		$this->seq[++$this->lcs] = $ypos;
288*f3f0262cSandi		$this->in_seq[$ypos] = 1;
289*f3f0262cSandi		return $this->lcs;
290*f3f0262cSandi		}
291*f3f0262cSandi
292*f3f0262cSandi	$beg = 1;
293*f3f0262cSandi	while ($beg < $end) {
294*f3f0262cSandi		$mid = (int)(($beg + $end) / 2);
295*f3f0262cSandi		if ( $ypos > $this->seq[$mid] )
296*f3f0262cSandi		$beg = $mid + 1;
297*f3f0262cSandi		else
298*f3f0262cSandi		$end = $mid;
299*f3f0262cSandi		}
300*f3f0262cSandi
301*f3f0262cSandi	USE_ASSERTS && assert($ypos != $this->seq[$end]);
302*f3f0262cSandi
303*f3f0262cSandi	$this->in_seq[$this->seq[$end]] = false;
304*f3f0262cSandi	$this->seq[$end] = $ypos;
305*f3f0262cSandi	$this->in_seq[$ypos] = 1;
306*f3f0262cSandi	return $end;
307*f3f0262cSandi	}
308*f3f0262cSandi
309*f3f0262cSandi	/* Find LCS of two sequences.
310*f3f0262cSandi	 *
311*f3f0262cSandi	 * The results are recorded in the vectors $this->{x,y}changed[], by
312*f3f0262cSandi	 * storing a 1 in the element for each line that is an insertion
313*f3f0262cSandi	 * or deletion (ie. is not in the LCS).
314*f3f0262cSandi	 *
315*f3f0262cSandi	 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
316*f3f0262cSandi	 *
317*f3f0262cSandi	 * Note that XLIM, YLIM are exclusive bounds.
318*f3f0262cSandi	 * All line numbers are origin-0 and discarded lines are not counted.
319*f3f0262cSandi	 */
320*f3f0262cSandi	function _compareseq ($xoff, $xlim, $yoff, $ylim) {
321*f3f0262cSandi	// Slide down the bottom initial diagonal.
322*f3f0262cSandi	while ($xoff < $xlim && $yoff < $ylim
323*f3f0262cSandi			   && $this->xv[$xoff] == $this->yv[$yoff]) {
324*f3f0262cSandi		++$xoff;
325*f3f0262cSandi		++$yoff;
326*f3f0262cSandi		}
327*f3f0262cSandi
328*f3f0262cSandi	// Slide up the top initial diagonal.
329*f3f0262cSandi	while ($xlim > $xoff && $ylim > $yoff
330*f3f0262cSandi			   && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
331*f3f0262cSandi		--$xlim;
332*f3f0262cSandi		--$ylim;
333*f3f0262cSandi		}
334*f3f0262cSandi
335*f3f0262cSandi	if ($xoff == $xlim || $yoff == $ylim)
336*f3f0262cSandi		$lcs = 0;
337*f3f0262cSandi	else {
338*f3f0262cSandi		// This is ad hoc but seems to work well.
339*f3f0262cSandi		//$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
340*f3f0262cSandi		//$nchunks = max(2,min(8,(int)$nchunks));
341*f3f0262cSandi		$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
342*f3f0262cSandi		list ($lcs, $seps)
343*f3f0262cSandi		= $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
344*f3f0262cSandi		}
345*f3f0262cSandi
346*f3f0262cSandi	if ($lcs == 0) {
347*f3f0262cSandi		// X and Y sequences have no common subsequence:
348*f3f0262cSandi		// mark all changed.
349*f3f0262cSandi		while ($yoff < $ylim)
350*f3f0262cSandi		$this->ychanged[$this->yind[$yoff++]] = 1;
351*f3f0262cSandi		while ($xoff < $xlim)
352*f3f0262cSandi		$this->xchanged[$this->xind[$xoff++]] = 1;
353*f3f0262cSandi		}
354*f3f0262cSandi	else {
355*f3f0262cSandi		// Use the partitions to split this problem into subproblems.
356*f3f0262cSandi		reset($seps);
357*f3f0262cSandi		$pt1 = $seps[0];
358*f3f0262cSandi		while ($pt2 = next($seps)) {
359*f3f0262cSandi		$this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
360*f3f0262cSandi		$pt1 = $pt2;
361*f3f0262cSandi			}
362*f3f0262cSandi		}
363*f3f0262cSandi	}
364*f3f0262cSandi
365*f3f0262cSandi	/* Adjust inserts/deletes of identical lines to join changes
366*f3f0262cSandi	 * as much as possible.
367*f3f0262cSandi	 *
368*f3f0262cSandi	 * We do something when a run of changed lines include a
369*f3f0262cSandi	 * line at one end and has an excluded, identical line at the other.
370*f3f0262cSandi	 * We are free to choose which identical line is included.
371*f3f0262cSandi	 * `compareseq' usually chooses the one at the beginning,
372*f3f0262cSandi	 * but usually it is cleaner to consider the following identical line
373*f3f0262cSandi	 * to be the "change".
374*f3f0262cSandi	 *
375*f3f0262cSandi	 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
376*f3f0262cSandi	 */
377*f3f0262cSandi	function _shift_boundaries ($lines, &$changed, $other_changed) {
378*f3f0262cSandi	$i = 0;
379*f3f0262cSandi	$j = 0;
380*f3f0262cSandi
381*f3f0262cSandi	USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
382*f3f0262cSandi	$len = sizeof($lines);
383*f3f0262cSandi	$other_len = sizeof($other_changed);
384*f3f0262cSandi
385*f3f0262cSandi	while (1) {
386*f3f0262cSandi		/*
387*f3f0262cSandi		 * Scan forwards to find beginning of another run of changes.
388*f3f0262cSandi		 * Also keep track of the corresponding point in the other file.
389*f3f0262cSandi		 *
390*f3f0262cSandi		 * Throughout this code, $i and $j are adjusted together so that
391*f3f0262cSandi		 * the first $i elements of $changed and the first $j elements
392*f3f0262cSandi		 * of $other_changed both contain the same number of zeros
393*f3f0262cSandi		 * (unchanged lines).
394*f3f0262cSandi		 * Furthermore, $j is always kept so that $j == $other_len or
395*f3f0262cSandi		 * $other_changed[$j] == false.
396*f3f0262cSandi		 */
397*f3f0262cSandi		while ($j < $other_len && $other_changed[$j])
398*f3f0262cSandi		$j++;
399*f3f0262cSandi
400*f3f0262cSandi		while ($i < $len && ! $changed[$i]) {
401*f3f0262cSandi		USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
402*f3f0262cSandi		$i++; $j++;
403*f3f0262cSandi		while ($j < $other_len && $other_changed[$j])
404*f3f0262cSandi			$j++;
405*f3f0262cSandi			}
406*f3f0262cSandi
407*f3f0262cSandi		if ($i == $len)
408*f3f0262cSandi		break;
409*f3f0262cSandi
410*f3f0262cSandi		$start = $i;
411*f3f0262cSandi
412*f3f0262cSandi		// Find the end of this run of changes.
413*f3f0262cSandi		while (++$i < $len && $changed[$i])
414*f3f0262cSandi		continue;
415*f3f0262cSandi
416*f3f0262cSandi		do {
417*f3f0262cSandi		/*
418*f3f0262cSandi		 * Record the length of this run of changes, so that
419*f3f0262cSandi		 * we can later determine whether the run has grown.
420*f3f0262cSandi		 */
421*f3f0262cSandi		$runlength = $i - $start;
422*f3f0262cSandi
423*f3f0262cSandi		/*
424*f3f0262cSandi		 * Move the changed region back, so long as the
425*f3f0262cSandi		 * previous unchanged line matches the last changed one.
426*f3f0262cSandi		 * This merges with previous changed regions.
427*f3f0262cSandi		 */
428*f3f0262cSandi		while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
429*f3f0262cSandi			$changed[--$start] = 1;
430*f3f0262cSandi			$changed[--$i] = false;
431*f3f0262cSandi			while ($start > 0 && $changed[$start - 1])
432*f3f0262cSandi			$start--;
433*f3f0262cSandi			USE_ASSERTS && assert('$j > 0');
434*f3f0262cSandi			while ($other_changed[--$j])
435*f3f0262cSandi			continue;
436*f3f0262cSandi			USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
437*f3f0262cSandi				}
438*f3f0262cSandi
439*f3f0262cSandi		/*
440*f3f0262cSandi		 * Set CORRESPONDING to the end of the changed run, at the last
441*f3f0262cSandi		 * point where it corresponds to a changed run in the other file.
442*f3f0262cSandi		 * CORRESPONDING == LEN means no such point has been found.
443*f3f0262cSandi		 */
444*f3f0262cSandi		$corresponding = $j < $other_len ? $i : $len;
445*f3f0262cSandi
446*f3f0262cSandi		/*
447*f3f0262cSandi		 * Move the changed region forward, so long as the
448*f3f0262cSandi		 * first changed line matches the following unchanged one.
449*f3f0262cSandi		 * This merges with following changed regions.
450*f3f0262cSandi		 * Do this second, so that if there are no merges,
451*f3f0262cSandi		 * the changed region is moved forward as far as possible.
452*f3f0262cSandi		 */
453*f3f0262cSandi		while ($i < $len && $lines[$start] == $lines[$i]) {
454*f3f0262cSandi			$changed[$start++] = false;
455*f3f0262cSandi			$changed[$i++] = 1;
456*f3f0262cSandi			while ($i < $len && $changed[$i])
457*f3f0262cSandi			$i++;
458*f3f0262cSandi
459*f3f0262cSandi			USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
460*f3f0262cSandi			$j++;
461*f3f0262cSandi			if ($j < $other_len && $other_changed[$j]) {
462*f3f0262cSandi			$corresponding = $i;
463*f3f0262cSandi			while ($j < $other_len && $other_changed[$j])
464*f3f0262cSandi				$j++;
465*f3f0262cSandi					}
466*f3f0262cSandi				}
467*f3f0262cSandi			} while ($runlength != $i - $start);
468*f3f0262cSandi
469*f3f0262cSandi		/*
470*f3f0262cSandi		 * If possible, move the fully-merged run of changes
471*f3f0262cSandi		 * back to a corresponding run in the other file.
472*f3f0262cSandi		 */
473*f3f0262cSandi		while ($corresponding < $i) {
474*f3f0262cSandi		$changed[--$start] = 1;
475*f3f0262cSandi		$changed[--$i] = 0;
476*f3f0262cSandi		USE_ASSERTS && assert('$j > 0');
477*f3f0262cSandi		while ($other_changed[--$j])
478*f3f0262cSandi			continue;
479*f3f0262cSandi		USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
480*f3f0262cSandi			}
481*f3f0262cSandi		}
482*f3f0262cSandi	}
483*f3f0262cSandi}
484*f3f0262cSandi
485*f3f0262cSandi/**
486*f3f0262cSandi * Class representing a 'diff' between two sequences of strings.
487*f3f0262cSandi */
488*f3f0262cSandiclass Diff
489*f3f0262cSandi{
490*f3f0262cSandi	var $edits;
491*f3f0262cSandi
492*f3f0262cSandi	/**
493*f3f0262cSandi	 * Constructor.
494*f3f0262cSandi	 * Computes diff between sequences of strings.
495*f3f0262cSandi	 *
496*f3f0262cSandi	 * @param $from_lines array An array of strings.
497*f3f0262cSandi	 *		  (Typically these are lines from a file.)
498*f3f0262cSandi	 * @param $to_lines array An array of strings.
499*f3f0262cSandi	 */
500*f3f0262cSandi	function Diff($from_lines, $to_lines) {
501*f3f0262cSandi		$eng = new _DiffEngine;
502*f3f0262cSandi		$this->edits = $eng->diff($from_lines, $to_lines);
503*f3f0262cSandi		//$this->_check($from_lines, $to_lines);
504*f3f0262cSandi	}
505*f3f0262cSandi
506*f3f0262cSandi	/**
507*f3f0262cSandi	 * Compute reversed Diff.
508*f3f0262cSandi	 *
509*f3f0262cSandi	 * SYNOPSIS:
510*f3f0262cSandi	 *
511*f3f0262cSandi	 *	$diff = new Diff($lines1, $lines2);
512*f3f0262cSandi	 *	$rev = $diff->reverse();
513*f3f0262cSandi	 * @return object A Diff object representing the inverse of the
514*f3f0262cSandi	 *				  original diff.
515*f3f0262cSandi	 */
516*f3f0262cSandi	function reverse () {
517*f3f0262cSandi	$rev = $this;
518*f3f0262cSandi		$rev->edits = array();
519*f3f0262cSandi		foreach ($this->edits as $edit) {
520*f3f0262cSandi			$rev->edits[] = $edit->reverse();
521*f3f0262cSandi		}
522*f3f0262cSandi	return $rev;
523*f3f0262cSandi	}
524*f3f0262cSandi
525*f3f0262cSandi	/**
526*f3f0262cSandi	 * Check for empty diff.
527*f3f0262cSandi	 *
528*f3f0262cSandi	 * @return bool True iff two sequences were identical.
529*f3f0262cSandi	 */
530*f3f0262cSandi	function isEmpty () {
531*f3f0262cSandi		foreach ($this->edits as $edit) {
532*f3f0262cSandi			if ($edit->type != 'copy')
533*f3f0262cSandi				return false;
534*f3f0262cSandi		}
535*f3f0262cSandi		return true;
536*f3f0262cSandi	}
537*f3f0262cSandi
538*f3f0262cSandi	/**
539*f3f0262cSandi	 * Compute the length of the Longest Common Subsequence (LCS).
540*f3f0262cSandi	 *
541*f3f0262cSandi	 * This is mostly for diagnostic purposed.
542*f3f0262cSandi	 *
543*f3f0262cSandi	 * @return int The length of the LCS.
544*f3f0262cSandi	 */
545*f3f0262cSandi	function lcs () {
546*f3f0262cSandi	$lcs = 0;
547*f3f0262cSandi		foreach ($this->edits as $edit) {
548*f3f0262cSandi			if ($edit->type == 'copy')
549*f3f0262cSandi				$lcs += sizeof($edit->orig);
550*f3f0262cSandi		}
551*f3f0262cSandi	return $lcs;
552*f3f0262cSandi	}
553*f3f0262cSandi
554*f3f0262cSandi	/**
555*f3f0262cSandi	 * Get the original set of lines.
556*f3f0262cSandi	 *
557*f3f0262cSandi	 * This reconstructs the $from_lines parameter passed to the
558*f3f0262cSandi	 * constructor.
559*f3f0262cSandi	 *
560*f3f0262cSandi	 * @return array The original sequence of strings.
561*f3f0262cSandi	 */
562*f3f0262cSandi	function orig() {
563*f3f0262cSandi		$lines = array();
564*f3f0262cSandi
565*f3f0262cSandi		foreach ($this->edits as $edit) {
566*f3f0262cSandi			if ($edit->orig)
567*f3f0262cSandi				array_splice($lines, sizeof($lines), 0, $edit->orig);
568*f3f0262cSandi		}
569*f3f0262cSandi		return $lines;
570*f3f0262cSandi	}
571*f3f0262cSandi
572*f3f0262cSandi	/**
573*f3f0262cSandi	 * Get the closing set of lines.
574*f3f0262cSandi	 *
575*f3f0262cSandi	 * This reconstructs the $to_lines parameter passed to the
576*f3f0262cSandi	 * constructor.
577*f3f0262cSandi	 *
578*f3f0262cSandi	 * @return array The sequence of strings.
579*f3f0262cSandi	 */
580*f3f0262cSandi	function closing() {
581*f3f0262cSandi		$lines = array();
582*f3f0262cSandi
583*f3f0262cSandi		foreach ($this->edits as $edit) {
584*f3f0262cSandi			if ($edit->closing)
585*f3f0262cSandi				array_splice($lines, sizeof($lines), 0, $edit->closing);
586*f3f0262cSandi		}
587*f3f0262cSandi		return $lines;
588*f3f0262cSandi	}
589*f3f0262cSandi
590*f3f0262cSandi	/**
591*f3f0262cSandi	 * Check a Diff for validity.
592*f3f0262cSandi	 *
593*f3f0262cSandi	 * This is here only for debugging purposes.
594*f3f0262cSandi	 */
595*f3f0262cSandi	function _check ($from_lines, $to_lines) {
596*f3f0262cSandi		if (serialize($from_lines) != serialize($this->orig()))
597*f3f0262cSandi			trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
598*f3f0262cSandi		if (serialize($to_lines) != serialize($this->closing()))
599*f3f0262cSandi			trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
600*f3f0262cSandi
601*f3f0262cSandi		$rev = $this->reverse();
602*f3f0262cSandi		if (serialize($to_lines) != serialize($rev->orig()))
603*f3f0262cSandi			trigger_error("Reversed original doesn't match", E_USER_ERROR);
604*f3f0262cSandi		if (serialize($from_lines) != serialize($rev->closing()))
605*f3f0262cSandi			trigger_error("Reversed closing doesn't match", E_USER_ERROR);
606*f3f0262cSandi
607*f3f0262cSandi
608*f3f0262cSandi		$prevtype = 'none';
609*f3f0262cSandi		foreach ($this->edits as $edit) {
610*f3f0262cSandi			if ( $prevtype == $edit->type )
611*f3f0262cSandi				trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
612*f3f0262cSandi			$prevtype = $edit->type;
613*f3f0262cSandi		}
614*f3f0262cSandi
615*f3f0262cSandi		$lcs = $this->lcs();
616*f3f0262cSandi		trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE);
617*f3f0262cSandi	}
618*f3f0262cSandi}
619*f3f0262cSandi
620*f3f0262cSandi/**
621*f3f0262cSandi * FIXME: bad name.
622*f3f0262cSandi */
623*f3f0262cSandiclass MappedDiff
624*f3f0262cSandiextends Diff
625*f3f0262cSandi{
626*f3f0262cSandi	/**
627*f3f0262cSandi	 * Constructor.
628*f3f0262cSandi	 *
629*f3f0262cSandi	 * Computes diff between sequences of strings.
630*f3f0262cSandi	 *
631*f3f0262cSandi	 * This can be used to compute things like
632*f3f0262cSandi	 * case-insensitve diffs, or diffs which ignore
633*f3f0262cSandi	 * changes in white-space.
634*f3f0262cSandi	 *
635*f3f0262cSandi	 * @param $from_lines array An array of strings.
636*f3f0262cSandi	 *	(Typically these are lines from a file.)
637*f3f0262cSandi	 *
638*f3f0262cSandi	 * @param $to_lines array An array of strings.
639*f3f0262cSandi	 *
640*f3f0262cSandi	 * @param $mapped_from_lines array This array should
641*f3f0262cSandi	 *	have the same size number of elements as $from_lines.
642*f3f0262cSandi	 *	The elements in $mapped_from_lines and
643*f3f0262cSandi	 *	$mapped_to_lines are what is actually compared
644*f3f0262cSandi	 *	when computing the diff.
645*f3f0262cSandi	 *
646*f3f0262cSandi	 * @param $mapped_to_lines array This array should
647*f3f0262cSandi	 *	have the same number of elements as $to_lines.
648*f3f0262cSandi	 */
649*f3f0262cSandi	function MappedDiff($from_lines, $to_lines,
650*f3f0262cSandi						$mapped_from_lines, $mapped_to_lines) {
651*f3f0262cSandi
652*f3f0262cSandi		assert(sizeof($from_lines) == sizeof($mapped_from_lines));
653*f3f0262cSandi		assert(sizeof($to_lines) == sizeof($mapped_to_lines));
654*f3f0262cSandi
655*f3f0262cSandi		$this->Diff($mapped_from_lines, $mapped_to_lines);
656*f3f0262cSandi
657*f3f0262cSandi		$xi = $yi = 0;
658*f3f0262cSandi		for ($i = 0; $i < sizeof($this->edits); $i++) {
659*f3f0262cSandi			$orig = &$this->edits[$i]->orig;
660*f3f0262cSandi			if (is_array($orig)) {
661*f3f0262cSandi				$orig = array_slice($from_lines, $xi, sizeof($orig));
662*f3f0262cSandi				$xi += sizeof($orig);
663*f3f0262cSandi			}
664*f3f0262cSandi
665*f3f0262cSandi			$closing = &$this->edits[$i]->closing;
666*f3f0262cSandi			if (is_array($closing)) {
667*f3f0262cSandi				$closing = array_slice($to_lines, $yi, sizeof($closing));
668*f3f0262cSandi				$yi += sizeof($closing);
669*f3f0262cSandi			}
670*f3f0262cSandi		}
671*f3f0262cSandi	}
672*f3f0262cSandi}
673*f3f0262cSandi
674*f3f0262cSandi/**
675*f3f0262cSandi * A class to format Diffs
676*f3f0262cSandi *
677*f3f0262cSandi * This class formats the diff in classic diff format.
678*f3f0262cSandi * It is intended that this class be customized via inheritance,
679*f3f0262cSandi * to obtain fancier outputs.
680*f3f0262cSandi */
681*f3f0262cSandiclass DiffFormatter
682*f3f0262cSandi{
683*f3f0262cSandi	/**
684*f3f0262cSandi	 * Number of leading context "lines" to preserve.
685*f3f0262cSandi	 *
686*f3f0262cSandi	 * This should be left at zero for this class, but subclasses
687*f3f0262cSandi	 * may want to set this to other values.
688*f3f0262cSandi	 */
689*f3f0262cSandi	var $leading_context_lines = 0;
690*f3f0262cSandi
691*f3f0262cSandi	/**
692*f3f0262cSandi	 * Number of trailing context "lines" to preserve.
693*f3f0262cSandi	 *
694*f3f0262cSandi	 * This should be left at zero for this class, but subclasses
695*f3f0262cSandi	 * may want to set this to other values.
696*f3f0262cSandi	 */
697*f3f0262cSandi	var $trailing_context_lines = 0;
698*f3f0262cSandi
699*f3f0262cSandi	/**
700*f3f0262cSandi	 * Format a diff.
701*f3f0262cSandi	 *
702*f3f0262cSandi	 * @param $diff object A Diff object.
703*f3f0262cSandi	 * @return string The formatted output.
704*f3f0262cSandi	 */
705*f3f0262cSandi	function format($diff) {
706*f3f0262cSandi
707*f3f0262cSandi		$xi = $yi = 1;
708*f3f0262cSandi		$block = false;
709*f3f0262cSandi		$context = array();
710*f3f0262cSandi
711*f3f0262cSandi		$nlead = $this->leading_context_lines;
712*f3f0262cSandi		$ntrail = $this->trailing_context_lines;
713*f3f0262cSandi
714*f3f0262cSandi		$this->_start_diff();
715*f3f0262cSandi
716*f3f0262cSandi		foreach ($diff->edits as $edit) {
717*f3f0262cSandi			if ($edit->type == 'copy') {
718*f3f0262cSandi				if (is_array($block)) {
719*f3f0262cSandi					if (sizeof($edit->orig) <= $nlead + $ntrail) {
720*f3f0262cSandi						$block[] = $edit;
721*f3f0262cSandi					}
722*f3f0262cSandi					else{
723*f3f0262cSandi						if ($ntrail) {
724*f3f0262cSandi							$context = array_slice($edit->orig, 0, $ntrail);
725*f3f0262cSandi							$block[] = new _DiffOp_Copy($context);
726*f3f0262cSandi						}
727*f3f0262cSandi						$this->_block($x0, $ntrail + $xi - $x0,
728*f3f0262cSandi									  $y0, $ntrail + $yi - $y0,
729*f3f0262cSandi									  $block);
730*f3f0262cSandi						$block = false;
731*f3f0262cSandi					}
732*f3f0262cSandi				}
733*f3f0262cSandi				$context = $edit->orig;
734*f3f0262cSandi			}
735*f3f0262cSandi			else {
736*f3f0262cSandi				if (! is_array($block)) {
737*f3f0262cSandi					$context = array_slice($context, sizeof($context) - $nlead);
738*f3f0262cSandi					$x0 = $xi - sizeof($context);
739*f3f0262cSandi					$y0 = $yi - sizeof($context);
740*f3f0262cSandi					$block = array();
741*f3f0262cSandi					if ($context)
742*f3f0262cSandi						$block[] = new _DiffOp_Copy($context);
743*f3f0262cSandi				}
744*f3f0262cSandi				$block[] = $edit;
745*f3f0262cSandi			}
746*f3f0262cSandi
747*f3f0262cSandi			if ($edit->orig)
748*f3f0262cSandi				$xi += sizeof($edit->orig);
749*f3f0262cSandi			if ($edit->closing)
750*f3f0262cSandi				$yi += sizeof($edit->closing);
751*f3f0262cSandi		}
752*f3f0262cSandi
753*f3f0262cSandi		if (is_array($block))
754*f3f0262cSandi			$this->_block($x0, $xi - $x0,
755*f3f0262cSandi						  $y0, $yi - $y0,
756*f3f0262cSandi						  $block);
757*f3f0262cSandi
758*f3f0262cSandi		return $this->_end_diff();
759*f3f0262cSandi	}
760*f3f0262cSandi
761*f3f0262cSandi	function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
762*f3f0262cSandi		$this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
763*f3f0262cSandi		foreach ($edits as $edit) {
764*f3f0262cSandi			if ($edit->type == 'copy')
765*f3f0262cSandi				$this->_context($edit->orig);
766*f3f0262cSandi			elseif ($edit->type == 'add')
767*f3f0262cSandi				$this->_added($edit->closing);
768*f3f0262cSandi			elseif ($edit->type == 'delete')
769*f3f0262cSandi				$this->_deleted($edit->orig);
770*f3f0262cSandi			elseif ($edit->type == 'change')
771*f3f0262cSandi				$this->_changed($edit->orig, $edit->closing);
772*f3f0262cSandi			else
773*f3f0262cSandi				trigger_error("Unknown edit type", E_USER_ERROR);
774*f3f0262cSandi		}
775*f3f0262cSandi		$this->_end_block();
776*f3f0262cSandi	}
777*f3f0262cSandi
778*f3f0262cSandi	function _start_diff() {
779*f3f0262cSandi		ob_start();
780*f3f0262cSandi	}
781*f3f0262cSandi
782*f3f0262cSandi	function _end_diff() {
783*f3f0262cSandi		$val = ob_get_contents();
784*f3f0262cSandi		ob_end_clean();
785*f3f0262cSandi		return $val;
786*f3f0262cSandi	}
787*f3f0262cSandi
788*f3f0262cSandi	function _block_header($xbeg, $xlen, $ybeg, $ylen) {
789*f3f0262cSandi		if ($xlen > 1)
790*f3f0262cSandi			$xbeg .= "," . ($xbeg + $xlen - 1);
791*f3f0262cSandi		if ($ylen > 1)
792*f3f0262cSandi			$ybeg .= "," . ($ybeg + $ylen - 1);
793*f3f0262cSandi
794*f3f0262cSandi		return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
795*f3f0262cSandi	}
796*f3f0262cSandi
797*f3f0262cSandi	function _start_block($header) {
798*f3f0262cSandi		echo $header;
799*f3f0262cSandi	}
800*f3f0262cSandi
801*f3f0262cSandi	function _end_block() {
802*f3f0262cSandi	}
803*f3f0262cSandi
804*f3f0262cSandi	function _lines($lines, $prefix = ' ') {
805*f3f0262cSandi		foreach ($lines as $line)
806*f3f0262cSandi			echo "$prefix $line\n";
807*f3f0262cSandi	}
808*f3f0262cSandi
809*f3f0262cSandi	function _context($lines) {
810*f3f0262cSandi		$this->_lines($lines);
811*f3f0262cSandi	}
812*f3f0262cSandi
813*f3f0262cSandi	function _added($lines) {
814*f3f0262cSandi		$this->_lines($lines, ">");
815*f3f0262cSandi	}
816*f3f0262cSandi	function _deleted($lines) {
817*f3f0262cSandi		$this->_lines($lines, "<");
818*f3f0262cSandi	}
819*f3f0262cSandi
820*f3f0262cSandi	function _changed($orig, $closing) {
821*f3f0262cSandi		$this->_deleted($orig);
822*f3f0262cSandi		echo "---\n";
823*f3f0262cSandi		$this->_added($closing);
824*f3f0262cSandi	}
825*f3f0262cSandi}
826*f3f0262cSandi
827*f3f0262cSandi
828*f3f0262cSandi/**
829*f3f0262cSandi *	Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
830*f3f0262cSandi *
831*f3f0262cSandi */
832*f3f0262cSandi
833*f3f0262cSandidefine('NBSP', "\xA0");			// iso-8859-x non-breaking space.
834*f3f0262cSandi
835*f3f0262cSandiclass _HWLDF_WordAccumulator {
836*f3f0262cSandi	function _HWLDF_WordAccumulator () {
837*f3f0262cSandi		$this->_lines = array();
838*f3f0262cSandi		$this->_line = '';
839*f3f0262cSandi		$this->_group = '';
840*f3f0262cSandi		$this->_tag = '';
841*f3f0262cSandi	}
842*f3f0262cSandi
843*f3f0262cSandi	function _flushGroup ($new_tag) {
844*f3f0262cSandi		if ($this->_group !== '') {
845*f3f0262cSandi	  if ($this->_tag == 'mark')
846*f3f0262cSandi			$this->_line .= '<span class="diffchange">'.$this->_group.'</span>';
847*f3f0262cSandi	  else
848*f3f0262cSandi		$this->_line .= $this->_group;
849*f3f0262cSandi	}
850*f3f0262cSandi		$this->_group = '';
851*f3f0262cSandi		$this->_tag = $new_tag;
852*f3f0262cSandi	}
853*f3f0262cSandi
854*f3f0262cSandi	function _flushLine ($new_tag) {
855*f3f0262cSandi		$this->_flushGroup($new_tag);
856*f3f0262cSandi		if ($this->_line != '')
857*f3f0262cSandi			$this->_lines[] = $this->_line;
858*f3f0262cSandi		$this->_line = '';
859*f3f0262cSandi	}
860*f3f0262cSandi
861*f3f0262cSandi	function addWords ($words, $tag = '') {
862*f3f0262cSandi		if ($tag != $this->_tag)
863*f3f0262cSandi			$this->_flushGroup($tag);
864*f3f0262cSandi
865*f3f0262cSandi		foreach ($words as $word) {
866*f3f0262cSandi			// new-line should only come as first char of word.
867*f3f0262cSandi			if ($word == '')
868*f3f0262cSandi				continue;
869*f3f0262cSandi			if ($word[0] == "\n") {
870*f3f0262cSandi				$this->_group .= NBSP;
871*f3f0262cSandi				$this->_flushLine($tag);
872*f3f0262cSandi				$word = substr($word, 1);
873*f3f0262cSandi			}
874*f3f0262cSandi			assert(!strstr($word, "\n"));
875*f3f0262cSandi			$this->_group .= $word;
876*f3f0262cSandi		}
877*f3f0262cSandi	}
878*f3f0262cSandi
879*f3f0262cSandi	function getLines() {
880*f3f0262cSandi		$this->_flushLine('~done');
881*f3f0262cSandi		return $this->_lines;
882*f3f0262cSandi	}
883*f3f0262cSandi}
884*f3f0262cSandi
885*f3f0262cSandiclass WordLevelDiff extends MappedDiff
886*f3f0262cSandi{
887*f3f0262cSandi	function WordLevelDiff ($orig_lines, $closing_lines) {
888*f3f0262cSandi		list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
889*f3f0262cSandi		list ($closing_words, $closing_stripped) = $this->_split($closing_lines);
890*f3f0262cSandi
891*f3f0262cSandi
892*f3f0262cSandi		$this->MappedDiff($orig_words, $closing_words,
893*f3f0262cSandi						  $orig_stripped, $closing_stripped);
894*f3f0262cSandi	}
895*f3f0262cSandi
896*f3f0262cSandi	function _split($lines) {
897*f3f0262cSandi		// FIXME: fix POSIX char class.
898*f3f0262cSandi#		 if (!preg_match_all('/ ( [^\S\n]+ | [[:alnum:]]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
899*f3f0262cSandi		if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
900*f3f0262cSandi							implode("\n", $lines),
901*f3f0262cSandi							$m)) {
902*f3f0262cSandi			return array(array(''), array(''));
903*f3f0262cSandi		}
904*f3f0262cSandi		return array($m[0], $m[1]);
905*f3f0262cSandi	}
906*f3f0262cSandi
907*f3f0262cSandi	function orig () {
908*f3f0262cSandi		$orig = new _HWLDF_WordAccumulator;
909*f3f0262cSandi
910*f3f0262cSandi		foreach ($this->edits as $edit) {
911*f3f0262cSandi			if ($edit->type == 'copy')
912*f3f0262cSandi				$orig->addWords($edit->orig);
913*f3f0262cSandi			elseif ($edit->orig)
914*f3f0262cSandi				$orig->addWords($edit->orig, 'mark');
915*f3f0262cSandi		}
916*f3f0262cSandi		return $orig->getLines();
917*f3f0262cSandi	}
918*f3f0262cSandi
919*f3f0262cSandi	function closing () {
920*f3f0262cSandi		$closing = new _HWLDF_WordAccumulator;
921*f3f0262cSandi
922*f3f0262cSandi		foreach ($this->edits as $edit) {
923*f3f0262cSandi			if ($edit->type == 'copy')
924*f3f0262cSandi				$closing->addWords($edit->closing);
925*f3f0262cSandi			elseif ($edit->closing)
926*f3f0262cSandi				$closing->addWords($edit->closing, 'mark');
927*f3f0262cSandi		}
928*f3f0262cSandi		return $closing->getLines();
929*f3f0262cSandi	}
930*f3f0262cSandi}
931*f3f0262cSandi
932*f3f0262cSandi/**
933*f3f0262cSandi * "Unified" diff formatter.
934*f3f0262cSandi *
935*f3f0262cSandi * This class formats the diff in classic "unified diff" format.
936*f3f0262cSandi */
937*f3f0262cSandiclass UnifiedDiffFormatter extends DiffFormatter
938*f3f0262cSandi{
939*f3f0262cSandi    function UnifiedDiffFormatter($context_lines = 4) {
940*f3f0262cSandi        $this->leading_context_lines = $context_lines;
941*f3f0262cSandi        $this->trailing_context_lines = $context_lines;
942*f3f0262cSandi    }
943*f3f0262cSandi
944*f3f0262cSandi    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
945*f3f0262cSandi        if ($xlen != 1)
946*f3f0262cSandi            $xbeg .= "," . $xlen;
947*f3f0262cSandi        if ($ylen != 1)
948*f3f0262cSandi            $ybeg .= "," . $ylen;
949*f3f0262cSandi        return "@@ -$xbeg +$ybeg @@\n";
950*f3f0262cSandi    }
951*f3f0262cSandi
952*f3f0262cSandi    function _added($lines) {
953*f3f0262cSandi        $this->_lines($lines, "+");
954*f3f0262cSandi    }
955*f3f0262cSandi    function _deleted($lines) {
956*f3f0262cSandi        $this->_lines($lines, "-");
957*f3f0262cSandi    }
958*f3f0262cSandi    function _changed($orig, $final) {
959*f3f0262cSandi        $this->_deleted($orig);
960*f3f0262cSandi        $this->_added($final);
961*f3f0262cSandi    }
962*f3f0262cSandi}
963*f3f0262cSandi
964*f3f0262cSandi/**
965*f3f0262cSandi *	Wikipedia Table style diff formatter.
966*f3f0262cSandi *
967*f3f0262cSandi */
968*f3f0262cSandiclass TableDiffFormatter extends DiffFormatter
969*f3f0262cSandi{
970*f3f0262cSandi	function TableDiffFormatter() {
971*f3f0262cSandi		$this->leading_context_lines = 2;
972*f3f0262cSandi		$this->trailing_context_lines = 2;
973*f3f0262cSandi	}
974*f3f0262cSandi
975*f3f0262cSandi  function _pre($text){
976*f3f0262cSandi    $text = htmlspecialchars($text);
977*f3f0262cSandi    $text = str_replace('  ',' &nbsp;',$text);
978*f3f0262cSandi    return $text;
979*f3f0262cSandi  }
980*f3f0262cSandi
981*f3f0262cSandi  function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
982*f3f0262cSandi    global $lang;
983*f3f0262cSandi    $l1 = $lang['line'].' '.$xbeg;
984*f3f0262cSandi    $l2 = $lang['line'].' '.$ybeg;
985*f3f0262cSandi		$r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n" .
986*f3f0262cSandi		  '<td class="diff-blockheader" colspan="2">'.$l2.":</td></tr>\n";
987*f3f0262cSandi		return $r;
988*f3f0262cSandi	}
989*f3f0262cSandi
990*f3f0262cSandi	function _start_block( $header ) {
991*f3f0262cSandi		print( $header );
992*f3f0262cSandi	}
993*f3f0262cSandi
994*f3f0262cSandi	function _end_block() {
995*f3f0262cSandi	}
996*f3f0262cSandi
997*f3f0262cSandi	function _lines( $lines, $prefix=' ', $color="white" ) {
998*f3f0262cSandi	}
999*f3f0262cSandi
1000*f3f0262cSandi	function addedLine( $line ) {
1001*f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1002*f3f0262cSandi		return '<td>+</td><td class="diff-addedline">' .
1003*f3f0262cSandi		  $line.'</td>';
1004*f3f0262cSandi	}
1005*f3f0262cSandi
1006*f3f0262cSandi	function deletedLine( $line ) {
1007*f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1008*f3f0262cSandi		return '<td>-</td><td class="diff-deletedline">' .
1009*f3f0262cSandi		  $line.'</td>';
1010*f3f0262cSandi	}
1011*f3f0262cSandi
1012*f3f0262cSandi	function emptyLine() {
1013*f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1014*f3f0262cSandi		return '<td colspan="2">&nbsp;</td>';
1015*f3f0262cSandi	}
1016*f3f0262cSandi
1017*f3f0262cSandi	function contextLine( $line ) {
1018*f3f0262cSandi    $line = str_replace('  ','&nbsp; ',$line);
1019*f3f0262cSandi		return '<td> </td><td class="diff-context">'.$line.'</td>';
1020*f3f0262cSandi	}
1021*f3f0262cSandi
1022*f3f0262cSandi	function _added($lines) {
1023*f3f0262cSandi		foreach ($lines as $line) {
1024*f3f0262cSandi			print( '<tr>' . $this->emptyLine() .
1025*f3f0262cSandi			  $this->addedLine( $line ) . "</tr>\n" );
1026*f3f0262cSandi		}
1027*f3f0262cSandi	}
1028*f3f0262cSandi
1029*f3f0262cSandi	function _deleted($lines) {
1030*f3f0262cSandi		foreach ($lines as $line) {
1031*f3f0262cSandi			print( '<tr>' . $this->deletedLine( $line ) .
1032*f3f0262cSandi			  $this->emptyLine() . "</tr>\n" );
1033*f3f0262cSandi		}
1034*f3f0262cSandi	}
1035*f3f0262cSandi
1036*f3f0262cSandi	function _context( $lines ) {
1037*f3f0262cSandi		foreach ($lines as $line) {
1038*f3f0262cSandi			print( '<tr>' . $this->contextLine( $line ) .
1039*f3f0262cSandi			  $this->contextLine( $line ) . "</tr>\n" );
1040*f3f0262cSandi		}
1041*f3f0262cSandi	}
1042*f3f0262cSandi
1043*f3f0262cSandi	function _changed( $orig, $closing ) {
1044*f3f0262cSandi		$diff = new WordLevelDiff( $orig, $closing );
1045*f3f0262cSandi		$del = $diff->orig();
1046*f3f0262cSandi		$add = $diff->closing();
1047*f3f0262cSandi
1048*f3f0262cSandi		while ( $line = array_shift( $del ) ) {
1049*f3f0262cSandi			$aline = array_shift( $add );
1050*f3f0262cSandi			print( '<tr>' . $this->deletedLine( $line ) .
1051*f3f0262cSandi			  $this->addedLine( $aline ) . "</tr>\n" );
1052*f3f0262cSandi		}
1053*f3f0262cSandi		$this->_added( $add ); # If any leftovers
1054*f3f0262cSandi	}
1055*f3f0262cSandi}
1056*f3f0262cSandi
1057*f3f0262cSandi?>
1058