1f3f0262cSandi<?php 215fae107Sandi/** 315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3) 415fae107Sandi * 515fae107Sandi * Additions by Axel Boldt for MediaWiki 615fae107Sandi * 715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> 815fae107Sandi * @license You may copy this code freely under the conditions of the GPL. 915fae107Sandi */ 10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert')); 11f3f0262cSandi 12f3f0262cSandiclass _DiffOp { 13f3f0262cSandi var $type; 14f3f0262cSandi var $orig; 15f3f0262cSandi var $closing; 16f3f0262cSandi 17f3f0262cSandi function reverse() { 18f3f0262cSandi trigger_error("pure virtual", E_USER_ERROR); 19f3f0262cSandi } 20f3f0262cSandi 21f3f0262cSandi function norig() { 22f5b78785SAndreas Gohr return $this->orig ? count($this->orig) : 0; 23f3f0262cSandi } 24f3f0262cSandi 25f3f0262cSandi function nclosing() { 26f5b78785SAndreas Gohr return $this->closing ? count($this->closing) : 0; 27f3f0262cSandi } 28f3f0262cSandi} 29f3f0262cSandi 30f3f0262cSandiclass _DiffOp_Copy extends _DiffOp { 31f3f0262cSandi var $type = 'copy'; 32988c1340SPiyush Mishra 33988c1340SPiyush Mishra function __construct($orig, $closing = false) { 34f3f0262cSandi if (!is_array($closing)) 35f3f0262cSandi $closing = $orig; 36f3f0262cSandi $this->orig = $orig; 37f3f0262cSandi $this->closing = $closing; 38f3f0262cSandi } 39f3f0262cSandi 40f3f0262cSandi function reverse() { 41f3f0262cSandi return new _DiffOp_Copy($this->closing, $this->orig); 42f3f0262cSandi } 43f3f0262cSandi} 44f3f0262cSandi 45f3f0262cSandiclass _DiffOp_Delete extends _DiffOp { 46f3f0262cSandi var $type = 'delete'; 47988c1340SPiyush Mishra 48988c1340SPiyush Mishra function __construct($lines) { 49f3f0262cSandi $this->orig = $lines; 50f3f0262cSandi $this->closing = false; 51f3f0262cSandi } 52f3f0262cSandi 53f3f0262cSandi function reverse() { 54f3f0262cSandi return new _DiffOp_Add($this->orig); 55f3f0262cSandi } 56f3f0262cSandi} 57f3f0262cSandi 58f3f0262cSandiclass _DiffOp_Add extends _DiffOp { 59f3f0262cSandi var $type = 'add'; 60988c1340SPiyush Mishra 61988c1340SPiyush Mishra function __construct($lines) { 62f3f0262cSandi $this->closing = $lines; 63f3f0262cSandi $this->orig = false; 64f3f0262cSandi } 65f3f0262cSandi 66f3f0262cSandi function reverse() { 67f3f0262cSandi return new _DiffOp_Delete($this->closing); 68f3f0262cSandi } 69f3f0262cSandi} 70f3f0262cSandi 71f3f0262cSandiclass _DiffOp_Change extends _DiffOp { 72f3f0262cSandi var $type = 'change'; 73988c1340SPiyush Mishra 74988c1340SPiyush Mishra function __construct($orig, $closing) { 75f3f0262cSandi $this->orig = $orig; 76f3f0262cSandi $this->closing = $closing; 77f3f0262cSandi } 78f3f0262cSandi 79f3f0262cSandi function reverse() { 80f3f0262cSandi return new _DiffOp_Change($this->closing, $this->orig); 81f3f0262cSandi } 82f3f0262cSandi} 83f3f0262cSandi 84f3f0262cSandi 85f3f0262cSandi/** 86f3f0262cSandi * Class used internally by Diff to actually compute the diffs. 87f3f0262cSandi * 88f3f0262cSandi * The algorithm used here is mostly lifted from the perl module 89f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: 90f3f0262cSandi * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip 91f3f0262cSandi * 92f3f0262cSandi * More ideas are taken from: 93f3f0262cSandi * http://www.ics.uci.edu/~eppstein/161/960229.html 94f3f0262cSandi * 95f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU 96f3f0262cSandi * diffutils-2.7, which can be found at: 97f3f0262cSandi * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz 98f3f0262cSandi * 99f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations) 100f3f0262cSandi * are my own. 101f3f0262cSandi * 102f3f0262cSandi * @author Geoffrey T. Dairiki 103f3f0262cSandi * @access private 104f3f0262cSandi */ 105f5b78785SAndreas Gohrclass _DiffEngine { 106f5b78785SAndreas Gohr 107f3f0262cSandi function diff($from_lines, $to_lines) { 108f5b78785SAndreas Gohr $n_from = count($from_lines); 109f5b78785SAndreas Gohr $n_to = count($to_lines); 110f3f0262cSandi 111f3f0262cSandi $this->xchanged = $this->ychanged = array(); 112f3f0262cSandi $this->xv = $this->yv = array(); 113f3f0262cSandi $this->xind = $this->yind = array(); 114f3f0262cSandi unset($this->seq); 115f3f0262cSandi unset($this->in_seq); 116f3f0262cSandi unset($this->lcs); 117f3f0262cSandi 118f3f0262cSandi // Skip leading common lines. 119f3f0262cSandi for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { 120f3f0262cSandi if ($from_lines[$skip] != $to_lines[$skip]) 121f3f0262cSandi break; 122f3f0262cSandi $this->xchanged[$skip] = $this->ychanged[$skip] = false; 123f3f0262cSandi } 124f3f0262cSandi // Skip trailing common lines. 125f5b78785SAndreas Gohr $xi = $n_from; 126f5b78785SAndreas Gohr $yi = $n_to; 127f3f0262cSandi for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { 128f3f0262cSandi if ($from_lines[$xi] != $to_lines[$yi]) 129f3f0262cSandi break; 130f3f0262cSandi $this->xchanged[$xi] = $this->ychanged[$yi] = false; 131f3f0262cSandi } 132f3f0262cSandi 133f3f0262cSandi // Ignore lines which do not exist in both files. 134f3f0262cSandi for ($xi = $skip; $xi < $n_from - $endskip; $xi++) 135f3f0262cSandi $xhash[$from_lines[$xi]] = 1; 136f3f0262cSandi for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { 137f3f0262cSandi $line = $to_lines[$yi]; 138f3f0262cSandi if (($this->ychanged[$yi] = empty($xhash[$line]))) 139f3f0262cSandi continue; 140f3f0262cSandi $yhash[$line] = 1; 141f3f0262cSandi $this->yv[] = $line; 142f3f0262cSandi $this->yind[] = $yi; 143f3f0262cSandi } 144f3f0262cSandi for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 145f3f0262cSandi $line = $from_lines[$xi]; 146f3f0262cSandi if (($this->xchanged[$xi] = empty($yhash[$line]))) 147f3f0262cSandi continue; 148f3f0262cSandi $this->xv[] = $line; 149f3f0262cSandi $this->xind[] = $xi; 150f3f0262cSandi } 151f3f0262cSandi 152f3f0262cSandi // Find the LCS. 153f5b78785SAndreas Gohr $this->_compareseq(0, count($this->xv), 0, count($this->yv)); 154f3f0262cSandi 155f3f0262cSandi // Merge edits when possible 156f3f0262cSandi $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); 157f3f0262cSandi $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); 158f3f0262cSandi 159f3f0262cSandi // Compute the edit operations. 160f3f0262cSandi $edits = array(); 161f3f0262cSandi $xi = $yi = 0; 162f3f0262cSandi while ($xi < $n_from || $yi < $n_to) { 163f3f0262cSandi USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); 164f3f0262cSandi USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); 165f3f0262cSandi 166f3f0262cSandi // Skip matching "snake". 167f3f0262cSandi $copy = array(); 1687deca91bSRobin Getz while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { 169f3f0262cSandi $copy[] = $from_lines[$xi++]; 170f3f0262cSandi ++$yi; 171f3f0262cSandi } 172f3f0262cSandi if ($copy) 173f3f0262cSandi $edits[] = new _DiffOp_Copy($copy); 174f3f0262cSandi 175f3f0262cSandi // Find deletes & adds. 176f3f0262cSandi $delete = array(); 177f3f0262cSandi while ($xi < $n_from && $this->xchanged[$xi]) 178f3f0262cSandi $delete[] = $from_lines[$xi++]; 179f3f0262cSandi 180f3f0262cSandi $add = array(); 181f3f0262cSandi while ($yi < $n_to && $this->ychanged[$yi]) 182f3f0262cSandi $add[] = $to_lines[$yi++]; 183f3f0262cSandi 184f3f0262cSandi if ($delete && $add) 185f3f0262cSandi $edits[] = new _DiffOp_Change($delete, $add); 186f3f0262cSandi elseif ($delete) 187f3f0262cSandi $edits[] = new _DiffOp_Delete($delete); 188f3f0262cSandi elseif ($add) 189f3f0262cSandi $edits[] = new _DiffOp_Add($add); 190f3f0262cSandi } 191f3f0262cSandi return $edits; 192f3f0262cSandi } 193f3f0262cSandi 194f3f0262cSandi 19515fae107Sandi /** 19615fae107Sandi * Divide the Largest Common Subsequence (LCS) of the sequences 197f3f0262cSandi * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally 198f3f0262cSandi * sized segments. 199f3f0262cSandi * 200f3f0262cSandi * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an 201f3f0262cSandi * array of NCHUNKS+1 (X, Y) indexes giving the diving points between 202f3f0262cSandi * sub sequences. The first sub-sequence is contained in [X0, X1), 203f3f0262cSandi * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note 204f3f0262cSandi * that (X0, Y0) == (XOFF, YOFF) and 205f3f0262cSandi * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 206f3f0262cSandi * 207f3f0262cSandi * This function assumes that the first lines of the specified portions 208f3f0262cSandi * of the two files do not match, and likewise that the last lines do not 209f3f0262cSandi * match. The caller must trim matching lines from the beginning and end 210f3f0262cSandi * of the portions it is going to specify. 211f3f0262cSandi */ 212f3f0262cSandi function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) { 213f3f0262cSandi $flip = false; 214f3f0262cSandi 215f3f0262cSandi if ($xlim - $xoff > $ylim - $yoff) { 216f3f0262cSandi // Things seems faster (I'm not sure I understand why) 217f3f0262cSandi // when the shortest sequence in X. 218f3f0262cSandi $flip = true; 2197deca91bSRobin Getz list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim); 220f3f0262cSandi } 221f3f0262cSandi 222f3f0262cSandi if ($flip) 223f3f0262cSandi for ($i = $ylim - 1; $i >= $yoff; $i--) 224f3f0262cSandi $ymatches[$this->xv[$i]][] = $i; 225f3f0262cSandi else 226f3f0262cSandi for ($i = $ylim - 1; $i >= $yoff; $i--) 227f3f0262cSandi $ymatches[$this->yv[$i]][] = $i; 228f3f0262cSandi 229f3f0262cSandi $this->lcs = 0; 230f3f0262cSandi $this->seq[0]= $yoff - 1; 231f3f0262cSandi $this->in_seq = array(); 232f3f0262cSandi $ymids[0] = array(); 233f3f0262cSandi 234f3f0262cSandi $numer = $xlim - $xoff + $nchunks - 1; 235f3f0262cSandi $x = $xoff; 236f3f0262cSandi for ($chunk = 0; $chunk < $nchunks; $chunk++) { 237f3f0262cSandi if ($chunk > 0) 238f3f0262cSandi for ($i = 0; $i <= $this->lcs; $i++) 239f3f0262cSandi $ymids[$i][$chunk-1] = $this->seq[$i]; 240f3f0262cSandi 241f3f0262cSandi $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); 242f3f0262cSandi for ( ; $x < $x1; $x++) { 243f3f0262cSandi $line = $flip ? $this->yv[$x] : $this->xv[$x]; 244f3f0262cSandi if (empty($ymatches[$line])) 245f3f0262cSandi continue; 246f3f0262cSandi $matches = $ymatches[$line]; 247f3f0262cSandi reset($matches); 248f3f0262cSandi while (list ($junk, $y) = each($matches)) 249f3f0262cSandi if (empty($this->in_seq[$y])) { 250f3f0262cSandi $k = $this->_lcs_pos($y); 251f3f0262cSandi USE_ASSERTS && assert($k > 0); 252f3f0262cSandi $ymids[$k] = $ymids[$k-1]; 253f3f0262cSandi break; 254f3f0262cSandi } 255f3f0262cSandi while (list ($junk, $y) = each($matches)) { 256f3f0262cSandi if ($y > $this->seq[$k-1]) { 257f3f0262cSandi USE_ASSERTS && assert($y < $this->seq[$k]); 258f3f0262cSandi // Optimization: this is a common case: 259f3f0262cSandi // next match is just replacing previous match. 260f3f0262cSandi $this->in_seq[$this->seq[$k]] = false; 261f3f0262cSandi $this->seq[$k] = $y; 262f3f0262cSandi $this->in_seq[$y] = 1; 263f3f0262cSandi } 264f3f0262cSandi else if (empty($this->in_seq[$y])) { 265f3f0262cSandi $k = $this->_lcs_pos($y); 266f3f0262cSandi USE_ASSERTS && assert($k > 0); 267f3f0262cSandi $ymids[$k] = $ymids[$k-1]; 268f3f0262cSandi } 269f3f0262cSandi } 270f3f0262cSandi } 271f3f0262cSandi } 272f3f0262cSandi 273f3f0262cSandi $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); 274f3f0262cSandi $ymid = $ymids[$this->lcs]; 275f3f0262cSandi for ($n = 0; $n < $nchunks - 1; $n++) { 276f3f0262cSandi $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); 277f3f0262cSandi $y1 = $ymid[$n] + 1; 278f3f0262cSandi $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); 279f3f0262cSandi } 280f3f0262cSandi $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); 281f3f0262cSandi 282f3f0262cSandi return array($this->lcs, $seps); 283f3f0262cSandi } 284f3f0262cSandi 285f3f0262cSandi function _lcs_pos($ypos) { 286f3f0262cSandi $end = $this->lcs; 287f3f0262cSandi if ($end == 0 || $ypos > $this->seq[$end]) { 288f3f0262cSandi $this->seq[++$this->lcs] = $ypos; 289f3f0262cSandi $this->in_seq[$ypos] = 1; 290f3f0262cSandi return $this->lcs; 291f3f0262cSandi } 292f3f0262cSandi 293f3f0262cSandi $beg = 1; 294f3f0262cSandi while ($beg < $end) { 295f3f0262cSandi $mid = (int)(($beg + $end) / 2); 296f3f0262cSandi if ($ypos > $this->seq[$mid]) 297f3f0262cSandi $beg = $mid + 1; 298f3f0262cSandi else 299f3f0262cSandi $end = $mid; 300f3f0262cSandi } 301f3f0262cSandi 302f3f0262cSandi USE_ASSERTS && assert($ypos != $this->seq[$end]); 303f3f0262cSandi 304f3f0262cSandi $this->in_seq[$this->seq[$end]] = false; 305f3f0262cSandi $this->seq[$end] = $ypos; 306f3f0262cSandi $this->in_seq[$ypos] = 1; 307f3f0262cSandi return $end; 308f3f0262cSandi } 309f3f0262cSandi 31015fae107Sandi /** 31115fae107Sandi * Find LCS of two sequences. 312f3f0262cSandi * 313f3f0262cSandi * The results are recorded in the vectors $this->{x,y}changed[], by 314f3f0262cSandi * storing a 1 in the element for each line that is an insertion 315f3f0262cSandi * or deletion (ie. is not in the LCS). 316f3f0262cSandi * 317f3f0262cSandi * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 318f3f0262cSandi * 319f3f0262cSandi * Note that XLIM, YLIM are exclusive bounds. 320f3f0262cSandi * All line numbers are origin-0 and discarded lines are not counted. 321f3f0262cSandi */ 322f3f0262cSandi function _compareseq($xoff, $xlim, $yoff, $ylim) { 323f3f0262cSandi // Slide down the bottom initial diagonal. 3247deca91bSRobin Getz while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) { 325f3f0262cSandi ++$xoff; 326f3f0262cSandi ++$yoff; 327f3f0262cSandi } 328f3f0262cSandi 329f3f0262cSandi // Slide up the top initial diagonal. 3307deca91bSRobin Getz while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { 331f3f0262cSandi --$xlim; 332f3f0262cSandi --$ylim; 333f3f0262cSandi } 334f3f0262cSandi 335f3f0262cSandi if ($xoff == $xlim || $yoff == $ylim) 336f3f0262cSandi $lcs = 0; 337f3f0262cSandi else { 338f3f0262cSandi // This is ad hoc but seems to work well. 339f3f0262cSandi //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); 340f3f0262cSandi //$nchunks = max(2,min(8,(int)$nchunks)); 341f3f0262cSandi $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; 342f3f0262cSandi list ($lcs, $seps) 343f3f0262cSandi = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks); 344f3f0262cSandi } 345f3f0262cSandi 346f3f0262cSandi if ($lcs == 0) { 347f3f0262cSandi // X and Y sequences have no common subsequence: 348f3f0262cSandi // mark all changed. 349f3f0262cSandi while ($yoff < $ylim) 350f3f0262cSandi $this->ychanged[$this->yind[$yoff++]] = 1; 351f3f0262cSandi while ($xoff < $xlim) 352f3f0262cSandi $this->xchanged[$this->xind[$xoff++]] = 1; 353f3f0262cSandi } 354f3f0262cSandi else { 355f3f0262cSandi // Use the partitions to split this problem into subproblems. 356f3f0262cSandi reset($seps); 357f3f0262cSandi $pt1 = $seps[0]; 358f3f0262cSandi while ($pt2 = next($seps)) { 359f3f0262cSandi $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); 360f3f0262cSandi $pt1 = $pt2; 361f3f0262cSandi } 362f3f0262cSandi } 363f3f0262cSandi } 364f3f0262cSandi 36515fae107Sandi /** 36615fae107Sandi * Adjust inserts/deletes of identical lines to join changes 367f3f0262cSandi * as much as possible. 368f3f0262cSandi * 369f3f0262cSandi * We do something when a run of changed lines include a 370f3f0262cSandi * line at one end and has an excluded, identical line at the other. 371f3f0262cSandi * We are free to choose which identical line is included. 372f3f0262cSandi * `compareseq' usually chooses the one at the beginning, 373f3f0262cSandi * but usually it is cleaner to consider the following identical line 374f3f0262cSandi * to be the "change". 375f3f0262cSandi * 376f3f0262cSandi * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 377f3f0262cSandi */ 378f3f0262cSandi function _shift_boundaries($lines, &$changed, $other_changed) { 379f3f0262cSandi $i = 0; 380f3f0262cSandi $j = 0; 381f3f0262cSandi 382f5b78785SAndreas Gohr USE_ASSERTS && assert('count($lines) == count($changed)'); 383f5b78785SAndreas Gohr $len = count($lines); 384f5b78785SAndreas Gohr $other_len = count($other_changed); 385f3f0262cSandi 386f3f0262cSandi while (1) { 387f3f0262cSandi /* 388f3f0262cSandi * Scan forwards to find beginning of another run of changes. 389f3f0262cSandi * Also keep track of the corresponding point in the other file. 390f3f0262cSandi * 391f3f0262cSandi * Throughout this code, $i and $j are adjusted together so that 392f3f0262cSandi * the first $i elements of $changed and the first $j elements 393f3f0262cSandi * of $other_changed both contain the same number of zeros 394f3f0262cSandi * (unchanged lines). 395f3f0262cSandi * Furthermore, $j is always kept so that $j == $other_len or 396f3f0262cSandi * $other_changed[$j] == false. 397f3f0262cSandi */ 398f3f0262cSandi while ($j < $other_len && $other_changed[$j]) 399f3f0262cSandi $j++; 400f3f0262cSandi 401f3f0262cSandi while ($i < $len && ! $changed[$i]) { 402f3f0262cSandi USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); 403f5b78785SAndreas Gohr $i++; 404f5b78785SAndreas Gohr $j++; 405f3f0262cSandi while ($j < $other_len && $other_changed[$j]) 406f3f0262cSandi $j++; 407f3f0262cSandi } 408f3f0262cSandi 409f3f0262cSandi if ($i == $len) 410f3f0262cSandi break; 411f3f0262cSandi 412f3f0262cSandi $start = $i; 413f3f0262cSandi 414f3f0262cSandi // Find the end of this run of changes. 415f3f0262cSandi while (++$i < $len && $changed[$i]) 416f3f0262cSandi continue; 417f3f0262cSandi 418f3f0262cSandi do { 419f3f0262cSandi /* 420f3f0262cSandi * Record the length of this run of changes, so that 421f3f0262cSandi * we can later determine whether the run has grown. 422f3f0262cSandi */ 423f3f0262cSandi $runlength = $i - $start; 424f3f0262cSandi 425f3f0262cSandi /* 426f3f0262cSandi * Move the changed region back, so long as the 427f3f0262cSandi * previous unchanged line matches the last changed one. 428f3f0262cSandi * This merges with previous changed regions. 429f3f0262cSandi */ 430f3f0262cSandi while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { 431f3f0262cSandi $changed[--$start] = 1; 432f3f0262cSandi $changed[--$i] = false; 433f3f0262cSandi while ($start > 0 && $changed[$start - 1]) 434f3f0262cSandi $start--; 435f3f0262cSandi USE_ASSERTS && assert('$j > 0'); 436f3f0262cSandi while ($other_changed[--$j]) 437f3f0262cSandi continue; 438f3f0262cSandi USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); 439f3f0262cSandi } 440f3f0262cSandi 441f3f0262cSandi /* 442f3f0262cSandi * Set CORRESPONDING to the end of the changed run, at the last 443f3f0262cSandi * point where it corresponds to a changed run in the other file. 444f3f0262cSandi * CORRESPONDING == LEN means no such point has been found. 445f3f0262cSandi */ 446f3f0262cSandi $corresponding = $j < $other_len ? $i : $len; 447f3f0262cSandi 448f3f0262cSandi /* 449f3f0262cSandi * Move the changed region forward, so long as the 450f3f0262cSandi * first changed line matches the following unchanged one. 451f3f0262cSandi * This merges with following changed regions. 452f3f0262cSandi * Do this second, so that if there are no merges, 453f3f0262cSandi * the changed region is moved forward as far as possible. 454f3f0262cSandi */ 455f3f0262cSandi while ($i < $len && $lines[$start] == $lines[$i]) { 456f3f0262cSandi $changed[$start++] = false; 457f3f0262cSandi $changed[$i++] = 1; 458f3f0262cSandi while ($i < $len && $changed[$i]) 459f3f0262cSandi $i++; 460f3f0262cSandi 461f3f0262cSandi USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); 462f3f0262cSandi $j++; 463f3f0262cSandi if ($j < $other_len && $other_changed[$j]) { 464f3f0262cSandi $corresponding = $i; 465f3f0262cSandi while ($j < $other_len && $other_changed[$j]) 466f3f0262cSandi $j++; 467f3f0262cSandi } 468f3f0262cSandi } 469f3f0262cSandi } while ($runlength != $i - $start); 470f3f0262cSandi 471f3f0262cSandi /* 472f3f0262cSandi * If possible, move the fully-merged run of changes 473f3f0262cSandi * back to a corresponding run in the other file. 474f3f0262cSandi */ 475f3f0262cSandi while ($corresponding < $i) { 476f3f0262cSandi $changed[--$start] = 1; 477f3f0262cSandi $changed[--$i] = 0; 478f3f0262cSandi USE_ASSERTS && assert('$j > 0'); 479f3f0262cSandi while ($other_changed[--$j]) 480f3f0262cSandi continue; 481f3f0262cSandi USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); 482f3f0262cSandi } 483f3f0262cSandi } 484f3f0262cSandi } 485f3f0262cSandi} 486f3f0262cSandi 487f3f0262cSandi/** 488f3f0262cSandi * Class representing a 'diff' between two sequences of strings. 489f3f0262cSandi */ 490f5b78785SAndreas Gohrclass Diff { 491f5b78785SAndreas Gohr 492f3f0262cSandi var $edits; 493f3f0262cSandi 494f3f0262cSandi /** 495f3f0262cSandi * Constructor. 496f3f0262cSandi * Computes diff between sequences of strings. 497f3f0262cSandi * 498f3f0262cSandi * @param $from_lines array An array of strings. 499f3f0262cSandi * (Typically these are lines from a file.) 500f3f0262cSandi * @param $to_lines array An array of strings. 501f3f0262cSandi */ 502988c1340SPiyush Mishra function __construct($from_lines, $to_lines) { 503f3f0262cSandi $eng = new _DiffEngine; 504f3f0262cSandi $this->edits = $eng->diff($from_lines, $to_lines); 505f3f0262cSandi //$this->_check($from_lines, $to_lines); 506f3f0262cSandi } 507f3f0262cSandi 508f3f0262cSandi /** 509f3f0262cSandi * Compute reversed Diff. 510f3f0262cSandi * 511f3f0262cSandi * SYNOPSIS: 512f3f0262cSandi * 513f3f0262cSandi * $diff = new Diff($lines1, $lines2); 514f3f0262cSandi * $rev = $diff->reverse(); 515f3f0262cSandi * @return object A Diff object representing the inverse of the 516f3f0262cSandi * original diff. 517f3f0262cSandi */ 518f3f0262cSandi function reverse() { 519f3f0262cSandi $rev = $this; 520f3f0262cSandi $rev->edits = array(); 521f3f0262cSandi foreach ($this->edits as $edit) { 522f3f0262cSandi $rev->edits[] = $edit->reverse(); 523f3f0262cSandi } 524f3f0262cSandi return $rev; 525f3f0262cSandi } 526f3f0262cSandi 527f3f0262cSandi /** 528f3f0262cSandi * Check for empty diff. 529f3f0262cSandi * 530f3f0262cSandi * @return bool True iff two sequences were identical. 531f3f0262cSandi */ 532f3f0262cSandi function isEmpty() { 533f3f0262cSandi foreach ($this->edits as $edit) { 534f3f0262cSandi if ($edit->type != 'copy') 535f3f0262cSandi return false; 536f3f0262cSandi } 537f3f0262cSandi return true; 538f3f0262cSandi } 539f3f0262cSandi 540f3f0262cSandi /** 541f3f0262cSandi * Compute the length of the Longest Common Subsequence (LCS). 542f3f0262cSandi * 543f3f0262cSandi * This is mostly for diagnostic purposed. 544f3f0262cSandi * 545f3f0262cSandi * @return int The length of the LCS. 546f3f0262cSandi */ 547f3f0262cSandi function lcs() { 548f3f0262cSandi $lcs = 0; 549f3f0262cSandi foreach ($this->edits as $edit) { 550f3f0262cSandi if ($edit->type == 'copy') 551f5b78785SAndreas Gohr $lcs += count($edit->orig); 552f3f0262cSandi } 553f3f0262cSandi return $lcs; 554f3f0262cSandi } 555f3f0262cSandi 556f3f0262cSandi /** 557f3f0262cSandi * Get the original set of lines. 558f3f0262cSandi * 559f3f0262cSandi * This reconstructs the $from_lines parameter passed to the 560f3f0262cSandi * constructor. 561f3f0262cSandi * 562f3f0262cSandi * @return array The original sequence of strings. 563f3f0262cSandi */ 564f3f0262cSandi function orig() { 565f3f0262cSandi $lines = array(); 566f3f0262cSandi 567f3f0262cSandi foreach ($this->edits as $edit) { 568f3f0262cSandi if ($edit->orig) 569f5b78785SAndreas Gohr array_splice($lines, count($lines), 0, $edit->orig); 570f3f0262cSandi } 571f3f0262cSandi return $lines; 572f3f0262cSandi } 573f3f0262cSandi 574f3f0262cSandi /** 575f3f0262cSandi * Get the closing set of lines. 576f3f0262cSandi * 577f3f0262cSandi * This reconstructs the $to_lines parameter passed to the 578f3f0262cSandi * constructor. 579f3f0262cSandi * 580f3f0262cSandi * @return array The sequence of strings. 581f3f0262cSandi */ 582f3f0262cSandi function closing() { 583f3f0262cSandi $lines = array(); 584f3f0262cSandi 585f3f0262cSandi foreach ($this->edits as $edit) { 586f3f0262cSandi if ($edit->closing) 587f5b78785SAndreas Gohr array_splice($lines, count($lines), 0, $edit->closing); 588f3f0262cSandi } 589f3f0262cSandi return $lines; 590f3f0262cSandi } 591f3f0262cSandi 592f3f0262cSandi /** 593f3f0262cSandi * Check a Diff for validity. 594f3f0262cSandi * 595f3f0262cSandi * This is here only for debugging purposes. 596f3f0262cSandi */ 597f3f0262cSandi function _check($from_lines, $to_lines) { 598f3f0262cSandi if (serialize($from_lines) != serialize($this->orig())) 599f3f0262cSandi trigger_error("Reconstructed original doesn't match", E_USER_ERROR); 600f3f0262cSandi if (serialize($to_lines) != serialize($this->closing())) 601f3f0262cSandi trigger_error("Reconstructed closing doesn't match", E_USER_ERROR); 602f3f0262cSandi 603f3f0262cSandi $rev = $this->reverse(); 604f3f0262cSandi if (serialize($to_lines) != serialize($rev->orig())) 605f3f0262cSandi trigger_error("Reversed original doesn't match", E_USER_ERROR); 606f3f0262cSandi if (serialize($from_lines) != serialize($rev->closing())) 607f3f0262cSandi trigger_error("Reversed closing doesn't match", E_USER_ERROR); 608f3f0262cSandi 609f3f0262cSandi $prevtype = 'none'; 610f3f0262cSandi foreach ($this->edits as $edit) { 611f3f0262cSandi if ($prevtype == $edit->type) 612f3f0262cSandi trigger_error("Edit sequence is non-optimal", E_USER_ERROR); 613f3f0262cSandi $prevtype = $edit->type; 614f3f0262cSandi } 615f3f0262cSandi 616f3f0262cSandi $lcs = $this->lcs(); 617f3f0262cSandi trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE); 618f3f0262cSandi } 619f3f0262cSandi} 620f3f0262cSandi 621f3f0262cSandi/** 622f3f0262cSandi * FIXME: bad name. 623f3f0262cSandi */ 624f5b78785SAndreas Gohrclass MappedDiff extends Diff { 625f3f0262cSandi /** 626f3f0262cSandi * Constructor. 627f3f0262cSandi * 628f3f0262cSandi * Computes diff between sequences of strings. 629f3f0262cSandi * 630f3f0262cSandi * This can be used to compute things like 631f3f0262cSandi * case-insensitve diffs, or diffs which ignore 632f3f0262cSandi * changes in white-space. 633f3f0262cSandi * 634f3f0262cSandi * @param $from_lines array An array of strings. 635f3f0262cSandi * (Typically these are lines from a file.) 636f3f0262cSandi * 637f3f0262cSandi * @param $to_lines array An array of strings. 638f3f0262cSandi * 639f3f0262cSandi * @param $mapped_from_lines array This array should 640f3f0262cSandi * have the same size number of elements as $from_lines. 641f3f0262cSandi * The elements in $mapped_from_lines and 642f3f0262cSandi * $mapped_to_lines are what is actually compared 643f3f0262cSandi * when computing the diff. 644f3f0262cSandi * 645f3f0262cSandi * @param $mapped_to_lines array This array should 646f3f0262cSandi * have the same number of elements as $to_lines. 647f3f0262cSandi */ 648988c1340SPiyush Mishra function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) { 649f3f0262cSandi 650f5b78785SAndreas Gohr assert(count($from_lines) == count($mapped_from_lines)); 651f5b78785SAndreas Gohr assert(count($to_lines) == count($mapped_to_lines)); 652f3f0262cSandi 653988c1340SPiyush Mishra parent::__construct($mapped_from_lines, $mapped_to_lines); 654f3f0262cSandi 655f3f0262cSandi $xi = $yi = 0; 656f5b78785SAndreas Gohr $ecnt = count($this->edits); 657f5b78785SAndreas Gohr for ($i = 0; $i < $ecnt; $i++) { 658f3f0262cSandi $orig = &$this->edits[$i]->orig; 659f3f0262cSandi if (is_array($orig)) { 660f5b78785SAndreas Gohr $orig = array_slice($from_lines, $xi, count($orig)); 661f5b78785SAndreas Gohr $xi += count($orig); 662f3f0262cSandi } 663f3f0262cSandi 664f3f0262cSandi $closing = &$this->edits[$i]->closing; 665f3f0262cSandi if (is_array($closing)) { 666f5b78785SAndreas Gohr $closing = array_slice($to_lines, $yi, count($closing)); 667f5b78785SAndreas Gohr $yi += count($closing); 668f3f0262cSandi } 669f3f0262cSandi } 670f3f0262cSandi } 671f3f0262cSandi} 672f3f0262cSandi 673f3f0262cSandi/** 674f3f0262cSandi * A class to format Diffs 675f3f0262cSandi * 676f3f0262cSandi * This class formats the diff in classic diff format. 677f3f0262cSandi * It is intended that this class be customized via inheritance, 678f3f0262cSandi * to obtain fancier outputs. 679f3f0262cSandi */ 680f5b78785SAndreas Gohrclass DiffFormatter { 681f3f0262cSandi /** 682f3f0262cSandi * Number of leading context "lines" to preserve. 683f3f0262cSandi * 684f3f0262cSandi * This should be left at zero for this class, but subclasses 685f3f0262cSandi * may want to set this to other values. 686f3f0262cSandi */ 687f3f0262cSandi var $leading_context_lines = 0; 688f3f0262cSandi 689f3f0262cSandi /** 690f3f0262cSandi * Number of trailing context "lines" to preserve. 691f3f0262cSandi * 692f3f0262cSandi * This should be left at zero for this class, but subclasses 693f3f0262cSandi * may want to set this to other values. 694f3f0262cSandi */ 695f3f0262cSandi var $trailing_context_lines = 0; 696f3f0262cSandi 697f3f0262cSandi /** 698f3f0262cSandi * Format a diff. 699f3f0262cSandi * 700f3f0262cSandi * @param $diff object A Diff object. 701f3f0262cSandi * @return string The formatted output. 702f3f0262cSandi */ 703f3f0262cSandi function format($diff) { 704f3f0262cSandi 705f3f0262cSandi $xi = $yi = 1; 706f3f0262cSandi $block = false; 707f3f0262cSandi $context = array(); 708f3f0262cSandi 709f3f0262cSandi $nlead = $this->leading_context_lines; 710f3f0262cSandi $ntrail = $this->trailing_context_lines; 711f3f0262cSandi 712f3f0262cSandi $this->_start_diff(); 713f3f0262cSandi 714f3f0262cSandi foreach ($diff->edits as $edit) { 715f3f0262cSandi if ($edit->type == 'copy') { 716f3f0262cSandi if (is_array($block)) { 717f5b78785SAndreas Gohr if (count($edit->orig) <= $nlead + $ntrail) { 718f3f0262cSandi $block[] = $edit; 719f3f0262cSandi } 720f3f0262cSandi else{ 721f3f0262cSandi if ($ntrail) { 722f3f0262cSandi $context = array_slice($edit->orig, 0, $ntrail); 723f3f0262cSandi $block[] = new _DiffOp_Copy($context); 724f3f0262cSandi } 7257deca91bSRobin Getz $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block); 726f3f0262cSandi $block = false; 727f3f0262cSandi } 728f3f0262cSandi } 729f3f0262cSandi $context = $edit->orig; 730f3f0262cSandi } 731f3f0262cSandi else { 732f3f0262cSandi if (! is_array($block)) { 733f5b78785SAndreas Gohr $context = array_slice($context, count($context) - $nlead); 734f5b78785SAndreas Gohr $x0 = $xi - count($context); 735f5b78785SAndreas Gohr $y0 = $yi - count($context); 736f3f0262cSandi $block = array(); 737f3f0262cSandi if ($context) 738f3f0262cSandi $block[] = new _DiffOp_Copy($context); 739f3f0262cSandi } 740f3f0262cSandi $block[] = $edit; 741f3f0262cSandi } 742f3f0262cSandi 743f3f0262cSandi if ($edit->orig) 744f5b78785SAndreas Gohr $xi += count($edit->orig); 745f3f0262cSandi if ($edit->closing) 746f5b78785SAndreas Gohr $yi += count($edit->closing); 747f3f0262cSandi } 748f3f0262cSandi 749f3f0262cSandi if (is_array($block)) 7507deca91bSRobin Getz $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block); 751f3f0262cSandi 752f3f0262cSandi return $this->_end_diff(); 753f3f0262cSandi } 754f3f0262cSandi 755f3f0262cSandi function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) { 756f3f0262cSandi $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen)); 757f3f0262cSandi foreach ($edits as $edit) { 758f3f0262cSandi if ($edit->type == 'copy') 759f3f0262cSandi $this->_context($edit->orig); 760f3f0262cSandi elseif ($edit->type == 'add') 761f3f0262cSandi $this->_added($edit->closing); 762f3f0262cSandi elseif ($edit->type == 'delete') 763f3f0262cSandi $this->_deleted($edit->orig); 764f3f0262cSandi elseif ($edit->type == 'change') 765f3f0262cSandi $this->_changed($edit->orig, $edit->closing); 766f3f0262cSandi else 767f3f0262cSandi trigger_error("Unknown edit type", E_USER_ERROR); 768f3f0262cSandi } 769f3f0262cSandi $this->_end_block(); 770f3f0262cSandi } 771f3f0262cSandi 772f3f0262cSandi function _start_diff() { 773f3f0262cSandi ob_start(); 774f3f0262cSandi } 775f3f0262cSandi 776f3f0262cSandi function _end_diff() { 777f3f0262cSandi $val = ob_get_contents(); 778f3f0262cSandi ob_end_clean(); 779f3f0262cSandi return $val; 780f3f0262cSandi } 781f3f0262cSandi 782f3f0262cSandi function _block_header($xbeg, $xlen, $ybeg, $ylen) { 783f3f0262cSandi if ($xlen > 1) 784f3f0262cSandi $xbeg .= "," . ($xbeg + $xlen - 1); 785f3f0262cSandi if ($ylen > 1) 786f3f0262cSandi $ybeg .= "," . ($ybeg + $ylen - 1); 787f3f0262cSandi 788f3f0262cSandi return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg; 789f3f0262cSandi } 790f3f0262cSandi 791f3f0262cSandi function _start_block($header) { 792f3f0262cSandi echo $header; 793f3f0262cSandi } 794f3f0262cSandi 795f3f0262cSandi function _end_block() { 796f3f0262cSandi } 797f3f0262cSandi 798f3f0262cSandi function _lines($lines, $prefix = ' ') { 799f3f0262cSandi foreach ($lines as $line) 800f3f0262cSandi echo "$prefix $line\n"; 801f3f0262cSandi } 802f3f0262cSandi 803f3f0262cSandi function _context($lines) { 804f3f0262cSandi $this->_lines($lines); 805f3f0262cSandi } 806f3f0262cSandi 807f3f0262cSandi function _added($lines) { 808f3f0262cSandi $this->_lines($lines, ">"); 809f3f0262cSandi } 810f3f0262cSandi function _deleted($lines) { 811f3f0262cSandi $this->_lines($lines, "<"); 812f3f0262cSandi } 813f3f0262cSandi 814f3f0262cSandi function _changed($orig, $closing) { 815f3f0262cSandi $this->_deleted($orig); 816f3f0262cSandi echo "---\n"; 817f3f0262cSandi $this->_added($closing); 818f3f0262cSandi } 819f3f0262cSandi} 820f3f0262cSandi 82147a906eaSAndreas Gohr/** 82247a906eaSAndreas Gohr * Utilityclass for styling HTML formatted diffs 82347a906eaSAndreas Gohr * 82447a906eaSAndreas Gohr * Depends on global var $DIFF_INLINESTYLES, if true some minimal predefined 82547a906eaSAndreas Gohr * inline styles are used. Useful for HTML mails and RSS feeds 82647a906eaSAndreas Gohr * 82747a906eaSAndreas Gohr * @author Andreas Gohr <andi@splitbrain.org> 82847a906eaSAndreas Gohr */ 82947a906eaSAndreas Gohrclass HTMLDiff { 83047a906eaSAndreas Gohr /** 83147a906eaSAndreas Gohr * Holds the style names and basic CSS 83247a906eaSAndreas Gohr */ 83347a906eaSAndreas Gohr static public $styles = array( 83447a906eaSAndreas Gohr 'diff-addedline' => 'background-color: #ddffdd;', 83547a906eaSAndreas Gohr 'diff-deletedline' => 'background-color: #ffdddd;', 83647a906eaSAndreas Gohr 'diff-context' => 'background-color: #f5f5f5;', 83747a906eaSAndreas Gohr 'diff-mark' => 'color: #ff0000;', 83847a906eaSAndreas Gohr ); 83947a906eaSAndreas Gohr 84047a906eaSAndreas Gohr /** 84147a906eaSAndreas Gohr * Return a class or style parameter 84247a906eaSAndreas Gohr */ 84347a906eaSAndreas Gohr static function css($classname){ 84447a906eaSAndreas Gohr global $DIFF_INLINESTYLES; 84547a906eaSAndreas Gohr 84647a906eaSAndreas Gohr if($DIFF_INLINESTYLES){ 84747a906eaSAndreas Gohr if(!isset(self::$styles[$classname])) return ''; 84847a906eaSAndreas Gohr return 'style="'.self::$styles[$classname].'"'; 84947a906eaSAndreas Gohr }else{ 85047a906eaSAndreas Gohr return 'class="'.$classname.'"'; 85147a906eaSAndreas Gohr } 85247a906eaSAndreas Gohr } 85347a906eaSAndreas Gohr} 854f3f0262cSandi 855f3f0262cSandi/** 856f3f0262cSandi * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3 857f3f0262cSandi * 858f3f0262cSandi */ 859f3f0262cSandi 8609b3a5b24Schrisdefine('NBSP', "\xC2\xA0"); // utf-8 non-breaking space. 861f3f0262cSandi 862f3f0262cSandiclass _HWLDF_WordAccumulator { 863988c1340SPiyush Mishra 864988c1340SPiyush Mishra function __construct() { 865f3f0262cSandi $this->_lines = array(); 866f3f0262cSandi $this->_line = ''; 867f3f0262cSandi $this->_group = ''; 868f3f0262cSandi $this->_tag = ''; 869f3f0262cSandi } 870f3f0262cSandi 871f3f0262cSandi function _flushGroup($new_tag) { 872f3f0262cSandi if ($this->_group !== '') { 873f3f0262cSandi if ($this->_tag == 'mark') 87447a906eaSAndreas Gohr $this->_line .= '<strong '.HTMLDiff::css('diff-mark').'>'.$this->_group.'</strong>'; 875812bb04eSRobin Getz elseif ($this->_tag == 'add') 87647a906eaSAndreas Gohr $this->_line .= '<span '.HTMLDiff::css('diff-addedline').'>'.$this->_group.'</span>'; 877812bb04eSRobin Getz elseif ($this->_tag == 'del') 87847a906eaSAndreas Gohr $this->_line .= '<span '.HTMLDiff::css('diff-deletedline').'><del>'.$this->_group.'</del></span>'; 879f3f0262cSandi else 880f3f0262cSandi $this->_line .= $this->_group; 881f3f0262cSandi } 882f3f0262cSandi $this->_group = ''; 883f3f0262cSandi $this->_tag = $new_tag; 884f3f0262cSandi } 885f3f0262cSandi 886f3f0262cSandi function _flushLine($new_tag) { 887f3f0262cSandi $this->_flushGroup($new_tag); 888f3f0262cSandi if ($this->_line != '') 889f3f0262cSandi $this->_lines[] = $this->_line; 890f3f0262cSandi $this->_line = ''; 891f3f0262cSandi } 892f3f0262cSandi 893f3f0262cSandi function addWords($words, $tag = '') { 894f3f0262cSandi if ($tag != $this->_tag) 895f3f0262cSandi $this->_flushGroup($tag); 896f3f0262cSandi 897f3f0262cSandi foreach ($words as $word) { 898f3f0262cSandi // new-line should only come as first char of word. 899f3f0262cSandi if ($word == '') 900f3f0262cSandi continue; 901f3f0262cSandi if ($word[0] == "\n") { 902f3f0262cSandi $this->_group .= NBSP; 903f3f0262cSandi $this->_flushLine($tag); 904f3f0262cSandi $word = substr($word, 1); 905f3f0262cSandi } 906f3f0262cSandi assert(!strstr($word, "\n")); 907f3f0262cSandi $this->_group .= $word; 908f3f0262cSandi } 909f3f0262cSandi } 910f3f0262cSandi 911f3f0262cSandi function getLines() { 912f3f0262cSandi $this->_flushLine('~done'); 913f3f0262cSandi return $this->_lines; 914f3f0262cSandi } 915f3f0262cSandi} 916f3f0262cSandi 917f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff { 918f5b78785SAndreas Gohr 919988c1340SPiyush Mishra function __construct($orig_lines, $closing_lines) { 920f3f0262cSandi list ($orig_words, $orig_stripped) = $this->_split($orig_lines); 921f3f0262cSandi list ($closing_words, $closing_stripped) = $this->_split($closing_lines); 922f3f0262cSandi 923988c1340SPiyush Mishra parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped); 924f3f0262cSandi } 925f3f0262cSandi 926f3f0262cSandi function _split($lines) { 9275e1ee188SXin LI if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu', 9287deca91bSRobin Getz implode("\n", $lines), $m)) { 929f3f0262cSandi return array(array(''), array('')); 930f3f0262cSandi } 931f3f0262cSandi return array($m[0], $m[1]); 932f3f0262cSandi } 933f3f0262cSandi 934f3f0262cSandi function orig() { 935f3f0262cSandi $orig = new _HWLDF_WordAccumulator; 936f3f0262cSandi 937f3f0262cSandi foreach ($this->edits as $edit) { 938f3f0262cSandi if ($edit->type == 'copy') 939f3f0262cSandi $orig->addWords($edit->orig); 940f3f0262cSandi elseif ($edit->orig) 941f3f0262cSandi $orig->addWords($edit->orig, 'mark'); 942f3f0262cSandi } 943f3f0262cSandi return $orig->getLines(); 944f3f0262cSandi } 945f3f0262cSandi 946f3f0262cSandi function closing() { 947f3f0262cSandi $closing = new _HWLDF_WordAccumulator; 948f3f0262cSandi 949f3f0262cSandi foreach ($this->edits as $edit) { 950f3f0262cSandi if ($edit->type == 'copy') 951f3f0262cSandi $closing->addWords($edit->closing); 952f3f0262cSandi elseif ($edit->closing) 953f3f0262cSandi $closing->addWords($edit->closing, 'mark'); 954f3f0262cSandi } 955f3f0262cSandi return $closing->getLines(); 956f3f0262cSandi } 957f3f0262cSandi} 958f3f0262cSandi 959812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff { 960812bb04eSRobin Getz 961988c1340SPiyush Mishra function __construct($orig_lines, $closing_lines) { 962812bb04eSRobin Getz list ($orig_words, $orig_stripped) = $this->_split($orig_lines); 963812bb04eSRobin Getz list ($closing_words, $closing_stripped) = $this->_split($closing_lines); 964812bb04eSRobin Getz 965988c1340SPiyush Mishra parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped); 966812bb04eSRobin Getz } 967812bb04eSRobin Getz 968812bb04eSRobin Getz function _split($lines) { 969c5982caaSDanny Lin if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu', 970812bb04eSRobin Getz implode("\n", $lines), $m)) { 971812bb04eSRobin Getz return array(array(''), array('')); 972812bb04eSRobin Getz } 973812bb04eSRobin Getz return array($m[0], $m[1]); 974812bb04eSRobin Getz } 975812bb04eSRobin Getz 976812bb04eSRobin Getz function inline() { 977812bb04eSRobin Getz $orig = new _HWLDF_WordAccumulator; 978812bb04eSRobin Getz foreach ($this->edits as $edit) { 979812bb04eSRobin Getz if ($edit->type == 'copy') 9804f2305cbSAdrian Lang $orig->addWords($edit->closing); 981812bb04eSRobin Getz elseif ($edit->type == 'change'){ 982812bb04eSRobin Getz $orig->addWords($edit->orig, 'del'); 983812bb04eSRobin Getz $orig->addWords($edit->closing, 'add'); 984812bb04eSRobin Getz } elseif ($edit->type == 'delete') 985812bb04eSRobin Getz $orig->addWords($edit->orig, 'del'); 986812bb04eSRobin Getz elseif ($edit->type == 'add') 987812bb04eSRobin Getz $orig->addWords($edit->closing, 'add'); 988812bb04eSRobin Getz elseif ($edit->orig) 989812bb04eSRobin Getz $orig->addWords($edit->orig, 'del'); 990812bb04eSRobin Getz } 991812bb04eSRobin Getz return $orig->getLines(); 992812bb04eSRobin Getz } 993812bb04eSRobin Getz} 994812bb04eSRobin Getz 995f3f0262cSandi/** 996f3f0262cSandi * "Unified" diff formatter. 997f3f0262cSandi * 998f3f0262cSandi * This class formats the diff in classic "unified diff" format. 999f3f0262cSandi */ 1000f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter { 1001f5b78785SAndreas Gohr 1002988c1340SPiyush Mishra function __construct($context_lines = 4) { 1003f3f0262cSandi $this->leading_context_lines = $context_lines; 1004f3f0262cSandi $this->trailing_context_lines = $context_lines; 1005f3f0262cSandi } 1006f3f0262cSandi 1007f3f0262cSandi function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1008f3f0262cSandi if ($xlen != 1) 1009f3f0262cSandi $xbeg .= "," . $xlen; 1010f3f0262cSandi if ($ylen != 1) 1011f3f0262cSandi $ybeg .= "," . $ylen; 1012f3f0262cSandi return "@@ -$xbeg +$ybeg @@\n"; 1013f3f0262cSandi } 1014f3f0262cSandi 1015f3f0262cSandi function _added($lines) { 1016f3f0262cSandi $this->_lines($lines, "+"); 1017f3f0262cSandi } 1018f3f0262cSandi function _deleted($lines) { 1019f3f0262cSandi $this->_lines($lines, "-"); 1020f3f0262cSandi } 1021f3f0262cSandi function _changed($orig, $final) { 1022f3f0262cSandi $this->_deleted($orig); 1023f3f0262cSandi $this->_added($final); 1024f3f0262cSandi } 1025f3f0262cSandi} 1026f3f0262cSandi 1027f3f0262cSandi/** 1028f3f0262cSandi * Wikipedia Table style diff formatter. 1029f3f0262cSandi * 1030f3f0262cSandi */ 1031f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter { 1032f5b78785SAndreas Gohr 1033988c1340SPiyush Mishra function __construct() { 1034f3f0262cSandi $this->leading_context_lines = 2; 1035f3f0262cSandi $this->trailing_context_lines = 2; 1036f3f0262cSandi } 1037f3f0262cSandi 10382d880650SAdrian Lang function format($diff) { 10392d880650SAdrian Lang // Preserve whitespaces by converting some to non-breaking spaces. 10402d880650SAdrian Lang // Do not convert all of them to allow word-wrap. 10412d880650SAdrian Lang $val = parent::format($diff); 1042e260f93bSAnika Henke $val = str_replace(' ','  ', $val); 1043e260f93bSAnika Henke $val = preg_replace('/ (?=<)|(?<=[ >]) /', ' ', $val); 10442d880650SAdrian Lang return $val; 10452d880650SAdrian Lang } 10462d880650SAdrian Lang 1047f3f0262cSandi function _pre($text){ 1048f3f0262cSandi $text = htmlspecialchars($text); 1049f3f0262cSandi return $text; 1050f3f0262cSandi } 1051f3f0262cSandi 1052f3f0262cSandi function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1053f3f0262cSandi global $lang; 1054f3f0262cSandi $l1 = $lang['line'].' '.$xbeg; 1055f3f0262cSandi $l2 = $lang['line'].' '.$ybeg; 105647a906eaSAndreas Gohr $r = '<tr><td '.HTMLDiff::css('diff-blockheader').' colspan="2">'.$l1.":</td>\n". 105747a906eaSAndreas Gohr '<td '.HTMLDiff::css('diff-blockheader').' colspan="2">'.$l2.":</td>\n". 10585048c277SAnika Henke "</tr>\n"; 1059f3f0262cSandi return $r; 1060f3f0262cSandi } 1061f3f0262cSandi 1062f3f0262cSandi function _start_block($header) { 1063f3f0262cSandi print($header); 1064f3f0262cSandi } 1065f3f0262cSandi 1066f3f0262cSandi function _end_block() { 1067f3f0262cSandi } 1068f3f0262cSandi 1069f3f0262cSandi function _lines($lines, $prefix=' ', $color="white") { 1070f3f0262cSandi } 1071f3f0262cSandi 1072f3f0262cSandi function addedLine($line) { 1073*f76724a4STom N Harris return '<td '.HTMLDiff::css('diff-lineheader').'>+</td>'. 1074*f76724a4STom N Harris '<td '.HTMLDiff::css('diff-addedline').'>' . $line.'</td>'; 1075f3f0262cSandi } 1076f3f0262cSandi 1077f3f0262cSandi function deletedLine($line) { 1078*f76724a4STom N Harris return '<td '.HTMLDiff::css('diff-lineheader').'>-</td>'. 1079*f76724a4STom N Harris '<td '.HTMLDiff::css('diff-deletedline').'>' . $line.'</td>'; 1080f3f0262cSandi } 1081f3f0262cSandi 1082f3f0262cSandi function emptyLine() { 1083e260f93bSAnika Henke return '<td colspan="2"> </td>'; 1084f3f0262cSandi } 1085f3f0262cSandi 1086f3f0262cSandi function contextLine($line) { 1087*f76724a4STom N Harris return '<td '.HTMLDiff::css('diff-lineheader').'> </td>'. 1088*f76724a4STom N Harris '<td '.HTMLDiff::css('diff-context').'>'.$line.'</td>'; 1089f3f0262cSandi } 1090f3f0262cSandi 1091f3f0262cSandi function _added($lines) { 1092f3f0262cSandi foreach ($lines as $line) { 10937deca91bSRobin Getz print('<tr>' . $this->emptyLine() . $this->addedLine($line) . "</tr>\n"); 1094f3f0262cSandi } 1095f3f0262cSandi } 1096f3f0262cSandi 1097f3f0262cSandi function _deleted($lines) { 1098f3f0262cSandi foreach ($lines as $line) { 10997deca91bSRobin Getz print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n"); 1100f3f0262cSandi } 1101f3f0262cSandi } 1102f3f0262cSandi 1103f3f0262cSandi function _context($lines) { 1104f3f0262cSandi foreach ($lines as $line) { 11057deca91bSRobin Getz print('<tr>' . $this->contextLine($line) . $this->contextLine($line) . "</tr>\n"); 1106f3f0262cSandi } 1107f3f0262cSandi } 1108f3f0262cSandi 1109f3f0262cSandi function _changed($orig, $closing) { 1110f3f0262cSandi $diff = new WordLevelDiff($orig, $closing); 1111f3f0262cSandi $del = $diff->orig(); 1112f3f0262cSandi $add = $diff->closing(); 1113f3f0262cSandi 1114f3f0262cSandi while ($line = array_shift($del)) { 1115f3f0262cSandi $aline = array_shift($add); 11167deca91bSRobin Getz print('<tr>' . $this->deletedLine($line) . $this->addedLine($aline) . "</tr>\n"); 1117f3f0262cSandi } 1118f3f0262cSandi $this->_added($add); # If any leftovers 1119f3f0262cSandi } 1120f3f0262cSandi} 1121f3f0262cSandi 1122812bb04eSRobin Getz/** 1123812bb04eSRobin Getz * Inline style diff formatter. 1124812bb04eSRobin Getz * 1125812bb04eSRobin Getz */ 1126812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter { 1127*f76724a4STom N Harris var $colspan = 2; 1128988c1340SPiyush Mishra 1129988c1340SPiyush Mishra function __construct() { 1130812bb04eSRobin Getz $this->leading_context_lines = 2; 1131812bb04eSRobin Getz $this->trailing_context_lines = 2; 1132812bb04eSRobin Getz } 1133812bb04eSRobin Getz 1134812bb04eSRobin Getz function format($diff) { 1135812bb04eSRobin Getz // Preserve whitespaces by converting some to non-breaking spaces. 1136812bb04eSRobin Getz // Do not convert all of them to allow word-wrap. 1137812bb04eSRobin Getz $val = parent::format($diff); 1138e260f93bSAnika Henke $val = str_replace(' ','  ', $val); 1139e260f93bSAnika Henke $val = preg_replace('/ (?=<)|(?<=[ >]) /', ' ', $val); 1140812bb04eSRobin Getz return $val; 1141812bb04eSRobin Getz } 1142812bb04eSRobin Getz 1143812bb04eSRobin Getz function _pre($text){ 1144812bb04eSRobin Getz $text = htmlspecialchars($text); 1145812bb04eSRobin Getz return $text; 1146812bb04eSRobin Getz } 1147812bb04eSRobin Getz 1148812bb04eSRobin Getz function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1149812bb04eSRobin Getz global $lang; 1150812bb04eSRobin Getz if ($xlen != 1) 1151812bb04eSRobin Getz $xbeg .= "," . $xlen; 1152812bb04eSRobin Getz if ($ylen != 1) 1153812bb04eSRobin Getz $ybeg .= "," . $ylen; 115447a906eaSAndreas Gohr $r = '<tr><td colspan="'.$this->colspan.'" '.HTMLDiff::css('diff-blockheader').'>@@ '.$lang['line']." -$xbeg +$ybeg @@"; 115547a906eaSAndreas Gohr $r .= ' <span '.HTMLDiff::css('diff-deletedline').'><del>'.$lang['deleted'].'</del></span>'; 115647a906eaSAndreas Gohr $r .= ' <span '.HTMLDiff::css('diff-addedline').'>'.$lang['created'].'</span>'; 1157812bb04eSRobin Getz $r .= "</td></tr>\n"; 1158812bb04eSRobin Getz return $r; 1159812bb04eSRobin Getz } 1160812bb04eSRobin Getz 1161812bb04eSRobin Getz function _start_block($header) { 1162812bb04eSRobin Getz print($header."\n"); 1163812bb04eSRobin Getz } 1164812bb04eSRobin Getz 1165812bb04eSRobin Getz function _end_block() { 1166812bb04eSRobin Getz } 1167812bb04eSRobin Getz 1168812bb04eSRobin Getz function _lines($lines, $prefix=' ', $color="white") { 1169812bb04eSRobin Getz } 1170812bb04eSRobin Getz 1171812bb04eSRobin Getz function _added($lines) { 1172812bb04eSRobin Getz foreach ($lines as $line) { 1173*f76724a4STom N Harris print('<tr><td '.HTMLDiff::css('diff-lineheader').'>+</td><td colspan="'.($this->colspan-1).'" '.HTMLDiff::css('diff-addedline').'>'. $line . "</td></tr>\n"); 1174812bb04eSRobin Getz } 1175812bb04eSRobin Getz } 1176812bb04eSRobin Getz 1177812bb04eSRobin Getz function _deleted($lines) { 1178812bb04eSRobin Getz foreach ($lines as $line) { 1179*f76724a4STom N Harris print('<tr><td '.HTMLDiff::css('diff-lineheader').'>-</td><td colspan="'.($this->colspan-1).'" '.HTMLDiff::css('diff-deletedline').'><del>' . $line . "</del></td></tr>\n"); 1180812bb04eSRobin Getz } 1181812bb04eSRobin Getz } 1182812bb04eSRobin Getz 1183812bb04eSRobin Getz function _context($lines) { 1184812bb04eSRobin Getz foreach ($lines as $line) { 1185*f76724a4STom N Harris print('<tr><td '.HTMLDiff::css('diff-lineheader').'> </td><td colspan="'.($this->colspan-1).'" '.HTMLDiff::css('diff-context').'>'.$line."</td></tr>\n"); 1186812bb04eSRobin Getz } 1187812bb04eSRobin Getz } 1188812bb04eSRobin Getz 1189812bb04eSRobin Getz function _changed($orig, $closing) { 1190812bb04eSRobin Getz $diff = new InlineWordLevelDiff($orig, $closing); 1191812bb04eSRobin Getz $add = $diff->inline(); 1192812bb04eSRobin Getz 1193812bb04eSRobin Getz foreach ($add as $line) 1194*f76724a4STom N Harris print('<tr><td '.HTMLDiff::css('diff-lineheader').'>!</td><td colspan="'.($this->colspan-1).'">'.$line."</td></tr>\n"); 1195812bb04eSRobin Getz } 1196812bb04eSRobin Getz} 1197812bb04eSRobin Getz 1198340756e4Sandi 1199e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 : 1200