1f3f0262cSandi<?php 215fae107Sandi/** 315fae107Sandi * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3) 415fae107Sandi * 515fae107Sandi * Additions by Axel Boldt for MediaWiki 615fae107Sandi * 715fae107Sandi * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> 815fae107Sandi * @license You may copy this code freely under the conditions of the GPL. 915fae107Sandi */ 10f3f0262cSandidefine('USE_ASSERTS', function_exists('assert')); 11f3f0262cSandi 12f3f0262cSandiclass _DiffOp { 13f3f0262cSandi var $type; 14f3f0262cSandi var $orig; 15f3f0262cSandi var $closing; 16f3f0262cSandi 17f3f0262cSandi function reverse() { 18f3f0262cSandi trigger_error("pure virtual", E_USER_ERROR); 19f3f0262cSandi } 20f3f0262cSandi 21f3f0262cSandi function norig() { 22f5b78785SAndreas Gohr return $this->orig ? count($this->orig) : 0; 23f3f0262cSandi } 24f3f0262cSandi 25f3f0262cSandi function nclosing() { 26f5b78785SAndreas Gohr return $this->closing ? count($this->closing) : 0; 27f3f0262cSandi } 28f3f0262cSandi} 29f3f0262cSandi 30f3f0262cSandiclass _DiffOp_Copy extends _DiffOp { 31f3f0262cSandi var $type = 'copy'; 32*988c1340SPiyush Mishra /** 33*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 34*988c1340SPiyush Mishra */ 35f3f0262cSandi function _DiffOp_Copy($orig, $closing = false) { 36*988c1340SPiyush Mishra $this->__construct($orig, $closing); 37*988c1340SPiyush Mishra } 38*988c1340SPiyush Mishra 39*988c1340SPiyush Mishra function __construct($orig, $closing = false) { 40f3f0262cSandi if (!is_array($closing)) 41f3f0262cSandi $closing = $orig; 42f3f0262cSandi $this->orig = $orig; 43f3f0262cSandi $this->closing = $closing; 44f3f0262cSandi } 45f3f0262cSandi 46f3f0262cSandi function reverse() { 47f3f0262cSandi return new _DiffOp_Copy($this->closing, $this->orig); 48f3f0262cSandi } 49f3f0262cSandi} 50f3f0262cSandi 51f3f0262cSandiclass _DiffOp_Delete extends _DiffOp { 52f3f0262cSandi var $type = 'delete'; 53*988c1340SPiyush Mishra /** 54*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 55*988c1340SPiyush Mishra */ 56f3f0262cSandi function _DiffOp_Delete($lines) { 57*988c1340SPiyush Mishra $this->__construct($lines); 58*988c1340SPiyush Mishra } 59*988c1340SPiyush Mishra 60*988c1340SPiyush Mishra function __construct($lines) { 61f3f0262cSandi $this->orig = $lines; 62f3f0262cSandi $this->closing = false; 63f3f0262cSandi } 64f3f0262cSandi 65f3f0262cSandi function reverse() { 66f3f0262cSandi return new _DiffOp_Add($this->orig); 67f3f0262cSandi } 68f3f0262cSandi} 69f3f0262cSandi 70f3f0262cSandiclass _DiffOp_Add extends _DiffOp { 71f3f0262cSandi var $type = 'add'; 72*988c1340SPiyush Mishra /** 73*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 74*988c1340SPiyush Mishra */ 75f3f0262cSandi function _DiffOp_Add($lines) { 76*988c1340SPiyush Mishra $this->__construct($lines); 77*988c1340SPiyush Mishra } 78*988c1340SPiyush Mishra 79*988c1340SPiyush Mishra function __construct($lines) { 80f3f0262cSandi $this->closing = $lines; 81f3f0262cSandi $this->orig = false; 82f3f0262cSandi } 83f3f0262cSandi 84f3f0262cSandi function reverse() { 85f3f0262cSandi return new _DiffOp_Delete($this->closing); 86f3f0262cSandi } 87f3f0262cSandi} 88f3f0262cSandi 89f3f0262cSandiclass _DiffOp_Change extends _DiffOp { 90f3f0262cSandi var $type = 'change'; 91*988c1340SPiyush Mishra /** 92*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 93*988c1340SPiyush Mishra */ 94f3f0262cSandi function _DiffOp_Change($orig, $closing) { 95*988c1340SPiyush Mishra $this->__construct($orig, $closing); 96*988c1340SPiyush Mishra } 97*988c1340SPiyush Mishra 98*988c1340SPiyush Mishra function __construct($orig, $closing) { 99f3f0262cSandi $this->orig = $orig; 100f3f0262cSandi $this->closing = $closing; 101f3f0262cSandi } 102f3f0262cSandi 103f3f0262cSandi function reverse() { 104f3f0262cSandi return new _DiffOp_Change($this->closing, $this->orig); 105f3f0262cSandi } 106f3f0262cSandi} 107f3f0262cSandi 108f3f0262cSandi 109f3f0262cSandi/** 110f3f0262cSandi * Class used internally by Diff to actually compute the diffs. 111f3f0262cSandi * 112f3f0262cSandi * The algorithm used here is mostly lifted from the perl module 113f3f0262cSandi * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: 114f3f0262cSandi * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip 115f3f0262cSandi * 116f3f0262cSandi * More ideas are taken from: 117f3f0262cSandi * http://www.ics.uci.edu/~eppstein/161/960229.html 118f3f0262cSandi * 119f3f0262cSandi * Some ideas are (and a bit of code) are from from analyze.c, from GNU 120f3f0262cSandi * diffutils-2.7, which can be found at: 121f3f0262cSandi * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz 122f3f0262cSandi * 123f3f0262cSandi * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations) 124f3f0262cSandi * are my own. 125f3f0262cSandi * 126f3f0262cSandi * @author Geoffrey T. Dairiki 127f3f0262cSandi * @access private 128f3f0262cSandi */ 129f5b78785SAndreas Gohrclass _DiffEngine { 130f5b78785SAndreas Gohr 131f3f0262cSandi function diff($from_lines, $to_lines) { 132f5b78785SAndreas Gohr $n_from = count($from_lines); 133f5b78785SAndreas Gohr $n_to = count($to_lines); 134f3f0262cSandi 135f3f0262cSandi $this->xchanged = $this->ychanged = array(); 136f3f0262cSandi $this->xv = $this->yv = array(); 137f3f0262cSandi $this->xind = $this->yind = array(); 138f3f0262cSandi unset($this->seq); 139f3f0262cSandi unset($this->in_seq); 140f3f0262cSandi unset($this->lcs); 141f3f0262cSandi 142f3f0262cSandi // Skip leading common lines. 143f3f0262cSandi for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { 144f3f0262cSandi if ($from_lines[$skip] != $to_lines[$skip]) 145f3f0262cSandi break; 146f3f0262cSandi $this->xchanged[$skip] = $this->ychanged[$skip] = false; 147f3f0262cSandi } 148f3f0262cSandi // Skip trailing common lines. 149f5b78785SAndreas Gohr $xi = $n_from; 150f5b78785SAndreas Gohr $yi = $n_to; 151f3f0262cSandi for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { 152f3f0262cSandi if ($from_lines[$xi] != $to_lines[$yi]) 153f3f0262cSandi break; 154f3f0262cSandi $this->xchanged[$xi] = $this->ychanged[$yi] = false; 155f3f0262cSandi } 156f3f0262cSandi 157f3f0262cSandi // Ignore lines which do not exist in both files. 158f3f0262cSandi for ($xi = $skip; $xi < $n_from - $endskip; $xi++) 159f3f0262cSandi $xhash[$from_lines[$xi]] = 1; 160f3f0262cSandi for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { 161f3f0262cSandi $line = $to_lines[$yi]; 162f3f0262cSandi if (($this->ychanged[$yi] = empty($xhash[$line]))) 163f3f0262cSandi continue; 164f3f0262cSandi $yhash[$line] = 1; 165f3f0262cSandi $this->yv[] = $line; 166f3f0262cSandi $this->yind[] = $yi; 167f3f0262cSandi } 168f3f0262cSandi for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 169f3f0262cSandi $line = $from_lines[$xi]; 170f3f0262cSandi if (($this->xchanged[$xi] = empty($yhash[$line]))) 171f3f0262cSandi continue; 172f3f0262cSandi $this->xv[] = $line; 173f3f0262cSandi $this->xind[] = $xi; 174f3f0262cSandi } 175f3f0262cSandi 176f3f0262cSandi // Find the LCS. 177f5b78785SAndreas Gohr $this->_compareseq(0, count($this->xv), 0, count($this->yv)); 178f3f0262cSandi 179f3f0262cSandi // Merge edits when possible 180f3f0262cSandi $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); 181f3f0262cSandi $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); 182f3f0262cSandi 183f3f0262cSandi // Compute the edit operations. 184f3f0262cSandi $edits = array(); 185f3f0262cSandi $xi = $yi = 0; 186f3f0262cSandi while ($xi < $n_from || $yi < $n_to) { 187f3f0262cSandi USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); 188f3f0262cSandi USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); 189f3f0262cSandi 190f3f0262cSandi // Skip matching "snake". 191f3f0262cSandi $copy = array(); 1927deca91bSRobin Getz while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { 193f3f0262cSandi $copy[] = $from_lines[$xi++]; 194f3f0262cSandi ++$yi; 195f3f0262cSandi } 196f3f0262cSandi if ($copy) 197f3f0262cSandi $edits[] = new _DiffOp_Copy($copy); 198f3f0262cSandi 199f3f0262cSandi // Find deletes & adds. 200f3f0262cSandi $delete = array(); 201f3f0262cSandi while ($xi < $n_from && $this->xchanged[$xi]) 202f3f0262cSandi $delete[] = $from_lines[$xi++]; 203f3f0262cSandi 204f3f0262cSandi $add = array(); 205f3f0262cSandi while ($yi < $n_to && $this->ychanged[$yi]) 206f3f0262cSandi $add[] = $to_lines[$yi++]; 207f3f0262cSandi 208f3f0262cSandi if ($delete && $add) 209f3f0262cSandi $edits[] = new _DiffOp_Change($delete, $add); 210f3f0262cSandi elseif ($delete) 211f3f0262cSandi $edits[] = new _DiffOp_Delete($delete); 212f3f0262cSandi elseif ($add) 213f3f0262cSandi $edits[] = new _DiffOp_Add($add); 214f3f0262cSandi } 215f3f0262cSandi return $edits; 216f3f0262cSandi } 217f3f0262cSandi 218f3f0262cSandi 21915fae107Sandi /** 22015fae107Sandi * Divide the Largest Common Subsequence (LCS) of the sequences 221f3f0262cSandi * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally 222f3f0262cSandi * sized segments. 223f3f0262cSandi * 224f3f0262cSandi * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an 225f3f0262cSandi * array of NCHUNKS+1 (X, Y) indexes giving the diving points between 226f3f0262cSandi * sub sequences. The first sub-sequence is contained in [X0, X1), 227f3f0262cSandi * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note 228f3f0262cSandi * that (X0, Y0) == (XOFF, YOFF) and 229f3f0262cSandi * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 230f3f0262cSandi * 231f3f0262cSandi * This function assumes that the first lines of the specified portions 232f3f0262cSandi * of the two files do not match, and likewise that the last lines do not 233f3f0262cSandi * match. The caller must trim matching lines from the beginning and end 234f3f0262cSandi * of the portions it is going to specify. 235f3f0262cSandi */ 236f3f0262cSandi function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) { 237f3f0262cSandi $flip = false; 238f3f0262cSandi 239f3f0262cSandi if ($xlim - $xoff > $ylim - $yoff) { 240f3f0262cSandi // Things seems faster (I'm not sure I understand why) 241f3f0262cSandi // when the shortest sequence in X. 242f3f0262cSandi $flip = true; 2437deca91bSRobin Getz list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim); 244f3f0262cSandi } 245f3f0262cSandi 246f3f0262cSandi if ($flip) 247f3f0262cSandi for ($i = $ylim - 1; $i >= $yoff; $i--) 248f3f0262cSandi $ymatches[$this->xv[$i]][] = $i; 249f3f0262cSandi else 250f3f0262cSandi for ($i = $ylim - 1; $i >= $yoff; $i--) 251f3f0262cSandi $ymatches[$this->yv[$i]][] = $i; 252f3f0262cSandi 253f3f0262cSandi $this->lcs = 0; 254f3f0262cSandi $this->seq[0]= $yoff - 1; 255f3f0262cSandi $this->in_seq = array(); 256f3f0262cSandi $ymids[0] = array(); 257f3f0262cSandi 258f3f0262cSandi $numer = $xlim - $xoff + $nchunks - 1; 259f3f0262cSandi $x = $xoff; 260f3f0262cSandi for ($chunk = 0; $chunk < $nchunks; $chunk++) { 261f3f0262cSandi if ($chunk > 0) 262f3f0262cSandi for ($i = 0; $i <= $this->lcs; $i++) 263f3f0262cSandi $ymids[$i][$chunk-1] = $this->seq[$i]; 264f3f0262cSandi 265f3f0262cSandi $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); 266f3f0262cSandi for ( ; $x < $x1; $x++) { 267f3f0262cSandi $line = $flip ? $this->yv[$x] : $this->xv[$x]; 268f3f0262cSandi if (empty($ymatches[$line])) 269f3f0262cSandi continue; 270f3f0262cSandi $matches = $ymatches[$line]; 271f3f0262cSandi reset($matches); 272f3f0262cSandi while (list ($junk, $y) = each($matches)) 273f3f0262cSandi if (empty($this->in_seq[$y])) { 274f3f0262cSandi $k = $this->_lcs_pos($y); 275f3f0262cSandi USE_ASSERTS && assert($k > 0); 276f3f0262cSandi $ymids[$k] = $ymids[$k-1]; 277f3f0262cSandi break; 278f3f0262cSandi } 279f3f0262cSandi while (list ($junk, $y) = each($matches)) { 280f3f0262cSandi if ($y > $this->seq[$k-1]) { 281f3f0262cSandi USE_ASSERTS && assert($y < $this->seq[$k]); 282f3f0262cSandi // Optimization: this is a common case: 283f3f0262cSandi // next match is just replacing previous match. 284f3f0262cSandi $this->in_seq[$this->seq[$k]] = false; 285f3f0262cSandi $this->seq[$k] = $y; 286f3f0262cSandi $this->in_seq[$y] = 1; 287f3f0262cSandi } 288f3f0262cSandi else if (empty($this->in_seq[$y])) { 289f3f0262cSandi $k = $this->_lcs_pos($y); 290f3f0262cSandi USE_ASSERTS && assert($k > 0); 291f3f0262cSandi $ymids[$k] = $ymids[$k-1]; 292f3f0262cSandi } 293f3f0262cSandi } 294f3f0262cSandi } 295f3f0262cSandi } 296f3f0262cSandi 297f3f0262cSandi $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); 298f3f0262cSandi $ymid = $ymids[$this->lcs]; 299f3f0262cSandi for ($n = 0; $n < $nchunks - 1; $n++) { 300f3f0262cSandi $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); 301f3f0262cSandi $y1 = $ymid[$n] + 1; 302f3f0262cSandi $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); 303f3f0262cSandi } 304f3f0262cSandi $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); 305f3f0262cSandi 306f3f0262cSandi return array($this->lcs, $seps); 307f3f0262cSandi } 308f3f0262cSandi 309f3f0262cSandi function _lcs_pos($ypos) { 310f3f0262cSandi $end = $this->lcs; 311f3f0262cSandi if ($end == 0 || $ypos > $this->seq[$end]) { 312f3f0262cSandi $this->seq[++$this->lcs] = $ypos; 313f3f0262cSandi $this->in_seq[$ypos] = 1; 314f3f0262cSandi return $this->lcs; 315f3f0262cSandi } 316f3f0262cSandi 317f3f0262cSandi $beg = 1; 318f3f0262cSandi while ($beg < $end) { 319f3f0262cSandi $mid = (int)(($beg + $end) / 2); 320f3f0262cSandi if ($ypos > $this->seq[$mid]) 321f3f0262cSandi $beg = $mid + 1; 322f3f0262cSandi else 323f3f0262cSandi $end = $mid; 324f3f0262cSandi } 325f3f0262cSandi 326f3f0262cSandi USE_ASSERTS && assert($ypos != $this->seq[$end]); 327f3f0262cSandi 328f3f0262cSandi $this->in_seq[$this->seq[$end]] = false; 329f3f0262cSandi $this->seq[$end] = $ypos; 330f3f0262cSandi $this->in_seq[$ypos] = 1; 331f3f0262cSandi return $end; 332f3f0262cSandi } 333f3f0262cSandi 33415fae107Sandi /** 33515fae107Sandi * Find LCS of two sequences. 336f3f0262cSandi * 337f3f0262cSandi * The results are recorded in the vectors $this->{x,y}changed[], by 338f3f0262cSandi * storing a 1 in the element for each line that is an insertion 339f3f0262cSandi * or deletion (ie. is not in the LCS). 340f3f0262cSandi * 341f3f0262cSandi * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 342f3f0262cSandi * 343f3f0262cSandi * Note that XLIM, YLIM are exclusive bounds. 344f3f0262cSandi * All line numbers are origin-0 and discarded lines are not counted. 345f3f0262cSandi */ 346f3f0262cSandi function _compareseq($xoff, $xlim, $yoff, $ylim) { 347f3f0262cSandi // Slide down the bottom initial diagonal. 3487deca91bSRobin Getz while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) { 349f3f0262cSandi ++$xoff; 350f3f0262cSandi ++$yoff; 351f3f0262cSandi } 352f3f0262cSandi 353f3f0262cSandi // Slide up the top initial diagonal. 3547deca91bSRobin Getz while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { 355f3f0262cSandi --$xlim; 356f3f0262cSandi --$ylim; 357f3f0262cSandi } 358f3f0262cSandi 359f3f0262cSandi if ($xoff == $xlim || $yoff == $ylim) 360f3f0262cSandi $lcs = 0; 361f3f0262cSandi else { 362f3f0262cSandi // This is ad hoc but seems to work well. 363f3f0262cSandi //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); 364f3f0262cSandi //$nchunks = max(2,min(8,(int)$nchunks)); 365f3f0262cSandi $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; 366f3f0262cSandi list ($lcs, $seps) 367f3f0262cSandi = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks); 368f3f0262cSandi } 369f3f0262cSandi 370f3f0262cSandi if ($lcs == 0) { 371f3f0262cSandi // X and Y sequences have no common subsequence: 372f3f0262cSandi // mark all changed. 373f3f0262cSandi while ($yoff < $ylim) 374f3f0262cSandi $this->ychanged[$this->yind[$yoff++]] = 1; 375f3f0262cSandi while ($xoff < $xlim) 376f3f0262cSandi $this->xchanged[$this->xind[$xoff++]] = 1; 377f3f0262cSandi } 378f3f0262cSandi else { 379f3f0262cSandi // Use the partitions to split this problem into subproblems. 380f3f0262cSandi reset($seps); 381f3f0262cSandi $pt1 = $seps[0]; 382f3f0262cSandi while ($pt2 = next($seps)) { 383f3f0262cSandi $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); 384f3f0262cSandi $pt1 = $pt2; 385f3f0262cSandi } 386f3f0262cSandi } 387f3f0262cSandi } 388f3f0262cSandi 38915fae107Sandi /** 39015fae107Sandi * Adjust inserts/deletes of identical lines to join changes 391f3f0262cSandi * as much as possible. 392f3f0262cSandi * 393f3f0262cSandi * We do something when a run of changed lines include a 394f3f0262cSandi * line at one end and has an excluded, identical line at the other. 395f3f0262cSandi * We are free to choose which identical line is included. 396f3f0262cSandi * `compareseq' usually chooses the one at the beginning, 397f3f0262cSandi * but usually it is cleaner to consider the following identical line 398f3f0262cSandi * to be the "change". 399f3f0262cSandi * 400f3f0262cSandi * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 401f3f0262cSandi */ 402f3f0262cSandi function _shift_boundaries($lines, &$changed, $other_changed) { 403f3f0262cSandi $i = 0; 404f3f0262cSandi $j = 0; 405f3f0262cSandi 406f5b78785SAndreas Gohr USE_ASSERTS && assert('count($lines) == count($changed)'); 407f5b78785SAndreas Gohr $len = count($lines); 408f5b78785SAndreas Gohr $other_len = count($other_changed); 409f3f0262cSandi 410f3f0262cSandi while (1) { 411f3f0262cSandi /* 412f3f0262cSandi * Scan forwards to find beginning of another run of changes. 413f3f0262cSandi * Also keep track of the corresponding point in the other file. 414f3f0262cSandi * 415f3f0262cSandi * Throughout this code, $i and $j are adjusted together so that 416f3f0262cSandi * the first $i elements of $changed and the first $j elements 417f3f0262cSandi * of $other_changed both contain the same number of zeros 418f3f0262cSandi * (unchanged lines). 419f3f0262cSandi * Furthermore, $j is always kept so that $j == $other_len or 420f3f0262cSandi * $other_changed[$j] == false. 421f3f0262cSandi */ 422f3f0262cSandi while ($j < $other_len && $other_changed[$j]) 423f3f0262cSandi $j++; 424f3f0262cSandi 425f3f0262cSandi while ($i < $len && ! $changed[$i]) { 426f3f0262cSandi USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); 427f5b78785SAndreas Gohr $i++; 428f5b78785SAndreas Gohr $j++; 429f3f0262cSandi while ($j < $other_len && $other_changed[$j]) 430f3f0262cSandi $j++; 431f3f0262cSandi } 432f3f0262cSandi 433f3f0262cSandi if ($i == $len) 434f3f0262cSandi break; 435f3f0262cSandi 436f3f0262cSandi $start = $i; 437f3f0262cSandi 438f3f0262cSandi // Find the end of this run of changes. 439f3f0262cSandi while (++$i < $len && $changed[$i]) 440f3f0262cSandi continue; 441f3f0262cSandi 442f3f0262cSandi do { 443f3f0262cSandi /* 444f3f0262cSandi * Record the length of this run of changes, so that 445f3f0262cSandi * we can later determine whether the run has grown. 446f3f0262cSandi */ 447f3f0262cSandi $runlength = $i - $start; 448f3f0262cSandi 449f3f0262cSandi /* 450f3f0262cSandi * Move the changed region back, so long as the 451f3f0262cSandi * previous unchanged line matches the last changed one. 452f3f0262cSandi * This merges with previous changed regions. 453f3f0262cSandi */ 454f3f0262cSandi while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { 455f3f0262cSandi $changed[--$start] = 1; 456f3f0262cSandi $changed[--$i] = false; 457f3f0262cSandi while ($start > 0 && $changed[$start - 1]) 458f3f0262cSandi $start--; 459f3f0262cSandi USE_ASSERTS && assert('$j > 0'); 460f3f0262cSandi while ($other_changed[--$j]) 461f3f0262cSandi continue; 462f3f0262cSandi USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); 463f3f0262cSandi } 464f3f0262cSandi 465f3f0262cSandi /* 466f3f0262cSandi * Set CORRESPONDING to the end of the changed run, at the last 467f3f0262cSandi * point where it corresponds to a changed run in the other file. 468f3f0262cSandi * CORRESPONDING == LEN means no such point has been found. 469f3f0262cSandi */ 470f3f0262cSandi $corresponding = $j < $other_len ? $i : $len; 471f3f0262cSandi 472f3f0262cSandi /* 473f3f0262cSandi * Move the changed region forward, so long as the 474f3f0262cSandi * first changed line matches the following unchanged one. 475f3f0262cSandi * This merges with following changed regions. 476f3f0262cSandi * Do this second, so that if there are no merges, 477f3f0262cSandi * the changed region is moved forward as far as possible. 478f3f0262cSandi */ 479f3f0262cSandi while ($i < $len && $lines[$start] == $lines[$i]) { 480f3f0262cSandi $changed[$start++] = false; 481f3f0262cSandi $changed[$i++] = 1; 482f3f0262cSandi while ($i < $len && $changed[$i]) 483f3f0262cSandi $i++; 484f3f0262cSandi 485f3f0262cSandi USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); 486f3f0262cSandi $j++; 487f3f0262cSandi if ($j < $other_len && $other_changed[$j]) { 488f3f0262cSandi $corresponding = $i; 489f3f0262cSandi while ($j < $other_len && $other_changed[$j]) 490f3f0262cSandi $j++; 491f3f0262cSandi } 492f3f0262cSandi } 493f3f0262cSandi } while ($runlength != $i - $start); 494f3f0262cSandi 495f3f0262cSandi /* 496f3f0262cSandi * If possible, move the fully-merged run of changes 497f3f0262cSandi * back to a corresponding run in the other file. 498f3f0262cSandi */ 499f3f0262cSandi while ($corresponding < $i) { 500f3f0262cSandi $changed[--$start] = 1; 501f3f0262cSandi $changed[--$i] = 0; 502f3f0262cSandi USE_ASSERTS && assert('$j > 0'); 503f3f0262cSandi while ($other_changed[--$j]) 504f3f0262cSandi continue; 505f3f0262cSandi USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); 506f3f0262cSandi } 507f3f0262cSandi } 508f3f0262cSandi } 509f3f0262cSandi} 510f3f0262cSandi 511f3f0262cSandi/** 512f3f0262cSandi * Class representing a 'diff' between two sequences of strings. 513f3f0262cSandi */ 514f5b78785SAndreas Gohrclass Diff { 515f5b78785SAndreas Gohr 516f3f0262cSandi var $edits; 517f3f0262cSandi 518f3f0262cSandi /** 519*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 520*988c1340SPiyush Mishra */ 521*988c1340SPiyush Mishra function Diff($from_lines, $to_lines) { 522*988c1340SPiyush Mishra $this->__construct($from_lines, $to_lines); 523*988c1340SPiyush Mishra } 524*988c1340SPiyush Mishra 525*988c1340SPiyush Mishra /** 526f3f0262cSandi * Constructor. 527f3f0262cSandi * Computes diff between sequences of strings. 528f3f0262cSandi * 529f3f0262cSandi * @param $from_lines array An array of strings. 530f3f0262cSandi * (Typically these are lines from a file.) 531f3f0262cSandi * @param $to_lines array An array of strings. 532f3f0262cSandi */ 533*988c1340SPiyush Mishra function __construct($from_lines, $to_lines) { 534f3f0262cSandi $eng = new _DiffEngine; 535f3f0262cSandi $this->edits = $eng->diff($from_lines, $to_lines); 536f3f0262cSandi //$this->_check($from_lines, $to_lines); 537f3f0262cSandi } 538f3f0262cSandi 539f3f0262cSandi /** 540f3f0262cSandi * Compute reversed Diff. 541f3f0262cSandi * 542f3f0262cSandi * SYNOPSIS: 543f3f0262cSandi * 544f3f0262cSandi * $diff = new Diff($lines1, $lines2); 545f3f0262cSandi * $rev = $diff->reverse(); 546f3f0262cSandi * @return object A Diff object representing the inverse of the 547f3f0262cSandi * original diff. 548f3f0262cSandi */ 549f3f0262cSandi function reverse() { 550f3f0262cSandi $rev = $this; 551f3f0262cSandi $rev->edits = array(); 552f3f0262cSandi foreach ($this->edits as $edit) { 553f3f0262cSandi $rev->edits[] = $edit->reverse(); 554f3f0262cSandi } 555f3f0262cSandi return $rev; 556f3f0262cSandi } 557f3f0262cSandi 558f3f0262cSandi /** 559f3f0262cSandi * Check for empty diff. 560f3f0262cSandi * 561f3f0262cSandi * @return bool True iff two sequences were identical. 562f3f0262cSandi */ 563f3f0262cSandi function isEmpty() { 564f3f0262cSandi foreach ($this->edits as $edit) { 565f3f0262cSandi if ($edit->type != 'copy') 566f3f0262cSandi return false; 567f3f0262cSandi } 568f3f0262cSandi return true; 569f3f0262cSandi } 570f3f0262cSandi 571f3f0262cSandi /** 572f3f0262cSandi * Compute the length of the Longest Common Subsequence (LCS). 573f3f0262cSandi * 574f3f0262cSandi * This is mostly for diagnostic purposed. 575f3f0262cSandi * 576f3f0262cSandi * @return int The length of the LCS. 577f3f0262cSandi */ 578f3f0262cSandi function lcs() { 579f3f0262cSandi $lcs = 0; 580f3f0262cSandi foreach ($this->edits as $edit) { 581f3f0262cSandi if ($edit->type == 'copy') 582f5b78785SAndreas Gohr $lcs += count($edit->orig); 583f3f0262cSandi } 584f3f0262cSandi return $lcs; 585f3f0262cSandi } 586f3f0262cSandi 587f3f0262cSandi /** 588f3f0262cSandi * Get the original set of lines. 589f3f0262cSandi * 590f3f0262cSandi * This reconstructs the $from_lines parameter passed to the 591f3f0262cSandi * constructor. 592f3f0262cSandi * 593f3f0262cSandi * @return array The original sequence of strings. 594f3f0262cSandi */ 595f3f0262cSandi function orig() { 596f3f0262cSandi $lines = array(); 597f3f0262cSandi 598f3f0262cSandi foreach ($this->edits as $edit) { 599f3f0262cSandi if ($edit->orig) 600f5b78785SAndreas Gohr array_splice($lines, count($lines), 0, $edit->orig); 601f3f0262cSandi } 602f3f0262cSandi return $lines; 603f3f0262cSandi } 604f3f0262cSandi 605f3f0262cSandi /** 606f3f0262cSandi * Get the closing set of lines. 607f3f0262cSandi * 608f3f0262cSandi * This reconstructs the $to_lines parameter passed to the 609f3f0262cSandi * constructor. 610f3f0262cSandi * 611f3f0262cSandi * @return array The sequence of strings. 612f3f0262cSandi */ 613f3f0262cSandi function closing() { 614f3f0262cSandi $lines = array(); 615f3f0262cSandi 616f3f0262cSandi foreach ($this->edits as $edit) { 617f3f0262cSandi if ($edit->closing) 618f5b78785SAndreas Gohr array_splice($lines, count($lines), 0, $edit->closing); 619f3f0262cSandi } 620f3f0262cSandi return $lines; 621f3f0262cSandi } 622f3f0262cSandi 623f3f0262cSandi /** 624f3f0262cSandi * Check a Diff for validity. 625f3f0262cSandi * 626f3f0262cSandi * This is here only for debugging purposes. 627f3f0262cSandi */ 628f3f0262cSandi function _check($from_lines, $to_lines) { 629f3f0262cSandi if (serialize($from_lines) != serialize($this->orig())) 630f3f0262cSandi trigger_error("Reconstructed original doesn't match", E_USER_ERROR); 631f3f0262cSandi if (serialize($to_lines) != serialize($this->closing())) 632f3f0262cSandi trigger_error("Reconstructed closing doesn't match", E_USER_ERROR); 633f3f0262cSandi 634f3f0262cSandi $rev = $this->reverse(); 635f3f0262cSandi if (serialize($to_lines) != serialize($rev->orig())) 636f3f0262cSandi trigger_error("Reversed original doesn't match", E_USER_ERROR); 637f3f0262cSandi if (serialize($from_lines) != serialize($rev->closing())) 638f3f0262cSandi trigger_error("Reversed closing doesn't match", E_USER_ERROR); 639f3f0262cSandi 640f3f0262cSandi $prevtype = 'none'; 641f3f0262cSandi foreach ($this->edits as $edit) { 642f3f0262cSandi if ($prevtype == $edit->type) 643f3f0262cSandi trigger_error("Edit sequence is non-optimal", E_USER_ERROR); 644f3f0262cSandi $prevtype = $edit->type; 645f3f0262cSandi } 646f3f0262cSandi 647f3f0262cSandi $lcs = $this->lcs(); 648f3f0262cSandi trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE); 649f3f0262cSandi } 650f3f0262cSandi} 651f3f0262cSandi 652f3f0262cSandi/** 653f3f0262cSandi * FIXME: bad name. 654f3f0262cSandi */ 655f5b78785SAndreas Gohrclass MappedDiff extends Diff { 656f3f0262cSandi /** 657*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 658*988c1340SPiyush Mishra */ 659*988c1340SPiyush Mishra function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) { 660*988c1340SPiyush Mishra $this->__construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines); 661*988c1340SPiyush Mishra } 662*988c1340SPiyush Mishra 663*988c1340SPiyush Mishra /** 664f3f0262cSandi * Constructor. 665f3f0262cSandi * 666f3f0262cSandi * Computes diff between sequences of strings. 667f3f0262cSandi * 668f3f0262cSandi * This can be used to compute things like 669f3f0262cSandi * case-insensitve diffs, or diffs which ignore 670f3f0262cSandi * changes in white-space. 671f3f0262cSandi * 672f3f0262cSandi * @param $from_lines array An array of strings. 673f3f0262cSandi * (Typically these are lines from a file.) 674f3f0262cSandi * 675f3f0262cSandi * @param $to_lines array An array of strings. 676f3f0262cSandi * 677f3f0262cSandi * @param $mapped_from_lines array This array should 678f3f0262cSandi * have the same size number of elements as $from_lines. 679f3f0262cSandi * The elements in $mapped_from_lines and 680f3f0262cSandi * $mapped_to_lines are what is actually compared 681f3f0262cSandi * when computing the diff. 682f3f0262cSandi * 683f3f0262cSandi * @param $mapped_to_lines array This array should 684f3f0262cSandi * have the same number of elements as $to_lines. 685f3f0262cSandi */ 686*988c1340SPiyush Mishra function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) { 687f3f0262cSandi 688f5b78785SAndreas Gohr assert(count($from_lines) == count($mapped_from_lines)); 689f5b78785SAndreas Gohr assert(count($to_lines) == count($mapped_to_lines)); 690f3f0262cSandi 691*988c1340SPiyush Mishra parent::__construct($mapped_from_lines, $mapped_to_lines); 692f3f0262cSandi 693f3f0262cSandi $xi = $yi = 0; 694f5b78785SAndreas Gohr $ecnt = count($this->edits); 695f5b78785SAndreas Gohr for ($i = 0; $i < $ecnt; $i++) { 696f3f0262cSandi $orig = &$this->edits[$i]->orig; 697f3f0262cSandi if (is_array($orig)) { 698f5b78785SAndreas Gohr $orig = array_slice($from_lines, $xi, count($orig)); 699f5b78785SAndreas Gohr $xi += count($orig); 700f3f0262cSandi } 701f3f0262cSandi 702f3f0262cSandi $closing = &$this->edits[$i]->closing; 703f3f0262cSandi if (is_array($closing)) { 704f5b78785SAndreas Gohr $closing = array_slice($to_lines, $yi, count($closing)); 705f5b78785SAndreas Gohr $yi += count($closing); 706f3f0262cSandi } 707f3f0262cSandi } 708f3f0262cSandi } 709f3f0262cSandi} 710f3f0262cSandi 711f3f0262cSandi/** 712f3f0262cSandi * A class to format Diffs 713f3f0262cSandi * 714f3f0262cSandi * This class formats the diff in classic diff format. 715f3f0262cSandi * It is intended that this class be customized via inheritance, 716f3f0262cSandi * to obtain fancier outputs. 717f3f0262cSandi */ 718f5b78785SAndreas Gohrclass DiffFormatter { 719f3f0262cSandi /** 720f3f0262cSandi * Number of leading context "lines" to preserve. 721f3f0262cSandi * 722f3f0262cSandi * This should be left at zero for this class, but subclasses 723f3f0262cSandi * may want to set this to other values. 724f3f0262cSandi */ 725f3f0262cSandi var $leading_context_lines = 0; 726f3f0262cSandi 727f3f0262cSandi /** 728f3f0262cSandi * Number of trailing context "lines" to preserve. 729f3f0262cSandi * 730f3f0262cSandi * This should be left at zero for this class, but subclasses 731f3f0262cSandi * may want to set this to other values. 732f3f0262cSandi */ 733f3f0262cSandi var $trailing_context_lines = 0; 734f3f0262cSandi 735f3f0262cSandi /** 736f3f0262cSandi * Format a diff. 737f3f0262cSandi * 738f3f0262cSandi * @param $diff object A Diff object. 739f3f0262cSandi * @return string The formatted output. 740f3f0262cSandi */ 741f3f0262cSandi function format($diff) { 742f3f0262cSandi 743f3f0262cSandi $xi = $yi = 1; 744f3f0262cSandi $block = false; 745f3f0262cSandi $context = array(); 746f3f0262cSandi 747f3f0262cSandi $nlead = $this->leading_context_lines; 748f3f0262cSandi $ntrail = $this->trailing_context_lines; 749f3f0262cSandi 750f3f0262cSandi $this->_start_diff(); 751f3f0262cSandi 752f3f0262cSandi foreach ($diff->edits as $edit) { 753f3f0262cSandi if ($edit->type == 'copy') { 754f3f0262cSandi if (is_array($block)) { 755f5b78785SAndreas Gohr if (count($edit->orig) <= $nlead + $ntrail) { 756f3f0262cSandi $block[] = $edit; 757f3f0262cSandi } 758f3f0262cSandi else{ 759f3f0262cSandi if ($ntrail) { 760f3f0262cSandi $context = array_slice($edit->orig, 0, $ntrail); 761f3f0262cSandi $block[] = new _DiffOp_Copy($context); 762f3f0262cSandi } 7637deca91bSRobin Getz $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block); 764f3f0262cSandi $block = false; 765f3f0262cSandi } 766f3f0262cSandi } 767f3f0262cSandi $context = $edit->orig; 768f3f0262cSandi } 769f3f0262cSandi else { 770f3f0262cSandi if (! is_array($block)) { 771f5b78785SAndreas Gohr $context = array_slice($context, count($context) - $nlead); 772f5b78785SAndreas Gohr $x0 = $xi - count($context); 773f5b78785SAndreas Gohr $y0 = $yi - count($context); 774f3f0262cSandi $block = array(); 775f3f0262cSandi if ($context) 776f3f0262cSandi $block[] = new _DiffOp_Copy($context); 777f3f0262cSandi } 778f3f0262cSandi $block[] = $edit; 779f3f0262cSandi } 780f3f0262cSandi 781f3f0262cSandi if ($edit->orig) 782f5b78785SAndreas Gohr $xi += count($edit->orig); 783f3f0262cSandi if ($edit->closing) 784f5b78785SAndreas Gohr $yi += count($edit->closing); 785f3f0262cSandi } 786f3f0262cSandi 787f3f0262cSandi if (is_array($block)) 7887deca91bSRobin Getz $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block); 789f3f0262cSandi 790f3f0262cSandi return $this->_end_diff(); 791f3f0262cSandi } 792f3f0262cSandi 793f3f0262cSandi function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) { 794f3f0262cSandi $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen)); 795f3f0262cSandi foreach ($edits as $edit) { 796f3f0262cSandi if ($edit->type == 'copy') 797f3f0262cSandi $this->_context($edit->orig); 798f3f0262cSandi elseif ($edit->type == 'add') 799f3f0262cSandi $this->_added($edit->closing); 800f3f0262cSandi elseif ($edit->type == 'delete') 801f3f0262cSandi $this->_deleted($edit->orig); 802f3f0262cSandi elseif ($edit->type == 'change') 803f3f0262cSandi $this->_changed($edit->orig, $edit->closing); 804f3f0262cSandi else 805f3f0262cSandi trigger_error("Unknown edit type", E_USER_ERROR); 806f3f0262cSandi } 807f3f0262cSandi $this->_end_block(); 808f3f0262cSandi } 809f3f0262cSandi 810f3f0262cSandi function _start_diff() { 811f3f0262cSandi ob_start(); 812f3f0262cSandi } 813f3f0262cSandi 814f3f0262cSandi function _end_diff() { 815f3f0262cSandi $val = ob_get_contents(); 816f3f0262cSandi ob_end_clean(); 817f3f0262cSandi return $val; 818f3f0262cSandi } 819f3f0262cSandi 820f3f0262cSandi function _block_header($xbeg, $xlen, $ybeg, $ylen) { 821f3f0262cSandi if ($xlen > 1) 822f3f0262cSandi $xbeg .= "," . ($xbeg + $xlen - 1); 823f3f0262cSandi if ($ylen > 1) 824f3f0262cSandi $ybeg .= "," . ($ybeg + $ylen - 1); 825f3f0262cSandi 826f3f0262cSandi return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg; 827f3f0262cSandi } 828f3f0262cSandi 829f3f0262cSandi function _start_block($header) { 830f3f0262cSandi echo $header; 831f3f0262cSandi } 832f3f0262cSandi 833f3f0262cSandi function _end_block() { 834f3f0262cSandi } 835f3f0262cSandi 836f3f0262cSandi function _lines($lines, $prefix = ' ') { 837f3f0262cSandi foreach ($lines as $line) 838f3f0262cSandi echo "$prefix $line\n"; 839f3f0262cSandi } 840f3f0262cSandi 841f3f0262cSandi function _context($lines) { 842f3f0262cSandi $this->_lines($lines); 843f3f0262cSandi } 844f3f0262cSandi 845f3f0262cSandi function _added($lines) { 846f3f0262cSandi $this->_lines($lines, ">"); 847f3f0262cSandi } 848f3f0262cSandi function _deleted($lines) { 849f3f0262cSandi $this->_lines($lines, "<"); 850f3f0262cSandi } 851f3f0262cSandi 852f3f0262cSandi function _changed($orig, $closing) { 853f3f0262cSandi $this->_deleted($orig); 854f3f0262cSandi echo "---\n"; 855f3f0262cSandi $this->_added($closing); 856f3f0262cSandi } 857f3f0262cSandi} 858f3f0262cSandi 859f3f0262cSandi 860f3f0262cSandi/** 861f3f0262cSandi * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3 862f3f0262cSandi * 863f3f0262cSandi */ 864f3f0262cSandi 8659b3a5b24Schrisdefine('NBSP', "\xC2\xA0"); // utf-8 non-breaking space. 866f3f0262cSandi 867f3f0262cSandiclass _HWLDF_WordAccumulator { 868*988c1340SPiyush Mishra /** 869*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 870*988c1340SPiyush Mishra */ 871f3f0262cSandi function _HWLDF_WordAccumulator() { 872*988c1340SPiyush Mishra $this->__construct(); 873*988c1340SPiyush Mishra } 874*988c1340SPiyush Mishra 875*988c1340SPiyush Mishra function __construct() { 876f3f0262cSandi $this->_lines = array(); 877f3f0262cSandi $this->_line = ''; 878f3f0262cSandi $this->_group = ''; 879f3f0262cSandi $this->_tag = ''; 880f3f0262cSandi } 881f3f0262cSandi 882f3f0262cSandi function _flushGroup($new_tag) { 883f3f0262cSandi if ($this->_group !== '') { 884f3f0262cSandi if ($this->_tag == 'mark') 885ed7ecb79SAnika Henke $this->_line .= '<strong>'.$this->_group.'</strong>'; 886812bb04eSRobin Getz elseif ($this->_tag == 'add') 887812bb04eSRobin Getz $this->_line .= '<span class="diff-addedline">'.$this->_group.'</span>'; 888812bb04eSRobin Getz elseif ($this->_tag == 'del') 889812bb04eSRobin Getz $this->_line .= '<span class="diff-deletedline"><del>'.$this->_group.'</del></span>'; 890f3f0262cSandi else 891f3f0262cSandi $this->_line .= $this->_group; 892f3f0262cSandi } 893f3f0262cSandi $this->_group = ''; 894f3f0262cSandi $this->_tag = $new_tag; 895f3f0262cSandi } 896f3f0262cSandi 897f3f0262cSandi function _flushLine($new_tag) { 898f3f0262cSandi $this->_flushGroup($new_tag); 899f3f0262cSandi if ($this->_line != '') 900f3f0262cSandi $this->_lines[] = $this->_line; 901f3f0262cSandi $this->_line = ''; 902f3f0262cSandi } 903f3f0262cSandi 904f3f0262cSandi function addWords($words, $tag = '') { 905f3f0262cSandi if ($tag != $this->_tag) 906f3f0262cSandi $this->_flushGroup($tag); 907f3f0262cSandi 908f3f0262cSandi foreach ($words as $word) { 909f3f0262cSandi // new-line should only come as first char of word. 910f3f0262cSandi if ($word == '') 911f3f0262cSandi continue; 912f3f0262cSandi if ($word[0] == "\n") { 913f3f0262cSandi $this->_group .= NBSP; 914f3f0262cSandi $this->_flushLine($tag); 915f3f0262cSandi $word = substr($word, 1); 916f3f0262cSandi } 917f3f0262cSandi assert(!strstr($word, "\n")); 918f3f0262cSandi $this->_group .= $word; 919f3f0262cSandi } 920f3f0262cSandi } 921f3f0262cSandi 922f3f0262cSandi function getLines() { 923f3f0262cSandi $this->_flushLine('~done'); 924f3f0262cSandi return $this->_lines; 925f3f0262cSandi } 926f3f0262cSandi} 927f3f0262cSandi 928f5b78785SAndreas Gohrclass WordLevelDiff extends MappedDiff { 929f5b78785SAndreas Gohr 930*988c1340SPiyush Mishra /** 931*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 932*988c1340SPiyush Mishra */ 933f3f0262cSandi function WordLevelDiff($orig_lines, $closing_lines) { 934*988c1340SPiyush Mishra $this->__construct($orig_lines, $closing_lines); 935*988c1340SPiyush Mishra } 936*988c1340SPiyush Mishra 937*988c1340SPiyush Mishra function __construct($orig_lines, $closing_lines) { 938f3f0262cSandi list ($orig_words, $orig_stripped) = $this->_split($orig_lines); 939f3f0262cSandi list ($closing_words, $closing_stripped) = $this->_split($closing_lines); 940f3f0262cSandi 941*988c1340SPiyush Mishra parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped); 942f3f0262cSandi } 943f3f0262cSandi 944f3f0262cSandi function _split($lines) { 9455e1ee188SXin LI if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu', 9467deca91bSRobin Getz implode("\n", $lines), $m)) { 947f3f0262cSandi return array(array(''), array('')); 948f3f0262cSandi } 949f3f0262cSandi return array($m[0], $m[1]); 950f3f0262cSandi } 951f3f0262cSandi 952f3f0262cSandi function orig() { 953f3f0262cSandi $orig = new _HWLDF_WordAccumulator; 954f3f0262cSandi 955f3f0262cSandi foreach ($this->edits as $edit) { 956f3f0262cSandi if ($edit->type == 'copy') 957f3f0262cSandi $orig->addWords($edit->orig); 958f3f0262cSandi elseif ($edit->orig) 959f3f0262cSandi $orig->addWords($edit->orig, 'mark'); 960f3f0262cSandi } 961f3f0262cSandi return $orig->getLines(); 962f3f0262cSandi } 963f3f0262cSandi 964f3f0262cSandi function closing() { 965f3f0262cSandi $closing = new _HWLDF_WordAccumulator; 966f3f0262cSandi 967f3f0262cSandi foreach ($this->edits as $edit) { 968f3f0262cSandi if ($edit->type == 'copy') 969f3f0262cSandi $closing->addWords($edit->closing); 970f3f0262cSandi elseif ($edit->closing) 971f3f0262cSandi $closing->addWords($edit->closing, 'mark'); 972f3f0262cSandi } 973f3f0262cSandi return $closing->getLines(); 974f3f0262cSandi } 975f3f0262cSandi} 976f3f0262cSandi 977812bb04eSRobin Getzclass InlineWordLevelDiff extends MappedDiff { 978812bb04eSRobin Getz 979*988c1340SPiyush Mishra /** 980*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 981*988c1340SPiyush Mishra */ 982812bb04eSRobin Getz function InlineWordLevelDiff($orig_lines, $closing_lines) { 983*988c1340SPiyush Mishra $this->__construct($orig_lines, $closing_lines); 984*988c1340SPiyush Mishra } 985*988c1340SPiyush Mishra 986*988c1340SPiyush Mishra function __construct($orig_lines, $closing_lines) { 987812bb04eSRobin Getz list ($orig_words, $orig_stripped) = $this->_split($orig_lines); 988812bb04eSRobin Getz list ($closing_words, $closing_stripped) = $this->_split($closing_lines); 989812bb04eSRobin Getz 990*988c1340SPiyush Mishra parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped); 991812bb04eSRobin Getz } 992812bb04eSRobin Getz 993812bb04eSRobin Getz function _split($lines) { 994c5982caaSDanny Lin if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu', 995812bb04eSRobin Getz implode("\n", $lines), $m)) { 996812bb04eSRobin Getz return array(array(''), array('')); 997812bb04eSRobin Getz } 998812bb04eSRobin Getz return array($m[0], $m[1]); 999812bb04eSRobin Getz } 1000812bb04eSRobin Getz 1001812bb04eSRobin Getz function inline() { 1002812bb04eSRobin Getz $orig = new _HWLDF_WordAccumulator; 1003812bb04eSRobin Getz foreach ($this->edits as $edit) { 1004812bb04eSRobin Getz if ($edit->type == 'copy') 10054f2305cbSAdrian Lang $orig->addWords($edit->closing); 1006812bb04eSRobin Getz elseif ($edit->type == 'change'){ 1007812bb04eSRobin Getz $orig->addWords($edit->orig, 'del'); 1008812bb04eSRobin Getz $orig->addWords($edit->closing, 'add'); 1009812bb04eSRobin Getz } elseif ($edit->type == 'delete') 1010812bb04eSRobin Getz $orig->addWords($edit->orig, 'del'); 1011812bb04eSRobin Getz elseif ($edit->type == 'add') 1012812bb04eSRobin Getz $orig->addWords($edit->closing, 'add'); 1013812bb04eSRobin Getz elseif ($edit->orig) 1014812bb04eSRobin Getz $orig->addWords($edit->orig, 'del'); 1015812bb04eSRobin Getz } 1016812bb04eSRobin Getz return $orig->getLines(); 1017812bb04eSRobin Getz } 1018812bb04eSRobin Getz} 1019812bb04eSRobin Getz 1020f3f0262cSandi/** 1021f3f0262cSandi * "Unified" diff formatter. 1022f3f0262cSandi * 1023f3f0262cSandi * This class formats the diff in classic "unified diff" format. 1024f3f0262cSandi */ 1025f5b78785SAndreas Gohrclass UnifiedDiffFormatter extends DiffFormatter { 1026f5b78785SAndreas Gohr 1027*988c1340SPiyush Mishra /** 1028*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 1029*988c1340SPiyush Mishra */ 1030f3f0262cSandi function UnifiedDiffFormatter($context_lines = 4) { 1031*988c1340SPiyush Mishra $this->__construct($context_lines); 1032*988c1340SPiyush Mishra } 1033*988c1340SPiyush Mishra 1034*988c1340SPiyush Mishra function __construct($context_lines = 4) { 1035f3f0262cSandi $this->leading_context_lines = $context_lines; 1036f3f0262cSandi $this->trailing_context_lines = $context_lines; 1037f3f0262cSandi } 1038f3f0262cSandi 1039f3f0262cSandi function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1040f3f0262cSandi if ($xlen != 1) 1041f3f0262cSandi $xbeg .= "," . $xlen; 1042f3f0262cSandi if ($ylen != 1) 1043f3f0262cSandi $ybeg .= "," . $ylen; 1044f3f0262cSandi return "@@ -$xbeg +$ybeg @@\n"; 1045f3f0262cSandi } 1046f3f0262cSandi 1047f3f0262cSandi function _added($lines) { 1048f3f0262cSandi $this->_lines($lines, "+"); 1049f3f0262cSandi } 1050f3f0262cSandi function _deleted($lines) { 1051f3f0262cSandi $this->_lines($lines, "-"); 1052f3f0262cSandi } 1053f3f0262cSandi function _changed($orig, $final) { 1054f3f0262cSandi $this->_deleted($orig); 1055f3f0262cSandi $this->_added($final); 1056f3f0262cSandi } 1057f3f0262cSandi} 1058f3f0262cSandi 1059f3f0262cSandi/** 1060f3f0262cSandi * Wikipedia Table style diff formatter. 1061f3f0262cSandi * 1062f3f0262cSandi */ 1063f5b78785SAndreas Gohrclass TableDiffFormatter extends DiffFormatter { 1064f5b78785SAndreas Gohr 1065*988c1340SPiyush Mishra /** 1066*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 1067*988c1340SPiyush Mishra */ 1068f3f0262cSandi function TableDiffFormatter() { 1069*988c1340SPiyush Mishra $this->__construct(); 1070*988c1340SPiyush Mishra } 1071*988c1340SPiyush Mishra 1072*988c1340SPiyush Mishra function __construct() { 1073f3f0262cSandi $this->leading_context_lines = 2; 1074f3f0262cSandi $this->trailing_context_lines = 2; 1075f3f0262cSandi } 1076f3f0262cSandi 10772d880650SAdrian Lang function format($diff) { 10782d880650SAdrian Lang // Preserve whitespaces by converting some to non-breaking spaces. 10792d880650SAdrian Lang // Do not convert all of them to allow word-wrap. 10802d880650SAdrian Lang $val = parent::format($diff); 10812d880650SAdrian Lang $val = str_replace(' ',' ', $val); 10822d880650SAdrian Lang $val = preg_replace('/ (?=<)|(?<=[ >]) /', ' ', $val); 10832d880650SAdrian Lang return $val; 10842d880650SAdrian Lang } 10852d880650SAdrian Lang 1086f3f0262cSandi function _pre($text){ 1087f3f0262cSandi $text = htmlspecialchars($text); 1088f3f0262cSandi return $text; 1089f3f0262cSandi } 1090f3f0262cSandi 1091f3f0262cSandi function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1092f3f0262cSandi global $lang; 1093f3f0262cSandi $l1 = $lang['line'].' '.$xbeg; 1094f3f0262cSandi $l2 = $lang['line'].' '.$ybeg; 10955048c277SAnika Henke $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n". 10965048c277SAnika Henke ' <td class="diff-blockheader" colspan="2">'.$l2.":</td>\n". 10975048c277SAnika Henke "</tr>\n"; 1098f3f0262cSandi return $r; 1099f3f0262cSandi } 1100f3f0262cSandi 1101f3f0262cSandi function _start_block($header) { 1102f3f0262cSandi print($header); 1103f3f0262cSandi } 1104f3f0262cSandi 1105f3f0262cSandi function _end_block() { 1106f3f0262cSandi } 1107f3f0262cSandi 1108f3f0262cSandi function _lines($lines, $prefix=' ', $color="white") { 1109f3f0262cSandi } 1110f3f0262cSandi 1111f3f0262cSandi function addedLine($line) { 11127deca91bSRobin Getz return '<td>+</td><td class="diff-addedline">' . $line.'</td>'; 1113f3f0262cSandi } 1114f3f0262cSandi 1115f3f0262cSandi function deletedLine($line) { 11167deca91bSRobin Getz return '<td>-</td><td class="diff-deletedline">' . $line.'</td>'; 1117f3f0262cSandi } 1118f3f0262cSandi 1119f3f0262cSandi function emptyLine() { 1120f3f0262cSandi return '<td colspan="2"> </td>'; 1121f3f0262cSandi } 1122f3f0262cSandi 1123f3f0262cSandi function contextLine($line) { 1124f3f0262cSandi return '<td> </td><td class="diff-context">'.$line.'</td>'; 1125f3f0262cSandi } 1126f3f0262cSandi 1127f3f0262cSandi function _added($lines) { 1128f3f0262cSandi foreach ($lines as $line) { 11297deca91bSRobin Getz print('<tr>' . $this->emptyLine() . $this->addedLine($line) . "</tr>\n"); 1130f3f0262cSandi } 1131f3f0262cSandi } 1132f3f0262cSandi 1133f3f0262cSandi function _deleted($lines) { 1134f3f0262cSandi foreach ($lines as $line) { 11357deca91bSRobin Getz print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n"); 1136f3f0262cSandi } 1137f3f0262cSandi } 1138f3f0262cSandi 1139f3f0262cSandi function _context($lines) { 1140f3f0262cSandi foreach ($lines as $line) { 11417deca91bSRobin Getz print('<tr>' . $this->contextLine($line) . $this->contextLine($line) . "</tr>\n"); 1142f3f0262cSandi } 1143f3f0262cSandi } 1144f3f0262cSandi 1145f3f0262cSandi function _changed($orig, $closing) { 1146f3f0262cSandi $diff = new WordLevelDiff($orig, $closing); 1147f3f0262cSandi $del = $diff->orig(); 1148f3f0262cSandi $add = $diff->closing(); 1149f3f0262cSandi 1150f3f0262cSandi while ($line = array_shift($del)) { 1151f3f0262cSandi $aline = array_shift($add); 11527deca91bSRobin Getz print('<tr>' . $this->deletedLine($line) . $this->addedLine($aline) . "</tr>\n"); 1153f3f0262cSandi } 1154f3f0262cSandi $this->_added($add); # If any leftovers 1155f3f0262cSandi } 1156f3f0262cSandi} 1157f3f0262cSandi 1158812bb04eSRobin Getz/** 1159812bb04eSRobin Getz * Inline style diff formatter. 1160812bb04eSRobin Getz * 1161812bb04eSRobin Getz */ 1162812bb04eSRobin Getzclass InlineDiffFormatter extends DiffFormatter { 1163c495dc45SAndreas Gohr var $colspan = 4; 1164*988c1340SPiyush Mishra /** 1165*988c1340SPiyush Mishra * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 1166*988c1340SPiyush Mishra */ 1167812bb04eSRobin Getz function InlineDiffFormatter() { 1168*988c1340SPiyush Mishra $this->__construct(); 1169*988c1340SPiyush Mishra } 1170*988c1340SPiyush Mishra 1171*988c1340SPiyush Mishra function __construct() { 1172812bb04eSRobin Getz $this->leading_context_lines = 2; 1173812bb04eSRobin Getz $this->trailing_context_lines = 2; 1174812bb04eSRobin Getz } 1175812bb04eSRobin Getz 1176812bb04eSRobin Getz function format($diff) { 1177812bb04eSRobin Getz // Preserve whitespaces by converting some to non-breaking spaces. 1178812bb04eSRobin Getz // Do not convert all of them to allow word-wrap. 1179812bb04eSRobin Getz $val = parent::format($diff); 1180812bb04eSRobin Getz $val = str_replace(' ',' ', $val); 1181812bb04eSRobin Getz $val = preg_replace('/ (?=<)|(?<=[ >]) /', ' ', $val); 1182812bb04eSRobin Getz return $val; 1183812bb04eSRobin Getz } 1184812bb04eSRobin Getz 1185812bb04eSRobin Getz function _pre($text){ 1186812bb04eSRobin Getz $text = htmlspecialchars($text); 1187812bb04eSRobin Getz return $text; 1188812bb04eSRobin Getz } 1189812bb04eSRobin Getz 1190812bb04eSRobin Getz function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1191812bb04eSRobin Getz global $lang; 1192812bb04eSRobin Getz if ($xlen != 1) 1193812bb04eSRobin Getz $xbeg .= "," . $xlen; 1194812bb04eSRobin Getz if ($ylen != 1) 1195812bb04eSRobin Getz $ybeg .= "," . $ylen; 1196c495dc45SAndreas Gohr $r = '<tr><td colspan="'.$this->colspan.'" class="diff-blockheader">@@ '.$lang['line']." -$xbeg +$ybeg @@"; 1197812bb04eSRobin Getz $r .= ' <span class="diff-deletedline"><del>'.$lang['deleted'].'</del></span>'; 1198812bb04eSRobin Getz $r .= ' <span class="diff-addedline">'.$lang['created'].'</span>'; 1199812bb04eSRobin Getz $r .= "</td></tr>\n"; 1200812bb04eSRobin Getz return $r; 1201812bb04eSRobin Getz } 1202812bb04eSRobin Getz 1203812bb04eSRobin Getz function _start_block($header) { 1204812bb04eSRobin Getz print($header."\n"); 1205812bb04eSRobin Getz } 1206812bb04eSRobin Getz 1207812bb04eSRobin Getz function _end_block() { 1208812bb04eSRobin Getz } 1209812bb04eSRobin Getz 1210812bb04eSRobin Getz function _lines($lines, $prefix=' ', $color="white") { 1211812bb04eSRobin Getz } 1212812bb04eSRobin Getz 1213812bb04eSRobin Getz function _added($lines) { 1214812bb04eSRobin Getz foreach ($lines as $line) { 1215c495dc45SAndreas Gohr print('<tr><td colspan="'.$this->colspan.'" class="diff-addedline">'. $line . "</td></tr>\n"); 1216812bb04eSRobin Getz } 1217812bb04eSRobin Getz } 1218812bb04eSRobin Getz 1219812bb04eSRobin Getz function _deleted($lines) { 1220812bb04eSRobin Getz foreach ($lines as $line) { 1221c495dc45SAndreas Gohr print('<tr><td colspan="'.$this->colspan.'" class="diff-deletedline"><del>' . $line . "</del></td></tr>\n"); 1222812bb04eSRobin Getz } 1223812bb04eSRobin Getz } 1224812bb04eSRobin Getz 1225812bb04eSRobin Getz function _context($lines) { 1226812bb04eSRobin Getz foreach ($lines as $line) { 1227c495dc45SAndreas Gohr print('<tr><td colspan="'.$this->colspan.'" class="diff-context">'.$line."</td></tr>\n"); 1228812bb04eSRobin Getz } 1229812bb04eSRobin Getz } 1230812bb04eSRobin Getz 1231812bb04eSRobin Getz function _changed($orig, $closing) { 1232812bb04eSRobin Getz $diff = new InlineWordLevelDiff($orig, $closing); 1233812bb04eSRobin Getz $add = $diff->inline(); 1234812bb04eSRobin Getz 1235812bb04eSRobin Getz foreach ($add as $line) 1236c495dc45SAndreas Gohr print('<tr><td colspan="'.$this->colspan.'">'.$line."</td></tr>\n"); 1237812bb04eSRobin Getz } 1238812bb04eSRobin Getz} 1239812bb04eSRobin Getz 1240340756e4Sandi 1241e3776c06SMichael Hamann//Setup VIM: ex: et ts=4 : 1242