1<?php 2/** 3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3) 4 * 5 * Additions by Axel Boldt for MediaWiki 6 * 7 * @copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> 8 * @license You may copy this code freely under the conditions of the GPL. 9 */ 10define('USE_ASSERTS', function_exists('assert')); 11 12class _DiffOp { 13 var $type; 14 var $orig; 15 var $closing; 16 17 function reverse() { 18 trigger_error("pure virtual", E_USER_ERROR); 19 } 20 21 function norig() { 22 return $this->orig ? count($this->orig) : 0; 23 } 24 25 function nclosing() { 26 return $this->closing ? count($this->closing) : 0; 27 } 28} 29 30class _DiffOp_Copy extends _DiffOp { 31 var $type = 'copy'; 32 /** 33 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 34 */ 35 function _DiffOp_Copy($orig, $closing = false) { 36 $this->__construct($orig, $closing); 37 } 38 39 function __construct($orig, $closing = false) { 40 if (!is_array($closing)) 41 $closing = $orig; 42 $this->orig = $orig; 43 $this->closing = $closing; 44 } 45 46 function reverse() { 47 return new _DiffOp_Copy($this->closing, $this->orig); 48 } 49} 50 51class _DiffOp_Delete extends _DiffOp { 52 var $type = 'delete'; 53 /** 54 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 55 */ 56 function _DiffOp_Delete($lines) { 57 $this->__construct($lines); 58 } 59 60 function __construct($lines) { 61 $this->orig = $lines; 62 $this->closing = false; 63 } 64 65 function reverse() { 66 return new _DiffOp_Add($this->orig); 67 } 68} 69 70class _DiffOp_Add extends _DiffOp { 71 var $type = 'add'; 72 /** 73 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 74 */ 75 function _DiffOp_Add($lines) { 76 $this->__construct($lines); 77 } 78 79 function __construct($lines) { 80 $this->closing = $lines; 81 $this->orig = false; 82 } 83 84 function reverse() { 85 return new _DiffOp_Delete($this->closing); 86 } 87} 88 89class _DiffOp_Change extends _DiffOp { 90 var $type = 'change'; 91 /** 92 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 93 */ 94 function _DiffOp_Change($orig, $closing) { 95 $this->__construct($orig, $closing); 96 } 97 98 function __construct($orig, $closing) { 99 $this->orig = $orig; 100 $this->closing = $closing; 101 } 102 103 function reverse() { 104 return new _DiffOp_Change($this->closing, $this->orig); 105 } 106} 107 108 109/** 110 * Class used internally by Diff to actually compute the diffs. 111 * 112 * The algorithm used here is mostly lifted from the perl module 113 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: 114 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip 115 * 116 * More ideas are taken from: 117 * http://www.ics.uci.edu/~eppstein/161/960229.html 118 * 119 * Some ideas are (and a bit of code) are from from analyze.c, from GNU 120 * diffutils-2.7, which can be found at: 121 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz 122 * 123 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations) 124 * are my own. 125 * 126 * @author Geoffrey T. Dairiki 127 * @access private 128 */ 129class _DiffEngine { 130 131 function diff($from_lines, $to_lines) { 132 $n_from = count($from_lines); 133 $n_to = count($to_lines); 134 135 $this->xchanged = $this->ychanged = array(); 136 $this->xv = $this->yv = array(); 137 $this->xind = $this->yind = array(); 138 unset($this->seq); 139 unset($this->in_seq); 140 unset($this->lcs); 141 142 // Skip leading common lines. 143 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { 144 if ($from_lines[$skip] != $to_lines[$skip]) 145 break; 146 $this->xchanged[$skip] = $this->ychanged[$skip] = false; 147 } 148 // Skip trailing common lines. 149 $xi = $n_from; 150 $yi = $n_to; 151 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { 152 if ($from_lines[$xi] != $to_lines[$yi]) 153 break; 154 $this->xchanged[$xi] = $this->ychanged[$yi] = false; 155 } 156 157 // Ignore lines which do not exist in both files. 158 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) 159 $xhash[$from_lines[$xi]] = 1; 160 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { 161 $line = $to_lines[$yi]; 162 if (($this->ychanged[$yi] = empty($xhash[$line]))) 163 continue; 164 $yhash[$line] = 1; 165 $this->yv[] = $line; 166 $this->yind[] = $yi; 167 } 168 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { 169 $line = $from_lines[$xi]; 170 if (($this->xchanged[$xi] = empty($yhash[$line]))) 171 continue; 172 $this->xv[] = $line; 173 $this->xind[] = $xi; 174 } 175 176 // Find the LCS. 177 $this->_compareseq(0, count($this->xv), 0, count($this->yv)); 178 179 // Merge edits when possible 180 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); 181 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); 182 183 // Compute the edit operations. 184 $edits = array(); 185 $xi = $yi = 0; 186 while ($xi < $n_from || $yi < $n_to) { 187 USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]); 188 USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]); 189 190 // Skip matching "snake". 191 $copy = array(); 192 while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { 193 $copy[] = $from_lines[$xi++]; 194 ++$yi; 195 } 196 if ($copy) 197 $edits[] = new _DiffOp_Copy($copy); 198 199 // Find deletes & adds. 200 $delete = array(); 201 while ($xi < $n_from && $this->xchanged[$xi]) 202 $delete[] = $from_lines[$xi++]; 203 204 $add = array(); 205 while ($yi < $n_to && $this->ychanged[$yi]) 206 $add[] = $to_lines[$yi++]; 207 208 if ($delete && $add) 209 $edits[] = new _DiffOp_Change($delete, $add); 210 elseif ($delete) 211 $edits[] = new _DiffOp_Delete($delete); 212 elseif ($add) 213 $edits[] = new _DiffOp_Add($add); 214 } 215 return $edits; 216 } 217 218 219 /** 220 * Divide the Largest Common Subsequence (LCS) of the sequences 221 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally 222 * sized segments. 223 * 224 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an 225 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between 226 * sub sequences. The first sub-sequence is contained in [X0, X1), 227 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note 228 * that (X0, Y0) == (XOFF, YOFF) and 229 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 230 * 231 * This function assumes that the first lines of the specified portions 232 * of the two files do not match, and likewise that the last lines do not 233 * match. The caller must trim matching lines from the beginning and end 234 * of the portions it is going to specify. 235 */ 236 function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) { 237 $flip = false; 238 239 if ($xlim - $xoff > $ylim - $yoff) { 240 // Things seems faster (I'm not sure I understand why) 241 // when the shortest sequence in X. 242 $flip = true; 243 list ($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim); 244 } 245 246 if ($flip) 247 for ($i = $ylim - 1; $i >= $yoff; $i--) 248 $ymatches[$this->xv[$i]][] = $i; 249 else 250 for ($i = $ylim - 1; $i >= $yoff; $i--) 251 $ymatches[$this->yv[$i]][] = $i; 252 253 $this->lcs = 0; 254 $this->seq[0]= $yoff - 1; 255 $this->in_seq = array(); 256 $ymids[0] = array(); 257 258 $numer = $xlim - $xoff + $nchunks - 1; 259 $x = $xoff; 260 for ($chunk = 0; $chunk < $nchunks; $chunk++) { 261 if ($chunk > 0) 262 for ($i = 0; $i <= $this->lcs; $i++) 263 $ymids[$i][$chunk-1] = $this->seq[$i]; 264 265 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); 266 for ( ; $x < $x1; $x++) { 267 $line = $flip ? $this->yv[$x] : $this->xv[$x]; 268 if (empty($ymatches[$line])) 269 continue; 270 $matches = $ymatches[$line]; 271 reset($matches); 272 while (list ($junk, $y) = each($matches)) 273 if (empty($this->in_seq[$y])) { 274 $k = $this->_lcs_pos($y); 275 USE_ASSERTS && assert($k > 0); 276 $ymids[$k] = $ymids[$k-1]; 277 break; 278 } 279 while (list ($junk, $y) = each($matches)) { 280 if ($y > $this->seq[$k-1]) { 281 USE_ASSERTS && assert($y < $this->seq[$k]); 282 // Optimization: this is a common case: 283 // next match is just replacing previous match. 284 $this->in_seq[$this->seq[$k]] = false; 285 $this->seq[$k] = $y; 286 $this->in_seq[$y] = 1; 287 } 288 else if (empty($this->in_seq[$y])) { 289 $k = $this->_lcs_pos($y); 290 USE_ASSERTS && assert($k > 0); 291 $ymids[$k] = $ymids[$k-1]; 292 } 293 } 294 } 295 } 296 297 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); 298 $ymid = $ymids[$this->lcs]; 299 for ($n = 0; $n < $nchunks - 1; $n++) { 300 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); 301 $y1 = $ymid[$n] + 1; 302 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); 303 } 304 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); 305 306 return array($this->lcs, $seps); 307 } 308 309 function _lcs_pos($ypos) { 310 $end = $this->lcs; 311 if ($end == 0 || $ypos > $this->seq[$end]) { 312 $this->seq[++$this->lcs] = $ypos; 313 $this->in_seq[$ypos] = 1; 314 return $this->lcs; 315 } 316 317 $beg = 1; 318 while ($beg < $end) { 319 $mid = (int)(($beg + $end) / 2); 320 if ($ypos > $this->seq[$mid]) 321 $beg = $mid + 1; 322 else 323 $end = $mid; 324 } 325 326 USE_ASSERTS && assert($ypos != $this->seq[$end]); 327 328 $this->in_seq[$this->seq[$end]] = false; 329 $this->seq[$end] = $ypos; 330 $this->in_seq[$ypos] = 1; 331 return $end; 332 } 333 334 /** 335 * Find LCS of two sequences. 336 * 337 * The results are recorded in the vectors $this->{x,y}changed[], by 338 * storing a 1 in the element for each line that is an insertion 339 * or deletion (ie. is not in the LCS). 340 * 341 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 342 * 343 * Note that XLIM, YLIM are exclusive bounds. 344 * All line numbers are origin-0 and discarded lines are not counted. 345 */ 346 function _compareseq($xoff, $xlim, $yoff, $ylim) { 347 // Slide down the bottom initial diagonal. 348 while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) { 349 ++$xoff; 350 ++$yoff; 351 } 352 353 // Slide up the top initial diagonal. 354 while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { 355 --$xlim; 356 --$ylim; 357 } 358 359 if ($xoff == $xlim || $yoff == $ylim) 360 $lcs = 0; 361 else { 362 // This is ad hoc but seems to work well. 363 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); 364 //$nchunks = max(2,min(8,(int)$nchunks)); 365 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; 366 list ($lcs, $seps) 367 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks); 368 } 369 370 if ($lcs == 0) { 371 // X and Y sequences have no common subsequence: 372 // mark all changed. 373 while ($yoff < $ylim) 374 $this->ychanged[$this->yind[$yoff++]] = 1; 375 while ($xoff < $xlim) 376 $this->xchanged[$this->xind[$xoff++]] = 1; 377 } 378 else { 379 // Use the partitions to split this problem into subproblems. 380 reset($seps); 381 $pt1 = $seps[0]; 382 while ($pt2 = next($seps)) { 383 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); 384 $pt1 = $pt2; 385 } 386 } 387 } 388 389 /** 390 * Adjust inserts/deletes of identical lines to join changes 391 * as much as possible. 392 * 393 * We do something when a run of changed lines include a 394 * line at one end and has an excluded, identical line at the other. 395 * We are free to choose which identical line is included. 396 * `compareseq' usually chooses the one at the beginning, 397 * but usually it is cleaner to consider the following identical line 398 * to be the "change". 399 * 400 * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 401 */ 402 function _shift_boundaries($lines, &$changed, $other_changed) { 403 $i = 0; 404 $j = 0; 405 406 USE_ASSERTS && assert('count($lines) == count($changed)'); 407 $len = count($lines); 408 $other_len = count($other_changed); 409 410 while (1) { 411 /* 412 * Scan forwards to find beginning of another run of changes. 413 * Also keep track of the corresponding point in the other file. 414 * 415 * Throughout this code, $i and $j are adjusted together so that 416 * the first $i elements of $changed and the first $j elements 417 * of $other_changed both contain the same number of zeros 418 * (unchanged lines). 419 * Furthermore, $j is always kept so that $j == $other_len or 420 * $other_changed[$j] == false. 421 */ 422 while ($j < $other_len && $other_changed[$j]) 423 $j++; 424 425 while ($i < $len && ! $changed[$i]) { 426 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); 427 $i++; 428 $j++; 429 while ($j < $other_len && $other_changed[$j]) 430 $j++; 431 } 432 433 if ($i == $len) 434 break; 435 436 $start = $i; 437 438 // Find the end of this run of changes. 439 while (++$i < $len && $changed[$i]) 440 continue; 441 442 do { 443 /* 444 * Record the length of this run of changes, so that 445 * we can later determine whether the run has grown. 446 */ 447 $runlength = $i - $start; 448 449 /* 450 * Move the changed region back, so long as the 451 * previous unchanged line matches the last changed one. 452 * This merges with previous changed regions. 453 */ 454 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { 455 $changed[--$start] = 1; 456 $changed[--$i] = false; 457 while ($start > 0 && $changed[$start - 1]) 458 $start--; 459 USE_ASSERTS && assert('$j > 0'); 460 while ($other_changed[--$j]) 461 continue; 462 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); 463 } 464 465 /* 466 * Set CORRESPONDING to the end of the changed run, at the last 467 * point where it corresponds to a changed run in the other file. 468 * CORRESPONDING == LEN means no such point has been found. 469 */ 470 $corresponding = $j < $other_len ? $i : $len; 471 472 /* 473 * Move the changed region forward, so long as the 474 * first changed line matches the following unchanged one. 475 * This merges with following changed regions. 476 * Do this second, so that if there are no merges, 477 * the changed region is moved forward as far as possible. 478 */ 479 while ($i < $len && $lines[$start] == $lines[$i]) { 480 $changed[$start++] = false; 481 $changed[$i++] = 1; 482 while ($i < $len && $changed[$i]) 483 $i++; 484 485 USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]'); 486 $j++; 487 if ($j < $other_len && $other_changed[$j]) { 488 $corresponding = $i; 489 while ($j < $other_len && $other_changed[$j]) 490 $j++; 491 } 492 } 493 } while ($runlength != $i - $start); 494 495 /* 496 * If possible, move the fully-merged run of changes 497 * back to a corresponding run in the other file. 498 */ 499 while ($corresponding < $i) { 500 $changed[--$start] = 1; 501 $changed[--$i] = 0; 502 USE_ASSERTS && assert('$j > 0'); 503 while ($other_changed[--$j]) 504 continue; 505 USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]'); 506 } 507 } 508 } 509} 510 511/** 512 * Class representing a 'diff' between two sequences of strings. 513 */ 514class Diff { 515 516 var $edits; 517 518 /** 519 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 520 */ 521 function Diff($from_lines, $to_lines) { 522 $this->__construct($from_lines, $to_lines); 523 } 524 525 /** 526 * Constructor. 527 * Computes diff between sequences of strings. 528 * 529 * @param $from_lines array An array of strings. 530 * (Typically these are lines from a file.) 531 * @param $to_lines array An array of strings. 532 */ 533 function __construct($from_lines, $to_lines) { 534 $eng = new _DiffEngine; 535 $this->edits = $eng->diff($from_lines, $to_lines); 536 //$this->_check($from_lines, $to_lines); 537 } 538 539 /** 540 * Compute reversed Diff. 541 * 542 * SYNOPSIS: 543 * 544 * $diff = new Diff($lines1, $lines2); 545 * $rev = $diff->reverse(); 546 * @return object A Diff object representing the inverse of the 547 * original diff. 548 */ 549 function reverse() { 550 $rev = $this; 551 $rev->edits = array(); 552 foreach ($this->edits as $edit) { 553 $rev->edits[] = $edit->reverse(); 554 } 555 return $rev; 556 } 557 558 /** 559 * Check for empty diff. 560 * 561 * @return bool True iff two sequences were identical. 562 */ 563 function isEmpty() { 564 foreach ($this->edits as $edit) { 565 if ($edit->type != 'copy') 566 return false; 567 } 568 return true; 569 } 570 571 /** 572 * Compute the length of the Longest Common Subsequence (LCS). 573 * 574 * This is mostly for diagnostic purposed. 575 * 576 * @return int The length of the LCS. 577 */ 578 function lcs() { 579 $lcs = 0; 580 foreach ($this->edits as $edit) { 581 if ($edit->type == 'copy') 582 $lcs += count($edit->orig); 583 } 584 return $lcs; 585 } 586 587 /** 588 * Get the original set of lines. 589 * 590 * This reconstructs the $from_lines parameter passed to the 591 * constructor. 592 * 593 * @return array The original sequence of strings. 594 */ 595 function orig() { 596 $lines = array(); 597 598 foreach ($this->edits as $edit) { 599 if ($edit->orig) 600 array_splice($lines, count($lines), 0, $edit->orig); 601 } 602 return $lines; 603 } 604 605 /** 606 * Get the closing set of lines. 607 * 608 * This reconstructs the $to_lines parameter passed to the 609 * constructor. 610 * 611 * @return array The sequence of strings. 612 */ 613 function closing() { 614 $lines = array(); 615 616 foreach ($this->edits as $edit) { 617 if ($edit->closing) 618 array_splice($lines, count($lines), 0, $edit->closing); 619 } 620 return $lines; 621 } 622 623 /** 624 * Check a Diff for validity. 625 * 626 * This is here only for debugging purposes. 627 */ 628 function _check($from_lines, $to_lines) { 629 if (serialize($from_lines) != serialize($this->orig())) 630 trigger_error("Reconstructed original doesn't match", E_USER_ERROR); 631 if (serialize($to_lines) != serialize($this->closing())) 632 trigger_error("Reconstructed closing doesn't match", E_USER_ERROR); 633 634 $rev = $this->reverse(); 635 if (serialize($to_lines) != serialize($rev->orig())) 636 trigger_error("Reversed original doesn't match", E_USER_ERROR); 637 if (serialize($from_lines) != serialize($rev->closing())) 638 trigger_error("Reversed closing doesn't match", E_USER_ERROR); 639 640 $prevtype = 'none'; 641 foreach ($this->edits as $edit) { 642 if ($prevtype == $edit->type) 643 trigger_error("Edit sequence is non-optimal", E_USER_ERROR); 644 $prevtype = $edit->type; 645 } 646 647 $lcs = $this->lcs(); 648 trigger_error("Diff okay: LCS = $lcs", E_USER_NOTICE); 649 } 650} 651 652/** 653 * FIXME: bad name. 654 */ 655class MappedDiff extends Diff { 656 /** 657 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 658 */ 659 function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) { 660 $this->__construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines); 661 } 662 663 /** 664 * Constructor. 665 * 666 * Computes diff between sequences of strings. 667 * 668 * This can be used to compute things like 669 * case-insensitve diffs, or diffs which ignore 670 * changes in white-space. 671 * 672 * @param $from_lines array An array of strings. 673 * (Typically these are lines from a file.) 674 * 675 * @param $to_lines array An array of strings. 676 * 677 * @param $mapped_from_lines array This array should 678 * have the same size number of elements as $from_lines. 679 * The elements in $mapped_from_lines and 680 * $mapped_to_lines are what is actually compared 681 * when computing the diff. 682 * 683 * @param $mapped_to_lines array This array should 684 * have the same number of elements as $to_lines. 685 */ 686 function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) { 687 688 assert(count($from_lines) == count($mapped_from_lines)); 689 assert(count($to_lines) == count($mapped_to_lines)); 690 691 parent::__construct($mapped_from_lines, $mapped_to_lines); 692 693 $xi = $yi = 0; 694 $ecnt = count($this->edits); 695 for ($i = 0; $i < $ecnt; $i++) { 696 $orig = &$this->edits[$i]->orig; 697 if (is_array($orig)) { 698 $orig = array_slice($from_lines, $xi, count($orig)); 699 $xi += count($orig); 700 } 701 702 $closing = &$this->edits[$i]->closing; 703 if (is_array($closing)) { 704 $closing = array_slice($to_lines, $yi, count($closing)); 705 $yi += count($closing); 706 } 707 } 708 } 709} 710 711/** 712 * A class to format Diffs 713 * 714 * This class formats the diff in classic diff format. 715 * It is intended that this class be customized via inheritance, 716 * to obtain fancier outputs. 717 */ 718class DiffFormatter { 719 /** 720 * Number of leading context "lines" to preserve. 721 * 722 * This should be left at zero for this class, but subclasses 723 * may want to set this to other values. 724 */ 725 var $leading_context_lines = 0; 726 727 /** 728 * Number of trailing context "lines" to preserve. 729 * 730 * This should be left at zero for this class, but subclasses 731 * may want to set this to other values. 732 */ 733 var $trailing_context_lines = 0; 734 735 /** 736 * Format a diff. 737 * 738 * @param $diff object A Diff object. 739 * @return string The formatted output. 740 */ 741 function format($diff) { 742 743 $xi = $yi = 1; 744 $block = false; 745 $context = array(); 746 747 $nlead = $this->leading_context_lines; 748 $ntrail = $this->trailing_context_lines; 749 750 $this->_start_diff(); 751 752 foreach ($diff->edits as $edit) { 753 if ($edit->type == 'copy') { 754 if (is_array($block)) { 755 if (count($edit->orig) <= $nlead + $ntrail) { 756 $block[] = $edit; 757 } 758 else{ 759 if ($ntrail) { 760 $context = array_slice($edit->orig, 0, $ntrail); 761 $block[] = new _DiffOp_Copy($context); 762 } 763 $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block); 764 $block = false; 765 } 766 } 767 $context = $edit->orig; 768 } 769 else { 770 if (! is_array($block)) { 771 $context = array_slice($context, count($context) - $nlead); 772 $x0 = $xi - count($context); 773 $y0 = $yi - count($context); 774 $block = array(); 775 if ($context) 776 $block[] = new _DiffOp_Copy($context); 777 } 778 $block[] = $edit; 779 } 780 781 if ($edit->orig) 782 $xi += count($edit->orig); 783 if ($edit->closing) 784 $yi += count($edit->closing); 785 } 786 787 if (is_array($block)) 788 $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block); 789 790 return $this->_end_diff(); 791 } 792 793 function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) { 794 $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen)); 795 foreach ($edits as $edit) { 796 if ($edit->type == 'copy') 797 $this->_context($edit->orig); 798 elseif ($edit->type == 'add') 799 $this->_added($edit->closing); 800 elseif ($edit->type == 'delete') 801 $this->_deleted($edit->orig); 802 elseif ($edit->type == 'change') 803 $this->_changed($edit->orig, $edit->closing); 804 else 805 trigger_error("Unknown edit type", E_USER_ERROR); 806 } 807 $this->_end_block(); 808 } 809 810 function _start_diff() { 811 ob_start(); 812 } 813 814 function _end_diff() { 815 $val = ob_get_contents(); 816 ob_end_clean(); 817 return $val; 818 } 819 820 function _block_header($xbeg, $xlen, $ybeg, $ylen) { 821 if ($xlen > 1) 822 $xbeg .= "," . ($xbeg + $xlen - 1); 823 if ($ylen > 1) 824 $ybeg .= "," . ($ybeg + $ylen - 1); 825 826 return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg; 827 } 828 829 function _start_block($header) { 830 echo $header; 831 } 832 833 function _end_block() { 834 } 835 836 function _lines($lines, $prefix = ' ') { 837 foreach ($lines as $line) 838 echo "$prefix $line\n"; 839 } 840 841 function _context($lines) { 842 $this->_lines($lines); 843 } 844 845 function _added($lines) { 846 $this->_lines($lines, ">"); 847 } 848 function _deleted($lines) { 849 $this->_lines($lines, "<"); 850 } 851 852 function _changed($orig, $closing) { 853 $this->_deleted($orig); 854 echo "---\n"; 855 $this->_added($closing); 856 } 857} 858 859 860/** 861 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3 862 * 863 */ 864 865define('NBSP', "\xC2\xA0"); // utf-8 non-breaking space. 866 867class _HWLDF_WordAccumulator { 868 /** 869 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 870 */ 871 function _HWLDF_WordAccumulator() { 872 $this->__construct(); 873 } 874 875 function __construct() { 876 $this->_lines = array(); 877 $this->_line = ''; 878 $this->_group = ''; 879 $this->_tag = ''; 880 } 881 882 function _flushGroup($new_tag) { 883 if ($this->_group !== '') { 884 if ($this->_tag == 'mark') 885 $this->_line .= '<strong>'.$this->_group.'</strong>'; 886 elseif ($this->_tag == 'add') 887 $this->_line .= '<span class="diff-addedline">'.$this->_group.'</span>'; 888 elseif ($this->_tag == 'del') 889 $this->_line .= '<span class="diff-deletedline"><del>'.$this->_group.'</del></span>'; 890 else 891 $this->_line .= $this->_group; 892 } 893 $this->_group = ''; 894 $this->_tag = $new_tag; 895 } 896 897 function _flushLine($new_tag) { 898 $this->_flushGroup($new_tag); 899 if ($this->_line != '') 900 $this->_lines[] = $this->_line; 901 $this->_line = ''; 902 } 903 904 function addWords($words, $tag = '') { 905 if ($tag != $this->_tag) 906 $this->_flushGroup($tag); 907 908 foreach ($words as $word) { 909 // new-line should only come as first char of word. 910 if ($word == '') 911 continue; 912 if ($word[0] == "\n") { 913 $this->_group .= NBSP; 914 $this->_flushLine($tag); 915 $word = substr($word, 1); 916 } 917 assert(!strstr($word, "\n")); 918 $this->_group .= $word; 919 } 920 } 921 922 function getLines() { 923 $this->_flushLine('~done'); 924 return $this->_lines; 925 } 926} 927 928class WordLevelDiff extends MappedDiff { 929 930 /** 931 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 932 */ 933 function WordLevelDiff($orig_lines, $closing_lines) { 934 $this->__construct($orig_lines, $closing_lines); 935 } 936 937 function __construct($orig_lines, $closing_lines) { 938 list ($orig_words, $orig_stripped) = $this->_split($orig_lines); 939 list ($closing_words, $closing_stripped) = $this->_split($closing_lines); 940 941 parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped); 942 } 943 944 function _split($lines) { 945 if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu', 946 implode("\n", $lines), $m)) { 947 return array(array(''), array('')); 948 } 949 return array($m[0], $m[1]); 950 } 951 952 function orig() { 953 $orig = new _HWLDF_WordAccumulator; 954 955 foreach ($this->edits as $edit) { 956 if ($edit->type == 'copy') 957 $orig->addWords($edit->orig); 958 elseif ($edit->orig) 959 $orig->addWords($edit->orig, 'mark'); 960 } 961 return $orig->getLines(); 962 } 963 964 function closing() { 965 $closing = new _HWLDF_WordAccumulator; 966 967 foreach ($this->edits as $edit) { 968 if ($edit->type == 'copy') 969 $closing->addWords($edit->closing); 970 elseif ($edit->closing) 971 $closing->addWords($edit->closing, 'mark'); 972 } 973 return $closing->getLines(); 974 } 975} 976 977class InlineWordLevelDiff extends MappedDiff { 978 979 /** 980 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 981 */ 982 function InlineWordLevelDiff($orig_lines, $closing_lines) { 983 $this->__construct($orig_lines, $closing_lines); 984 } 985 986 function __construct($orig_lines, $closing_lines) { 987 list ($orig_words, $orig_stripped) = $this->_split($orig_lines); 988 list ($closing_words, $closing_stripped) = $this->_split($closing_lines); 989 990 parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped); 991 } 992 993 function _split($lines) { 994 if (!preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xsu', 995 implode("\n", $lines), $m)) { 996 return array(array(''), array('')); 997 } 998 return array($m[0], $m[1]); 999 } 1000 1001 function inline() { 1002 $orig = new _HWLDF_WordAccumulator; 1003 foreach ($this->edits as $edit) { 1004 if ($edit->type == 'copy') 1005 $orig->addWords($edit->closing); 1006 elseif ($edit->type == 'change'){ 1007 $orig->addWords($edit->orig, 'del'); 1008 $orig->addWords($edit->closing, 'add'); 1009 } elseif ($edit->type == 'delete') 1010 $orig->addWords($edit->orig, 'del'); 1011 elseif ($edit->type == 'add') 1012 $orig->addWords($edit->closing, 'add'); 1013 elseif ($edit->orig) 1014 $orig->addWords($edit->orig, 'del'); 1015 } 1016 return $orig->getLines(); 1017 } 1018} 1019 1020/** 1021 * "Unified" diff formatter. 1022 * 1023 * This class formats the diff in classic "unified diff" format. 1024 */ 1025class UnifiedDiffFormatter extends DiffFormatter { 1026 1027 /** 1028 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 1029 */ 1030 function UnifiedDiffFormatter($context_lines = 4) { 1031 $this->__construct($context_lines); 1032 } 1033 1034 function __construct($context_lines = 4) { 1035 $this->leading_context_lines = $context_lines; 1036 $this->trailing_context_lines = $context_lines; 1037 } 1038 1039 function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1040 if ($xlen != 1) 1041 $xbeg .= "," . $xlen; 1042 if ($ylen != 1) 1043 $ybeg .= "," . $ylen; 1044 return "@@ -$xbeg +$ybeg @@\n"; 1045 } 1046 1047 function _added($lines) { 1048 $this->_lines($lines, "+"); 1049 } 1050 function _deleted($lines) { 1051 $this->_lines($lines, "-"); 1052 } 1053 function _changed($orig, $final) { 1054 $this->_deleted($orig); 1055 $this->_added($final); 1056 } 1057} 1058 1059/** 1060 * Wikipedia Table style diff formatter. 1061 * 1062 */ 1063class TableDiffFormatter extends DiffFormatter { 1064 1065 /** 1066 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 1067 */ 1068 function TableDiffFormatter() { 1069 $this->__construct(); 1070 } 1071 1072 function __construct() { 1073 $this->leading_context_lines = 2; 1074 $this->trailing_context_lines = 2; 1075 } 1076 1077 function format($diff) { 1078 // Preserve whitespaces by converting some to non-breaking spaces. 1079 // Do not convert all of them to allow word-wrap. 1080 $val = parent::format($diff); 1081 $val = str_replace(' ',' ', $val); 1082 $val = preg_replace('/ (?=<)|(?<=[ >]) /', ' ', $val); 1083 return $val; 1084 } 1085 1086 function _pre($text){ 1087 $text = htmlspecialchars($text); 1088 return $text; 1089 } 1090 1091 function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1092 global $lang; 1093 $l1 = $lang['line'].' '.$xbeg; 1094 $l2 = $lang['line'].' '.$ybeg; 1095 $r = '<tr><td class="diff-blockheader" colspan="2">'.$l1.":</td>\n". 1096 ' <td class="diff-blockheader" colspan="2">'.$l2.":</td>\n". 1097 "</tr>\n"; 1098 return $r; 1099 } 1100 1101 function _start_block($header) { 1102 print($header); 1103 } 1104 1105 function _end_block() { 1106 } 1107 1108 function _lines($lines, $prefix=' ', $color="white") { 1109 } 1110 1111 function addedLine($line) { 1112 return '<td>+</td><td class="diff-addedline">' . $line.'</td>'; 1113 } 1114 1115 function deletedLine($line) { 1116 return '<td>-</td><td class="diff-deletedline">' . $line.'</td>'; 1117 } 1118 1119 function emptyLine() { 1120 return '<td colspan="2"> </td>'; 1121 } 1122 1123 function contextLine($line) { 1124 return '<td> </td><td class="diff-context">'.$line.'</td>'; 1125 } 1126 1127 function _added($lines) { 1128 foreach ($lines as $line) { 1129 print('<tr>' . $this->emptyLine() . $this->addedLine($line) . "</tr>\n"); 1130 } 1131 } 1132 1133 function _deleted($lines) { 1134 foreach ($lines as $line) { 1135 print('<tr>' . $this->deletedLine($line) . $this->emptyLine() . "</tr>\n"); 1136 } 1137 } 1138 1139 function _context($lines) { 1140 foreach ($lines as $line) { 1141 print('<tr>' . $this->contextLine($line) . $this->contextLine($line) . "</tr>\n"); 1142 } 1143 } 1144 1145 function _changed($orig, $closing) { 1146 $diff = new WordLevelDiff($orig, $closing); 1147 $del = $diff->orig(); 1148 $add = $diff->closing(); 1149 1150 while ($line = array_shift($del)) { 1151 $aline = array_shift($add); 1152 print('<tr>' . $this->deletedLine($line) . $this->addedLine($aline) . "</tr>\n"); 1153 } 1154 $this->_added($add); # If any leftovers 1155 } 1156} 1157 1158/** 1159 * Inline style diff formatter. 1160 * 1161 */ 1162class InlineDiffFormatter extends DiffFormatter { 1163 var $colspan = 4; 1164 /** 1165 * DONOT USE THIS. Its just to make sure nothing breaks because of the name change. 1166 */ 1167 function InlineDiffFormatter() { 1168 $this->__construct(); 1169 } 1170 1171 function __construct() { 1172 $this->leading_context_lines = 2; 1173 $this->trailing_context_lines = 2; 1174 } 1175 1176 function format($diff) { 1177 // Preserve whitespaces by converting some to non-breaking spaces. 1178 // Do not convert all of them to allow word-wrap. 1179 $val = parent::format($diff); 1180 $val = str_replace(' ',' ', $val); 1181 $val = preg_replace('/ (?=<)|(?<=[ >]) /', ' ', $val); 1182 return $val; 1183 } 1184 1185 function _pre($text){ 1186 $text = htmlspecialchars($text); 1187 return $text; 1188 } 1189 1190 function _block_header($xbeg, $xlen, $ybeg, $ylen) { 1191 global $lang; 1192 if ($xlen != 1) 1193 $xbeg .= "," . $xlen; 1194 if ($ylen != 1) 1195 $ybeg .= "," . $ylen; 1196 $r = '<tr><td colspan="'.$this->colspan.'" class="diff-blockheader">@@ '.$lang['line']." -$xbeg +$ybeg @@"; 1197 $r .= ' <span class="diff-deletedline"><del>'.$lang['deleted'].'</del></span>'; 1198 $r .= ' <span class="diff-addedline">'.$lang['created'].'</span>'; 1199 $r .= "</td></tr>\n"; 1200 return $r; 1201 } 1202 1203 function _start_block($header) { 1204 print($header."\n"); 1205 } 1206 1207 function _end_block() { 1208 } 1209 1210 function _lines($lines, $prefix=' ', $color="white") { 1211 } 1212 1213 function _added($lines) { 1214 foreach ($lines as $line) { 1215 print('<tr><td colspan="'.$this->colspan.'" class="diff-addedline">'. $line . "</td></tr>\n"); 1216 } 1217 } 1218 1219 function _deleted($lines) { 1220 foreach ($lines as $line) { 1221 print('<tr><td colspan="'.$this->colspan.'" class="diff-deletedline"><del>' . $line . "</del></td></tr>\n"); 1222 } 1223 } 1224 1225 function _context($lines) { 1226 foreach ($lines as $line) { 1227 print('<tr><td colspan="'.$this->colspan.'" class="diff-context">'.$line."</td></tr>\n"); 1228 } 1229 } 1230 1231 function _changed($orig, $closing) { 1232 $diff = new InlineWordLevelDiff($orig, $closing); 1233 $add = $diff->inline(); 1234 1235 foreach ($add as $line) 1236 print('<tr><td colspan="'.$this->colspan.'">'.$line."</td></tr>\n"); 1237 } 1238} 1239 1240 1241//Setup VIM: ex: et ts=4 : 1242