1<?php 2/* 3 * This file is part of sebastian/diff. 4 * 5 * (c) Sebastian Bergmann <sebastian@phpunit.de> 6 * 7 * For the full copyright and license information, please view the LICENSE 8 * file that was distributed with this source code. 9 */ 10 11namespace SebastianBergmann\Diff; 12 13use SebastianBergmann\Diff\LCS\LongestCommonSubsequence; 14use SebastianBergmann\Diff\LCS\TimeEfficientImplementation; 15use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation; 16 17/** 18 * Diff implementation. 19 */ 20class Differ 21{ 22 /** 23 * @var string 24 */ 25 private $header; 26 27 /** 28 * @var bool 29 */ 30 private $showNonDiffLines; 31 32 /** 33 * @param string $header 34 * @param bool $showNonDiffLines 35 */ 36 public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true) 37 { 38 $this->header = $header; 39 $this->showNonDiffLines = $showNonDiffLines; 40 } 41 42 /** 43 * Returns the diff between two arrays or strings as string. 44 * 45 * @param array|string $from 46 * @param array|string $to 47 * @param LongestCommonSubsequence $lcs 48 * 49 * @return string 50 */ 51 public function diff($from, $to, LongestCommonSubsequence $lcs = null) 52 { 53 $from = $this->validateDiffInput($from); 54 $to = $this->validateDiffInput($to); 55 $diff = $this->diffToArray($from, $to, $lcs); 56 $old = $this->checkIfDiffInOld($diff); 57 $start = isset($old[0]) ? $old[0] : 0; 58 $end = \count($diff); 59 60 if ($tmp = \array_search($end, $old)) { 61 $end = $tmp; 62 } 63 64 return $this->getBuffer($diff, $old, $start, $end); 65 } 66 67 /** 68 * Casts variable to string if it is not a string or array. 69 * 70 * @param mixed $input 71 * 72 * @return string 73 */ 74 private function validateDiffInput($input) 75 { 76 if (!\is_array($input) && !\is_string($input)) { 77 return (string) $input; 78 } 79 80 return $input; 81 } 82 83 /** 84 * Takes input of the diff array and returns the old array. 85 * Iterates through diff line by line, 86 * 87 * @param array $diff 88 * 89 * @return array 90 */ 91 private function checkIfDiffInOld(array $diff) 92 { 93 $inOld = false; 94 $i = 0; 95 $old = array(); 96 97 foreach ($diff as $line) { 98 if ($line[1] === 0 /* OLD */) { 99 if ($inOld === false) { 100 $inOld = $i; 101 } 102 } elseif ($inOld !== false) { 103 if (($i - $inOld) > 5) { 104 $old[$inOld] = $i - 1; 105 } 106 107 $inOld = false; 108 } 109 110 ++$i; 111 } 112 113 return $old; 114 } 115 116 /** 117 * Generates buffer in string format, returning the patch. 118 * 119 * @param array $diff 120 * @param array $old 121 * @param int $start 122 * @param int $end 123 * 124 * @return string 125 */ 126 private function getBuffer(array $diff, array $old, $start, $end) 127 { 128 $buffer = $this->header; 129 130 if (!isset($old[$start])) { 131 $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start); 132 ++$start; 133 } 134 135 for ($i = $start; $i < $end; $i++) { 136 if (isset($old[$i])) { 137 $i = $old[$i]; 138 $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i); 139 } else { 140 $buffer = $this->getDiffBufferElement($diff, $buffer, $i); 141 } 142 } 143 144 return $buffer; 145 } 146 147 /** 148 * Gets individual buffer element. 149 * 150 * @param array $diff 151 * @param string $buffer 152 * @param int $diffIndex 153 * 154 * @return string 155 */ 156 private function getDiffBufferElement(array $diff, $buffer, $diffIndex) 157 { 158 if ($diff[$diffIndex][1] === 1 /* ADDED */) { 159 $buffer .= '+' . $diff[$diffIndex][0] . "\n"; 160 } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) { 161 $buffer .= '-' . $diff[$diffIndex][0] . "\n"; 162 } elseif ($this->showNonDiffLines === true) { 163 $buffer .= ' ' . $diff[$diffIndex][0] . "\n"; 164 } 165 166 return $buffer; 167 } 168 169 /** 170 * Gets individual buffer element with opening. 171 * 172 * @param array $diff 173 * @param string $buffer 174 * @param int $diffIndex 175 * 176 * @return string 177 */ 178 private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex) 179 { 180 if ($this->showNonDiffLines === true) { 181 $buffer .= "@@ @@\n"; 182 } 183 184 return $this->getDiffBufferElement($diff, $buffer, $diffIndex); 185 } 186 187 /** 188 * Returns the diff between two arrays or strings as array. 189 * 190 * Each array element contains two elements: 191 * - [0] => mixed $token 192 * - [1] => 2|1|0 193 * 194 * - 2: REMOVED: $token was removed from $from 195 * - 1: ADDED: $token was added to $from 196 * - 0: OLD: $token is not changed in $to 197 * 198 * @param array|string $from 199 * @param array|string $to 200 * @param LongestCommonSubsequence $lcs 201 * 202 * @return array 203 */ 204 public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null) 205 { 206 if (\is_string($from)) { 207 $fromMatches = $this->getNewLineMatches($from); 208 $from = $this->splitStringByLines($from); 209 } elseif (\is_array($from)) { 210 $fromMatches = array(); 211 } else { 212 throw new \InvalidArgumentException('"from" must be an array or string.'); 213 } 214 215 if (\is_string($to)) { 216 $toMatches = $this->getNewLineMatches($to); 217 $to = $this->splitStringByLines($to); 218 } elseif (\is_array($to)) { 219 $toMatches = array(); 220 } else { 221 throw new \InvalidArgumentException('"to" must be an array or string.'); 222 } 223 224 list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to); 225 226 if ($lcs === null) { 227 $lcs = $this->selectLcsImplementation($from, $to); 228 } 229 230 $common = $lcs->calculate(\array_values($from), \array_values($to)); 231 $diff = array(); 232 233 if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) { 234 $diff[] = array( 235 '#Warning: Strings contain different line endings!', 236 0 237 ); 238 } 239 240 foreach ($start as $token) { 241 $diff[] = array($token, 0 /* OLD */); 242 } 243 244 \reset($from); 245 \reset($to); 246 247 foreach ($common as $token) { 248 while (($fromToken = \reset($from)) !== $token) { 249 $diff[] = array(\array_shift($from), 2 /* REMOVED */); 250 } 251 252 while (($toToken = \reset($to)) !== $token) { 253 $diff[] = array(\array_shift($to), 1 /* ADDED */); 254 } 255 256 $diff[] = array($token, 0 /* OLD */); 257 258 \array_shift($from); 259 \array_shift($to); 260 } 261 262 while (($token = \array_shift($from)) !== null) { 263 $diff[] = array($token, 2 /* REMOVED */); 264 } 265 266 while (($token = \array_shift($to)) !== null) { 267 $diff[] = array($token, 1 /* ADDED */); 268 } 269 270 foreach ($end as $token) { 271 $diff[] = array($token, 0 /* OLD */); 272 } 273 274 return $diff; 275 } 276 277 /** 278 * Get new strings denoting new lines from a given string. 279 * 280 * @param string $string 281 * 282 * @return array 283 */ 284 private function getNewLineMatches($string) 285 { 286 \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches); 287 288 return $stringMatches; 289 } 290 291 /** 292 * Checks if input is string, if so it will split it line-by-line. 293 * 294 * @param string $input 295 * 296 * @return array 297 */ 298 private function splitStringByLines($input) 299 { 300 return \preg_split('(\r\n|\r|\n)', $input); 301 } 302 303 /** 304 * @param array $from 305 * @param array $to 306 * 307 * @return LongestCommonSubsequence 308 */ 309 private function selectLcsImplementation(array $from, array $to) 310 { 311 // We do not want to use the time-efficient implementation if its memory 312 // footprint will probably exceed this value. Note that the footprint 313 // calculation is only an estimation for the matrix and the LCS method 314 // will typically allocate a bit more memory than this. 315 $memoryLimit = 100 * 1024 * 1024; 316 317 if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) { 318 return new MemoryEfficientImplementation; 319 } 320 321 return new TimeEfficientImplementation; 322 } 323 324 /** 325 * Calculates the estimated memory footprint for the DP-based method. 326 * 327 * @param array $from 328 * @param array $to 329 * 330 * @return int|float 331 */ 332 private function calculateEstimatedFootprint(array $from, array $to) 333 { 334 $itemSize = PHP_INT_SIZE === 4 ? 76 : 144; 335 336 return $itemSize * \pow(\min(\count($from), \count($to)), 2); 337 } 338 339 /** 340 * Returns true if line ends don't match on fromMatches and toMatches. 341 * 342 * @param array $fromMatches 343 * @param array $toMatches 344 * 345 * @return bool 346 */ 347 private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches) 348 { 349 return isset($fromMatches[0], $toMatches[0]) && 350 \count($fromMatches[0]) === \count($toMatches[0]) && 351 $fromMatches[0] !== $toMatches[0]; 352 } 353 354 /** 355 * @param array $from 356 * @param array $to 357 * 358 * @return array 359 */ 360 private static function getArrayDiffParted(array &$from, array &$to) 361 { 362 $start = array(); 363 $end = array(); 364 365 \reset($to); 366 367 foreach ($from as $k => $v) { 368 $toK = \key($to); 369 370 if ($toK === $k && $v === $to[$k]) { 371 $start[$k] = $v; 372 373 unset($from[$k], $to[$k]); 374 } else { 375 break; 376 } 377 } 378 379 \end($from); 380 \end($to); 381 382 do { 383 $fromK = \key($from); 384 $toK = \key($to); 385 386 if (null === $fromK || null === $toK || \current($from) !== \current($to)) { 387 break; 388 } 389 390 \prev($from); 391 \prev($to); 392 393 $end = array($fromK => $from[$fromK]) + $end; 394 unset($from[$fromK], $to[$toK]); 395 } while (true); 396 397 return array($from, $to, $start, $end); 398 } 399} 400