1<?php
2/*
3 * This file is part of sebastian/diff.
4 *
5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
6 *
7 * For the full copyright and license information, please view the LICENSE
8 * file that was distributed with this source code.
9 */
10
11namespace SebastianBergmann\Diff;
12
13use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
14use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
15use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
16
17/**
18 * Diff implementation.
19 */
20class Differ
21{
22    /**
23     * @var string
24     */
25    private $header;
26
27    /**
28     * @var bool
29     */
30    private $showNonDiffLines;
31
32    /**
33     * @param string $header
34     * @param bool   $showNonDiffLines
35     */
36    public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
37    {
38        $this->header           = $header;
39        $this->showNonDiffLines = $showNonDiffLines;
40    }
41
42    /**
43     * Returns the diff between two arrays or strings as string.
44     *
45     * @param array|string             $from
46     * @param array|string             $to
47     * @param LongestCommonSubsequence $lcs
48     *
49     * @return string
50     */
51    public function diff($from, $to, LongestCommonSubsequence $lcs = null)
52    {
53        $from  = $this->validateDiffInput($from);
54        $to    = $this->validateDiffInput($to);
55        $diff  = $this->diffToArray($from, $to, $lcs);
56        $old   = $this->checkIfDiffInOld($diff);
57        $start = isset($old[0]) ? $old[0] : 0;
58        $end   = \count($diff);
59
60        if ($tmp = \array_search($end, $old)) {
61            $end = $tmp;
62        }
63
64        return $this->getBuffer($diff, $old, $start, $end);
65    }
66
67    /**
68     * Casts variable to string if it is not a string or array.
69     *
70     * @param mixed $input
71     *
72     * @return string
73     */
74    private function validateDiffInput($input)
75    {
76        if (!\is_array($input) && !\is_string($input)) {
77            return (string) $input;
78        }
79
80        return $input;
81    }
82
83    /**
84     * Takes input of the diff array and returns the old array.
85     * Iterates through diff line by line,
86     *
87     * @param array $diff
88     *
89     * @return array
90     */
91    private function checkIfDiffInOld(array $diff)
92    {
93        $inOld = false;
94        $i     = 0;
95        $old   = array();
96
97        foreach ($diff as $line) {
98            if ($line[1] === 0 /* OLD */) {
99                if ($inOld === false) {
100                    $inOld = $i;
101                }
102            } elseif ($inOld !== false) {
103                if (($i - $inOld) > 5) {
104                    $old[$inOld] = $i - 1;
105                }
106
107                $inOld = false;
108            }
109
110            ++$i;
111        }
112
113        return $old;
114    }
115
116    /**
117     * Generates buffer in string format, returning the patch.
118     *
119     * @param array $diff
120     * @param array $old
121     * @param int   $start
122     * @param int   $end
123     *
124     * @return string
125     */
126    private function getBuffer(array $diff, array $old, $start, $end)
127    {
128        $buffer = $this->header;
129
130        if (!isset($old[$start])) {
131            $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start);
132            ++$start;
133        }
134
135        for ($i = $start; $i < $end; $i++) {
136            if (isset($old[$i])) {
137                $i      = $old[$i];
138                $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i);
139            } else {
140                $buffer = $this->getDiffBufferElement($diff, $buffer, $i);
141            }
142        }
143
144        return $buffer;
145    }
146
147    /**
148     * Gets individual buffer element.
149     *
150     * @param array  $diff
151     * @param string $buffer
152     * @param int    $diffIndex
153     *
154     * @return string
155     */
156    private function getDiffBufferElement(array $diff, $buffer, $diffIndex)
157    {
158        if ($diff[$diffIndex][1] === 1 /* ADDED */) {
159            $buffer .= '+' . $diff[$diffIndex][0] . "\n";
160        } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) {
161            $buffer .= '-' . $diff[$diffIndex][0] . "\n";
162        } elseif ($this->showNonDiffLines === true) {
163            $buffer .= ' ' . $diff[$diffIndex][0] . "\n";
164        }
165
166        return $buffer;
167    }
168
169    /**
170     * Gets individual buffer element with opening.
171     *
172     * @param array  $diff
173     * @param string $buffer
174     * @param int    $diffIndex
175     *
176     * @return string
177     */
178    private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex)
179    {
180        if ($this->showNonDiffLines === true) {
181            $buffer .= "@@ @@\n";
182        }
183
184        return $this->getDiffBufferElement($diff, $buffer, $diffIndex);
185    }
186
187    /**
188     * Returns the diff between two arrays or strings as array.
189     *
190     * Each array element contains two elements:
191     *   - [0] => mixed $token
192     *   - [1] => 2|1|0
193     *
194     * - 2: REMOVED: $token was removed from $from
195     * - 1: ADDED: $token was added to $from
196     * - 0: OLD: $token is not changed in $to
197     *
198     * @param array|string             $from
199     * @param array|string             $to
200     * @param LongestCommonSubsequence $lcs
201     *
202     * @return array
203     */
204    public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
205    {
206        if (\is_string($from)) {
207            $fromMatches = $this->getNewLineMatches($from);
208            $from        = $this->splitStringByLines($from);
209        } elseif (\is_array($from)) {
210            $fromMatches = array();
211        } else {
212            throw new \InvalidArgumentException('"from" must be an array or string.');
213        }
214
215        if (\is_string($to)) {
216            $toMatches = $this->getNewLineMatches($to);
217            $to        = $this->splitStringByLines($to);
218        } elseif (\is_array($to)) {
219            $toMatches = array();
220        } else {
221            throw new \InvalidArgumentException('"to" must be an array or string.');
222        }
223
224        list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);
225
226        if ($lcs === null) {
227            $lcs = $this->selectLcsImplementation($from, $to);
228        }
229
230        $common = $lcs->calculate(\array_values($from), \array_values($to));
231        $diff   = array();
232
233        if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) {
234            $diff[] = array(
235                '#Warning: Strings contain different line endings!',
236                0
237            );
238        }
239
240        foreach ($start as $token) {
241            $diff[] = array($token, 0 /* OLD */);
242        }
243
244        \reset($from);
245        \reset($to);
246
247        foreach ($common as $token) {
248            while (($fromToken = \reset($from)) !== $token) {
249                $diff[] = array(\array_shift($from), 2 /* REMOVED */);
250            }
251
252            while (($toToken = \reset($to)) !== $token) {
253                $diff[] = array(\array_shift($to), 1 /* ADDED */);
254            }
255
256            $diff[] = array($token, 0 /* OLD */);
257
258            \array_shift($from);
259            \array_shift($to);
260        }
261
262        while (($token = \array_shift($from)) !== null) {
263            $diff[] = array($token, 2 /* REMOVED */);
264        }
265
266        while (($token = \array_shift($to)) !== null) {
267            $diff[] = array($token, 1 /* ADDED */);
268        }
269
270        foreach ($end as $token) {
271            $diff[] = array($token, 0 /* OLD */);
272        }
273
274        return $diff;
275    }
276
277    /**
278     * Get new strings denoting new lines from a given string.
279     *
280     * @param string $string
281     *
282     * @return array
283     */
284    private function getNewLineMatches($string)
285    {
286        \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches);
287
288        return $stringMatches;
289    }
290
291    /**
292     * Checks if input is string, if so it will split it line-by-line.
293     *
294     * @param string $input
295     *
296     * @return array
297     */
298    private function splitStringByLines($input)
299    {
300        return \preg_split('(\r\n|\r|\n)', $input);
301    }
302
303    /**
304     * @param array $from
305     * @param array $to
306     *
307     * @return LongestCommonSubsequence
308     */
309    private function selectLcsImplementation(array $from, array $to)
310    {
311        // We do not want to use the time-efficient implementation if its memory
312        // footprint will probably exceed this value. Note that the footprint
313        // calculation is only an estimation for the matrix and the LCS method
314        // will typically allocate a bit more memory than this.
315        $memoryLimit = 100 * 1024 * 1024;
316
317        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
318            return new MemoryEfficientImplementation;
319        }
320
321        return new TimeEfficientImplementation;
322    }
323
324    /**
325     * Calculates the estimated memory footprint for the DP-based method.
326     *
327     * @param array $from
328     * @param array $to
329     *
330     * @return int|float
331     */
332    private function calculateEstimatedFootprint(array $from, array $to)
333    {
334        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
335
336        return $itemSize * \pow(\min(\count($from), \count($to)), 2);
337    }
338
339    /**
340     * Returns true if line ends don't match on fromMatches and toMatches.
341     *
342     * @param array $fromMatches
343     * @param array $toMatches
344     *
345     * @return bool
346     */
347    private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches)
348    {
349        return isset($fromMatches[0], $toMatches[0]) &&
350               \count($fromMatches[0]) === \count($toMatches[0]) &&
351               $fromMatches[0] !== $toMatches[0];
352    }
353
354    /**
355     * @param array $from
356     * @param array $to
357     *
358     * @return array
359     */
360    private static function getArrayDiffParted(array &$from, array &$to)
361    {
362        $start = array();
363        $end   = array();
364
365        \reset($to);
366
367        foreach ($from as $k => $v) {
368            $toK = \key($to);
369
370            if ($toK === $k && $v === $to[$k]) {
371                $start[$k] = $v;
372
373                unset($from[$k], $to[$k]);
374            } else {
375                break;
376            }
377        }
378
379        \end($from);
380        \end($to);
381
382        do {
383            $fromK = \key($from);
384            $toK   = \key($to);
385
386            if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
387                break;
388            }
389
390            \prev($from);
391            \prev($to);
392
393            $end = array($fromK => $from[$fromK]) + $end;
394            unset($from[$fromK], $to[$toK]);
395        } while (true);
396
397        return array($from, $to, $start, $end);
398    }
399}
400