1<?php
2
3/**
4 * Hoa
5 *
6 *
7 * @license
8 *
9 * New BSD License
10 *
11 * Copyright © 2007-2017, Hoa community. All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
15 *     * Redistributions of source code must retain the above copyright
16 *       notice, this list of conditions and the following disclaimer.
17 *     * Redistributions in binary form must reproduce the above copyright
18 *       notice, this list of conditions and the following disclaimer in the
19 *       documentation and/or other materials provided with the distribution.
20 *     * Neither the name of the Hoa nor the names of its contributors may be
21 *       used to endorse or promote products derived from this software without
22 *       specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37namespace Hoa\Ustring;
38
39/**
40 * Class \Hoa\Ustring\Search.
41 *
42 * Some algorithms about search in strings.
43 *
44 * @copyright  Copyright © 2007-2017 Hoa community
45 * @license    New BSD License
46 */
47class Search
48{
49    /**
50     * Search by approximated patterns, with k differences based upon the
51     * principle diagonal monotony.
52     *
53     * @param   string  $y    Haystack.
54     * @param   string  $x    Needle.
55     * @param   int     $k    Number of differences.
56     * @return  array
57     */
58    public static function approximated($y, $x, $k)
59    {
60        $x      = (string) $x;
61        $y      = (string) $y;
62        $m      = strlen($x);
63        $n      = strlen($y);
64        $offset = [];
65        $L      = [-1 => array_fill(-1, $n - $m + $k + 3, -2)];
66
67        for ($q = 0, $max = $k - 1; $q <= $max; ++$q) {
68            $L[$q][-$q - 1] = $L[$q][-$q - 2] = $q - 1;
69        }
70
71        for ($q = 0; $q <= $k; ++$q) {
72            for ($d = -$q, $max = $n - $m + $k - $q; $d <= $max; ++$d) {
73                $l         = min(
74                                 max(
75                                     $L[$q - 1][$d - 1],
76                                     $L[$q - 1][$d    ] + 1,
77                                     $L[$q - 1][$d + 1] + 1
78                                 ),
79                                 $m - 1
80                             );
81                $a         = substr($x, $l + 1, $m - $l);
82                $b         = substr($y, $l + 1 + $d, $n - $l - $d);
83                $L[$q][$d] = $l + static::lcp($a, $b);
84
85                if ($L[$q][$d] == $m - 1 ||
86                    $d + $L[$q][$d] == $n - 1) {
87                    $j            = $m + $d;
88                    $i            = max(0, $j - $m);
89                    $offset[$q][] = ['i' => $i, 'j' => $j, 'l' => $j - $i];
90                }
91            }
92        }
93
94        return empty($offset) ? $offset : $offset[$k];
95    }
96
97    /**
98     * Length of the longest common prefixes.
99     *
100     * @param   string  $x    Word.
101     * @param   string  $y    Word.
102     * @return  int
103     */
104    public static function lcp($x, $y)
105    {
106        $max = min(strlen($x), strlen($y));
107        $i   = 0;
108
109        while ($i < $max && $x[$i] == $y[$i]) {
110            ++$i;
111        }
112
113        return $i;
114    }
115}
116