xref: /plugin/aichat/vendor/bdelespierre/php-kmeans/src/KMeans/Space.php (revision 3379af09b7ec10f96a8d4f23b1563bd7f9ae79ac)
1*3379af09SAndreas Gohr<?php
2*3379af09SAndreas Gohr
3*3379af09SAndreas Gohr/**
4*3379af09SAndreas Gohr * This file is part of PHP K-Means
5*3379af09SAndreas Gohr *
6*3379af09SAndreas Gohr * Copyright (c) 2014 Benjamin Delespierre
7*3379af09SAndreas Gohr *
8*3379af09SAndreas Gohr * Permission is hereby granted, free of charge, to any person obtaining a copy
9*3379af09SAndreas Gohr * of this software and associated documentation files (the "Software"), to deal
10*3379af09SAndreas Gohr * in the Software without restriction, including without limitation the rights
11*3379af09SAndreas Gohr * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12*3379af09SAndreas Gohr * copies of the Software, and to permit persons to whom the Software is furnished
13*3379af09SAndreas Gohr * to do so, subject to the following conditions:
14*3379af09SAndreas Gohr *
15*3379af09SAndreas Gohr * The above copyright notice and this permission notice shall be included in all
16*3379af09SAndreas Gohr * copies or substantial portions of the Software.
17*3379af09SAndreas Gohr *
18*3379af09SAndreas Gohr * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19*3379af09SAndreas Gohr * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20*3379af09SAndreas Gohr * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21*3379af09SAndreas Gohr * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22*3379af09SAndreas Gohr * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23*3379af09SAndreas Gohr * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24*3379af09SAndreas Gohr * THE SOFTWARE.
25*3379af09SAndreas Gohr */
26*3379af09SAndreas Gohr
27*3379af09SAndreas Gohrnamespace KMeans;
28*3379af09SAndreas Gohr
29*3379af09SAndreas Gohrclass Space extends \SplObjectStorage
30*3379af09SAndreas Gohr{
31*3379af09SAndreas Gohr    protected $dimention;
32*3379af09SAndreas Gohr
33*3379af09SAndreas Gohr    protected static $rng = 'mt_rand';
34*3379af09SAndreas Gohr
35*3379af09SAndreas Gohr    public function __construct($dimention)
36*3379af09SAndreas Gohr    {
37*3379af09SAndreas Gohr        if ($dimention < 1) {
38*3379af09SAndreas Gohr            throw new \LogicException("a space dimention cannot be null or negative");
39*3379af09SAndreas Gohr        }
40*3379af09SAndreas Gohr
41*3379af09SAndreas Gohr        $this->dimention = $dimention;
42*3379af09SAndreas Gohr    }
43*3379af09SAndreas Gohr
44*3379af09SAndreas Gohr    public static function setRng(callable $fn): void
45*3379af09SAndreas Gohr    {
46*3379af09SAndreas Gohr        static::$rng = $fn;
47*3379af09SAndreas Gohr    }
48*3379af09SAndreas Gohr
49*3379af09SAndreas Gohr    public function toArray(): array
50*3379af09SAndreas Gohr    {
51*3379af09SAndreas Gohr        $points = [];
52*3379af09SAndreas Gohr        foreach ($this as $point) {
53*3379af09SAndreas Gohr            $points[] = $point->toArray();
54*3379af09SAndreas Gohr        }
55*3379af09SAndreas Gohr
56*3379af09SAndreas Gohr        return ['points' => $points];
57*3379af09SAndreas Gohr    }
58*3379af09SAndreas Gohr
59*3379af09SAndreas Gohr    public function newPoint(array $coordinates): Point
60*3379af09SAndreas Gohr    {
61*3379af09SAndreas Gohr        if (count($coordinates) != $this->dimention) {
62*3379af09SAndreas Gohr            throw new \LogicException("(" . implode(',', $coordinates) . ") is not a point of this space");
63*3379af09SAndreas Gohr        }
64*3379af09SAndreas Gohr
65*3379af09SAndreas Gohr        return new Point($this, $coordinates);
66*3379af09SAndreas Gohr    }
67*3379af09SAndreas Gohr
68*3379af09SAndreas Gohr    public function addPoint(array $coordinates, $data = null): Point
69*3379af09SAndreas Gohr    {
70*3379af09SAndreas Gohr        $this->attach($point = $this->newPoint($coordinates), $data);
71*3379af09SAndreas Gohr
72*3379af09SAndreas Gohr        return $point;
73*3379af09SAndreas Gohr    }
74*3379af09SAndreas Gohr
75*3379af09SAndreas Gohr    public function attach($point, $data = null): void
76*3379af09SAndreas Gohr    {
77*3379af09SAndreas Gohr        if (!$point instanceof Point) {
78*3379af09SAndreas Gohr            throw new \InvalidArgumentException("can only attach points to spaces");
79*3379af09SAndreas Gohr        }
80*3379af09SAndreas Gohr
81*3379af09SAndreas Gohr        parent::attach($point, $data);
82*3379af09SAndreas Gohr    }
83*3379af09SAndreas Gohr
84*3379af09SAndreas Gohr    public function getDimention(): int
85*3379af09SAndreas Gohr    {
86*3379af09SAndreas Gohr        return $this->dimention;
87*3379af09SAndreas Gohr    }
88*3379af09SAndreas Gohr
89*3379af09SAndreas Gohr    public function getBoundaries(): array
90*3379af09SAndreas Gohr    {
91*3379af09SAndreas Gohr        if (!count($this)) {
92*3379af09SAndreas Gohr            return [];
93*3379af09SAndreas Gohr        }
94*3379af09SAndreas Gohr
95*3379af09SAndreas Gohr        $min = $this->newPoint(array_fill(0, $this->dimention, null));
96*3379af09SAndreas Gohr        $max = $this->newPoint(array_fill(0, $this->dimention, null));
97*3379af09SAndreas Gohr
98*3379af09SAndreas Gohr        foreach ($this as $point) {
99*3379af09SAndreas Gohr            for ($n = 0; $n < $this->dimention; $n++) {
100*3379af09SAndreas Gohr                if ($min[$n] === null || $min[$n] > $point[$n]) {
101*3379af09SAndreas Gohr                    $min[$n] = $point[$n];
102*3379af09SAndreas Gohr                }
103*3379af09SAndreas Gohr
104*3379af09SAndreas Gohr                if ($max[$n] === null || $max[$n] < $point[$n]) {
105*3379af09SAndreas Gohr                    $max[$n] = $point[$n];
106*3379af09SAndreas Gohr                }
107*3379af09SAndreas Gohr            }
108*3379af09SAndreas Gohr        }
109*3379af09SAndreas Gohr
110*3379af09SAndreas Gohr        return [$min, $max];
111*3379af09SAndreas Gohr    }
112*3379af09SAndreas Gohr
113*3379af09SAndreas Gohr    public function getRandomPoint(Point $min, Point $max): Point
114*3379af09SAndreas Gohr    {
115*3379af09SAndreas Gohr        $point = $this->newPoint(array_fill(0, $this->dimention, null));
116*3379af09SAndreas Gohr        $rng = static::$rng;
117*3379af09SAndreas Gohr
118*3379af09SAndreas Gohr        for ($n = 0; $n < $this->dimention; $n++) {
119*3379af09SAndreas Gohr            $point[$n] = $rng($min[$n], $max[$n]);
120*3379af09SAndreas Gohr        }
121*3379af09SAndreas Gohr
122*3379af09SAndreas Gohr        return $point;
123*3379af09SAndreas Gohr    }
124*3379af09SAndreas Gohr
125*3379af09SAndreas Gohr    public function solve(int $nbClusters, callable $iterationCallback = null, $initMethod = Cluster::INIT_RANDOM): array
126*3379af09SAndreas Gohr    {
127*3379af09SAndreas Gohr        // initialize K clusters
128*3379af09SAndreas Gohr        $clusters = $this->initializeClusters($nbClusters, $initMethod);
129*3379af09SAndreas Gohr
130*3379af09SAndreas Gohr        // there's only one cluster, clusterization has no meaning
131*3379af09SAndreas Gohr        if (count($clusters) == 1) {
132*3379af09SAndreas Gohr            return $clusters;
133*3379af09SAndreas Gohr        }
134*3379af09SAndreas Gohr
135*3379af09SAndreas Gohr        // until convergence is reached
136*3379af09SAndreas Gohr        do {
137*3379af09SAndreas Gohr            if ($iterationCallback) {
138*3379af09SAndreas Gohr                $iterationCallback($this, $clusters);
139*3379af09SAndreas Gohr            }
140*3379af09SAndreas Gohr        } while ($this->iterate($clusters));
141*3379af09SAndreas Gohr
142*3379af09SAndreas Gohr        // clustering is done.
143*3379af09SAndreas Gohr        return $clusters;
144*3379af09SAndreas Gohr    }
145*3379af09SAndreas Gohr
146*3379af09SAndreas Gohr    protected function initializeClusters(int $nbClusters, int $initMethod): array
147*3379af09SAndreas Gohr    {
148*3379af09SAndreas Gohr        if ($nbClusters <= 0) {
149*3379af09SAndreas Gohr            throw new \InvalidArgumentException("invalid clusters number");
150*3379af09SAndreas Gohr        }
151*3379af09SAndreas Gohr
152*3379af09SAndreas Gohr        switch ($initMethod) {
153*3379af09SAndreas Gohr            case Cluster::INIT_RANDOM:
154*3379af09SAndreas Gohr                $clusters = $this->initializeRandomClusters($nbClusters);
155*3379af09SAndreas Gohr
156*3379af09SAndreas Gohr                break;
157*3379af09SAndreas Gohr
158*3379af09SAndreas Gohr            case Cluster::INIT_KMEANS_PLUS_PLUS:
159*3379af09SAndreas Gohr                $clusters = $this->initializeKmeansPlusPlusClusters($nbClusters);
160*3379af09SAndreas Gohr
161*3379af09SAndreas Gohr                break;
162*3379af09SAndreas Gohr
163*3379af09SAndreas Gohr            default:
164*3379af09SAndreas Gohr                return [];
165*3379af09SAndreas Gohr        }
166*3379af09SAndreas Gohr
167*3379af09SAndreas Gohr        // assign all points to the first cluster
168*3379af09SAndreas Gohr        $clusters[0]->attachAll($this);
169*3379af09SAndreas Gohr
170*3379af09SAndreas Gohr        return $clusters;
171*3379af09SAndreas Gohr    }
172*3379af09SAndreas Gohr
173*3379af09SAndreas Gohr    protected function initializeKmeansPlusPlusClusters(int $nbClusters): array
174*3379af09SAndreas Gohr    {
175*3379af09SAndreas Gohr        $clusters = [];
176*3379af09SAndreas Gohr        $clusters[] = new Cluster($this, $this->current()->getCoordinates());
177*3379af09SAndreas Gohr
178*3379af09SAndreas Gohr        for ($i = 1; $i < $nbClusters; ++$i) {
179*3379af09SAndreas Gohr            $sum = 0;
180*3379af09SAndreas Gohr            $distances = [];
181*3379af09SAndreas Gohr            foreach ($this as $point) {
182*3379af09SAndreas Gohr                $distance = $point->getDistanceWith($point->getClosest($clusters), false);
183*3379af09SAndreas Gohr                $distances[] = $distance;
184*3379af09SAndreas Gohr                $sum += $distance;
185*3379af09SAndreas Gohr            }
186*3379af09SAndreas Gohr
187*3379af09SAndreas Gohr            $probabilities = [];
188*3379af09SAndreas Gohr            foreach ($distances as $distance) {
189*3379af09SAndreas Gohr                $probabilities[] = $distance / $sum;
190*3379af09SAndreas Gohr            }
191*3379af09SAndreas Gohr
192*3379af09SAndreas Gohr            $cumulativeProbabilities = array_reduce($probabilities, function ($c, $i) {
193*3379af09SAndreas Gohr                $c[] = end($c) + $i;
194*3379af09SAndreas Gohr                return $c;
195*3379af09SAndreas Gohr            }, []);
196*3379af09SAndreas Gohr
197*3379af09SAndreas Gohr            $rng = static::$rng;
198*3379af09SAndreas Gohr            $rand = $rng() / mt_getrandmax();
199*3379af09SAndreas Gohr            foreach ($cumulativeProbabilities as $j => $cumulativeProbability) {
200*3379af09SAndreas Gohr                if ($rand < $cumulativeProbability) {
201*3379af09SAndreas Gohr                    foreach ($this as $key => $value) {
202*3379af09SAndreas Gohr                        if ($j == $key) {
203*3379af09SAndreas Gohr                            $clusters[] = new Cluster($this, $value->getCoordinates());
204*3379af09SAndreas Gohr                            break;
205*3379af09SAndreas Gohr                        }
206*3379af09SAndreas Gohr                    }
207*3379af09SAndreas Gohr                    break;
208*3379af09SAndreas Gohr                }
209*3379af09SAndreas Gohr            }
210*3379af09SAndreas Gohr        }
211*3379af09SAndreas Gohr
212*3379af09SAndreas Gohr        return $clusters;
213*3379af09SAndreas Gohr    }
214*3379af09SAndreas Gohr
215*3379af09SAndreas Gohr    protected function initializeRandomClusters(int $nbClusters): array
216*3379af09SAndreas Gohr    {
217*3379af09SAndreas Gohr        $clusters = [];
218*3379af09SAndreas Gohr
219*3379af09SAndreas Gohr        // get the space boundaries to avoid placing clusters centroid too far from points
220*3379af09SAndreas Gohr        list($min, $max) = $this->getBoundaries();
221*3379af09SAndreas Gohr
222*3379af09SAndreas Gohr        // initialize N clusters with a random point within space boundaries
223*3379af09SAndreas Gohr        for ($n = 0; $n < $nbClusters; $n++) {
224*3379af09SAndreas Gohr            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
225*3379af09SAndreas Gohr        }
226*3379af09SAndreas Gohr        return $clusters;
227*3379af09SAndreas Gohr    }
228*3379af09SAndreas Gohr
229*3379af09SAndreas Gohr    protected function iterate(array $clusters): bool
230*3379af09SAndreas Gohr    {
231*3379af09SAndreas Gohr        $continue = false;
232*3379af09SAndreas Gohr
233*3379af09SAndreas Gohr        // migration storages
234*3379af09SAndreas Gohr        $attach = new \SplObjectStorage();
235*3379af09SAndreas Gohr        $detach = new \SplObjectStorage();
236*3379af09SAndreas Gohr
237*3379af09SAndreas Gohr        // calculate proximity amongst points and clusters
238*3379af09SAndreas Gohr        foreach ($clusters as $cluster) {
239*3379af09SAndreas Gohr            foreach ($cluster as $point) {
240*3379af09SAndreas Gohr                // find the closest cluster
241*3379af09SAndreas Gohr                $closest = $point->getClosest($clusters);
242*3379af09SAndreas Gohr
243*3379af09SAndreas Gohr                // move the point from its old cluster to its closest
244*3379af09SAndreas Gohr                if ($closest !== $cluster) {
245*3379af09SAndreas Gohr                    if (! isset($attach[$closest])) {
246*3379af09SAndreas Gohr                        $attach[$closest] = new \SplObjectStorage();
247*3379af09SAndreas Gohr                    }
248*3379af09SAndreas Gohr
249*3379af09SAndreas Gohr                    if (! isset($detach[$cluster])) {
250*3379af09SAndreas Gohr                        $detach[$cluster] = new \SplObjectStorage();
251*3379af09SAndreas Gohr                    }
252*3379af09SAndreas Gohr
253*3379af09SAndreas Gohr                    $attach[$closest]->attach($point);
254*3379af09SAndreas Gohr                    $detach[$cluster]->attach($point);
255*3379af09SAndreas Gohr
256*3379af09SAndreas Gohr                    $continue = true;
257*3379af09SAndreas Gohr                }
258*3379af09SAndreas Gohr            }
259*3379af09SAndreas Gohr        }
260*3379af09SAndreas Gohr
261*3379af09SAndreas Gohr        // perform points migrations
262*3379af09SAndreas Gohr        foreach ($attach as $cluster) {
263*3379af09SAndreas Gohr            $cluster->attachAll($attach[$cluster]);
264*3379af09SAndreas Gohr        }
265*3379af09SAndreas Gohr
266*3379af09SAndreas Gohr        foreach ($detach as $cluster) {
267*3379af09SAndreas Gohr            $cluster->detachAll($detach[$cluster]);
268*3379af09SAndreas Gohr        }
269*3379af09SAndreas Gohr
270*3379af09SAndreas Gohr        // update all cluster's centroids
271*3379af09SAndreas Gohr        foreach ($clusters as $cluster) {
272*3379af09SAndreas Gohr            $cluster->updateCentroid();
273*3379af09SAndreas Gohr        }
274*3379af09SAndreas Gohr
275*3379af09SAndreas Gohr        return $continue;
276*3379af09SAndreas Gohr    }
277*3379af09SAndreas Gohr}
278