1<?php
2
3/**
4 * This file is part of PHP K-Means
5 *
6 * Copyright (c) 2014 Benjamin Delespierre
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is furnished
13 * to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in all
16 * copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27namespace KMeans;
28
29class Space extends \SplObjectStorage
30{
31    protected $dimention;
32
33    protected static $rng = 'mt_rand';
34
35    public function __construct($dimention)
36    {
37        if ($dimention < 1) {
38            throw new \LogicException("a space dimention cannot be null or negative");
39        }
40
41        $this->dimention = $dimention;
42    }
43
44    public static function setRng(callable $fn): void
45    {
46        static::$rng = $fn;
47    }
48
49    public function toArray(): array
50    {
51        $points = [];
52        foreach ($this as $point) {
53            $points[] = $point->toArray();
54        }
55
56        return ['points' => $points];
57    }
58
59    public function newPoint(array $coordinates): Point
60    {
61        if (count($coordinates) != $this->dimention) {
62            throw new \LogicException("(" . implode(',', $coordinates) . ") is not a point of this space");
63        }
64
65        return new Point($this, $coordinates);
66    }
67
68    public function addPoint(array $coordinates, $data = null): Point
69    {
70        $this->attach($point = $this->newPoint($coordinates), $data);
71
72        return $point;
73    }
74
75    public function attach($point, $data = null): void
76    {
77        if (!$point instanceof Point) {
78            throw new \InvalidArgumentException("can only attach points to spaces");
79        }
80
81        parent::attach($point, $data);
82    }
83
84    public function getDimention(): int
85    {
86        return $this->dimention;
87    }
88
89    public function getBoundaries(): array
90    {
91        if (!count($this)) {
92            return [];
93        }
94
95        $min = $this->newPoint(array_fill(0, $this->dimention, null));
96        $max = $this->newPoint(array_fill(0, $this->dimention, null));
97
98        foreach ($this as $point) {
99            for ($n = 0; $n < $this->dimention; $n++) {
100                if ($min[$n] === null || $min[$n] > $point[$n]) {
101                    $min[$n] = $point[$n];
102                }
103
104                if ($max[$n] === null || $max[$n] < $point[$n]) {
105                    $max[$n] = $point[$n];
106                }
107            }
108        }
109
110        return [$min, $max];
111    }
112
113    public function getRandomPoint(Point $min, Point $max): Point
114    {
115        $point = $this->newPoint(array_fill(0, $this->dimention, null));
116        $rng = static::$rng;
117
118        for ($n = 0; $n < $this->dimention; $n++) {
119            $point[$n] = $rng($min[$n], $max[$n]);
120        }
121
122        return $point;
123    }
124
125    public function solve(int $nbClusters, callable $iterationCallback = null, $initMethod = Cluster::INIT_RANDOM): array
126    {
127        // initialize K clusters
128        $clusters = $this->initializeClusters($nbClusters, $initMethod);
129
130        // there's only one cluster, clusterization has no meaning
131        if (count($clusters) == 1) {
132            return $clusters;
133        }
134
135        // until convergence is reached
136        do {
137            if ($iterationCallback) {
138                $iterationCallback($this, $clusters);
139            }
140        } while ($this->iterate($clusters));
141
142        // clustering is done.
143        return $clusters;
144    }
145
146    protected function initializeClusters(int $nbClusters, int $initMethod): array
147    {
148        if ($nbClusters <= 0) {
149            throw new \InvalidArgumentException("invalid clusters number");
150        }
151
152        switch ($initMethod) {
153            case Cluster::INIT_RANDOM:
154                $clusters = $this->initializeRandomClusters($nbClusters);
155
156                break;
157
158            case Cluster::INIT_KMEANS_PLUS_PLUS:
159                $clusters = $this->initializeKmeansPlusPlusClusters($nbClusters);
160
161                break;
162
163            default:
164                return [];
165        }
166
167        // assign all points to the first cluster
168        $clusters[0]->attachAll($this);
169
170        return $clusters;
171    }
172
173    protected function initializeKmeansPlusPlusClusters(int $nbClusters): array
174    {
175        $clusters = [];
176        $clusters[] = new Cluster($this, $this->current()->getCoordinates());
177
178        for ($i = 1; $i < $nbClusters; ++$i) {
179            $sum = 0;
180            $distances = [];
181            foreach ($this as $point) {
182                $distance = $point->getDistanceWith($point->getClosest($clusters), false);
183                $distances[] = $distance;
184                $sum += $distance;
185            }
186
187            $probabilities = [];
188            foreach ($distances as $distance) {
189                $probabilities[] = $distance / $sum;
190            }
191
192            $cumulativeProbabilities = array_reduce($probabilities, function ($c, $i) {
193                $c[] = end($c) + $i;
194                return $c;
195            }, []);
196
197            $rng = static::$rng;
198            $rand = $rng() / mt_getrandmax();
199            foreach ($cumulativeProbabilities as $j => $cumulativeProbability) {
200                if ($rand < $cumulativeProbability) {
201                    foreach ($this as $key => $value) {
202                        if ($j == $key) {
203                            $clusters[] = new Cluster($this, $value->getCoordinates());
204                            break;
205                        }
206                    }
207                    break;
208                }
209            }
210        }
211
212        return $clusters;
213    }
214
215    protected function initializeRandomClusters(int $nbClusters): array
216    {
217        $clusters = [];
218
219        // get the space boundaries to avoid placing clusters centroid too far from points
220        list($min, $max) = $this->getBoundaries();
221
222        // initialize N clusters with a random point within space boundaries
223        for ($n = 0; $n < $nbClusters; $n++) {
224            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
225        }
226        return $clusters;
227    }
228
229    protected function iterate(array $clusters): bool
230    {
231        $continue = false;
232
233        // migration storages
234        $attach = new \SplObjectStorage();
235        $detach = new \SplObjectStorage();
236
237        // calculate proximity amongst points and clusters
238        foreach ($clusters as $cluster) {
239            foreach ($cluster as $point) {
240                // find the closest cluster
241                $closest = $point->getClosest($clusters);
242
243                // move the point from its old cluster to its closest
244                if ($closest !== $cluster) {
245                    if (! isset($attach[$closest])) {
246                        $attach[$closest] = new \SplObjectStorage();
247                    }
248
249                    if (! isset($detach[$cluster])) {
250                        $detach[$cluster] = new \SplObjectStorage();
251                    }
252
253                    $attach[$closest]->attach($point);
254                    $detach[$cluster]->attach($point);
255
256                    $continue = true;
257                }
258            }
259        }
260
261        // perform points migrations
262        foreach ($attach as $cluster) {
263            $cluster->attachAll($attach[$cluster]);
264        }
265
266        foreach ($detach as $cluster) {
267            $cluster->detachAll($detach[$cluster]);
268        }
269
270        // update all cluster's centroids
271        foreach ($clusters as $cluster) {
272            $cluster->updateCentroid();
273        }
274
275        return $continue;
276    }
277}
278