1*3379af09SAndreas Gohr<?php 2*3379af09SAndreas Gohr 3*3379af09SAndreas Gohr/** 4*3379af09SAndreas Gohr * This file is part of PHP K-Means 5*3379af09SAndreas Gohr * 6*3379af09SAndreas Gohr * Copyright (c) 2014 Benjamin Delespierre 7*3379af09SAndreas Gohr * 8*3379af09SAndreas Gohr * Permission is hereby granted, free of charge, to any person obtaining a copy 9*3379af09SAndreas Gohr * of this software and associated documentation files (the "Software"), to deal 10*3379af09SAndreas Gohr * in the Software without restriction, including without limitation the rights 11*3379af09SAndreas Gohr * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12*3379af09SAndreas Gohr * copies of the Software, and to permit persons to whom the Software is furnished 13*3379af09SAndreas Gohr * to do so, subject to the following conditions: 14*3379af09SAndreas Gohr * 15*3379af09SAndreas Gohr * The above copyright notice and this permission notice shall be included in all 16*3379af09SAndreas Gohr * copies or substantial portions of the Software. 17*3379af09SAndreas Gohr * 18*3379af09SAndreas Gohr * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19*3379af09SAndreas Gohr * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20*3379af09SAndreas Gohr * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21*3379af09SAndreas Gohr * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22*3379af09SAndreas Gohr * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23*3379af09SAndreas Gohr * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24*3379af09SAndreas Gohr * THE SOFTWARE. 25*3379af09SAndreas Gohr */ 26*3379af09SAndreas Gohr 27*3379af09SAndreas Gohrnamespace KMeans; 28*3379af09SAndreas Gohr 29*3379af09SAndreas Gohrclass Space extends \SplObjectStorage 30*3379af09SAndreas Gohr{ 31*3379af09SAndreas Gohr protected $dimention; 32*3379af09SAndreas Gohr 33*3379af09SAndreas Gohr protected static $rng = 'mt_rand'; 34*3379af09SAndreas Gohr 35*3379af09SAndreas Gohr public function __construct($dimention) 36*3379af09SAndreas Gohr { 37*3379af09SAndreas Gohr if ($dimention < 1) { 38*3379af09SAndreas Gohr throw new \LogicException("a space dimention cannot be null or negative"); 39*3379af09SAndreas Gohr } 40*3379af09SAndreas Gohr 41*3379af09SAndreas Gohr $this->dimention = $dimention; 42*3379af09SAndreas Gohr } 43*3379af09SAndreas Gohr 44*3379af09SAndreas Gohr public static function setRng(callable $fn): void 45*3379af09SAndreas Gohr { 46*3379af09SAndreas Gohr static::$rng = $fn; 47*3379af09SAndreas Gohr } 48*3379af09SAndreas Gohr 49*3379af09SAndreas Gohr public function toArray(): array 50*3379af09SAndreas Gohr { 51*3379af09SAndreas Gohr $points = []; 52*3379af09SAndreas Gohr foreach ($this as $point) { 53*3379af09SAndreas Gohr $points[] = $point->toArray(); 54*3379af09SAndreas Gohr } 55*3379af09SAndreas Gohr 56*3379af09SAndreas Gohr return ['points' => $points]; 57*3379af09SAndreas Gohr } 58*3379af09SAndreas Gohr 59*3379af09SAndreas Gohr public function newPoint(array $coordinates): Point 60*3379af09SAndreas Gohr { 61*3379af09SAndreas Gohr if (count($coordinates) != $this->dimention) { 62*3379af09SAndreas Gohr throw new \LogicException("(" . implode(',', $coordinates) . ") is not a point of this space"); 63*3379af09SAndreas Gohr } 64*3379af09SAndreas Gohr 65*3379af09SAndreas Gohr return new Point($this, $coordinates); 66*3379af09SAndreas Gohr } 67*3379af09SAndreas Gohr 68*3379af09SAndreas Gohr public function addPoint(array $coordinates, $data = null): Point 69*3379af09SAndreas Gohr { 70*3379af09SAndreas Gohr $this->attach($point = $this->newPoint($coordinates), $data); 71*3379af09SAndreas Gohr 72*3379af09SAndreas Gohr return $point; 73*3379af09SAndreas Gohr } 74*3379af09SAndreas Gohr 75*3379af09SAndreas Gohr public function attach($point, $data = null): void 76*3379af09SAndreas Gohr { 77*3379af09SAndreas Gohr if (!$point instanceof Point) { 78*3379af09SAndreas Gohr throw new \InvalidArgumentException("can only attach points to spaces"); 79*3379af09SAndreas Gohr } 80*3379af09SAndreas Gohr 81*3379af09SAndreas Gohr parent::attach($point, $data); 82*3379af09SAndreas Gohr } 83*3379af09SAndreas Gohr 84*3379af09SAndreas Gohr public function getDimention(): int 85*3379af09SAndreas Gohr { 86*3379af09SAndreas Gohr return $this->dimention; 87*3379af09SAndreas Gohr } 88*3379af09SAndreas Gohr 89*3379af09SAndreas Gohr public function getBoundaries(): array 90*3379af09SAndreas Gohr { 91*3379af09SAndreas Gohr if (!count($this)) { 92*3379af09SAndreas Gohr return []; 93*3379af09SAndreas Gohr } 94*3379af09SAndreas Gohr 95*3379af09SAndreas Gohr $min = $this->newPoint(array_fill(0, $this->dimention, null)); 96*3379af09SAndreas Gohr $max = $this->newPoint(array_fill(0, $this->dimention, null)); 97*3379af09SAndreas Gohr 98*3379af09SAndreas Gohr foreach ($this as $point) { 99*3379af09SAndreas Gohr for ($n = 0; $n < $this->dimention; $n++) { 100*3379af09SAndreas Gohr if ($min[$n] === null || $min[$n] > $point[$n]) { 101*3379af09SAndreas Gohr $min[$n] = $point[$n]; 102*3379af09SAndreas Gohr } 103*3379af09SAndreas Gohr 104*3379af09SAndreas Gohr if ($max[$n] === null || $max[$n] < $point[$n]) { 105*3379af09SAndreas Gohr $max[$n] = $point[$n]; 106*3379af09SAndreas Gohr } 107*3379af09SAndreas Gohr } 108*3379af09SAndreas Gohr } 109*3379af09SAndreas Gohr 110*3379af09SAndreas Gohr return [$min, $max]; 111*3379af09SAndreas Gohr } 112*3379af09SAndreas Gohr 113*3379af09SAndreas Gohr public function getRandomPoint(Point $min, Point $max): Point 114*3379af09SAndreas Gohr { 115*3379af09SAndreas Gohr $point = $this->newPoint(array_fill(0, $this->dimention, null)); 116*3379af09SAndreas Gohr $rng = static::$rng; 117*3379af09SAndreas Gohr 118*3379af09SAndreas Gohr for ($n = 0; $n < $this->dimention; $n++) { 119*3379af09SAndreas Gohr $point[$n] = $rng($min[$n], $max[$n]); 120*3379af09SAndreas Gohr } 121*3379af09SAndreas Gohr 122*3379af09SAndreas Gohr return $point; 123*3379af09SAndreas Gohr } 124*3379af09SAndreas Gohr 125*3379af09SAndreas Gohr public function solve(int $nbClusters, callable $iterationCallback = null, $initMethod = Cluster::INIT_RANDOM): array 126*3379af09SAndreas Gohr { 127*3379af09SAndreas Gohr // initialize K clusters 128*3379af09SAndreas Gohr $clusters = $this->initializeClusters($nbClusters, $initMethod); 129*3379af09SAndreas Gohr 130*3379af09SAndreas Gohr // there's only one cluster, clusterization has no meaning 131*3379af09SAndreas Gohr if (count($clusters) == 1) { 132*3379af09SAndreas Gohr return $clusters; 133*3379af09SAndreas Gohr } 134*3379af09SAndreas Gohr 135*3379af09SAndreas Gohr // until convergence is reached 136*3379af09SAndreas Gohr do { 137*3379af09SAndreas Gohr if ($iterationCallback) { 138*3379af09SAndreas Gohr $iterationCallback($this, $clusters); 139*3379af09SAndreas Gohr } 140*3379af09SAndreas Gohr } while ($this->iterate($clusters)); 141*3379af09SAndreas Gohr 142*3379af09SAndreas Gohr // clustering is done. 143*3379af09SAndreas Gohr return $clusters; 144*3379af09SAndreas Gohr } 145*3379af09SAndreas Gohr 146*3379af09SAndreas Gohr protected function initializeClusters(int $nbClusters, int $initMethod): array 147*3379af09SAndreas Gohr { 148*3379af09SAndreas Gohr if ($nbClusters <= 0) { 149*3379af09SAndreas Gohr throw new \InvalidArgumentException("invalid clusters number"); 150*3379af09SAndreas Gohr } 151*3379af09SAndreas Gohr 152*3379af09SAndreas Gohr switch ($initMethod) { 153*3379af09SAndreas Gohr case Cluster::INIT_RANDOM: 154*3379af09SAndreas Gohr $clusters = $this->initializeRandomClusters($nbClusters); 155*3379af09SAndreas Gohr 156*3379af09SAndreas Gohr break; 157*3379af09SAndreas Gohr 158*3379af09SAndreas Gohr case Cluster::INIT_KMEANS_PLUS_PLUS: 159*3379af09SAndreas Gohr $clusters = $this->initializeKmeansPlusPlusClusters($nbClusters); 160*3379af09SAndreas Gohr 161*3379af09SAndreas Gohr break; 162*3379af09SAndreas Gohr 163*3379af09SAndreas Gohr default: 164*3379af09SAndreas Gohr return []; 165*3379af09SAndreas Gohr } 166*3379af09SAndreas Gohr 167*3379af09SAndreas Gohr // assign all points to the first cluster 168*3379af09SAndreas Gohr $clusters[0]->attachAll($this); 169*3379af09SAndreas Gohr 170*3379af09SAndreas Gohr return $clusters; 171*3379af09SAndreas Gohr } 172*3379af09SAndreas Gohr 173*3379af09SAndreas Gohr protected function initializeKmeansPlusPlusClusters(int $nbClusters): array 174*3379af09SAndreas Gohr { 175*3379af09SAndreas Gohr $clusters = []; 176*3379af09SAndreas Gohr $clusters[] = new Cluster($this, $this->current()->getCoordinates()); 177*3379af09SAndreas Gohr 178*3379af09SAndreas Gohr for ($i = 1; $i < $nbClusters; ++$i) { 179*3379af09SAndreas Gohr $sum = 0; 180*3379af09SAndreas Gohr $distances = []; 181*3379af09SAndreas Gohr foreach ($this as $point) { 182*3379af09SAndreas Gohr $distance = $point->getDistanceWith($point->getClosest($clusters), false); 183*3379af09SAndreas Gohr $distances[] = $distance; 184*3379af09SAndreas Gohr $sum += $distance; 185*3379af09SAndreas Gohr } 186*3379af09SAndreas Gohr 187*3379af09SAndreas Gohr $probabilities = []; 188*3379af09SAndreas Gohr foreach ($distances as $distance) { 189*3379af09SAndreas Gohr $probabilities[] = $distance / $sum; 190*3379af09SAndreas Gohr } 191*3379af09SAndreas Gohr 192*3379af09SAndreas Gohr $cumulativeProbabilities = array_reduce($probabilities, function ($c, $i) { 193*3379af09SAndreas Gohr $c[] = end($c) + $i; 194*3379af09SAndreas Gohr return $c; 195*3379af09SAndreas Gohr }, []); 196*3379af09SAndreas Gohr 197*3379af09SAndreas Gohr $rng = static::$rng; 198*3379af09SAndreas Gohr $rand = $rng() / mt_getrandmax(); 199*3379af09SAndreas Gohr foreach ($cumulativeProbabilities as $j => $cumulativeProbability) { 200*3379af09SAndreas Gohr if ($rand < $cumulativeProbability) { 201*3379af09SAndreas Gohr foreach ($this as $key => $value) { 202*3379af09SAndreas Gohr if ($j == $key) { 203*3379af09SAndreas Gohr $clusters[] = new Cluster($this, $value->getCoordinates()); 204*3379af09SAndreas Gohr break; 205*3379af09SAndreas Gohr } 206*3379af09SAndreas Gohr } 207*3379af09SAndreas Gohr break; 208*3379af09SAndreas Gohr } 209*3379af09SAndreas Gohr } 210*3379af09SAndreas Gohr } 211*3379af09SAndreas Gohr 212*3379af09SAndreas Gohr return $clusters; 213*3379af09SAndreas Gohr } 214*3379af09SAndreas Gohr 215*3379af09SAndreas Gohr protected function initializeRandomClusters(int $nbClusters): array 216*3379af09SAndreas Gohr { 217*3379af09SAndreas Gohr $clusters = []; 218*3379af09SAndreas Gohr 219*3379af09SAndreas Gohr // get the space boundaries to avoid placing clusters centroid too far from points 220*3379af09SAndreas Gohr list($min, $max) = $this->getBoundaries(); 221*3379af09SAndreas Gohr 222*3379af09SAndreas Gohr // initialize N clusters with a random point within space boundaries 223*3379af09SAndreas Gohr for ($n = 0; $n < $nbClusters; $n++) { 224*3379af09SAndreas Gohr $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates()); 225*3379af09SAndreas Gohr } 226*3379af09SAndreas Gohr return $clusters; 227*3379af09SAndreas Gohr } 228*3379af09SAndreas Gohr 229*3379af09SAndreas Gohr protected function iterate(array $clusters): bool 230*3379af09SAndreas Gohr { 231*3379af09SAndreas Gohr $continue = false; 232*3379af09SAndreas Gohr 233*3379af09SAndreas Gohr // migration storages 234*3379af09SAndreas Gohr $attach = new \SplObjectStorage(); 235*3379af09SAndreas Gohr $detach = new \SplObjectStorage(); 236*3379af09SAndreas Gohr 237*3379af09SAndreas Gohr // calculate proximity amongst points and clusters 238*3379af09SAndreas Gohr foreach ($clusters as $cluster) { 239*3379af09SAndreas Gohr foreach ($cluster as $point) { 240*3379af09SAndreas Gohr // find the closest cluster 241*3379af09SAndreas Gohr $closest = $point->getClosest($clusters); 242*3379af09SAndreas Gohr 243*3379af09SAndreas Gohr // move the point from its old cluster to its closest 244*3379af09SAndreas Gohr if ($closest !== $cluster) { 245*3379af09SAndreas Gohr if (! isset($attach[$closest])) { 246*3379af09SAndreas Gohr $attach[$closest] = new \SplObjectStorage(); 247*3379af09SAndreas Gohr } 248*3379af09SAndreas Gohr 249*3379af09SAndreas Gohr if (! isset($detach[$cluster])) { 250*3379af09SAndreas Gohr $detach[$cluster] = new \SplObjectStorage(); 251*3379af09SAndreas Gohr } 252*3379af09SAndreas Gohr 253*3379af09SAndreas Gohr $attach[$closest]->attach($point); 254*3379af09SAndreas Gohr $detach[$cluster]->attach($point); 255*3379af09SAndreas Gohr 256*3379af09SAndreas Gohr $continue = true; 257*3379af09SAndreas Gohr } 258*3379af09SAndreas Gohr } 259*3379af09SAndreas Gohr } 260*3379af09SAndreas Gohr 261*3379af09SAndreas Gohr // perform points migrations 262*3379af09SAndreas Gohr foreach ($attach as $cluster) { 263*3379af09SAndreas Gohr $cluster->attachAll($attach[$cluster]); 264*3379af09SAndreas Gohr } 265*3379af09SAndreas Gohr 266*3379af09SAndreas Gohr foreach ($detach as $cluster) { 267*3379af09SAndreas Gohr $cluster->detachAll($detach[$cluster]); 268*3379af09SAndreas Gohr } 269*3379af09SAndreas Gohr 270*3379af09SAndreas Gohr // update all cluster's centroids 271*3379af09SAndreas Gohr foreach ($clusters as $cluster) { 272*3379af09SAndreas Gohr $cluster->updateCentroid(); 273*3379af09SAndreas Gohr } 274*3379af09SAndreas Gohr 275*3379af09SAndreas Gohr return $continue; 276*3379af09SAndreas Gohr } 277*3379af09SAndreas Gohr} 278