#region License Information /* HeuristicLab * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using HeuristicLab.Core; using HeuristicLab.Encodings.PermutationEncoding; namespace HeuristicLab.Problems.VehicleRouting.Encodings.Potvin { public class KMeansAlgorithm where TData : class { private class ClusterInfo { public TData Mean; public double Variance; public int Size; } private readonly Func, TData> meanCalculator; private readonly Func distanceCalculator; public KMeansAlgorithm(Func, TData> meanCalculator, Func distanceCalculator) { this.meanCalculator = meanCalculator; this.distanceCalculator = distanceCalculator; } public List> Run(List data, int k, double changeThreshold, IRandom random) { int numClusters = Math.Min(k, data.Count); var assignments = new int[data.Count]; for (int i = 0; i < assignments.Length; i++) assignments[i] = -1; // unassigned // (1) initialize each cluster with a random element as mean var clusters = new ClusterInfo[numClusters]; var initIndices = new Permutation(PermutationTypes.Absolute, data.Count, random); for (int c = 0; c < numClusters; c++) { assignments[initIndices[c]] = c; clusters[c] = new ClusterInfo { Mean = data[initIndices[c]], Size = 1 }; } // (2) repeat clustering until change rate is below the threshold double changeRate; do { int changes = Iterate(data, assignments, clusters); changeRate = (double)changes / data.Count; } while (changeRate > changeThreshold); // (3) return non-empty clusters var clustersData = new List>(numClusters); for (int c = 0; c < numClusters; c++) clustersData.Add(new List()); for (int i = 0; i < assignments.Length; i++) clustersData[assignments[i]].Add(i); clustersData.RemoveAll(c => c.Count == 0); return clustersData; } private int Iterate(List data, int[] assignments, ClusterInfo[] clusters) { int changes = 0; var newAssignments = new int[data.Count]; assignments.CopyTo(newAssignments, 0); // assign elements to currently most suited cluster for (int i = 0; i < data.Count; i++) { int bestCluster = 0; double bestImpact = CalculateImpact(data[i], clusters[0]); for (int c = 1; c < clusters.Length; c++) { double impact = CalculateImpact(data[i], clusters[c]); if (impact < bestImpact) { bestImpact = impact; bestCluster = c; } } newAssignments[i] = bestCluster; if (newAssignments[i] != assignments[i]) changes++; } // update clusters var clustersData = new List>(clusters.Length); for (int c = 0; c < clusters.Length; c++) clustersData.Add(new List()); for (int i = 0; i < data.Count; i++) { assignments[i] = newAssignments[i]; clustersData[assignments[i]].Add(data[i]); } for (int c = 0; c < clusters.Length; c++) { var clusterData = clustersData[c]; if (clusterData.Count == 0) continue; clusters[c].Mean = meanCalculator(clusterData); clusters[c].Variance = 0; foreach (var e in clusterData) clusters[c].Variance += Math.Pow(distanceCalculator(e, clusters[c].Mean), 2); clusters[c].Variance /= clusterData.Count; clusters[c].Size = clusterData.Count; } return changes; } private double CalculateImpact(TData datum, ClusterInfo cluster) { double newVariance = (cluster.Variance * cluster.Size + Math.Pow(distanceCalculator(datum, cluster.Mean), 2)) / (cluster.Size + 1); return newVariance - cluster.Variance; } } }