#region License Information
/* HeuristicLab
* Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using HeuristicLab.Core;
using HeuristicLab.Encodings.PermutationEncoding;
namespace HeuristicLab.Problems.VehicleRouting.Encodings.Potvin {
public class KMeansAlgorithm where TData : class {
private class ClusterInfo {
public TData Mean;
public double Variance;
public int Size;
}
private readonly Func, TData> meanCalculator;
private readonly Func distanceCalculator;
public KMeansAlgorithm(Func, TData> meanCalculator, Func distanceCalculator) {
this.meanCalculator = meanCalculator;
this.distanceCalculator = distanceCalculator;
}
public List> Run(List data, int k, double changeThreshold, IRandom random) {
int numClusters = Math.Min(k, data.Count);
var assignments = new int[data.Count];
for (int i = 0; i < assignments.Length; i++)
assignments[i] = -1; // unassigned
// (1) initialize each cluster with a random element as mean
var clusters = new ClusterInfo[numClusters];
var initIndices = new Permutation(PermutationTypes.Absolute, data.Count, random);
for (int c = 0; c < numClusters; c++) {
assignments[initIndices[c]] = c;
clusters[c] = new ClusterInfo {
Mean = data[initIndices[c]],
Size = 1
};
}
// (2) repeat clustering until change rate is below the threshold
double changeRate;
do {
int changes = Iterate(data, assignments, clusters);
changeRate = (double)changes / data.Count;
} while (changeRate > changeThreshold);
// (3) return non-empty clusters
var clustersData = new List>(numClusters);
for (int c = 0; c < numClusters; c++)
clustersData.Add(new List());
for (int i = 0; i < assignments.Length; i++)
clustersData[assignments[i]].Add(i);
clustersData.RemoveAll(c => c.Count == 0);
return clustersData;
}
private int Iterate(List data, int[] assignments, ClusterInfo[] clusters) {
int changes = 0;
var newAssignments = new int[data.Count];
assignments.CopyTo(newAssignments, 0);
// assign elements to currently most suited cluster
for (int i = 0; i < data.Count; i++) {
int bestCluster = 0;
double bestImpact = CalculateImpact(data[i], clusters[0]);
for (int c = 1; c < clusters.Length; c++) {
double impact = CalculateImpact(data[i], clusters[c]);
if (impact < bestImpact) {
bestImpact = impact;
bestCluster = c;
}
}
newAssignments[i] = bestCluster;
if (newAssignments[i] != assignments[i])
changes++;
}
// update clusters
var clustersData = new List>(clusters.Length);
for (int c = 0; c < clusters.Length; c++)
clustersData.Add(new List());
for (int i = 0; i < data.Count; i++) {
assignments[i] = newAssignments[i];
clustersData[assignments[i]].Add(data[i]);
}
for (int c = 0; c < clusters.Length; c++) {
var clusterData = clustersData[c];
if (clusterData.Count == 0)
continue;
clusters[c].Mean = meanCalculator(clusterData);
clusters[c].Variance = 0;
foreach (var e in clusterData)
clusters[c].Variance += Math.Pow(distanceCalculator(e, clusters[c].Mean), 2);
clusters[c].Variance /= clusterData.Count;
clusters[c].Size = clusterData.Count;
}
return changes;
}
private double CalculateImpact(TData datum, ClusterInfo cluster) {
double newVariance = (cluster.Variance * cluster.Size + Math.Pow(distanceCalculator(datum, cluster.Mean), 2)) / (cluster.Size + 1);
return newVariance - cluster.Variance;
}
}
}