Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2707_HeuristicLab.VRPEnhancements/HeuristicLab.Problems.VehicleRouting/3.4/Encodings/Potvin/Creators/Cluster.cs @ 15762

Last change on this file since 15762 was 14559, checked in by pfleck, 8 years ago

#2707 Simplified k-means clustering for ClusterCreators.

File size: 4.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Core;
25using HeuristicLab.Encodings.PermutationEncoding;
26
27namespace HeuristicLab.Problems.VehicleRouting.Encodings.Potvin {
28
29  public class KMeansAlgorithm<TData> where TData : class {
30
31    private class ClusterInfo {
32      public TData Mean;
33      public double Variance;
34      public int Size;
35    }
36
37    private readonly Func<List<TData>, TData> meanCalculator;
38    private readonly Func<TData, TData, double> distanceCalculator;
39
40    public KMeansAlgorithm(Func<List<TData>, TData> meanCalculator, Func<TData, TData, double> distanceCalculator) {
41      this.meanCalculator = meanCalculator;
42      this.distanceCalculator = distanceCalculator;
43    }
44
45    public List<List<int>> Run(List<TData> data, int k, double changeThreshold, IRandom random) {
46      int numClusters = Math.Min(k, data.Count);
47
48      var assignments = new int[data.Count];
49      for (int i = 0; i < assignments.Length; i++)
50        assignments[i] = -1; // unassigned
51
52      // (1) initialize each cluster with a random element as mean
53      var clusters = new ClusterInfo[numClusters];
54      var initIndices = new Permutation(PermutationTypes.Absolute, data.Count, random);
55      for (int c = 0; c < numClusters; c++) {
56        assignments[initIndices[c]] = c;
57        clusters[c] = new ClusterInfo {
58          Mean = data[initIndices[c]],
59          Size = 1
60        };
61      }
62
63      // (2) repeat clustering until change rate is below the threshold
64      double changeRate;
65      do {
66        int changes = Iterate(data, assignments, clusters);
67        changeRate = (double)changes / data.Count;
68      } while (changeRate > changeThreshold);
69
70      // (3) return non-empty clusters
71      var clustersData = new List<List<int>>(numClusters);
72      for (int c = 0; c < numClusters; c++)
73        clustersData.Add(new List<int>());
74      for (int i = 0; i < assignments.Length; i++)
75        clustersData[assignments[i]].Add(i);
76      clustersData.RemoveAll(c => c.Count == 0);
77      return clustersData;
78    }
79
80    private int Iterate(List<TData> data, int[] assignments, ClusterInfo[] clusters) {
81      int changes = 0;
82
83      var newAssignments = new int[data.Count];
84      assignments.CopyTo(newAssignments, 0);
85
86      // assign elements to currently most suited cluster
87      for (int i = 0; i < data.Count; i++) {
88        int bestCluster = 0;
89        double bestImpact = CalculateImpact(data[i], clusters[0]);
90        for (int c = 1; c < clusters.Length; c++) {
91          double impact = CalculateImpact(data[i], clusters[c]);
92          if (impact < bestImpact) {
93            bestImpact = impact;
94            bestCluster = c;
95          }
96        }
97        newAssignments[i] = bestCluster;
98        if (newAssignments[i] != assignments[i])
99          changes++;
100      }
101
102      // update clusters
103      var clustersData = new List<List<TData>>(clusters.Length);
104      for (int c = 0; c < clusters.Length; c++)
105        clustersData.Add(new List<TData>());
106      for (int i = 0; i < data.Count; i++) {
107        assignments[i] = newAssignments[i];
108        clustersData[assignments[i]].Add(data[i]);
109      }
110      for (int c = 0; c < clusters.Length; c++) {
111        var clusterData = clustersData[c];
112        if (clusterData.Count == 0)
113          continue;
114
115        clusters[c].Mean = meanCalculator(clusterData);
116
117        clusters[c].Variance = 0;
118        foreach (var e in clusterData)
119          clusters[c].Variance += Math.Pow(distanceCalculator(e, clusters[c].Mean), 2);
120        clusters[c].Variance /= clusterData.Count;
121
122        clusters[c].Size = clusterData.Count;
123      }
124
125      return changes;
126    }
127
128    private double CalculateImpact(TData datum, ClusterInfo cluster) {
129      double newVariance = (cluster.Variance * cluster.Size + Math.Pow(distanceCalculator(datum, cluster.Mean), 2)) / (cluster.Size + 1);
130      return newVariance - cluster.Variance;
131    }
132  }
133}
Note: See TracBrowser for help on using the repository browser.