Changeset 14559


Ignore:
Timestamp:
01/12/17 16:42:50 (3 years ago)
Author:
pfleck
Message:

#2707 Simplified k-means clustering for ClusterCreators.

Location:
branches/HeuristicLab.VRPEnhancements/HeuristicLab.Problems.VehicleRouting/3.4/Encodings/Potvin/Creators
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.VRPEnhancements/HeuristicLab.Problems.VehicleRouting/3.4/Encodings/Potvin/Creators/Cluster.cs

    r14447 r14559  
    1 using System;
    2 using System.Collections;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
    323using System.Collections.Generic;
    424using HeuristicLab.Core;
    5 using HeuristicLab.Problems.VehicleRouting.Interfaces;
     25using HeuristicLab.Encodings.PermutationEncoding;
    626
    727namespace HeuristicLab.Problems.VehicleRouting.Encodings.Potvin {
    828
    9   public class ClusterAlgorithm<TCluster,TClusterElement>
    10     where TCluster : Cluster<TClusterElement>, new()
    11     where TClusterElement : ClusterElement, new() {
     29  public class KMeansAlgorithm<TData> where TData : class {
    1230
    13     public static List<TCluster> KMeans(IRandom random, List<TClusterElement> clusterElements,
    14       int k, double changeThreshold) {
    15       HashSet<int> initMeans = new HashSet<int>();
    16       int nextMean = -1;
    17       List<TCluster> clusters = CreateCList();
     31    private class ClusterInfo {
     32      public TData Mean;
     33      public double Variance;
     34      public int Size;
     35    }
     36
     37    private readonly Func<List<TData>, TData> meanCalculator;
     38    private readonly Func<TData, TData, double> distanceCalculator;
     39
     40    public KMeansAlgorithm(Func<List<TData>, TData> meanCalculator, Func<TData, TData, double> distanceCalculator) {
     41      this.meanCalculator = meanCalculator;
     42      this.distanceCalculator = distanceCalculator;
     43    }
     44
     45    public List<List<int>> Run(List<TData> data, int k, double changeThreshold, IRandom random) {
     46      int numClusters = Math.Min(k, data.Count);
     47
     48      var assignments = new int[data.Count];
     49      for (int i = 0; i < assignments.Length; i++)
     50        assignments[i] = -1; // unassigned
    1851
    1952      // (1) initialize each cluster with a random element as mean
    20       for (int i = 0; i < k && i < clusterElements.Count; i++) {
    21         TCluster cluster = new TCluster();
    22         cluster.Id = i;
    23 
    24         do {
    25           nextMean = random.Next(0, clusterElements.Count);
    26         } while (initMeans.Contains(nextMean));
    27         initMeans.Add(nextMean);
    28         cluster.SetMean(clusterElements[nextMean]);
    29         clusters.Add(cluster);
     53      var clusters = new ClusterInfo[numClusters];
     54      var initIndices = new Permutation(PermutationTypes.Absolute, data.Count, random);
     55      for (int c = 0; c < numClusters; c++) {
     56        assignments[initIndices[c]] = c;
     57        clusters[c] = new ClusterInfo {
     58          Mean = data[initIndices[c]],
     59          Size = 1
     60        };
    3061      }
    3162
    32       // (2) repeat clustering until change rate is below threshold
    33       double changeRate = 0.0;
     63      // (2) repeat clustering until change rate is below the threshold
     64      double changeRate;
    3465      do {
    35         int changes = KMeansRun(clusters, clusterElements);
    36         changeRate = (double)changes / clusterElements.Count;
     66        int changes = Iterate(data, assignments, clusters);
     67        changeRate = (double)changes / data.Count;
    3768      } while (changeRate > changeThreshold);
    3869
    39 
    40       // remove empty clusters
    41       clusters.RemoveAll(c => c.Elements.Count.Equals(0));
    42       return clusters;
     70      // (3) return non-empty clusters
     71      var clustersData = new List<List<int>>(numClusters);
     72      for (int c = 0; c < numClusters; c++)
     73        clustersData.Add(new List<int>());
     74      for (int i = 0; i < assignments.Length; i++)
     75        clustersData[assignments[i]].Add(i);
     76      clustersData.RemoveAll(c => c.Count == 0);
     77      return clustersData;
    4378    }
    4479
    45     private static int KMeansRun(List<TCluster> clusters, List<TClusterElement> clusterElements) {
     80    private int Iterate(List<TData> data, int[] assignments, ClusterInfo[] clusters) {
    4681      int changes = 0;
    4782
    48       // clear clusters from previous assigned elements
    49       foreach (var c in clusters) {
    50         c.Elements.Clear();
     83      var newAssignments = new int[data.Count];
     84      assignments.CopyTo(newAssignments, 0);
     85
     86      // assign elements to currently most suited cluster
     87      for (int i = 0; i < data.Count; i++) {
     88        int bestCluster = 0;
     89        double bestImpact = CalculateImpact(data[i], clusters[0]);
     90        for (int c = 1; c < clusters.Length; c++) {
     91          double impact = CalculateImpact(data[i], clusters[c]);
     92          if (impact < bestImpact) {
     93            bestImpact = impact;
     94            bestCluster = c;
     95          }
     96        }
     97        newAssignments[i] = bestCluster;
     98        if (newAssignments[i] != assignments[i])
     99          changes++;
    51100      }
    52101
    53       // assign elements to currently most suitable clusters
    54       foreach (var e in clusterElements) {
    55         int optClusterIdx = 0;
    56         double optImpact = clusters[optClusterIdx].CalculateImpact(e);
    57         for (int i = 1; i < clusters.Count; i++) {
    58           double impact = clusters[i].CalculateImpact(e);
    59           if (impact < optImpact) {
    60             optImpact = impact;
    61             optClusterIdx = i;
    62           }
    63         }
    64         if (clusters[optClusterIdx].AddElement(e)) {
    65           changes++;
    66         }
     102      // update clusters
     103      var clustersData = new List<List<TData>>(clusters.Length);
     104      for (int c = 0; c < clusters.Length; c++)
     105        clustersData.Add(new List<TData>());
     106      for (int i = 0; i < data.Count; i++) {
     107        assignments[i] = newAssignments[i];
     108        clustersData[assignments[i]].Add(data[i]);
    67109      }
     110      for (int c = 0; c < clusters.Length; c++) {
     111        var clusterData = clustersData[c];
     112        if (clusterData.Count == 0)
     113          continue;
    68114
    69       // update mean and variance
    70       foreach (var c in clusters) {
    71         c.CalculateMean();
    72         c.CalculateVariance();
     115        clusters[c].Mean = meanCalculator(clusterData);
     116
     117        clusters[c].Variance = 0;
     118        foreach (var e in clusterData)
     119          clusters[c].Variance += Math.Pow(distanceCalculator(e, clusters[c].Mean), 2);
     120        clusters[c].Variance /= clusterData.Count;
     121
     122        clusters[c].Size = clusterData.Count;
    73123      }
    74124
     
    76126    }
    77127
    78     private static List<TCluster> CreateCList() {
    79       var listType = typeof(List<>);
    80       var constructedListType = listType.MakeGenericType(typeof(TCluster));
    81       return (List<TCluster>)Activator.CreateInstance(constructedListType);
     128    private double CalculateImpact(TData datum, ClusterInfo cluster) {
     129      double newVariance = (cluster.Variance * cluster.Size + Math.Pow(distanceCalculator(datum, cluster.Mean), 2)) / (cluster.Size + 1);
     130      return newVariance - cluster.Variance;
    82131    }
    83132  }
    84 
    85   #region Cluster
    86   public interface ICluster<T> where T : ClusterElement {
    87     void SetMean(T o);
    88     bool AddElement(T o);
    89     void CalculateMean();
    90     void CalculateVariance();
    91     double CalculateImpact(T e);
    92     double CalculateDistance(T e1, T e2);
    93   }
    94   public abstract class Cluster<T> : ICluster<T> where T : ClusterElement, new() {
    95     public int Id;
    96     public List<T> Elements { get; private set; }
    97     public T Mean { get; set; }
    98     public double Variance { get; set; }
    99     protected Cluster() {
    100       Elements = new List<T>();
    101     }
    102     protected Cluster(int id) {
    103       Id = id;
    104       Elements = new List<T>();
    105     }
    106     public bool AddElement(T e) {
    107       Elements.Add(e);
    108 
    109       bool clusterChanged = e.ClusterId != Id;
    110       e.ClusterId = Id;
    111       return clusterChanged;
    112     }
    113     public void SetMean(T e) {
    114       Mean = e;
    115       Mean.ClusterId = Id;
    116     }
    117     public abstract void CalculateMean();
    118     public abstract double CalculateDistance(T e1, T e2);
    119     public virtual void CalculateVariance() {
    120       if (Mean == null)
    121         CalculateMean();
    122 
    123       Variance = 0.0;
    124       foreach (T e in Elements) {
    125         Variance += Math.Pow(CalculateDistance(Mean, e), 2);
    126       }
    127       Variance /= Elements.Count;
    128     }
    129     public virtual double CalculateImpact(T e) {
    130       if (Mean == null)
    131         CalculateMean();
    132 
    133       double newVariance = (Variance * Elements.Count + Math.Pow(CalculateDistance(Mean, e), 2)) / (Elements.Count + 1);
    134       return newVariance - Variance;
    135     }
    136   }
    137   public class SpatialDistanceCluster : Cluster<SpatialDistanceClusterElement> {
    138     public SpatialDistanceCluster() : base() {
    139     }
    140 
    141     public SpatialDistanceCluster(int id) : base(id) {
    142     }
    143     public override void CalculateMean() {
    144       int dimensions = (Mean != null) ? Mean.Coordinates.Length : (Elements.Count > 0) ? Elements[0].Coordinates.Length : 0;
    145       SpatialDistanceClusterElement mean = new SpatialDistanceClusterElement(-1, dimensions, Id);
    146       foreach (SpatialDistanceClusterElement e in Elements) {
    147         for (int i = 0; i < Mean.Coordinates.Length; i++) {
    148           mean.Coordinates[i] += (e.Coordinates[i] / Elements.Count);
    149         }
    150       }
    151       Mean = mean;
    152     }
    153     public override double CalculateDistance(SpatialDistanceClusterElement e1, SpatialDistanceClusterElement e2) {
    154       if (!e1.Coordinates.Length.Equals(e2.Coordinates.Length)) {
    155         throw new ArgumentException("Distance could not be calculated since number of dimensions is unequal.");
    156       }
    157 
    158       double distance = 0.0;
    159       for (int i = 0; i < e1.Coordinates.Length; i++) {
    160         distance += Math.Pow(e1.Coordinates[i] - e2.Coordinates[i], 2);
    161       }
    162       return Math.Sqrt(distance);
    163     }
    164   }
    165   public class TemporalDistanceCluster : Cluster<TemporalDistanceClusterElement> {
    166     public TemporalDistanceCluster() : base() {
    167     }
    168     public TemporalDistanceCluster(int id) : base(id) {
    169     }
    170     public override void CalculateMean() {
    171       TemporalDistanceClusterElement mean = new TemporalDistanceClusterElement(-1, Id);
    172       foreach (TemporalDistanceClusterElement e in Elements) {
    173         mean.ReadyTime += e.ReadyTime/Elements.Count;
    174         mean.DueTime += e.DueTime/Elements.Count;
    175       }
    176       Mean = mean;
    177     }
    178     public override double CalculateDistance(TemporalDistanceClusterElement e1, TemporalDistanceClusterElement e2) {
    179       double distance = 0.0;
    180 
    181       distance += Math.Pow(e1.ReadyTime - e2.ReadyTime, 2);
    182       distance += Math.Pow(e1.DueTime - e2.DueTime, 2);
    183 
    184       return Math.Sqrt(distance);
    185     }
    186   }
    187   #endregion Cluster
    188 
    189   #region ClusterElement
    190   public abstract class ClusterElement {
    191     public int Id { get; set; }
    192     public int ClusterId { get; set; }
    193     protected ClusterElement() {
    194       ClusterId = -1;
    195     }
    196     public ClusterElement(int id, int clusterId = -1) {
    197       Id = id;
    198       ClusterId = clusterId;
    199     }
    200   }
    201   public class SpatialDistanceClusterElement : ClusterElement {
    202     public double[] Coordinates { get; set; }
    203     public SpatialDistanceClusterElement() : base() {
    204     }
    205     public SpatialDistanceClusterElement(int id, int clusterId = -1) : base(id, clusterId) {
    206     }
    207     public SpatialDistanceClusterElement(int id, double[] coordinates, int clusterId = -1) : base(id, clusterId) {
    208       Coordinates = coordinates;
    209     }
    210     public SpatialDistanceClusterElement(int id, int dimensions, int clusterId = -1) : base(id, clusterId) {
    211       Coordinates = new double[dimensions];
    212     }
    213   }
    214   public class TemporalDistanceClusterElement : ClusterElement {
    215     public double ReadyTime { get; set; }
    216     public double DueTime { get; set; }
    217     public TemporalDistanceClusterElement() : base() {
    218     }
    219     public TemporalDistanceClusterElement(int id, int clusterId = -1) : base(id, clusterId) {
    220     }
    221     public TemporalDistanceClusterElement(int id, double readyTime, double dueTime, int clusterId = -1) : base(id, clusterId) {
    222       ReadyTime = readyTime;
    223       DueTime = dueTime;
    224     }
    225   }
    226 
    227   #endregion ClusterElement
    228133}
  • branches/HeuristicLab.VRPEnhancements/HeuristicLab.Problems.VehicleRouting/3.4/Encodings/Potvin/Creators/GeographicDistanceClusterCreator.cs

    r14442 r14559  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    4849    }
    4950
    50     public static List<SpatialDistanceClusterElement> CreateClusterElements(IVRPProblemInstance instance) {
    51       IPickupAndDeliveryProblemInstance pdp = instance as IPickupAndDeliveryProblemInstance;
    52 
    53       // add all customers
    54       List<int> customers = new List<int>();
    55       for (int i = 1; i <= instance.Cities.Value; i++) {
    56         if (pdp == null || pdp.GetDemand(i) >= 0)
    57           customers.Add(i);
    58       }
    59 
    60       // wrap stops in SpatialDistanceClusterElement
    61       List<SpatialDistanceClusterElement> clusterElements = new List<SpatialDistanceClusterElement>();
    62       foreach (int customer in customers) {
    63         clusterElements.Add(new SpatialDistanceClusterElement(customer, instance.GetCoordinates(customer)));
    64       }
    65       return clusterElements;
    66     }
    6751
    6852    public static PotvinEncoding CreateSolution(IVRPProblemInstance instance, IRandom random, int minK, int maxK, double clusterChangeThreshold, int creationOption) {
    6953      PotvinEncoding result = new PotvinEncoding(instance);
    7054
    71       // (1) wrap stops in cluster elements
    72       List<SpatialDistanceClusterElement> clusterElements = CreateClusterElements(instance);
     55      // (1) init data
     56      var coordinates = new List<double[]>(instance.Cities.Value);
     57      var pdp = instance as IPickupAndDeliveryProblemInstance;
     58      for (int i = 1; i <= instance.Cities.Value; i++) {
     59        if (pdp == null || pdp.GetDemand(i) >= 0)
     60          coordinates.Add(instance.GetCoordinates(i));
     61      }
    7362
    7463      // (2) create a random number k of clusters
    7564      int k = random.Next(minK, maxK);
    76       List<SpatialDistanceCluster> clusters = ClusterAlgorithm<SpatialDistanceCluster, SpatialDistanceClusterElement>
    77         .KMeans(random, clusterElements, k, clusterChangeThreshold);
    78      
     65      var kMeans = new KMeansAlgorithm<double[]>(CalculateMeanHelper, CalculateDistanceHelper);
     66      var clusters = kMeans.Run(coordinates, k, clusterChangeThreshold, random);
     67
    7968      // (3) build tours with a (a) shuffling (b) greedy tour creation routine
    80       foreach (var c in clusters) {
     69      foreach (var cluster in clusters) {
    8170        Tour newTour = new Tour();
    8271        result.Tours.Add(newTour);
     
    8473        if (creationOption == 0) {
    8574          // (a) shuffle
    86           c.Elements.Shuffle(random);
    87           foreach (var o in c.Elements) {
    88             newTour.Stops.Add(o.Id);
     75          cluster.ShuffleInPlace(random);
     76          newTour.Stops.AddRange(cluster);
     77          foreach (var customer in cluster) {
     78            newTour.Stops.Add(customer + 1);
    8979          }
    9080        } else {
    9181          // (b) greedy
    92           foreach (var o in c.Elements) {
    93             newTour.Stops.Add(o.Id);
     82          foreach (var customer in cluster) {
     83            newTour.Stops.Add(customer + 1);
    9484          }
    9585          GreedyTourCreation(instance, result, newTour, false);
     
    9888
    9989      return result;
     90    }
     91
     92    private static double[] CalculateMeanHelper(List<double[]> coordinates) {
     93      var mean = new double[coordinates[0].Length];
     94      foreach (double[] coord in coordinates) {
     95        for (int i = 0; i < mean.Length; i++) {
     96          mean[i] += coord[i] / coordinates.Count;
     97        }
     98      }
     99      return mean;
     100    }
     101    private static double CalculateDistanceHelper(double[] coord1, double[] coord2) {
     102      double distance = 0.0;
     103      for (int i = 0; i < coord1.Length; i++) {
     104        distance += Math.Pow(coord1[i] - coord2[i], 2);
     105      }
     106      return Math.Sqrt(distance);
    100107    }
    101108
  • branches/HeuristicLab.VRPEnhancements/HeuristicLab.Problems.VehicleRouting/3.4/Encodings/Potvin/Creators/TemporalDistanceClusterCreator.cs

    r14442 r14559  
    2626using HeuristicLab.Core;
    2727using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    28 using HeuristicLab.Problems.VehicleRouting.Interfaces;
    2928using HeuristicLab.Problems.VehicleRouting.Variants;
    3029using HeuristicLab.Random;
     
    4948    }
    5049
    51     public static List<TemporalDistanceClusterElement> CreateClusterElements(ITimeWindowedProblemInstance instance) {
    52       IPickupAndDeliveryProblemInstance pdp = instance as IPickupAndDeliveryProblemInstance;
    53 
    54       // add all customers
    55       List<int> customers = new List<int>();
    56       for (int i = 1; i <= instance.Cities.Value; i++) {
    57         if (pdp == null || pdp.GetDemand(i) >= 0)
    58           customers.Add(i);
    59       }
    60 
    61       // wrap stops in TemporalDistanceClusterElement
    62       List<TemporalDistanceClusterElement> clusterObjects = new List<TemporalDistanceClusterElement>();
    63       foreach (int customer in customers) {
    64         clusterObjects.Add(new TemporalDistanceClusterElement(customer, instance.ReadyTime[customer], instance.DueTime[customer]));
    65       }
    66       return clusterObjects;
    67     }
    68 
    6950    public static PotvinEncoding CreateSolution(ITimeWindowedProblemInstance instance, IRandom random, int minK, int maxK, double clusterChangeThreshold, int creationOption) {
    7051      PotvinEncoding result = new PotvinEncoding(instance);
    7152
    72       // (1) wrap stops in cluster elements
    73       List<TemporalDistanceClusterElement> clusterElements = CreateClusterElements(instance);
     53      // (1) init data
     54      var times = new List<Tuple<double, double>>(instance.Cities.Value);
     55      var pdp = instance as IPickupAndDeliveryProblemInstance;
     56      for (int i = 1; i <= instance.Cities.Value; i++) {
     57        if (pdp == null || pdp.GetDemand(i) >= 0)
     58          times.Add(Tuple.Create(instance.ReadyTime[i], instance.DueTime[i]));
     59      }
    7460
    7561      // (2) create a random number k of clusters
    7662      int k = random.Next(minK, maxK);
    77       List<TemporalDistanceCluster> clusters = ClusterAlgorithm<TemporalDistanceCluster, TemporalDistanceClusterElement>
    78         .KMeans(random, clusterElements, k, clusterChangeThreshold);
     63      var kMeans = new KMeansAlgorithm<Tuple<double, double>>(CalculateMeanHelper, CalculateDistanceHelper);
     64      var clusters = kMeans.Run(times, k, clusterChangeThreshold, random);
     65
    7966
    8067      // (3) build tours with a (a) shuffling (b) greedy tour creation routine
    81       foreach (var c in clusters) {
     68      foreach (var cluster in clusters) {
    8269        Tour newTour = new Tour();
    8370        result.Tours.Add(newTour);
     
    8572        if (creationOption == 0) {
    8673          // (a) shuffle
    87           c.Elements.Shuffle(random);
    88           foreach (var o in c.Elements) {
    89             newTour.Stops.Add(o.Id);
     74          cluster.ShuffleInPlace(random);
     75          newTour.Stops.AddRange(cluster);
     76          foreach (var customer in cluster) {
     77            newTour.Stops.Add(customer + 1);
    9078          }
    9179        } else {
    9280          // (b) greedy
    93           foreach (var o in c.Elements) {
    94             newTour.Stops.Add(o.Id);
     81          foreach (var customer in cluster) {
     82            newTour.Stops.Add(customer + 1);
    9583          }
    9684          GreedyTourCreation(instance, result, newTour, true); // "true" means include costs
     
    9987
    10088      return result;
     89    }
     90
     91    private static Tuple<double, double> CalculateMeanHelper(List<Tuple<double, double>> times) {
     92      double meanReady = 0, meanDue = 0;
     93      foreach (var t in times) {
     94        meanReady += t.Item1 / times.Count;
     95        meanDue += t.Item2 / times.Count;
     96      }
     97      return Tuple.Create(meanReady, meanDue);
     98    }
     99    private static double CalculateDistanceHelper(Tuple<double, double> time1, Tuple<double, double> time2) {
     100      double distance = 0.0;
     101      distance +=   Math.Pow(time1.Item1 - time2.Item1, 2);
     102      distance +=   Math.Pow(time1.Item2 - time2.Item2, 2);
     103      return Math.Sqrt(distance);
    101104    }
    102105
Note: See TracChangeset for help on using the changeset viewer.