#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;

namespace HeuristicLab.Analysis {
  // NOTE(review): the generic type arguments in this file were reconstructed from usage
  // (item keys are of type T, clustered values are double) - confirm against the upstream source.

  /// <summary>
  /// Clusters items by an associated double value and exposes the result either
  /// per item or per cluster. Instances are created through <see cref="Cluster"/>.
  /// </summary>
  /// <typeparam name="T">The type of the item (key) each clustered value is attached to.</typeparam>
  public class ClusteringHelper<T> {
    // Cluster id handed to excluded points; regular clusters use ids in [0;k-1].
    private readonly int k;
    // Points that take part in clustering, in the order they were supplied.
    private List<KeyValuePair<T, double>> instances;
    // Points rejected by the exclude function, in the order they were supplied.
    private List<KeyValuePair<T, double>> excluded;
    // clusterValues[i] is the raw cluster id assigned to instances[i].
    private int[] clusterValues;
    // Maps a raw cluster id to its rank; has k + 1 entries, the last one for the excluded points.
    private int[] rankedMap;

    private ClusteringHelper(int k) {
      this.k = k;
    }

    /// <summary>
    /// Helps in clustering data which is available as key-value pairs.
    /// It is possible to specify an exclude function to omit certain points from clustering.
    /// These points will be assigned the cluster with id k. All other points will be assigned a
    /// cluster id in the range [0;k-1].
    /// </summary>
    /// <param name="k">The maximum number of clusters that should be created.</param>
    /// <param name="values">The data which links a certain item with a double value that is to be clustered.</param>
    /// <param name="excludeFunc">The function that allows excluding certain data points which will receive cluster id k.
    /// If omitted, no point is excluded.</param>
    /// <returns>A reference to the helper class to allow fluent calls.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="k"/> is negative.</exception>
    public static ClusteringHelper<T> Cluster(int k, IEnumerable<KeyValuePair<T, double>> values, Func<KeyValuePair<T, double>, bool> excludeFunc = null) {
      if (values == null) throw new ArgumentNullException("values");
      if (k < 0) throw new ArgumentOutOfRangeException("k", k, "The number of clusters must not be negative.");
      if (excludeFunc == null) excludeFunc = _ => false;

      var helper = new ClusteringHelper<T>(k);
      helper.Initialize(values, excludeFunc);

      // one slot per regular cluster plus one for the excluded points
      helper.rankedMap = new int[k + 1];
      if (helper.instances.Count == 0) {
        // nothing to cluster, only excluded points (if any) will be reported
        helper.clusterValues = new int[0];
      } else {
        var centroids = CkMeans1D.Cluster(helper.instances.Select(x => x.Value).ToArray(), k, out helper.clusterValues);
        // The rank of a cluster is the position of its centroid in the returned collection
        // (presumably ascending by centroid value - confirm against CkMeans1D.Cluster).
        var rank = 0;
        foreach (var c in centroids) helper.rankedMap[c.Value] = rank++;
      }
      // excluded are always ranked last
      helper.rankedMap[k] = k;
      return helper;
    }

    /// <summary>
    /// Returns the clustered data by instance: every item is paired with its value and
    /// the rank of the cluster it was assigned to. Clustered items come first, in the
    /// order they were supplied, followed by the excluded items which carry rank k.
    /// </summary>
    /// <returns>A lazily evaluated sequence of item / (value, cluster rank) pairs.</returns>
    public IEnumerable<KeyValuePair<T, Tuple<double, int>>> GetByInstance() {
      return GetClustered().Select(x => new KeyValuePair<T, Tuple<double, int>>(x.Item1.Key, Tuple.Create(x.Item1.Value, rankedMap[x.Item2])));
    }

    /// <summary>
    /// Returns the clustered data by cluster: every cluster that has at least one member
    /// is paired with the list of its item / value pairs. Excluded items, if any, are
    /// reported under key k.
    /// </summary>
    /// <returns>A lazily evaluated sequence of cluster rank / member list pairs.</returns>
    public IEnumerable<KeyValuePair<int, List<KeyValuePair<T, double>>>> GetByCluster() {
      return GetClustered().GroupBy(x => x.Item2)
        .Select(x => new KeyValuePair<int, List<KeyValuePair<T, double>>>(rankedMap[x.Key], x.Select(y => y.Item1).ToList()));
    }

    // Splits the input into the points to be clustered and the excluded points.
    private void Initialize(IEnumerable<KeyValuePair<T, double>> values, Func<KeyValuePair<T, double>, bool> excludeFunc) {
      instances = new List<KeyValuePair<T, double>>();
      excluded = new List<KeyValuePair<T, double>>();

      foreach (var v in values) {
        if (!excludeFunc(v)) instances.Add(v);
        else excluded.Add(v);
      }
    }

    // Yields every point together with its raw (unranked) cluster id; excluded points get id k.
    private IEnumerable<Tuple<KeyValuePair<T, double>, int>> GetClustered() {
      for (var i = 0; i < clusterValues.Length; i++) {
        yield return Tuple.Create(instances[i], clusterValues[i]);
      }
      foreach (var ex in excluded) {
        yield return Tuple.Create(ex, k);
      }
    }
  }
}