#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
namespace HeuristicLab.Analysis {
/// <summary>
/// Clusters the double values of key-value pairs into at most k clusters and exposes the
/// result either per instance or per cluster. Pairs rejected by an optional exclude function
/// do not take part in clustering and are reported with the reserved cluster id k.
/// </summary>
/// <typeparam name="T">The type of the item (key) that each clustered value belongs to.</typeparam>
public class ClusteringHelper<T> {
  // The requested number of clusters; also the cluster id reserved for excluded pairs.
  private readonly int k;
  // Pairs that take part in clustering, in input order; clusterValues[i] belongs to instances[i].
  private List<KeyValuePair<T, double>> instances;
  // Pairs rejected by the exclude function, in input order.
  private List<KeyValuePair<T, double>> excluded;
  // Raw cluster id of each instance as written by CkMeans1D.Cluster.
  private int[] clusterValues;
  // Maps a raw cluster id to the rank that is reported to callers; index k is the excluded slot.
  private int[] rankedMap;

  private ClusteringHelper(int K) {
    this.k = K;
  }

  /// <summary>
  /// Helps in clustering data which is available as key-value pairs.
  /// It is possible to specify an exclude function to omit certain points from clustering.
  /// These points will be assigned cluster with id k. All other points will be assigned a
  /// cluster id in the range [0;k-1].
  /// </summary>
  /// <param name="k">The maximum number of clusters that should be created.</param>
  /// <param name="values">The data which links a certain item with a double value that is to be clustered.</param>
  /// <param name="excludeFunc">The function that allows excluding certain data points which will receive cluster id k.
  /// If omitted, no data point is excluded.</param>
  /// <returns>A reference to the helper class to allow fluent calls.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="k"/> is negative.</exception>
  public static ClusteringHelper<T> Cluster(int k, IEnumerable<KeyValuePair<T, double>> values, Func<KeyValuePair<T, double>, bool> excludeFunc = null) {
    if (values == null) throw new ArgumentNullException("values");
    // rankedMap has k + 1 slots and slot k is written below, so k must not be negative
    if (k < 0) throw new ArgumentOutOfRangeException("k", "The number of clusters must not be negative.");
    if (excludeFunc == null) excludeFunc = _ => false;

    var helper = new ClusteringHelper<T>(k);
    helper.Initialize(values, excludeFunc);
    helper.rankedMap = new int[k + 1];
    if (helper.instances.Count == 0) {
      // nothing to cluster (empty input or everything excluded)
      helper.clusterValues = new int[0];
    } else {
      var centroids = CkMeans1D.Cluster(helper.instances.Select(x => x.Value).ToArray(), k, out helper.clusterValues);
      // Each centroid entry carries its raw cluster id in Value; the rank is the position
      // in the enumeration order of the centroids.
      // NOTE(review): presumably the centroids are enumerated in ascending order - confirm in CkMeans1D.
      var rank = 0;
      foreach (var c in centroids) helper.rankedMap[c.Value] = rank++;
    }
    // excluded are always ranked last
    helper.rankedMap[k] = k;
    return helper;
  }

  /// <summary>
  /// Returns the clustered data by instance: every item is paired with its value and the
  /// rank of the cluster it was assigned to. Excluded items are returned last with rank k.
  /// </summary>
  /// <returns>One entry per input pair, keyed by the item, holding (value, cluster rank).</returns>
  public IEnumerable<KeyValuePair<T, Tuple<double, int>>> GetByInstance() {
    return GetClustered().Select(x => new KeyValuePair<T, Tuple<double, int>>(x.Item1.Key, Tuple.Create(x.Item1.Value, rankedMap[x.Item2])));
  }

  /// <summary>
  /// Returns the clustered data by cluster: every non-empty cluster is paired with the list
  /// of key-value pairs assigned to it. Excluded pairs form the cluster with rank k.
  /// </summary>
  /// <returns>One entry per non-empty cluster, keyed by the cluster rank. The entries appear in the
  /// order in which the clusters are first encountered, not sorted by rank.</returns>
  public IEnumerable<KeyValuePair<int, List<KeyValuePair<T, double>>>> GetByCluster() {
    return GetClustered().GroupBy(x => x.Item2)
      .Select(x => new KeyValuePair<int, List<KeyValuePair<T, double>>>(rankedMap[x.Key], x.Select(y => y.Item1).ToList()));
  }

  // Splits the input into the pairs that are clustered and the pairs that are excluded.
  private void Initialize(IEnumerable<KeyValuePair<T, double>> values, Func<KeyValuePair<T, double>, bool> excludeFunc) {
    instances = new List<KeyValuePair<T, double>>();
    excluded = new List<KeyValuePair<T, double>>();
    foreach (var v in values) {
      if (!excludeFunc(v)) instances.Add(v);
      else excluded.Add(v);
    }
  }

  // Yields every clustered pair with its raw cluster id, followed by every excluded pair with id k.
  private IEnumerable<Tuple<KeyValuePair<T, double>, int>> GetClustered() {
    for (var i = 0; i < clusterValues.Length; i++) {
      yield return Tuple.Create(instances[i], clusterValues[i]);
    }
    foreach (var ex in excluded)
      yield return Tuple.Create(ex, k);
  }
}
}