Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2457_ExpertSystem/HeuristicLab.Analysis/3.3/Clustering/ClusterHelper.cs @ 17717

Last change on this file since 17717 was 13797, checked in by abeham, 9 years ago

#2457: worked on testing recommendation algorithms through x-validation

File size: 4.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25
namespace HeuristicLab.Analysis {
  /// <summary>
  /// Clusters the double values attached to items of type <typeparamref name="T"/> and
  /// exposes the result either per item or per cluster.
  /// Instances are created through <see cref="Cluster"/>.
  /// </summary>
  /// <typeparam name="T">The type of the items whose associated values are clustered.</typeparam>
  public class ClusteringHelper<T> {
    // Number of clusters requested; also used as the cluster id of excluded items.
    private readonly int k;

    // Items that take part in clustering, in the order in which they were supplied.
    private readonly List<KeyValuePair<T, double>> instances = new List<KeyValuePair<T, double>>();
    // Items rejected by the exclude function; they are always reported with cluster id k.
    private readonly List<KeyValuePair<T, double>> excluded = new List<KeyValuePair<T, double>>();
    // clusterValues[i] is the raw cluster id that CkMeans1D.Cluster assigned to instances[i].
    private int[] clusterValues;
    // Maps a raw cluster id to the (ranked) cluster id that is reported to callers.
    private int[] rankedMap;

    private ClusteringHelper(int k) {
      this.k = k;
    }

    /// <summary>
    /// Helps in clustering data which is available as key-value pairs.
    /// It is possible to specify an exclude function to omit certain points from clustering.
    /// These points will be assigned the cluster with id k. All other points will be assigned a
    /// cluster id in the range [0;k-1].
    /// </summary>
    /// <param name="k">The maximum number of clusters that should be created.</param>
    /// <param name="values">The data which links a certain item with a double value that is to be clustered.</param>
    /// <param name="excludeFunc">The function that allows excluding certain data points which will receive cluster id k.
    /// When omitted, no data point is excluded.</param>
    /// <returns>A reference to the helper class to allow fluent calls.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="k"/> is negative.</exception>
    public static ClusteringHelper<T> Cluster(int k, IEnumerable<KeyValuePair<T, double>> values, Func<KeyValuePair<T, double>, bool> excludeFunc = null) {
      // Fail at the public boundary instead of with a NullReferenceException or
      // IndexOutOfRangeException somewhere further down.
      if (values == null) throw new ArgumentNullException("values");
      if (k < 0) throw new ArgumentOutOfRangeException("k", k, "The number of clusters must not be negative.");
      if (excludeFunc == null) excludeFunc = _ => false;

      var helper = new ClusteringHelper<T>(k);
      helper.Initialize(values, excludeFunc);

      // One slot per regular cluster id plus a final slot (index k) for the excluded items.
      helper.rankedMap = new int[k + 1];
      if (helper.instances.Count == 0) {
        // Nothing to cluster: only excluded items (if any) will be reported.
        helper.clusterValues = new int[0];
      } else {
        var centroids = CkMeans1D.Cluster(helper.instances.Select(x => x.Value).ToArray(), k, out helper.clusterValues);
        // The Value of each returned entry is used as the raw cluster id; ranks are handed out
        // in the enumeration order of the result.
        // NOTE(review): presumably the result is ordered by centroid value - confirm in CkMeans1D.
        var rank = 0;
        foreach (var c in centroids) helper.rankedMap[c.Value] = rank++;
      }
      // excluded are always ranked last
      helper.rankedMap[k] = k;
      return helper;
    }

    /// <summary>
    /// Returns the clustered data by instance: every item is paired with its value and the
    /// ranked id of the cluster it belongs to. Clustered items are returned first, in the order
    /// in which they were supplied, followed by the excluded items which carry cluster id k.
    /// </summary>
    /// <returns>A lazily evaluated sequence of items, each with a tuple of (value, cluster id).</returns>
    public IEnumerable<KeyValuePair<T, Tuple<double, int>>> GetByInstance() {
      return GetClustered().Select(x => new KeyValuePair<T, Tuple<double, int>>(x.Item1.Key, Tuple.Create(x.Item1.Value, rankedMap[x.Item2])));
    }

    /// <summary>
    /// Returns the clustered data by cluster: every cluster that contains at least one item is
    /// returned as a pair of its ranked cluster id and the list of its members. Excluded items,
    /// if any, form the cluster with id k.
    /// </summary>
    /// <returns>A lazily evaluated sequence of (cluster id, members) pairs.</returns>
    public IEnumerable<KeyValuePair<int, List<KeyValuePair<T, double>>>> GetByCluster() {
      return GetClustered().GroupBy(x => x.Item2)
        .Select(x => new KeyValuePair<int, List<KeyValuePair<T, double>>>(rankedMap[x.Key], x.Select(y => y.Item1).ToList()));
    }

    // Splits the input into the items that are clustered and the items that are excluded.
    private void Initialize(IEnumerable<KeyValuePair<T, double>> values, Func<KeyValuePair<T, double>, bool> excludeFunc) {
      foreach (var v in values) {
        if (excludeFunc(v)) excluded.Add(v);
        else instances.Add(v);
      }
    }

    // Yields every item together with its raw (unranked) cluster id:
    // first the clustered items, then the excluded ones with id k.
    private IEnumerable<Tuple<KeyValuePair<T, double>, int>> GetClustered() {
      for (var i = 0; i < clusterValues.Length; i++) {
        yield return Tuple.Create(instances[i], clusterValues[i]);
      }
      foreach (var ex in excluded)
        yield return Tuple.Create(ex, k);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.