Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2988_ModelsOfModels2/HeuristicLab.Algorithms.EMM/KMeansClusterizationAlgorithm.cs @ 18236

Last change on this file since 18236 was 17134, checked in by msemenki, 5 years ago

#2988:

  1. The file layout was changed: folders were added and some of the files were moved into these folders.
  2. The HelpFunctions class was split into 2 parts: HelpFunctions, which holds general-purpose static functions, and SelfConfiguration, which includes the functions that implement the self-configuration mechanism (used in EMMSucsessMap).
  3. Parts of the self-configuration mechanism were moved from EMMSucsessMap.cs to SelfConfiguration.cs. EMMSucsessMap now uses SelfConfiguration as one of its data members. Other parts of the project were adapted to this change.
  4. The FileComunication class was added. It includes the majority of the functions for writing to and reading from files. The ability to write and read hl files is implemented here.
  5. ModelTreeNode.cs has an additional capability: writing a sub-model to a string (which can then be written to a file).
  6. InfixExpressionFormatter.cs can work with TreeModelNode.
  7. The ability to read different map types from files was extended and checked.
  8. Parameters such as ClusterNumbers, ClusterNumbersShow, NegbourNumber and NegbourType (which are used only in several maps) were moved from EMMAlgorithm to the map parameters. The EMMBaseMap class now inherits from ParameterizedNamedItem (not from Item), and EMMIslandMap and EMMNetworkMap contain their own parameters (the constructors were modified). The CreationMap function calls were simplified.
  9. Functions for calculating different distance metrics were added. It is now possible to calculate different types of distances between models (with different random values of constants).
  10. DistanceParametr was added. Maps can now be created according to different types of distance calculation.
  11. The class EMMClustering was renamed to KMeansClusterizationAlgorithm. In KMeansClusterizationAlgorithm, a bug that bloated the centroids list was fixed. The algorithm was adapted to work with different types of distance metric and now has a maximum number of iterations.
  12. Support for constants optimization in sub-models and in the whole tree was added. EMMAlgorithm got a new function for evaluating individuals (and some additional technical code for that). A function for transforming trees that contain models into ordinary trees and back was added.
  13. EMMAlgorithm was split into 2 parts:
  • EMMAlgorithm, which contains the evolutionary algorithm that works with sub-models and uses ready-made maps;
  • ModelSetPreparation, which contains the distance calculation, model set simplification and map creation.
File size: 5.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HeuristicLab.Core;
23using System;
24using System.Collections.Generic;
25
26namespace HeuristicLab.Algorithms.EvolvmentModelsOfModels {
27  public class KMeansClusterizationAlgorithm {
28    public int K { get; private set; }
29    public KMeansClusterizationAlgorithm() {
30    }
31    public KMeansClusterizationAlgorithm(int k) {
32      K = k;
33    }
34    public KMeansClusterizationAlgorithm(KMeansClusterizationAlgorithm old) {
35      this.K = old.K;
36    }
37    public int Apply(IRandom random, double[,] distances, List<int> numberCluster) {
38
39      K = ApplyClusteringAlgorithm(random, distances, numberCluster, K);
40      return K;
41    }
42
43    public static int ApplyClusteringAlgorithm(IRandom random, double[,] distances, List<int> numberCluster, int k) {
44      int mapSize = distances.GetLength(0);
45      List<int> centroids = new List<int>(); // capacity is equal K
46      List<double> averageClusterDistance = new List<double>();
47      List<List<int>> clusters = new List<List<int>>(); // capacity is equal K
48      CentroidsRandomSetUp(random, centroids, mapSize, k);
49      if (numberCluster.Count == 0) {
50        for (int i = 0; i < mapSize; i++) {
51          numberCluster.Add(0);
52        }
53      }
54      bool flag = true;
55      int count = 0;
56      while (flag&&(count<1000)) {
57        clusters.Clear();
58        for (int i = 0; i < k; i++) {
59          clusters.Add(new List<int>());
60
61        }
62        for (int i = 0; i < mapSize; i++) {
63          numberCluster[i] = LookCloseCentroid(centroids, distances, i, k);
64          clusters[numberCluster[i]].Add(i);
65        }
66        k = NullSizeClusterDelete(centroids, clusters, mapSize, numberCluster, k);
67        flag = false;
68        for (int i = 0; i < k; i++) {
69          AverageClusterDistanceCalculation(averageClusterDistance, distances, numberCluster, mapSize, i);
70          var newCentroid = clusters[i][HelpFunctions.ChooseMinElementIndex(averageClusterDistance)];
71          if (newCentroid != centroids[i]) {
72            flag = true;
73            centroids[i] = newCentroid;
74          }
75          averageClusterDistance.Clear();
76        }
77        count++;
78      }
79      return k;
80    }
81    private static void CentroidsRandomSetUp(IRandom random, List<int> centroids, int size, int k) {
82      for (int i = 0; i < k; i++) {
83        centroids.Add(random.Next(size));
84      }
85    }
86    private static int LookCloseCentroid(List<int> centroids, double[,] distances, int currentNumber, int k) {
87      double minDistanse = distances[currentNumber, centroids[0]];
88      int clusterNum = 0;
89      for (int i = 1; i < k; i++) {
90        if (minDistanse > distances[currentNumber, centroids[i]]) {
91          minDistanse = distances[currentNumber, centroids[i]];
92          clusterNum = i;
93        }
94      }
95      return clusterNum;
96    }
97    private static int NullSizeClusterDelete(List<int> centroids, List<List<int>> clusters, int mapSize, List<int> numberCluster, int k) {
98      int iter = 0;
99      for (int i = 0; i < k; i++) {
100        if (clusters[i - iter].Count == 0) {
101          for (int j = 0; j < mapSize; j++) {
102            if (numberCluster[j] > (i - iter))
103              numberCluster[j]--;
104          }
105
106          for (int j = 0; j < k - iter; j++) {
107            if (j != i - iter) {
108              for (int m = 0; m < clusters[j].Count; m++)
109                if (clusters[j][m] > (i - iter))
110                  (clusters[j][m])--;
111            }
112          }
113          clusters.Remove(clusters[i - iter]);
114          centroids.Remove(centroids[i - iter]);
115          iter++;
116        }
117      }
118      k -= iter;
119      return k;
120    }
121    public static void AverageClusterDistanceCalculation(List<double> averageClusterDistance, double[,] distances, List<int> numberCluster, int MapSize, int currentClusterNumber) {
122      int m = 0;
123      for (int i = 0; i < MapSize; i++) {
124        if (numberCluster[i] == currentClusterNumber) {
125          averageClusterDistance.Add(0);
126          for (int j = 0; j < MapSize; j++) {
127            if (numberCluster[j] == currentClusterNumber)
128              averageClusterDistance[m] += Math.Abs(distances[i, j]);
129          }
130          m++;
131        }
132      }
133    }
134
135
136  }
137}
Note: See TracBrowser for help on using the repository browser.