Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2988_ModelsOfModels2/HeuristicLab.Algorithms.EMM/Analyzers/ModelClustersFrequencyAnalyzer.cs @ 18156

Last change on this file since 18156 was 17134, checked in by msemenki, 5 years ago

#2988:

  1. The file structure was changed: folders were added and some of the files were moved into these folders.
  2. The HelpFunctions class was split into 2 parts: HelpFunctions, for general-purpose static functions, and SelfConfiguration, which contains the functions that implement the self-configuration mechanism (it is used in EMMSucsessMap).
  3. Parts of the self-configuration mechanism were moved from EMMSucsessMap.cs to SelfConfiguration.cs. EMMSucsessMap now uses SelfConfiguration as one of its data members. Other parts of the project were adapted to this change.
  4. The FileComunication class was added. It contains most of the functions for writing to files or reading from files. It also implements the ability to write and read hl files.
  5. ModelTreeNode.cs has an additional capability: writing a sub-model to a string (which can then be written to a file).
  6. InfixExpressionFormatter.cs can work with TreeModelNode.
  7. The ability to read different map types from files was extended and checked.
  8. Parameters such as ClusterNumbers, ClusterNumbersShow, NegbourNumber and NegbourType (which are used only in some maps) were moved from EMMAlgorithm to the map parameters. The EMMBaseMap class now inherits from ParameterizedNamedItem (not from Item), and EMMIslandMap and EMMNetworkMap contain their own parameters (their constructors were modified). The calls to the CreationMap functions were simplified.
  9. Functions for calculating different distance metrics were added. It is now possible to calculate different types of distances between models (with different random values of constants).
  10. DistanceParametr was added. Maps can now be created according to different types of distance calculation.
  11. The class EMMClustering was renamed to KMeansClusterizationAlgorithm. In KMeansClusterizationAlgorithm, a bug that bloated the list of centroids was fixed. The algorithm was adapted to work with different types of distance metrics and to take a maximum number of iterations.
  12. Support for constant optimization in sub-models and in the whole tree was added. EMMAlgorithm got a new function for evaluating individuals (and some additional technical code for that). A function for transforming trees that contain models into ordinary trees and back was added.
  13. EMMAlgorithm was split into 2 parts:
  • EMMAlgorithm, which contains the evolutionary algorithm that works with sub-models and uses ready-made maps;
  • ModelSetPreparation, which contains distance calculation, model set simplification and map creation.
File size: 8.2 KB
RevLine 
[16734]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HEAL.Attic;
[17002]23using HeuristicLab.Algorithms.EvolvmentModelsOfModels;
[16734]24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using System;
32using System.Collections.Generic;
33using System.Linq;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Calculates the relative frequencies of model-cluster references over all trees in the
  /// population and appends them, one value per row and per call of <see cref="Apply"/>,
  /// to a data table that is also published in the result collection.
  /// </summary>
  [Item("Symbolic Data Analysis Model Clusters Frequency Analyzer", "Calculates the accumulated frequencies of Model Clusters over all trees in the population.")]
  [StorableType("4755115D-1B73-4577-BA2A-A762AE4C3B2F")]
  public sealed class ModelClustersFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string ModelClustersFrequencyParameterName = "ModelClustersFrequency";
    private const string AggregateModelClustersParameterName = "AggregateModelClusters";
    private const string MapParameterName = "Map";

    // Single source for the parameter description so that the constructor and the
    // backwards-compatibility hook cannot drift apart.
    private const string AggregateModelClustersParameterDescription = "Switch that determines whether all references to factor Model Clusters should be aggregated regardless of the value. Turn off to analyze all factor Model Clusters references with different values separately.";

    #region parameter properties
    /// <summary>Lookup parameter holding the data table the frequencies are written to.</summary>
    public ILookupParameter<DataTable> ModelClustersFrequencyParameter {
      get { return (ILookupParameter<DataTable>)Parameters[ModelClustersFrequencyParameterName]; }
    }
    /// <summary>Value-lookup parameter holding the aggregation switch.</summary>
    public IValueLookupParameter<BoolValue> AggregateModelClustersParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateModelClustersParameterName]; }
    }
    /// <summary>Lookup parameter holding the map that is used to classify the tree nodes.</summary>
    public ILookupParameter<EMMMapBase<ISymbolicExpressionTree>> MapParameter {
      get { return (ILookupParameter<EMMMapBase<ISymbolicExpressionTree>>)Parameters[MapParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Reads the actual value of the aggregation switch; writes its configured value.</summary>
    public BoolValue AggregateModelClusters {
      get { return AggregateModelClustersParameter.ActualValue; }
      set { AggregateModelClustersParameter.Value = value; }
    }
    /// <summary>The data table with the recorded frequencies (actual value of the parameter).</summary>
    public DataTable ModelClustersFrequency {
      get { return ModelClustersFrequencyParameter.ActualValue; }
      set { ModelClustersFrequencyParameter.ActualValue = value; }
    }
    #endregion

    [StorableConstructor]
    private ModelClustersFrequencyAnalyzer(StorableConstructorFlag _) : base(_) { }
    private ModelClustersFrequencyAnalyzer(ModelClustersFrequencyAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }
    /// <summary>Creates the analyzer and registers its three parameters.</summary>
    public ModelClustersFrequencyAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<DataTable>(ModelClustersFrequencyParameterName, "The relative Model Clusters reference frequencies aggregated over all trees in the population."));
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelClustersParameterName, AggregateModelClustersParameterDescription, new BoolValue(true)));
      Parameters.Add(new LookupParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName));
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code, remove with 3.4
      if (!Parameters.ContainsKey(AggregateModelClustersParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelClustersParameterName, AggregateModelClustersParameterDescription, new BoolValue(true)));
      }
      // The map parameter was introduced after the first stored version of this class;
      // restore it so that MapParameter does not fail on instances stored before that.
      if (!Parameters.ContainsKey(MapParameterName)) {
        Parameters.Add(new LookupParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName));
      }
      #endregion
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelClustersFrequencyAnalyzer(this, cloner);
    }

    /// <summary>
    /// Appends the current relative frequencies to the data table. The table is created and
    /// added to the result collection on the first call. Rows that receive no value in this
    /// call are padded with 0 so that all rows keep the same length.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;

      DataTable datatable = ModelClustersFrequencyParameter.ActualValue;
      if (datatable == null) {
        datatable = new DataTable("ModelClusters frequencies", "Relative frequency of ModelClusters references aggregated over the whole population.");
        datatable.VisualProperties.XAxisTitle = "Generation";
        datatable.VisualProperties.YAxisTitle = "Relative ModelClusters Frequency";
        ModelClustersFrequencyParameter.ActualValue = datatable;
        ResultCollection.Add(new Result("ModelClusters frequencies", "Relative frequency of ModelClusters references aggregated over the whole population.", datatable));
      }

      // all rows must have the same number of values so we can just take the first
      // (0 when the table has no rows yet)
      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).FirstOrDefault();

      foreach (var pair in CalculateModelClustersFrequency(expressions, MapParameter.ActualValue)) {
        DataRow row;
        if (datatable.Rows.ContainsKey(pair.Key)) {
          row = datatable.Rows[pair.Key];
        } else {
          // initialize a new row for the cluster and pad with zeros
          row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
          row.VisualProperties.StartIndexZero = true;
          datatable.Rows.Add(row);
        }
        row.Values.Add(Math.Round(pair.Value, 3));
      }

      // add a zero for each data row that was not modified in the previous loop
      foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1)) {
        row.Values.Add(0.0);
      }

      return base.Apply();
    }

    /// <summary>
    /// Sums the per-tree reference values of every key over all <paramref name="trees"/> and
    /// normalizes each sum by the total over all keys.
    /// </summary>
    /// <param name="trees">The trees to analyze.</param>
    /// <param name="map">The map used to derive the keys; its concrete type selects what is counted.</param>
    /// <returns>
    /// Key/relative-frequency pairs ordered by key with a <c>NaturalStringComparer</c>.
    /// When the total is 0 every frequency is reported as 0 instead of NaN.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateModelClustersFrequency(IEnumerable<ISymbolicExpressionTree> trees, EMMMapBase<ISymbolicExpressionTree> map) {
      var modelClustersFrequency = trees
          .SelectMany(t => GetModelClustersReferences(t, map))
          .GroupBy(pair => pair.Key, pair => pair.Value)
          .ToDictionary(g => g.Key, g => g.Sum());

      double totalNumberOfSymbols = modelClustersFrequency.Values.Sum();
      // A total of exactly 0 (e.g. only the "no" placeholder entry) would turn every
      // quotient into 0/0 = NaN, which must not end up in the data table.
      bool hasReferences = totalNumberOfSymbols != 0.0;

      foreach (var pair in modelClustersFrequency.OrderBy(p => p.Key, new NaturalStringComparer())) {
        double relativeFrequency = hasReferences ? pair.Value / totalNumberOfSymbols : 0.0;
        yield return new KeyValuePair<string, double>(pair.Key, relativeFrequency);
      }
    }

    /// <summary>
    /// Collects the reference values contributed by a single tree.
    /// For an <c>EMMIslandMap</c> the <c>TreeModelTreeNode</c>s of the tree are counted per
    /// "Cluster n" key; for an <c>EMMSucsessMap</c> the probabilities of the self-configuration
    /// mechanism are returned per "Tree Probability i" key (independent of the tree);
    /// for any other map (including null) a single "no" entry with value 0 is returned.
    /// </summary>
    private static IEnumerable<KeyValuePair<string, double>> GetModelClustersReferences(ISymbolicExpressionTree tree, EMMMapBase<ISymbolicExpressionTree> map) {
      var references = new Dictionary<string, double>();

      if (map is EMMIslandMap island) {
        foreach (var treeNode in tree.IterateNodesPrefix().OfType<TreeModelTreeNode>()) {
          string referenceId = "Cluster " + island.ClusterNumber[treeNode.TreeNumber];
          // count stays 0 when the key is not present yet
          references.TryGetValue(referenceId, out double count);
          references[referenceId] = count + 1;
        }
      } else if (map is EMMSucsessMap sMap) {
        for (int i = 0; i < map.ModelSet.Count; i++) {
          references["Tree Probability" + i] = sMap.SelfConfigurationMechanism.Probabilities[i];
        }
      } else {
        references["no"] = 0;
      }

      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.