Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2988_ModelsOfModels2/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/ModelClustersFrequencyAnalyzer.cs @ 16734

Last change on this file since 16734 was 16734, checked in by msemenki, 5 years ago

#2988: Add Model Symbol Frequency Analyzer and Model's Clusters Frequency Analyzer. Fix Bag's with Keys. Fix changing during mutation for Variables Types in SubModels.

File size: 6.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HEAL.Attic;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Optimization;
29using HeuristicLab.Parameters;
30using System;
31using System.Collections.Generic;
32using System.Linq;
33
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Calculates the accumulated relative frequencies of model-cluster references
  /// (<c>TreeModelTreeNode</c> nodes, keyed by their cluster number) over all trees in the population.
  /// </summary>
  /// <remarks>
  /// Every call to <see cref="Apply"/> appends exactly one value to each row of the frequency
  /// data table, so all rows always have the same length.
  /// </remarks>
  [Item("SymbolicDataAnalysisModelClustersFrequencyAnalyzer", "Calculates the accumulated frequencies of Model Clusters over all trees in the population.")]
  [StorableType("4755115D-1B73-4577-BA2A-A762AE4C3B2F")]
  public sealed class ModelClustersFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string ModelClustersFrequencyParameterName = "ModelClustersFrequency";
    private const string AggregateModelClustersParameterName = "AggregateModelClusters";

    // Single source for the parameter description so that the constructor and the
    // deserialization hook cannot drift apart.
    private const string AggregateModelClustersParameterDescription = "Switch that determines whether all references to factor Model Clusters should be aggregated regardless of the value. Turn off to analyze all factor Model Clusters references with different values separately.";

    // Name and description shared by the data table and the result entry that exposes it.
    private const string FrequencyTableName = "ModelClusters frequencies";
    private const string FrequencyTableDescription = "Relative frequency of ModelClusters references aggregated over the whole population.";

    #region parameter properties
    /// <summary>Lookup parameter holding the data table with one row per cluster.</summary>
    public ILookupParameter<DataTable> ModelClustersFrequencyParameter {
      get { return (ILookupParameter<DataTable>)Parameters[ModelClustersFrequencyParameterName]; }
    }
    /// <summary>Parameter holding the aggregation switch (see <see cref="AggregateModelClusters"/>).</summary>
    public IValueLookupParameter<BoolValue> AggregateModelClustersParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateModelClustersParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>
    /// Aggregation switch. The getter returns the parameter's actual value, the setter
    /// assigns the parameter's own value.
    /// </summary>
    /// <remarks>NOTE(review): this switch is not read by <see cref="Apply"/> in this class.</remarks>
    public BoolValue AggregateModelClusters {
      get { return AggregateModelClustersParameter.ActualValue; }
      set { AggregateModelClustersParameter.Value = value; }
    }
    #endregion
    [StorableConstructor]
    private ModelClustersFrequencyAnalyzer(StorableConstructorFlag _) : base(_) { }
    private ModelClustersFrequencyAnalyzer(ModelClustersFrequencyAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }
    /// <summary>
    /// Creates the analyzer and registers its two parameters; the aggregation switch defaults to <c>true</c>.
    /// </summary>
    public ModelClustersFrequencyAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<DataTable>(ModelClustersFrequencyParameterName, "The relative Model Clusters reference frequencies aggregated over all trees in the population."));
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelClustersParameterName, AggregateModelClustersParameterDescription, new BoolValue(true)));
    }

    /// <summary>
    /// Adds the aggregation switch parameter if the deserialized instance does not contain it.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code, remove with 3.4
      if (!Parameters.ContainsKey(AggregateModelClustersParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelClustersParameterName, AggregateModelClustersParameterDescription, new BoolValue(true)));
      }
      #endregion
    }

    /// <summary>Creates a copy of this analyzer using the given cloner.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelClustersFrequencyAnalyzer(this, cloner);
    }

    /// <summary>
    /// Appends the current relative cluster frequencies (rounded to three decimals) to the
    /// frequency data table, creating and publishing the table on first use.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      ResultCollection results = ResultCollection;

      DataTable datatable = ModelClustersFrequencyParameter.ActualValue;
      if (datatable == null) {
        // first invocation: create the table, store it in the parameter and expose it as a result
        datatable = new DataTable(FrequencyTableName, FrequencyTableDescription);
        datatable.VisualProperties.XAxisTitle = "Generation";
        datatable.VisualProperties.YAxisTitle = "Relative ModelClusters Frequency";
        ModelClustersFrequencyParameter.ActualValue = datatable;
        results.Add(new Result(FrequencyTableName, FrequencyTableDescription, datatable));
      }

      // all rows must have the same number of values so we can just take the first
      // (DefaultIfEmpty yields 0 when the table has no rows yet)
      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

      foreach (var pair in CalculateModelClustersFrequency(expressions)) {
        DataRow row;
        if (datatable.Rows.ContainsKey(pair.Key)) {
          row = datatable.Rows[pair.Key];
        } else {
          // initialize a new row for the cluster and pad with zeros for all previous invocations
          row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
          row.VisualProperties.StartIndexZero = true;
          datatable.Rows.Add(row);
        }
        row.Values.Add(Math.Round(pair.Value, 3));
      }

      // add a zero for each data row that was not modified in the previous loop
      foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1)) {
        row.Values.Add(0.0);
      }

      return base.Apply();
    }

    /// <summary>
    /// Computes, for every cluster referenced in <paramref name="trees"/>, its share of all
    /// cluster references found in those trees.
    /// </summary>
    /// <param name="trees">The trees to scan for cluster references.</param>
    /// <returns>
    /// Pairs of cluster label ("Cluster &lt;number&gt;") and relative frequency, ordered by label
    /// with <c>NaturalStringComparer</c>. The sequence is empty when no tree references a cluster.
    /// This method is an iterator, so nothing is computed until the result is enumerated.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateModelClustersFrequency(IEnumerable<ISymbolicExpressionTree> trees) {
      // sum the per-tree reference counts for each cluster label
      var modelClustersFrequency = trees
          .SelectMany(t => GetModelClustersReferences(t))
          .GroupBy(pair => pair.Key, pair => pair.Value)
          .ToDictionary(g => g.Key, g => (double)g.Sum());

      // The total is only zero when the dictionary is empty, in which case the loop below
      // does not execute and no division takes place.
      double totalNumberOfSymbols = modelClustersFrequency.Values.Sum();

      foreach (var pair in modelClustersFrequency.OrderBy(p => p.Key, new NaturalStringComparer())) {
        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
      }
    }

    /// <summary>
    /// Counts how often each cluster is referenced by <c>TreeModelTreeNode</c> nodes of a single tree.
    /// </summary>
    /// <param name="tree">The tree whose nodes are visited in prefix order.</param>
    /// <returns>Pairs of cluster label ("Cluster &lt;number&gt;") and number of references in the tree.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetModelClustersReferences(ISymbolicExpressionTree tree) {
      Dictionary<string, int> references = new Dictionary<string, int>();
      foreach (var treeNode in tree.IterateNodesPrefix().OfType<TreeModelTreeNode>()) {
        string referenceId = "Cluster " + treeNode.ClusterNumer;
        // TryGetValue leaves count at 0 for an unseen label, so a single lookup covers both cases
        int count;
        references.TryGetValue(referenceId, out count);
        references[referenceId] = count + 1;
      }
      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.