Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2988_ModelsOfModels2/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/ModelsFrequencyAnalyzer.cs @ 16899

Last change on this file since 16899 was 16899, checked in by msemenki, 5 years ago

#2988: New version of class structure.

File size: 7.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HEAL.Attic;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Optimization;
29using HeuristicLab.Parameters;
30using System;
31using System.Collections.Generic;
32using System.Linq;
33
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Calculates the relative frequencies of model references (<c>TreeModelTreeNode</c> nodes,
  /// identified by their tree number) accumulated over all trees in the population and
  /// records them per analyzer invocation in a data table.
  /// </summary>
  [Item("SymbolicDataAnalysisModelsFrequencyAnalyzer", "Calculates the accumulated frequencies of Model Clusters over all trees in the population.")]
  [StorableType("0A5EAD1D-89E1-4D89-935C-2CBC142834EE")]
  public sealed class ModelsFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string ModelsFrequencyParameterName = "ModelsFrequency";
    private const string AggregateModelParameterName = "AggregateModelClusters";

    // Name and description shared by the data table and the result entry that exposes it.
    private const string ModelFrequenciesResultName = "Model frequencies";
    private const string ModelFrequenciesResultDescription = "Relative frequency of Model references aggregated over the whole population.";

    #region parameter properties
    // NOTE(review): both parameter properties are getter-only lookups into the Parameters
    // collection, yet they are marked [Storable] — confirm that the persistence layer
    // accepts (and actually needs) this attribute here.

    /// <summary>Parameter holding the data table with the model frequencies.</summary>
    [Storable]
    public ILookupParameter<DataTable> ModelFrequencyParameter {
      get { return (ILookupParameter<DataTable>)Parameters[ModelsFrequencyParameterName]; }
    }

    /// <summary>Parameter holding the aggregation switch (created with a default of <c>true</c>).</summary>
    [Storable]
    public IValueLookupParameter<BoolValue> AggregateModelParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateModelParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>
    /// Reads the actual value of <see cref="AggregateModelParameter"/>; assigning sets the
    /// parameter's own value.
    /// </summary>
    public BoolValue AggregateModel {
      get { return AggregateModelParameter.ActualValue; }
      set { AggregateModelParameter.Value = value; }
    }

    /// <summary>Reads or writes the actual value of <see cref="ModelFrequencyParameter"/>.</summary>
    public DataTable ModelFrequency {
      get { return ModelFrequencyParameter.ActualValue; }
      set { ModelFrequencyParameter.ActualValue = value; }
    }
    #endregion

    [StorableConstructor]
    private ModelsFrequencyAnalyzer(StorableConstructorFlag _) : base(_) { }

    private ModelsFrequencyAnalyzer(ModelsFrequencyAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Creates the analyzer and registers its two parameters: the frequency data table
    /// lookup and the aggregation switch.
    /// </summary>
    public ModelsFrequencyAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<DataTable>(ModelsFrequencyParameterName, "The relative Model Clusters reference frequencies aggregated over all trees in the population."));
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelParameterName, "Switch that determines whether all references to factor Model Clusters should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code, remove with 3.4
      // Instances stored without the aggregation switch get it added with its default value.
      if (!Parameters.ContainsKey(AggregateModelParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelParameterName, "Switch that determines whether all references to factor Model Clusters should be aggregated regardless of the value. Turn off to analyze all factor Model Clusters references with different values separately.", new BoolValue(true)));
      }
      #endregion
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelsFrequencyAnalyzer(this, cloner);
    }

    /// <summary>
    /// Appends one value per known model to the frequency data table: the relative frequency
    /// (rounded to three decimals) for models referenced in the current trees, and zero for
    /// models that are not referenced. The table is created and added to the result
    /// collection the first time no table is found.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      ResultCollection results = ResultCollection;

      DataTable datatable = ModelFrequencyParameter.ActualValue;
      if (datatable == null) {
        datatable = new DataTable(ModelFrequenciesResultName, ModelFrequenciesResultDescription);
        datatable.VisualProperties.XAxisTitle = "Generation";
        datatable.VisualProperties.YAxisTitle = "Relative Model Frequency";
        ModelFrequencyParameter.ActualValue = datatable;
        results.Add(new Result(ModelFrequenciesResultName, ModelFrequenciesResultDescription, datatable));
      }

      // all rows must have the same number of values so we can just take the first
      // (zero when the table has no rows yet)
      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

      foreach (var pair in CalculateModelFrequency(expressions).OrderByDescending(x => x.Value)) {
        if (!datatable.Rows.ContainsKey(pair.Key)) {
          // initialize a new row for the model and pad with zeros so that it lines up
          // with the rows that already exist
          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
          row.VisualProperties.StartIndexZero = true;
          datatable.Rows.Add(row);
        }
        datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
      }

      // add a zero for each data row that was not modified in the previous loop
      foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1)) {
        row.Values.Add(0.0);
      }

      return base.Apply();
    }

    /// <summary>
    /// Counts the model references in all given trees and converts the counts to relative
    /// frequencies (count of a model divided by the total count of all model references).
    /// </summary>
    /// <param name="trees">The trees to inspect.</param>
    /// <returns>
    /// One pair per referenced model, ordered by key with a <c>NaturalStringComparer</c>.
    /// The sequence is produced lazily and is empty when no tree references a model.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateModelFrequency(IEnumerable<ISymbolicExpressionTree> trees) {
      var modelFrequency = trees
          .SelectMany(t => GetModelReferences(t))
          .GroupBy(pair => pair.Key, pair => pair.Value)
          .ToDictionary(g => g.Key, g => (double)g.Sum());

      // A zero total implies an empty dictionary, so the division below never runs with it.
      double totalNumberOfReferences = modelFrequency.Values.Sum();

      foreach (var pair in modelFrequency.OrderBy(p => p.Key, new NaturalStringComparer())) {
        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfReferences);
      }
    }

    /// <summary>
    /// Counts how often each model is referenced in a single tree. Keys have the form
    /// "Model " followed by the node's tree number.
    /// </summary>
    private static IEnumerable<KeyValuePair<string, int>> GetModelReferences(ISymbolicExpressionTree tree) {
      Dictionary<string, int> references = new Dictionary<string, int>();
      foreach (var treeNode in tree.IterateNodesPrefix().OfType<TreeModelTreeNode>()) {
        string referenceId = "Model " + treeNode.TreeNumber;
        // TryGetValue leaves count at 0 for an unseen key, so a single lookup covers both cases.
        int count;
        references.TryGetValue(referenceId, out count);
        references[referenceId] = count + 1;
      }
      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.