Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.IslandAlgorithms/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs @ 15747

Last change on this file since 15747 was 11646, checked in by mkommend, 10 years ago

#1997: Merged trunk changes to data analysis island GA branch.

File size: 10.2 KB
RevLine 
[5556]1#region License Information
2/* HeuristicLab
[11646]3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[5556]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[6709]22using System;
[5556]23using System.Collections.Generic;
24using System.Linq;
[6981]25using HeuristicLab.Analysis;
[5556]26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
35  /// <summary>
36  /// Calculates the accumulated frequencies of variable-symbols over all trees in the population.
37  /// </summary>
38  [Item("SymbolicDataAnalysisVariableFrequencyAnalyzer", "Calculates the accumulated frequencies of variable-symbols over all trees in the population.")]
39  [StorableClass]
40  public sealed class SymbolicDataAnalysisVariableFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
// Keys under which this analyzer registers and looks up its parameters in the Parameters collection.
private const string VariableImpactsParameterName = "VariableImpacts";
private const string VariableFrequenciesParameterName = "VariableFrequencies";
private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
[5556]44
#region parameter properties
/// <summary>
/// Lookup parameter for the data table that collects the variable frequencies
/// (registered under <c>VariableFrequenciesParameterName</c>).
/// </summary>
public ILookupParameter<DataTable> VariableFrequenciesParameter {
  get {
    var parameter = Parameters[VariableFrequenciesParameterName];
    return (ILookupParameter<DataTable>)parameter;
  }
}

/// <summary>
/// Lookup parameter for the matrix of variable impacts
/// (registered under <c>VariableImpactsParameterName</c>).
/// </summary>
public ILookupParameter<DoubleMatrix> VariableImpactsParameter {
  get {
    var parameter = Parameters[VariableImpactsParameterName];
    return (ILookupParameter<DoubleMatrix>)parameter;
  }
}

/// <summary>
/// Value-lookup parameter for the switch that controls aggregation of lagged variable references
/// (registered under <c>AggregateLaggedVariablesParameterName</c>).
/// </summary>
public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
  get {
    var parameter = Parameters[AggregateLaggedVariablesParameterName];
    return (IValueLookupParameter<BoolValue>)parameter;
  }
}
#endregion
#region properties
/// <summary>
/// Convenience accessor for the aggregation switch: reading returns the parameter's
/// <c>ActualValue</c>, writing assigns the parameter's <c>Value</c>.
/// </summary>
public BoolValue AggregateLaggedVariables {
  get {
    var parameter = AggregateLaggedVariablesParameter;
    return parameter.ActualValue;
  }
  set {
    var parameter = AggregateLaggedVariablesParameter;
    parameter.Value = value;
  }
}
#endregion
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; only forwards to the base class.
/// </summary>
[StorableConstructor]
private SymbolicDataAnalysisVariableFrequencyAnalyzer(bool deserializing)
  : base(deserializing) {
}

/// <summary>
/// Cloning constructor used by <c>Clone</c>; only forwards to the base class.
/// </summary>
private SymbolicDataAnalysisVariableFrequencyAnalyzer(SymbolicDataAnalysisVariableFrequencyAnalyzer original, Cloner cloner)
  : base(original, cloner) {
}

/// <summary>
/// Creates a new analyzer and registers its three parameters:
/// the frequency table, the impact matrix and the aggregation switch (default: true).
/// </summary>
public SymbolicDataAnalysisVariableFrequencyAnalyzer()
  : base() {
  var frequenciesParameter = new LookupParameter<DataTable>(
    VariableFrequenciesParameterName,
    "The relative variable reference frequencies aggregated over all trees in the population.");
  Parameters.Add(frequenciesParameter);

  var impactsParameter = new LookupParameter<DoubleMatrix>(
    VariableImpactsParameterName,
    "The relative variable relevance calculated as the average relative variable frequency over the whole run.");
  Parameters.Add(impactsParameter);

  var aggregateParameter = new ValueLookupParameter<BoolValue>(
    AggregateLaggedVariablesParameterName,
    "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.",
    new BoolValue(true));
  Parameters.Add(aggregateParameter);
}
[5748]73
/// <summary>
/// Creates a copy of this analyzer by invoking the cloning constructor.
/// </summary>
/// <param name="cloner">The cloner that is passed on to the cloning constructor.</param>
/// <returns>The newly created analyzer.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new SymbolicDataAnalysisVariableFrequencyAnalyzer(this, cloner);
  return copy;
}
77
/// <summary>
/// Appends the current relative variable frequencies of all trees to the frequency data table
/// and recomputes the variable impacts as the average of each data row.
/// </summary>
/// <remarks>
/// When no frequency table can be looked up yet, a new table is created, stored in
/// <c>VariableFrequenciesParameter</c> and registered in the result collection together with
/// an (initially empty) impacts matrix.
/// </remarks>
/// <returns>The operation returned by the base implementation.</returns>
public override IOperation Apply() {
  // result names/descriptions are declared once so that the names used for adding and for
  // looking up the results cannot drift apart
  const string frequenciesName = "Variable frequencies";
  const string frequenciesDescription = "Relative frequency of variable references aggregated over the whole population.";
  const string impactsName = "Variable impacts";
  const string impactsDescription = "The relative variable relevance calculated as the average relative variable frequency over the whole run.";

  ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
  ResultCollection results = ResultCollection;

  DataTable datatable = VariableFrequenciesParameter.ActualValue;
  if (datatable == null) {
    datatable = new DataTable(frequenciesName, frequenciesDescription);
    datatable.VisualProperties.XAxisTitle = "Generation";
    datatable.VisualProperties.YAxisTitle = "Relative Variable Frequency";
    VariableFrequenciesParameter.ActualValue = datatable;
    results.Add(new Result(frequenciesName, frequenciesDescription, datatable));
    results.Add(new Result(impactsName, impactsDescription, new DoubleMatrix()));
  }

  // all rows must have the same number of values so we can just take the first
  // (DefaultIfEmpty yields 0 when the table has no rows yet)
  int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

  foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {
    if (!datatable.Rows.ContainsKey(pair.Key)) {
      // initialize a new row for the variable and pad with zeros
      DataRow newRow = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
      newRow.VisualProperties.StartIndexZero = true;
      datatable.Rows.Add(newRow);
    }
    datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
  }

  // add a zero for each data row that was not modified in the previous loop
  foreach (var untouchedRow in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1)) {
    untouchedRow.Values.Add(0.0);
  }

  // update variable impacts matrix: one entry per data row, sorted by descending average frequency
  var orderedImpacts = datatable.Rows
    .Select(r => new { Name = r.Name, Impact = Math.Round(r.Values.Average(), 3) })
    .OrderByDescending(p => p.Impact)
    .ToList();

  var impacts = new DoubleMatrix();
  // the matrix is sized and filled through its IStringConvertibleMatrix view, values are passed as strings
  IStringConvertibleMatrix matrix = impacts;
  matrix.Rows = orderedImpacts.Count;
  matrix.RowNames = orderedImpacts.Select(x => x.Name);
  matrix.Columns = 1;
  matrix.ColumnNames = new string[] { "Relative variable relevance" };
  for (int rowIndex = 0; rowIndex < orderedImpacts.Count; rowIndex++) {
    matrix.SetValue(orderedImpacts[rowIndex].Impact.ToString(), rowIndex, 0);
  }

  VariableImpactsParameter.ActualValue = impacts;
  results[impactsName].Value = impacts;
  return base.Apply();
}
129
/// <summary>
/// Calculates the relative frequency of every variable reference over all given trees.
/// </summary>
/// <remarks>
/// This is an iterator method: nothing is evaluated until the returned sequence is enumerated.
/// </remarks>
/// <param name="trees">The trees whose variable references are counted.</param>
/// <param name="aggregateLaggedVariables">Passed on to the per-tree reference counting; selects whether references are aggregated regardless of their time offset.</param>
/// <returns>
/// One pair per reference id (ordered by id using a <c>NaturalStringComparer</c>) whose value is the
/// reference count divided by the sum of all reference counts.
/// </returns>
public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) {
  // absolute number of references per reference id, accumulated over all trees
  var referenceCounts =
    (from tree in trees
     from reference in GetVariableReferences(tree, aggregateLaggedVariables)
     group reference.Value by reference.Key)
    .ToDictionary(g => g.Key, g => (double)g.Sum());

  double totalReferences = referenceCounts.Values.Sum();

  var comparer = new NaturalStringComparer();
  foreach (var entry in referenceCounts.OrderBy(e => e.Key, comparer)) {
    double relativeFrequency = entry.Value / totalReferences;
    yield return new KeyValuePair<string, double>(entry.Key, relativeFrequency);
  }
}
142
/// <summary>
/// Counts the variable references of a single tree.
/// </summary>
/// <param name="tree">The tree to analyze, starting at its root node.</param>
/// <param name="aggregateLaggedVariables">
/// If true, every node whose symbol is a <c>Variable</c> or <c>VariableCondition</c> is counted under
/// its plain variable name; otherwise the lag-aware recursive overload is used.
/// </param>
/// <returns>The reference ids together with the number of times they occur in the tree.</returns>
private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) {
  var references = new Dictionary<string, int>();

  if (!aggregateLaggedVariables) {
    // time offsets matter: walk the tree recursively and track the accumulated lag
    GetVariableReferences(references, tree.Root, 0);
    return references;
  }

  tree.Root.ForEachNodePrefix(node => {
    // direct casts: a node whose type does not match its symbol fails with an
    // InvalidCastException instead of a NullReferenceException on the next member access
    if (node.Symbol is Variable) {
      var varNode = (VariableTreeNode)node;
      IncReferenceCount(references, varNode.VariableName);
    } else if (node.Symbol is VariableCondition) {
      var varCondNode = (VariableConditionTreeNode)node;
      IncReferenceCount(references, varCondNode.VariableName);
    }
  });
  return references;
}
160
/// <summary>
/// Recursively counts variable references below <paramref name="node"/>, keeping track of the
/// time offset accumulated from enclosing lag-shifting symbols.
/// </summary>
/// <param name="references">Dictionary that receives the reference counts.</param>
/// <param name="node">The node to inspect; its symbol decides how the node is handled.</param>
/// <param name="currentLag">The time offset accumulated on the path from the root to this node.</param>
private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag) {
  // Direct casts are used instead of 'as': a node whose type does not match its symbol fails
  // with an InvalidCastException instead of a NullReferenceException on the next member access.
  // NOTE(review): LaggedVariable is tested before Variable - presumably because a lagged
  // variable would also satisfy the Variable test; keep this order.
  if (node.Symbol is LaggedVariable) {
    var laggedVarNode = (LaggedVariableTreeNode)node;
    IncReferenceCount(references, laggedVarNode.VariableName, currentLag + laggedVarNode.Lag);
  } else if (node.Symbol is Variable) {
    var varNode = (VariableTreeNode)node;
    IncReferenceCount(references, varNode.VariableName, currentLag);
  } else if (node.Symbol is VariableCondition) {
    var varCondNode = (VariableConditionTreeNode)node;
    IncReferenceCount(references, varCondNode.VariableName, currentLag);
    // only the first two subtrees are visited
    GetVariableReferences(references, node.GetSubtree(0), currentLag);
    GetVariableReferences(references, node.GetSubtree(1), currentLag);
  } else if (node.Symbol is Integral) {
    var laggedNode = (LaggedTreeNode)node;
    // the subtree is counted once for every offset from the node's lag up to 0
    // NOTE(review): for a positive Lag the loop body never runs and the subtree is not counted - confirm lags are always <= 0
    for (int l = laggedNode.Lag; l <= 0; l++) {
      GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
    }
  } else if (node.Symbol is Derivative) {
    // the subtree is counted for the fixed offsets -4..0
    // NOTE(review): presumably mirrors the time window used to evaluate Derivative - confirm
    for (int l = -4; l <= 0; l++) {
      GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
    }
  } else if (node.Symbol is TimeLag) {
    var laggedNode = (LaggedTreeNode)node;
    GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag);
  } else {
    // any other symbol: descend into all subtrees without changing the offset
    foreach (var subtree in node.Subtrees) {
      GetVariableReferences(references, subtree, currentLag);
    }
  }
}
191
/// <summary>
/// Increments the counter of the reference identified by variable name and time lag.
/// </summary>
/// <remarks>
/// The reference id is the variable name for a lag of 0, <c>name(t-k)</c> for a negative lag and
/// <c>name(t+k)</c> for a positive lag. The lag is formatted with the invariant culture so that
/// the id does not depend on the negative sign of the current thread culture.
/// </remarks>
/// <param name="references">Dictionary holding the counters; a missing id starts at 1.</param>
/// <param name="variableName">Name of the referenced variable.</param>
/// <param name="timeLag">Time offset of the reference; 0 means no offset.</param>
private static void IncReferenceCount(Dictionary<string, int> references, string variableName, int timeLag = 0) {
  string lagSuffix = "";
  if (timeLag != 0) {
    string lagText = timeLag.ToString(System.Globalization.CultureInfo.InvariantCulture);
    // a negative lag already carries its '-' sign, a positive lag needs an explicit '+'
    lagSuffix = timeLag < 0 ? "(t" + lagText + ")" : "(t+" + lagText + ")";
  }
  string referenceId = variableName + lagSuffix;

  // single lookup: count stays 0 when the id is not present yet
  int count;
  references.TryGetValue(referenceId, out count);
  references[referenceId] = count + 1;
}
201  }
202}
Note: See TracBrowser for help on using the repository browser.