Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs @ 5733

Last change on this file since 5733 was 5733, checked in by mkommend, 13 years ago

#1418: Corrected problem interfaces & unified naming of subtrees.

File size: 8.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Analysis;
33
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Calculates the accumulated frequencies of variable-symbols over all trees in the population.
  /// </summary>
  /// <remarks>
  /// Each call to <see cref="Apply"/> appends exactly one value to every row of the
  /// "Variable frequencies" data table, so all rows always have the same length.
  /// </remarks>
  [Item("SymbolicDataAnalysisVariableFrequencyAnalyzer", "Calculates the accumulated frequencies of variable-symbols over all trees in the population.")]
  [StorableClass]
  public sealed class SymbolicDataAnalysisVariableFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";

    #region parameter properties
    /// <summary>
    /// Lookup parameter holding the data table with one row per variable reference.
    /// </summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }
    /// <summary>
    /// Parameter holding the switch that decides whether references to the same variable
    /// with different time offsets are counted together.
    /// </summary>
    public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>
    /// Actual value of <see cref="VariableFrequenciesParameter"/>.
    /// </summary>
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    /// <summary>
    /// Actual value of <see cref="AggregateLaggedVariablesParameter"/>.
    /// </summary>
    public BoolValue AggregateLaggedVariables {
      get { return AggregateLaggedVariablesParameter.ActualValue; }
    }
    #endregion
    [StorableConstructor]
    private SymbolicDataAnalysisVariableFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
    private SymbolicDataAnalysisVariableFrequencyAnalyzer(SymbolicDataAnalysisVariableFrequencyAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }
    /// <summary>
    /// Creates the analyzer and registers its two parameters; lagged variables are aggregated by default.
    /// </summary>
    public SymbolicDataAnalysisVariableFrequencyAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over all trees in the population."));
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisVariableFrequencyAnalyzer(this, cloner);
    }

    /// <summary>
    /// Appends the current relative variable frequencies of all trees to the frequency table.
    /// </summary>
    /// <remarks>
    /// The table is created and added to the result collection on first use. A variable that is
    /// seen for the first time gets a new row padded with zeros for the earlier values; a variable
    /// that is not referenced this time gets a zero appended.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      ResultCollection results = ResultCollection;

      if (VariableFrequencies == null) {
        VariableFrequencies = new DataTable("Variable frequencies", "Relative frequency of variable references aggregated over the whole population.");
        VariableFrequencies.VisualProperties.XAxisTitle = "Generation";
        VariableFrequencies.VisualProperties.YAxisTitle = "Relative Variable Frequency";
        results.Add(new Result("Variable frequencies", VariableFrequencies));
      }

      // all rows must have the same number of values so we can just take the first
      // (evaluates to 0 while the table has no rows yet)
      int numberOfValues = VariableFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

      foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {
        if (!VariableFrequencies.Rows.ContainsKey(pair.Key)) {
          // initialize a new row for the variable and pad with zeros
          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
          row.VisualProperties.StartIndexZero = true;
          VariableFrequencies.Rows.Add(row);
        }
        VariableFrequencies.Rows[pair.Key].Values.Add(pair.Value);
      }

      // add a zero for each data row that was not modified in the previous loop
      foreach (var row in VariableFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1)) {
        row.Values.Add(0.0);
      }

      return base.Apply();
    }

    /// <summary>
    /// Calculates, for every referenced variable, its share of all variable references found in <paramref name="trees"/>.
    /// </summary>
    /// <param name="trees">The trees to scan.</param>
    /// <param name="aggregateLaggedVariables">
    /// If true, references are keyed by variable name only; otherwise the effective time offset
    /// is part of the key (for example "x(t-2)").
    /// </param>
    /// <returns>
    /// Pairs of reference key and relative frequency. The divisor is the total number of variable
    /// references, not the total number of tree nodes. The sequence is produced by an iterator,
    /// so the trees are scanned when the result is enumerated.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) {
      Dictionary<string, double> variableFrequencies = new Dictionary<string, double>();
      int totalNumberOfReferences = 0;

      foreach (var tree in trees) {
        foreach (var pair in GetVariableReferences(tree, aggregateLaggedVariables)) {
          totalNumberOfReferences += pair.Value;
          // TryGetValue leaves the count at 0 for a key that has not been seen yet
          double accumulated;
          variableFrequencies.TryGetValue(pair.Key, out accumulated);
          variableFrequencies[pair.Key] = accumulated + pair.Value;
        }
      }

      // an empty dictionary yields nothing, so the division is never reached with a zero total
      foreach (var pair in variableFrequencies) {
        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfReferences);
      }
    }

    /// <summary>
    /// Counts the variable references of a single tree.
    /// </summary>
    /// <param name="tree">The tree to scan, starting at its root.</param>
    /// <param name="aggregateLaggedVariables">
    /// If true, every variable and variable-condition node is counted under its variable name;
    /// otherwise references are counted separately per effective time offset.
    /// </param>
    /// <returns>Pairs of reference key and number of occurrences.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) {
      Dictionary<string, int> references = new Dictionary<string, int>();
      if (aggregateLaggedVariables) {
        tree.Root.ForEachNodePrefix(node => {
          if (node.Symbol is Variable) {
            IncReferenceCount(references, ((VariableTreeNode)node).VariableName);
          } else if (node.Symbol is VariableCondition) {
            IncReferenceCount(references, ((VariableConditionTreeNode)node).VariableName);
          }
        });
      } else {
        GetVariableReferences(references, tree.Root, 0);
      }
      return references;
    }

    /// <summary>
    /// Recursively counts variable references below <paramref name="node"/>, tracking the time
    /// offset accumulated from enclosing lagged nodes.
    /// </summary>
    /// <param name="references">Dictionary that receives the counts.</param>
    /// <param name="node">The node to inspect.</param>
    /// <param name="currentLag">Sum of the time offsets of all enclosing lagged nodes.</param>
    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag) {
      // LaggedVariable is tested before Variable, as in the original branch order
      if (node.Symbol is LaggedVariable) {
        var laggedVarNode = (LaggedVariableTreeNode)node;
        IncReferenceCount(references, laggedVarNode.VariableName, currentLag + laggedVarNode.Lag);
      } else if (node.Symbol is Variable) {
        IncReferenceCount(references, ((VariableTreeNode)node).VariableName, currentLag);
      } else if (node.Symbol is VariableCondition) {
        IncReferenceCount(references, ((VariableConditionTreeNode)node).VariableName, currentLag);
        GetVariableReferences(references, node.GetSubtree(0), currentLag);
        GetVariableReferences(references, node.GetSubtree(1), currentLag);
      } else if (node.Symbol is Integral || node.Symbol is Derivative) {
        // the argument is visited once for every offset from the node's lag up to 0;
        // a positive lag visits nothing
        var laggedNode = (LaggedTreeNode)node;
        for (int l = laggedNode.Lag; l <= 0; l++) {
          GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
        }
      } else if (node.Symbol is TimeLag) {
        var laggedNode = (LaggedTreeNode)node;
        GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag);
      } else {
        // every other node does not change the time offset; descend into all of its subtrees.
        // Without this branch the traversal stops at the first node that is not handled above
        // (including the tree root) and nested variable references are never counted.
        foreach (var subtree in node.Subtrees) {
          GetVariableReferences(references, subtree, currentLag);
        }
      }
    }

    /// <summary>
    /// Increments the counter of a variable reference, creating it with a count of 1 if necessary.
    /// </summary>
    /// <param name="references">Dictionary that holds the counters.</param>
    /// <param name="variableName">Name of the referenced variable.</param>
    /// <param name="timeLag">
    /// Time offset of the reference. 0 uses the plain variable name as key; other values append
    /// "(t" + offset + ")" for negative and "(t+" + offset + ")" for positive offsets.
    /// </param>
    private static void IncReferenceCount(Dictionary<string, int> references, string variableName, int timeLag = 0) {
      string referenceId = variableName +
        (timeLag == 0 ? "" : timeLag < 0 ? "(t" + timeLag + ")" : "(t+" + timeLag + ")");
      // TryGetValue leaves the count at 0 for a key that has not been seen yet
      int count;
      references.TryGetValue(referenceId, out count);
      references[referenceId] = count + 1;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.