source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs @ 6728

Last change on this file since 6728 was 6728, checked in by gkronber, 10 years ago

#1557 implemented parallel evaluation in analyzers using ParallelEnumerable extension methods.

File size: 10.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Analysis;
34
35namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
/// <summary>
/// Calculates the accumulated frequencies of variable-symbols over all trees in the population.
/// </summary>
/// <remarks>
/// Each call to <see cref="Apply"/> appends one value per known variable to the
/// "Variable frequencies" data table and rewrites the "Variable impacts" matrix with the
/// per-variable average of all values collected so far.
/// </remarks>
[Item("SymbolicDataAnalysisVariableFrequencyAnalyzer", "Calculates the accumulated frequencies of variable-symbols over all trees in the population.")]
[StorableClass]
public sealed class SymbolicDataAnalysisVariableFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
  private const string VariableFrequenciesParameterName = "VariableFrequencies";
  private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
  private const string VariableImpactsParameterName = "VariableImpacts";

  #region parameter properties
  /// <summary>Lookup parameter holding the data table with one row of frequencies per variable.</summary>
  public ILookupParameter<DataTable> VariableFrequenciesParameter {
    get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
  }
  /// <summary>Lookup parameter holding the single-column matrix of averaged variable frequencies.</summary>
  public ILookupParameter<DoubleMatrix> VariableImpactsParameter {
    get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableImpactsParameterName]; }
  }
  /// <summary>Parameter for the switch that decides whether time offsets are ignored when counting references.</summary>
  public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
    get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
  }
  #endregion
  #region properties
  /// <summary>
  /// Reads the actual value of <see cref="AggregateLaggedVariablesParameter"/>; the setter
  /// assigns the parameter's own value.
  /// </summary>
  public BoolValue AggregateLaggedVariables {
    get { return AggregateLaggedVariablesParameter.ActualValue; }
    set { AggregateLaggedVariablesParameter.Value = value; }
  }
  #endregion

  // Constructor flagged with [StorableConstructor]; it only forwards to the base class
  // (presumably invoked by the persistence layer during deserialization - confirm).
  [StorableConstructor]
  private SymbolicDataAnalysisVariableFrequencyAnalyzer(bool deserializing) : base(deserializing) { }

  // Cloning constructor used by Clone(Cloner); this class adds no state of its own to copy.
  private SymbolicDataAnalysisVariableFrequencyAnalyzer(SymbolicDataAnalysisVariableFrequencyAnalyzer original, Cloner cloner)
    : base(original, cloner) {
  }

  /// <summary>
  /// Creates the analyzer and registers its three parameters. Aggregation of lagged
  /// variables is switched on by default.
  /// </summary>
  public SymbolicDataAnalysisVariableFrequencyAnalyzer()
    : base() {
    Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over all trees in the population."));
    Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
    Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
  }

  /// <summary>Returns a copy of this analyzer created through the cloning constructor.</summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new SymbolicDataAnalysisVariableFrequencyAnalyzer(this, cloner);
  }

  /// <summary>
  /// Appends the current relative variable frequencies to the frequency table, rewrites the
  /// impacts matrix from the table's per-row averages, and then continues with the base operation.
  /// </summary>
  /// <returns>The operation returned by the base class.</returns>
  public override IOperation Apply() {
    ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    ResultCollection results = ResultCollection;

    // First invocation: create the table and the matrix, publish them through the
    // parameters and register both in the result collection.
    if (VariableFrequenciesParameter.ActualValue == null) {
      var newTable = new DataTable("Variable frequencies", "Relative frequency of variable references aggregated over the whole population.");
      newTable.VisualProperties.XAxisTitle = "Generation";
      newTable.VisualProperties.YAxisTitle = "Relative Variable Frequency";
      var newImpacts = new DoubleMatrix();
      VariableFrequenciesParameter.ActualValue = newTable;
      VariableImpactsParameter.ActualValue = newImpacts;
      results.Add(new Result("Variable frequencies", "Relative frequency of variable references aggregated over the whole population.", newTable));
      results.Add(new Result("Variable impacts", "The relative variable relevance calculated as the average relative variable frequency over the whole run.", newImpacts));
    }

    DoubleMatrix impacts = VariableImpactsParameter.ActualValue;
    DataTable datatable = VariableFrequenciesParameter.ActualValue;
    // all rows must have the same number of values so we can just take the first
    // (DefaultIfEmpty makes this 0 while the table has no rows yet)
    int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

    foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {
      if (!datatable.Rows.ContainsKey(pair.Key)) {
        // variable seen for the first time: create its row and pad it with zeros
        // so that it is as long as the rows that already exist
        DataRow newRow = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
        newRow.VisualProperties.StartIndexZero = true;
        datatable.Rows.Add(newRow);
      }
      datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
    }

    // add a zero for each data row that was not modified in the previous loop
    foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1))
      row.Values.Add(0.0);

    // update variable impacts matrix: one row per variable, sorted by descending average frequency
    var orderedImpacts = datatable.Rows
      .Select(row => new { Name = row.Name, Impact = row.Values.Average() })
      .OrderByDescending(p => p.Impact)
      .ToList();
    var matrix = (IStringConvertibleMatrix)impacts;
    matrix.Rows = orderedImpacts.Count;
    matrix.RowNames = orderedImpacts.Select(x => x.Name);
    matrix.Columns = 1;
    matrix.ColumnNames = new string[] { "Relative variable relevance" };
    int i = 0;
    foreach (var p in orderedImpacts) {
      // NOTE(review): the value travels as a current-culture string; presumably SetValue
      // parses it with the same culture - confirm before changing the format.
      matrix.SetValue(p.Impact.ToString(), i++, 0);
    }

    return base.Apply();
  }

  /// <summary>
  /// Counts the variable references in all <paramref name="trees"/> and returns, per reference
  /// id, its share of the total number of counted references.
  /// </summary>
  /// <param name="trees">The trees to analyze; they are processed with PLINQ (<c>AsParallel</c>).</param>
  /// <param name="aggregateLaggedVariables">
  /// If true, references are counted per variable name regardless of time offset; otherwise each
  /// distinct time offset of a variable gets its own reference id.
  /// </param>
  /// <returns>
  /// Pairs of reference id and relative frequency. The sequence is produced by an iterator, so
  /// the computation runs when (and every time) the result is enumerated. No particular order of
  /// the pairs is established here.
  /// </returns>
  public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) {

    // Every tree is counted into its own dictionary (see GetVariableReferences), the per-tree
    // counts are then summed up per reference id.
    var variableFrequencies = trees
      .AsParallel()
      .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables))
      .GroupBy(pair => pair.Key, pair => pair.Value)
      .ToDictionary(g => g.Key, g => (double)g.Sum());

    // Every entry has a count of at least one, so the total is positive whenever the loop
    // below has something to yield.
    double totalNumberOfSymbols = variableFrequencies.Values.Sum();

    foreach (var pair in variableFrequencies)
      yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
  }

  // Returns the reference counts of a single tree. A fresh dictionary is created per call.
  private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) {
    Dictionary<string, int> references = new Dictionary<string, int>();
    if (aggregateLaggedVariables) {
      // Time offsets are ignored: visit every node and count by variable name only.
      tree.Root.ForEachNodePrefix(node => {
        if (node.Symbol is Variable) {
          IncReferenceCount(references, ((VariableTreeNode)node).VariableName);
        } else if (node.Symbol is VariableCondition) {
          IncReferenceCount(references, ((VariableConditionTreeNode)node).VariableName);
        }
      });
    } else {
      GetVariableReferences(references, tree.Root, 0);
    }
    return references;
  }

  // Recursively counts references below 'node', tracking the time offset accumulated on the
  // path from the root in 'currentLag'.
  private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag) {
    // LaggedVariable is tested before Variable; keep this order.
    if (node.Symbol is LaggedVariable) {
      var laggedVarNode = (LaggedVariableTreeNode)node;
      IncReferenceCount(references, laggedVarNode.VariableName, currentLag + laggedVarNode.Lag);
    } else if (node.Symbol is Variable) {
      var varNode = (VariableTreeNode)node;
      IncReferenceCount(references, varNode.VariableName, currentLag);
    } else if (node.Symbol is VariableCondition) {
      var varCondNode = (VariableConditionTreeNode)node;
      IncReferenceCount(references, varCondNode.VariableName, currentLag);
      // only the first two subtrees are followed here
      GetVariableReferences(references, node.GetSubtree(0), currentLag);
      GetVariableReferences(references, node.GetSubtree(1), currentLag);
    } else if (node.Symbol is Integral) {
      // the argument is counted once for every offset from the node's lag up to 0
      var laggedNode = (LaggedTreeNode)node;
      for (int l = laggedNode.Lag; l <= 0; l++) {
        GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
      }
    } else if (node.Symbol is Derivative) {
      // the argument is counted for offsets -4..0
      // NOTE(review): presumably mirrors the window the derivative is evaluated on - confirm
      for (int l = -4; l <= 0; l++) {
        GetVariableReferences(references, node.GetSubtree(0), currentLag + l);
      }
    } else if (node.Symbol is TimeLag) {
      var laggedNode = (LaggedTreeNode)node;
      GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag);
    } else {
      foreach (var subtree in node.Subtrees) {
        GetVariableReferences(references, subtree, currentLag);
      }
    }
  }

  // Increments the counter of the reference id built from the variable name and the time lag:
  // "x" for lag 0, "x(t-2)" for lag -2, "x(t+3)" for lag 3.
  private static void IncReferenceCount(Dictionary<string, int> references, string variableName, int timeLag = 0) {
    string lagSuffix = "";
    if (timeLag != 0) {
      // Format with the invariant culture so the id does not depend on the thread's culture
      // (a negative number already carries its own sign).
      string lagText = timeLag.ToString(System.Globalization.CultureInfo.InvariantCulture);
      lagSuffix = timeLag < 0 ? "(t" + lagText + ")" : "(t+" + lagText + ")";
    }
    string referenceId = variableName + lagSuffix;

    // TryGetValue leaves 'count' at 0 for an unknown id, so one lookup plus one store suffices.
    int count;
    references.TryGetValue(referenceId, out count);
    references[referenceId] = count + 1;
  }
}
205}
Note: See TracBrowser for help on using the repository browser.