Free cookie consent management tool by TermsFeed Policy Generator

source: branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs @ 14232

Last change on this file since 14232 was 14232, checked in by gkronber, 8 years ago

created a feature branch for #2650 (support for categorical variables in symb reg) with a first set of changes

work in progress...

File size: 12.6 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using HeuristicLab.Analysis;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Calculates the accumulated frequencies of variable-symbols over all trees in the population.
  /// </summary>
  [Item("SymbolicDataAnalysisVariableFrequencyAnalyzer", "Calculates the accumulated frequencies of variable-symbols over all trees in the population.")]
  [StorableClass]
  public sealed class SymbolicDataAnalysisVariableFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
    private const string AggregateFactorVariablesParameterName = "AggregateFactorVariables";
    private const string VariableImpactsParameterName = "VariableImpacts";

    // Shared by the constructor and the deserialization hook so both create an identical parameter.
    private const string AggregateFactorVariablesParameterDescription = "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.";

    // Names under which the two results are stored in the result collection.
    private const string VariableFrequenciesResultName = "Variable frequencies";
    private const string VariableImpactsResultName = "Variable impacts";

    #region parameter properties
    /// <summary>Lookup parameter holding the data table of relative variable frequencies.</summary>
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }
    /// <summary>Lookup parameter holding the matrix of variable impacts.</summary>
    public ILookupParameter<DoubleMatrix> VariableImpactsParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableImpactsParameterName]; }
    }
    /// <summary>Parameter for the switch that aggregates variable references regardless of time offset.</summary>
    public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
    }
    /// <summary>Parameter for the switch that aggregates factor variable references regardless of value.</summary>
    public IValueLookupParameter<BoolValue> AggregateFactorVariablesParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateFactorVariablesParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>
    /// Reads the actual value of <see cref="AggregateLaggedVariablesParameter"/>; assigning sets the parameter's value.
    /// </summary>
    public BoolValue AggregateLaggedVariables {
      get { return AggregateLaggedVariablesParameter.ActualValue; }
      set { AggregateLaggedVariablesParameter.Value = value; }
    }
    /// <summary>
    /// Reads the actual value of <see cref="AggregateFactorVariablesParameter"/>; assigning sets the parameter's value.
    /// </summary>
    public BoolValue AggregateFactorVariables {
      get { return AggregateFactorVariablesParameter.ActualValue; }
      set { AggregateFactorVariablesParameter.Value = value; }
    }
    #endregion
    [StorableConstructor]
    private SymbolicDataAnalysisVariableFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
    private SymbolicDataAnalysisVariableFrequencyAnalyzer(SymbolicDataAnalysisVariableFrequencyAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }
    /// <summary>
    /// Creates the analyzer and registers its four parameters; both aggregation switches default to true.
    /// </summary>
    public SymbolicDataAnalysisVariableFrequencyAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over all trees in the population."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, AggregateFactorVariablesParameterDescription, new BoolValue(true)));
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code, remove with 3.4
      // Instances stored without the factor-variable switch get it added with the same default as in the constructor.
      if (!Parameters.ContainsKey(AggregateFactorVariablesParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, AggregateFactorVariablesParameterDescription, new BoolValue(true)));
      }
      #endregion
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisVariableFrequencyAnalyzer(this, cloner);
    }

    /// <summary>
    /// Appends the variable frequencies of the current trees to the frequency data table
    /// (creating the table and both results on first use) and rebuilds the variable impacts
    /// matrix from the per-row averages of that table.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      ResultCollection results = ResultCollection;
      DataTable datatable;
      if (VariableFrequenciesParameter.ActualValue == null) {
        datatable = new DataTable("Variable frequencies", "Relative frequency of variable references aggregated over the whole population.");
        datatable.VisualProperties.XAxisTitle = "Generation";
        datatable.VisualProperties.YAxisTitle = "Relative Variable Frequency";
        VariableFrequenciesParameter.ActualValue = datatable;
        results.Add(new Result(VariableFrequenciesResultName, "Relative frequency of variable references aggregated over the whole population.", datatable));
        results.Add(new Result(VariableImpactsResultName, "The relative variable relevance calculated as the average relative variable frequency over the whole run.", new DoubleMatrix()));
      }

      datatable = VariableFrequenciesParameter.ActualValue;
      // all rows must have the same number of values so we can just take the first
      // (DefaultIfEmpty yields 0 when the table has no rows yet)
      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

      foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value, AggregateFactorVariables.Value)) {
        if (!datatable.Rows.ContainsKey(pair.Key)) {
          // initialize a new row for the variable and pad with zeros
          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
          row.VisualProperties.StartIndexZero = true;
          datatable.Rows.Add(row);
        }
        datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
      }

      // add a zero for each data row that was not modified in the previous loop
      foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1)) {
        row.Values.Add(0.0);
      }

      // update variable impacts matrix: one row per variable, ordered by descending average frequency
      var orderedImpacts = datatable.Rows
        .Select(row => new { Name = row.Name, Impact = Math.Round(row.Values.Average(), 3) })
        .OrderByDescending(p => p.Impact)
        .ToList();
      var impacts = new DoubleMatrix();
      // the matrix is sized, labelled and filled through its IStringConvertibleMatrix view
      var matrix = (IStringConvertibleMatrix)impacts;
      matrix.Rows = orderedImpacts.Count;
      matrix.RowNames = orderedImpacts.Select(x => x.Name);
      matrix.Columns = 1;
      matrix.ColumnNames = new string[] { "Relative variable relevance" };
      int i = 0;
      foreach (var p in orderedImpacts) {
        // SetValue takes the cell content as a string
        matrix.SetValue(p.Impact.ToString(), i++, 0);
      }

      VariableImpactsParameter.ActualValue = impacts;
      results[VariableImpactsResultName].Value = impacts;
      return base.Apply();
    }

    /// <summary>
    /// Counts the variable references in all given trees and returns, for each reference id,
    /// its count divided by the total number of counted references.
    /// </summary>
    /// <param name="trees">The trees whose variable references are counted.</param>
    /// <param name="aggregateLaggedVariables">If true, references are counted per variable name without a lag suffix.</param>
    /// <param name="aggregateFactorVariables">If true, factor variable references are counted per variable name; otherwise per string representation of the node.</param>
    /// <returns>Pairs of reference id and relative frequency, ordered by id using a <c>NaturalStringComparer</c>.</returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees,
      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {

      var variableFrequencies = trees
        .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables, aggregateFactorVariables))
        .GroupBy(pair => pair.Key, pair => pair.Value)
        .ToDictionary(g => g.Key, g => (double)g.Sum());

      // when no references were found the dictionary is empty and the loop below never divides by zero
      double totalNumberOfSymbols = variableFrequencies.Values.Sum();

      foreach (var pair in variableFrequencies.OrderBy(p => p.Key, new NaturalStringComparer())) {
        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
      }
    }

    /// <summary>
    /// Counts the variable references of a single tree.
    /// </summary>
    /// <param name="tree">The tree to inspect.</param>
    /// <param name="aggregateLaggedVariables">If true, every node is visited in prefix order and counted without a lag suffix; otherwise the lag-aware recursive traversal is used.</param>
    /// <param name="aggregateFactorVariables">If true, factor variable nodes are counted by variable name; otherwise by the node's string representation.</param>
    /// <returns>Pairs of reference id and number of occurrences in the tree.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree,
      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
      Dictionary<string, int> references = new Dictionary<string, int>();
      if (aggregateLaggedVariables) {
        tree.Root.ForEachNodePrefix(node => {
          if (node.Symbol is Variable) {
            var varNode = (VariableTreeNode)node;
            IncReferenceCount(references, varNode.VariableName);
          } else if (node.Symbol is VariableCondition) {
            var varCondNode = (VariableConditionTreeNode)node;
            IncReferenceCount(references, varCondNode.VariableName);
          } else if (node.Symbol is FactorVariable) {
            var factorNode = (FactorVariableTreeNode)node;
            if (aggregateFactorVariables) {
              IncReferenceCount(references, factorNode.VariableName);
            } else {
              IncReferenceCount(references, factorNode.ToString());
            }
          }
        });
      } else {
        GetVariableReferences(references, tree.Root, 0, aggregateFactorVariables);
      }
      return references;
    }

    /// <summary>
    /// Recursively counts variable references below <paramref name="node"/>, tracking the
    /// time lag accumulated from enclosing lag-shifting symbols.
    /// </summary>
    /// <param name="references">Dictionary that receives the counts.</param>
    /// <param name="node">The node to inspect.</param>
    /// <param name="currentLag">The lag accumulated from the ancestors of <paramref name="node"/>.</param>
    /// <param name="aggregateFactorVariables">If true, factor variable nodes are counted by variable name; otherwise by the node's string representation.</param>
    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) {
      // LaggedVariable is tested before Variable, so it takes precedence if a symbol matches both checks
      if (node.Symbol is LaggedVariable) {
        var laggedVarNode = (LaggedVariableTreeNode)node;
        IncReferenceCount(references, laggedVarNode.VariableName, currentLag + laggedVarNode.Lag);
      } else if (node.Symbol is Variable) {
        var varNode = (VariableTreeNode)node;
        IncReferenceCount(references, varNode.VariableName, currentLag);
      } else if (node.Symbol is FactorVariable) {
        var factorNode = (FactorVariableTreeNode)node;
        if (aggregateFactorVariables) {
          IncReferenceCount(references, factorNode.VariableName, currentLag);
        } else {
          IncReferenceCount(references, factorNode.ToString(), currentLag);
        }
      } else if (node.Symbol is VariableCondition) {
        // the condition variable itself is counted, then both branches are traversed
        var varCondNode = (VariableConditionTreeNode)node;
        IncReferenceCount(references, varCondNode.VariableName, currentLag);
        GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
        GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
      } else if (node.Symbol is Integral) {
        // the subtree is counted once for every lag from the node's lag up to and including 0
        var laggedNode = (LaggedTreeNode)node;
        for (int l = laggedNode.Lag; l <= 0; l++) {
          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
        }
      } else if (node.Symbol is Derivative) {
        // NOTE(review): the subtree is counted for the fixed lags -4..0; presumably this mirrors the
        // window used when the Derivative symbol is evaluated - confirm against the interpreter.
        for (int l = -4; l <= 0; l++) {
          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
        }
      } else if (node.Symbol is TimeLag) {
        var laggedNode = (LaggedTreeNode)node;
        GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag, aggregateFactorVariables);
      } else {
        foreach (var subtree in node.Subtrees) {
          GetVariableReferences(references, subtree, currentLag, aggregateFactorVariables);
        }
      }
    }

    /// <summary>
    /// Increments the counter for a variable reference, creating it with a count of one if absent.
    /// The reference id is the variable name, followed by "(t-n)" for negative lags or "(t+n)"
    /// for positive lags; a lag of zero adds no suffix.
    /// </summary>
    /// <param name="references">Dictionary that holds the counts.</param>
    /// <param name="variableName">Name of the referenced variable.</param>
    /// <param name="timeLag">Time lag of the reference.</param>
    private static void IncReferenceCount(Dictionary<string, int> references, string variableName, int timeLag = 0) {
      string referenceId = variableName;
      if (timeLag != 0) {
        // Format with the invariant culture so the id (used as a row key) does not depend on the
        // current culture's negative sign.
        string lag = timeLag.ToString(CultureInfo.InvariantCulture);
        referenceId += timeLag < 0 ? "(t" + lag + ")" : "(t+" + lag + ")";
      }

      // TryGetValue leaves count at 0 when the id is not present yet; single lookup instead of ContainsKey + indexer.
      int count;
      references.TryGetValue(referenceId, out count);
      references[referenceId] = count + 1;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.