Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs @ 17759

Last change on this file since 17759 was 17181, checked in by swagner, 5 years ago

#2875: Merged r17180 from trunk to stable

File size: 12.5 KB
RevLine 
[5556]1#region License Information
2/* HeuristicLab
[17181]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[5556]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[6709]22using System;
[5556]23using System.Collections.Generic;
24using System.Linq;
[6981]25using HeuristicLab.Analysis;
[5556]26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
[17097]32using HEAL.Attic;
[5556]33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
35  /// <summary>
36  /// Calculates the accumulated frequencies of variable-symbols over all trees in the population.
37  /// </summary>
38  [Item("SymbolicDataAnalysisVariableFrequencyAnalyzer", "Calculates the accumulated frequencies of variable-symbols over all trees in the population.")]
[17097]39  [StorableType("C7E9B375-6375-478F-8590-473BA567BA90")]
[5556]40  public sealed class SymbolicDataAnalysisVariableFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
41    private const string VariableFrequenciesParameterName = "VariableFrequencies";
42    private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables";
[15131]43    private const string AggregateFactorVariablesParameterName = "AggregateFactorVariables";
[5748]44    private const string VariableImpactsParameterName = "VariableImpacts";
[5556]45
46    #region parameter properties
47    public ILookupParameter<DataTable> VariableFrequenciesParameter {
48      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
49    }
[5748]50    public ILookupParameter<DoubleMatrix> VariableImpactsParameter {
51      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableImpactsParameterName]; }
52    }
[5556]53    public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter {
54      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; }
55    }
[15131]56    public IValueLookupParameter<BoolValue> AggregateFactorVariablesParameter {
57      get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateFactorVariablesParameterName]; }
58    }
[5556]59    #endregion
60    #region properties
61    public BoolValue AggregateLaggedVariables {
62      get { return AggregateLaggedVariablesParameter.ActualValue; }
[5748]63      set { AggregateLaggedVariablesParameter.Value = value; }
[5556]64    }
[15131]65    public BoolValue AggregateFactorVariables {
66      get { return AggregateFactorVariablesParameter.ActualValue; }
67      set { AggregateFactorVariablesParameter.Value = value; }
68    }
[5556]69    #endregion
70    [StorableConstructor]
[17097]71    private SymbolicDataAnalysisVariableFrequencyAnalyzer(StorableConstructorFlag _) : base(_) { }
[5556]72    private SymbolicDataAnalysisVariableFrequencyAnalyzer(SymbolicDataAnalysisVariableFrequencyAnalyzer original, Cloner cloner)
73      : base(original, cloner) {
74    }
75    public SymbolicDataAnalysisVariableFrequencyAnalyzer()
76      : base() {
77      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over all trees in the population."));
[5748]78      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run."));
[5556]79      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true)));
[15131]80      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
[5556]81    }
[5748]82
[15131]83    [StorableHook(HookType.AfterDeserialization)]
84    private void AfterDeserialization() {
85      // BackwardsCompatibility3.3
86      #region Backwards compatible code, remove with 3.4
87      if (!Parameters.ContainsKey(AggregateFactorVariablesParameterName)) {
88        Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true)));
89      }
90      #endregion
91    }
92
[5556]93    public override IDeepCloneable Clone(Cloner cloner) {
94      return new SymbolicDataAnalysisVariableFrequencyAnalyzer(this, cloner);
95    }
96
97    public override IOperation Apply() {
98      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
99      ResultCollection results = ResultCollection;
[5748]100      DataTable datatable;
101      if (VariableFrequenciesParameter.ActualValue == null) {
102        datatable = new DataTable("Variable frequencies", "Relative frequency of variable references aggregated over the whole population.");
103        datatable.VisualProperties.XAxisTitle = "Generation";
104        datatable.VisualProperties.YAxisTitle = "Relative Variable Frequency";
105        VariableFrequenciesParameter.ActualValue = datatable;
106        results.Add(new Result("Variable frequencies", "Relative frequency of variable references aggregated over the whole population.", datatable));
[6811]107        results.Add(new Result("Variable impacts", "The relative variable relevance calculated as the average relative variable frequency over the whole run.", new DoubleMatrix()));
[5556]108      }
109
[5748]110      datatable = VariableFrequenciesParameter.ActualValue;
[5556]111      // all rows must have the same number of values so we can just take the first
[5748]112      int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
[5556]113
[15131]114      foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value, AggregateFactorVariables.Value)) {
[5748]115        if (!datatable.Rows.ContainsKey(pair.Key)) {
[5556]116          // initialize a new row for the variable and pad with zeros
117          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
118          row.VisualProperties.StartIndexZero = true;
[5748]119          datatable.Rows.Add(row);
[5556]120        }
[6709]121        datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
[5556]122      }
123
124      // add a zero for each data row that was not modified in the previous loop
[5748]125      foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1))
[5556]126        row.Values.Add(0.0);
127
[5748]128      // update variable impacts matrix
129      var orderedImpacts = (from row in datatable.Rows
[8735]130                            select new { Name = row.Name, Impact = Math.Round(datatable.Rows[row.Name].Values.Average(), 3) })
[5748]131                           .OrderByDescending(p => p.Impact)
132                           .ToList();
[6811]133      var impacts = new DoubleMatrix();
134      var matrix = impacts as IStringConvertibleMatrix;
[5748]135      matrix.Rows = orderedImpacts.Count;
136      matrix.RowNames = orderedImpacts.Select(x => x.Name);
137      matrix.Columns = 1;
138      matrix.ColumnNames = new string[] { "Relative variable relevance" };
139      int i = 0;
140      foreach (var p in orderedImpacts) {
141        matrix.SetValue(p.Impact.ToString(), i++, 0);
142      }
143
[6811]144      VariableImpactsParameter.ActualValue = impacts;
145      results["Variable impacts"].Value = impacts;
[5556]146      return base.Apply();
147    }
148
[15131]149    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees,
150      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
[5556]151
[6728]152      var variableFrequencies = trees
[15131]153        .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables, aggregateFactorVariables))
[6728]154        .GroupBy(pair => pair.Key, pair => pair.Value)
155        .ToDictionary(g => g.Key, g => (double)g.Sum());
[5556]156
[6728]157      double totalNumberOfSymbols = variableFrequencies.Values.Sum();
158
[6981]159      foreach (var pair in variableFrequencies.OrderBy(p => p.Key, new NaturalStringComparer()))
[5556]160        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
161    }
162
[15131]163    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree,
164      bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) {
[5556]165      Dictionary<string, int> references = new Dictionary<string, int>();
166      if (aggregateLaggedVariables) {
167        tree.Root.ForEachNodePrefix(node => {
[15131]168          if (node is IVariableTreeNode) {
169            var factorNode = node as BinaryFactorVariableTreeNode;
170            if (factorNode != null && !aggregateFactorVariables) {
171              IncReferenceCount(references, factorNode.VariableName + "=" + factorNode.VariableValue);
172            } else {
173              var varNode = node as IVariableTreeNode;
174              IncReferenceCount(references, varNode.VariableName);
175            }
[5556]176          }
177        });
178      } else {
[15131]179        GetVariableReferences(references, tree.Root, 0, aggregateFactorVariables);
[5556]180      }
181      return references;
182    }
183
[15131]184    private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) {
185      if (node is IVariableTreeNode) {
186        var laggedVarTreeNode = node as LaggedVariableTreeNode;
187        var binFactorVariableTreeNode = node as BinaryFactorVariableTreeNode;
188        var varConditionTreeNode = node as VariableConditionTreeNode;
189        if (laggedVarTreeNode != null) {
190          IncReferenceCount(references, laggedVarTreeNode.VariableName, currentLag + laggedVarTreeNode.Lag);
191        } else if (binFactorVariableTreeNode != null) {
192          if (aggregateFactorVariables) {
193            IncReferenceCount(references, binFactorVariableTreeNode.VariableName, currentLag);
194          } else {
195            IncReferenceCount(references, binFactorVariableTreeNode.VariableName + "=" + binFactorVariableTreeNode.VariableValue, currentLag);
196          }
197        } else if (varConditionTreeNode != null) {
198          IncReferenceCount(references, varConditionTreeNode.VariableName, currentLag);
199          GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables);
200          GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables);
201        } else {
202          var varNode = node as IVariableTreeNode;
203          IncReferenceCount(references, varNode.VariableName, currentLag);
204        }
[5556]205      } else if (node.Symbol is Integral) {
206        var laggedNode = node as LaggedTreeNode;
207        for (int l = laggedNode.Lag; l <= 0; l++) {
[15131]208          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
[5556]209        }
210      } else if (node.Symbol is Derivative) {
[5924]211        for (int l = -4; l <= 0; l++) {
[15131]212          GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables);
[5556]213        }
214      } else if (node.Symbol is TimeLag) {
215        var laggedNode = node as LaggedTreeNode;
[15131]216        GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag, aggregateFactorVariables);
[5922]217      } else {
218        foreach (var subtree in node.Subtrees) {
[15131]219          GetVariableReferences(references, subtree, currentLag, aggregateFactorVariables);
[5922]220        }
[5556]221      }
222    }
223
224    private static void IncReferenceCount(Dictionary<string, int> references, string variableName, int timeLag = 0) {
225      string referenceId = variableName +
226        (timeLag == 0 ? "" : timeLag < 0 ? "(t" + timeLag + ")" : "(t+" + timeLag + ")");
227      if (references.ContainsKey(referenceId)) {
228        references[referenceId]++;
229      } else {
230        references[referenceId] = 1;
231      }
232    }
233  }
234}
Note: See TracBrowser for help on using the repository browser.