Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs @ 5224

Last change on this file since 5224 was 4125, checked in by gkronber, 14 years ago

Made variable frequency analyzer more efficient and removed subtraction of the baseline (variable impacts are now in the range 0..1 instead of -1..1) #1011

File size: 5.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Operators;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
31
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that, each time it is applied, appends one row to the VariableFrequencies matrix.
  /// The row holds, for every input variable of the problem data, the share of variable
  /// references that this variable receives over all looked-up symbolic expression trees.
  /// </summary>
  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
  [StorableClass]
  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the problem data that supplies the input variable names.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the symbolic expression trees that are analyzed.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Lookup parameter for the result matrix (one row per call of <see cref="Apply"/>).</summary>
    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="DataAnalysisProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData DataAnalysisProblemData {
      get { return DataAnalysisProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>; null until the first row is written.</summary>
    public DoubleMatrix VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Registers the three parameters (trees, problem data, variable frequencies) of the operator.
    /// </summary>
    public VariableFrequencyAnalyser()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    }

    /// <summary>
    /// Adds a new row to <see cref="VariableFrequencies"/> (creating the matrix on first use) and
    /// fills it with the relative variable frequencies of the current trees.
    /// </summary>
    /// <returns>Always null.</returns>
    public override IOperation Apply() {
      // Materialize once: the projection is lazy and would otherwise be re-evaluated
      // on every enumeration below and inside CalculateVariableFrequencies.
      var inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value).ToList();
      if (VariableFrequencies == null) {
        // NOTE(review): the matrix is created with one column while one column name per input
        // variable is passed - confirm that DoubleMatrix sizes its columns from the names.
        VariableFrequencies = new DoubleMatrix(0, 1, inputVariables);
      }
      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
      int lastRowIndex = VariableFrequencies.Rows - 1;
      var columnNames = VariableFrequencies.ColumnNames.ToList();
      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
        int columnIndex = columnNames.IndexOf(pair.Key);
        // A variable without a matching column (e.g. the matrix was created for a different
        // set of input variables) is skipped instead of indexing the matrix with -1.
        if (columnIndex < 0) continue;
        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
      }
      return null;
    }

    /// <summary>
    /// Calculates, for each input variable, its share of all references to input variables
    /// that occur in the given trees.
    /// </summary>
    /// <param name="trees">The symbolic expression trees to analyze.</param>
    /// <param name="inputVariables">The names of the variables for which frequencies are reported.</param>
    /// <returns>
    /// One pair per distinct input variable. The values sum to 1, or are all 0 when none of the
    /// trees references an input variable. References to variables that are not contained in
    /// <paramref name="inputVariables"/> are ignored.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
      // Enumerate the (possibly lazy) sequence only once; duplicates would make Add() below throw.
      var variableNames = inputVariables.Distinct().ToList();

      var variableReferencesSum = new Dictionary<string, double>();
      foreach (var variableName in variableNames)
        variableReferencesSum[variableName] = 0.0;

      foreach (var tree in trees) {
        foreach (var pair in GetVariableReferenceCount(tree, variableNames)) {
          double currentSum;
          // Only input variables are tracked; any other referenced variable is not counted
          // (the indexer would throw a KeyNotFoundException for it).
          if (variableReferencesSum.TryGetValue(pair.Key, out currentSum)) {
            variableReferencesSum[pair.Key] = currentSum + pair.Value;
          }
        }
      }

      double totalVariableReferences = variableReferencesSum.Values.Sum();
      var variableFrequencies = new Dictionary<string, double>();
      foreach (var variableName in variableNames) {
        // Without any reference the division would yield NaN; report 0 instead.
        double relFreq = totalVariableReferences > 0.0
          ? variableReferencesSum[variableName] / totalVariableReferences
          : 0.0;
        variableFrequencies.Add(variableName, relFreq);
      }
      return variableFrequencies;
    }

    /// <summary>
    /// Counts how often each variable name is referenced by the variable nodes of a tree.
    /// </summary>
    /// <param name="tree">The tree whose nodes are visited in prefix order.</param>
    /// <param name="inputVariables">Not used by the counting; all referenced variables are reported.</param>
    /// <returns>One pair per referenced variable name with its number of occurrences.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
      var references = new Dictionary<string, int>();
      foreach (var variableNode in tree.IterateNodesPrefix().OfType<VariableTreeNode>()) {
        int count;
        // count stays 0 for a name that has not been seen yet
        references.TryGetValue(variableNode.VariableName, out count);
        references[variableNode.VariableName] = count + 1;
      }
      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.