Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs @ 10186

Last change on this file since 10186 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 6.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that, on every call to <see cref="Apply"/>, appends one row to the
  /// "VariableFrequencies" matrix. The row holds, for each checked input variable of the
  /// problem data, the relative frequency with which that variable is referenced by the
  /// symbolic expression trees looked up through the "SymbolicExpressionTree" parameter.
  /// </summary>
  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
  [StorableClass]
  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the problem data that supplies the input variables.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the symbolic expression trees that are analyzed.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Lookup parameter for the matrix that collects one row of frequencies per call.</summary>
    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="DataAnalysisProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData DataAnalysisProblemData {
      get { return DataAnalysisProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>.</summary>
    public DoubleMatrix VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion
    [StorableConstructor]
    protected VariableFrequencyAnalyser(bool deserializing) : base(deserializing) { }
    protected VariableFrequencyAnalyser(VariableFrequencyAnalyser original, Cloner cloner)
      : base(original, cloner) {
    }
    /// <summary>Registers the three lookup parameters used by this operator.</summary>
    public VariableFrequencyAnalyser()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    }

    /// <summary>
    /// Appends a new row to <see cref="VariableFrequencies"/> (creating the matrix on first use)
    /// and fills it with the relative variable frequencies of the current trees.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Materialize once: the LINQ query is deferred and is enumerated several times below.
      var inputVariables = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value).ToList();
      if (VariableFrequencies == null) {
        // One column per input variable, so that every column index written below is in range
        // and the number of column names matches the number of columns.
        VariableFrequencies = new DoubleMatrix(0, inputVariables.Count, inputVariables);
      }
      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
      int lastRowIndex = VariableFrequencies.Rows - 1;
      var columnNames = VariableFrequencies.ColumnNames.ToList();
      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
        int columnIndex = columnNames.IndexOf(pair.Key);
        // IndexOf yields -1 when an existing matrix has no column for this variable;
        // skip the entry instead of indexing the matrix with -1.
        if (columnIndex < 0) continue;
        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
      }
      return base.Apply();
    }

    /// <summary>
    /// Calculates, for each input variable, its share of all references to input variables
    /// found in the given trees.
    /// </summary>
    /// <param name="trees">The trees whose variable nodes are counted.</param>
    /// <param name="inputVariables">The variable names for which frequencies are reported.</param>
    /// <returns>
    /// One pair per input variable (variable name, relative frequency). References to variables
    /// that are not in <paramref name="inputVariables"/> are ignored. When no input variable is
    /// referenced at all, every frequency is 0.0.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
      // Materialize once; the caller may hand in a deferred query that is enumerated repeatedly here.
      var variableNames = inputVariables.ToList();
      // Built once and shared with GetVariableReferenceCount for every tree.
      var variableSet = new HashSet<string>(variableNames);

      var variableReferencesSum = new Dictionary<string, double>();
      foreach (var variableName in variableNames)
        variableReferencesSum[variableName] = 0.0;

      foreach (var tree in trees) {
        foreach (var pair in GetVariableReferenceCount(tree, variableSet)) {
          variableReferencesSum[pair.Key] += pair.Value;
        }
      }

      double totalVariableReferences = variableReferencesSum.Values.Sum();
      var variableFrequencies = new Dictionary<string, double>();
      foreach (string variableName in variableNames) {
        // Guard the division: 0.0 / 0.0 would otherwise produce NaN for every variable.
        double relFreq = totalVariableReferences > 0.0
          ? variableReferencesSum[variableName] / totalVariableReferences
          : 0.0;
        variableFrequencies.Add(variableName, relFreq);
      }
      return variableFrequencies;
    }

    /// <summary>
    /// Counts how often each input variable is referenced by the variable nodes of one tree.
    /// </summary>
    /// <param name="tree">The tree whose nodes are visited in prefix order.</param>
    /// <param name="inputVariables">The variable names to count; all other variable names are skipped.</param>
    /// <returns>One pair (variable name, reference count) per input variable that occurs in the tree.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
      // Reuse the caller's set when one is passed in; otherwise build a set for fast membership tests.
      var allowedVariables = inputVariables as HashSet<string> ?? new HashSet<string>(inputVariables);
      var references = new Dictionary<string, int>();

      foreach (var node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()) {
        string variableName = node.VariableName;
        if (!allowedVariables.Contains(variableName)) continue;
        int count;
        // TryGetValue leaves count at 0 for a name that has not been seen yet.
        references.TryGetValue(variableName, out count);
        references[variableName] = count + 1;
      }
      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.