Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.Extensions/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs @ 5119

Last change on this file since 5119 was 4858, checked in by swinkler, 14 years ago

Removed obsolete project for symbolic expression tree formatters; (re-)added DataAnalysis project in branch DataAnalysis.Extensions. (#1270)

File size: 6.1 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that, on every application, appends one row to the variable-frequency matrix.
  /// The row holds, for each input variable, the share of all variable references found in
  /// the looked-up symbolic expression trees that point to that variable.
  /// </summary>
  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
  [StorableClass]
  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the problem data that supplies the input variable names.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the symbolic expression trees to analyze.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Lookup parameter for the matrix that collects one row of frequencies per application.</summary>
    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="DataAnalysisProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData DataAnalysisProblemData {
      get { return DataAnalysisProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>; null until the first application.</summary>
    public DoubleMatrix VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>Constructor marked for the storable (persistence) mechanism.</summary>
    [StorableConstructor]
    protected VariableFrequencyAnalyser(bool deserializing) : base(deserializing) { }

    /// <summary>Copy constructor; forwards the original and the cloner to the base class.</summary>
    protected VariableFrequencyAnalyser(VariableFrequencyAnalyser original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>Creates the operator and registers its three parameters.</summary>
    public VariableFrequencyAnalyser()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    }

    /// <summary>
    /// Appends a new row to <see cref="VariableFrequencies"/> (creating the matrix on first use)
    /// and fills it with the relative variable frequencies of the current trees.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Materialize once: the projection is enumerated several times below.
      var inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value).ToList();
      if (VariableFrequencies == null) {
        // One column per input variable; the column names are used below to locate the target cell.
        // NOTE(review): the original passed a fixed column count of 1 together with all names — confirm
        // against DoubleMatrix that the count must equal the number of names.
        VariableFrequencies = new DoubleMatrix(0, inputVariables.Count, inputVariables);
      }
      // The resizing setter is reached through the IStringConvertibleMatrix interface.
      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
      int lastRowIndex = VariableFrequencies.Rows - 1;
      var columnNames = VariableFrequencies.ColumnNames.ToList();
      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
        int columnIndex = columnNames.IndexOf(pair.Key);
        // A matrix that already existed may lack a column for this variable; skip it rather
        // than index the matrix with -1.
        if (columnIndex >= 0) {
          VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
        }
      }
      return base.Apply();
    }

    /// <summary>
    /// Calculates, for every input variable, its share of all input-variable references found in the given trees.
    /// </summary>
    /// <param name="trees">The trees whose variable nodes are counted.</param>
    /// <param name="inputVariables">The variable names to report; duplicates are reported once.</param>
    /// <returns>
    /// One pair per distinct input variable holding its relative frequency. References to variables that
    /// are not listed in <paramref name="inputVariables"/> are ignored. When the trees contain no reference
    /// to any input variable, every frequency is 0.0 (instead of NaN from dividing by zero).
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
      // Enumerate the caller's sequence exactly once; Distinct keeps first-occurrence order.
      var variableNames = inputVariables.Distinct().ToList();
      var allowedNames = new HashSet<string>(variableNames);

      var variableReferencesSum = new Dictionary<string, double>();
      foreach (var variableName in variableNames) {
        variableReferencesSum[variableName] = 0.0;
      }

      foreach (var tree in trees) {
        // The helper only reports names contained in allowedNames, so every key exists in the sum table.
        foreach (var pair in GetVariableReferenceCount(tree, allowedNames)) {
          variableReferencesSum[pair.Key] += pair.Value;
        }
      }

      double totalVariableReferences = variableReferencesSum.Values.Sum();
      var variableFrequencies = new Dictionary<string, double>();
      foreach (var variableName in variableNames) {
        variableFrequencies[variableName] = totalVariableReferences > 0.0
          ? variableReferencesSum[variableName] / totalVariableReferences
          : 0.0;
      }
      return variableFrequencies;
    }

    /// <summary>
    /// Counts how often each of the given input variables is referenced by variable nodes of one tree.
    /// </summary>
    /// <param name="tree">The tree whose nodes are visited.</param>
    /// <param name="inputVariables">The variable names that are counted; all other names are skipped.</param>
    /// <returns>Pairs of variable name and reference count; variables without references are absent.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
      // Reuse the caller's set when one was passed so it is not rebuilt for every tree.
      var allowedNames = inputVariables as HashSet<string> ?? new HashSet<string>(inputVariables);
      var references = new Dictionary<string, int>();
      foreach (var node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()) {
        string variableName = node.VariableName;
        if (!allowedNames.Contains(variableName)) {
          continue;
        }
        int count;
        references.TryGetValue(variableName, out count); // count stays 0 when the name is new
        references[variableName] = count + 1;
      }
      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.