Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs @ 3531

Last change on this file since 3531 was 3531, checked in by gkronber, 14 years ago

Added operator for calculation of relative variable frequencies. #938 (Data types and operators for regression problems)

File size: 6.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using System;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Operators;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Parameters;
31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
35  [StorableClass]
36  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
37    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
38    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
39    private const string VariableFrequenciesParameterName = "VariableFrequencies";
40
41    #region parameter properties
42    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
43      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
44    }
45    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
46      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
47    }
48    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
49      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
50    }
51    #endregion
52    #region properties
53    public DataAnalysisProblemData DataAnalysisProblemData {
54      get { return DataAnalysisProblemDataParameter.ActualValue; }
55    }
56    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
57      get { return SymbolicExpressionTreeParameter.ActualValue; }
58    }
59    public DoubleMatrix VariableFrequencies {
60      get { return VariableFrequenciesParameter.ActualValue; }
61      set { VariableFrequenciesParameter.ActualValue = value; }
62    }
63    #endregion
64    public VariableFrequencyAnalyser()
65      : base() {
66      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
67      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
68      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
69    }
70
71    public override IOperation Apply() {
72      var inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
73      if (VariableFrequencies == null) {
74        VariableFrequencies = new DoubleMatrix(0, 1, inputVariables);
75      }
76      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
77      int lastRowIndex = VariableFrequencies.Rows - 1;
78      var columnNames = VariableFrequencies.ColumnNames.ToList();
79      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
80        int columnIndex = columnNames.IndexOf(pair.Key);
81        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
82      }
83      return null;
84    }
85
86    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
87      int totalVariableReferences = 0;
88      Dictionary<string, double> variableReferencesSum = new Dictionary<string, double>();
89      foreach (var inputVariable in inputVariables)
90        variableReferencesSum[inputVariable] = 0.0;
91      foreach (var tree in trees) {
92        var variableReferences = GetVariableReferenceCount(tree, inputVariables);
93        foreach (var pair in variableReferences) {
94          variableReferencesSum[pair.Key] += pair.Value;
95        }
96        totalVariableReferences += GetTotalVariableReferencesCount(tree);
97      }
98      foreach (string inputVariable in inputVariables) {
99        double relFreq = variableReferencesSum[inputVariable] / (double)totalVariableReferences;
100        yield return new KeyValuePair<string, double>(inputVariable, relFreq);
101      }
102    }
103
104    private static int GetTotalVariableReferencesCount(SymbolicExpressionTree tree) {
105      return tree.IterateNodesPrefix().OfType<VariableTreeNode>().Count();
106    }
107
108    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
109      var groupedFuns = (from node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()
110                         select node.VariableName).GroupBy(x => x);
111
112      foreach (var inputVariable in inputVariables) {
113        var matchingFuns = from g in groupedFuns
114                           where g.Key == inputVariable
115                           select g.Count();
116        if (matchingFuns.Count() == 0) yield return new KeyValuePair<string, int>(inputVariable, 0);
117        else {
118          yield return new KeyValuePair<string, int>(inputVariable, matchingFuns.Single());
119        }
120      }
121    }
122  }
123}
Note: See TracBrowser for help on using the repository browser.