Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs @ 3904

Last change on this file since 3904 was 3802, checked in by gkronber, 14 years ago

Minor efficiency improvements for variable frequency analyzer. #938

File size: 6.4 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using System;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Operators;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Parameters;
31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that, on every application, appends one row to the <c>VariableFrequencies</c> matrix.
  /// The row holds, for each input variable of the problem data, the number of variable nodes
  /// referencing it divided by the number of all variable nodes in the analyzed trees.
  /// </summary>
  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
  [StorableClass]
  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the problem data whose input variables define the matrix columns.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the symbolic expression trees that are analyzed.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Lookup parameter for the matrix that collects one row of frequencies per application.</summary>
    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="DataAnalysisProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData DataAnalysisProblemData {
      get { return DataAnalysisProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>; created by <see cref="Apply"/> when still null.</summary>
    public DoubleMatrix VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Registers the three parameters (trees, problem data, frequency matrix) of the operator.
    /// </summary>
    public VariableFrequencyAnalyser()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    }

    /// <summary>
    /// Appends a new row to the frequency matrix (creating the matrix on first use) and fills it
    /// with the relative variable frequencies of the current trees.
    /// </summary>
    /// <returns>Always <c>null</c>.</returns>
    public override IOperation Apply() {
      // The projection is a deferred LINQ query; materialize it once so that it is not
      // re-evaluated by every consumer below. The static type stays IEnumerable<string>
      // so the same DoubleMatrix constructor overload as before is selected.
      IEnumerable<string> inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value).ToList();

      // Resolve the parameter value once instead of on every access.
      DoubleMatrix frequencies = VariableFrequencies;
      if (frequencies == null) {
        frequencies = new DoubleMatrix(0, 1, inputVariables);
        VariableFrequencies = frequencies;
      }

      // Grow the matrix by one row; the new last row receives the current frequencies.
      ((IStringConvertibleMatrix)frequencies).Rows = frequencies.Rows + 1;
      int lastRowIndex = frequencies.Rows - 1;
      var columnNames = frequencies.ColumnNames.ToList();
      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
        // NOTE(review): IndexOf yields -1 when an already existing matrix has no column for
        // this variable (e.g. it was created for different problem data) - confirm whether
        // that situation can occur and how it should be handled.
        int columnIndex = columnNames.IndexOf(pair.Key);
        frequencies[lastRowIndex, columnIndex] = pair.Value;
      }
      // NOTE(review): returns null instead of delegating to the base class, so a configured
      // successor is presumably not scheduled - confirm this is intended.
      return null;
    }

    /// <summary>
    /// Calculates, for every input variable, the share of variable nodes referencing it among
    /// all variable nodes found in <paramref name="trees"/>.
    /// </summary>
    /// <param name="trees">The trees to analyze; each tree is traversed once.</param>
    /// <param name="inputVariables">The variable names to report. Names are compared ordinally;
    /// a name occurring more than once is reported once.</param>
    /// <returns>One pair per input variable (name, relative frequency). Variable nodes whose
    /// name is not an input variable still count towards the denominator. If the trees contain
    /// no variable node at all, every frequency is 0.0 (instead of NaN from a division by zero).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="trees"/> or
    /// <paramref name="inputVariables"/> is null.</exception>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
      if (trees == null) throw new ArgumentNullException("trees");
      if (inputVariables == null) throw new ArgumentNullException("inputVariables");

      // Materialize once; the sequence is walked twice below and may be a deferred query.
      List<string> variableNames = inputVariables.ToList();

      var variableReferencesSum = new Dictionary<string, double>(variableNames.Count);
      foreach (string inputVariable in variableNames)
        variableReferencesSum[inputVariable] = 0.0;

      int totalVariableReferences = 0;
      foreach (var tree in trees) {
        // Single traversal per tree: group the variable nodes by name, then update both the
        // overall total and the per-variable sums from the group sizes.
        var referencesByName = tree.IterateNodesPrefix()
                                   .OfType<VariableTreeNode>()
                                   .GroupBy(node => node.VariableName);
        foreach (var group in referencesByName) {
          int references = group.Count();
          totalVariableReferences += references;

          double sum;
          // Dictionary lookups reject null keys, hence the explicit null check.
          if (group.Key != null && variableReferencesSum.TryGetValue(group.Key, out sum))
            variableReferencesSum[group.Key] = sum + references;
        }
      }

      var variableFrequencies = new Dictionary<string, double>(variableNames.Count);
      foreach (string inputVariable in variableNames) {
        // Guard against 0.0 / 0, which would store NaN for every variable.
        double relFreq = totalVariableReferences > 0
          ? variableReferencesSum[inputVariable] / (double)totalVariableReferences
          : 0.0;
        variableFrequencies[inputVariable] = relFreq;
      }
      return variableFrequencies;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.