Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs @ 5649

Last change on this file since 5649 was 5524, checked in by gkronber, 14 years ago

#1325 Implemented MATLAB formatter rule for variable conditions and integrated handling of variable condition symbols into variable frequency analyzer and symbolic classification problem.

File size: 6.3 KB
RevLine 
[2682]1#region License Information
2/* HeuristicLab
[5445]3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[2682]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
[4722]24using HeuristicLab.Common;
[2682]25using HeuristicLab.Core;
26using HeuristicLab.Data;
[4068]27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[3531]28using HeuristicLab.Operators;
29using HeuristicLab.Parameters;
[4068]30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
[3531]31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
[2682]32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that appends one row of relative variable reference frequencies to the
  /// variable frequencies matrix each time it is applied. The frequencies are computed
  /// over all symbolic expression trees obtained from the SymbolicExpressionTree parameter.
  /// </summary>
  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
  [StorableClass]
  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the problem data that supplies the checked input variables.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the symbolic expression trees that are analyzed.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Lookup parameter for the matrix that collects one row of frequencies per application.</summary>
    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="DataAnalysisProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData DataAnalysisProblemData {
      get { return DataAnalysisProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>; null until the first application.</summary>
    public DoubleMatrix VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion

    [StorableConstructor]
    protected VariableFrequencyAnalyser(bool deserializing) : base(deserializing) { }
    protected VariableFrequencyAnalyser(VariableFrequencyAnalyser original, Cloner cloner)
      : base(original, cloner) {
    }
    /// <summary>Creates the operator and registers its three lookup parameters.</summary>
    public VariableFrequencyAnalyser()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    }

    /// <summary>
    /// Adds a new row to <see cref="VariableFrequencies"/> (creating the matrix on first use)
    /// and fills it with the relative reference frequency of every checked input variable.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Materialize once: the projection is deferred and is consumed several times below.
      var inputVariables = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value).ToList();
      if (VariableFrequencies == null) {
        // One column per input variable, so that every column index used below is backed by a column.
        VariableFrequencies = new DoubleMatrix(0, inputVariables.Count, inputVariables);
      }
      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
      int lastRowIndex = VariableFrequencies.Rows - 1;
      var columnNames = VariableFrequencies.ColumnNames.ToList();
      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
        // NOTE(review): IndexOf yields -1 if an existing matrix was created for a different
        // set of input variables; that case is not handled here - confirm it cannot occur.
        int columnIndex = columnNames.IndexOf(pair.Key);
        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
      }
      return base.Apply();
    }

    /// <summary>
    /// Calculates, for every given input variable, its share of all references to the given
    /// input variables within the given trees.
    /// </summary>
    /// <param name="trees">The symbolic expression trees to inspect.</param>
    /// <param name="inputVariables">The variable names for which frequencies are reported.</param>
    /// <returns>
    /// One entry per distinct input variable. All values are 0.0 when none of the input
    /// variables is referenced by any tree.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
      // Enumerate the (possibly deferred) sequence exactly once; the set is reused for every tree.
      HashSet<string> inputVariableSet = new HashSet<string>(inputVariables);
      Dictionary<string, double> variableReferencesSum = new Dictionary<string, double>();
      foreach (string inputVariable in inputVariableSet) {
        variableReferencesSum[inputVariable] = 0.0;
      }
      foreach (var tree in trees) {
        foreach (var pair in GetVariableReferenceCount(tree, inputVariableSet)) {
          variableReferencesSum[pair.Key] += pair.Value;
        }
      }
      double totalVariableReferences = variableReferencesSum.Values.Sum();
      Dictionary<string, double> variableFrequencies = new Dictionary<string, double>();
      foreach (string inputVariable in inputVariableSet) {
        // Without any reference the quotient would be 0.0 / 0.0 = NaN; report 0.0 instead.
        double relFreq = totalVariableReferences > 0.0
          ? variableReferencesSum[inputVariable] / totalVariableReferences
          : 0.0;
        variableFrequencies[inputVariable] = relFreq;
      }
      return variableFrequencies;
    }

    /// <summary>
    /// Counts how often each of the given input variables is referenced in the tree, by
    /// variable nodes as well as by variable condition nodes.
    /// </summary>
    /// <param name="tree">The tree whose nodes are inspected.</param>
    /// <param name="inputVariables">The variable names to count; other variables are ignored.</param>
    /// <returns>Reference counts for those input variables that occur at least once.</returns>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
      // Reuse the caller's set when one is passed to avoid rebuilding it for every tree.
      HashSet<string> countedVariables = inputVariables as HashSet<string> ?? new HashSet<string>(inputVariables);
      Dictionary<string, int> references = new Dictionary<string, int>();
      var variableNames = from node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()
                          select node.VariableName;
      var variableNamesInConditions = from node in tree.IterateNodesPrefix().OfType<VariableConditionTreeNode>()
                                      select node.VariableName;

      foreach (string variableName in variableNames.Concat(variableNamesInConditions)) {
        // Variables outside the requested set have no slot in the caller's sums.
        if (!countedVariables.Contains(variableName)) {
          continue;
        }
        int count;
        references.TryGetValue(variableName, out count); // count stays 0 for a first occurrence
        references[variableName] = count + 1;
      }
      return references;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.