Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis SolutionEnsembles/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.3/Analyzers/SymbolicExpressionSymbolFrequencyAnalyzer.cs @ 5867

Last change on this file since 5867 was 5809, checked in by mkommend, 14 years ago

#1418: Reintegrated branch into trunk.

File size: 5.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33
34namespace HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers {
35  /// <summary>
36  /// An operator that tracks the frequencies of distinc symbols.
37  /// </summary>
38  [Item("SymbolicExpressionSymbolFrequencyAnalyzer", "An operator that tracks frequencies of symbols.")]
39  [StorableClass]
40  [NonDiscoverableType]
41  public class SymbolicExpressionSymbolFrequencyAnalyzer : SingleSuccessorOperator, ISymbolicExpressionTreeAnalyzer {
42    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
43    private const string ResultsParameterName = "Results";
44    private const string SymbolFrequenciesParameterName = "SymbolFrequencies";
45
46    #region parameter properties
47    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
48      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
49    }
50    public ILookupParameter<DataTable> SymbolFrequenciesParameter {
51      get { return (ILookupParameter<DataTable>)Parameters[SymbolFrequenciesParameterName]; }
52    }
53    public ILookupParameter<ResultCollection> ResultsParameter {
54      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
55    }
56    #endregion
57    #region properties
58    public DataTable SymbolFrequencies {
59      get { return SymbolFrequenciesParameter.ActualValue; }
60      set { SymbolFrequenciesParameter.ActualValue = value; }
61    }
62    #endregion
63
64    [StorableConstructor]
65    protected SymbolicExpressionSymbolFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
66    protected SymbolicExpressionSymbolFrequencyAnalyzer(SymbolicExpressionSymbolFrequencyAnalyzer original, Cloner cloner) : base(original, cloner) { }
67    public SymbolicExpressionSymbolFrequencyAnalyzer()
68      : base() {
69      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
70      Parameters.Add(new ValueLookupParameter<DataTable>(SymbolFrequenciesParameterName, "The data table to store the symbol frequencies."));
71      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
72    }
73    public override IDeepCloneable Clone(Cloner cloner) {
74      return new SymbolicExpressionSymbolFrequencyAnalyzer(this, cloner);
75    }
76
77    public override IOperation Apply() {
78      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
79      ResultCollection results = ResultsParameter.ActualValue;
80
81      if (SymbolFrequencies == null) {
82        SymbolFrequencies = new DataTable("Symbol frequencies", "Relative frequency of symbols aggregated over the whole population.");
83        SymbolFrequencies.VisualProperties.YAxisTitle = "Relative Symbol Frequency";
84        results.Add(new Result("Symbol frequencies", SymbolFrequencies));
85      }
86
87      // all rows must have the same number of values so we can just take the first
88      int numberOfValues = SymbolFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
89
90      foreach (var pair in SymbolicExpressionSymbolFrequencyAnalyzer.CalculateSymbolFrequencies(expressions)) {
91        if (!SymbolFrequencies.Rows.ContainsKey(pair.Key)) {
92          // initialize a new row for the symbol and pad with zeros
93          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
94          row.VisualProperties.StartIndexZero = true;
95          SymbolFrequencies.Rows.Add(row);
96        }
97        SymbolFrequencies.Rows[pair.Key].Values.Add(pair.Value);
98      }
99
100      // add a zero for each data row that was not modified in the previous loop
101      foreach (var row in SymbolFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1))
102        row.Values.Add(0.0);
103
104      return base.Apply();
105    }
106
107    public static IEnumerable<KeyValuePair<string, double>> CalculateSymbolFrequencies(IEnumerable<SymbolicExpressionTree> trees) {
108      Dictionary<string, double> symbolFrequencies = new Dictionary<string, double>();
109      int totalNumberOfSymbols = 0;
110
111      foreach (var tree in trees) {
112        foreach (var node in tree.IterateNodesPrefix()) {
113          if (symbolFrequencies.ContainsKey(node.Symbol.Name)) symbolFrequencies[node.Symbol.Name] += 1;
114          else symbolFrequencies.Add(node.Symbol.Name, 1);
115          totalNumberOfSymbols++;
116        }
117      }
118
119      foreach (var pair in symbolFrequencies)
120        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
121    }
122  }
123}
Note: See TracBrowser for help on using the repository browser.