source: trunk/sources/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.4/Analyzers/SymbolicExpressionSymbolFrequencyAnalyzer.cs @ 5971

Last change on this file since 5971 was 5971, checked in by gkronber, 8 years ago

#1418 added a parameter to the symbol frequency analyzer in symbolic expression tree encoding which indicates if the frequencies of symbols with the same name but different number of subtrees should be aggregated.

File size: 6.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Operators;
28using HeuristicLab.Optimization;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Data;
32
namespace HeuristicLab.Encodings.SymbolicExpressionTreeEncoding {
  /// <summary>
  /// An operator that tracks the frequencies of distinct symbols in symbolic expression trees.
  /// </summary>
  /// <remarks>
  /// Every call to <see cref="Apply"/> appends exactly one value to each row of the
  /// "Symbol frequencies" data table: the relative frequency of the row's symbol in the
  /// analyzed trees, or zero if the symbol did not occur in this call.
  /// </remarks>
  [Item("SymbolicExpressionSymbolFrequencyAnalyzer", "An operator that tracks frequencies of symbols in symbolic expression trees.")]
  [StorableClass]
  public class SymbolicExpressionSymbolFrequencyAnalyzer : SingleSuccessorOperator, ISymbolicExpressionTreeAnalyzer {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string ResultsParameterName = "Results";
    private const string SymbolFrequenciesParameterName = "SymbolFrequencies";
    private const string AggregateSymbolsWithDifferentSubtreeCountParameterName = "AggregateSymbolsWithDifferentSubtreeCount";
    // shared by the default constructor and the after-deserialization hook so both create an identical parameter
    private const string AggregateSymbolsWithDifferentSubtreeCountParameterDescription = "Flag that indicates if the frequencies of symbols with the same name but different number of sub-trees should be aggregated.";

    #region parameter properties
    /// <summary>Gets the parameter that looks up the symbolic expression trees to analyze.</summary>
    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Gets the parameter that looks up the data table in which the symbol frequencies are stored.</summary>
    public ILookupParameter<DataTable> SymbolFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[SymbolFrequenciesParameterName]; }
    }
    /// <summary>Gets the parameter that looks up the result collection to which the data table is added.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>
    /// Gets the parameter holding the flag that controls whether symbols with the same name
    /// but a different number of subtrees are counted together.
    /// </summary>
    public IValueParameter<BoolValue> AggregateSymbolsWithDifferentSubtreeCountParameter {
      get { return (IValueParameter<BoolValue>)Parameters[AggregateSymbolsWithDifferentSubtreeCountParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>
    /// Gets or sets the value of <see cref="AggregateSymbolsWithDifferentSubtreeCountParameter"/>.
    /// </summary>
    /// <remarks>
    /// The property name is misspelled ("Aggregrate"); it is kept unchanged because it is
    /// part of the public interface of this class.
    /// </remarks>
    public BoolValue AggregrateSymbolsWithDifferentSubtreeCount {
      get { return AggregateSymbolsWithDifferentSubtreeCountParameter.Value; }
      set { AggregateSymbolsWithDifferentSubtreeCountParameter.Value = value; }
    }
    #endregion

    [StorableConstructor]
    protected SymbolicExpressionSymbolFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
    protected SymbolicExpressionSymbolFrequencyAnalyzer(SymbolicExpressionSymbolFrequencyAnalyzer original, Cloner cloner) : base(original, cloner) { }
    /// <summary>
    /// Creates a new analyzer and registers its four parameters; the aggregation flag defaults to <c>true</c>.
    /// </summary>
    public SymbolicExpressionSymbolFrequencyAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new LookupParameter<DataTable>(SymbolFrequenciesParameterName, "The data table to store the symbol frequencies."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the symbol frequencies should be stored."));
      Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, AggregateSymbolsWithDifferentSubtreeCountParameterDescription, new BoolValue(true)));
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicExpressionSymbolFrequencyAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds the aggregation flag parameter if it is missing after deserialization.
    /// </summary>
    /// <remarks>
    /// The storable constructor does not register any parameters, so an instance that was
    /// persisted without the aggregation flag would otherwise fail as soon as
    /// <see cref="AggregateSymbolsWithDifferentSubtreeCountParameter"/> is accessed.
    /// The parameter is created with the same default (<c>true</c>) as in the default constructor.
    /// </remarks>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(AggregateSymbolsWithDifferentSubtreeCountParameterName))
        Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, AggregateSymbolsWithDifferentSubtreeCountParameterDescription, new BoolValue(true)));
    }

    /// <summary>
    /// Calculates the symbol frequencies of the current trees and appends them to the
    /// symbol frequency data table, creating the table (and its result entry) on first use.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      ResultCollection results = ResultsParameter.ActualValue;
      DataTable symbolFrequencies = SymbolFrequenciesParameter.ActualValue;
      if (symbolFrequencies == null) {
        symbolFrequencies = new DataTable("Symbol frequencies", "Relative frequency of symbols aggregated over the whole population.");
        symbolFrequencies.VisualProperties.YAxisTitle = "Relative Symbol Frequency";

        SymbolFrequenciesParameter.ActualValue = symbolFrequencies;
        results.Add(new Result("Symbol frequencies", symbolFrequencies));
      }

      // all rows must have the same number of values so we can just take the first
      // (DefaultIfEmpty yields 0 when the table does not contain any rows yet)
      int numberOfValues = symbolFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

      foreach (var pair in SymbolicExpressionSymbolFrequencyAnalyzer.CalculateSymbolFrequencies(expressions, AggregrateSymbolsWithDifferentSubtreeCount.Value)) {
        DataRow row;
        if (symbolFrequencies.Rows.ContainsKey(pair.Key)) {
          row = symbolFrequencies.Rows[pair.Key];
        } else {
          // initialize a new row for the symbol and pad with zeros
          row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
          row.VisualProperties.StartIndexZero = true;
          symbolFrequencies.Rows.Add(row);
        }
        row.Values.Add(pair.Value);
      }

      // add a zero for each data row that was not modified in the previous loop
      foreach (var row in symbolFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1))
        row.Values.Add(0.0);

      return base.Apply();
    }

    /// <summary>
    /// Calculates the relative frequency of each symbol over all nodes of the given trees.
    /// </summary>
    /// <param name="trees">The trees whose nodes are counted.</param>
    /// <param name="aggregateDifferentNumberOfSubtrees">
    /// If <c>true</c>, nodes are grouped by symbol name only; otherwise the key is the symbol
    /// name followed by "-" and the number of subtrees of the node.
    /// </param>
    /// <returns>
    /// One pair per distinct key, whose value is the number of nodes with that key divided by
    /// the total number of nodes. The sequence is empty if no nodes were found. As this is an
    /// iterator method, the trees are not traversed before the result is enumerated.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateSymbolFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateDifferentNumberOfSubtrees = true) {
      Dictionary<string, double> symbolFrequencies = new Dictionary<string, double>();
      int totalNumberOfSymbols = 0;

      foreach (var tree in trees) {
        foreach (var node in tree.IterateNodesPrefix()) {
          string symbolName;
          if (aggregateDifferentNumberOfSubtrees) symbolName = node.Symbol.Name;
          else symbolName = node.Symbol.Name + "-" + node.SubtreesCount;

          // single lookup: count stays 0.0 if the symbol has not been seen before
          double count;
          symbolFrequencies.TryGetValue(symbolName, out count);
          symbolFrequencies[symbolName] = count + 1;
          totalNumberOfSymbols++;
        }
      }

      // totalNumberOfSymbols can only be zero if the dictionary is empty, so no division by zero occurs
      foreach (var pair in symbolFrequencies)
        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.