Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PersistenceOverhaul/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.4/Analyzers/SymbolicExpressionSymbolFrequencyAnalyzer.cs @ 15428

Last change on this file since 15428 was 14711, checked in by gkronber, 8 years ago

#2520

  • renamed StorableClass -> StorableType
  • changed persistence to use GUIDs instead of type names
File size: 7.5 KB
RevLine 
[13368]1#region License Information
[5386]2/* HeuristicLab
[12012]3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[5386]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[6709]22using System;
[5386]23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
[5983]28using HeuristicLab.Data;
[5386]29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33
[5499]34namespace HeuristicLab.Encodings.SymbolicExpressionTreeEncoding {
[5386]35  /// <summary>
[5499]36  /// An operator that tracks the frequencies of distinct symbols in symbolic expression trees.
[5386]37  /// </summary>
[5499]38  [Item("SymbolicExpressionSymbolFrequencyAnalyzer", "An operator that tracks frequencies of symbols in symbolic expression trees.")]
[14711]39  [StorableType("29CBFC73-BB85-4583-B18C-5632F2D46004")]
[5386]40  public class SymbolicExpressionSymbolFrequencyAnalyzer : SingleSuccessorOperator, ISymbolicExpressionTreeAnalyzer {
41    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
42    private const string ResultsParameterName = "Results";
43    private const string SymbolFrequenciesParameterName = "SymbolFrequencies";
[5971]44    private const string AggregateSymbolsWithDifferentSubtreeCountParameterName = "AggregateSymbolsWithDifferentSubtreeCount";
[5386]45
46    #region parameter properties
[5510]47    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
48      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
[5386]49    }
50    public ILookupParameter<DataTable> SymbolFrequenciesParameter {
51      get { return (ILookupParameter<DataTable>)Parameters[SymbolFrequenciesParameterName]; }
52    }
53    public ILookupParameter<ResultCollection> ResultsParameter {
54      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
55    }
[5971]56    public IValueParameter<BoolValue> AggregateSymbolsWithDifferentSubtreeCountParameter {
57      get { return (IValueParameter<BoolValue>)Parameters[AggregateSymbolsWithDifferentSubtreeCountParameterName]; }
58    }
[5386]59    #endregion
60    #region properties
[7172]61    public virtual bool EnabledByDefault {
62      get { return true; }
63    }
[5971]64    public BoolValue AggregrateSymbolsWithDifferentSubtreeCount {
65      get { return AggregateSymbolsWithDifferentSubtreeCountParameter.Value; }
66      set { AggregateSymbolsWithDifferentSubtreeCountParameter.Value = value; }
[5386]67    }
68    #endregion
69
70    [StorableConstructor]
71    protected SymbolicExpressionSymbolFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
72    protected SymbolicExpressionSymbolFrequencyAnalyzer(SymbolicExpressionSymbolFrequencyAnalyzer original, Cloner cloner) : base(original, cloner) { }
73    public SymbolicExpressionSymbolFrequencyAnalyzer()
74      : base() {
[5510]75      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
[5971]76      Parameters.Add(new LookupParameter<DataTable>(SymbolFrequenciesParameterName, "The data table to store the symbol frequencies."));
[5499]77      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the symbol frequencies should be stored."));
[5971]78      Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, "Flag that indicates if the frequencies of symbols with the same name but different number of sub-trees should be aggregated.", new BoolValue(true)));
[5386]79    }
80    public override IDeepCloneable Clone(Cloner cloner) {
81      return new SymbolicExpressionSymbolFrequencyAnalyzer(this, cloner);
82    }
83
[5983]84    [StorableHook(HookType.AfterDeserialization)]
85    private void AfterDeserialization() {
86      #region remove with HL 3.4
87      if (!Parameters.ContainsKey(AggregateSymbolsWithDifferentSubtreeCountParameterName))
88        Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, "Flag that indicates if the frequencies of symbols with the same name but different number of sub-trees should be aggregated.", new BoolValue(true)));
89      #endregion
90    }
91
[5386]92    public override IOperation Apply() {
[5510]93      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
[5386]94      ResultCollection results = ResultsParameter.ActualValue;
[5971]95      DataTable symbolFrequencies = SymbolFrequenciesParameter.ActualValue;
96      if (symbolFrequencies == null) {
97        symbolFrequencies = new DataTable("Symbol frequencies", "Relative frequency of symbols aggregated over the whole population.");
98        symbolFrequencies.VisualProperties.YAxisTitle = "Relative Symbol Frequency";
[5386]99
[5971]100        SymbolFrequenciesParameter.ActualValue = symbolFrequencies;
101        results.Add(new Result("Symbol frequencies", symbolFrequencies));
[5386]102      }
103
[5392]104      // all rows must have the same number of values so we can just take the first
[5971]105      int numberOfValues = symbolFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
[5392]106
[5971]107      foreach (var pair in SymbolicExpressionSymbolFrequencyAnalyzer.CalculateSymbolFrequencies(expressions, AggregrateSymbolsWithDifferentSubtreeCount.Value)) {
108        if (!symbolFrequencies.Rows.ContainsKey(pair.Key)) {
[5392]109          // initialize a new row for the symbol and pad with zeros
110          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
[5386]111          row.VisualProperties.StartIndexZero = true;
[5971]112          symbolFrequencies.Rows.Add(row);
[5386]113        }
[6709]114        symbolFrequencies.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
[5386]115      }
116
[5392]117      // add a zero for each data row that was not modified in the previous loop
[5971]118      foreach (var row in symbolFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1))
[5386]119        row.Values.Add(0.0);
120
121      return base.Apply();
122    }
123
[5971]124    public static IEnumerable<KeyValuePair<string, double>> CalculateSymbolFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateDifferentNumberOfSubtrees = true) {
[5386]125      Dictionary<string, double> symbolFrequencies = new Dictionary<string, double>();
126      int totalNumberOfSymbols = 0;
127
128      foreach (var tree in trees) {
129        foreach (var node in tree.IterateNodesPrefix()) {
[5971]130          string symbolName;
131          if (aggregateDifferentNumberOfSubtrees) symbolName = node.Symbol.Name;
[6803]132          else symbolName = node.Symbol.Name + "-" + node.SubtreeCount;
[5971]133          if (symbolFrequencies.ContainsKey(symbolName)) symbolFrequencies[symbolName] += 1;
134          else symbolFrequencies.Add(symbolName, 1);
[5386]135          totalNumberOfSymbols++;
136        }
137      }
138
[5392]139      foreach (var pair in symbolFrequencies)
140        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
[5386]141    }
142  }
143}
Note: See TracBrowser for help on using the repository browser.