source: trunk/sources/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.4/Analyzers/SymbolicExpressionSymbolFrequencyAnalyzer.cs @ 5983

Last change on this file since 5983 was 5983, checked in by mkommend, 8 years ago

#1418: Added after deserialization hook to SymbolicExpressionSymbolFrequencyAnalyzer for newly added parameter.

File size: 7.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Encodings.SymbolicExpressionTreeEncoding {
34  /// <summary>
35  /// An operator that tracks the frequencies of distinct symbols in symbolic expression trees.
36  /// </summary>
37  [Item("SymbolicExpressionSymbolFrequencyAnalyzer", "An operator that tracks frequencies of symbols in symbolic expression trees.")]
38  [StorableClass]
39  public class SymbolicExpressionSymbolFrequencyAnalyzer : SingleSuccessorOperator, ISymbolicExpressionTreeAnalyzer {
40    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
41    private const string ResultsParameterName = "Results";
42    private const string SymbolFrequenciesParameterName = "SymbolFrequencies";
43    private const string AggregateSymbolsWithDifferentSubtreeCountParameterName = "AggregateSymbolsWithDifferentSubtreeCount";
44
45    #region parameter properties
46    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
47      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
48    }
49    public ILookupParameter<DataTable> SymbolFrequenciesParameter {
50      get { return (ILookupParameter<DataTable>)Parameters[SymbolFrequenciesParameterName]; }
51    }
52    public ILookupParameter<ResultCollection> ResultsParameter {
53      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
54    }
55    public IValueParameter<BoolValue> AggregateSymbolsWithDifferentSubtreeCountParameter {
56      get { return (IValueParameter<BoolValue>)Parameters[AggregateSymbolsWithDifferentSubtreeCountParameterName]; }
57    }
58    #endregion
59    #region properties
60    public BoolValue AggregrateSymbolsWithDifferentSubtreeCount {
61      get { return AggregateSymbolsWithDifferentSubtreeCountParameter.Value; }
62      set { AggregateSymbolsWithDifferentSubtreeCountParameter.Value = value; }
63    }
64    #endregion
65
66    [StorableConstructor]
67    protected SymbolicExpressionSymbolFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
68    protected SymbolicExpressionSymbolFrequencyAnalyzer(SymbolicExpressionSymbolFrequencyAnalyzer original, Cloner cloner) : base(original, cloner) { }
69    public SymbolicExpressionSymbolFrequencyAnalyzer()
70      : base() {
71      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
72      Parameters.Add(new LookupParameter<DataTable>(SymbolFrequenciesParameterName, "The data table to store the symbol frequencies."));
73      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the symbol frequencies should be stored."));
74      Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, "Flag that indicates if the frequencies of symbols with the same name but different number of sub-trees should be aggregated.", new BoolValue(true)));
75    }
76    public override IDeepCloneable Clone(Cloner cloner) {
77      return new SymbolicExpressionSymbolFrequencyAnalyzer(this, cloner);
78    }
79
80    [StorableHook(HookType.AfterDeserialization)]
81    private void AfterDeserialization() {
82      #region remove with HL 3.4
83      if (!Parameters.ContainsKey(AggregateSymbolsWithDifferentSubtreeCountParameterName))
84        Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, "Flag that indicates if the frequencies of symbols with the same name but different number of sub-trees should be aggregated.", new BoolValue(true)));
85      #endregion
86    }
87
88    public override IOperation Apply() {
89      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
90      ResultCollection results = ResultsParameter.ActualValue;
91      DataTable symbolFrequencies = SymbolFrequenciesParameter.ActualValue;
92      if (symbolFrequencies == null) {
93        symbolFrequencies = new DataTable("Symbol frequencies", "Relative frequency of symbols aggregated over the whole population.");
94        symbolFrequencies.VisualProperties.YAxisTitle = "Relative Symbol Frequency";
95
96        SymbolFrequenciesParameter.ActualValue = symbolFrequencies;
97        results.Add(new Result("Symbol frequencies", symbolFrequencies));
98      }
99
100      // all rows must have the same number of values so we can just take the first
101      int numberOfValues = symbolFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
102
103      foreach (var pair in SymbolicExpressionSymbolFrequencyAnalyzer.CalculateSymbolFrequencies(expressions, AggregrateSymbolsWithDifferentSubtreeCount.Value)) {
104        if (!symbolFrequencies.Rows.ContainsKey(pair.Key)) {
105          // initialize a new row for the symbol and pad with zeros
106          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
107          row.VisualProperties.StartIndexZero = true;
108          symbolFrequencies.Rows.Add(row);
109        }
110        symbolFrequencies.Rows[pair.Key].Values.Add(pair.Value);
111      }
112
113      // add a zero for each data row that was not modified in the previous loop
114      foreach (var row in symbolFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1))
115        row.Values.Add(0.0);
116
117      return base.Apply();
118    }
119
120    public static IEnumerable<KeyValuePair<string, double>> CalculateSymbolFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateDifferentNumberOfSubtrees = true) {
121      Dictionary<string, double> symbolFrequencies = new Dictionary<string, double>();
122      int totalNumberOfSymbols = 0;
123
124      foreach (var tree in trees) {
125        foreach (var node in tree.IterateNodesPrefix()) {
126          string symbolName;
127          if (aggregateDifferentNumberOfSubtrees) symbolName = node.Symbol.Name;
128          else symbolName = node.Symbol.Name + "-" + node.SubtreesCount;
129          if (symbolFrequencies.ContainsKey(symbolName)) symbolFrequencies[symbolName] += 1;
130          else symbolFrequencies.Add(symbolName, 1);
131          totalNumberOfSymbols++;
132        }
133      }
134
135      foreach (var pair in symbolFrequencies)
136        yield return new KeyValuePair<string, double>(pair.Key, pair.Value / totalNumberOfSymbols);
137    }
138  }
139}
Note: See TracBrowser for help on using the repository browser.