Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs @ 16259

Last change on this file since 16259 was 16259, checked in by bburlacu, 5 years ago

#2950: Add storable constructor.

File size: 6.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using System.Text;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers {
  /// <summary>
  /// Analyzer that hashes the subtrees of every tree in the population and records, each time it is applied,
  /// how often the most frequent subtrees ("building blocks") occur. The counts are written as data rows of a
  /// <see cref="DataTable"/> stored in the result collection under the name "BuildingBlocks".
  /// </summary>
  [Item("SymbolicDataAnalysisBuildingBlockAnalyzer", "An analyzer that uses tree hashing to identify the most common subtrees (building blocks) in the population")]
  [StorableClass]
  public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer {
    private const string BuildingBlocksResultName = "BuildingBlocks";
    private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength";
    private const string SimplifyTreesParameterName = "SimplifyTrees";

    // Number of most frequent building blocks per Apply() call that are considered for a new data row.
    private const int TopBuildingBlocksPerApply = 10;

    // Maps a subtree hash to the data row that receives its count on every Apply() call.
    // The map is persisted and cloned together with the analyzer: without it a restored or cloned analyzer
    // would stop updating the rows already in the result table and would add new rows carrying the same labels.
    [Storable]
    private Dictionary<int, DataRow> hashToRow = new Dictionary<int, DataRow>();

    #region parameters
    /// <summary>Parameter holding the minimum length a subtree must have to be counted.</summary>
    public IValueLookupParameter<IntValue> MinimumSubtreeLengthParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[MinimumSubtreeLengthParameterName]; }
    }

    /// <summary>Parameter deciding whether the hashed trees are simplified (true) or only sorted (false).</summary>
    public IValueLookupParameter<BoolValue> SimplifyTreesParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[SimplifyTreesParameterName]; }
    }
    #endregion

    #region parameter properties
    /// <summary>Actual value of <see cref="MinimumSubtreeLengthParameter"/>.</summary>
    public IntValue MinimumSubtreeLength {
      get { return MinimumSubtreeLengthParameter.ActualValue; }
    }

    /// <summary>Actual value of <see cref="SimplifyTreesParameter"/>.</summary>
    public BoolValue SimplifyTrees {
      get { return SimplifyTreesParameter.ActualValue; }
    }
    #endregion

    /// <summary>Resets the analyzer state by discarding all tracked building-block rows.</summary>
    public override void InitializeState() {
      base.InitializeState();

      hashToRow = new Dictionary<int, DataRow>();
    }

    /// <summary>Restores members that are missing in files written by older versions of this class.</summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(SimplifyTreesParameterName)) {
        Parameters.Add(new ValueLookupParameter<BoolValue>(SimplifyTreesParameterName, new BoolValue(false)));
      }
      // Defensive: files written before hashToRow was marked [Storable] contain no value for it.
      if (hashToRow == null) {
        hashToRow = new Dictionary<int, DataRow>();
      }
    }

    /// <summary>Creates the analyzer with a minimum subtree length of 3 and simplification switched off.</summary>
    public SymbolicDataAnalysisBuildingBlockAnalyzer() {
      Parameters.Add(new ValueLookupParameter<IntValue>(MinimumSubtreeLengthParameterName, new IntValue(3)));
      Parameters.Add(new ValueLookupParameter<BoolValue>(SimplifyTreesParameterName, new BoolValue(false)));
    }

    /// <summary>Cloning constructor; copies the hash-to-row map so the clone keeps tracking the same rows.</summary>
    private SymbolicDataAnalysisBuildingBlockAnalyzer(SymbolicDataAnalysisBuildingBlockAnalyzer original, Cloner cloner) : base(original, cloner) {
      // The rows are obtained through the cloner so that they can resolve to the same row clones that a result
      // table cloned by this cloner contains (assumes the cloner maps each original to a single clone - TODO confirm).
      hashToRow = original.hashToRow.ToDictionary(x => x.Key, x => cloner.Clone(x.Value));
    }

    /// <summary>Creates a deep clone of this analyzer using the given <paramref name="cloner"/>.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisBuildingBlockAnalyzer(this, cloner);
    }

    [StorableConstructor]
    private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Counts the hashed subtrees of all trees, appends the current count to every row that is already tracked and
    /// adds a new row for each of the most frequent building blocks that is not tracked yet.
    /// </summary>
    /// <returns>The operation returned by the base class implementation.</returns>
    public override IOperation Apply() {
      var dt = GetOrCreateBuildingBlocksTable();

      var minLength = MinimumSubtreeLength.Value - 1; // -1 because the HashNode.Size property returns the size without current node (-1)
      var simplify = SimplifyTrees.Value;

      var expressions = new Dictionary<int, string>(); // hash -> label of the first subtree seen with that hash
      var expressionCounts = new Dictionary<int, int>(); // hash -> number of occurrences in this call

      // count hashes
      foreach (var tree in SymbolicExpressionTree) {
        var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes();
        var simplified = simplify ? hashNodes.Simplify() : hashNodes.Sort();

        for (int i = 0; i < simplified.Length; i++) {
          HashNode<ISymbolicExpressionTreeNode> s = simplified[i];
          if (s.IsChild || s.Size < minLength) {
            continue;
          }
          var hash = s.CalculatedHashValue;
          if (expressionCounts.TryGetValue(hash, out int seen)) {
            expressionCounts[hash] = seen + 1;
            continue;
          }

          // First occurrence: the label is built from the s.Size entries directly before index i followed by the
          // entry at i itself (presumably the subtree's nodes with the root last - confirm against the hashing code).
          var sb = new StringBuilder();
          for (int j = i - s.Size; j < i; ++j) {
            sb.Append(GetLabel(simplified[j].Data)).Append(" ");
          }
          sb.Append(GetLabel(simplified[i].Data));
          expressions[hash] = sb.ToString();
          expressionCounts[hash] = 1;
        }
      }

      // fill in values for existing rows
      foreach (var t in hashToRow) {
        // count stays 0 when the tracked building block did not occur in this call
        expressionCounts.TryGetValue(t.Key, out int count);
        t.Value.Values.Add(count);
      }

      // length of the longest row, which already includes the value appended above
      var nValues = dt.Rows.Any() ? dt.Rows.Max(x => x.Values.Count) : 0;

      // check if we have new rows
      foreach (var t in expressionCounts.OrderByDescending(x => x.Value).Take(TopBuildingBlocksPerApply)) {
        var hash = t.Key;
        if (hashToRow.ContainsKey(hash)) {
          continue;
        }
        var row = new DataRow(expressions[hash]) { VisualProperties = { StartIndexZero = true } };
        if (nValues > 0) {
          row.Values.AddRange(Enumerable.Repeat<double>(0, nValues - 1)); // pad with zeroes
        }
        row.Values.Add(t.Value);
        dt.Rows.Add(row);
        hashToRow[hash] = row;
      }

      return base.Apply();
    }

    // Returns the building-block table from the result collection, creating and registering it on first use.
    private DataTable GetOrCreateBuildingBlocksTable() {
      if (ResultCollection.ContainsKey(BuildingBlocksResultName)) {
        return (DataTable)ResultCollection[BuildingBlocksResultName].Value;
      }
      var dt = new DataTable(BuildingBlocksResultName);
      ResultCollection.Add(new Result(BuildingBlocksResultName, dt));
      return dt;
    }

    // Short textual label for a node: "C" for constants, the variable name for variables, an operator sign for
    // the four arithmetic symbols and the symbol's string representation for everything else.
    private static string GetLabel(ISymbolicExpressionTreeNode node) {
      if (node is ConstantTreeNode) {
        return "C";
      }
      if (node is VariableTreeNode variable) {
        return variable.VariableName;
      }
      if (node.Symbol is Addition) {
        return "+";
      }
      if (node.Symbol is Subtraction) {
        return "-";
      }
      if (node.Symbol is Multiplication) {
        return "*";
      }
      if (node.Symbol is Division) {
        return "/";
      }
      return node.Symbol.ToString();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.