Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs @ 16371

Last change on this file since 16371 was 16272, checked in by bburlacu, 6 years ago

#2950: Refactor hash extensions and utility methods: hashes are computed from byte[] arrays, simplification accepts an argument specifying which hash function to use. Update SymbolicDataAnalysisBuildingBlockAnalyzer and SymbolicDataAnalysisExpressionDiversityPreservingCrossover.

File size: 8.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Analysis;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;
35
36namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers {
37  [Item("SymbolicDataAnalysisBuildingBlockAnalyzer", "An analyzer that uses tree hashing to identify the most common subtrees (building blocks) in the population")]
38  [StorableClass]
39  public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer {
40    private const string BuildingBlocksResultName = "BuildingBlocks";
41    private const string SolutionUniquenessResultName = "SolutionUniqueness";
42    private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength";
43    private const string SimplifyTreesParameterName = "SimplifyTrees";
44
45    private Dictionary<ulong, DataRow> hashToRow = new Dictionary<ulong, DataRow>();
46
47    #region parameters
48    public IValueLookupParameter<IntValue> MinimumSubtreeLengthParameter {
49      get { return (IValueLookupParameter<IntValue>)Parameters[MinimumSubtreeLengthParameterName]; }
50    }
51
52    public IValueLookupParameter<BoolValue> SimplifyTreesParameter {
53      get { return (IValueLookupParameter<BoolValue>)Parameters[SimplifyTreesParameterName]; }
54    }
55    #endregion
56
57    #region parameter properties
58    public IntValue MinimumSubtreeLength {
59      get { return MinimumSubtreeLengthParameter.ActualValue; }
60    }
61
62    public BoolValue SimplifyTrees {
63      get { return SimplifyTreesParameter.ActualValue; }
64    }
65    #endregion
66
67    public override void InitializeState() {
68      base.InitializeState();
69
70      hashToRow = new Dictionary<ulong, DataRow>();
71    }
72
73    [StorableHook(HookType.AfterDeserialization)]
74    private void AfterDeserialization() {
75      if (!Parameters.ContainsKey(SimplifyTreesParameterName)) {
76        Parameters.Add(new ValueLookupParameter<BoolValue>(SimplifyTreesParameterName, new BoolValue(false)));
77      }
78    }
79
80    public SymbolicDataAnalysisBuildingBlockAnalyzer() {
81      Parameters.Add(new ValueLookupParameter<IntValue>(MinimumSubtreeLengthParameterName, new IntValue(3)));
82      Parameters.Add(new ValueLookupParameter<BoolValue>(SimplifyTreesParameterName, new BoolValue(false)));
83    }
84
85    private SymbolicDataAnalysisBuildingBlockAnalyzer(SymbolicDataAnalysisBuildingBlockAnalyzer original, Cloner cloner) : base(original, cloner) {
86    }
87
88    public override IDeepCloneable Clone(Cloner cloner) {
89      return new SymbolicDataAnalysisBuildingBlockAnalyzer(this, cloner);
90    }
91
92    [StorableConstructor]
93    private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { }
94
95    private readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash;
96
97    public override IOperation Apply() {
98      DataTable dt;
99
100      if (!ResultCollection.ContainsKey(BuildingBlocksResultName)) {
101        dt = new DataTable(BuildingBlocksResultName);
102        ResultCollection.Add(new Result(BuildingBlocksResultName, dt));
103      } else {
104        dt = (DataTable)ResultCollection[BuildingBlocksResultName].Value;
105      }
106
107      var minLength = MinimumSubtreeLength.Value - 1; // -1 because the HashNode.Size property returns the size without current node (-1)
108      var simplify = SimplifyTrees.Value;
109
110      var expressions = new Dictionary<ulong, string>();
111      var expressionCounts = new Dictionary<ulong, int>();
112
113      int totalCount = 0; // total number of examined subtrees
114
115      var hashes = new List<ulong>();
116      // count hashes
117      foreach (var tree in SymbolicExpressionTree) {
118        var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes();
119        var simplified = simplify ? hashNodes.Simplify(hashFunction) : hashNodes.Sort(hashFunction);
120        hashes.Add(simplified.Last().CalculatedHashValue); // maybe calculate aggregate hash instead
121
122        for (int i = 0; i < simplified.Length; i++) {
123          HashNode<ISymbolicExpressionTreeNode> s = simplified[i];
124          if (s.IsLeaf || s.Size < minLength) {
125            continue;
126          }
127          ++totalCount;
128          var hash = s.CalculatedHashValue;
129          if (expressions.ContainsKey(hash)) {
130            expressionCounts[hash]++;
131            continue;
132          }
133
134          var sb = new StringBuilder();
135          for (int j = i - s.Size; j < i; ++j) {
136            sb.Append(GetLabel(simplified[j].Data)).Append(" ");
137          }
138          sb.Append(GetLabel(simplified[i].Data));
139          expressions[hash] = sb.ToString();
140          expressionCounts[hash] = 1;
141        }
142      }
143
144      // fill in values for existing rows
145      foreach (var t in hashToRow) {
146        var hash = t.Key;
147        var row = t.Value;
148
149        expressionCounts.TryGetValue(hash, out int count);
150        row.Values.Add(count);
151      }
152
153      var nValues = dt.Rows.Any() ? dt.Rows.Max(x => x.Values.Count) : 0;
154
155      // check if we have new rows
156      foreach (var t in expressionCounts.OrderByDescending(x => x.Value).Take(10)) {
157        var hash = t.Key;
158        var count = t.Value;
159        var label = expressions[hash];
160
161        if (hashToRow.ContainsKey(hash)) {
162          continue;
163        }
164        var row = new DataRow(label) { VisualProperties = { StartIndexZero = true } };
165        if (nValues > 0) {
166          row.Values.AddRange(Enumerable.Repeat<double>(0, nValues - 1)); // pad with zeroes
167        }
168        row.Values.Add(count);
169        dt.Rows.Add(row);
170        hashToRow[hash] = row;
171      }
172
173      // compute solution uniqueness
174      DataTableHistory dth;
175      var counts = hashes.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count());
176      if (!ResultCollection.ContainsKey(SolutionUniquenessResultName)) {
177        dth = new DataTableHistory();
178        ResultCollection.Add(new Result(SolutionUniquenessResultName, dth));
179      } else {
180        dth = (DataTableHistory)ResultCollection[SolutionUniquenessResultName].Value;
181      }
182
183      var ct = new DataTable("Unique Solutions");
184      var ctr = new DataRow { VisualProperties = { StartIndexZero = true, ChartType = DataRowVisualProperties.DataRowChartType.Columns } };
185      ctr.Values.AddRange(hashes.Select(x => (double)counts[x]).OrderByDescending(x => x));
186      ct.Rows.Add(ctr);
187      dth.Add(ct);
188
189      var max = dth.Max(x => x.Rows.First().Values.Max());
190      foreach (var table in dth) {
191        table.VisualProperties.YAxisMinimumAuto = false;
192        table.VisualProperties.YAxisMaximumAuto = false;
193        table.VisualProperties.YAxisMinimumFixedValue = 0;
194        table.VisualProperties.YAxisMaximumFixedValue = max;
195      }
196
197      return base.Apply();
198    }
199
200    private static string GetLabel(ISymbolicExpressionTreeNode node) {
201      if (node is ConstantTreeNode constant) {
202        return "C";
203      }
204      if (node is VariableTreeNode variable) {
205        return variable.VariableName;
206      }
207      if (node.Symbol is Addition) {
208        return "+";
209      }
210      if (node.Symbol is Subtraction) {
211        return "-";
212      }
213      if (node.Symbol is Multiplication) {
214        return "*";
215      }
216      if (node.Symbol is Division) {
217        return "/";
218      }
219      return node.Symbol.ToString();
220    }
221  }
222}
Note: See TracBrowser for help on using the repository browser.