Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs @ 16375

Last change on this file since 16375 was 16272, checked in by bburlacu, 6 years ago

#2950: Refactor hash extensions and utility methods: hashes are computed from byte[] arrays, simplification accepts an argument specifying which hash function to use. Update SymbolicDataAnalysisBuildingBlockAnalyzer and SymbolicDataAnalysisExpressionDiversityPreservingCrossover.

File size: 8.4 KB
RevLine 
[16255]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[16272]22using System;
[16255]23using System.Collections.Generic;
24using System.Linq;
[16258]25using System.Text;
[16255]26using HeuristicLab.Analysis;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;
35
36namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers {
37  [Item("SymbolicDataAnalysisBuildingBlockAnalyzer", "An analyzer that uses tree hashing to identify the most common subtrees (building blocks) in the population")]
38  [StorableClass]
39  public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer {
40    private const string BuildingBlocksResultName = "BuildingBlocks";
[16272]41    private const string SolutionUniquenessResultName = "SolutionUniqueness";
[16255]42    private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength";
43    private const string SimplifyTreesParameterName = "SimplifyTrees";
44
[16263]45    private Dictionary<ulong, DataRow> hashToRow = new Dictionary<ulong, DataRow>();
[16255]46
[16258]47    #region parameters
[16255]48    public IValueLookupParameter<IntValue> MinimumSubtreeLengthParameter {
49      get { return (IValueLookupParameter<IntValue>)Parameters[MinimumSubtreeLengthParameterName]; }
50    }
51
52    public IValueLookupParameter<BoolValue> SimplifyTreesParameter {
53      get { return (IValueLookupParameter<BoolValue>)Parameters[SimplifyTreesParameterName]; }
54    }
[16258]55    #endregion
[16255]56
[16258]57    #region parameter properties
[16255]58    public IntValue MinimumSubtreeLength {
59      get { return MinimumSubtreeLengthParameter.ActualValue; }
60    }
61
62    public BoolValue SimplifyTrees {
63      get { return SimplifyTreesParameter.ActualValue; }
64    }
[16258]65    #endregion
[16255]66
67    public override void InitializeState() {
68      base.InitializeState();
69
[16263]70      hashToRow = new Dictionary<ulong, DataRow>();
[16255]71    }
72
73    [StorableHook(HookType.AfterDeserialization)]
74    private void AfterDeserialization() {
75      if (!Parameters.ContainsKey(SimplifyTreesParameterName)) {
76        Parameters.Add(new ValueLookupParameter<BoolValue>(SimplifyTreesParameterName, new BoolValue(false)));
77      }
78    }
79
80    public SymbolicDataAnalysisBuildingBlockAnalyzer() {
81      Parameters.Add(new ValueLookupParameter<IntValue>(MinimumSubtreeLengthParameterName, new IntValue(3)));
82      Parameters.Add(new ValueLookupParameter<BoolValue>(SimplifyTreesParameterName, new BoolValue(false)));
83    }
84
85    private SymbolicDataAnalysisBuildingBlockAnalyzer(SymbolicDataAnalysisBuildingBlockAnalyzer original, Cloner cloner) : base(original, cloner) {
86    }
87
88    public override IDeepCloneable Clone(Cloner cloner) {
89      return new SymbolicDataAnalysisBuildingBlockAnalyzer(this, cloner);
90    }
91
[16259]92    [StorableConstructor]
93    private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { }
94
[16272]95    private readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash;
96
[16255]97    public override IOperation Apply() {
98      DataTable dt;
99
100      if (!ResultCollection.ContainsKey(BuildingBlocksResultName)) {
101        dt = new DataTable(BuildingBlocksResultName);
102        ResultCollection.Add(new Result(BuildingBlocksResultName, dt));
103      } else {
104        dt = (DataTable)ResultCollection[BuildingBlocksResultName].Value;
105      }
106
107      var minLength = MinimumSubtreeLength.Value - 1; // -1 because the HashNode.Size property returns the size without current node (-1)
108      var simplify = SimplifyTrees.Value;
109
[16263]110      var expressions = new Dictionary<ulong, string>();
111      var expressionCounts = new Dictionary<ulong, int>();
[16255]112
[16258]113      int totalCount = 0; // total number of examined subtrees
114
[16272]115      var hashes = new List<ulong>();
[16258]116      // count hashes
[16255]117      foreach (var tree in SymbolicExpressionTree) {
118        var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes();
[16272]119        var simplified = simplify ? hashNodes.Simplify(hashFunction) : hashNodes.Sort(hashFunction);
120        hashes.Add(simplified.Last().CalculatedHashValue); // maybe calculate aggregate hash instead
[16255]121
122        for (int i = 0; i < simplified.Length; i++) {
123          HashNode<ISymbolicExpressionTreeNode> s = simplified[i];
[16271]124          if (s.IsLeaf || s.Size < minLength) {
[16255]125            continue;
126          }
127          ++totalCount;
128          var hash = s.CalculatedHashValue;
[16258]129          if (expressions.ContainsKey(hash)) {
[16255]130            expressionCounts[hash]++;
[16258]131            continue;
132          }
[16255]133
[16258]134          var sb = new StringBuilder();
135          for (int j = i - s.Size; j < i; ++j) {
136            sb.Append(GetLabel(simplified[j].Data)).Append(" ");
[16255]137          }
[16258]138          sb.Append(GetLabel(simplified[i].Data));
139          expressions[hash] = sb.ToString();
140          expressionCounts[hash] = 1;
[16255]141        }
142      }
143
[16258]144      // fill in values for existing rows
[16255]145      foreach (var t in hashToRow) {
146        var hash = t.Key;
147        var row = t.Value;
148
[16258]149        expressionCounts.TryGetValue(hash, out int count);
150        row.Values.Add(count);
[16255]151      }
152
153      var nValues = dt.Rows.Any() ? dt.Rows.Max(x => x.Values.Count) : 0;
154
[16258]155      // check if we have new rows
156      foreach (var t in expressionCounts.OrderByDescending(x => x.Value).Take(10)) {
157        var hash = t.Key;
158        var count = t.Value;
159        var label = expressions[hash];
[16255]160
161        if (hashToRow.ContainsKey(hash)) {
162          continue;
163        }
164        var row = new DataRow(label) { VisualProperties = { StartIndexZero = true } };
[16258]165        if (nValues > 0) {
166          row.Values.AddRange(Enumerable.Repeat<double>(0, nValues - 1)); // pad with zeroes
[16255]167        }
[16258]168        row.Values.Add(count);
[16255]169        dt.Rows.Add(row);
170        hashToRow[hash] = row;
171      }
[16258]172
[16272]173      // compute solution uniqueness
174      DataTableHistory dth;
175      var counts = hashes.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count());
176      if (!ResultCollection.ContainsKey(SolutionUniquenessResultName)) {
177        dth = new DataTableHistory();
178        ResultCollection.Add(new Result(SolutionUniquenessResultName, dth));
179      } else {
180        dth = (DataTableHistory)ResultCollection[SolutionUniquenessResultName].Value;
181      }
182
183      var ct = new DataTable("Unique Solutions");
184      var ctr = new DataRow { VisualProperties = { StartIndexZero = true, ChartType = DataRowVisualProperties.DataRowChartType.Columns } };
185      ctr.Values.AddRange(hashes.Select(x => (double)counts[x]).OrderByDescending(x => x));
186      ct.Rows.Add(ctr);
187      dth.Add(ct);
188
189      var max = dth.Max(x => x.Rows.First().Values.Max());
190      foreach (var table in dth) {
191        table.VisualProperties.YAxisMinimumAuto = false;
192        table.VisualProperties.YAxisMaximumAuto = false;
193        table.VisualProperties.YAxisMinimumFixedValue = 0;
194        table.VisualProperties.YAxisMaximumFixedValue = max;
195      }
196
[16255]197      return base.Apply();
198    }
[16258]199
200    private static string GetLabel(ISymbolicExpressionTreeNode node) {
201      if (node is ConstantTreeNode constant) {
202        return "C";
203      }
204      if (node is VariableTreeNode variable) {
205        return variable.VariableName;
206      }
207      if (node.Symbol is Addition) {
208        return "+";
209      }
210      if (node.Symbol is Subtraction) {
211        return "-";
212      }
213      if (node.Symbol is Multiplication) {
214        return "*";
215      }
216      if (node.Symbol is Division) {
217        return "/";
218      }
219      return node.Symbol.ToString();
220    }
[16255]221  }
222}
Note: See TracBrowser for help on using the repository browser.