Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

Last change on this file was 18143, checked in by chaider, 3 years ago

#3140 merged branch into trunk

File size: 15.2 KB
RevLine 
[16255]1#region License Information
2/* HeuristicLab
[17180]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[16255]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[16284]22using System;
[16382]23using System.Collections.Generic;
[16218]24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;
27
28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
29  public static class SymbolicExpressionTreeHash {
[18132]30    private static readonly Number number = new Number();
[16218]31
[16478]32    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
[17132]33    public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input);
[16218]34
[16478]35    #region tree hashing
36    public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
37      return tree.ActualRoot().Hash(simplify, strict);
[16218]38    }
39
[16478]40    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
[17132]41      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction);
[16478]42      var hashes = new ulong[hashNodes.Length];
43      for (int i = 0; i < hashes.Length; ++i) {
44        hashes[i] = hashNodes[i].CalculatedHashValue;
45      }
46      return hashes;
[16284]47    }
48
[16478]49    public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
50      return ComputeHash(tree.ActualRoot(), simplify, strict);
51    }
[16284]52
[16478]53    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {
54      return treeNode.Hash(simplify, strict).Last();
55    }
[16284]56
[16478]57    public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {
58      var symbol = node.Symbol;
59      var name = symbol.Name;
[18132]60      if (node is INumericTreeNode numNode) {
61        name = strict ? numNode.Value.ToString() : "Number";
[16478]62      } else if (node is VariableTreeNode variableNode) {
63        name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;
[16284]64      }
[16478]65      var hash = (ulong)name.GetHashCode();
[16983]66      var hashNode = new HashNode<ISymbolicExpressionTreeNode> {
[16478]67        Data = node,
68        Arity = node.SubtreeCount,
69        Size = node.SubtreeCount,
70        IsCommutative = node.Symbol is Addition || node.Symbol is Multiplication,
71        Enabled = true,
72        HashValue = hash,
73        CalculatedHashValue = hash
74      };
75      if (symbol is Addition) {
76        hashNode.Simplify = SimplifyAddition;
77      } else if (symbol is Multiplication) {
78        hashNode.Simplify = SimplifyMultiplication;
79      } else if (symbol is Division) {
80        hashNode.Simplify = SimplifyDivision;
81      } else if (symbol is Logarithm || symbol is Exponential || symbol is Sine || symbol is Cosine) {
82        hashNode.Simplify = SimplifyUnaryNode;
83      } else if (symbol is Subtraction) {
84        hashNode.Simplify = SimplifyBinaryNode;
85      }
86      return hashNode;
87    }
[16284]88
[16478]89    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
90      return MakeNodes(tree.ActualRoot(), strict);
91    }
[16284]92
[16478]93    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {
94      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
95    }
96    #endregion
97
98    #region tree similarity
99    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {
100      return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);
101    }
102
103    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {
104      var lh = t1.Hash(simplify, strict);
105      var rh = t2.Hash(simplify, strict);
106
[16284]107      Array.Sort(lh);
108      Array.Sort(rh);
109
110      return ComputeSimilarity(lh, rh);
111    }
112
[16478]113    // requires lhs and rhs to be sorted
114    public static int IntersectCount(this ulong[] lh, ulong[] rh) {
115      int count = 0;
[16284]116      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
117        var h1 = lh[i];
118        var h2 = rh[j];
119        if (h1 == h2) {
120          ++count;
121          ++i;
122          ++j;
123        } else if (h1 < h2) {
124          ++i;
125        } else if (h1 > h2) {
126          ++j;
127        }
128      }
[16478]129      return count;
130    }
[16284]131
[16478]132    public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {
133      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
134        var h1 = lh[i];
135        var h2 = rh[j];
136        if (h1 == h2) {
137          yield return h1;
138          ++i;
139          ++j;
140        } else if (h1 < h2) {
141          ++i;
142        } else if (h1 > h2) {
143          ++j;
144        }
145      }
[16284]146    }
147
[16478]148    // this will only work if lh and rh are sorted
149    public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
150      return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);
151    }
152
[16382]153    public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
[16478]154      var total = trees.Count * (trees.Count - 1) / 2;
[16284]155      double avg = 0;
[16382]156      var hashes = new ulong[trees.Count][];
[16284]157      // build hash arrays
[16382]158      for (int i = 0; i < trees.Count; ++i) {
[16302]159        var nodes = trees[i].MakeNodes(strict);
[17132]160        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
[16284]161        Array.Sort(hashes[i]);
162      }
163      // compute similarity matrix
[16382]164      for (int i = 0; i < trees.Count - 1; ++i) {
165        for (int j = i + 1; j < trees.Count; ++j) {
[16291]166          avg += ComputeSimilarity(hashes[i], hashes[j]);
[16284]167        }
168      }
169      return avg / total;
170    }
171
[17132]172    public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) {
173      // compute similarity matrix
174      var n = hashes.Count;
175      var sim = new double[n, n];
176      for (int i = 0; i < n - 1; ++i) {
177        for (int j = i + 1; j < n; ++j) {
178          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
179        }
180      }
181      return sim;
182    }
183
[16382]184    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
185      var hashes = new ulong[trees.Count][];
[16284]186      // build hash arrays
[16382]187      for (int i = 0; i < trees.Count; ++i) {
[16302]188        var nodes = trees[i].MakeNodes(strict);
[17132]189        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
[16284]190        Array.Sort(hashes[i]);
191      }
[17132]192      return ComputeSimilarityMatrix(hashes);
[16284]193    }
[16478]194    #endregion
[16284]195
[16218]196    #region parse a nodes array back into a tree
197    public static ISymbolicExpressionTree ToTree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
198      var root = new ProgramRootSymbol().CreateTreeNode();
199      var start = new StartSymbol().CreateTreeNode();
200      root.AddSubtree(start);
201      start.AddSubtree(nodes.ToSubtree());
202      return new SymbolicExpressionTree(root);
203    }
204
205    public static ISymbolicExpressionTreeNode ToSubtree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
206      var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray();
207
208      for (int i = nodes.Length - 1; i >= 0; --i) {
209        var node = nodes[i];
210
[16267]211        if (node.IsLeaf) {
[16218]212          if (node.Data is VariableTreeNode variable) {
213            var variableTreeNode = (VariableTreeNode)treeNodes[i];
214            variableTreeNode.VariableName = variable.VariableName;
[16255]215            variableTreeNode.Weight = variable.Weight;
[18132]216          } else if (node.Data is INumericTreeNode existingNumNode) {
[18143]217            var newNumNode = (NumberTreeNode)treeNodes[i];
[18132]218            newNumNode.Value = existingNumNode.Value;
[16218]219          }
220          continue;
221        }
222
223        var treeNode = treeNodes[i];
224
225        foreach (var j in nodes.IterateChildren(i)) {
226          treeNode.AddSubtree(treeNodes[j]);
227        }
228      }
229
230      return treeNodes.Last();
231    }
232
233    private static T CreateTreeNode<T>(this ISymbol symbol) where T : class, ISymbolicExpressionTreeNode {
234      return (T)symbol.CreateTreeNode();
235    }
236    #endregion
237
238    #region tree simplification
239    // these simplification methods rely on the assumption that child nodes of the current node have already been simplified
[16382]240    // (in other words simplification should be applied in a bottom-up fashion)
[16218]241    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
[17132]242      return tree.MakeNodes().Simplify(HashFunction).ToTree();
[16218]243    }
244
[16305]245    public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
[16218]246      // simplify additions of terms by eliminating terms with the same symbol and hash
247      var children = nodes.IterateChildren(i);
248
[16305]249      // we always assume the child nodes are sorted
[16218]250      var curr = children[0];
251      var node = nodes[i];
252
253      foreach (var j in children.Skip(1)) {
254        if (nodes[j] == nodes[curr]) {
[16305]255          nodes.SetEnabled(j, false);
[16218]256          node.Arity--;
257        } else {
258          curr = j;
259        }
260      }
261      if (node.Arity == 1) { // if the arity is 1 we don't need the addition node at all
262        node.Enabled = false;
263      }
264    }
265
[18132]266    // simplify multiplications by reducing numbers and div terms
[16305]267    public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
[16218]268      var node = nodes[i];
269      var children = nodes.IterateChildren(i);
270
271      for (int j = 0; j < children.Length; ++j) {
272        var c = children[j];
273        var child = nodes[c];
274
275        if (!child.Enabled)
276          continue;
277
278        var symbol = child.Data.Symbol;
[18143]279        if (child.Data is NumberTreeNode firstNum) {
[18132]280          // fold sibling number nodes into the first number
[16218]281          for (int k = j + 1; k < children.Length; ++k) {
[17132]282            var sibling = nodes[children[k]];
[18132]283            if (sibling.Data is INumericTreeNode otherNum) {
[17132]284              sibling.Enabled = false;
[16218]285              node.Arity--;
[18132]286              firstNum.Value *= otherNum.Value;
[16218]287            } else {
288              break;
289            }
290          }
[17132]291        } else if (child.Data is VariableTreeNode variable) {
[18132]292          // fold sibling number nodes into the variable weight
[17132]293          for (int k = j + 1; k < children.Length; ++k) {
294            var sibling = nodes[children[k]];
[18132]295            if (sibling.Data is INumericTreeNode numNode) {
[17132]296              sibling.Enabled = false;
297              node.Arity--;
[18132]298              variable.Weight *= numNode.Value;
[17132]299            } else {
300              break;
301            }
302          }
[16218]303        } else if (symbol is Division) {
[17132]304          // 1/x is expressed as div(x) (with a single child)
305          // we assume division always has arity 1 or 2
306          var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2;
307          var denominator = nodes[d];
[16218]308
[17132]309          // iterate children of node i to see if any of them matches the denominator of div node c
310          for (int k = 0; k < children.Length; ++k) {
311            var sibling = nodes[children[k]];
312            if (sibling.Enabled && sibling == denominator) {
313              nodes.SetEnabled(children[j], false); // disable the div subtree
314              nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator
315
[16218]316              node.Arity -= 2; // matching child + division node
317              break;
318            }
319          }
320        }
321
[18132]322        if (node.Arity == 0) { // if everything is simplified this node becomes a number
323          var numNode = number.CreateTreeNode<NumberTreeNode>();
324          numNode.Value = 1;
325          nodes[i] = numNode.ToHashNode();
[16218]326        } else if (node.Arity == 1) { // when i have only 1 arg left i can skip this node
327          node.Enabled = false;
328        }
329      }
330    }
331
[16305]332    public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
[16218]333      var node = nodes[i];
334      var children = nodes.IterateChildren(i);
335
[16305]336      var tmp = nodes;
337
[18132]338      if (children.All(x => tmp[x].Data.Symbol is INumericSymbol)) {
339        var v = ((INumericTreeNode)nodes[children.First()].Data).Value;
[16218]340        if (node.Arity == 1) {
341          v = 1 / v;
342        } else if (node.Arity > 1) {
343          foreach (var j in children.Skip(1)) {
[18132]344            v /= ((INumericTreeNode)nodes[j].Data).Value;
[16218]345          }
346        }
[18132]347        var numNode = number.CreateTreeNode<NumberTreeNode>();
348        numNode.Value = v;
349        nodes[i] = numNode.ToHashNode();
[16218]350        return;
351      }
352
353      var nominator = nodes[children[0]];
354      foreach (var j in children.Skip(1)) {
355        var denominator = nodes[j];
356        if (nominator == denominator) {
357          // disable all the children of the division node (nominator and children + denominator and children)
358          nominator.Enabled = denominator.Enabled = false;
359          node.Arity -= 2; // nominator + denominator
360        }
361        if (node.Arity == 0) {
[18132]362          var numNode = number.CreateTreeNode<NumberTreeNode>();
363          numNode.Value = 1; // x / x = 1
364          nodes[i] = numNode.ToHashNode();
[16218]365        }
366      }
367    }
368
[16305]369    public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
[18132]370      // check if the child of the unary node is a number, then the whole node can be simplified
[16218]371      var parent = nodes[i];
372      var child = nodes[i - 1];
373
374      var parentSymbol = parent.Data.Symbol;
375      var childSymbol = child.Data.Symbol;
376
[18132]377      if (childSymbol is INumericSymbol) {
[16218]378        nodes[i].Enabled = false;
379      } else if ((parentSymbol is Exponential && childSymbol is Logarithm) || (parentSymbol is Logarithm && childSymbol is Exponential)) {
380        child.Enabled = parent.Enabled = false;
381      }
382    }
383
[16305]384    public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
[16218]385      var children = nodes.IterateChildren(i);
[16305]386      var tmp = nodes;
[18132]387      if (children.All(x => tmp[x].Data.Symbol is INumericSymbol)) {
[16218]388        foreach (var j in children) {
389          nodes[j].Enabled = false;
390        }
[18132]391        nodes[i] = number.CreateTreeNode().ToHashNode();
[16218]392      }
393    }
394    #endregion
395  }
396}
Note: See TracBrowser for help on using the repository browser.