Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3140_NumberSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs @ 18115

Last change on this file since 18115 was 18115, checked in by gkronber, 3 years ago

#3140: made several more changes for the constant -> number branch

File size: 15.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;
27
28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
29  public static class SymbolicExpressionTreeHash {
30    private static readonly Number number = new Number();
31
32    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
33    public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input);
34
35    #region tree hashing
36    public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
37      return tree.ActualRoot().Hash(simplify, strict);
38    }
39
40    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
41      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction);
42      var hashes = new ulong[hashNodes.Length];
43      for (int i = 0; i < hashes.Length; ++i) {
44        hashes[i] = hashNodes[i].CalculatedHashValue;
45      }
46      return hashes;
47    }
48
49    public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
50      return ComputeHash(tree.ActualRoot(), simplify, strict);
51    }
52
53    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {
54      return treeNode.Hash(simplify, strict).Last();
55    }
56
57    public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {
58      var symbol = node.Symbol;
59      var name = symbol.Name;
60      if (node is INumericTreeNode numNode) {
61        name = strict ? numNode.Value.ToString() : "Number";
62      } else if (node is VariableTreeNode variableNode) {
63        name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;
64      }
65      var hash = (ulong)name.GetHashCode();
66      var hashNode = new HashNode<ISymbolicExpressionTreeNode> {
67        Data = node,
68        Arity = node.SubtreeCount,
69        Size = node.SubtreeCount,
70        IsCommutative = node.Symbol is Addition || node.Symbol is Multiplication,
71        Enabled = true,
72        HashValue = hash,
73        CalculatedHashValue = hash
74      };
75      if (symbol is Addition) {
76        hashNode.Simplify = SimplifyAddition;
77      } else if (symbol is Multiplication) {
78        hashNode.Simplify = SimplifyMultiplication;
79      } else if (symbol is Division) {
80        hashNode.Simplify = SimplifyDivision;
81      } else if (symbol is Logarithm || symbol is Exponential || symbol is Sine || symbol is Cosine) {
82        hashNode.Simplify = SimplifyUnaryNode;
83      } else if (symbol is Subtraction) {
84        hashNode.Simplify = SimplifyBinaryNode;
85      }
86      return hashNode;
87    }
88
89    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
90      return MakeNodes(tree.ActualRoot(), strict);
91    }
92
93    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {
94      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
95    }
96    #endregion
97
98    #region tree similarity
99    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {
100      return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);
101    }
102
103    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {
104      var lh = t1.Hash(simplify, strict);
105      var rh = t2.Hash(simplify, strict);
106
107      Array.Sort(lh);
108      Array.Sort(rh);
109
110      return ComputeSimilarity(lh, rh);
111    }
112
113    // requires lhs and rhs to be sorted
114    public static int IntersectCount(this ulong[] lh, ulong[] rh) {
115      int count = 0;
116      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
117        var h1 = lh[i];
118        var h2 = rh[j];
119        if (h1 == h2) {
120          ++count;
121          ++i;
122          ++j;
123        } else if (h1 < h2) {
124          ++i;
125        } else if (h1 > h2) {
126          ++j;
127        }
128      }
129      return count;
130    }
131
132    public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {
133      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
134        var h1 = lh[i];
135        var h2 = rh[j];
136        if (h1 == h2) {
137          yield return h1;
138          ++i;
139          ++j;
140        } else if (h1 < h2) {
141          ++i;
142        } else if (h1 > h2) {
143          ++j;
144        }
145      }
146    }
147
148    // this will only work if lh and rh are sorted
149    public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
150      return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);
151    }
152
153    public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
154      var total = trees.Count * (trees.Count - 1) / 2;
155      double avg = 0;
156      var hashes = new ulong[trees.Count][];
157      // build hash arrays
158      for (int i = 0; i < trees.Count; ++i) {
159        var nodes = trees[i].MakeNodes(strict);
160        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
161        Array.Sort(hashes[i]);
162      }
163      // compute similarity matrix
164      for (int i = 0; i < trees.Count - 1; ++i) {
165        for (int j = i + 1; j < trees.Count; ++j) {
166          avg += ComputeSimilarity(hashes[i], hashes[j]);
167        }
168      }
169      return avg / total;
170    }
171
172    public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) {
173      // compute similarity matrix
174      var n = hashes.Count;
175      var sim = new double[n, n];
176      for (int i = 0; i < n - 1; ++i) {
177        for (int j = i + 1; j < n; ++j) {
178          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
179        }
180      }
181      return sim;
182    }
183
184    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
185      var hashes = new ulong[trees.Count][];
186      // build hash arrays
187      for (int i = 0; i < trees.Count; ++i) {
188        var nodes = trees[i].MakeNodes(strict);
189        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
190        Array.Sort(hashes[i]);
191      }
192      return ComputeSimilarityMatrix(hashes);
193    }
194    #endregion
195
196    #region parse a nodes array back into a tree
197    public static ISymbolicExpressionTree ToTree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
198      var root = new ProgramRootSymbol().CreateTreeNode();
199      var start = new StartSymbol().CreateTreeNode();
200      root.AddSubtree(start);
201      start.AddSubtree(nodes.ToSubtree());
202      return new SymbolicExpressionTree(root);
203    }
204
205    public static ISymbolicExpressionTreeNode ToSubtree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
206      var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray();
207
208      for (int i = nodes.Length - 1; i >= 0; --i) {
209        var node = nodes[i];
210
211        if (node.IsLeaf) {
212          if (node.Data is VariableTreeNode variable) {
213            var variableTreeNode = (VariableTreeNode)treeNodes[i];
214            variableTreeNode.VariableName = variable.VariableName;
215            variableTreeNode.Weight = variable.Weight;
216          } else if (node.Data is INumericTreeNode existingNumNode) {
217            var newNumNode = (INumericTreeNode)treeNodes[i];
218            newNumNode.Value = existingNumNode.Value;
219          }
220          continue;
221        }
222
223        var treeNode = treeNodes[i];
224
225        foreach (var j in nodes.IterateChildren(i)) {
226          treeNode.AddSubtree(treeNodes[j]);
227        }
228      }
229
230      return treeNodes.Last();
231    }
232
233    private static T CreateTreeNode<T>(this ISymbol symbol) where T : class, ISymbolicExpressionTreeNode {
234      return (T)symbol.CreateTreeNode();
235    }
236    #endregion
237
238    #region tree simplification
239    // these simplification methods rely on the assumption that child nodes of the current node have already been simplified
240    // (in other words simplification should be applied in a bottom-up fashion)
241    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
242      return tree.MakeNodes().Simplify(HashFunction).ToTree();
243    }
244
245    public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
246      // simplify additions of terms by eliminating terms with the same symbol and hash
247      var children = nodes.IterateChildren(i);
248
249      // we always assume the child nodes are sorted
250      var curr = children[0];
251      var node = nodes[i];
252
253      foreach (var j in children.Skip(1)) {
254        if (nodes[j] == nodes[curr]) {
255          nodes.SetEnabled(j, false);
256          node.Arity--;
257        } else {
258          curr = j;
259        }
260      }
261      if (node.Arity == 1) { // if the arity is 1 we don't need the addition node at all
262        node.Enabled = false;
263      }
264    }
265
266    // simplify multiplications by reducing numbers and div terms
267    public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
268      var node = nodes[i];
269      var children = nodes.IterateChildren(i);
270
271      for (int j = 0; j < children.Length; ++j) {
272        var c = children[j];
273        var child = nodes[c];
274
275        if (!child.Enabled)
276          continue;
277
278        var symbol = child.Data.Symbol;
279        if (child.Data is INumericTreeNode firstNum) {
280          // fold sibling number nodes into the first number
281          for (int k = j + 1; k < children.Length; ++k) {
282            var sibling = nodes[children[k]];
283            if (sibling.Data is INumericTreeNode otherNum) {
284              sibling.Enabled = false;
285              node.Arity--;
286              firstNum.Value *= otherNum.Value;
287            } else {
288              break;
289            }
290          }
291        } else if (child.Data is VariableTreeNode variable) {
292          // fold sibling number nodes into the variable weight
293          for (int k = j + 1; k < children.Length; ++k) {
294            var sibling = nodes[children[k]];
295            if (sibling.Data is INumericTreeNode numNode) {
296              sibling.Enabled = false;
297              node.Arity--;
298              variable.Weight *= numNode.Value;
299            } else {
300              break;
301            }
302          }
303        } else if (symbol is Division) {
304          // 1/x is expressed as div(x) (with a single child)
305          // we assume division always has arity 1 or 2
306          var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2;
307          var denominator = nodes[d];
308
309          // iterate children of node i to see if any of them matches the denominator of div node c
310          for (int k = 0; k < children.Length; ++k) {
311            var sibling = nodes[children[k]];
312            if (sibling.Enabled && sibling == denominator) {
313              nodes.SetEnabled(children[j], false); // disable the div subtree
314              nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator
315
316              node.Arity -= 2; // matching child + division node
317              break;
318            }
319          }
320        }
321
322        if (node.Arity == 0) { // if everything is simplified this node becomes a number
323          var numNode = number.CreateTreeNode<NumberTreeNode>();
324          numNode.Value = 1;
325          nodes[i] = numNode.ToHashNode();
326        } else if (node.Arity == 1) { // when i have only 1 arg left i can skip this node
327          node.Enabled = false;
328        }
329      }
330    }
331
332    public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
333      var node = nodes[i];
334      var children = nodes.IterateChildren(i);
335
336      var tmp = nodes;
337
338      if (children.All(x => tmp[x].Data.Symbol is INumericSymbol)) {
339        var v = ((INumericTreeNode)nodes[children.First()].Data).Value;
340        if (node.Arity == 1) {
341          v = 1 / v;
342        } else if (node.Arity > 1) {
343          foreach (var j in children.Skip(1)) {
344            v /= ((INumericTreeNode)nodes[j].Data).Value;
345          }
346        }
347        var numNode = number.CreateTreeNode<NumberTreeNode>();
348        numNode.Value = v;
349        nodes[i] = numNode.ToHashNode();
350        return;
351      }
352
353      var nominator = nodes[children[0]];
354      foreach (var j in children.Skip(1)) {
355        var denominator = nodes[j];
356        if (nominator == denominator) {
357          // disable all the children of the division node (nominator and children + denominator and children)
358          nominator.Enabled = denominator.Enabled = false;
359          node.Arity -= 2; // nominator + denominator
360        }
361        if (node.Arity == 0) {
362          var numNode = number.CreateTreeNode<NumberTreeNode>();
363          numNode.Value = 1; // x / x = 1
364          nodes[i] = numNode.ToHashNode();
365        }
366      }
367    }
368
369    public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
370      // check if the child of the unary node is a number, then the whole node can be simplified
371      var parent = nodes[i];
372      var child = nodes[i - 1];
373
374      var parentSymbol = parent.Data.Symbol;
375      var childSymbol = child.Data.Symbol;
376
377      if (childSymbol is INumericSymbol) {
378        nodes[i].Enabled = false;
379      } else if ((parentSymbol is Exponential && childSymbol is Logarithm) || (parentSymbol is Logarithm && childSymbol is Exponential)) {
380        child.Enabled = parent.Enabled = false;
381      }
382    }
383
384    public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
385      var children = nodes.IterateChildren(i);
386      var tmp = nodes;
387      if (children.All(x => tmp[x].Data.Symbol is INumericSymbol)) {
388        foreach (var j in children) {
389          nodes[j].Enabled = false;
390        }
391        nodes[i] = number.CreateTreeNode().ToHashNode();
392      }
393    }
394    #endregion
395  }
396}
Note: See TracBrowser for help on using the repository browser.