Free cookie consent management tool by TermsFeed Policy Generator

Changeset 16284 for trunk


Ignore:
Timestamp:
11/07/18 15:16:57 (6 years ago)
Author:
bburlacu
Message:

#2950: Add the ability to compute the structural similarity between symbolic expression trees.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16272 r16284  
    2020#endregion
    2121
     22using System;
    2223using System.Linq;
    2324using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    4041    public static ulong ComputeHash(this ISymbolicExpressionTree tree) {
    4142      return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
     43    }
     44
     45    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) {
     46      return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify);
     47    }
     48
     49    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) {
     50      HashNode<ISymbolicExpressionTreeNode>[] lhs;
     51      HashNode<ISymbolicExpressionTreeNode>[] rhs;
     52
     53      ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
     54
     55      if (simplify) {
     56        lhs = t1.MakeNodes().Simplify(hashFunction);
     57        rhs = t2.MakeNodes().Simplify(hashFunction);
     58      } else {
     59        lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values
     60        rhs = t2.MakeNodes().Sort(hashFunction);
     61      }
     62
     63      var lh = lhs.Select(x => x.CalculatedHashValue).ToArray();
     64      var rh = rhs.Select(x => x.CalculatedHashValue).ToArray();
     65
     66      Array.Sort(lh);
     67      Array.Sort(rh);
     68
     69      return ComputeSimilarity(lh, rh);
     70    }
     71
     72    // this will only work if lh and rh are sorted
     73    private static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
     74      double count = 0;
     75      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     76        var h1 = lh[i];
     77        var h2 = rh[j];
     78        if (h1 == h2) {
     79          ++count;
     80          ++i;
     81          ++j;
     82        } else if (h1 < h2) {
     83          ++i;
     84        } else if (h1 > h2) {
     85          ++j;
     86        }
     87      }
     88
     89      return 2d * count / (lh.Length + rh.Length);
     90    }
     91
     92    public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false) {
     93      var total = (double)trees.Length * (trees.Length - 1) / 2;
     94      double avg = 0;
     95      var hashes = new ulong[trees.Length][];
     96      // build hash arrays
     97      for (int i = 0; i < trees.Length; ++i) {
     98        hashes[i] = (simplify ? trees[i].MakeNodes().Simplify(HashUtil.DJBHash) : trees[i].MakeNodes().Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     99        Array.Sort(hashes[i]);
     100      }
     101      // compute similarity matrix
     102      for (int i = 0; i < trees.Length - 1; ++i) {
     103        for (int j = i + 1; j < trees.Length; ++j) {
     104          avg = ComputeSimilarity(hashes[i], hashes[j]);
     105        }
     106      }
     107      return avg / total;
     108    }
     109
     110    public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false) {
     111      var sim = new double[trees.Length, trees.Length];
     112      var hashes = new ulong[trees.Length][];
     113      // build hash arrays
     114      for (int i = 0; i < trees.Length; ++i) {
     115        hashes[i] = (simplify ? trees[i].MakeNodes().Simplify(HashUtil.DJBHash) : trees[i].MakeNodes().Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     116        Array.Sort(hashes[i]);
     117      }
     118      // compute similarity matrix
     119      for (int i = 0; i < trees.Length - 1; ++i) {
     120        for (int j = i + 1; j < trees.Length; ++j) {
     121          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     122        }
     123      }
     124      return sim;
    42125    }
    43126
     
    80163    }
    81164
     165    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree) {
     166      return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0));
     167    }
     168
    82169    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) {
    83170      return node.IterateNodesPostfix().Select(ToHashNode).ToArray().UpdateNodeSizes();
Note: See TracChangeset for help on using the changeset viewer.