Changeset 16487


Ignore:
Timestamp:
01/03/19 12:17:39 (3 weeks ago)
Author:
gkronber
Message:

#2520 merged r16478 (HEAD) from trunk to branch

Location:
branches/2520_PersistenceReintegration
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2520_PersistenceReintegration

    • Property svn:mergeinfo changed
      /trunk (added)merged: 16478
  • branches/2520_PersistenceReintegration/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/2520_PersistenceReintegration/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16453 r16487  
    3939
    4040    private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    41 
    42     public static ulong ComputeHash(this ISymbolicExpressionTree tree) {
    43       return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
    44     }
    45 
    46     public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) {
    47       return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify);
    48     }
    49 
    50     public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) {
    51       HashNode<ISymbolicExpressionTreeNode>[] lhs;
    52       HashNode<ISymbolicExpressionTreeNode>[] rhs;
    53 
     41    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     42
     43    #region tree hashing
     44    public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     45      return tree.ActualRoot().Hash(simplify, strict);
     46    }
     47
     48    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
    5449      ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
    5550
    56       if (simplify) {
    57         lhs = t1.MakeNodes().Simplify(hashFunction);
    58         rhs = t2.MakeNodes().Simplify(hashFunction);
    59       } else {
    60         lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values
    61         rhs = t2.MakeNodes().Sort(hashFunction);
    62       }
    63 
    64       var lh = lhs.Select(x => x.CalculatedHashValue).ToArray();
    65       var rh = rhs.Select(x => x.CalculatedHashValue).ToArray();
    66 
    67       Array.Sort(lh);
    68       Array.Sort(rh);
    69 
    70       return ComputeSimilarity(lh, rh);
    71     }
    72 
    73     // this will only work if lh and rh are sorted
    74     private static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
    75       double count = 0;
    76       for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
    77         var h1 = lh[i];
    78         var h2 = rh[j];
    79         if (h1 == h2) {
    80           ++count;
    81           ++i;
    82           ++j;
    83         } else if (h1 < h2) {
    84           ++i;
    85         } else if (h1 > h2) {
    86           ++j;
    87         }
    88       }
    89 
    90       return 2d * count / (lh.Length + rh.Length);
    91     }
    92 
    93     public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
    94       var total = (double)trees.Count * (trees.Count - 1) / 2;
    95       double avg = 0;
    96       var hashes = new ulong[trees.Count][];
    97       // build hash arrays
    98       for (int i = 0; i < trees.Count; ++i) {
    99         var nodes = trees[i].MakeNodes(strict);
    100         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
    101         Array.Sort(hashes[i]);
    102       }
    103       // compute similarity matrix
    104       for (int i = 0; i < trees.Count - 1; ++i) {
    105         for (int j = i + 1; j < trees.Count; ++j) {
    106           avg += ComputeSimilarity(hashes[i], hashes[j]);
    107         }
    108       }
    109       return avg / total;
    110     }
    111 
    112     public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
    113       var sim = new double[trees.Count, trees.Count];
    114       var hashes = new ulong[trees.Count][];
    115       // build hash arrays
    116       for (int i = 0; i < trees.Count; ++i) {
    117         var nodes = trees[i].MakeNodes(strict);
    118         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
    119         Array.Sort(hashes[i]);
    120       }
    121       // compute similarity matrix
    122       for (int i = 0; i < trees.Count - 1; ++i) {
    123         for (int j = i + 1; j < trees.Count; ++j) {
    124           sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
    125         }
    126       }
    127       return sim;
    128     }
    129 
    130     public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) {
    131       ulong hashFunction(byte[] input) => HashUtil.JSHash(input);
    132       var hashNodes = treeNode.MakeNodes(strict);
    133       var simplified = hashNodes.Simplify(hashFunction);
    134       return simplified.Last().CalculatedHashValue;
     51      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);
     52      var hashes = new ulong[hashNodes.Length];
     53      for (int i = 0; i < hashes.Length; ++i) {
     54        hashes[i] = hashNodes[i].CalculatedHashValue;
     55      }
     56      return hashes;
     57    }
     58
     59    public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     60      return ComputeHash(tree.ActualRoot(), simplify, strict);
     61    }
     62
     63    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {
     64      return treeNode.Hash(simplify, strict).Last();
    13565    }
    13666
     
    16898
    16999    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
    170       return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict);
     100      return MakeNodes(tree.ActualRoot(), strict);
    171101    }
    172102
     
    174104      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
    175105    }
     106    #endregion
     107
     108    #region tree similarity
     109    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {
     110      return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);
     111    }
     112
     113    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {
     114      var lh = t1.Hash(simplify, strict);
     115      var rh = t2.Hash(simplify, strict);
     116
     117      Array.Sort(lh);
     118      Array.Sort(rh);
     119
     120      return ComputeSimilarity(lh, rh);
     121    }
     122
     123    // requires lhs and rhs to be sorted
     124    public static int IntersectCount(this ulong[] lh, ulong[] rh) {
     125      int count = 0;
     126      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     127        var h1 = lh[i];
     128        var h2 = rh[j];
     129        if (h1 == h2) {
     130          ++count;
     131          ++i;
     132          ++j;
     133        } else if (h1 < h2) {
     134          ++i;
     135        } else if (h1 > h2) {
     136          ++j;
     137        }
     138      }
     139      return count;
     140    }
     141
     142    public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {
     143      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     144        var h1 = lh[i];
     145        var h2 = rh[j];
     146        if (h1 == h2) {
     147          yield return h1;
     148          ++i;
     149          ++j;
     150        } else if (h1 < h2) {
     151          ++i;
     152        } else if (h1 > h2) {
     153          ++j;
     154        }
     155      }
     156    }
     157
     158    // this will only work if lh and rh are sorted
     159    public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
     160      return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);
     161    }
     162
     163    public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     164      var total = trees.Count * (trees.Count - 1) / 2;
     165      double avg = 0;
     166      var hashes = new ulong[trees.Count][];
     167      // build hash arrays
     168      for (int i = 0; i < trees.Count; ++i) {
     169        var nodes = trees[i].MakeNodes(strict);
     170        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     171        Array.Sort(hashes[i]);
     172      }
     173      // compute similarity matrix
     174      for (int i = 0; i < trees.Count - 1; ++i) {
     175        for (int j = i + 1; j < trees.Count; ++j) {
     176          avg += ComputeSimilarity(hashes[i], hashes[j]);
     177        }
     178      }
     179      return avg / total;
     180    }
     181
     182    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     183      var sim = new double[trees.Count, trees.Count];
     184      var hashes = new ulong[trees.Count][];
     185      // build hash arrays
     186      for (int i = 0; i < trees.Count; ++i) {
     187        var nodes = trees[i].MakeNodes(strict);
     188        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     189        Array.Sort(hashes[i]);
     190      }
     191      // compute similarity matrix
     192      for (int i = 0; i < trees.Count - 1; ++i) {
     193        for (int j = i + 1; j < trees.Count; ++j) {
     194          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     195        }
     196      }
     197      return sim;
     198    }
     199    #endregion
    176200
    177201    #region parse a nodes array back into a tree
Note: See TracChangeset for help on using the changeset viewer.