Changeset 16284
- Timestamp:
- 11/07/18 15:16:57 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16272 r16284 20 20 #endregion 21 21 22 using System; 22 23 using System.Linq; 23 24 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 40 41 public static ulong ComputeHash(this ISymbolicExpressionTree tree) { 41 42 return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0)); 43 } 44 45 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) { 46 return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify); 47 } 48 49 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) { 50 HashNode<ISymbolicExpressionTreeNode>[] lhs; 51 HashNode<ISymbolicExpressionTreeNode>[] rhs; 52 53 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 54 55 if (simplify) { 56 lhs = t1.MakeNodes().Simplify(hashFunction); 57 rhs = t2.MakeNodes().Simplify(hashFunction); 58 } else { 59 lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values 60 rhs = t2.MakeNodes().Sort(hashFunction); 61 } 62 63 var lh = lhs.Select(x => x.CalculatedHashValue).ToArray(); 64 var rh = rhs.Select(x => x.CalculatedHashValue).ToArray(); 65 66 Array.Sort(lh); 67 Array.Sort(rh); 68 69 return ComputeSimilarity(lh, rh); 70 } 71 72 // this will only work if lh and rh are sorted 73 private static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 74 double count = 0; 75 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 76 var h1 = lh[i]; 77 var h2 = rh[j]; 78 if (h1 == h2) { 79 ++count; 80 ++i; 81 ++j; 82 } else if (h1 < h2) { 83 ++i; 84 } else if (h1 > h2) { 85 ++j; 86 } 87 } 88 89 return 2d * count / (lh.Length + rh.Length); 90 } 91 92 public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false) { 93 var total = (double)trees.Length * (trees.Length - 1) / 2; 94 double avg = 0; 95 var hashes = new ulong[trees.Length][]; 96 // build hash arrays 97 for (int i = 0; i < trees.Length; ++i) { 98 hashes[i] = (simplify ? trees[i].MakeNodes().Simplify(HashUtil.DJBHash) : trees[i].MakeNodes().Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 99 Array.Sort(hashes[i]); 100 } 101 // compute similarity matrix 102 for (int i = 0; i < trees.Length - 1; ++i) { 103 for (int j = i + 1; j < trees.Length; ++j) { 104 avg = ComputeSimilarity(hashes[i], hashes[j]); 105 } 106 } 107 return avg / total; 108 } 109 110 public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false) { 111 var sim = new double[trees.Length, trees.Length]; 112 var hashes = new ulong[trees.Length][]; 113 // build hash arrays 114 for (int i = 0; i < trees.Length; ++i) { 115 hashes[i] = (simplify ? trees[i].MakeNodes().Simplify(HashUtil.DJBHash) : trees[i].MakeNodes().Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 116 Array.Sort(hashes[i]); 117 } 118 // compute similarity matrix 119 for (int i = 0; i < trees.Length - 1; ++i) { 120 for (int j = i + 1; j < trees.Length; ++j) { 121 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 122 } 123 } 124 return sim; 42 125 } 43 126 … … 80 163 } 81 164 165 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree) { 166 return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0)); 167 } 168 82 169 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) { 83 170 return node.IterateNodesPostfix().Select(ToHashNode).ToArray().UpdateNodeSizes();
Note: See TracChangeset
for help on using the changeset viewer.