Changeset 16487 for branches/2520_PersistenceReintegration/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing
- Timestamp:
- 01/03/19 12:17:39 (6 years ago)
- Location:
- branches/2520_PersistenceReintegration
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2520_PersistenceReintegration
-
branches/2520_PersistenceReintegration/HeuristicLab.Problems.DataAnalysis.Symbolic
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Problems.DataAnalysis.Symbolic (added) merged: 16478
- Property svn:mergeinfo changed
-
branches/2520_PersistenceReintegration/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16453 r16487 39 39 40 40 private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer(); 41 42 public static ulong ComputeHash(this ISymbolicExpressionTree tree) { 43 return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0)); 44 } 45 46 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) { 47 return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify); 48 } 49 50 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) { 51 HashNode<ISymbolicExpressionTreeNode>[] lhs; 52 HashNode<ISymbolicExpressionTreeNode>[] rhs; 53 41 private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0); 42 43 #region tree hashing 44 public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) { 45 return tree.ActualRoot().Hash(simplify, strict); 46 } 47 48 public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) { 54 49 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 55 50 56 if (simplify) { 57 lhs = t1.MakeNodes().Simplify(hashFunction); 58 rhs = t2.MakeNodes().Simplify(hashFunction); 59 } else { 60 lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values 61 rhs = t2.MakeNodes().Sort(hashFunction); 62 } 63 64 var lh = lhs.Select(x => x.CalculatedHashValue).ToArray(); 65 var rh = rhs.Select(x => x.CalculatedHashValue).ToArray(); 66 67 Array.Sort(lh); 68 Array.Sort(rh); 69 70 return ComputeSimilarity(lh, rh); 71 } 72 73 // this will only work if lh and rh are sorted 74 private static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 75 double count = 0; 76 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 77 var h1 = lh[i]; 78 var h2 = rh[j]; 79 if (h1 == h2) { 80 ++count; 81 ++i; 82 ++j; 83 } else if (h1 < h2) { 84 ++i; 85 } else if (h1 > h2) { 86 ++j; 87 } 88 } 89 90 return 2d * count / (lh.Length + rh.Length); 91 } 92 93 public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 94 var total = (double)trees.Count * (trees.Count - 1) / 2; 95 double avg = 0; 96 var hashes = new ulong[trees.Count][]; 97 // build hash arrays 98 for (int i = 0; i < trees.Count; ++i) { 99 var nodes = trees[i].MakeNodes(strict); 100 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 101 Array.Sort(hashes[i]); 102 } 103 // compute similarity matrix 104 for (int i = 0; i < trees.Count - 1; ++i) { 105 for (int j = i + 1; j < trees.Count; ++j) { 106 avg += ComputeSimilarity(hashes[i], hashes[j]); 107 } 108 } 109 return avg / total; 110 } 111 112 public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 113 var sim = new double[trees.Count, trees.Count]; 114 var hashes = new ulong[trees.Count][]; 115 // build hash arrays 116 for (int i = 0; i < trees.Count; ++i) { 117 var nodes = trees[i].MakeNodes(strict); 118 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 119 Array.Sort(hashes[i]); 120 } 121 // compute similarity matrix 122 for (int i = 0; i < trees.Count - 1; ++i) { 123 for (int j = i + 1; j < trees.Count; ++j) { 124 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 125 } 126 } 127 return sim; 128 } 129 130 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) { 131 ulong hashFunction(byte[] input) => HashUtil.JSHash(input); 132 var hashNodes = treeNode.MakeNodes(strict); 133 var simplified = hashNodes.Simplify(hashFunction); 134 return simplified.Last().CalculatedHashValue; 51 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction); 52 var hashes = new ulong[hashNodes.Length]; 53 for (int i = 0; i < hashes.Length; ++i) { 54 hashes[i] = hashNodes[i].CalculatedHashValue; 55 } 56 return hashes; 57 } 58 59 public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) { 60 return ComputeHash(tree.ActualRoot(), simplify, strict); 61 } 62 63 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) { 64 return treeNode.Hash(simplify, strict).Last(); 135 65 } 136 66 … … 168 98 169 99 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) { 170 return MakeNodes(tree. Root.GetSubtree(0).GetSubtree(0), strict);100 return MakeNodes(tree.ActualRoot(), strict); 171 101 } 172 102 … … 174 104 return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes(); 175 105 } 106 #endregion 107 108 #region tree similarity 109 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) { 110 return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict); 111 } 112 113 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) { 114 var lh = t1.Hash(simplify, strict); 115 var rh = t2.Hash(simplify, strict); 116 117 Array.Sort(lh); 118 Array.Sort(rh); 119 120 return ComputeSimilarity(lh, rh); 121 } 122 123 // requires lhs and rhs to be sorted 124 public static int IntersectCount(this ulong[] lh, ulong[] rh) { 125 int count = 0; 126 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 127 var h1 = lh[i]; 128 var h2 = rh[j]; 129 if (h1 == h2) { 130 ++count; 131 ++i; 132 ++j; 133 } else if (h1 < h2) { 134 ++i; 135 } else if (h1 > h2) { 136 ++j; 137 } 138 } 139 return count; 140 } 141 142 public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) { 143 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 144 var h1 = lh[i]; 145 var h2 = rh[j]; 146 if (h1 == h2) { 147 yield return h1; 148 ++i; 149 ++j; 150 } else if (h1 < h2) { 151 ++i; 152 } else if (h1 > h2) { 153 ++j; 154 } 155 } 156 } 157 158 // this will only work if lh and rh are sorted 159 public static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 160 return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length); 161 } 162 163 public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 164 var total = trees.Count * (trees.Count - 1) / 2; 165 double avg = 0; 166 var hashes = new ulong[trees.Count][]; 167 // build hash arrays 168 for (int i = 0; i < trees.Count; ++i) { 169 var nodes = trees[i].MakeNodes(strict); 170 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 171 Array.Sort(hashes[i]); 172 } 173 // compute similarity matrix 174 for (int i = 0; i < trees.Count - 1; ++i) { 175 for (int j = i + 1; j < trees.Count; ++j) { 176 avg += ComputeSimilarity(hashes[i], hashes[j]); 177 } 178 } 179 return avg / total; 180 } 181 182 public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 183 var sim = new double[trees.Count, trees.Count]; 184 var hashes = new ulong[trees.Count][]; 185 // build hash arrays 186 for (int i = 0; i < trees.Count; ++i) { 187 var nodes = trees[i].MakeNodes(strict); 188 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 189 Array.Sort(hashes[i]); 190 } 191 // compute similarity matrix 192 for (int i = 0; i < trees.Count - 1; ++i) { 193 for (int j = i + 1; j < trees.Count; ++j) { 194 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 195 } 196 } 197 return sim; 198 } 199 #endregion 176 200 177 201 #region parse a nodes array back into a tree
Note: See TracChangeset
for help on using the changeset viewer.