Changeset 16654 for branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing
- Timestamp:
- 03/07/19 12:29:27 (6 years ago)
- Location:
- branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic
- Property svn:mergeinfo changed
-
branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs
r16305 r16654 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 8Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. -
branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs
r16272 r16654 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 8Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. -
branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16305 r16654 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 8Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 21 21 22 22 using System; 23 using System.Collections.Generic; 23 24 using System.Linq; 24 25 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 38 39 39 40 private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer(); 40 41 public static ulong ComputeHash(this ISymbolicExpressionTree tree) { 42 return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0)); 43 } 44 45 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) { 46 return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify); 47 } 48 49 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) { 50 HashNode<ISymbolicExpressionTreeNode>[] lhs; 51 HashNode<ISymbolicExpressionTreeNode>[] rhs; 52 41 private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0); 42 43 #region tree hashing 44 public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) { 45 return tree.ActualRoot().Hash(simplify, strict); 46 } 47 48 public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) { 53 49 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 54 50 55 if (simplify) { 56 lhs = t1.MakeNodes().Simplify(hashFunction); 57 rhs = t2.MakeNodes().Simplify(hashFunction); 58 } else { 59 lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values 60 rhs = t2.MakeNodes().Sort(hashFunction); 61 } 62 63 var lh = lhs.Select(x => x.CalculatedHashValue).ToArray(); 64 var rh = rhs.Select(x => x.CalculatedHashValue).ToArray(); 65 66 Array.Sort(lh); 67 Array.Sort(rh); 68 69 return ComputeSimilarity(lh, rh); 70 } 71 72 // this will only work if lh and rh are sorted 73 private static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 74 double count = 0; 75 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 76 var h1 = lh[i]; 77 var h2 = rh[j]; 78 if (h1 == h2) { 79 ++count; 80 ++i; 81 ++j; 82 } else if (h1 < h2) { 83 ++i; 84 } else if (h1 > h2) { 85 ++j; 86 } 87 } 88 89 return 2d * count / (lh.Length + rh.Length); 90 } 91 92 public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) { 93 var total = (double)trees.Length * (trees.Length - 1) / 2; 94 double avg = 0; 95 var hashes = new ulong[trees.Length][]; 96 // build hash arrays 97 for (int i = 0; i < trees.Length; ++i) { 98 var nodes = trees[i].MakeNodes(strict); 99 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 100 Array.Sort(hashes[i]); 101 } 102 // compute similarity matrix 103 for (int i = 0; i < trees.Length - 1; ++i) { 104 for (int j = i + 1; j < trees.Length; ++j) { 105 avg += ComputeSimilarity(hashes[i], hashes[j]); 106 } 107 } 108 return avg / total; 109 } 110 111 public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) { 112 var sim = new double[trees.Length, trees.Length]; 113 var hashes = new ulong[trees.Length][]; 114 // build hash arrays 115 for (int i = 0; i < trees.Length; ++i) { 116 var nodes = trees[i].MakeNodes(strict); 117 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 118 Array.Sort(hashes[i]); 119 } 120 // compute similarity matrix 121 for (int i = 0; i < trees.Length - 1; ++i) { 122 for (int j = i + 1; j < trees.Length; ++j) { 123 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 124 } 125 } 126 return sim; 127 } 128 129 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) { 130 ulong hashFunction(byte[] input) => HashUtil.JSHash(input); 131 var hashNodes = treeNode.MakeNodes(strict); 132 var simplified = hashNodes.Simplify(hashFunction); 133 return simplified.Last().CalculatedHashValue; 51 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction); 52 var hashes = new ulong[hashNodes.Length]; 53 for (int i = 0; i < hashes.Length; ++i) { 54 hashes[i] = hashNodes[i].CalculatedHashValue; 55 } 56 return hashes; 57 } 58 59 public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) { 60 return ComputeHash(tree.ActualRoot(), simplify, strict); 61 } 62 63 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) { 64 return treeNode.Hash(simplify, strict).Last(); 134 65 } 135 66 … … 167 98 168 99 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) { 169 return MakeNodes(tree. Root.GetSubtree(0).GetSubtree(0), strict);100 return MakeNodes(tree.ActualRoot(), strict); 170 101 } 171 102 … … 173 104 return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes(); 174 105 } 106 #endregion 107 108 #region tree similarity 109 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) { 110 return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict); 111 } 112 113 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) { 114 var lh = t1.Hash(simplify, strict); 115 var rh = t2.Hash(simplify, strict); 116 117 Array.Sort(lh); 118 Array.Sort(rh); 119 120 return ComputeSimilarity(lh, rh); 121 } 122 123 // requires lhs and rhs to be sorted 124 public static int IntersectCount(this ulong[] lh, ulong[] rh) { 125 int count = 0; 126 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 127 var h1 = lh[i]; 128 var h2 = rh[j]; 129 if (h1 == h2) { 130 ++count; 131 ++i; 132 ++j; 133 } else if (h1 < h2) { 134 ++i; 135 } else if (h1 > h2) { 136 ++j; 137 } 138 } 139 return count; 140 } 141 142 public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) { 143 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 144 var h1 = lh[i]; 145 var h2 = rh[j]; 146 if (h1 == h2) { 147 yield return h1; 148 ++i; 149 ++j; 150 } else if (h1 < h2) { 151 ++i; 152 } else if (h1 > h2) { 153 ++j; 154 } 155 } 156 } 157 158 // this will only work if lh and rh are sorted 159 public static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 160 return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length); 161 } 162 163 public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 164 var total = trees.Count * (trees.Count - 1) / 2; 165 double avg = 0; 166 var hashes = new ulong[trees.Count][]; 167 // build hash arrays 168 for (int i = 0; i < trees.Count; ++i) { 169 var nodes = trees[i].MakeNodes(strict); 170 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 171 Array.Sort(hashes[i]); 172 } 173 // compute similarity matrix 174 for (int i = 0; i < trees.Count - 1; ++i) { 175 for (int j = i + 1; j < trees.Count; ++j) { 176 avg += ComputeSimilarity(hashes[i], hashes[j]); 177 } 178 } 179 return avg / total; 180 } 181 182 public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 183 var sim = new double[trees.Count, trees.Count]; 184 var hashes = new ulong[trees.Count][]; 185 // build hash arrays 186 for (int i = 0; i < trees.Count; ++i) { 187 var nodes = trees[i].MakeNodes(strict); 188 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 189 Array.Sort(hashes[i]); 190 } 191 // compute similarity matrix 192 for (int i = 0; i < trees.Count - 1; ++i) { 193 for (int j = i + 1; j < trees.Count; ++j) { 194 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 195 } 196 } 197 return sim; 198 } 199 #endregion 175 200 176 201 #region parse a nodes array back into a tree … … 218 243 #region tree simplification 219 244 // these simplification methods rely on the assumption that child nodes of the current node have already been simplified 220 // (in other words simplification should be applied in a bottom-up fashion) 245 // (in other words simplification should be applied in a bottom-up fashion) 221 246 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 222 247 ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes); … … 248 273 } 249 274 250 // simplify multiplications by reducing constants and div terms 275 // simplify multiplications by reducing constants and div terms 251 276 public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) { 252 277 var node = nodes[i];
Note: See TracChangeset
for help on using the changeset viewer.