- Timestamp:
- 07/11/19 15:32:31 (6 years ago)
- Location:
- trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs
r17076 r17132 45 45 46 46 public int CompareTo(HashNode<T> other) { 47 return CalculatedHashValue.CompareTo(other.CalculatedHashValue); 47 var res = HashValue.CompareTo(other.HashValue); 48 return res == 0 ? CalculatedHashValue.CompareTo(other.CalculatedHashValue) : res; 48 49 } 49 50 … … 112 113 } 113 114 node.Simplify?.Invoke(ref nodes, i); 114 for (int j = i - node.Size; j < i; ++j) {115 for (int j = i - node.Size; j <= i; ++j) { 115 116 // detect if anything was simplified 116 117 if (!nodes[j].Enabled) { -
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r17076 r17132 39 39 40 40 private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0); 41 public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input); 41 42 42 43 #region tree hashing … … 46 47 47 48 public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) { 48 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 49 50 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction); // simplify sorts implicitly 49 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction); 51 50 var hashes = new ulong[hashNodes.Length]; 52 51 for (int i = 0; i < hashes.Length; ++i) { … … 167 166 for (int i = 0; i < trees.Count; ++i) { 168 167 var nodes = trees[i].MakeNodes(strict); 169 hashes[i] = (simplify ? nodes.Simplify(Hash Util.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();168 hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray(); 170 169 Array.Sort(hashes[i]); 171 170 } … … 179 178 } 180 179 180 public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) { 181 // compute similarity matrix 182 var n = hashes.Count; 183 var sim = new double[n, n]; 184 for (int i = 0; i < n - 1; ++i) { 185 for (int j = i + 1; j < n; ++j) { 186 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 187 } 188 } 189 return sim; 190 } 191 181 192 public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 182 var sim = new double[trees.Count, trees.Count];183 193 var hashes = new ulong[trees.Count][]; 184 194 // build hash arrays 185 195 for (int i = 0; i < trees.Count; ++i) { 186 196 var nodes = trees[i].MakeNodes(strict); 187 hashes[i] = (simplify ? nodes.Simplify(Hash Util.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();197 hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray(); 188 198 Array.Sort(hashes[i]); 189 199 } 190 // compute similarity matrix 191 for (int i = 0; i < trees.Count - 1; ++i) { 192 for (int j = i + 1; j < trees.Count; ++j) { 193 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 194 } 195 } 196 return sim; 200 return ComputeSimilarityMatrix(hashes); 197 201 } 198 202 #endregion … … 210 214 var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray(); 211 215 212 // construct tree top down (assumes postfix order for nodes)213 216 for (int i = nodes.Length - 1; i >= 0; --i) { 214 217 var node = nodes[i]; … … 245 248 // (in other words simplification should be applied in a bottom-up fashion) 246 249 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 247 ulong hashFunction(byte[] bytes) => HashUtil.DJBHash(bytes); 248 var root = tree.ActualRoot(); 249 var nodes = root.MakeNodes(); 250 var simplified = nodes.Simplify(hashFunction); 251 return simplified.ToTree(); 250 return tree.MakeNodes().Simplify(HashFunction).ToTree(); 252 251 } 253 252 … … 286 285 287 286 var symbol = child.Data.Symbol; 288 if (symbol is Constant) { 287 if (child.Data is ConstantTreeNode firstConst) { 288 // fold sibling constant nodes into the first constant 289 289 for (int k = j + 1; k < children.Length; ++k) { 290 var d = children[k];291 if ( nodes[d].Data.Symbol is Constant) {292 nodes[d].Enabled = false;290 var sibling = nodes[children[k]]; 291 if (sibling.Data is ConstantTreeNode otherConst) { 292 sibling.Enabled = false; 293 293 node.Arity--; 294 firstConst.Value *= otherConst.Value; 295 } else { 296 break; 297 } 298 } 299 } else if (child.Data is VariableTreeNode variable) { 300 // fold sibling constant nodes into the variable weight 301 for (int k = j + 1; k < children.Length; ++k) { 302 var sibling = nodes[children[k]]; 303 if (sibling.Data is ConstantTreeNode constantNode) { 304 sibling.Enabled = false; 305 node.Arity--; 306 variable.Weight *= constantNode.Value; 294 307 } else { 295 308 break; … … 297 310 } 298 311 } else if (symbol is Division) { 299 var div = nodes[c]; 300 var denominator = 301 div.Arity == 1 ? 302 nodes[c - 1] : // 1 / x is expressed as div(x) (with a single child) 303 nodes[c - nodes[c - 1].Size - 2]; // assume division always has arity 1 or 2 304 305 foreach (var d in children) { 306 if (nodes[d].Enabled && nodes[d] == denominator) { 307 nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false; 312 // 1/x is expressed as div(x) (with a single child) 313 // we assume division always has arity 1 or 2 314 var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2; 315 var denominator = nodes[d]; 316 317 // iterate children of node i to see if any of them matches the denominator of div node c 318 for (int k = 0; k < children.Length; ++k) { 319 var sibling = nodes[children[k]]; 320 if (sibling.Enabled && sibling == denominator) { 321 nodes.SetEnabled(children[j], false); // disable the div subtree 322 nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator 323 308 324 node.Arity -= 2; // matching child + division node 309 325 break; … … 370 386 nodes[i].Enabled = false; 371 387 } else if ((parentSymbol is Exponential && childSymbol is Logarithm) || (parentSymbol is Logarithm && childSymbol is Exponential)) { 372 // exp(log(x)) == x only for positive x. We consider this as equivalent for hashing anyway.373 388 child.Enabled = parent.Enabled = false; 374 389 }
Note: See TracChangeset
for help on using the changeset viewer.