Changeset 17193 for branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing
- Timestamp:
- 08/07/19 13:32:09 (5 years ago)
- Location:
- branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic
- Property svn:mergeinfo changed
-
branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs
r16676 r17193 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-2019Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 21 21 22 22 using System; 23 using System. Collections.Generic;23 using System.Linq; 24 24 25 25 namespace HeuristicLab.Problems.DataAnalysis.Symbolic { 26 26 public static class SymbolicExpressionHashExtensions { 27 /// <summary> 28 /// Holds data that is necessary to handle tree nodes in hashing / simplification. 29 /// </summary> 30 /// <typeparam name="T">The tree node type</typeparam> 27 31 public sealed class HashNode<T> : IComparable<HashNode<T>>, IEquatable<HashNode<T>> where T : class { 28 32 public T Data; … … 38 42 public SimplifyAction Simplify; 39 43 40 public IComparer<T> Comparer;41 42 44 public bool IsLeaf => Arity == 0; 43 45 44 public HashNode(IComparer<T> comparer) {45 Comparer = comparer;46 }47 48 private HashNode() { }49 50 46 public int CompareTo(HashNode<T> other) { 51 var res = Comparer.Compare(Data, other.Data);47 var res = HashValue.CompareTo(other.HashValue); 52 48 return res == 0 ? CalculatedHashValue.CompareTo(other.CalculatedHashValue) : res; 53 49 } … … 103 99 104 100 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 105 var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction); 106 107 for (int i = 0; i < reduced.Length; ++i) { 108 var node = reduced[i]; 109 if (node.IsLeaf) { 110 continue; 111 } 112 node.Simplify?.Invoke(ref reduced, i); 113 } 114 // detect if anything was simplified 115 var count = 0; 116 foreach (var node in reduced) { 117 if (!node.Enabled) { ++count; } 118 } 119 if (count == 0) { 120 return reduced; 121 } 122 123 var simplified = new HashNode<T>[reduced.Length - count]; 124 int j = 0; 125 foreach (var node in reduced) { 126 if (node.Enabled) { 127 simplified[j++] = node; 128 } 129 } 130 return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction); 101 bool simplified = false; 102 nodes = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction); 103 do { 104 if (simplified) { 105 simplified = false; 106 nodes = nodes.Where(x => x.Enabled).ToArray().UpdateNodeSizes().Reduce().Sort(hashFunction); 107 } 108 109 for (int i = 0; i < nodes.Length; ++i) { 110 var node = nodes[i]; 111 if (node.IsLeaf) { 112 continue; 113 } 114 node.Simplify?.Invoke(ref nodes, i); 115 for (int j = i - node.Size; j <= i; ++j) { 116 // detect if anything was simplified 117 if (!nodes[j].Enabled) { 118 simplified = true; 119 break; 120 } 121 } 122 } 123 } while (simplified); 124 return nodes.UpdateNodeSizes().Sort(hashFunction); 131 125 } 132 126 … … 173 167 174 168 /// <summary> 175 /// Get a function node's child indices t169 /// Get a function node's child indices 176 170 /// </summary> 177 171 /// <typeparam name="T">The data type encapsulated by a hash node</typeparam> 178 /// <param name="nodes">An array of hash nodes with up-to-date node sizes </param>172 /// <param name="nodes">An array of hash nodes with up-to-date node sizes (see UpdateNodeSizes)</param> 179 173 /// <param name="i">The index in the array of hash nodes of the node whose children we want to iterate</param> 180 174 /// <returns>An array containing child indices</returns> … … 191 185 } 192 186 187 /// <summary> 188 /// Determines size of each branch and sets the results for each node. 189 /// </summary> 190 /// <typeparam name="T">The data type encapsulated by a hash node</typeparam> 191 /// <param name="nodes">An array of hash nodes in postfix order.</param> 192 /// <returns>The array with updated node sizes. The array is not copied.</returns> 193 193 public static HashNode<T>[] UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class { 194 194 for (int i = 0; i < nodes.Length; ++i) { … … 199 199 } 200 200 node.Size = node.Arity; 201 201 // visit all children and sum up their size (assumes postfix order). 202 202 for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) { 203 203 node.Size += nodes[j].Size; … … 207 207 } 208 208 209 private static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class { 209 // disables duplicate branches and removes the disabled nodes 210 public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class { 210 211 int count = 0; 211 212 for (int i = 0; i < nodes.Length; ++i) { -
branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs
r16676 r17193 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-2019Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. -
branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16676 r17193 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-2019Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 38 38 private static readonly Constant constant = new Constant(); 39 39 40 private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();41 40 private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0); 41 public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input); 42 42 43 43 #region tree hashing … … 47 47 48 48 public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) { 49 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 50 51 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction); 49 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction); 52 50 var hashes = new ulong[hashNodes.Length]; 53 51 for (int i = 0; i < hashes.Length; ++i) { … … 74 72 } 75 73 var hash = (ulong)name.GetHashCode(); 76 var hashNode = new HashNode<ISymbolicExpressionTreeNode> (comparer){74 var hashNode = new HashNode<ISymbolicExpressionTreeNode> { 77 75 Data = node, 78 76 Arity = node.SubtreeCount, … … 168 166 for (int i = 0; i < trees.Count; ++i) { 169 167 var nodes = trees[i].MakeNodes(strict); 170 hashes[i] = (simplify ? nodes.Simplify(Hash Util.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();168 hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray(); 171 169 Array.Sort(hashes[i]); 172 170 } … … 180 178 } 181 179 180 public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) { 181 // compute similarity matrix 182 var n = hashes.Count; 183 var sim = new double[n, n]; 184 for (int i = 0; i < n - 1; ++i) { 185 for (int j = i + 1; j < n; ++j) { 186 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 187 } 188 } 189 return sim; 190 } 191 182 192 public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 183 var sim = new double[trees.Count, trees.Count];184 193 var hashes = new ulong[trees.Count][]; 185 194 // build hash arrays 186 195 for (int i = 0; i < trees.Count; ++i) { 187 196 var nodes = trees[i].MakeNodes(strict); 188 hashes[i] = (simplify ? nodes.Simplify(Hash Util.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();197 hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray(); 189 198 Array.Sort(hashes[i]); 190 199 } 191 // compute similarity matrix 192 for (int i = 0; i < trees.Count - 1; ++i) { 193 for (int j = i + 1; j < trees.Count; ++j) { 194 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 195 } 196 } 197 return sim; 200 return ComputeSimilarityMatrix(hashes); 198 201 } 199 202 #endregion … … 245 248 // (in other words simplification should be applied in a bottom-up fashion) 246 249 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 247 ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes); 248 var root = tree.Root.GetSubtree(0).GetSubtree(0); 249 var nodes = root.MakeNodes(); 250 var simplified = nodes.Simplify(hashFunction); 251 return simplified.ToTree(); 250 return tree.MakeNodes().Simplify(HashFunction).ToTree(); 252 251 } 253 252 … … 286 285 287 286 var symbol = child.Data.Symbol; 288 if (symbol is Constant) { 287 if (child.Data is ConstantTreeNode firstConst) { 288 // fold sibling constant nodes into the first constant 289 289 for (int k = j + 1; k < children.Length; ++k) { 290 var d = children[k];291 if ( nodes[d].Data.Symbol is Constant) {292 nodes[d].Enabled = false;290 var sibling = nodes[children[k]]; 291 if (sibling.Data is ConstantTreeNode otherConst) { 292 sibling.Enabled = false; 293 293 node.Arity--; 294 firstConst.Value *= otherConst.Value; 295 } else { 296 break; 297 } 298 } 299 } else if (child.Data is VariableTreeNode variable) { 300 // fold sibling constant nodes into the variable weight 301 for (int k = j + 1; k < children.Length; ++k) { 302 var sibling = nodes[children[k]]; 303 if (sibling.Data is ConstantTreeNode constantNode) { 304 sibling.Enabled = false; 305 node.Arity--; 306 variable.Weight *= constantNode.Value; 294 307 } else { 295 308 break; … … 297 310 } 298 311 } else if (symbol is Division) { 299 var div = nodes[c]; 300 var denominator = 301 div.Arity == 1 ? 302 nodes[c - 1] : // 1 / x is expressed as div(x) (with a single child) 303 nodes[c - nodes[c - 1].Size - 2]; // assume division always has arity 1 or 2 304 305 foreach (var d in children) { 306 if (nodes[d].Enabled && nodes[d] == denominator) { 307 nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false; 312 // 1/x is expressed as div(x) (with a single child) 313 // we assume division always has arity 1 or 2 314 var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2; 315 var denominator = nodes[d]; 316 317 // iterate children of node i to see if any of them matches the denominator of div node c 318 for (int k = 0; k < children.Length; ++k) { 319 var sibling = nodes[children[k]]; 320 if (sibling.Enabled && sibling == denominator) { 321 nodes.SetEnabled(children[j], false); // disable the div subtree 322 nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator 323 308 324 node.Arity -= 2; // matching child + division node 309 325 break;
Note: See TracChangeset
for help on using the changeset viewer.