Changeset 16304 for branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
- Timestamp:
- 11/19/18 14:34:25 (5 years ago)
- Location:
- branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic
-
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16218 r16304 1 using System.Collections.Generic; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * 5 * This file is part of HeuristicLab. 6 * 7 * HeuristicLab is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * HeuristicLab is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 #endregion 21 22 using System; 2 23 using System.Linq; 3 24 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 18 39 private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer(); 19 40 20 public static intComputeHash(this ISymbolicExpressionTree tree) {41 public static ulong ComputeHash(this ISymbolicExpressionTree tree) { 21 42 return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0)); 22 43 } 23 44 24 public static Dictionary<ISymbolicExpressionTreeNode, int> ComputeNodeHashes(this ISymbolicExpressionTree tree) { 25 var root = tree.Root.GetSubtree(0).GetSubtree(0); 26 var nodes = root.MakeNodes(); 27 nodes.UpdateNodeSizes(); 28 29 for (int i = 0; i < nodes.Length; ++i) { 30 if (nodes[i].IsChild) 31 continue; 32 nodes[i].CalculatedHashValue = nodes.ComputeHash(i); 33 } 34 return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue); 35 } 36 37 public static int ComputeHash(this ISymbolicExpressionTreeNode treeNode) { 38 var hashNodes = treeNode.MakeNodes(); 39 var simplified = hashNodes.Simplify(); 40 return ComputeHash(simplified); 41 } 42 43 public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) { 44 int hash = 1315423911; 45 foreach (var node in nodes) 46 hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2); 47 return hash; 48 } 49 50 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) { 45 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) { 46 return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify); 47 } 48 49 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) { 50 HashNode<ISymbolicExpressionTreeNode>[] lhs; 51 HashNode<ISymbolicExpressionTreeNode>[] rhs; 52 53 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 54 55 if (simplify) { 56 lhs = t1.MakeNodes().Simplify(hashFunction); 57 rhs = t2.MakeNodes().Simplify(hashFunction); 58 } else { 59 lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values 60 rhs = t2.MakeNodes().Sort(hashFunction); 61 } 62 63 var lh = lhs.Select(x => x.CalculatedHashValue).ToArray(); 64 var rh = rhs.Select(x => x.CalculatedHashValue).ToArray(); 65 66 Array.Sort(lh); 67 Array.Sort(rh); 68 69 return ComputeSimilarity(lh, rh); 70 } 71 72 // this will only work if lh and rh are sorted 73 private static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 74 double count = 0; 75 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 76 var h1 = lh[i]; 77 var h2 = rh[j]; 78 if (h1 == h2) { 79 ++count; 80 ++i; 81 ++j; 82 } else if (h1 < h2) { 83 ++i; 84 } else if (h1 > h2) { 85 ++j; 86 } 87 } 88 89 return 2d * count / (lh.Length + rh.Length); 90 } 91 92 public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) { 93 var total = (double)trees.Length * (trees.Length - 1) / 2; 94 double avg = 0; 95 var hashes = new ulong[trees.Length][]; 96 // build hash arrays 97 for (int i = 0; i < trees.Length; ++i) { 98 var nodes = trees[i].MakeNodes(strict); 99 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 100 Array.Sort(hashes[i]); 101 } 102 // compute similarity matrix 103 for (int i = 0; i < trees.Length - 1; ++i) { 104 for (int j = i + 1; j < trees.Length; ++j) { 105 avg += ComputeSimilarity(hashes[i], hashes[j]); 106 } 107 } 108 return avg / total; 109 } 110 111 public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) { 112 var sim = new double[trees.Length, trees.Length]; 113 var hashes = new ulong[trees.Length][]; 114 // build hash arrays 115 for (int i = 0; i < trees.Length; ++i) { 116 var nodes = trees[i].MakeNodes(strict); 117 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 118 Array.Sort(hashes[i]); 119 } 120 // compute similarity matrix 121 for (int i = 0; i < trees.Length - 1; ++i) { 122 for (int j = i + 1; j < trees.Length; ++j) { 123 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 124 } 125 } 126 return sim; 127 } 128 129 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) { 130 ulong hashFunction(byte[] input) => HashUtil.JSHash(input); 131 var hashNodes = treeNode.MakeNodes(strict); 132 var simplified = hashNodes.Simplify(hashFunction); 133 return simplified.Last().CalculatedHashValue; 134 } 135 136 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) { 51 137 var symbol = node.Symbol; 52 138 var name = symbol.Name; 53 if (symbol is Variable) { 54 var variableTreeNode = (VariableTreeNode)node; 55 name = variableTreeNode.VariableName; 56 } 57 var hash = name.GetHashCode(); 139 if (node is ConstantTreeNode constantNode) { 140 name = strict ? constantNode.Value.ToString() : symbol.Name; 141 } else if (node is VariableTreeNode variableNode) { 142 name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName; 143 } 144 var hash = (ulong)name.GetHashCode(); 58 145 var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) { 59 146 Data = node, … … 79 166 } 80 167 81 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) { 82 return node.IterateNodesPostfix().Select(ToHashNode).ToArray(); 168 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) { 169 return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict); 170 } 171 172 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) { 173 return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes(); 83 174 } 84 175 … … 98 189 var node = nodes[i]; 99 190 100 if (node.Is Child) {191 if (node.IsLeaf) { 101 192 if (node.Data is VariableTreeNode variable) { 102 193 var variableTreeNode = (VariableTreeNode)treeNodes[i]; 103 194 variableTreeNode.VariableName = variable.VariableName; 104 variableTreeNode.Weight = 1;195 variableTreeNode.Weight = variable.Weight; 105 196 } else if (node.Data is ConstantTreeNode @const) { 106 197 var constantTreeNode = (ConstantTreeNode)treeNodes[i]; … … 129 220 // (in other words simplification should be applied in a bottom-up fashion) 130 221 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 222 ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes); 131 223 var root = tree.Root.GetSubtree(0).GetSubtree(0); 132 224 var nodes = root.MakeNodes(); 133 var simplified = nodes.Simplify( );225 var simplified = nodes.Simplify(hashFunction); 134 226 return simplified.ToTree(); 135 227 } … … 174 266 var d = children[k]; 175 267 if (nodes[d].Data.Symbol is Constant) { 176 ((ConstantTreeNode)child.Data).Value *= ((ConstantTreeNode)nodes[d].Data).Value;177 268 nodes[d].Enabled = false; 178 269 node.Arity--;
Note: See TracChangeset
for help on using the changeset viewer.