Changeset 16283
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/SymbolicExpressionTreeBottomUpSimilarityCalculator.cs
r16279 r16283 30 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 31 31 32 using NodeMap = System.Collections.Generic.Dictionary<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode, HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>; 33 32 34 namespace HeuristicLab.Problems.DataAnalysis.Symbolic { 33 35 [StorableClass] … … 55 57 } 56 58 59 #region static methods 60 private static ISymbolicExpressionTreeNode ActualRoot(ISymbolicExpressionTree tree) { 61 return tree.Root.GetSubtree(0).GetSubtree(0); 62 } 63 64 public static double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool strict = false) { 65 return CalculateSimilarity(ActualRoot(t1), ActualRoot(t2), strict); 66 } 67 68 public static double CalculateSimilarity(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2, bool strict = false) { 69 var calculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict }; 70 return CalculateSimilarity(n1, n2, strict); 71 } 72 73 public static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool strict = false) { 74 return ComputeBottomUpMapping(ActualRoot(t1), ActualRoot(t2), strict); 75 } 76 77 public static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2, bool strict = false) { 78 var calculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict }; 79 return calculator.ComputeBottomUpMapping(n1, n2); 80 } 81 #endregion 82 57 83 public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 58 if (t1 == t2) 59 return 1; 60 61 var actualRoot1 = t1.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols 62 var actualRoot2 = t2.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols 63 var map = ComputeBottomUpMapping(actualRoot1, actualRoot2); 64 return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees 65 } 66 67 public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, out Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) { 84 return CalculateSimilarity(t1, t2, out Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map); 85 } 86 87 public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, out NodeMap map) { 68 88 if (t1 == t2) { 69 89 map = null; 70 90 return 1; 71 91 } 72 73 var actualRoot1 = t1.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols 74 var actualRoot2 = t2.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols 75 map = ComputeBottomUpMapping(actualRoot1, actualRoot2); 76 92 map = ComputeBottomUpMapping(t1, t2); 77 93 return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees 78 94 } … … 95 111 } 96 112 113 public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 114 return ComputeBottomUpMapping(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0)); 115 } 116 97 117 public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) { 98 var comparer = new SymbolicExpressionTreeNodeComparer(); // use a node comparer because it's faster than calling node.ToString() (strings are expensive) and comparing strings99 118 var compactedGraph = Compact(n1, n2); 100 119 101 var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2 102 var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1 103 104 // visit nodes in order of decreasing height to ensure correct mapping 105 var nodes1 = (List<ISymbolicExpressionTreeNode>)n1.IterateNodesPrefix(); 106 var nodes2 = (List<ISymbolicExpressionTreeNode>)n2.IterateNodesPrefix(); 107 108 foreach (var v in nodes1) { 109 if (forwardMap.ContainsKey(v)) { 120 IEnumerable<ISymbolicExpressionTreeNode> Subtrees(ISymbolicExpressionTreeNode node, bool commutative) { 121 var subtrees = node.IterateNodesPrefix(); 122 return commutative ? subtrees.OrderBy(x => compactedGraph[x].Hash) : subtrees; 123 } 124 125 var nodes1 = n1.IterateNodesPostfix().OrderByDescending(x => x.GetLength()); // by descending length so that largest subtrees are mapped first 126 var nodes2 = (List<ISymbolicExpressionTreeNode>)n2.IterateNodesPostfix(); 127 128 var forward = new NodeMap(); 129 var reverse = new NodeMap(); 130 131 foreach (ISymbolicExpressionTreeNode v in nodes1) { 132 if (forward.ContainsKey(v)) 110 133 continue; 134 135 var kv = compactedGraph[v]; 136 var commutative = v.SubtreeCount > 1 && commutativeSymbols.Contains(kv.Label); 137 138 foreach (ISymbolicExpressionTreeNode w in nodes2) { 139 if (w.GetLength() != kv.Length || w.GetDepth() != kv.Depth || reverse.ContainsKey(w) || compactedGraph[w] != kv) 140 continue; 141 142 // map one whole subtree to the other 143 foreach (var t in Subtrees(v, commutative).Zip(Subtrees(w, commutative), Tuple.Create)) { 144 forward[t.Item1] = t.Item2; 145 reverse[t.Item2] = t.Item1; 146 } 147 148 break; 111 149 } 112 113 var kv = compactedGraph[v]; 114 115 var w = nodes2.Last(); 116 int k = nodes2.Count - 1; 117 118 for (int j = 0; j < nodes2.Count; ++j) { 119 var t = nodes2[j]; 120 121 if (reverseMap.ContainsKey(t) || kv != compactedGraph[t]) 122 continue; 123 124 if (j < k) { 125 w = t; 126 k = j; 127 } 128 } 129 130 if (kv != compactedGraph[w]) { 131 continue; 132 } 133 134 // at this point we know that v and w are isomorphic (same length and depth) 135 // however, the mapping cannot be done directly (as in the paper) 136 // because the trees are unordered (subtree order might differ). 137 // the solution is to sort once again (this will work because the subtrees are isomorphic!) 138 var vv = v.IterateNodesPrefix().OrderBy(x => compactedGraph[x].Hash); 139 var ww = w.IterateNodesPrefix().OrderBy(x => compactedGraph[x].Hash); 140 141 foreach (var pair in vv.Zip(ww, Tuple.Create)) { 142 var s = pair.Item1; 143 var t = pair.Item2; 144 145 if (reverseMap.ContainsKey(t)) 146 continue; 147 148 forwardMap[s] = t; 149 reverseMap[t] = s; 150 } 151 } 152 153 return forwardMap; 150 } 151 152 return forward; 154 153 } 155 154 … … 167 166 var graph = new List<GraphNode>(); 168 167 168 IEnumerable<GraphNode> Subtrees(GraphNode g, bool commutative) { 169 var subtrees = g.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]); 170 return commutative ? subtrees.OrderBy(x => x.Hash) : subtrees; 171 } 172 169 173 foreach (var node in nodes) { 170 174 var label = GetLabel(node); … … 172 176 if (node.SubtreeCount == 0) { 173 177 if (!labelMap.ContainsKey(label)) { 174 var g = new GraphNode(node, label); 175 graph.Add(g); 176 labelMap[label] = g; 178 labelMap[label] = new GraphNode(node, label); 177 179 } 178 180 nodeMap[node] = labelMap[label]; … … 182 184 var commutative = node.SubtreeCount > 1 && commutativeSymbols.Contains(label); 183 185 184 IEnumerable<GraphNode> vv, ww; 185 186 for (int i = graph.Count - 1; i >= 0; --i) { 187 var w = graph[i]; 188 186 var vv = Subtrees(v, commutative); 187 188 foreach (var w in graph) { 189 189 if (v.Depth != w.Depth || v.SubtreeCount != w.SubtreeCount || v.Length != w.Length || v.Label != w.Label) { 190 190 continue; 191 191 } 192 192 193 vv = commutative ? v.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]).OrderBy(x => x.Hash) : v.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]); 194 ww = commutative ? w.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]).OrderBy(x => x.Hash) : w.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]); 193 var ww = Subtrees(w, commutative); 195 194 found = vv.SequenceEqual(ww); 196 195 … … 234 233 235 234 public int Hash { get; } 236 237 235 public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode { get; } 238 239 236 public string Label { get; } 240 241 237 public int Depth { get; } 242 243 238 public int SubtreeCount { get { return SymbolicExpressionTreeNode.SubtreeCount; } } 244 245 239 public int Length { get; } 246 240 } -
trunk/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4/SymbolicExpressionTreeBottomUpSimilarityCalculatorTest.cs
r14354 r16283 7 7 [TestClass] 8 8 public class BottomUpSimilarityCalculatorTest { 9 private readonly SymbolicExpressionTreeBottomUpSimilarityCalculator busCalculator;10 private readonly SymbolicExpressionImporter importer ;9 private readonly SymbolicExpressionTreeBottomUpSimilarityCalculator similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator() { MatchConstantValues = false, MatchVariableWeights = false }; 10 private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter(); 11 11 12 private const int N = 1 50;12 private const int N = 1000; 13 13 private const int Rows = 1; 14 14 private const int Columns = 10; 15 15 16 16 public BottomUpSimilarityCalculatorTest() { 17 busCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator(); 18 importer = new SymbolicExpressionImporter(); 17 var parser = new InfixExpressionParser(); 19 18 } 20 19 … … 23 22 [TestProperty("Time", "short")] 24 23 public void BottomUpTreeSimilarityCalculatorTestMapping() { 25 TestMatchedNodes("(+ 1 2)", "(+ 2 1)", 5); 26 TestMatchedNodes("(- 2 1)", "(- 1 2)", 2); 27 TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (+ (variable 1 X2) 1))", 2); 24 TestMatchedNodes("(+ 1 1)", "(+ 2 2)", 0, strict: true); 25 TestMatchedNodes("(+ 1 1)", "(+ 2 2)", 3, strict: false); 26 TestMatchedNodes("(+ 1 1)", "(+ 1 2)", 1, strict: true); 27 TestMatchedNodes("(+ 2 1)", "(+ 1 2)", 3, strict: true); 28 28 29 TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (variable 1 X2))", 2); 29 TestMatchedNodes("(- 1 1)", "(- 2 2)", 0, strict: true); 30 TestMatchedNodes("(- 1 1)", "(- 2 2)", 3, strict: false); 30 31 31 TestMatchedNodes("(+ (variable 1 a) (variable 1 b))", "(+ (variable 1 a) (variable 1 a))", 1); 32 TestMatchedNodes("(+ (+ (variable 1 a) (variable 1 b)) (variable 1 b))", "(+ (* (+ (variable 1 a) (variable 1 b)) (variable 1 b)) (+ (+ (variable 1 a) (variable 1 b)) (variable 1 b)))", 5); 33 34 TestMatchedNodes( 35 "(* (+ 2.84 (exp (+ (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))) (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))))) 2.9081)", 36 "(* (- (variable 9.581e-1 X6) (+ (- (variable 5.1491e-1 X5) 1.614e+1) (+ (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)) (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)))))) 2.9081)", 37 9); 38 39 TestMatchedNodes("(* (* (* (variable 1.68 x) (* (variable 1.68 x) (variable 2.55 x))) (variable 1.68 x)) (* (* (variable 1.68 x) (* (variable 1.68 x) (* (variable 1.68 x) (variable 2.55 x)))) (variable 2.55 x)))", "(* (variable 2.55 x) (* (variable 1.68 x) (* (variable 1.68 x) (* (variable 1.68 x) (variable 2.55 x)))))", 9); 40 41 TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ -4.3072 (variable 2.4691 X7)) (exp 2.1033)))", 6); 42 TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ (variable 2.4691 X7) -4.3072) (exp 2.1033)))", 4); 43 44 const string expr1 = "(* (- 1.2175e+1 (+ (/ (exp -1.4134e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (log 1.3011e+1))"; 45 const string expr2 = "(* (- 1.2175e+1 (+ (/ (/ (+ (variable 3.0140 X9) (variable 1.3430 X8)) -1.0864e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (exp (variable 4.0899e-1 X7)))"; 46 47 TestMatchedNodes(expr1, expr2, 23); 48 32 TestMatchedNodes("(- 2 1)", "(- 1 2)", 2, strict: true); 33 TestMatchedNodes("(- 2 1)", "(- 1 2)", 3, strict: false); 49 34 } 50 35 51 private void TestMatchedNodes(string expr1, string expr2, int expected ) {36 private void TestMatchedNodes(string expr1, string expr2, int expected, bool strict) { 52 37 var t1 = importer.Import(expr1); 53 38 var t2 = importer.Import(expr2); 54 39 55 var mapping = busCalculator.ComputeBottomUpMapping(t1.Root, t2.Root); 56 var c = mapping.Count; 40 var map = SymbolicExpressionTreeBottomUpSimilarityCalculator.ComputeBottomUpMapping(t1, t2, strict); 57 41 58 if ( c!= expected) {59 throw new Exception( "Match count " + c + " is different than expected value " + expected);42 if (map.Count != expected) { 43 throw new Exception($"Match count {map.Count} is different than expected value {expected} for expressions:\n{expr1} and {expr2} (strict = {strict})"); 60 44 } 61 45 } … … 77 61 for (int i = 0; i < trees.Length - 1; ++i) { 78 62 for (int j = i + 1; j < trees.Length; ++j) { 79 s += busCalculator.CalculateSimilarity(trees[i], trees[j]);63 s += similarityCalculator.CalculateSimilarity(trees[i], trees[j]); 80 64 } 81 65 }
Note: See TracChangeset
for help on using the changeset viewer.