Free cookie consent management tool by TermsFeed Policy Generator

Changeset 16283


Ignore:
Timestamp:
11/07/18 14:42:35 (6 years ago)
Author:
bburlacu
Message:

#2959: Refactor bottom-up similarity calculator methods (main logic stays the same, better implementation), update unit tests.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/SymbolicExpressionTreeBottomUpSimilarityCalculator.cs

    r16279 r16283  
    3030using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3131
     32using NodeMap = System.Collections.Generic.Dictionary<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode, HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>;
     33
    3234namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    3335  [StorableClass]
     
    5557    }
    5658
     59    #region static methods
     60    private static ISymbolicExpressionTreeNode ActualRoot(ISymbolicExpressionTree tree) {
     61      return tree.Root.GetSubtree(0).GetSubtree(0);
     62    }
     63
     64    public static double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool strict = false) {
     65      return CalculateSimilarity(ActualRoot(t1), ActualRoot(t2), strict);
     66    }
     67
     68    public static double CalculateSimilarity(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2, bool strict = false) {
     69      var calculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict };
     70      return CalculateSimilarity(n1, n2, strict);
     71    }
     72
     73    public static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool strict = false) {
     74      return ComputeBottomUpMapping(ActualRoot(t1), ActualRoot(t2), strict);
     75    }
     76
     77    public static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2, bool strict = false) {
     78      var calculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict };
     79      return calculator.ComputeBottomUpMapping(n1, n2);
     80    }
     81    #endregion
     82
    5783    public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
    58       if (t1 == t2)
    59         return 1;
    60 
    61       var actualRoot1 = t1.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
    62       var actualRoot2 = t2.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
    63       var map = ComputeBottomUpMapping(actualRoot1, actualRoot2);
    64       return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees
    65     }
    66 
    67     public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, out Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
     84      return CalculateSimilarity(t1, t2, out Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map);
     85    }
     86
     87    public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, out NodeMap map) {
    6888      if (t1 == t2) {
    6989        map = null;
    7090        return 1;
    7191      }
    72 
    73       var actualRoot1 = t1.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
    74       var actualRoot2 = t2.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
    75       map = ComputeBottomUpMapping(actualRoot1, actualRoot2);
    76 
     92      map = ComputeBottomUpMapping(t1, t2);
    7793      return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees
    7894    }
     
    95111    }
    96112
     113    public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
     114      return ComputeBottomUpMapping(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0));
     115    }
     116
    97117    public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
    98       var comparer = new SymbolicExpressionTreeNodeComparer(); // use a node comparer because it's faster than calling node.ToString() (strings are expensive) and comparing strings
    99118      var compactedGraph = Compact(n1, n2);
    100119
    101       var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
    102       var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
    103 
    104       // visit nodes in order of decreasing height to ensure correct mapping
    105       var nodes1 = (List<ISymbolicExpressionTreeNode>)n1.IterateNodesPrefix();
    106       var nodes2 = (List<ISymbolicExpressionTreeNode>)n2.IterateNodesPrefix();
    107 
    108       foreach (var v in nodes1) {
    109         if (forwardMap.ContainsKey(v)) {
     120      IEnumerable<ISymbolicExpressionTreeNode> Subtrees(ISymbolicExpressionTreeNode node, bool commutative) {
     121        var subtrees = node.IterateNodesPrefix();
     122        return commutative ? subtrees.OrderBy(x => compactedGraph[x].Hash) : subtrees;
     123      }
     124
     125      var nodes1 = n1.IterateNodesPostfix().OrderByDescending(x => x.GetLength()); // by descending length so that largest subtrees are mapped first
     126      var nodes2 = (List<ISymbolicExpressionTreeNode>)n2.IterateNodesPostfix();
     127
     128      var forward = new NodeMap();
     129      var reverse = new NodeMap();
     130
     131      foreach (ISymbolicExpressionTreeNode v in nodes1) {
     132        if (forward.ContainsKey(v))
    110133          continue;
     134
     135        var kv = compactedGraph[v];
     136        var commutative = v.SubtreeCount > 1 && commutativeSymbols.Contains(kv.Label);
     137
     138        foreach (ISymbolicExpressionTreeNode w in nodes2) {
     139          if (w.GetLength() != kv.Length || w.GetDepth() != kv.Depth || reverse.ContainsKey(w) || compactedGraph[w] != kv)
     140            continue;
     141
     142          // map one whole subtree to the other
     143          foreach (var t in Subtrees(v, commutative).Zip(Subtrees(w, commutative), Tuple.Create)) {
     144            forward[t.Item1] = t.Item2;
     145            reverse[t.Item2] = t.Item1;
     146          }
     147
     148          break;
    111149        }
    112 
    113         var kv = compactedGraph[v];
    114 
    115         var w = nodes2.Last();
    116         int k = nodes2.Count - 1;
    117 
    118         for (int j = 0; j < nodes2.Count; ++j) {
    119           var t = nodes2[j];
    120 
    121           if (reverseMap.ContainsKey(t) || kv != compactedGraph[t])
    122             continue;
    123 
    124           if (j < k) {
    125             w = t;
    126             k = j;
    127           }
    128         }
    129 
    130         if (kv != compactedGraph[w]) {
    131           continue;
    132         }
    133 
    134         // at this point we know that v and w are isomorphic (same length and depth)
    135         // however, the mapping cannot be done directly (as in the paper)
    136         // because the trees are unordered (subtree order might differ).
    137         // the solution is to sort once again (this will work because the subtrees are isomorphic!)
    138         var vv = v.IterateNodesPrefix().OrderBy(x => compactedGraph[x].Hash);
    139         var ww = w.IterateNodesPrefix().OrderBy(x => compactedGraph[x].Hash);
    140 
    141         foreach (var pair in vv.Zip(ww, Tuple.Create)) {
    142           var s = pair.Item1;
    143           var t = pair.Item2;
    144 
    145           if (reverseMap.ContainsKey(t))
    146             continue;
    147 
    148           forwardMap[s] = t;
    149           reverseMap[t] = s;
    150         }
    151       }
    152 
    153       return forwardMap;
     150      }
     151
     152      return forward;
    154153    }
    155154
     
    167166      var graph = new List<GraphNode>();
    168167
     168      IEnumerable<GraphNode> Subtrees(GraphNode g, bool commutative) {
     169        var subtrees = g.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]);
     170        return commutative ? subtrees.OrderBy(x => x.Hash) : subtrees;
     171      }
     172
    169173      foreach (var node in nodes) {
    170174        var label = GetLabel(node);
     
    172176        if (node.SubtreeCount == 0) {
    173177          if (!labelMap.ContainsKey(label)) {
    174             var g = new GraphNode(node, label);
    175             graph.Add(g);
    176             labelMap[label] = g;
     178            labelMap[label] = new GraphNode(node, label);
    177179          }
    178180          nodeMap[node] = labelMap[label];
     
    182184          var commutative = node.SubtreeCount > 1 && commutativeSymbols.Contains(label);
    183185
    184           IEnumerable<GraphNode> vv, ww;
    185 
    186           for (int i = graph.Count - 1; i >= 0; --i) {
    187             var w = graph[i];
    188 
     186          var vv = Subtrees(v, commutative);
     187
     188          foreach (var w in graph) {
    189189            if (v.Depth != w.Depth || v.SubtreeCount != w.SubtreeCount || v.Length != w.Length || v.Label != w.Label) {
    190190              continue;
    191191            }
    192192
    193             vv = commutative ? v.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]).OrderBy(x => x.Hash) : v.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]);
    194             ww = commutative ? w.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]).OrderBy(x => x.Hash) : w.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]);
     193            var ww = Subtrees(w, commutative);
    195194            found = vv.SequenceEqual(ww);
    196195
     
    234233
    235234      public int Hash { get; }
    236 
    237235      public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode { get; }
    238 
    239236      public string Label { get; }
    240 
    241237      public int Depth { get; }
    242 
    243238      public int SubtreeCount { get { return SymbolicExpressionTreeNode.SubtreeCount; } }
    244 
    245239      public int Length { get; }
    246240    }
  • trunk/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4/SymbolicExpressionTreeBottomUpSimilarityCalculatorTest.cs

    r14354 r16283  
    77  [TestClass]
    88  public class BottomUpSimilarityCalculatorTest {
    9     private readonly SymbolicExpressionTreeBottomUpSimilarityCalculator busCalculator;
    10     private readonly SymbolicExpressionImporter importer;
     9    private readonly SymbolicExpressionTreeBottomUpSimilarityCalculator similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator() { MatchConstantValues = false, MatchVariableWeights = false };
     10    private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
    1111
    12     private const int N = 150;
     12    private const int N = 1000;
    1313    private const int Rows = 1;
    1414    private const int Columns = 10;
    1515
    1616    public BottomUpSimilarityCalculatorTest() {
    17       busCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator();
    18       importer = new SymbolicExpressionImporter();
     17      var parser = new InfixExpressionParser();
    1918    }
    2019
     
    2322    [TestProperty("Time", "short")]
    2423    public void BottomUpTreeSimilarityCalculatorTestMapping() {
    25       TestMatchedNodes("(+ 1 2)", "(+ 2 1)", 5);
    26       TestMatchedNodes("(- 2 1)", "(- 1 2)", 2);
    27       TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (+ (variable 1 X2) 1))", 2);
     24      TestMatchedNodes("(+ 1 1)", "(+ 2 2)", 0, strict: true);
     25      TestMatchedNodes("(+ 1 1)", "(+ 2 2)", 3, strict: false);
     26      TestMatchedNodes("(+ 1 1)", "(+ 1 2)", 1, strict: true);
     27      TestMatchedNodes("(+ 2 1)", "(+ 1 2)", 3, strict: true);
    2828
    29       TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (variable 1 X2))", 2);
     29      TestMatchedNodes("(- 1 1)", "(- 2 2)", 0, strict: true);
     30      TestMatchedNodes("(- 1 1)", "(- 2 2)", 3, strict: false);
    3031
    31       TestMatchedNodes("(+ (variable 1 a) (variable 1 b))", "(+ (variable 1 a) (variable 1 a))", 1);
    32       TestMatchedNodes("(+ (+ (variable 1 a) (variable 1 b)) (variable 1 b))", "(+ (* (+ (variable 1 a) (variable 1 b)) (variable 1 b)) (+ (+ (variable 1 a) (variable 1 b)) (variable 1 b)))", 5);
    33 
    34       TestMatchedNodes(
    35         "(* (+ 2.84 (exp (+ (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))) (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))))) 2.9081)",
    36         "(* (- (variable 9.581e-1 X6) (+ (- (variable 5.1491e-1 X5) 1.614e+1) (+ (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)) (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)))))) 2.9081)",
    37         9);
    38 
    39       TestMatchedNodes("(* (* (* (variable 1.68 x) (* (variable 1.68 x) (variable 2.55 x))) (variable 1.68 x)) (* (* (variable 1.68 x) (* (variable 1.68 x) (* (variable 1.68 x) (variable 2.55 x)))) (variable 2.55 x)))", "(* (variable 2.55 x) (* (variable 1.68 x) (* (variable 1.68 x) (* (variable 1.68 x) (variable 2.55 x)))))", 9);
    40 
    41       TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ -4.3072 (variable 2.4691 X7)) (exp 2.1033)))", 6);
    42       TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ (variable 2.4691 X7) -4.3072) (exp 2.1033)))", 4);
    43 
    44       const string expr1 = "(* (- 1.2175e+1 (+ (/ (exp -1.4134e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (log 1.3011e+1))";
    45       const string expr2 = "(* (- 1.2175e+1 (+ (/ (/ (+ (variable 3.0140 X9) (variable 1.3430 X8)) -1.0864e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (exp (variable 4.0899e-1 X7)))";
    46 
    47       TestMatchedNodes(expr1, expr2, 23);
    48 
     32      TestMatchedNodes("(- 2 1)", "(- 1 2)", 2, strict: true);
     33      TestMatchedNodes("(- 2 1)", "(- 1 2)", 3, strict: false);
    4934    }
    5035
    51     private void TestMatchedNodes(string expr1, string expr2, int expected) {
     36    private void TestMatchedNodes(string expr1, string expr2, int expected, bool strict) {
    5237      var t1 = importer.Import(expr1);
    5338      var t2 = importer.Import(expr2);
    5439
    55       var mapping = busCalculator.ComputeBottomUpMapping(t1.Root, t2.Root);
    56       var c = mapping.Count;
     40      var map = SymbolicExpressionTreeBottomUpSimilarityCalculator.ComputeBottomUpMapping(t1, t2, strict);
    5741
    58       if (c != expected) {
    59         throw new Exception("Match count " + c + " is different than expected value " + expected);
     42      if (map.Count != expected) {
     43        throw new Exception($"Match count {map.Count} is different than expected value {expected} for expressions:\n{expr1} and {expr2} (strict = {strict})");
    6044      }
    6145    }
     
    7761      for (int i = 0; i < trees.Length - 1; ++i) {
    7862        for (int j = i + 1; j < trees.Length; ++j) {
    79           s += busCalculator.CalculateSimilarity(trees[i], trees[j]);
     63          s += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
    8064        }
    8165      }
Note: See TracChangeset for help on using the changeset viewer.