source: branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Matching/SymbolicExpressionTreeMatching.cs @ 11239

Last change on this file since 11239 was 11239, checked in by bburlacu, 7 years ago

#2215:

  • Renamed BottomUpSimilarityCalculator to BottomUpTreeSimilarityCalculator.
  • Refactored the BottomUpTreeSimilarityCalculator to accept a configurable list of commutative symbols (the children of commutative symbols need to be sorted according to their label).
  • Added MaxCommonSubtreeSimilarityCalculator performance test
  • Updated BottomUpTreeSimilarityCalculatorTest
File size: 3.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26//using HeuristicLab.EvolutionTracking;
27
28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
29  public static class SymbolicExpressionTreeMatching {
30    public static bool ContainsSubtree(this ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode subtree, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
31      return FindMatches(root, subtree, comparer).Any();
32    }
33    public static IEnumerable<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode subtree, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
34      return FindMatches(tree.Root, subtree, comparer);
35    }
36
37    public static IEnumerable<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode subtree, SymbolicExpressionTreeNodeSimilarityComparer comp) {
38      var fragmentLength = subtree.GetLength();
39      // below, we use ">=" for Match(n, subtree, comp) >= fragmentLength because in case of relaxed conditions,
40      // we can have multiple matches of the same node
41
42      return root.IterateNodesBreadth().Where(n => n.GetLength() >= fragmentLength && Match(n, subtree, comp) == fragmentLength);
43    }
44
45    ///<summary>
46    /// Finds the longest common subsequence in quadratic time and linear space
47    /// Variant of:
48    /// D. S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975.
49    /// http://dl.acm.org/citation.cfm?id=360861
50    /// </summary>
51    /// <returns>Number of pairs that were matched</returns>
52    public static int Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, ISymbolicExpressionTreeNodeSimilarityComparer comp) {
53      if (!comp.Equals(a, b)) return 0;
54      int m = a.SubtreeCount;
55      int n = b.SubtreeCount;
56      if (m == 0 || n == 0) return 1;
57      var matrix = new int[m + 1, n + 1];
58      for (int i = 1; i <= m; ++i) {
59        var ai = a.GetSubtree(i - 1);
60        for (int j = 1; j <= n; ++j) {
61          var bj = b.GetSubtree(j - 1);
62          int match = Match(ai, bj, comp);
63          matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match);
64        }
65      }
66      return matrix[m, n] + 1;
67    }
68  }
69}
Note: See TracBrowser for help on using the repository browser.