Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeMatching.cs @ 12062

Last change on this file since 12062 was 9835, checked in by bburlacu, 11 years ago

#1772: Merged remaining trunk changes into the EvolutionaryTracking branch.

File size: 10.2 KB
RevLine 
[8557]1using System;
2using System.Collections.Generic;
[8213]3using System.Linq;
[8557]4using HeuristicLab.Common;
5using HeuristicLab.Core;
[8213]6using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[8557]7using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
[9241]8using HeuristicLab.Problems.DataAnalysis.Symbolic;
[8213]9
10namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[8557]11  [Item("SymbolicExpressionTreeNodeSimilarityComparer", "A comparison operator that checks node equality based on different similarity measures.")]
12  [StorableClass]
[9423]13  public class SymbolicExpressionTreeNodeSimilarityComparer : Item, ISymbolicExpressionTreeNodeSimilarityComparer {
[8557]14    [StorableConstructor]
15    private SymbolicExpressionTreeNodeSimilarityComparer(bool deserializing) : base(deserializing) { }
[9241]16    private SymbolicExpressionTreeNodeSimilarityComparer(SymbolicExpressionTreeNodeSimilarityComparer original, Cloner cloner)
17      : base(original, cloner) {
18      matchConstantValues = original.matchConstantValues;
19      matchVariableNames = original.matchVariableNames;
20      matchVariableWeights = original.matchVariableWeights;
21    }
[8557]22    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeNodeSimilarityComparer(this, cloner); }
[8213]23
[9241]24    // more flexible matching criteria
25    [Storable]
[9835]26    private bool matchConstantValues;
[9241]27    public bool MatchConstantValues {
[9835]28      get { return matchConstantValues; }
29      set { matchConstantValues = value; }
[8213]30    }
31
[9241]32    [Storable]
[9835]33    private bool matchVariableNames;
[9241]34    public bool MatchVariableNames {
[9835]35      get { return matchVariableNames; }
36      set { matchVariableNames = value; }
[8557]37    }
38
[9241]39    [Storable]
[9835]40    private bool matchVariableWeights;
[9241]41    public bool MatchVariableWeights {
[9835]42      get { return matchVariableWeights; }
43      set { matchVariableWeights = value; }
[8557]44    }
45
[9241]46    [StorableHook(HookType.AfterDeserialization)]
47    private void AfterDeserialization() {
48    }
49
50    public SymbolicExpressionTreeNodeSimilarityComparer() {
[9835]51      matchConstantValues = true;
52      matchVariableNames = true;
53      matchVariableWeights = true;
[9241]54    }
55
[8213]56    public int GetHashCode(ISymbolicExpressionTreeNode n) {
57      return n.ToString().ToLower().GetHashCode();
58    }
59
60    public bool Equals(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
[9241]61      if (!(a is SymbolicExpressionTreeTerminalNode))
62        // if a and b are non terminal nodes, check equality of symbol names
63        return !(b is SymbolicExpressionTreeTerminalNode) && a.Symbol.Name.Equals(b.Symbol.Name);
[8557]64      var va = a as VariableTreeNode;
[9241]65      if (va != null) {
66        var vb = b as VariableTreeNode;
67        if (vb == null) return false;
[8213]68
[9241]69        return (!MatchVariableNames || va.VariableName.Equals(vb.VariableName)) && (!MatchVariableWeights || va.Weight.Equals(vb.Weight));
[8213]70      }
[9241]71      var ca = a as ConstantTreeNode;
72      if (ca != null) {
73        var cb = b as ConstantTreeNode;
74        if (cb == null) return false;
75        return (!MatchConstantValues || ca.Value.Equals(cb.Value));
76      }
77      return false;
[8213]78    }
[9241]79  }
[8213]80
[9241]81  public class SymbolicExpressionTreeFragmentSimilarityComparer : IEqualityComparer<IFragment> {
82    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
83
84    public bool Equals(IFragment x, IFragment y) {
85      if (SimilarityComparer == null)
86        throw new ArgumentNullException("SimilarityComparer needs to be initialized first.");
87      return x.Length == y.Length &&
88             SymbolicExpressionTreeMatching.Match(x.Root, y.Root, SimilarityComparer) == x.Length;
[8557]89    }
90
[9241]91    public int GetHashCode(IFragment fragment) {
92      return fragment.Root.Symbol.Name.ToLower().GetHashCode();
[8557]93    }
[8213]94  }
95
[9241]96  // this comparer considers that a < b if the type of a is "greater" than the type of b, for example:
97  // - A function node is "greater" than a terminal node
98  // - A variable terminal is "greater" than a constant terminal
[9423]99  public class SymbolicExpressionTreeNodeComparer : ISymbolicExpressionTreeNodeComparer {
[8213]100    public int Compare(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
[9241]101      if (!(a is SymbolicExpressionTreeTerminalNode)) {
102        if (b is SymbolicExpressionTreeTerminalNode) return -1;
103        return string.Compare(a.Symbol.Name, b.Symbol.Name, StringComparison.Ordinal);
104      }
105      if (!(b is SymbolicExpressionTreeTerminalNode)) return 1;
106      // at this point we know a and b are terminal nodes
[8557]107      var va = a as VariableTreeNode;
[9241]108      if (va != null) {
109        if (b is ConstantTreeNode) return -1;
110        var vb = (VariableTreeNode)b;
111        return (va.VariableName.Equals(vb.VariableName)
112                  ? va.Weight.CompareTo(vb.Weight)
113                  : string.Compare(va.VariableName, vb.VariableName, StringComparison.Ordinal));
114      }
115      // at this point we know for sure that a is a constant tree node
116      if (b is VariableTreeNode) return 1;
117      var ca = (ConstantTreeNode)a;
118      var cb = (ConstantTreeNode)b;
119      return ca.Value.CompareTo(cb.Value);
[8213]120    }
121  }
122
123  // tree equality
124  public class SymbolicExpressionTreeEqualityComparer : IEqualityComparer<ISymbolicExpressionTree> {
[9241]125    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
126
[8213]127    public bool Equals(ISymbolicExpressionTree a, ISymbolicExpressionTree b) {
[9241]128      if (SimilarityComparer == null) throw new ArgumentNullException("SimilarityComparer needs to be initialized first.");
129      return SymbolicExpressionTreeMatching.Match(a.Root, b.Root, SimilarityComparer) == Math.Min(a.Length, b.Length);
[8213]130    }
131
132    public int GetHashCode(ISymbolicExpressionTree tree) {
[8557]133      return (tree.Length << 8) % 12345;
[8213]134    }
135  }
136
[9241]137  public class SymbolicExpressionTreeCanonicalSorter {
138    private readonly HashSet<Type> nonSymmetricSymbols = new HashSet<Type> { typeof(Subtraction), typeof(Division) };
[8213]139
[9241]140    public void SortSubtrees(ISymbolicExpressionTree tree) {
[8213]141      SortSubtrees(tree.Root);
142    }
143
[9241]144    public void SortSubtrees(ISymbolicExpressionTreeNode node) {
[8557]145      if (node.SubtreeCount == 0) return;
[9241]146      var subtrees = node.Subtrees as List<ISymbolicExpressionTreeNode> ?? node.Subtrees.ToList();
147      if (IsSymmetric(node.Symbol)) {
148        var comparer = new SymbolicExpressionTreeNodeComparer();
149        subtrees.Sort(comparer);
150      }
151      foreach (var s in subtrees)
152        SortSubtrees(s);
[8213]153    }
154
[9241]155    private bool IsSymmetric(ISymbol s) {
156      return !nonSymmetricSymbols.Contains(s.GetType());
[8557]157    }
[9241]158  }
159}
[8557]160
[9241]161public static class SymbolicExpressionTreeMatching {
162  public static bool ContainsFragment(this ISymbolicExpressionTreeNode root, IFragment fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
163    return FindMatches(root, fragment.Root, comparer).Any();
164  }
165  public static IEnumerable<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
166    return FindMatches(tree.Root, fragment, comparer);
167  }
[8557]168
[9241]169  public static IEnumerable<ISymbolicExpressionTreeNode>
[8557]170    FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comp) {
[9241]171    var fragmentLength = fragment.GetLength();
172    // below, we use ">=" for Match(n, fragment, comp) >= fragmentLength because in case of relaxed conditions,
173    // we can have multiple matches of the same node
[9293]174
[9241]175    return root.IterateNodesBreadth().Where(n => n.GetLength() >= fragmentLength && Match(n, fragment, comp) == fragmentLength);
176  }
[8557]177
[9241]178  ///<summary>
179  /// Finds the longest common subsequence in quadratic time and linear space
180  /// Variant of:
181  /// D. S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975.
182  /// http://dl.acm.org/citation.cfm?id=360861
183  /// </summary>
184  /// <returns>Number of pairs that were matched</returns>
185  public static int Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
186    if (!comp.Equals(a, b)) return 0;
187    int m = a.SubtreeCount;
188    int n = b.SubtreeCount;
189    if (m == 0 || n == 0) return 1;
190    var matrix = new int[m + 1, n + 1];
191    for (int i = 1; i <= m; ++i) {
192      var ai = a.GetSubtree(i - 1);
193      for (int j = 1; j <= n; ++j) {
194        var bj = b.GetSubtree(j - 1);
195        int match = Match(ai, bj, comp);
196        matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match);
[8557]197      }
198    }
[9241]199    return matrix[m, n] + 1;
[8213]200  }
[9293]201}
202
203public class MaximalCommonSubsequenceCalculator {
204  private ISymbolicExpressionTreeNode[] x;
205  private ISymbolicExpressionTreeNode[] y;
206  private List<ISymbolicExpressionTreeNode> maxCommonSubseq;
207  private double[,] matrix;
208
[9423]209  public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
[9293]210
211  public List<ISymbolicExpressionTreeNode> Calculate(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
[9423]212    if (SimilarityComparer == null) throw new Exception("Comparer cannot be null.");
[9293]213
214    x = n1.IterateNodesPrefix().ToArray();
215    y = n2.IterateNodesPrefix().ToArray();
216
217    if (maxCommonSubseq == null) maxCommonSubseq = new List<ISymbolicExpressionTreeNode>();
218    else maxCommonSubseq.Clear();
219
220    int n = x.Length;
221    int m = y.Length;
222
223    matrix = new double[n + 1, m + 1];
224
225    for (int i = 0; i <= n; ++i) {
226      for (int j = 0; j <= m; ++j) {
227        if (i == 0 || j == 0) {
228          matrix[i, j] = 0;
[9423]229        } else if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) {
[9293]230          matrix[i, j] = matrix[i - 1, j - 1] + 1;
231        } else {
232          matrix[i, j] = Math.Max(matrix[i - 1, j], matrix[i, j - 1]);
233        }
234      }
235    }
236    recon(n, m);
237    return new List<ISymbolicExpressionTreeNode>(maxCommonSubseq);
238  }
239
240  private void recon(int i, int j) {
[9835]241    while (true) {
242      if (i == 0 || j == 0) return;
243      if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) {
244        recon(i - 1, j - 1);
245        maxCommonSubseq.Add(x[i - 1]);
246      } else if (matrix[i - 1, j] > matrix[i, j - 1]) {
247        i = i - 1;
248        continue;
249      } else {
250        j = j - 1;
251        continue;
252      }
253      break;
254    }
[9293]255  }
[9241]256}
Note: See TracBrowser for help on using the repository browser.