Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeMatching.cs @ 9651

Last change on this file since 9651 was 9423, checked in by bburlacu, 12 years ago

#1772: Implemented GeneticItem-based similarity measure. Renamed ISymbolicExpressionTreeNodeComparer to ISymbolicExpressionTreeNodeSimilarityComparer.

File size: 10.3 KB
RevLine 
[8557]1using System;
2using System.Collections.Generic;
[8213]3using System.Linq;
[8557]4using HeuristicLab.Common;
5using HeuristicLab.Core;
[9249]6using HeuristicLab.Data;
[8213]7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[8557]8using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
[9241]9using HeuristicLab.Problems.DataAnalysis.Symbolic;
[8213]10
11namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[8557]12  [Item("SymbolicExpressionTreeNodeSimilarityComparer", "A comparison operator that checks node equality based on different similarity measures.")]
13  [StorableClass]
[9423]14  public class SymbolicExpressionTreeNodeSimilarityComparer : Item, ISymbolicExpressionTreeNodeSimilarityComparer {
[8557]15    [StorableConstructor]
16    private SymbolicExpressionTreeNodeSimilarityComparer(bool deserializing) : base(deserializing) { }
[9241]17    private SymbolicExpressionTreeNodeSimilarityComparer(SymbolicExpressionTreeNodeSimilarityComparer original, Cloner cloner)
18      : base(original, cloner) {
19      matchConstantValues = original.matchConstantValues;
20      matchVariableNames = original.matchVariableNames;
21      matchVariableWeights = original.matchVariableWeights;
22    }
[8557]23    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeNodeSimilarityComparer(this, cloner); }
[8213]24
[9241]25    // more flexible matching criteria
26    [Storable]
[9249]27    private BoolValue matchConstantValues;
[9241]28    public bool MatchConstantValues {
[9249]29      get { return matchConstantValues.Value; }
30      set { matchConstantValues.Value = value; }
[8213]31    }
32
[9241]33    [Storable]
[9249]34    private BoolValue matchVariableNames;
[9241]35    public bool MatchVariableNames {
[9249]36      get { return matchVariableNames.Value; }
37      set { matchVariableNames.Value = value; }
[8557]38    }
39
[9241]40    [Storable]
[9249]41    private BoolValue matchVariableWeights;
[9241]42    public bool MatchVariableWeights {
[9249]43      get { return matchVariableWeights.Value; }
44      set { matchVariableWeights.Value = value; }
[8557]45    }
46
[9241]47    [StorableHook(HookType.AfterDeserialization)]
48    private void AfterDeserialization() {
49    }
50
51    public SymbolicExpressionTreeNodeSimilarityComparer() {
[9249]52      matchConstantValues = new BoolValue(true);
53      matchVariableNames = new BoolValue(true);
54      matchVariableWeights = new BoolValue(true);
[9241]55    }
56
[8213]57    public int GetHashCode(ISymbolicExpressionTreeNode n) {
58      return n.ToString().ToLower().GetHashCode();
59    }
60
61    public bool Equals(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
[9241]62      if (!(a is SymbolicExpressionTreeTerminalNode))
63        // if a and b are non terminal nodes, check equality of symbol names
64        return !(b is SymbolicExpressionTreeTerminalNode) && a.Symbol.Name.Equals(b.Symbol.Name);
[8557]65      var va = a as VariableTreeNode;
[9241]66      if (va != null) {
67        var vb = b as VariableTreeNode;
68        if (vb == null) return false;
[8213]69
[9241]70        return (!MatchVariableNames || va.VariableName.Equals(vb.VariableName)) && (!MatchVariableWeights || va.Weight.Equals(vb.Weight));
[8213]71      }
[9241]72      var ca = a as ConstantTreeNode;
73      if (ca != null) {
74        var cb = b as ConstantTreeNode;
75        if (cb == null) return false;
76        return (!MatchConstantValues || ca.Value.Equals(cb.Value));
77      }
78      return false;
[8213]79    }
[9241]80  }
[8213]81
[9241]82  public class SymbolicExpressionTreeFragmentSimilarityComparer : IEqualityComparer<IFragment> {
83    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
84
85    public bool Equals(IFragment x, IFragment y) {
86      if (SimilarityComparer == null)
87        throw new ArgumentNullException("SimilarityComparer needs to be initialized first.");
88      return x.Length == y.Length &&
89             SymbolicExpressionTreeMatching.Match(x.Root, y.Root, SimilarityComparer) == x.Length;
[8557]90    }
91
[9241]92    public int GetHashCode(IFragment fragment) {
93      return fragment.Root.Symbol.Name.ToLower().GetHashCode();
[8557]94    }
[8213]95  }
96
[9241]97  // this comparer considers that a < b if the type of a is "greater" than the type of b, for example:
98  // - A function node is "greater" than a terminal node
99  // - A variable terminal is "greater" than a constant terminal
[9423]100  public class SymbolicExpressionTreeNodeComparer : ISymbolicExpressionTreeNodeComparer {
[8213]101    public int Compare(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
[9241]102      if (!(a is SymbolicExpressionTreeTerminalNode)) {
103        if (b is SymbolicExpressionTreeTerminalNode) return -1;
104        return string.Compare(a.Symbol.Name, b.Symbol.Name, StringComparison.Ordinal);
105      }
106      if (!(b is SymbolicExpressionTreeTerminalNode)) return 1;
107      // at this point we know a and b are terminal nodes
[8557]108      var va = a as VariableTreeNode;
[9241]109      if (va != null) {
110        if (b is ConstantTreeNode) return -1;
111        var vb = (VariableTreeNode)b;
112        return (va.VariableName.Equals(vb.VariableName)
113                  ? va.Weight.CompareTo(vb.Weight)
114                  : string.Compare(va.VariableName, vb.VariableName, StringComparison.Ordinal));
115      }
116      // at this point we know for sure that a is a constant tree node
117      if (b is VariableTreeNode) return 1;
118      var ca = (ConstantTreeNode)a;
119      var cb = (ConstantTreeNode)b;
120      return ca.Value.CompareTo(cb.Value);
[8213]121    }
122  }
123
124  // tree equality
125  public class SymbolicExpressionTreeEqualityComparer : IEqualityComparer<ISymbolicExpressionTree> {
[9241]126    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
127
[8213]128    public bool Equals(ISymbolicExpressionTree a, ISymbolicExpressionTree b) {
[9241]129      if (SimilarityComparer == null) throw new ArgumentNullException("SimilarityComparer needs to be initialized first.");
130      return SymbolicExpressionTreeMatching.Match(a.Root, b.Root, SimilarityComparer) == Math.Min(a.Length, b.Length);
[8213]131    }
132
133    public int GetHashCode(ISymbolicExpressionTree tree) {
[8557]134      return (tree.Length << 8) % 12345;
[8213]135    }
136  }
137
[9241]138  public class SymbolicExpressionTreeCanonicalSorter {
139    private readonly HashSet<Type> nonSymmetricSymbols = new HashSet<Type> { typeof(Subtraction), typeof(Division) };
[8213]140
[9241]141    public void SortSubtrees(ISymbolicExpressionTree tree) {
[8213]142      SortSubtrees(tree.Root);
143    }
144
[9241]145    public void SortSubtrees(ISymbolicExpressionTreeNode node) {
[8557]146      if (node.SubtreeCount == 0) return;
[9241]147      var subtrees = node.Subtrees as List<ISymbolicExpressionTreeNode> ?? node.Subtrees.ToList();
148      if (IsSymmetric(node.Symbol)) {
149        var comparer = new SymbolicExpressionTreeNodeComparer();
150        subtrees.Sort(comparer);
151      }
152      foreach (var s in subtrees)
153        SortSubtrees(s);
[8213]154    }
155
[9241]156    private bool IsSymmetric(ISymbol s) {
157      return !nonSymmetricSymbols.Contains(s.GetType());
[8557]158    }
[9241]159  }
160}
[8557]161
[9241]162public static class SymbolicExpressionTreeMatching {
163  public static bool ContainsFragment(this ISymbolicExpressionTreeNode root, IFragment fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
164    return FindMatches(root, fragment.Root, comparer).Any();
165  }
166  public static IEnumerable<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
167    return FindMatches(tree.Root, fragment, comparer);
168  }
[8557]169
[9241]170  public static IEnumerable<ISymbolicExpressionTreeNode>
[8557]171    FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comp) {
[9241]172    var fragmentLength = fragment.GetLength();
173    // below, we use ">=" for Match(n, fragment, comp) >= fragmentLength because in case of relaxed conditions,
174    // we can have multiple matches of the same node
[9293]175
[9241]176    return root.IterateNodesBreadth().Where(n => n.GetLength() >= fragmentLength && Match(n, fragment, comp) == fragmentLength);
177  }
[8557]178
[9241]179  ///<summary>
180  /// Finds the longest common subsequence in quadratic time and linear space
181  /// Variant of:
182  /// D. S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975.
183  /// http://dl.acm.org/citation.cfm?id=360861
184  /// </summary>
185  /// <returns>Number of pairs that were matched</returns>
186  public static int Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
187    if (!comp.Equals(a, b)) return 0;
188    int m = a.SubtreeCount;
189    int n = b.SubtreeCount;
190    if (m == 0 || n == 0) return 1;
191    var matrix = new int[m + 1, n + 1];
192    for (int i = 1; i <= m; ++i) {
193      var ai = a.GetSubtree(i - 1);
194      for (int j = 1; j <= n; ++j) {
195        var bj = b.GetSubtree(j - 1);
196        int match = Match(ai, bj, comp);
197        matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match);
[8557]198      }
199    }
[9241]200    return matrix[m, n] + 1;
[8213]201  }
[9293]202}
203
204public class MaximalCommonSubsequenceCalculator {
205  private ISymbolicExpressionTreeNode[] x;
206  private ISymbolicExpressionTreeNode[] y;
207  private List<ISymbolicExpressionTreeNode> maxCommonSubseq;
208  private double[,] matrix;
209
[9423]210  public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
[9293]211
212  public List<ISymbolicExpressionTreeNode> Calculate(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
[9423]213    if (SimilarityComparer == null) throw new Exception("Comparer cannot be null.");
[9293]214
215    x = n1.IterateNodesPrefix().ToArray();
216    y = n2.IterateNodesPrefix().ToArray();
217
218    if (maxCommonSubseq == null) maxCommonSubseq = new List<ISymbolicExpressionTreeNode>();
219    else maxCommonSubseq.Clear();
220
221    int n = x.Length;
222    int m = y.Length;
223
224    matrix = new double[n + 1, m + 1];
225
226    for (int i = 0; i <= n; ++i) {
227      for (int j = 0; j <= m; ++j) {
228        if (i == 0 || j == 0) {
229          matrix[i, j] = 0;
[9423]230        } else if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) {
[9293]231          matrix[i, j] = matrix[i - 1, j - 1] + 1;
232        } else {
233          matrix[i, j] = Math.Max(matrix[i - 1, j], matrix[i, j - 1]);
234        }
235      }
236    }
237    recon(n, m);
238    return new List<ISymbolicExpressionTreeNode>(maxCommonSubseq);
239  }
240
241  private void recon(int i, int j) {
242    if (i == 0 || j == 0) return;
[9423]243    if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) {
[9293]244      recon(i - 1, j - 1);
245      maxCommonSubseq.Add(x[i - 1]);
246    } else if (matrix[i - 1, j] > matrix[i, j - 1]) {
247      recon(i - 1, j);
248    } else recon(i, j - 1);
249  }
[9241]250}
Note: See TracBrowser for help on using the repository browser.