Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeMatching.cs @ 10787

Last change on this file since 10787 was 10347, checked in by bburlacu, 11 years ago

#1772: Small changes to the GenealogyGraph. Added generic Fragment class and interface. Added the SymbolicDataAnalysisPopulationDiversityAnalyzer. Added specialized tracking operators for symbolic data analysis. Merged trunk changes.

File size: 10.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using HeuristicLab.Common;
5using HeuristicLab.Core;
6using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
7using HeuristicLab.EvolutionTracking;
8using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
9using HeuristicLab.Problems.DataAnalysis.Symbolic;
10
11namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
12  [Item("SymbolicExpressionTreeNodeSimilarityComparer", "A comparison operator that checks node equality based on different similarity measures.")]
13  [StorableClass]
14  public class SymbolicExpressionTreeNodeSimilarityComparer : Item, ISymbolicExpressionTreeNodeSimilarityComparer {
15    [StorableConstructor]
16    private SymbolicExpressionTreeNodeSimilarityComparer(bool deserializing) : base(deserializing) { }
17    private SymbolicExpressionTreeNodeSimilarityComparer(SymbolicExpressionTreeNodeSimilarityComparer original, Cloner cloner)
18      : base(original, cloner) {
19      matchConstantValues = original.matchConstantValues;
20      matchVariableNames = original.matchVariableNames;
21      matchVariableWeights = original.matchVariableWeights;
22    }
23    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeNodeSimilarityComparer(this, cloner); }
24
25    // more flexible matching criteria
26    [Storable]
27    private bool matchConstantValues;
28    public bool MatchConstantValues {
29      get { return matchConstantValues; }
30      set { matchConstantValues = value; }
31    }
32
33    [Storable]
34    private bool matchVariableNames;
35    public bool MatchVariableNames {
36      get { return matchVariableNames; }
37      set { matchVariableNames = value; }
38    }
39
40    [Storable]
41    private bool matchVariableWeights;
42    public bool MatchVariableWeights {
43      get { return matchVariableWeights; }
44      set { matchVariableWeights = value; }
45    }
46
47    [StorableHook(HookType.AfterDeserialization)]
48    private void AfterDeserialization() {
49    }
50
51    public SymbolicExpressionTreeNodeSimilarityComparer() {
52      matchConstantValues = true;
53      matchVariableNames = true;
54      matchVariableWeights = true;
55    }
56
57    public int GetHashCode(ISymbolicExpressionTreeNode n) {
58      return n.ToString().ToLower().GetHashCode();
59    }
60
61    public bool Equals(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
62      if (!(a is SymbolicExpressionTreeTerminalNode))
63        // if a and b are non terminal nodes, check equality of symbol names
64        return !(b is SymbolicExpressionTreeTerminalNode) && a.Symbol.Name.Equals(b.Symbol.Name);
65      var va = a as VariableTreeNode;
66      if (va != null) {
67        var vb = b as VariableTreeNode;
68        if (vb == null) return false;
69
70        return (!MatchVariableNames || va.VariableName.Equals(vb.VariableName)) && (!MatchVariableWeights || va.Weight.Equals(vb.Weight));
71      }
72      var ca = a as ConstantTreeNode;
73      if (ca != null) {
74        var cb = b as ConstantTreeNode;
75        if (cb == null) return false;
76        return (!MatchConstantValues || ca.Value.Equals(cb.Value));
77      }
78      return false;
79    }
80  }
81
82  public class SymbolicExpressionTreeFragmentSimilarityComparer : IEqualityComparer<IFragment<ISymbolicExpressionTreeNode>> {
83    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
84
85    public bool Equals(IFragment<ISymbolicExpressionTreeNode> x, IFragment<ISymbolicExpressionTreeNode> y) {
86      if (SimilarityComparer == null)
87        throw new ArgumentNullException("SimilarityComparer needs to be initialized first.");
88      return x.Root.GetLength() == y.Root.GetLength() && SymbolicExpressionTreeMatching.Match(x.Root, y.Root, SimilarityComparer) == x.Root.GetLength();
89    }
90
91    public int GetHashCode(IFragment<ISymbolicExpressionTreeNode> fragment) {
92      return fragment.Root.Symbol.Name.ToLower().GetHashCode();
93    }
94  }
95
96  // this comparer considers that a < b if the type of a is "greater" than the type of b, for example:
97  // - A function node is "greater" than a terminal node
98  // - A variable terminal is "greater" than a constant terminal
99  // - used for bringing subtrees to a "canonical" form when the operation allows reordering of arguments
100  public class SymbolicExpressionTreeNodeComparer : ISymbolicExpressionTreeNodeComparer {
101    public int Compare(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
102      if (!(a is SymbolicExpressionTreeTerminalNode)) {
103        return b is SymbolicExpressionTreeTerminalNode
104          ? -1
105          : string.Compare(a.Symbol.Name, b.Symbol.Name, StringComparison.Ordinal);
106      }
107      if (!(b is SymbolicExpressionTreeTerminalNode)) return 1;
108      // at this point we know a and b are terminal nodes
109      var va = a as VariableTreeNode;
110      if (va != null) {
111        if (b is ConstantTreeNode) return -1;
112        var vb = (VariableTreeNode)b;
113        return (va.VariableName.Equals(vb.VariableName)
114                  ? va.Weight.CompareTo(vb.Weight)
115                  : string.Compare(va.VariableName, vb.VariableName, StringComparison.Ordinal));
116      }
117      // at this point we know for sure that a is a constant tree node
118      if (b is VariableTreeNode) return 1;
119      var ca = (ConstantTreeNode)a;
120      var cb = (ConstantTreeNode)b;
121      return ca.Value.CompareTo(cb.Value);
122    }
123  }
124
125  // tree equality
126  public class SymbolicExpressionTreeEqualityComparer : IEqualityComparer<ISymbolicExpressionTree> {
127    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
128
129    public bool Equals(ISymbolicExpressionTree a, ISymbolicExpressionTree b) {
130      if (SimilarityComparer == null) throw new Exception("SimilarityComparer needs to be initialized first.");
131      return a.Length == b.Length &&
132             SymbolicExpressionTreeMatching.Match(a.Root, b.Root, SimilarityComparer) == Math.Min(a.Length, b.Length);
133    }
134
135    public int GetHashCode(ISymbolicExpressionTree tree) {
136      return (tree.Length << 8) % 12345;
137    }
138  }
139
140  public class SymbolicExpressionTreeCanonicalSorter {
141    private readonly HashSet<Type> nonSymmetricSymbols = new HashSet<Type> { typeof(Subtraction), typeof(Division) };
142
143    public void SortSubtrees(ISymbolicExpressionTree tree) {
144      SortSubtrees(tree.Root);
145    }
146
147    public void SortSubtrees(ISymbolicExpressionTreeNode node) {
148      if (node.SubtreeCount == 0) return;
149      var subtrees = node.Subtrees as List<ISymbolicExpressionTreeNode> ?? node.Subtrees.ToList();
150      if (IsSymmetric(node.Symbol)) {
151        var comparer = new SymbolicExpressionTreeNodeComparer();
152        subtrees.Sort(comparer);
153      }
154      foreach (var s in subtrees)
155        SortSubtrees(s);
156    }
157
158    private bool IsSymmetric(ISymbol s) {
159      return !nonSymmetricSymbols.Contains(s.GetType());
160    }
161  }
162}
163
164public static class SymbolicExpressionTreeMatching {
165  public static bool ContainsFragment(this ISymbolicExpressionTreeNode root, IFragment<ISymbolicExpressionTreeNode> fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
166    return FindMatches(root, fragment.Root, comparer).Any();
167  }
168  public static IEnumerable<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
169    return FindMatches(tree.Root, fragment, comparer);
170  }
171
172  public static IEnumerable<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comp) {
173    var fragmentLength = fragment.GetLength();
174    // below, we use ">=" for Match(n, fragment, comp) >= fragmentLength because in case of relaxed conditions,
175    // we can have multiple matches of the same node
176
177    return root.IterateNodesBreadth().Where(n => n.GetLength() >= fragmentLength && Match(n, fragment, comp) == fragmentLength);
178  }
179
180  ///<summary>
181  /// Finds the longest common subsequence in quadratic time and linear space
182  /// Variant of:
183  /// D. S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975.
184  /// http://dl.acm.org/citation.cfm?id=360861
185  /// </summary>
186  /// <returns>Number of pairs that were matched</returns>
187  public static int Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
188    if (!comp.Equals(a, b)) return 0;
189    int m = a.SubtreeCount;
190    int n = b.SubtreeCount;
191    if (m == 0 || n == 0) return 1;
192    var matrix = new int[m + 1, n + 1];
193    for (int i = 1; i <= m; ++i) {
194      var ai = a.GetSubtree(i - 1);
195      for (int j = 1; j <= n; ++j) {
196        var bj = b.GetSubtree(j - 1);
197        int match = Match(ai, bj, comp);
198        matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match);
199      }
200    }
201    return matrix[m, n] + 1;
202  }
203}
204
205// this maximal common subsequence calculator follows the same approach as the Match method above and should yield identical results
206// the difference is that the maximal common subsequence calculator returns the actual common subsequence as a list
207public class MaximalCommonSubsequenceCalculator {
208  private ISymbolicExpressionTreeNode[] x;
209  private ISymbolicExpressionTreeNode[] y;
210  private List<ISymbolicExpressionTreeNode> maxCommonSubseq;
211  private double[,] matrix;
212
213  public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
214
215  public List<ISymbolicExpressionTreeNode> Calculate(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
216    if (SimilarityComparer == null) throw new Exception("Comparer cannot be null.");
217
218    x = n1.IterateNodesPrefix().ToArray();
219    y = n2.IterateNodesPrefix().ToArray();
220
221    if (maxCommonSubseq == null) maxCommonSubseq = new List<ISymbolicExpressionTreeNode>();
222    else maxCommonSubseq.Clear();
223
224    int n = x.Length;
225    int m = y.Length;
226
227    matrix = new double[n + 1, m + 1];
228
229    for (int i = 0; i <= n; ++i) {
230      for (int j = 0; j <= m; ++j) {
231        if (i == 0 || j == 0) {
232          matrix[i, j] = 0;
233        } else if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) {
234          matrix[i, j] = matrix[i - 1, j - 1] + 1;
235        } else {
236          matrix[i, j] = Math.Max(matrix[i - 1, j], matrix[i, j - 1]);
237        }
238      }
239    }
240    recon(n, m);
241    return new List<ISymbolicExpressionTreeNode>(maxCommonSubseq);
242  }
243
244  private void recon(int i, int j) {
245    while (true) {
246      if (i == 0 || j == 0) return;
247      if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) {
248        recon(i - 1, j - 1);
249        maxCommonSubseq.Add(x[i - 1]);
250      } else if (matrix[i - 1, j] > matrix[i, j - 1]) {
251        i = i - 1;
252        continue;
253      } else {
254        j = j - 1;
255        continue;
256      }
257      break;
258    }
259  }
260}
Note: See TracBrowser for help on using the repository browser.