Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeMatching.cs @ 8557

Last change on this file since 8557 was 8557, checked in by bburlacu, 12 years ago

#1772: Introduced base class and interface for tracing operators. Introduced SymbolicExpressionTreeNodeComparer interface to be implemented by node matching operators. Fixed bug in the TracingSymbolicExpressionTreeManipulator where nodes modified by the Full- and OnePoint tree shakers were not correctly detected. The SymbolicExpressionTreeNodeSimilarityComparer is now injected in the tracing genetic operators as a parameter.

File size: 12.6 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.IO;
4using System.Linq;
5using HeuristicLab.Common;
6using HeuristicLab.Core;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
9
10namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
11  public enum SimilarityLevel {
12    Exact,   // everything is matched bit by bit (functions and terminals)
13    High,    // symbols are matched. leaf node types are matched
14    Relaxed  // only symbols are matched, leafs count as wildcards
15  }
16  [Item("SymbolicExpressionTreeNodeSimilarityComparer", "A comparison operator that checks node equality based on different similarity measures.")]
17  [StorableClass]
18  public class SymbolicExpressionTreeNodeSimilarityComparer : Item, ISymbolicExpressionTreeNodeComparer {
19    [StorableConstructor]
20    private SymbolicExpressionTreeNodeSimilarityComparer(bool deserializing) : base(deserializing) { }
21    private SymbolicExpressionTreeNodeSimilarityComparer(SymbolicExpressionTreeNodeSimilarityComparer original, Cloner cloner) : base(original, cloner) { }
22    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeNodeSimilarityComparer(this, cloner); }
23
24    private SimilarityLevel similarityLevel;
25    public SimilarityLevel SimilarityLevel {
26      get { return similarityLevel; }
27      set { similarityLevel = value; }
28    }
29
30    public SymbolicExpressionTreeNodeSimilarityComparer() {
31      this.similarityLevel = SimilarityLevel.Exact;
32    }
33
34    public SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel similarityLevel) {
35      this.similarityLevel = similarityLevel;
36    }
37
38    public int GetHashCode(ISymbolicExpressionTreeNode n) {
39      return n.ToString().ToLower().GetHashCode();
40    }
41
42    public bool Equals(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
43      var ca = a as ConstantTreeNode;
44      var cb = b as ConstantTreeNode;
45      var va = a as VariableTreeNode;
46      var vb = b as VariableTreeNode;
47
48      var aIsConstant = ca != null;
49      var aIsVariable = va != null;
50      var bIsConstant = cb != null;
51      var bIsVariable = vb != null;
52      var aIsFunction = (!(aIsConstant | aIsVariable));
53      var bIsFunction = (!(bIsConstant | bIsVariable));
54
55      if (aIsFunction)
56        return bIsFunction && a.Symbol.Name.Equals(b.Symbol.Name);
57      if (bIsFunction) // one is function and the other is not, return false
58        return false;
59
60      switch (similarityLevel) {
61        case (SimilarityLevel.Exact):
62          if (aIsConstant & bIsConstant)
63            return ca.Value.Equals(cb.Value);
64          if (aIsVariable & bIsVariable)
65            return va.Weight.Equals(vb.Weight);
66          return false;
67        case (SimilarityLevel.High):
68          return ((aIsConstant & bIsConstant) || (aIsVariable & bIsVariable));
69        case (SimilarityLevel.Relaxed):
70          return true;
71        default:
72          return false;
73      }
74    }
75
76    public static bool ExactlySimilar(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
77      return Similar(a, b, SimilarityLevel.Exact);
78    }
79
80    public static bool Similar(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SimilarityLevel level) {
81      var comp = new SymbolicExpressionTreeNodeSimilarityComparer(level);
82      return comp.Equals(a, b);
83    }
84  }
85
86  public class SymbolicExpressionTreeNodeOrderingComparer : IComparer<ISymbolicExpressionTreeNode> {
87    public int Compare(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
88      var ca = a as ConstantTreeNode;
89      var cb = b as ConstantTreeNode;
90      var va = a as VariableTreeNode;
91      var vb = b as VariableTreeNode;
92
93      var aIsConstant = ca != null;
94      var aIsVariable = va != null;
95      var bIsConstant = cb != null;
96      var bIsVariable = vb != null;
97      var aIsFunction = (!(aIsConstant | aIsVariable));
98      var bIsFunction = (!(bIsConstant | bIsVariable));
99
100      if (aIsFunction)
101        return bIsFunction ? a.Symbol.Name.CompareTo(b.Symbol.Name) : -1;
102      if (bIsFunction) // a is Constant or Variables
103        return 1;
104      if (aIsVariable)
105        return bIsVariable ? va.Weight.CompareTo(vb.Weight) : -1;
106      return
107        bIsVariable ? 1 : ca.Value.CompareTo(cb.Value);
108    }
109  }
110
111  // tree equality
112  public class SymbolicExpressionTreeEqualityComparer : IEqualityComparer<ISymbolicExpressionTree> {
113    public bool Equals(ISymbolicExpressionTree a, ISymbolicExpressionTree b) {
114      return SymbolicExpressionTreeMatching.FindMatch(a, b, similarityLevel) == 0;
115    }
116
117    public int GetHashCode(ISymbolicExpressionTree tree) {
118      return (tree.Length << 8) % 12345;
119    }
120
121    private SimilarityLevel similarityLevel;
122    public void SetComparisonMode(SimilarityLevel simLevel) {
123      similarityLevel = simLevel;
124    }
125  }
126
127  public static class SymbolicExpressionTreeMatching {
128    public static bool IsSimilarTo(this ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, SimilarityLevel mode) {
129      return FindMatch(t1, t2, mode) == 0;
130    }
131
132    public static bool IsSimilarTo(this ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, SimilarityLevel mode) {
133      return FindMatch(t1, t2, mode) == 0;
134    }
135
136    public static bool ContainsFragment(this ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SimilarityLevel mode) {
137      var matches = FindMatches(tree, fragment, mode);
138      return matches.Count > 0;
139    }
140
141    public static void SortSubtrees(this ISymbolicExpressionTree tree) {
142      SortSubtrees(tree.Root);
143    }
144
145    public static void SortSubtrees(this ISymbolicExpressionTreeNode node) {
146      if (node.SubtreeCount == 0) return;
147      var subtrees = node.Subtrees as List<ISymbolicExpressionTreeNode>;
148      if (subtrees == null) return;
149      var comparer = new SymbolicExpressionTreeNodeOrderingComparer();
150      subtrees.Sort(comparer);
151      foreach (var subtree in subtrees)
152        subtree.SortSubtrees();
153    }
154
155    // return child that is the same as node
156    public static ISymbolicExpressionTreeNode FindChild(this ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode node) {
157      var subtrees = parent.Subtrees as List<ISymbolicExpressionTreeNode>;
158      var comparer = new SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel.Exact);
159      return subtrees.Find(x => comparer.Equals(x, node));
160    }
161
162    public static int FindMatch(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SimilarityLevel similarityLevel) {
163      var nodesA = a.IterateNodesPostfix() as List<ISymbolicExpressionTreeNode>;
164      var nodesB = b.IterateNodesPostfix() as List<ISymbolicExpressionTreeNode>;
165      return FindMatch(nodesA, nodesB, similarityLevel);
166    }
167
168    public static int FindMatch(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SimilarityLevel similarityLevel) {
169      var nodesA = a.IterateNodesPostfix() as List<ISymbolicExpressionTreeNode>;
170      var nodesB = b.IterateNodesPostfix() as List<ISymbolicExpressionTreeNode>;
171      return FindMatch(nodesA, nodesB, similarityLevel);
172    }
173
174    // returns the index in the sequence 'seq', where pattern 'pat' is matched, or -1 for no match
175    // -1 means no match, 0 means perfect match (the sequences overlap, hence the match index is 0)
176    public static int FindMatch(List<ISymbolicExpressionTreeNode> seq, List<ISymbolicExpressionTreeNode> pat, SimilarityLevel similarityLevel) {
177      int patlen = pat.Count;
178      int seqlen = seq.Count;
179      if (patlen == 0 || seqlen == 0) return -1;
180      var comparer = new SymbolicExpressionTreeNodeSimilarityComparer(similarityLevel);
181      if (patlen == seqlen) return seq.SequenceEqual(pat, comparer) ? 0 : -1;
182      for (int i = patlen; i <= seqlen; ++i) {
183        if (comparer.Equals(seq[i - 1], pat.Last())) // first check if last element is a match
184          if (seq.Skip(i - patlen).Take(patlen).SequenceEqual(pat, comparer)) // then check the whole sequence
185            return i - patlen;
186      }
187      return -1;
188    }
189
190    public static List<ISymbolicExpressionTreeNode> FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SimilarityLevel similarityLevel) {
191      var comp = new SymbolicExpressionTreeNodeSimilarityComparer(similarityLevel);
192      return FindMatches(tree.Root, fragment, comp);
193    }
194
195    public static List<ISymbolicExpressionTreeNode>
196    FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comp) {
197      var matches = new List<ISymbolicExpressionTreeNode>();
198      root.ForEachNodePostfix(node => {
199        if (Match(node, fragment, comp) == fragment.GetLength()) matches.Add(node);
200      });
201      return matches;
202    }
203
204    ///<summary>
205    /// Finds the longest common subsequence in quadratic time and linear space
206    /// Variant of:
207    /// D . S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975.
208    /// http://dl.acm.org/citation.cfm?id=360861
209    /// </summary>
210    /// <returns>Number of pairs that were matched</returns>
211    public static int
212    Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
213      if (!comp.Equals(a, b))
214        return 0;
215
216      int m = a.SubtreeCount;
217      int n = b.SubtreeCount;
218      if (m == 0 || n == 0) return 1;
219
220      var matrix = new int[m + 1, n + 1];
221      for (int i = 1; i <= m; ++i)
222        for (int j = 1; j <= n; ++j) {
223          var ai = a.GetSubtree(i - 1);
224          var bj = b.GetSubtree(j - 1);
225          int match = Match(ai, bj, comp);
226          matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match);
227        }
228      return matrix[m, n] + 1;
229    }
230
231    public static int
232    LCS(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
233      var nodesA = a.IterateNodesPrefix().ToList();
234      var nodesB = b.IterateNodesPrefix().ToList();
235      int m = nodesA.Count;
236      int n = nodesB.Count;
237      var matrix = new int[m + 1, n + 1];
238      for (int i = 1; i <= m; ++i) {
239        for (int j = 1; j <= n; ++j) {
240          int w = comp.Equals(a, b) ? 1 : 0;
241          matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + w);
242        }
243      }
244      return matrix[m, n];
245    }
246
247    public static void RenderTree(TextWriter writer, ISymbolicExpressionTree tree) {
248      RenderNode(writer, tree.Root, string.Empty);
249    }
250
251    public static void RenderNode(TextWriter writer, ISymbolicExpressionTreeNode node, string prefix) {
252      string label;
253      if (node is VariableTreeNode) {
254        var variable = node as VariableTreeNode;
255        label = string.Concat(string.Format("{0:0.000}", variable.Weight), '·', variable.VariableName);
256      } else if (node is ConstantTreeNode) {
257        var constant = node as ConstantTreeNode;
258        label = string.Format("{0:0.000}", constant.Value);
259      } else {
260        label = node.Symbol.ToString();
261      }
262
263      writer.Write(label);
264      if (node.SubtreeCount > 0) {
265        char connector, extender = ' ';
266        var padding = prefix + new string(' ', label.Length);
267        for (int i = 0; i != node.SubtreeCount; ++i) {
268          if (i == 0) {
269            if (node.SubtreeCount > 1) {
270              connector = RenderChars.JunctionDown;
271              extender = RenderChars.VerticalLine;
272            } else {
273              connector = RenderChars.HorizontalLine;
274              extender = ' ';
275            }
276          } else {
277            writer.Write(padding);
278            if (i == node.SubtreeCount - 1) {
279              connector = RenderChars.CornerRight;
280              extender = ' ';
281            } else {
282              connector = RenderChars.JunctionRight;
283              extender = RenderChars.VerticalLine;
284            }
285          }
286          writer.Write(string.Concat(connector, RenderChars.HorizontalLine));
287          var newPrefix = string.Concat(padding, extender, ' ');
288          RenderNode(writer, node.GetSubtree(i), newPrefix);
289        }
290      } else
291        writer.WriteLine();
292    }
293  }
294
295  // helper class providing characters for displaying a tree in the console
296  public static class RenderChars {
297    public const char JunctionDown = '┬';
298    public const char HorizontalLine = '─';
299    public const char VerticalLine = '│';
300    public const char JunctionRight = '├';
301    public const char CornerRight = '└';
302  }
303}
Note: See TracBrowser for help on using the repository browser.