Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs @ 12933

Last change on this file since 12933 was 12933, checked in by bburlacu, 9 years ago

#1772: Added a parameter to the tree query matching algorithm to specify whether only strict parent-child pairs in the data tree should be matched with the query (otherwise ancestor-descendant pairs are also considered valid).

File size: 10.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27
28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
29  // this class implements the decision version of the tree pattern query matching algorithm
30  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
31  public class QueryMatch {
32    private List<NodeInfo> dNodes;
33    private List<NodeInfo> qNodes;
34    private readonly ISymbolicExpressionTreeNodeEqualityComparer comparer;
35
36    private QueryMatch() { }
37
38    private readonly Stack<string> stack = new Stack<string>();
39
40    public string Trace {
41      get {
42        var sb = new StringBuilder();
43        while (stack.Count > 0)
44          sb.Append(stack.Pop());
45        return sb.ToString();
46      }
47    }
48
49    // whether matching nodes should also have matching parents
50    // in theory, this restricts the matching so that parent-child
51    // pairs in the query tree are matched by parent-child pairs in
52    // the data tree (and not ancestor-descendant pairs)
53    public bool MatchParents { get; set; }
54
55    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) {
56      this.comparer = comparer;
57    }
58
59    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
60      stack.Clear();
61      dNodes = InitializePostOrder(data);
62      qNodes = InitializePostOrder(query);
63
64      var dRoot = dNodes.Last();
65      var qRoot = qNodes.Last();
66      var result = Tmatch(dRoot, qNodes.First(), qNodes.Last());
67      return result == qRoot;
68    }
69
70    private List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
71      var list = node.IterateNodesPostfix().Select((x, i) => new NodeInfo(x, i)).ToList();
72      foreach (var l in list)
73        l.Nodes = list;
74
75      return list;
76    }
77
78    private bool AreMatching(NodeInfo a, NodeInfo b) {
79      bool equals = comparer.Equals(a.Node, b.Node);
80
81      if (equals && MatchParents) {
82        var pa = a.Parent();
83        var pb = b.Parent();
84        if (pa != null && pb != null)
85          return comparer.Equals(pa.Node, pb.Node);
86        if (!(pa == null && pb == null))
87          return false;
88      }
89      return equals;
90    }
91
92    private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil, int tab = 0) {
93#if DEBUG
94      var sb = new StringBuilder();
95#endif
96      NodeInfo qBest = d.IsLeaf ? qFrom.Previous() : Hmatch(d.LastChild(), qFrom, qUntil, tab + 1);
97
98      var next = qBest.Next();
99      if (next.Index <= qUntil.Index && AreMatching(d, next)) {
100        qBest = next;
101        next = qBest.Next();
102        var lastSibling = qBest.LastSibling();
103        var result = next.Index <= lastSibling.Index ? Tmatch(d, next, lastSibling, tab + 1) : qBest;
104#if DEBUG
105        for (int i = 0; i < tab; ++i)
106          sb.Append("\t");
107        sb.AppendLine(string.Format("TM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, result.Index));
108        stack.Push(sb.ToString());
109#endif
110        return result;
111      }
112#if DEBUG
113      for (int i = 0; i < tab; ++i)
114        sb.Append("\t");
115      sb.AppendLine(string.Format("TM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qBest.Index));
116      stack.Push(sb.ToString());
117#endif
118      return qBest;
119    }
120
121    private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil, int tab = 0) {
122#if DEBUG
123      var sb = new StringBuilder();
124#endif
125      if (d.IsFirstSibling) {
126        var result = Tmatch(d, qFrom, qUntil, tab + 1);
127#if DEBUG
128        for (int i = 0; i < tab; ++i)
129          sb.Append("\t");
130        sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, result.Index));
131        stack.Push(sb.ToString());
132#endif
133        return result;
134      }
135
136      var qHedge = Hmatch(d.PreviousSibling(), qFrom, qUntil, tab + 1);
137      var qTree = Tmatch(d, qFrom, qUntil, tab + 1);
138
139      for (;;) {
140        if (qHedge == qTree) {
141#if DEBUG
142          for (int i = 0; i < tab; ++i)
143            sb.Append("\t");
144          sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qHedge.Index));
145          stack.Push(sb.ToString());
146#endif
147          return qHedge;
148        }
149        if (qTree.Index < qHedge.Index) {
150          var rtop = Rtop(qTree.Next(), qHedge, tab);
151          while (rtop.Index < qHedge.Next().Index && qHedge.Index < rtop.LastSibling().Index) {
152            qTree = Tmatch(d, rtop.Next(), rtop.LastSibling(), tab + 1);
153            rtop = Rtop(qTree.Next(), qHedge, tab);
154          }
155          if (qTree.Index <= qHedge.Index) {
156#if DEBUG
157            for (int i = 0; i < tab; ++i)
158              sb.Append("\t");
159            sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qHedge.Index));
160            stack.Push(sb.ToString());
161#endif
162            return qHedge;
163          }
164        } else {
165          var rtop = Rtop(qHedge.Next(), qTree, tab);
166          while (rtop.Index < qTree.Next().Index && qTree.Index < rtop.LastSibling().Index) {
167            qHedge = Hmatch(d.PreviousSibling(), rtop.Next(), rtop.LastSibling(), tab + 1);
168            rtop = Rtop(qHedge.Next(), qTree, tab);
169          }
170          if (qHedge.Index <= qTree.Index) {
171#if DEBUG
172            for (int i = 0; i < tab; ++i)
173              sb.Append("\t");
174            sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qTree.Index));
175            stack.Push(sb.ToString());
176#endif
177            return qTree;
178          }
179        }
180      }
181    }
182
183    // returns the rightmost node from the topmost nodes in the interval [hFrom, hUntil]
184    private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil, int tab = 0) {
185#if DEBUG
186      var sb = new StringBuilder();
187      for (int i = 0; i < tab; ++i)
188        sb.Append("\t");
189      sb.Append(string.Format("Rtop(q{0}, q{1})", hFrom.Index, hUntil.Index));
190#endif
191
192      if (hFrom == hUntil) {
193#if DEBUG
194        sb.AppendLine(string.Format(" => q{0}", hUntil.Index));
195        stack.Push(sb.ToString());
196#endif
197        return hUntil;
198      }
199
200      if (hFrom.Nodes != hUntil.Nodes)
201        throw new ArgumentException("hFrom and hUntil must belong to the same tree/hedge");
202
203
204      NodeInfo rtop = null;
205      if (hFrom.Index > hUntil.Index) {
206        rtop = hFrom.Next(); // or return inf (inf is defined in the paper as the successor of the largest node in the hedge)
207#if DEBUG
208        sb.AppendLine(string.Format(" => q{0}", rtop.Index));
209        stack.Push(sb.ToString());
210#endif
211        return rtop;
212      }
213
214      // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
215      // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
216      var p = hUntil.Parent();
217      if (p == null)
218        return hUntil;
219      List<NodeInfo> ancestors = hUntil.Ancestors().ToList();
220      // ancestors list is ordered in decreasing depth therefore we start from the end
221      for (int i = ancestors.Count - 1; i >= 0; --i) {
222        var s = ancestors[i].PreviousSibling();
223        if (s.Index >= hFrom.Index) {
224          rtop = s;
225          break;
226        }
227      }
228      if (rtop == null)
229        rtop = hUntil;
230#if DEBUG
231      sb.AppendLine(string.Format(" => q{0}", rtop.Index));
232      stack.Push(sb.ToString());
233#endif
234      return rtop;
235    }
236
237    private class NodeInfo {
238      private NodeInfo() { }
239
240      public NodeInfo(ISymbolicExpressionTreeNode n, int i) {
241        Node = n;
242        Index = i;
243      }
244
245      public List<NodeInfo> Nodes { get; set; }
246      public ISymbolicExpressionTreeNode Node { get; }
247      public int Index { get; }
248
249      public bool IsLeaf {
250        get { return Node.SubtreeCount == 0; }
251      }
252
253      public bool IsFirstSibling {
254        get { return this.Node == this.Node.Parent.Subtrees.First(); }
255      }
256
257      public IEnumerable<NodeInfo> Ancestors() {
258        var p = Parent();
259        while (p != null) {
260          yield return p;
261          p = p.Parent();
262        }
263      }
264
265      public NodeInfo Parent() {
266        var p = Node.Parent;
267        if (p == null || p.Symbol is StartSymbol)
268          return null;
269        var index = Index + 1;
270        for (int i = p.SubtreeCount - 1; i >= 0; --i) {
271          var subtree = p.GetSubtree(i);
272          if (subtree == Node)
273            break;
274          index += subtree.GetLength();
275        }
276        return Nodes[index];
277      }
278
279      public NodeInfo Next() {
280        if (this == Nodes.Last())
281          return new NodeInfo(null, int.MaxValue) { Nodes = Nodes };
282        return Nodes[Index + 1];
283      }
284
285      public NodeInfo NextSibling() {
286        if (Node.Parent == null || Node == Node.Parent.Subtrees.Last())
287          return new NodeInfo(null, -1);
288
289        var s = Node.Parent.GetSubtree(Node.Parent.IndexOfSubtree(Node) + 1);
290        var nextSibling = Nodes[Index + s.GetLength()];
291        return nextSibling;
292      }
293
294      public NodeInfo LastSibling() {
295        if (Node.Parent == null)
296          return new NodeInfo(null, -1);
297
298        var last = Node.Parent.Subtrees.Last();
299        if (this.Node == last)
300          return this;
301
302        var n = this;
303        while (n.Node != last)
304          n = n.NextSibling();
305        return n;
306      }
307
308      public NodeInfo Previous() {
309        if (this == Nodes.First())
310          return new NodeInfo(null, -1) { Nodes = Nodes }; // return nil
311
312        return Nodes[Index - 1];
313      }
314
315      public NodeInfo PreviousSibling() {
316        if (Node.Parent == null || Node == Node.Parent.Subtrees.First())
317          return new NodeInfo(null, -1) { Nodes = Nodes };
318        var previousSibling = Nodes[Index - Node.GetLength()];
319        return previousSibling;
320      }
321
322      public NodeInfo LastChild() {
323        var nil = new NodeInfo(null, -1);
324        if (IsLeaf)
325          return nil;
326        return Previous();
327      }
328    }
329  }
330}
Note: See TracBrowser for help on using the repository browser.