Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs @ 12929

Last change on this file since 12929 was 12929, checked in by bburlacu, 9 years ago

#1772: Initial implementation of tree pattern matching algorithm by Götz et al. (http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440). Added wildcard symbols and nodes and updated the matching code accordingly.

File size: 10.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27
28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
29  // this class implements the decision version of the tree pattern query matching algorithm
30  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
31  public class QueryMatch {
32    private List<NodeInfo> dNodes;
33    private List<NodeInfo> qNodes;
34    private readonly ISymbolicExpressionTreeNodeEqualityComparer comparer;
35
36    private QueryMatch() { }
37
38    private readonly Stack<string> stack = new Stack<string>();
39
40    public string Trace {
41      get {
42        var sb = new StringBuilder();
43        while (stack.Count > 0)
44          sb.Append(stack.Pop());
45        return sb.ToString();
46      }
47    }
48
49    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) {
50      this.comparer = comparer;
51    }
52
53    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
54      stack.Clear();
55      dNodes = InitializePostOrder(data);
56      qNodes = InitializePostOrder(query);
57
58      var dRoot = dNodes.Last();
59      var qRoot = qNodes.Last();
60      var result = Tmatch(dRoot, qNodes.First(), qNodes.Last());
61      return result == qRoot;
62    }
63
64    private List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
65      var list = node.IterateNodesPostfix().Select((x, i) => new NodeInfo(x, i)).ToList();
66      foreach (var l in list)
67        l.Nodes = list;
68
69      return list;
70    }
71
72    private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil, int tab = 0) {
73#if DEBUG
74      var sb = new StringBuilder();
75#endif
76      NodeInfo qBest = d.IsLeaf ? qFrom.Previous() : Hmatch(d.LastChild(), qFrom, qUntil, tab + 1);
77
78      var next = qBest.Next();
79      if (next.Index <= qUntil.Index && comparer.Equals(d.Node, next.Node)) {
80        qBest = next;
81        next = qBest.Next();
82        var lastSibling = qBest.LastSibling();
83        var result = next.Index <= lastSibling.Index ? Tmatch(d, next, lastSibling, tab + 1) : qBest;
84#if DEBUG
85        for (int i = 0; i < tab; ++i)
86          sb.Append("\t");
87        sb.AppendLine(string.Format("TM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, result.Index));
88        stack.Push(sb.ToString());
89#endif
90        return result;
91      }
92#if DEBUG
93      for (int i = 0; i < tab; ++i)
94        sb.Append("\t");
95      sb.AppendLine(string.Format("TM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qBest.Index));
96      stack.Push(sb.ToString());
97#endif
98      return qBest;
99    }
100
101    private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil, int tab = 0) {
102#if DEBUG
103      var sb = new StringBuilder();
104#endif
105      if (d.IsFirstSibling) {
106        var result = Tmatch(d, qFrom, qUntil, tab + 1);
107#if DEBUG
108        for (int i = 0; i < tab; ++i)
109          sb.Append("\t");
110        sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, result.Index));
111        stack.Push(sb.ToString());
112#endif
113        return result;
114      }
115
116      var qHedge = Hmatch(d.PreviousSibling(), qFrom, qUntil, tab + 1);
117      var qTree = Tmatch(d, qFrom, qUntil, tab + 1);
118
119      for (;;) {
120        if (qHedge == qTree) {
121#if DEBUG
122          for (int i = 0; i < tab; ++i)
123            sb.Append("\t");
124          sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qHedge.Index));
125          stack.Push(sb.ToString());
126#endif
127          return qHedge;
128        }
129        if (qTree.Index < qHedge.Index) {
130          var rtop = Rtop(qTree.Next(), qHedge, tab);
131          while (rtop.Index < qHedge.Next().Index && qHedge.Index < rtop.LastSibling().Index) {
132            qTree = Tmatch(d, rtop.Next(), rtop.LastSibling(), tab + 1);
133            rtop = Rtop(qTree.Next(), qHedge, tab);
134          }
135          if (qTree.Index <= qHedge.Index) {
136#if DEBUG
137            for (int i = 0; i < tab; ++i)
138              sb.Append("\t");
139            sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qHedge.Index));
140            stack.Push(sb.ToString());
141#endif
142            return qHedge;
143          }
144        } else {
145          var rtop = Rtop(qHedge.Next(), qTree, tab);
146          while (rtop.Index < qTree.Next().Index && qTree.Index < rtop.LastSibling().Index) {
147            qHedge = Hmatch(d.PreviousSibling(), rtop.Next(), rtop.LastSibling(), tab + 1);
148            rtop = Rtop(qHedge.Next(), qTree, tab);
149          }
150          if (qHedge.Index <= qTree.Index) {
151#if DEBUG
152            for (int i = 0; i < tab; ++i)
153              sb.Append("\t");
154            sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qTree.Index));
155            stack.Push(sb.ToString());
156#endif
157            return qTree;
158          }
159        }
160      }
161    }
162
163    // returns the rightmost node from the topmost nodes in the interval [hFrom, hUntil]
164    private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil, int tab = 0) {
165#if DEBUG
166      var sb = new StringBuilder();
167      for (int i = 0; i < tab; ++i)
168        sb.Append("\t");
169      sb.Append(string.Format("Rtop(q{0}, q{1})", hFrom.Index, hUntil.Index));
170#endif
171
172      if (hFrom == hUntil) {
173#if DEBUG
174        sb.AppendLine(string.Format(" => q{0}", hUntil.Index));
175        stack.Push(sb.ToString());
176#endif
177        return hUntil;
178      }
179
180      if (hFrom.Nodes != hUntil.Nodes)
181        throw new ArgumentException("hFrom and hUntil must belong to the same tree/hedge");
182
183
184      NodeInfo rtop = null;
185      if (hFrom.Index > hUntil.Index) {
186        rtop = hFrom.Next(); // or return inf (inf is defined in the paper as the successor of the largest node in the hedge)
187#if DEBUG
188        sb.AppendLine(string.Format(" => q{0}", rtop.Index));
189        stack.Push(sb.ToString());
190#endif
191        return rtop;
192      }
193
194      // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
195      // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
196      var p = hUntil.Parent();
197      if (p == null)
198        return hUntil;
199      List<NodeInfo> ancestors = hUntil.Ancestors().ToList();
200      // ancestors list is ordered in decreasing depth therefore we start from the end
201      for (int i = ancestors.Count - 1; i >= 0; --i) {
202        var s = ancestors[i].PreviousSibling();
203        if (s.Index >= hFrom.Index) {
204          rtop = s;
205          break;
206        }
207      }
208      if (rtop == null)
209        rtop = hUntil;
210#if DEBUG
211      sb.AppendLine(string.Format(" => q{0}", rtop.Index));
212      stack.Push(sb.ToString());
213#endif
214      return rtop;
215    }
216
217    private class NodeInfo {
218      private NodeInfo() { }
219
220      public NodeInfo(ISymbolicExpressionTreeNode n, int i) {
221        Node = n;
222        Index = i;
223      }
224
225      public List<NodeInfo> Nodes { get; set; }
226      public ISymbolicExpressionTreeNode Node { get; }
227      public int Index { get; }
228
229      public bool IsLeaf {
230        get { return Node.SubtreeCount == 0; }
231      }
232
233      public bool IsFirstSibling {
234        get { return this.Node == this.Node.Parent.Subtrees.First(); }
235      }
236
237      public IEnumerable<NodeInfo> Ancestors() {
238        var p = Parent();
239        while (p != null) {
240          yield return p;
241          p = p.Parent();
242        }
243      }
244
245      public NodeInfo Parent() {
246        var p = Node.Parent;
247        if (p == null || p.Symbol is StartSymbol)
248          return null;
249        var index = Index + 1;
250        for (int i = p.SubtreeCount - 1; i >= 0; --i) {
251          var subtree = p.GetSubtree(i);
252          if (subtree == Node)
253            break;
254          index += subtree.GetLength();
255        }
256        return Nodes[index];
257      }
258
259      public NodeInfo Next() {
260        if (this == Nodes.Last())
261          return new NodeInfo(null, int.MaxValue) { Nodes = Nodes };
262        return Nodes[Index + 1];
263      }
264
265      public NodeInfo NextSibling() {
266        if (Node.Parent == null || Node == Node.Parent.Subtrees.Last())
267          return new NodeInfo(null, -1);
268
269        var s = Node.Parent.GetSubtree(Node.Parent.IndexOfSubtree(Node) + 1);
270        var nextSibling = Nodes[Index + s.GetLength()];
271        return nextSibling;
272      }
273
274      public NodeInfo LastSibling() {
275        if (Node.Parent == null)
276          return new NodeInfo(null, -1);
277
278        var last = Node.Parent.Subtrees.Last();
279        if (this.Node == last)
280          return this;
281
282        var n = this;
283        while (n.Node != last)
284          n = n.NextSibling();
285        return n;
286      }
287
288      public NodeInfo Previous() {
289        if (this == Nodes.First())
290          return new NodeInfo(null, -1) { Nodes = Nodes }; // return nil
291
292        return Nodes[Index - 1];
293      }
294
295      public NodeInfo PreviousSibling() {
296        if (Node.Parent == null || Node == Node.Parent.Subtrees.First())
297          return new NodeInfo(null, -1) { Nodes = Nodes };
298        var previousSibling = Nodes[Index - Node.GetLength()];
299        return previousSibling;
300      }
301
302      public NodeInfo LastChild() {
303        var nil = new NodeInfo(null, -1);
304        if (IsLeaf)
305          return nil;
306        return Previous();
307      }
308    }
309  }
310}
Note: See TracBrowser for help on using the repository browser.