Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs @ 14326

Last change on this file since 14326 was 14326, checked in by bburlacu, 7 years ago

#1772: Refactor QueryMatch class to make the code more readable and easier to understand.

File size: 9.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26
27namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
28  // this class implements the decision version of the tree pattern query matching algorithm
29  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
30  public class QueryMatch {
31    public ISymbolicExpressionTreeNodeEqualityComparer Comparer { get; private set; }
32    private readonly NodeInfo Inf = new NodeInfo { Index = int.MaxValue };
33    private readonly NodeInfo Nil = new NodeInfo { Index = -1 };
34
35    private QueryMatch() { }
36
37    // whether matching nodes should also have matching parents
38    // in theory, this restricts the matching so that parent-child
39    // pairs in the query tree are matched by parent-child pairs in
40    // the data tree (and not ancestor-descendant pairs)
41    public bool MatchParents { get; set; }
42
43    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) {
44      Comparer = comparer;
45    }
46
47    internal bool Match(List<NodeInfo> data, List<NodeInfo> query) {
48      var dRoot = data.Last();
49      var qRoot = query.Last();
50
51      var result = Tmatch(dRoot, query.First(), qRoot);
52      return result == qRoot;
53    }
54
55    public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) {
56      return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0));
57    }
58
59    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
60      if (!Comparer.Equals(data, query))
61        return false;
62
63      var dNodes = InitializePostOrder(data);
64      var qNodes = InitializePostOrder(query);
65
66      var dRoot = dNodes.Last();
67      var qRoot = qNodes.Last();
68      var result = Tmatch(dRoot, qNodes.First(), qRoot);
69      return result == qRoot;
70    }
71
72    public IEnumerable<ISymbolicExpressionTree> GetMatchingTrees(IEnumerable<ISymbolicExpressionTree> data, ISymbolicExpressionTree query) {
73      var qRoot = query.Root.GetSubtree(0).GetSubtree(0);
74      var filtered = data.Where(x => x.Length >= query.Length && Comparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot));
75      var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0));
76      return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d;
77    }
78
79    private bool AreMatching(NodeInfo a, NodeInfo b) {
80      // force the nodes to be on the same level
81      if (a.Level != b.Level)
82        return false;
83
84      bool equals = Comparer.Equals(a.Node, b.Node);
85      if (equals && MatchParents) {
86        var pa = a.Parent;
87        var pb = b.Parent;
88        if (pa == null && pb == null)
89          return true;
90        if (pa == null || pb == null)
91          return false;
92        return pa.Level == pb.Level && Comparer.Equals(pa.Node, pb.Node);
93      }
94      return equals;
95    }
96
97    private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
98      var qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil);
99      var next = qBest.Next;
100      if (next <= qUntil && AreMatching(d, next)) {
101        qBest = next;
102        next = qBest.Next;
103        var lastSibling = qBest.LastSibling;
104        return next <= lastSibling ? Tmatch(d, next, lastSibling) : qBest;
105      }
106      return qBest;
107    }
108
109    private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
110      if (d.IsFirstSibling)
111        return Tmatch(d, qFrom, qUntil);
112
113      var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil);
114      var qTree = Tmatch(d, qFrom, qUntil);
115
116      for (;;) {
117        if (qHedge == qTree)
118          return qHedge;
119        if (qTree < qHedge) {
120          var rtop = Rtop(qTree.Next, qHedge);
121          while (rtop < Inf && qHedge < rtop.LastSibling) {
122            qTree = Tmatch(d, rtop.Next, rtop.LastSibling);
123            rtop = Rtop(qTree.Next, qHedge);
124          }
125          if (qTree <= qHedge)
126            return qHedge;
127        } else {
128          var rtop = Rtop(qHedge.Next, qTree);
129          while (rtop < Inf && qTree < rtop.LastSibling) {
130            qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling);
131            rtop = Rtop(qHedge.Next, qTree);
132          }
133          if (qHedge <= qTree)
134            return qTree;
135        }
136      }
137    }
138
139    /// <summary>
140    /// Description from Götz et al. - Efficient Algorithms for Descendant-only Tree Pattern Queries, 2009:
141    /// Given two nodes hFrom and hUntil, returns the rightmost node among the topmost nodes in the interval [hFrom, hUntil]
142    /// More formally, Rtop(hFrom,hUntil) is the node u such that depth(u) is minimal and u is larger than every other node v
143    /// in [hFrom, hUntil] with depth(u)==depth(v).
144    /// </summary>
145    /// <param name="hFrom">The interval start</param>
146    /// <param name="hUntil">The interval end</param>
147    /// <returns>The rightmost node from the topmost nodes in the interval [hFrom, hUntil]</returns>
148    private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) {
149      if (hFrom == hUntil)
150        return hUntil;
151      if (hFrom.Index > hUntil.Index)
152        return Inf;
153      // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
154      // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
155      var rtop = hUntil.Ancestors.OrderBy(x => x.Level).Select(x => x.PreviousSibling).FirstOrDefault(x => x != null && x >= hFrom);
156      return rtop ?? hUntil;
157    }
158
159    internal static List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
160      // levels will be computed below
161      var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList();
162      var map = nodes.ToDictionary(x => x.Node, x => x);
163      nodes.First().Previous = new NodeInfo { Node = null, Index = -1, Next = nodes.First() };
164      nodes.Last().Next = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() };
165      for (int i = nodes.Count - 1; i >= 0; --i) {
166        var n = nodes[i];
167        if (n != nodes.Last()) n.Next = nodes[n.Index + 1];
168        if (n != nodes.First()) n.Previous = nodes[n.Index - 1];
169        NodeInfo parentInfo;
170        map.TryGetValue(n.Node.Parent, out parentInfo);
171        n.Parent = parentInfo;
172        if (n.Parent == null) {
173          n.PreviousSibling = n.NextSibling = n.LastSibling = null;
174        } else {
175          var parent = n.Parent.Node;
176          int si = parent.IndexOfSubtree(n.Node);
177          n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)];
178          n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)];
179          n.LastSibling = map[parent.Subtrees.Last()];
180          n.Level = n.Parent.Level + 1;
181        }
182        n.LastChild = n.IsLeaf ? null : n.Previous;
183      }
184      return nodes;
185    }
186  }
187
188  /// <summary>
189  /// NodeInfo objects are useful for keeping sibling, parent, index and depth information for tree nodes
190  /// </summary>
191  internal class NodeInfo : IComparable<NodeInfo> {
192    public ISymbolicExpressionTreeNode Node { get; set; }
193    public int Index { get; set; }
194    public int Level { get; set; }
195    public NodeInfo Parent { get; set; }
196    public NodeInfo Previous { get; set; }
197    public NodeInfo PreviousSibling { get; set; }
198    public NodeInfo Next { get; set; }
199    public NodeInfo NextSibling { get; set; }
200    public NodeInfo LastSibling { get; set; }
201    public NodeInfo LastChild { get; set; }
202
203    public bool IsLeaf {
204      get { return Node.SubtreeCount == 0; }
205    }
206
207    public bool IsFirstSibling {
208      get { return Parent != null && Node == Node.Parent.Subtrees.First(); }
209    }
210
211    public bool IsLastSibling {
212      get { return Parent != null && Node == Node.Parent.Subtrees.Last(); }
213    }
214
215    public IEnumerable<NodeInfo> Ancestors {
216      get {
217        var p = Parent;
218        while (p != null) {
219          yield return p;
220          p = p.Parent;
221        }
222      }
223    }
224
225    public int CompareTo(NodeInfo other) {
226      return other == null ? 1 : Index.CompareTo(other.Index);
227    }
228
229    public static bool operator <(NodeInfo lsh, NodeInfo rhs) {
230      return lsh.CompareTo(rhs) == -1;
231    }
232
233    public static bool operator >(NodeInfo lhs, NodeInfo rhs) {
234      return lhs.CompareTo(rhs) == 1;
235    }
236
237    public static bool operator <=(NodeInfo lhs, NodeInfo rhs) {
238      return lhs.CompareTo(rhs) <= 0;
239    }
240
241    public static bool operator >=(NodeInfo lhs, NodeInfo rhs) {
242      return lhs.CompareTo(rhs) >= 0;
243    }
244  }
245}
246
Note: See TracBrowser for help on using the repository browser.