Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs @ 14426

Last change on this file since 14426 was 14426, checked in by bburlacu, 7 years ago

#1772: QueryMatch: implement more elaborate node comparison (better accuracy when matching schemas).

File size: 10.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26
27namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
28  // this class implements the decision version of the tree pattern query matching algorithm
29  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
30  public class QueryMatch {
31    public ISymbolicExpressionTreeNodeEqualityComparer EqualityComparer { get; private set; }
32    private readonly NodeInfo Inf = new NodeInfo { Index = int.MaxValue };
33    private readonly NodeInfo Nil = new NodeInfo { Index = -1 };
34    private readonly HashSet<string> commutativeSymbols = new HashSet<string> { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };
35    private readonly ISymbolicExpressionTreeNodeComparer nodeComparer = new SymbolicExpressionTreeNodeComparer();
36
37    private QueryMatch() { }
38
39    // whether matching nodes should also have matching parents
40    // in theory, this restricts the matching so that parent-child
41    // pairs in the query tree are matched by parent-child pairs in
42    // the data tree (and not ancestor-descendant pairs)
43    public bool MatchParents { get; set; }
44
45    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer equalityComparer) {
46      EqualityComparer = equalityComparer;
47    }
48
49    internal bool Match(List<NodeInfo> data, List<NodeInfo> query) {
50      var dRoot = data.Last();
51      var qRoot = query.Last();
52
53      var result = Tmatch(dRoot, query.First(), qRoot);
54      return result == qRoot;
55    }
56
57    public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) {
58      return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0));
59    }
60
61    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
62      if (!EqualityComparer.Equals(data, query))
63        return false;
64
65      var dNodes = InitializePostOrder(data);
66      var qNodes = InitializePostOrder(query);
67
68      var dRoot = dNodes.Last();
69      var qRoot = qNodes.Last();
70      var result = Tmatch(dRoot, qNodes.First(), qRoot);
71      return result == qRoot;
72    }
73
74    public IEnumerable<ISymbolicExpressionTree> GetMatchingTrees(IEnumerable<ISymbolicExpressionTree> data, ISymbolicExpressionTree query) {
75      var qRoot = query.Root.GetSubtree(0).GetSubtree(0);
76      var filtered = data.Where(x => x.Length >= query.Length && EqualityComparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot));
77      var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0));
78      return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d;
79    }
80
81    private bool AreMatching(NodeInfo d, NodeInfo q) {
82      // force the nodes to be on the same level
83      if (d.Level != q.Level)
84        return false;
85
86      // compare nodes
87      bool equals = EqualityComparer.Equals(d.Node, q.Node);
88
89      if (!equals)
90        return false;
91
92      // compare node parents
93      if (MatchParents && !EqualityComparer.Equals(d.Node.Parent, q.Node.Parent))
94        return false;
95
96      // compare children to make sure they are the same
97      if (d.Node.SubtreeCount > 0 && q.Node.SubtreeCount > 0) {
98        var dSubtrees = d.Node.Subtrees.ToList();
99
100        if (commutativeSymbols.Contains(d.Node.Symbol.Name)) {
101          var qSubtrees = q.Node.Subtrees.Where(x => !(x is AnyNode || x is AnySubtree)).ToList();
102
103          dSubtrees.Sort(nodeComparer);
104          qSubtrees.Sort(nodeComparer);
105
106          var nw = q.Node.SubtreeCount - qSubtrees.Count; // number of wildcards
107          if (nw == 0)
108            return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer);
109
110          for (int i = 0, j = 0; i < dSubtrees.Count && j < qSubtrees.Count;) {
111            if (EqualityComparer.Equals(dSubtrees[i], qSubtrees[j])) {
112              ++i;
113              ++j;
114            } else {
115              if (nw == 0)
116                return false;
117              ++i;
118              --nw;
119            }
120          }
121        } else {
122          var qSubtrees = q.Node.Subtrees;
123          return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer);
124        }
125      }
126      return true;
127    }
128
129    private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
130      var qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil);
131      var next = qBest.Next;
132      if (next <= qUntil && AreMatching(d, next)) {
133        qBest = next;
134        next = qBest.Next;
135        var lastSibling = qBest.LastSibling;
136        return next <= lastSibling ? Tmatch(d, next, lastSibling) : qBest;
137      }
138      return qBest;
139    }
140
141    private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
142      if (d.IsFirstSibling)
143        return Tmatch(d, qFrom, qUntil);
144
145      var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil);
146      var qTree = Tmatch(d, qFrom, qUntil);
147
148      for (;;) {
149        if (qHedge == qTree)
150          return qHedge;
151        if (qTree < qHedge) {
152          var rtop = Rtop(qTree.Next, qHedge);
153          while (rtop < Inf && qHedge < rtop.LastSibling) {
154            qTree = Tmatch(d, rtop.Next, rtop.LastSibling);
155            rtop = Rtop(qTree.Next, qHedge);
156          }
157          if (qTree <= qHedge)
158            return qHedge;
159        } else {
160          var rtop = Rtop(qHedge.Next, qTree);
161          while (rtop < Inf && qTree < rtop.LastSibling) {
162            qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling);
163            rtop = Rtop(qHedge.Next, qTree);
164          }
165          if (qHedge <= qTree)
166            return qTree;
167        }
168      }
169    }
170
171    /// <summary>
172    /// Description from Götz et al. - Efficient Algorithms for Descendant-only Tree Pattern Queries, 2009:
173    /// Given two nodes hFrom and hUntil, returns the rightmost node among the topmost nodes in the interval [hFrom, hUntil]
174    /// More formally, Rtop(hFrom,hUntil) is the node u such that depth(u) is minimal and u is larger than every other node v
175    /// in [hFrom, hUntil] with depth(u)==depth(v).
176    /// </summary>
177    /// <param name="hFrom">The interval start</param>
178    /// <param name="hUntil">The interval end</param>
179    /// <returns>The rightmost node from the topmost nodes in the interval [hFrom, hUntil]</returns>
180    private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) {
181      if (hFrom == hUntil)
182        return hUntil;
183      if (hFrom.Index > hUntil.Index)
184        return Inf;
185      // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
186      // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
187      var rtop = hUntil.Ancestors.OrderBy(x => x.Level).Select(x => x.PreviousSibling).FirstOrDefault(x => x != null && x >= hFrom);
188      return rtop ?? hUntil;
189    }
190
191    internal static List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
192      // levels will be computed below
193      var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList();
194      var map = nodes.ToDictionary(x => x.Node, x => x);
195      nodes.First().Previous = new NodeInfo { Node = null, Index = -1, Next = nodes.First() };
196      nodes.Last().Next = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() };
197      for (int i = nodes.Count - 1; i >= 0; --i) {
198        var n = nodes[i];
199        if (n != nodes.Last()) n.Next = nodes[n.Index + 1];
200        if (n != nodes.First()) n.Previous = nodes[n.Index - 1];
201        NodeInfo parentInfo = null;
202        if (n.Node.Parent != null)
203          map.TryGetValue(n.Node.Parent, out parentInfo);
204        n.Parent = parentInfo;
205        if (n.Parent == null) {
206          n.PreviousSibling = n.NextSibling = n.LastSibling = null;
207        } else {
208          var parent = n.Parent.Node;
209          int si = parent.IndexOfSubtree(n.Node);
210          n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)];
211          n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)];
212          n.LastSibling = map[parent.Subtrees.Last()];
213          n.Level = n.Parent.Level + 1;
214        }
215        n.LastChild = n.IsLeaf ? null : n.Previous;
216      }
217      return nodes;
218    }
219  }
220
221  /// <summary>
222  /// NodeInfo objects are useful for keeping sibling, parent, index and depth information for tree nodes
223  /// </summary>
224  internal class NodeInfo : IComparable<NodeInfo> {
225    public ISymbolicExpressionTreeNode Node { get; set; }
226    public int Index { get; set; }
227    public int Level { get; set; }
228    public NodeInfo Parent { get; set; }
229    public NodeInfo Previous { get; set; }
230    public NodeInfo PreviousSibling { get; set; }
231    public NodeInfo Next { get; set; }
232    public NodeInfo NextSibling { get; set; }
233    public NodeInfo LastSibling { get; set; }
234    public NodeInfo LastChild { get; set; }
235
236    public bool IsLeaf {
237      get { return Node.SubtreeCount == 0; }
238    }
239
240    public bool IsFirstSibling {
241      get { return Parent != null && Node == Node.Parent.Subtrees.First(); }
242    }
243
244    public bool IsLastSibling {
245      get { return Parent != null && Node == Node.Parent.Subtrees.Last(); }
246    }
247
248    public IEnumerable<NodeInfo> Ancestors {
249      get {
250        var p = Parent;
251        while (p != null) {
252          yield return p;
253          p = p.Parent;
254        }
255      }
256    }
257
258    public int CompareTo(NodeInfo other) {
259      return other == null ? 1 : Index.CompareTo(other.Index);
260    }
261
262    public static bool operator <(NodeInfo lsh, NodeInfo rhs) {
263      return lsh.CompareTo(rhs) == -1;
264    }
265
266    public static bool operator >(NodeInfo lhs, NodeInfo rhs) {
267      return lhs.CompareTo(rhs) == 1;
268    }
269
270    public static bool operator <=(NodeInfo lhs, NodeInfo rhs) {
271      return lhs.CompareTo(rhs) <= 0;
272    }
273
274    public static bool operator >=(NodeInfo lhs, NodeInfo rhs) {
275      return lhs.CompareTo(rhs) >= 0;
276    }
277  }
278}
279
Note: See TracBrowser for help on using the repository browser.