Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs @ 16130

Last change on this file since 16130 was 14634, checked in by bburlacu, 8 years ago

#1772: QueryMatch.cs: implement caching of NodeInfo lists to improve performance when repeatedly matching trees against schemas.

File size: 11.3 KB
RevLine 
[12929]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[14326]22using System;
[12929]23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26
27namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
28  // this class implements the decision version of the tree pattern query matching algorithm
29  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
30  public class QueryMatch {
[14426]31    public ISymbolicExpressionTreeNodeEqualityComparer EqualityComparer { get; private set; }
[14326]32    private readonly NodeInfo Inf = new NodeInfo { Index = int.MaxValue };
33    private readonly NodeInfo Nil = new NodeInfo { Index = -1 };
[14426]34    private readonly HashSet<string> commutativeSymbols = new HashSet<string> { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };
35    private readonly ISymbolicExpressionTreeNodeComparer nodeComparer = new SymbolicExpressionTreeNodeComparer();
[12929]36
[14634]37    private readonly Dictionary<ISymbolicExpressionTreeNode, List<NodeInfo>> cache = new Dictionary<ISymbolicExpressionTreeNode, List<NodeInfo>>();
38
39    public void ClearCache() {
40      cache.Clear();
41    }
42
[12929]43    private QueryMatch() { }
44
[12933]45    // whether matching nodes should also have matching parents
46    // in theory, this restricts the matching so that parent-child
47    // pairs in the query tree are matched by parent-child pairs in
48    // the data tree (and not ancestor-descendant pairs)
49    public bool MatchParents { get; set; }
50
[14426]51    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer equalityComparer) {
52      EqualityComparer = equalityComparer;
[12929]53    }
54
[12979]55    internal bool Match(List<NodeInfo> data, List<NodeInfo> query) {
56      var dRoot = data.Last();
57      var qRoot = query.Last();
58
59      var result = Tmatch(dRoot, query.First(), qRoot);
60      return result == qRoot;
61    }
62
[12951]63    public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) {
64      return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0));
65    }
66
[12929]67    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
[14426]68      if (!EqualityComparer.Equals(data, query))
[12941]69        return false;
70
[14634]71      List<NodeInfo> dNodes, qNodes;
[12929]72
[14634]73      if (!cache.TryGetValue(data, out dNodes)) {
74        dNodes = InitializePostOrder(data);
75        cache[data] = dNodes;
76      }
77
78      if (!cache.TryGetValue(query, out qNodes)) {
79        qNodes = InitializePostOrder(query);
80        cache[query] = qNodes;
81      }
82
[12929]83      var dRoot = dNodes.Last();
84      var qRoot = qNodes.Last();
[12979]85      var result = Tmatch(dRoot, qNodes.First(), qRoot);
[12929]86      return result == qRoot;
87    }
88
[12979]89    public IEnumerable<ISymbolicExpressionTree> GetMatchingTrees(IEnumerable<ISymbolicExpressionTree> data, ISymbolicExpressionTree query) {
90      var qRoot = query.Root.GetSubtree(0).GetSubtree(0);
[14426]91      var filtered = data.Where(x => x.Length >= query.Length && EqualityComparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot));
[12979]92      var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0));
93      return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d;
94    }
95
[14426]96    private bool AreMatching(NodeInfo d, NodeInfo q) {
[12941]97      // force the nodes to be on the same level
[14426]98      if (d.Level != q.Level)
[12941]99        return false;
[12929]100
[14426]101      // compare nodes
102      bool equals = EqualityComparer.Equals(d.Node, q.Node);
103
104      if (!equals)
105        return false;
106
107      // compare node parents
108      if (MatchParents && !EqualityComparer.Equals(d.Node.Parent, q.Node.Parent))
109        return false;
110
111      // compare children to make sure they are the same
112      if (d.Node.SubtreeCount > 0 && q.Node.SubtreeCount > 0) {
113        var dSubtrees = d.Node.Subtrees.ToList();
114
115        if (commutativeSymbols.Contains(d.Node.Symbol.Name)) {
116          var qSubtrees = q.Node.Subtrees.Where(x => !(x is AnyNode || x is AnySubtree)).ToList();
117
118          dSubtrees.Sort(nodeComparer);
119          qSubtrees.Sort(nodeComparer);
120
121          var nw = q.Node.SubtreeCount - qSubtrees.Count; // number of wildcards
122          if (nw == 0)
123            return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer);
124
125          for (int i = 0, j = 0; i < dSubtrees.Count && j < qSubtrees.Count;) {
126            if (EqualityComparer.Equals(dSubtrees[i], qSubtrees[j])) {
127              ++i;
128              ++j;
129            } else {
130              if (nw == 0)
131                return false;
132              ++i;
133              --nw;
134            }
135          }
136        } else {
137          var qSubtrees = q.Node.Subtrees;
138          return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer);
139        }
[12933]140      }
[14426]141      return true;
[12933]142    }
143
[12942]144    private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
[14326]145      var qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil);
[12941]146      var next = qBest.Next;
[14326]147      if (next <= qUntil && AreMatching(d, next)) {
[12929]148        qBest = next;
[12941]149        next = qBest.Next;
150        var lastSibling = qBest.LastSibling;
[14326]151        return next <= lastSibling ? Tmatch(d, next, lastSibling) : qBest;
[12929]152      }
153      return qBest;
154    }
155
[12942]156    private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
[12941]157      if (d.IsFirstSibling)
[12942]158        return Tmatch(d, qFrom, qUntil);
[12929]159
[12942]160      var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil);
161      var qTree = Tmatch(d, qFrom, qUntil);
[12929]162
163      for (;;) {
[12941]164        if (qHedge == qTree)
[12929]165          return qHedge;
[14326]166        if (qTree < qHedge) {
[12942]167          var rtop = Rtop(qTree.Next, qHedge);
[14326]168          while (rtop < Inf && qHedge < rtop.LastSibling) {
[12942]169            qTree = Tmatch(d, rtop.Next, rtop.LastSibling);
170            rtop = Rtop(qTree.Next, qHedge);
[12929]171          }
[14326]172          if (qTree <= qHedge)
[12929]173            return qHedge;
174        } else {
[12942]175          var rtop = Rtop(qHedge.Next, qTree);
[14326]176          while (rtop < Inf && qTree < rtop.LastSibling) {
[12942]177            qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling);
178            rtop = Rtop(qHedge.Next, qTree);
[12929]179          }
[14326]180          if (qHedge <= qTree)
[12929]181            return qTree;
182        }
183      }
184    }
185
[14326]186    /// <summary>
187    /// Description from Götz et al. - Efficient Algorithms for Descendant-only Tree Pattern Queries, 2009:
188    /// Given two nodes hFrom and hUntil, returns the rightmost node among the topmost nodes in the interval [hFrom, hUntil]
189    /// More formally, Rtop(hFrom,hUntil) is the node u such that depth(u) is minimal and u is larger than every other node v
190    /// in [hFrom, hUntil] with depth(u)==depth(v).
191    /// </summary>
192    /// <param name="hFrom">The interval start</param>
193    /// <param name="hUntil">The interval end</param>
194    /// <returns>The rightmost node from the topmost nodes in the interval [hFrom, hUntil]</returns>
[12942]195    private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) {
[12941]196      if (hFrom == hUntil)
[12929]197        return hUntil;
[12941]198      if (hFrom.Index > hUntil.Index)
[14326]199        return Inf;
[12929]200      // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
201      // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
[14326]202      var rtop = hUntil.Ancestors.OrderBy(x => x.Level).Select(x => x.PreviousSibling).FirstOrDefault(x => x != null && x >= hFrom);
[12941]203      return rtop ?? hUntil;
[12929]204    }
205
[12979]206    internal static List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
[14326]207      // levels will be computed below
[12988]208      var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList();
209      var map = nodes.ToDictionary(x => x.Node, x => x);
[14326]210      nodes.First().Previous = new NodeInfo { Node = null, Index = -1, Next = nodes.First() };
211      nodes.Last().Next = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() };
[12988]212      for (int i = nodes.Count - 1; i >= 0; --i) {
213        var n = nodes[i];
[14326]214        if (n != nodes.Last()) n.Next = nodes[n.Index + 1];
215        if (n != nodes.First()) n.Previous = nodes[n.Index - 1];
[14426]216        NodeInfo parentInfo = null;
217        if (n.Node.Parent != null)
218          map.TryGetValue(n.Node.Parent, out parentInfo);
[14326]219        n.Parent = parentInfo;
[12941]220        if (n.Parent == null) {
[14326]221          n.PreviousSibling = n.NextSibling = n.LastSibling = null;
[12941]222        } else {
223          var parent = n.Parent.Node;
224          int si = parent.IndexOfSubtree(n.Node);
225          n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)];
226          n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)];
227          n.LastSibling = map[parent.Subtrees.Last()];
[14326]228          n.Level = n.Parent.Level + 1;
[12941]229        }
230        n.LastChild = n.IsLeaf ? null : n.Previous;
[12929]231      }
[12941]232      return nodes;
233    }
[12979]234  }
[12929]235
[14326]236  /// <summary>
237  /// NodeInfo objects are useful for keeping sibling, parent, index and depth information for tree nodes
238  /// </summary>
239  internal class NodeInfo : IComparable<NodeInfo> {
[12979]240    public ISymbolicExpressionTreeNode Node { get; set; }
241    public int Index { get; set; }
242    public int Level { get; set; }
243    public NodeInfo Parent { get; set; }
244    public NodeInfo Previous { get; set; }
245    public NodeInfo PreviousSibling { get; set; }
246    public NodeInfo Next { get; set; }
247    public NodeInfo NextSibling { get; set; }
248    public NodeInfo LastSibling { get; set; }
249    public NodeInfo LastChild { get; set; }
[12929]250
[12979]251    public bool IsLeaf {
252      get { return Node.SubtreeCount == 0; }
253    }
[12929]254
[12979]255    public bool IsFirstSibling {
[13877]256      get { return Parent != null && Node == Node.Parent.Subtrees.First(); }
[12979]257    }
[12929]258
[12979]259    public bool IsLastSibling {
[13877]260      get { return Parent != null && Node == Node.Parent.Subtrees.Last(); }
[12979]261    }
[12929]262
[12979]263    public IEnumerable<NodeInfo> Ancestors {
264      get {
265        var p = Parent;
266        while (p != null) {
267          yield return p;
268          p = p.Parent;
[12929]269        }
270      }
271    }
[14326]272
273    public int CompareTo(NodeInfo other) {
274      return other == null ? 1 : Index.CompareTo(other.Index);
275    }
276
277    public static bool operator <(NodeInfo lsh, NodeInfo rhs) {
278      return lsh.CompareTo(rhs) == -1;
279    }
280
281    public static bool operator >(NodeInfo lhs, NodeInfo rhs) {
282      return lhs.CompareTo(rhs) == 1;
283    }
284
285    public static bool operator <=(NodeInfo lhs, NodeInfo rhs) {
286      return lhs.CompareTo(rhs) <= 0;
287    }
288
289    public static bool operator >=(NodeInfo lhs, NodeInfo rhs) {
290      return lhs.CompareTo(rhs) >= 0;
291    }
[12929]292  }
293}
[12941]294
Note: See TracBrowser for help on using the repository browser.