#region License Information /* HeuristicLab * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System.Collections.Generic; using System.Linq; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { // this class implements the decision version of the tree pattern query matching algorithm // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440 public class QueryMatch { public ISymbolicExpressionTreeNodeEqualityComparer Comparer { get; } private QueryMatch() { } // whether matching nodes should also have matching parents // in theory, this restricts the matching so that parent-child // pairs in the query tree are matched by parent-child pairs in // the data tree (and not ancestor-descendant pairs) public bool MatchParents { get; set; } public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) { this.Comparer = comparer; } internal bool Match(List data, List query) { var dRoot = data.Last(); var qRoot = query.Last(); var result = Tmatch(dRoot, query.First(), qRoot); return result == qRoot; } public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) { return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0)); } public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) { if (!Comparer.Equals(data, query)) return false; var dNodes = InitializePostOrder(data); var qNodes = InitializePostOrder(query); var dRoot = dNodes.Last(); var qRoot = qNodes.Last(); var result = Tmatch(dRoot, qNodes.First(), qRoot); return result == qRoot; } public IEnumerable GetMatchingTrees(IEnumerable data, ISymbolicExpressionTree query) { var qRoot = query.Root.GetSubtree(0).GetSubtree(0); var filtered = data.Where(x => x.Length >= query.Length && Comparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot)); var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0)); return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d; } private bool AreMatching(NodeInfo a, NodeInfo b) { // force the nodes to be on the same level if (a.Level != b.Level) return false; bool equals = Comparer.Equals(a.Node, b.Node); if (equals && MatchParents) { var pa = a.Parent; var pb = b.Parent; if (pa == null && pb == null) return true; if (pa == null || pb == null) return false; return pa.Level == pb.Level && Comparer.Equals(pa.Node, pb.Node); } return equals; } private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) { NodeInfo qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil); var next = qBest.Next; if (next.Index <= qUntil.Index && AreMatching(d, next)) { qBest = next; next = qBest.Next; var lastSibling = qBest.LastSibling; return next.Index <= lastSibling.Index ? Tmatch(d, next, lastSibling) : qBest; } return qBest; } private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) { if (d.IsFirstSibling) return Tmatch(d, qFrom, qUntil); var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil); var qTree = Tmatch(d, qFrom, qUntil); for (;;) { if (qHedge == qTree) return qHedge; if (qTree.Index < qHedge.Index) { var rtop = Rtop(qTree.Next, qHedge); while (rtop.Index < int.MaxValue && qHedge.Index < rtop.LastSibling.Index) { qTree = Tmatch(d, rtop.Next, rtop.LastSibling); rtop = Rtop(qTree.Next, qHedge); } if (qTree.Index <= qHedge.Index) return qHedge; } else { var rtop = Rtop(qHedge.Next, qTree); while (rtop.Index < int.MaxValue && qTree.Index < rtop.LastSibling.Index) { qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling); rtop = Rtop(qHedge.Next, qTree); } if (qHedge.Index <= qTree.Index) return qTree; } } } // returns the rightmost node from the topmost nodes in the interval [hFrom, hUntil] private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) { if (hFrom == hUntil) return hUntil; if (hFrom.Index > hUntil.Index) return new NodeInfo { Node = null, Index = int.MaxValue }; // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s NodeInfo rtop = null; List ancestors = hUntil.Ancestors.ToList(); // ancestors list is ordered in decreasing depth therefore we start from the end for (int i = ancestors.Count - 1; i >= 0; --i) { if (ancestors[i].Parent == null || ancestors[i].IsFirstSibling) continue; var s = ancestors[i].PreviousSibling; if (s != null && s.Index >= hFrom.Index) { rtop = s; break; } } return rtop ?? hUntil; } internal static List InitializePostOrder(ISymbolicExpressionTreeNode node) { var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList(); var map = nodes.ToDictionary(x => x.Node, x => x); var inf = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() }; var nil = new NodeInfo { Node = null, Index = -1, Next = nodes.First() }; for (int i = nodes.Count - 1; i >= 0; --i) { var n = nodes[i]; n.Parent = n.Node.Parent != null && map.ContainsKey(n.Node.Parent) ? map[n.Node.Parent] : null; n.Level = n.Parent?.Level + 1 ?? -1; n.Next = n == nodes.Last() ? inf : nodes[n.Index + 1]; n.Previous = n == nodes.First() ? nil : nodes[n.Index - 1]; if (n.Parent == null) { n.PreviousSibling = n.NextSibling = null; n.LastSibling = nil; } else { var parent = n.Parent.Node; int si = parent.IndexOfSubtree(n.Node); n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)]; n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)]; n.LastSibling = map[parent.Subtrees.Last()]; } n.LastChild = n.IsLeaf ? null : n.Previous; } return nodes; } } internal class NodeInfo { public ISymbolicExpressionTreeNode Node { get; set; } public int Index { get; set; } public int Level { get; set; } public NodeInfo Parent { get; set; } public NodeInfo Previous { get; set; } public NodeInfo PreviousSibling { get; set; } public NodeInfo Next { get; set; } public NodeInfo NextSibling { get; set; } public NodeInfo LastSibling { get; set; } public NodeInfo LastChild { get; set; } public bool IsLeaf { get { return Node.SubtreeCount == 0; } } public bool IsFirstSibling { get { return Node.Parent != null && Node == Node.Parent.Subtrees.First(); } } public bool IsLastSibling { get { return Node.Parent != null && Node == Node.Parent.Subtrees.Last(); } } public IEnumerable Ancestors { get { var p = Parent; while (p != null) { yield return p; p = p.Parent; } } } } }