#region License Information /* HeuristicLab * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { // this class implements the decision version of the tree pattern query matching algorithm // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440 public class QueryMatch { public ISymbolicExpressionTreeNodeEqualityComparer EqualityComparer { get; private set; } private readonly NodeInfo Inf = new NodeInfo { Index = int.MaxValue }; private readonly NodeInfo Nil = new NodeInfo { Index = -1 }; private readonly HashSet commutativeSymbols = new HashSet { "Addition", "Multiplication", "Average", "And", "Or", "Xor" }; private readonly ISymbolicExpressionTreeNodeComparer nodeComparer = new SymbolicExpressionTreeNodeComparer(); private QueryMatch() { } // whether matching nodes should also have matching parents // in theory, this restricts the matching so that parent-child // pairs in the query tree are matched by parent-child pairs in // the data tree (and not ancestor-descendant pairs) public bool MatchParents { get; set; } public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer equalityComparer) { EqualityComparer = equalityComparer; } internal bool Match(List data, List query) { var dRoot = data.Last(); var qRoot = query.Last(); var result = Tmatch(dRoot, query.First(), qRoot); return result == qRoot; } public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) { return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0)); } public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) { if (!EqualityComparer.Equals(data, query)) return false; var dNodes = InitializePostOrder(data); var qNodes = InitializePostOrder(query); var dRoot = dNodes.Last(); var qRoot = qNodes.Last(); var result = Tmatch(dRoot, qNodes.First(), qRoot); return result == qRoot; } public IEnumerable GetMatchingTrees(IEnumerable data, ISymbolicExpressionTree query) { var qRoot = query.Root.GetSubtree(0).GetSubtree(0); var filtered = data.Where(x => x.Length >= query.Length && EqualityComparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot)); var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0)); return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d; } private bool AreMatching(NodeInfo d, NodeInfo q) { // force the nodes to be on the same level if (d.Level != q.Level) return false; // compare nodes bool equals = EqualityComparer.Equals(d.Node, q.Node); if (!equals) return false; // compare node parents if (MatchParents && !EqualityComparer.Equals(d.Node.Parent, q.Node.Parent)) return false; // compare children to make sure they are the same if (d.Node.SubtreeCount > 0 && q.Node.SubtreeCount > 0) { var dSubtrees = d.Node.Subtrees.ToList(); if (commutativeSymbols.Contains(d.Node.Symbol.Name)) { var qSubtrees = q.Node.Subtrees.Where(x => !(x is AnyNode || x is AnySubtree)).ToList(); dSubtrees.Sort(nodeComparer); qSubtrees.Sort(nodeComparer); var nw = q.Node.SubtreeCount - qSubtrees.Count; // number of wildcards if (nw == 0) return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer); for (int i = 0, j = 0; i < dSubtrees.Count && j < qSubtrees.Count;) { if (EqualityComparer.Equals(dSubtrees[i], qSubtrees[j])) { ++i; ++j; } else { if (nw == 0) return false; ++i; --nw; } } } else { var qSubtrees = q.Node.Subtrees; return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer); } } return true; } private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) { var qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil); var next = qBest.Next; if (next <= qUntil && AreMatching(d, next)) { qBest = next; next = qBest.Next; var lastSibling = qBest.LastSibling; return next <= lastSibling ? Tmatch(d, next, lastSibling) : qBest; } return qBest; } private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) { if (d.IsFirstSibling) return Tmatch(d, qFrom, qUntil); var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil); var qTree = Tmatch(d, qFrom, qUntil); for (;;) { if (qHedge == qTree) return qHedge; if (qTree < qHedge) { var rtop = Rtop(qTree.Next, qHedge); while (rtop < Inf && qHedge < rtop.LastSibling) { qTree = Tmatch(d, rtop.Next, rtop.LastSibling); rtop = Rtop(qTree.Next, qHedge); } if (qTree <= qHedge) return qHedge; } else { var rtop = Rtop(qHedge.Next, qTree); while (rtop < Inf && qTree < rtop.LastSibling) { qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling); rtop = Rtop(qHedge.Next, qTree); } if (qHedge <= qTree) return qTree; } } } /// /// Description from Götz et al. - Efficient Algorithms for Descendant-only Tree Pattern Queries, 2009: /// Given two nodes hFrom and hUntil, returns the rightmost node among the topmost nodes in the interval [hFrom, hUntil] /// More formally, Rtop(hFrom,hUntil) is the node u such that depth(u) is minimal and u is larger than every other node v /// in [hFrom, hUntil] with depth(u)==depth(v). /// /// The interval start /// The interval end /// The rightmost node from the topmost nodes in the interval [hFrom, hUntil] private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) { if (hFrom == hUntil) return hUntil; if (hFrom.Index > hUntil.Index) return Inf; // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s var rtop = hUntil.Ancestors.OrderBy(x => x.Level).Select(x => x.PreviousSibling).FirstOrDefault(x => x != null && x >= hFrom); return rtop ?? hUntil; } internal static List InitializePostOrder(ISymbolicExpressionTreeNode node) { // levels will be computed below var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList(); var map = nodes.ToDictionary(x => x.Node, x => x); nodes.First().Previous = new NodeInfo { Node = null, Index = -1, Next = nodes.First() }; nodes.Last().Next = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() }; for (int i = nodes.Count - 1; i >= 0; --i) { var n = nodes[i]; if (n != nodes.Last()) n.Next = nodes[n.Index + 1]; if (n != nodes.First()) n.Previous = nodes[n.Index - 1]; NodeInfo parentInfo = null; if (n.Node.Parent != null) map.TryGetValue(n.Node.Parent, out parentInfo); n.Parent = parentInfo; if (n.Parent == null) { n.PreviousSibling = n.NextSibling = n.LastSibling = null; } else { var parent = n.Parent.Node; int si = parent.IndexOfSubtree(n.Node); n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)]; n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)]; n.LastSibling = map[parent.Subtrees.Last()]; n.Level = n.Parent.Level + 1; } n.LastChild = n.IsLeaf ? null : n.Previous; } return nodes; } } /// /// NodeInfo objects are useful for keeping sibling, parent, index and depth information for tree nodes /// internal class NodeInfo : IComparable { public ISymbolicExpressionTreeNode Node { get; set; } public int Index { get; set; } public int Level { get; set; } public NodeInfo Parent { get; set; } public NodeInfo Previous { get; set; } public NodeInfo PreviousSibling { get; set; } public NodeInfo Next { get; set; } public NodeInfo NextSibling { get; set; } public NodeInfo LastSibling { get; set; } public NodeInfo LastChild { get; set; } public bool IsLeaf { get { return Node.SubtreeCount == 0; } } public bool IsFirstSibling { get { return Parent != null && Node == Node.Parent.Subtrees.First(); } } public bool IsLastSibling { get { return Parent != null && Node == Node.Parent.Subtrees.Last(); } } public IEnumerable Ancestors { get { var p = Parent; while (p != null) { yield return p; p = p.Parent; } } } public int CompareTo(NodeInfo other) { return other == null ? 1 : Index.CompareTo(other.Index); } public static bool operator <(NodeInfo lsh, NodeInfo rhs) { return lsh.CompareTo(rhs) == -1; } public static bool operator >(NodeInfo lhs, NodeInfo rhs) { return lhs.CompareTo(rhs) == 1; } public static bool operator <=(NodeInfo lhs, NodeInfo rhs) { return lhs.CompareTo(rhs) <= 0; } public static bool operator >=(NodeInfo lhs, NodeInfo rhs) { return lhs.CompareTo(rhs) >= 0; } } }