#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
// this class implements the decision version of the tree pattern query matching algorithm
// by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
public class QueryMatch {
public ISymbolicExpressionTreeNodeEqualityComparer EqualityComparer { get; private set; }
private readonly NodeInfo Inf = new NodeInfo { Index = int.MaxValue };
private readonly NodeInfo Nil = new NodeInfo { Index = -1 };
private readonly HashSet commutativeSymbols = new HashSet { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };
private readonly ISymbolicExpressionTreeNodeComparer nodeComparer = new SymbolicExpressionTreeNodeComparer();
private readonly Dictionary> cache = new Dictionary>();
public void ClearCache() {
cache.Clear();
}
private QueryMatch() { }
// whether matching nodes should also have matching parents
// in theory, this restricts the matching so that parent-child
// pairs in the query tree are matched by parent-child pairs in
// the data tree (and not ancestor-descendant pairs)
public bool MatchParents { get; set; }
public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer equalityComparer) {
EqualityComparer = equalityComparer;
}
internal bool Match(List data, List query) {
var dRoot = data.Last();
var qRoot = query.Last();
var result = Tmatch(dRoot, query.First(), qRoot);
return result == qRoot;
}
public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) {
return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0));
}
public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
if (!EqualityComparer.Equals(data, query))
return false;
List dNodes, qNodes;
if (!cache.TryGetValue(data, out dNodes)) {
dNodes = InitializePostOrder(data);
cache[data] = dNodes;
}
if (!cache.TryGetValue(query, out qNodes)) {
qNodes = InitializePostOrder(query);
cache[query] = qNodes;
}
var dRoot = dNodes.Last();
var qRoot = qNodes.Last();
var result = Tmatch(dRoot, qNodes.First(), qRoot);
return result == qRoot;
}
public IEnumerable GetMatchingTrees(IEnumerable data, ISymbolicExpressionTree query) {
var qRoot = query.Root.GetSubtree(0).GetSubtree(0);
var filtered = data.Where(x => x.Length >= query.Length && EqualityComparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot));
var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0));
return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d;
}
private bool AreMatching(NodeInfo d, NodeInfo q) {
// force the nodes to be on the same level
if (d.Level != q.Level)
return false;
// compare nodes
bool equals = EqualityComparer.Equals(d.Node, q.Node);
if (!equals)
return false;
// compare node parents
if (MatchParents && !EqualityComparer.Equals(d.Node.Parent, q.Node.Parent))
return false;
// compare children to make sure they are the same
if (d.Node.SubtreeCount > 0 && q.Node.SubtreeCount > 0) {
var dSubtrees = d.Node.Subtrees.ToList();
if (commutativeSymbols.Contains(d.Node.Symbol.Name)) {
var qSubtrees = q.Node.Subtrees.Where(x => !(x is AnyNode || x is AnySubtree)).ToList();
dSubtrees.Sort(nodeComparer);
qSubtrees.Sort(nodeComparer);
var nw = q.Node.SubtreeCount - qSubtrees.Count; // number of wildcards
if (nw == 0)
return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer);
for (int i = 0, j = 0; i < dSubtrees.Count && j < qSubtrees.Count;) {
if (EqualityComparer.Equals(dSubtrees[i], qSubtrees[j])) {
++i;
++j;
} else {
if (nw == 0)
return false;
++i;
--nw;
}
}
} else {
var qSubtrees = q.Node.Subtrees;
return dSubtrees.SequenceEqual(qSubtrees, EqualityComparer);
}
}
return true;
}
private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
var qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil);
var next = qBest.Next;
if (next <= qUntil && AreMatching(d, next)) {
qBest = next;
next = qBest.Next;
var lastSibling = qBest.LastSibling;
return next <= lastSibling ? Tmatch(d, next, lastSibling) : qBest;
}
return qBest;
}
private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
if (d.IsFirstSibling)
return Tmatch(d, qFrom, qUntil);
var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil);
var qTree = Tmatch(d, qFrom, qUntil);
for (;;) {
if (qHedge == qTree)
return qHedge;
if (qTree < qHedge) {
var rtop = Rtop(qTree.Next, qHedge);
while (rtop < Inf && qHedge < rtop.LastSibling) {
qTree = Tmatch(d, rtop.Next, rtop.LastSibling);
rtop = Rtop(qTree.Next, qHedge);
}
if (qTree <= qHedge)
return qHedge;
} else {
var rtop = Rtop(qHedge.Next, qTree);
while (rtop < Inf && qTree < rtop.LastSibling) {
qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling);
rtop = Rtop(qHedge.Next, qTree);
}
if (qHedge <= qTree)
return qTree;
}
}
}
///
/// Description from Götz et al. - Efficient Algorithms for Descendant-only Tree Pattern Queries, 2009:
/// Given two nodes hFrom and hUntil, returns the rightmost node among the topmost nodes in the interval [hFrom, hUntil]
/// More formally, Rtop(hFrom,hUntil) is the node u such that depth(u) is minimal and u is larger than every other node v
/// in [hFrom, hUntil] with depth(u)==depth(v).
///
/// The interval start
/// The interval end
/// The rightmost node from the topmost nodes in the interval [hFrom, hUntil]
private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) {
if (hFrom == hUntil)
return hUntil;
if (hFrom.Index > hUntil.Index)
return Inf;
// let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
// if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
var rtop = hUntil.Ancestors.OrderBy(x => x.Level).Select(x => x.PreviousSibling).FirstOrDefault(x => x != null && x >= hFrom);
return rtop ?? hUntil;
}
internal static List InitializePostOrder(ISymbolicExpressionTreeNode node) {
// levels will be computed below
var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList();
var map = nodes.ToDictionary(x => x.Node, x => x);
nodes.First().Previous = new NodeInfo { Node = null, Index = -1, Next = nodes.First() };
nodes.Last().Next = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() };
for (int i = nodes.Count - 1; i >= 0; --i) {
var n = nodes[i];
if (n != nodes.Last()) n.Next = nodes[n.Index + 1];
if (n != nodes.First()) n.Previous = nodes[n.Index - 1];
NodeInfo parentInfo = null;
if (n.Node.Parent != null)
map.TryGetValue(n.Node.Parent, out parentInfo);
n.Parent = parentInfo;
if (n.Parent == null) {
n.PreviousSibling = n.NextSibling = n.LastSibling = null;
} else {
var parent = n.Parent.Node;
int si = parent.IndexOfSubtree(n.Node);
n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)];
n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)];
n.LastSibling = map[parent.Subtrees.Last()];
n.Level = n.Parent.Level + 1;
}
n.LastChild = n.IsLeaf ? null : n.Previous;
}
return nodes;
}
}
///
/// NodeInfo objects are useful for keeping sibling, parent, index and depth information for tree nodes
///
internal class NodeInfo : IComparable {
public ISymbolicExpressionTreeNode Node { get; set; }
public int Index { get; set; }
public int Level { get; set; }
public NodeInfo Parent { get; set; }
public NodeInfo Previous { get; set; }
public NodeInfo PreviousSibling { get; set; }
public NodeInfo Next { get; set; }
public NodeInfo NextSibling { get; set; }
public NodeInfo LastSibling { get; set; }
public NodeInfo LastChild { get; set; }
public bool IsLeaf {
get { return Node.SubtreeCount == 0; }
}
public bool IsFirstSibling {
get { return Parent != null && Node == Node.Parent.Subtrees.First(); }
}
public bool IsLastSibling {
get { return Parent != null && Node == Node.Parent.Subtrees.Last(); }
}
public IEnumerable Ancestors {
get {
var p = Parent;
while (p != null) {
yield return p;
p = p.Parent;
}
}
}
public int CompareTo(NodeInfo other) {
return other == null ? 1 : Index.CompareTo(other.Index);
}
public static bool operator <(NodeInfo lsh, NodeInfo rhs) {
return lsh.CompareTo(rhs) == -1;
}
public static bool operator >(NodeInfo lhs, NodeInfo rhs) {
return lhs.CompareTo(rhs) == 1;
}
public static bool operator <=(NodeInfo lhs, NodeInfo rhs) {
return lhs.CompareTo(rhs) <= 0;
}
public static bool operator >=(NodeInfo lhs, NodeInfo rhs) {
return lhs.CompareTo(rhs) >= 0;
}
}
}