1 | #region License Information
|
---|
2 | /* HeuristicLab
|
---|
3 | * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
4 | *
|
---|
5 | * This file is part of HeuristicLab.
|
---|
6 | *
|
---|
7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 | #endregion
|
---|
21 |
|
---|
22 | using System;
|
---|
23 | using System.Collections.Generic;
|
---|
24 | using System.Linq;
|
---|
25 | using System.Text;
|
---|
26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
27 |
|
---|
28 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
|
---|
29 | // this class implements the decision version of the tree pattern query matching algorithm
|
---|
30 | // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
|
---|
31 | public class QueryMatch {
|
---|
32 | private List<NodeInfo> dNodes;
|
---|
33 | private List<NodeInfo> qNodes;
|
---|
34 | private readonly ISymbolicExpressionTreeNodeEqualityComparer comparer;
|
---|
35 |
|
---|
36 | private QueryMatch() { }
|
---|
37 |
|
---|
38 | private readonly Stack<string> stack = new Stack<string>();
|
---|
39 |
|
---|
40 | public string Trace {
|
---|
41 | get {
|
---|
42 | var sb = new StringBuilder();
|
---|
43 | while (stack.Count > 0)
|
---|
44 | sb.Append(stack.Pop());
|
---|
45 | return sb.ToString();
|
---|
46 | }
|
---|
47 | }
|
---|
48 |
|
---|
49 | // whether matching nodes should also have matching parents
|
---|
50 | // in theory, this restricts the matching so that parent-child
|
---|
51 | // pairs in the query tree are matched by parent-child pairs in
|
---|
52 | // the data tree (and not ancestor-descendant pairs)
|
---|
53 | public bool MatchParents { get; set; }
|
---|
54 |
|
---|
55 | public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) {
|
---|
56 | this.comparer = comparer;
|
---|
57 | }
|
---|
58 |
|
---|
59 | public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
|
---|
60 | stack.Clear();
|
---|
61 | dNodes = InitializePostOrder(data);
|
---|
62 | qNodes = InitializePostOrder(query);
|
---|
63 |
|
---|
64 | var dRoot = dNodes.Last();
|
---|
65 | var qRoot = qNodes.Last();
|
---|
66 | var result = Tmatch(dRoot, qNodes.First(), qNodes.Last());
|
---|
67 | return result == qRoot;
|
---|
68 | }
|
---|
69 |
|
---|
70 | private List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
|
---|
71 | var list = node.IterateNodesPostfix().Select((x, i) => new NodeInfo(x, i)).ToList();
|
---|
72 | foreach (var l in list)
|
---|
73 | l.Nodes = list;
|
---|
74 |
|
---|
75 | return list;
|
---|
76 | }
|
---|
77 |
|
---|
78 | private bool AreMatching(NodeInfo a, NodeInfo b) {
|
---|
79 | bool equals = comparer.Equals(a.Node, b.Node);
|
---|
80 |
|
---|
81 | if (equals && MatchParents) {
|
---|
82 | var pa = a.Parent();
|
---|
83 | var pb = b.Parent();
|
---|
84 | if (pa != null && pb != null)
|
---|
85 | return comparer.Equals(pa.Node, pb.Node);
|
---|
86 | if (!(pa == null && pb == null))
|
---|
87 | return false;
|
---|
88 | }
|
---|
89 | return equals;
|
---|
90 | }
|
---|
91 |
|
---|
92 | private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil, int tab = 0) {
|
---|
93 | #if DEBUG
|
---|
94 | var sb = new StringBuilder();
|
---|
95 | #endif
|
---|
96 | NodeInfo qBest = d.IsLeaf ? qFrom.Previous() : Hmatch(d.LastChild(), qFrom, qUntil, tab + 1);
|
---|
97 |
|
---|
98 | var next = qBest.Next();
|
---|
99 | if (next.Index <= qUntil.Index && AreMatching(d, next)) {
|
---|
100 | qBest = next;
|
---|
101 | next = qBest.Next();
|
---|
102 | var lastSibling = qBest.LastSibling();
|
---|
103 | var result = next.Index <= lastSibling.Index ? Tmatch(d, next, lastSibling, tab + 1) : qBest;
|
---|
104 | #if DEBUG
|
---|
105 | for (int i = 0; i < tab; ++i)
|
---|
106 | sb.Append("\t");
|
---|
107 | sb.AppendLine(string.Format("TM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, result.Index));
|
---|
108 | stack.Push(sb.ToString());
|
---|
109 | #endif
|
---|
110 | return result;
|
---|
111 | }
|
---|
112 | #if DEBUG
|
---|
113 | for (int i = 0; i < tab; ++i)
|
---|
114 | sb.Append("\t");
|
---|
115 | sb.AppendLine(string.Format("TM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qBest.Index));
|
---|
116 | stack.Push(sb.ToString());
|
---|
117 | #endif
|
---|
118 | return qBest;
|
---|
119 | }
|
---|
120 |
|
---|
121 | private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil, int tab = 0) {
|
---|
122 | #if DEBUG
|
---|
123 | var sb = new StringBuilder();
|
---|
124 | #endif
|
---|
125 | if (d.IsFirstSibling) {
|
---|
126 | var result = Tmatch(d, qFrom, qUntil, tab + 1);
|
---|
127 | #if DEBUG
|
---|
128 | for (int i = 0; i < tab; ++i)
|
---|
129 | sb.Append("\t");
|
---|
130 | sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, result.Index));
|
---|
131 | stack.Push(sb.ToString());
|
---|
132 | #endif
|
---|
133 | return result;
|
---|
134 | }
|
---|
135 |
|
---|
136 | var qHedge = Hmatch(d.PreviousSibling(), qFrom, qUntil, tab + 1);
|
---|
137 | var qTree = Tmatch(d, qFrom, qUntil, tab + 1);
|
---|
138 |
|
---|
139 | for (;;) {
|
---|
140 | if (qHedge == qTree) {
|
---|
141 | #if DEBUG
|
---|
142 | for (int i = 0; i < tab; ++i)
|
---|
143 | sb.Append("\t");
|
---|
144 | sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qHedge.Index));
|
---|
145 | stack.Push(sb.ToString());
|
---|
146 | #endif
|
---|
147 | return qHedge;
|
---|
148 | }
|
---|
149 | if (qTree.Index < qHedge.Index) {
|
---|
150 | var rtop = Rtop(qTree.Next(), qHedge, tab);
|
---|
151 | while (rtop.Index < qHedge.Next().Index && qHedge.Index < rtop.LastSibling().Index) {
|
---|
152 | qTree = Tmatch(d, rtop.Next(), rtop.LastSibling(), tab + 1);
|
---|
153 | rtop = Rtop(qTree.Next(), qHedge, tab);
|
---|
154 | }
|
---|
155 | if (qTree.Index <= qHedge.Index) {
|
---|
156 | #if DEBUG
|
---|
157 | for (int i = 0; i < tab; ++i)
|
---|
158 | sb.Append("\t");
|
---|
159 | sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qHedge.Index));
|
---|
160 | stack.Push(sb.ToString());
|
---|
161 | #endif
|
---|
162 | return qHedge;
|
---|
163 | }
|
---|
164 | } else {
|
---|
165 | var rtop = Rtop(qHedge.Next(), qTree, tab);
|
---|
166 | while (rtop.Index < qTree.Next().Index && qTree.Index < rtop.LastSibling().Index) {
|
---|
167 | qHedge = Hmatch(d.PreviousSibling(), rtop.Next(), rtop.LastSibling(), tab + 1);
|
---|
168 | rtop = Rtop(qHedge.Next(), qTree, tab);
|
---|
169 | }
|
---|
170 | if (qHedge.Index <= qTree.Index) {
|
---|
171 | #if DEBUG
|
---|
172 | for (int i = 0; i < tab; ++i)
|
---|
173 | sb.Append("\t");
|
---|
174 | sb.AppendLine(string.Format("HM(d{0}, q{1}, q{2}) => q{3}", d.Index, qFrom.Index, qUntil.Index, qTree.Index));
|
---|
175 | stack.Push(sb.ToString());
|
---|
176 | #endif
|
---|
177 | return qTree;
|
---|
178 | }
|
---|
179 | }
|
---|
180 | }
|
---|
181 | }
|
---|
182 |
|
---|
183 | // returns the rightmost node from the topmost nodes in the interval [hFrom, hUntil]
|
---|
184 | private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil, int tab = 0) {
|
---|
185 | #if DEBUG
|
---|
186 | var sb = new StringBuilder();
|
---|
187 | for (int i = 0; i < tab; ++i)
|
---|
188 | sb.Append("\t");
|
---|
189 | sb.Append(string.Format("Rtop(q{0}, q{1})", hFrom.Index, hUntil.Index));
|
---|
190 | #endif
|
---|
191 |
|
---|
192 | if (hFrom == hUntil) {
|
---|
193 | #if DEBUG
|
---|
194 | sb.AppendLine(string.Format(" => q{0}", hUntil.Index));
|
---|
195 | stack.Push(sb.ToString());
|
---|
196 | #endif
|
---|
197 | return hUntil;
|
---|
198 | }
|
---|
199 |
|
---|
200 | if (hFrom.Nodes != hUntil.Nodes)
|
---|
201 | throw new ArgumentException("hFrom and hUntil must belong to the same tree/hedge");
|
---|
202 |
|
---|
203 |
|
---|
204 | NodeInfo rtop = null;
|
---|
205 | if (hFrom.Index > hUntil.Index) {
|
---|
206 | rtop = hFrom.Next(); // or return inf (inf is defined in the paper as the successor of the largest node in the hedge)
|
---|
207 | #if DEBUG
|
---|
208 | sb.AppendLine(string.Format(" => q{0}", rtop.Index));
|
---|
209 | stack.Push(sb.ToString());
|
---|
210 | #endif
|
---|
211 | return rtop;
|
---|
212 | }
|
---|
213 |
|
---|
214 | // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
|
---|
215 | // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
|
---|
216 | var p = hUntil.Parent();
|
---|
217 | if (p == null)
|
---|
218 | return hUntil;
|
---|
219 | List<NodeInfo> ancestors = hUntil.Ancestors().ToList();
|
---|
220 | // ancestors list is ordered in decreasing depth therefore we start from the end
|
---|
221 | for (int i = ancestors.Count - 1; i >= 0; --i) {
|
---|
222 | var s = ancestors[i].PreviousSibling();
|
---|
223 | if (s.Index >= hFrom.Index) {
|
---|
224 | rtop = s;
|
---|
225 | break;
|
---|
226 | }
|
---|
227 | }
|
---|
228 | if (rtop == null)
|
---|
229 | rtop = hUntil;
|
---|
230 | #if DEBUG
|
---|
231 | sb.AppendLine(string.Format(" => q{0}", rtop.Index));
|
---|
232 | stack.Push(sb.ToString());
|
---|
233 | #endif
|
---|
234 | return rtop;
|
---|
235 | }
|
---|
236 |
|
---|
237 | private class NodeInfo {
|
---|
238 | private NodeInfo() { }
|
---|
239 |
|
---|
240 | public NodeInfo(ISymbolicExpressionTreeNode n, int i) {
|
---|
241 | Node = n;
|
---|
242 | Index = i;
|
---|
243 | }
|
---|
244 |
|
---|
245 | public List<NodeInfo> Nodes { get; set; }
|
---|
246 | public ISymbolicExpressionTreeNode Node { get; }
|
---|
247 | public int Index { get; }
|
---|
248 |
|
---|
249 | public bool IsLeaf {
|
---|
250 | get { return Node.SubtreeCount == 0; }
|
---|
251 | }
|
---|
252 |
|
---|
253 | public bool IsFirstSibling {
|
---|
254 | get { return this.Node == this.Node.Parent.Subtrees.First(); }
|
---|
255 | }
|
---|
256 |
|
---|
257 | public IEnumerable<NodeInfo> Ancestors() {
|
---|
258 | var p = Parent();
|
---|
259 | while (p != null) {
|
---|
260 | yield return p;
|
---|
261 | p = p.Parent();
|
---|
262 | }
|
---|
263 | }
|
---|
264 |
|
---|
265 | public NodeInfo Parent() {
|
---|
266 | var p = Node.Parent;
|
---|
267 | if (p == null || p.Symbol is StartSymbol)
|
---|
268 | return null;
|
---|
269 | var index = Index + 1;
|
---|
270 | for (int i = p.SubtreeCount - 1; i >= 0; --i) {
|
---|
271 | var subtree = p.GetSubtree(i);
|
---|
272 | if (subtree == Node)
|
---|
273 | break;
|
---|
274 | index += subtree.GetLength();
|
---|
275 | }
|
---|
276 | return Nodes[index];
|
---|
277 | }
|
---|
278 |
|
---|
279 | public NodeInfo Next() {
|
---|
280 | if (this == Nodes.Last())
|
---|
281 | return new NodeInfo(null, int.MaxValue) { Nodes = Nodes };
|
---|
282 | return Nodes[Index + 1];
|
---|
283 | }
|
---|
284 |
|
---|
285 | public NodeInfo NextSibling() {
|
---|
286 | if (Node.Parent == null || Node == Node.Parent.Subtrees.Last())
|
---|
287 | return new NodeInfo(null, -1);
|
---|
288 |
|
---|
289 | var s = Node.Parent.GetSubtree(Node.Parent.IndexOfSubtree(Node) + 1);
|
---|
290 | var nextSibling = Nodes[Index + s.GetLength()];
|
---|
291 | return nextSibling;
|
---|
292 | }
|
---|
293 |
|
---|
294 | public NodeInfo LastSibling() {
|
---|
295 | if (Node.Parent == null)
|
---|
296 | return new NodeInfo(null, -1);
|
---|
297 |
|
---|
298 | var last = Node.Parent.Subtrees.Last();
|
---|
299 | if (this.Node == last)
|
---|
300 | return this;
|
---|
301 |
|
---|
302 | var n = this;
|
---|
303 | while (n.Node != last)
|
---|
304 | n = n.NextSibling();
|
---|
305 | return n;
|
---|
306 | }
|
---|
307 |
|
---|
308 | public NodeInfo Previous() {
|
---|
309 | if (this == Nodes.First())
|
---|
310 | return new NodeInfo(null, -1) { Nodes = Nodes }; // return nil
|
---|
311 |
|
---|
312 | return Nodes[Index - 1];
|
---|
313 | }
|
---|
314 |
|
---|
315 | public NodeInfo PreviousSibling() {
|
---|
316 | if (Node.Parent == null || Node == Node.Parent.Subtrees.First())
|
---|
317 | return new NodeInfo(null, -1) { Nodes = Nodes };
|
---|
318 | var previousSibling = Nodes[Index - Node.GetLength()];
|
---|
319 | return previousSibling;
|
---|
320 | }
|
---|
321 |
|
---|
322 | public NodeInfo LastChild() {
|
---|
323 | var nil = new NodeInfo(null, -1);
|
---|
324 | if (IsLeaf)
|
---|
325 | return nil;
|
---|
326 | return Previous();
|
---|
327 | }
|
---|
328 | }
|
---|
329 | }
|
---|
330 | }
|
---|