Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/QueryMatch.cs @ 12988

Last change on this file since 12988 was 12988, checked in by bburlacu, 9 years ago

#1772: Performance improvement changes

  • QueryMatch.cs: eliminate unnecessary ToList() call and expensive GetBranchLevel calls
  • Diversification: eliminated creation of shallow copies of the individual subscopes as it was either too slow (due to events being registered/deregistered when variables are added to the scope) or too leaky (if attempting to clear the scopes without clearing the variables then the code is leaking EventHandlers)
  • Aggregated diversification statistics separately with the help of some parameters set up in the SchemaCreator and SchemaEvaluator
  • Made code in the UpdateEstimatedValuesOperator perform exactly as in the evaluator (updating quality and estimated values)
  • Removed no longer needed SchemaCleanupOperator
  • Do not evaluate intermediate vertices in the genealogy analyzer if the TrimOlderGenerations flag is activated

New functionality:

  • parameter to control the fraction of the population to be considered by the diversification strategy
  • parameter to control whether individuals may be matched by any schema and mutated only once (exclusive matching)
  • parameter to control whether linear scaling should be applied to the estimated values used for the calculation of phenotypic similarity (default: yes)
File size: 8.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
25
26namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
27  // this class implements the decision version of the tree pattern query matching algorithm
28  // by M. Götz, C. Koch and W. Martens in http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.5440
29  public class QueryMatch {
30    public ISymbolicExpressionTreeNodeEqualityComparer Comparer { get; }
31
32    private QueryMatch() { }
33
34    // whether matching nodes should also have matching parents
35    // in theory, this restricts the matching so that parent-child
36    // pairs in the query tree are matched by parent-child pairs in
37    // the data tree (and not ancestor-descendant pairs)
38    public bool MatchParents { get; set; }
39
40    public QueryMatch(ISymbolicExpressionTreeNodeEqualityComparer comparer) {
41      this.Comparer = comparer;
42    }
43
44    internal bool Match(List<NodeInfo> data, List<NodeInfo> query) {
45      var dRoot = data.Last();
46      var qRoot = query.Last();
47
48      var result = Tmatch(dRoot, query.First(), qRoot);
49      return result == qRoot;
50    }
51
52    public bool Match(ISymbolicExpressionTree data, ISymbolicExpressionTree query) {
53      return Match(data.Root.GetSubtree(0).GetSubtree(0), query.Root.GetSubtree(0).GetSubtree(0));
54    }
55
56    public bool Match(ISymbolicExpressionTreeNode data, ISymbolicExpressionTreeNode query) {
57      if (!Comparer.Equals(data, query))
58        return false;
59
60      var dNodes = InitializePostOrder(data);
61      var qNodes = InitializePostOrder(query);
62
63      var dRoot = dNodes.Last();
64      var qRoot = qNodes.Last();
65      var result = Tmatch(dRoot, qNodes.First(), qRoot);
66      return result == qRoot;
67    }
68
69    public IEnumerable<ISymbolicExpressionTree> GetMatchingTrees(IEnumerable<ISymbolicExpressionTree> data, ISymbolicExpressionTree query) {
70      var qRoot = query.Root.GetSubtree(0).GetSubtree(0);
71      var filtered = data.Where(x => x.Length >= query.Length && Comparer.Equals(x.Root.GetSubtree(0).GetSubtree(0), qRoot));
72      var qNodes = InitializePostOrder(query.Root.GetSubtree(0).GetSubtree(0));
73      return from d in filtered let dNodes = InitializePostOrder(d.Root.GetSubtree(0).GetSubtree(0)) where Match(dNodes, qNodes) select d;
74    }
75
76    private bool AreMatching(NodeInfo a, NodeInfo b) {
77      // force the nodes to be on the same level
78      if (a.Level != b.Level)
79        return false;
80
81      bool equals = Comparer.Equals(a.Node, b.Node);
82
83      if (equals && MatchParents) {
84        var pa = a.Parent;
85        var pb = b.Parent;
86        if (pa == null && pb == null)
87          return true;
88        if (pa != null && pb != null)
89          return pa.Level == pb.Level && Comparer.Equals(pa.Node, pb.Node);
90        return false;
91      }
92      return equals;
93    }
94
95    private NodeInfo Tmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
96      NodeInfo qBest = d.IsLeaf ? qFrom.Previous : Hmatch(d.LastChild, qFrom, qUntil);
97      var next = qBest.Next;
98      if (next.Index <= qUntil.Index && AreMatching(d, next)) {
99        qBest = next;
100        next = qBest.Next;
101        var lastSibling = qBest.LastSibling;
102        return next.Index <= lastSibling.Index ? Tmatch(d, next, lastSibling) : qBest;
103      }
104      return qBest;
105    }
106
107    private NodeInfo Hmatch(NodeInfo d, NodeInfo qFrom, NodeInfo qUntil) {
108      if (d.IsFirstSibling)
109        return Tmatch(d, qFrom, qUntil);
110
111      var qHedge = Hmatch(d.PreviousSibling, qFrom, qUntil);
112      var qTree = Tmatch(d, qFrom, qUntil);
113
114      for (;;) {
115        if (qHedge == qTree)
116          return qHedge;
117        if (qTree.Index < qHedge.Index) {
118          var rtop = Rtop(qTree.Next, qHedge);
119          while (rtop.Index < int.MaxValue && qHedge.Index < rtop.LastSibling.Index) {
120            qTree = Tmatch(d, rtop.Next, rtop.LastSibling);
121            rtop = Rtop(qTree.Next, qHedge);
122          }
123          if (qTree.Index <= qHedge.Index)
124            return qHedge;
125        } else {
126          var rtop = Rtop(qHedge.Next, qTree);
127          while (rtop.Index < int.MaxValue && qTree.Index < rtop.LastSibling.Index) {
128            qHedge = Hmatch(d.PreviousSibling, rtop.Next, rtop.LastSibling);
129            rtop = Rtop(qHedge.Next, qTree);
130          }
131          if (qHedge.Index <= qTree.Index)
132            return qTree;
133        }
134      }
135    }
136
137    // returns the rightmost node from the topmost nodes in the interval [hFrom, hUntil]
138    private NodeInfo Rtop(NodeInfo hFrom, NodeInfo hUntil) {
139      if (hFrom == hUntil)
140        return hUntil;
141      if (hFrom.Index > hUntil.Index)
142        return new NodeInfo { Node = null, Index = int.MaxValue };
143      // let u be the highest ancestor of hUntil that has a previous sibling s such that s >= hFrom
144      // if no such u exists, then Rtop(hFrom, hUntil) = hUntil. Otherwise, rtop(hFrom, hUntil) = s
145      NodeInfo rtop = null;
146      List<NodeInfo> ancestors = hUntil.Ancestors.ToList();
147      // ancestors list is ordered in decreasing depth therefore we start from the end
148      for (int i = ancestors.Count - 1; i >= 0; --i) {
149        if (ancestors[i].Parent == null || ancestors[i].IsFirstSibling)
150          continue;
151        var s = ancestors[i].PreviousSibling;
152        if (s != null && s.Index >= hFrom.Index) {
153          rtop = s;
154          break;
155        }
156      }
157      return rtop ?? hUntil;
158    }
159
160    internal static List<NodeInfo> InitializePostOrder(ISymbolicExpressionTreeNode node) {
161      var nodes = node.IterateNodesPostfix().Select((x, i) => new NodeInfo { Node = x, Index = i, Level = 0 }).ToList();
162      var map = nodes.ToDictionary(x => x.Node, x => x);
163
164      var inf = new NodeInfo { Node = null, Index = int.MaxValue, Previous = nodes.Last() };
165      var nil = new NodeInfo { Node = null, Index = -1, Next = nodes.First() };
166
167      for (int i = nodes.Count - 1; i >= 0; --i) {
168        var n = nodes[i];
169        n.Parent = n.Node.Parent != null && map.ContainsKey(n.Node.Parent) ? map[n.Node.Parent] : null;
170        n.Level = n.Parent?.Level + 1 ?? -1;
171        n.Next = n == nodes.Last() ? inf : nodes[n.Index + 1];
172        n.Previous = n == nodes.First() ? nil : nodes[n.Index - 1];
173        if (n.Parent == null) {
174          n.PreviousSibling = n.NextSibling = null;
175          n.LastSibling = nil;
176        } else {
177          var parent = n.Parent.Node;
178          int si = parent.IndexOfSubtree(n.Node);
179          n.PreviousSibling = n.IsFirstSibling ? null : map[parent.GetSubtree(si - 1)];
180          n.NextSibling = n.IsLastSibling ? null : map[parent.GetSubtree(si + 1)];
181          n.LastSibling = map[parent.Subtrees.Last()];
182        }
183        n.LastChild = n.IsLeaf ? null : n.Previous;
184      }
185      return nodes;
186    }
187  }
188
189  internal class NodeInfo {
190    public ISymbolicExpressionTreeNode Node { get; set; }
191    public int Index { get; set; }
192    public int Level { get; set; }
193    public NodeInfo Parent { get; set; }
194    public NodeInfo Previous { get; set; }
195    public NodeInfo PreviousSibling { get; set; }
196    public NodeInfo Next { get; set; }
197    public NodeInfo NextSibling { get; set; }
198    public NodeInfo LastSibling { get; set; }
199    public NodeInfo LastChild { get; set; }
200
201    public bool IsLeaf {
202      get { return Node.SubtreeCount == 0; }
203    }
204
205    public bool IsFirstSibling {
206      get { return Node.Parent != null && Node == Node.Parent.Subtrees.First(); }
207    }
208
209    public bool IsLastSibling {
210      get { return Node.Parent != null && Node == Node.Parent.Subtrees.Last(); }
211    }
212
213    public IEnumerable<NodeInfo> Ancestors {
214      get {
215        var p = Parent;
216        while (p != null) {
217          yield return p;
218          p = p.Parent;
219        }
220      }
221    }
222  }
223}
224
Note: See TracBrowser for help on using the repository browser.