Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/TraceCalculator.cs @ 14821

Last change on this file since 14821 was 14575, checked in by bburlacu, 8 years ago

#1772: Minor refactoring in the TraceCalculator (use Tuple.Create factory method to make the code more readable).

File size: 14.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
33  [Item("TraceCalculator", "Walks a genealogy graph and produces a trace of the specified subtree")]
34  [StorableClass]
35  public class TraceCalculator : Item {
36    private Dictionary<ISymbolicExpressionTree, List<ISymbolicExpressionTreeNode>> nodeListCache;
37    // the trace cache consits of tuples of the current and last vertices, the subtree index to be traced in the current vertex, and the last subtree and fragment indexes
38    private HashSet<Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, int, IGenealogyGraphNode<ISymbolicExpressionTree>, int, int>> traceCache;
39    public IGenealogyGraph<ISymbolicExpressionTree> TraceGraph { get; private set; }
40
41    public bool UpdateVertexWeights { get; set; }
42    public bool UpdateSubtreeWeights { get; set; }
43    public bool CacheTraceNodes { get; set; }
44
45    public int TraceCacheHits { get; set; }
46
47    public TraceCalculator() {
48      ResetState();
49    }
50
51    [StorableConstructor]
52    protected TraceCalculator(bool deserializing) : base(deserializing) { }
53
54    protected TraceCalculator(TraceCalculator original, Cloner cloner)
55      : base(original, cloner) {
56    }
57
58    public override IDeepCloneable Clone(Cloner cloner) {
59      return new TraceCalculator(this, cloner);
60    }
61
62    public void ResetState() {
63      TraceGraph = new GenealogyGraph<ISymbolicExpressionTree>();
64      nodeListCache = new Dictionary<ISymbolicExpressionTree, List<ISymbolicExpressionTreeNode>>();
65      traceCache = new HashSet<Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, int, IGenealogyGraphNode<ISymbolicExpressionTree>, int, int>>();
66    }
67
68    public static IGenealogyGraph<ISymbolicExpressionTree> TraceSubtree(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, bool updateVertexWeights = false, bool updateSubtreeWeights = false, bool cacheTraceNodes = true) {
69      var tc = new TraceCalculator {
70        UpdateVertexWeights = updateSubtreeWeights,
71        UpdateSubtreeWeights = updateSubtreeWeights,
72        CacheTraceNodes = cacheTraceNodes
73      };
74      tc.Trace(node, subtreeIndex);
75      return tc.TraceGraph;
76    }
77
78    public IGenealogyGraph<ISymbolicExpressionTree> Trace(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, bool resetState = true) {
79      if (resetState) ResetState();
80      TraceRecursive(node, subtreeIndex);
81      return TraceGraph;
82    }
83
84    /// <summary>
85    /// This method starts from a given vertex in the genealogy graph and works its way
86    /// up the ancestry trying to track the structure of the subtree given by subtreeIndex.
87    /// This method will skip genealogy graph nodes that did not have an influence on the
88    /// structure of the tracked subtree.
89    ///
90    /// Only genealogy nodes which did have an influence are added (as copies) to the trace
91    /// and are consequently called 'trace nodes'.
92    ///
93    /// The arcs connecting trace nodes hold information about the locations of the subtrees
94    /// and fragments that have been swapped in the form of a tuple (si, fi, lastSi, lastFi),
95    /// where:
96    /// - si is the subtree index in the current trace node
97    /// - fi is the fragment index in the current trace node
98    /// - lastSi is the subtree index in the previous trace node
99    /// - lastFi is the subtree index in the previous trace node
100    /// </summary>
101    /// <param name="node">The current node in the genealogy graph</param>
102    /// <param name="subtreeIndex">The index of the traced subtree</param>
103    /// <param name="last">The last added node in the trace graph</param>
104    /// <param name="lastSi">The subtree index in the last added individual</param>
105    /// <param name="lastFi">The fragment index in the last added individual</param>
106    private void TraceRecursive(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, IGenealogyGraphNode<ISymbolicExpressionTree> last = null, int lastSi = -1, int lastFi = -1) {
107      var g = node;
108      var si = subtreeIndex; // subtree index
109      var fi = 0; // fragment index
110      while (((List<IArc>)((IVertex)g).InArcs).Count > 0) {
111        if (!(si < g.Data.Length)) throw new ArgumentOutOfRangeException("The subtree index exceeds the size of the tree.");
112        var inArcs = (List<IArc>)((IVertex)g).InArcs;
113        var fragment = (IFragment<ISymbolicExpressionTreeNode>)((IGenealogyGraphArc)inArcs.Last()).Data;
114        if (fragment == null) {
115          // TODO: think about what the correct behavior should be here (seems good so far)
116          // the node is either an elite node or (in rare cases) no fragment was transferred
117          g = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
118          continue;
119        }
120
121        fi = fragment.Index1; // fragment index
122        int fl = fragment.Root.GetLength(); // fragment length
123        int sl = NodeAt(g.Data, si).GetLength(); // subtree length
124
125        #region trace crossover
126        if (inArcs.Count == 2) {
127          var parent0 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
128          var parent1 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[1].Source;
129          if (fi == si) {
130            g = parent1;
131            si = fragment.Index2;
132            continue;
133          }
134          if (fi < si) {
135            if (fi + fl > si) {
136              // fragment contains subtree
137              g = parent1;
138              si += fragment.Index2 - fi;
139            } else {
140              // fragment distinct from subtree
141              g = parent0;
142              si += NodeAt(g.Data, fi).GetLength() - fl;
143            }
144            continue;
145          }
146          if (fi > si) {
147            if (fi < si + sl) {
148              // subtree contains fragment => branching point in the fragment graph
149              var n = AddTraceNode(g); // current node becomes "last" as we restart tracing from the parent
150              if (CacheTraceNodes) {
151                var t0 = Tuple.Create(parent0, si, n, si, fi);
152                var t1 = Tuple.Create(parent1, fragment.Index2, n, si, fi);
153                // trace subtree
154                if (traceCache.Add(t0))
155                  TraceRecursive(parent0, si, n, si, fi);
156                else
157                  TraceCacheHits++;
158                // trace fragment
159                if (traceCache.Add(t1))
160                  TraceRecursive(parent1, fragment.Index2, n, si, fi);
161                else
162                  TraceCacheHits++;
163              } else {
164                TraceRecursive(parent0, si, n, si, fi);
165                TraceRecursive(parent1, fragment.Index2, n, si, fi);
166              }
167              break;
168            } else {
169              // subtree and fragment are distinct.
170              g = parent0;
171              continue;
172            }
173          }
174        }
175        #endregion
176        #region trace mutation
177        // mutation is handled in a simple way: we branch every time there is an overlap between the subtree and the fragment
178        // (since mutation effects can be quite unpredictable: replace branch, change node, shake tree, etc)
179        if (inArcs.Count == 1) {
180          var parent0 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
181          Debug.Assert(fragment.Index1 == fragment.Index2);
182          // check if the subtree and the fragment overlap => branch out
183          // optionally we could ignore the case when the fragment contains the subtree:
184          // ((si == fi) || fi < si && si < fi + fl)
185          if ((si == fi) || (si < fi && fi < si + sl) || (fi < si && si < fi + fl)) {
186            var n = AddTraceNode(g); // current node becomes "last" as we restart tracing from the parent
187            int i = si < fi ? si : fi;
188            var t = Tuple.Create(parent0, i, n, si, fi);
189            if (CacheTraceNodes) {
190              if (traceCache.Add(t))
191                TraceRecursive(parent0, i, n, si, fi);
192              else
193                TraceCacheHits++;
194            } else {
195              TraceRecursive(parent0, i, n, si, fi);
196            }
197            break;
198          } else {
199            // if they don't overlap, go up
200            g = parent0;
201            if (fi < si)
202              si += NodeAt(g.Data, fi).GetLength() - fl;
203            continue;
204          }
205
206        }
207        #endregion
208        throw new InvalidOperationException("A node cannot have more than two parents");
209      }
210      // when we are out of the while the last vertex must be connected with the current one
211      // if there is no last vertex, it means the tracing reached the top of the genealogy graph
212      if (last != null) {
213        var current = AddTraceNode(g);
214        if (current.Rank.IsAlmost(0)) fi = -1; // if current is part of the initial population there will be no fragment
215        var td = new TraceData(si, fi, lastSi, lastFi);
216#if DEBUG
217        var currentLength = current.Data.Length;
218        var lastLength = last.Data.Length;
219
220        if (!(si < currentLength))
221          throw new ArgumentOutOfRangeException(string.Format("Subtree index {0} exceeds tree length ({1}", si, currentLength));
222
223        if (!(fi < currentLength))
224          throw new ArgumentOutOfRangeException(string.Format("Fragment index {0} exceeds tree length ({1}", fi, currentLength));
225
226        if (!(lastSi < lastLength))
227          throw new ArgumentOutOfRangeException(string.Format("Last subtree index {0} exceeds tree length ({1}", lastSi, lastLength));
228
229        if (!(lastFi < lastLength))
230          throw new ArgumentOutOfRangeException(string.Format("Last fragment index {0} exceeds tree length ({1}", lastFi, lastLength));
231#endif
232        ConnectLast(current, last, td);
233      }
234    }
235
236    /// <summary>
237    /// Get the trace node from the trace graph which corresponds to node g from the genealogy graph.
238    /// If the trace graph does not contain such a node, one is created by performing a shallow copy of g, then inserted into the trace graph.
239    /// </summary>
240    /// <param name="g">The genealogy graph node</param>
241    /// <returns></returns>
242    private IGenealogyGraphNode<ISymbolicExpressionTree> AddTraceNode(IGenealogyGraphNode<ISymbolicExpressionTree> g) {
243      var n = TraceGraph.GetByContent(g.Data);
244      if (n == null) {
245        n = g.Copy();
246        TraceGraph.AddVertex(n);
247      }
248      return n;
249    }
250
251    // caching node lists brings ~2.5-2.7x speed improvement (since graph nodes are visited multiple times)
252    // this caching will be even more effective with larger tree sizes
253    private ISymbolicExpressionTreeNode NodeAt(ISymbolicExpressionTree tree, int index) {
254      List<ISymbolicExpressionTreeNode> list;
255      nodeListCache.TryGetValue(tree, out list);
256      if (list == null) {
257        list = tree.IterateNodesPrefix().ToList();
258        nodeListCache[tree] = list;
259      }
260      return list[index];
261    }
262
263    /// <summary>
264    /// Connect the current node of the trace graph with the node that was previously added (@last). The current node of the trace graph is determined by the content
265    /// of the genealogy graph node @g. 
266    /// </summary>
267    /// <param name="current">The current node in the genealogy graph</param>
268    /// <param name="last">The last added node in the trace graph</param>
269    /// <param name="td">The trace data specifying the preorder indices of the subtree and fragment in the @current and @last vertices</param>   
270    private void ConnectLast(IGenealogyGraphNode<ISymbolicExpressionTree> current, IGenealogyGraphNode<ISymbolicExpressionTree> last, TraceData td) {
271      // TODO: more testing
272      var inArcs = (List<IArc>)((IVertex)last).InArcs; // using the InArcs seems to be slightly more efficient than using the OutArcs
273      var arc = inArcs.FirstOrDefault(a => a.Source == current && ((IArc<IDeepCloneable>)a).Data.Equals(td));
274      if (arc == null) {
275        arc = new GenealogyGraphArc(current, last) { Data = td };
276        TraceGraph.AddArc(arc);
277      }
278      if (UpdateVertexWeights) {
279        arc.Weight++;
280        current.Weight++;
281      }
282      if (UpdateSubtreeWeights) {
283        var subtree = NodeAt(current.Data, td.SubtreeIndex);
284        foreach (var s in subtree.IterateNodesPrefix())
285          s.NodeWeight++;
286      }
287    }
288  }
289
290  public class TraceData : Tuple<int, int, int, int>, IDeepCloneable {
291    public TraceData(int currentSubtreeIndex, int currentFragmentIndex, int lastSubtreeIndex, int lastFragmentIndex)
292      : base(currentSubtreeIndex, currentFragmentIndex, lastSubtreeIndex, lastFragmentIndex) {
293    }
294
295    public int SubtreeIndex { get { return Item1; } }
296    public int FragmentIndex { get { return Item2; } }
297    public int LastSubtreeIndex { get { return Item3; } }
298    public int LastFragmentIndex { get { return Item4; } }
299    public object Clone() {
300      return new TraceData(SubtreeIndex, FragmentIndex, LastSubtreeIndex, LastFragmentIndex);
301    }
302
303    protected TraceData(TraceData original, Cloner cloner) :
304      base(original.SubtreeIndex, original.FragmentIndex, original.LastFragmentIndex, original.LastFragmentIndex) {
305    }
306
307    public IDeepCloneable Clone(Cloner cloner) {
308      return cloner.Clone(this);
309    }
310  }
311
312  internal static class Util {
313    // shallow node copy (does not clone the data or the arcs)
314    #region some helper methods for shortening the tracing code
315    public static IGenealogyGraphNode<ISymbolicExpressionTree> Copy(this IGenealogyGraphNode<ISymbolicExpressionTree> node) {
316      return new GenealogyGraphNode<ISymbolicExpressionTree>(node.Data) { Rank = node.Rank, Quality = node.Quality };
317    }
318    #endregion
319  }
320}
Note: See TracBrowser for help on using the repository browser.