Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/TraceCalculator.cs @ 12231

Last change on this file since 12231 was 12231, checked in by bburlacu, 9 years ago

#1772: Changed the way node highlighting is performed (taking into account sampling count relative to current generation). Made NodeWeight field storable in the SymbolicExpressionTreeNode. Updated the statistics counting in the TraceCalculator.

File size: 14.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
33  [Item("TraceCalculator", "Walks a genealogy graph and produces a trace of the specified subtree")]
34  [StorableClass]
35  public class TraceCalculator : Item {
36    private Dictionary<IGenealogyGraphNode<ISymbolicExpressionTree>, TraceData> traceMap;
37    private Dictionary<ISymbolicExpressionTree, List<ISymbolicExpressionTreeNode>> nodeListCache;
38    private HashSet<Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>> traceCache;
39    public IGenealogyGraph<ISymbolicExpressionTree> TraceGraph { get; private set; }
40
41    public bool UpdateVertexWeights { get; set; }
42    public bool UpdateSubtreeWeights { get; set; }
43    public bool CacheTraceNodes { get; set; }
44
45    public TraceCalculator() {
46      ResetState();
47    }
48
49    protected TraceCalculator(TraceCalculator original, Cloner cloner)
50      : base(original, cloner) {
51    }
52
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new TraceCalculator(this, cloner);
55    }
56
57    public void ResetState() {
58      TraceGraph = new GenealogyGraph<ISymbolicExpressionTree>();
59      traceMap = new Dictionary<IGenealogyGraphNode<ISymbolicExpressionTree>, TraceData>();
60      nodeListCache = new Dictionary<ISymbolicExpressionTree, List<ISymbolicExpressionTreeNode>>();
61      traceCache = new HashSet<Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>>();
62    }
63
64    public static IGenealogyGraph<ISymbolicExpressionTree> TraceSubtree(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, bool updateVertexWeights = false, bool updateSubtreeWeights = false, bool cacheTraceNodes = true) {
65      var tc = new TraceCalculator {
66        UpdateVertexWeights = updateSubtreeWeights,
67        UpdateSubtreeWeights = updateSubtreeWeights,
68        CacheTraceNodes = cacheTraceNodes
69      };
70      tc.Trace(node, subtreeIndex);
71      return tc.TraceGraph;
72    }
73
74    public IGenealogyGraph<ISymbolicExpressionTree> Trace(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, bool resetState = true) {
75      if (resetState) ResetState();
76      TraceRecursive(node, subtreeIndex);
77      return TraceGraph;
78    }
79
80    /// <summary>
81    /// This method starts from a given vertex in the genealogy graph and works its way
82    /// up the ancestry trying to track the structure of the subtree given by subtreeIndex.
83    /// This method will skip genealogy graph nodes that did not have an influence on the
84    /// structure of the tracked subtree.
85    ///
86    /// Only genealogy nodes which did have an influence are added (as copies) to the trace
87    /// and are consequently called 'trace nodes'.
88    ///
89    /// The arcs connecting trace nodes hold information about the locations of the subtrees
90    /// and fragments that have been swapped in the form of a tuple (si, fi, lastSi, lastFi),
91    /// where:
92    /// - si is the subtree index in the current trace node
93    /// - fi is the fragment index in the current trace node
94    /// - lastSi is the subtree index in the previous trace node
95    /// - lastFi is the subtree index in the previous trace node
96    /// </summary>
97    /// <param name="node">The current node in the genealogy graph</param>
98    /// <param name="subtreeIndex">The index of the traced subtree</param>
99    /// <param name="last">The last added node in the trace graph</param>
100    private void TraceRecursive(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, IGenealogyGraphNode<ISymbolicExpressionTree> last = null) {
101      var g = node;
102      int si = subtreeIndex; // subtree index
103      int fi = 0; // fragment index
104      while (((List<IArc>)((IVertex)g).InArcs).Count > 0) {
105        Debug.Assert(si < g.Data.Length);
106        var inArcs = (List<IArc>)((IVertex)g).InArcs;
107        var fragment = (IFragment<ISymbolicExpressionTreeNode>)((IGenealogyGraphArc)inArcs.Last()).Data;
108        if (fragment == null) {
109          // TODO: think about what the correct behavior should be here (seems good so far)
110          // the node is either an elite node or (in rare cases) no fragment was transferred
111          g = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
112          continue;
113        }
114
115        fi = fragment.Index1; // fragment index
116        int fl = fragment.Root.GetLength(); // fragment length
117        int sl = NodeAt(g.Data, si).GetLength(); // subtree length
118
119        #region trace crossover
120
121        if (inArcs.Count == 2) {
122          var parent0 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
123          var parent1 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[1].Source;
124          if (fi == si) {
125            g = parent1;
126            si = fragment.Index2;
127            continue;
128          }
129          if (fi < si) {
130            if (fi + fl > si) {
131              // fragment contains subtree
132              g = parent1;
133              si += fragment.Index2 - fi;
134            } else {
135              // fragment distinct from subtree
136              g = parent0;
137              si += NodeAt(g.Data, fi).GetLength() - fl;
138            }
139            continue;
140          }
141          if (fi > si) {
142            if (fi < si + sl) {
143              // subtree contains fragment => branching point in the fragment graph
144              var n = AddTraceNode(g, si, fi); // current node becomes "last" as we restart tracing from the parent
145              var t0 = new Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>(parent0, n, si);
146              if (!(CacheTraceNodes && traceCache.Contains(t0))) {
147                TraceRecursive(parent0, si, n);
148                traceCache.Add(t0);
149              }
150              if (UpdateVertexWeights)
151                n.Weight++;
152              var t1 = new Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>(parent1, n, fragment.Index2);
153              if (!(CacheTraceNodes && traceCache.Contains(t1))) {
154                TraceRecursive(parent1, fragment.Index2, n);
155                traceCache.Add(t1);
156              }
157              // gather statistics about sampled individuals and sampled subtrees
158              if (UpdateVertexWeights)
159                n.Weight++;
160              if (UpdateSubtreeWeights) {
161                var arcs = (List<IArc>)((IVertex)n).InArcs; // at this moment n will have been added as a child to the next trace node
162
163                // TODO: try to simplify the code below
164                for (int i = 0; i < arcs.Count; ++i) {
165                  var td = (TraceData)((IArc<IDeepCloneable>)arcs[i]).Data;
166                  var p = (IGenealogyGraphNode<ISymbolicExpressionTree>)arcs[i].Source;
167                  var s = NodeAt(p.Data, td.SubtreeIndex);
168                  if (td.LastFragmentIndex == td.SubtreeIndex && fragment.Root.Difference(s) == null) {
169                    foreach (var ss in s.IterateNodesPrefix()) ss.NodeWeight++; // the node weight will represent the total sample count for a given node
170                    arcs[i].Weight++; // the arc weights (since there are multiple arcs) will sum up to the same count but give more detail
171                    break;
172                  }
173                }
174              }
175              break;
176            } else {
177              // subtree and fragment are distinct.
178              g = parent0;
179              continue;
180            }
181          }
182        }
183        #endregion
184        #region trace mutation
185        // mutation is handled in a simple way: we branch every time there is an overlap between the subtree and the fragment
186        // (since mutation effects can be quite unpredictable: replace branch, change node, shake tree, etc)
187        if (inArcs.Count == 1) {
188          var parent0 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
189          Debug.Assert(fragment.Index1 == fragment.Index2);
190          // check if the subtree and the fragment overlap => branch out
191          if ((si == fi) || (si < fi && fi < si + sl) || (fi < si && si < fi + fl)) {
192            var n = AddTraceNode(g, si, fi); // current node becomes "last" as we restart tracing from the parent
193            int i = si < fi ? si : fi;
194            var t = new Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>(parent0, n, i);
195            if (!(CacheTraceNodes && traceCache.Contains(t))) {
196              TraceRecursive(parent0, i, n);
197              traceCache.Add(t);
198            }
199            if (UpdateVertexWeights)
200              n.Weight++;
201            break;
202          } else {
203            // if they don't overlap, go up
204            g = parent0;
205            if (fi < si)
206              si += NodeAt(g.Data, fi).GetLength() - fl;
207            continue;
208          }
209        }
210        #endregion
211        throw new InvalidOperationException("A node cannot have more than two parents");
212      }
213      // when we are out of the while the last vertex must be connected with the current one
214      // if there is no last vertex, it means the tracing reached the top of the genealogy graph
215      var current = AddTraceNode(g, si, fi);
216      if (last != null)
217        ConnectLast(current, last, si, fi);
218    }
219
220    /// <summary>
221    /// Get the trace node from the trace graph which corresponds to node g from the genealogy graph.
222    /// If the trace graph does not contain such a node, one is created by performing a shallow copy of g, then inserted into the trace graph.
223    /// </summary>
224    /// <param name="g">The genealogy graph node</param>
225    /// <param name="si">The subtree index</param>
226    /// <param name="fi">The fragment index</param>
227    /// <returns></returns>
228    private IGenealogyGraphNode<ISymbolicExpressionTree> AddTraceNode(IGenealogyGraphNode<ISymbolicExpressionTree> g, int si, int fi) {
229      var n = TraceGraph.GetByContent(g.Data);
230      if (n == null) {
231        n = g.Copy();
232        TraceGraph.AddVertex(n);
233        Debug.Assert(!traceMap.ContainsKey(n));
234        traceMap[n] = new TraceData(si, fi, -1, -1); // only the first two fields are needed
235      }
236      return n;
237    }
238
239    // caching node lists brings ~2.5-2.7x speed improvement (since graph nodes are visited multiple times)
240    // this caching will be even more effective with larger tree sizes
241    private ISymbolicExpressionTreeNode NodeAt(ISymbolicExpressionTree tree, int index) {
242      List<ISymbolicExpressionTreeNode> list;
243      nodeListCache.TryGetValue(tree, out list);
244      if (list == null) {
245        list = tree.IterateNodesPrefix().ToList();
246        nodeListCache[tree] = list;
247      }
248      return list[index];
249    }
250
251    /// <summary>
252    /// Connect the current node of the trace graph with the node that was previously added (@last). The current node of the trace graph is determined by the content
253    /// of the genealogy graph node @g. 
254    /// </summary>
255    /// <param name="current">The current node in the genealogy graph</param>
256    /// <param name="last">The last added node in the trace graph</param>   
257    /// <param name="si">The index of the traced subtree</param>
258    /// <param name="fi">The index of the fragment</param>
259    private void ConnectLast(IGenealogyGraphNode<ISymbolicExpressionTree> current,
260      IGenealogyGraphNode<ISymbolicExpressionTree> last, int si, int fi) {
261      var lastTraceData = traceMap[last];
262      int lastSi = lastTraceData.SubtreeIndex; // last subtree index (index of the traced subtree in the previous trace node)
263      int lastFi = lastTraceData.FragmentIndex; // last fragment index (index of the fragment in the previous trace node)
264      var td = new TraceData(si, fi, lastSi, lastFi); // trace data
265      // using the inArcs seems to be slightly more efficient than using the outArcs
266      // TODO: more testing
267      var inArcs = (List<IArc>)((IVertex)last).InArcs;
268      var arc = inArcs.FirstOrDefault(a => a.Source == current && ((IArc<IDeepCloneable>)a).Data.Equals(td));
269      if (arc == null) {
270        arc = new GenealogyGraphArc(current, last) { Data = td };
271        TraceGraph.AddArc(arc);
272      }
273    }
274  }
275
276  public class TraceData : Tuple<int, int, int, int>, IDeepCloneable {
277    public TraceData(int currentSubtreeIndex, int currentFragmentIndex, int lastSubtreeIndex, int lastFragmentIndex)
278      : base(currentSubtreeIndex, currentFragmentIndex, lastSubtreeIndex, lastFragmentIndex) {
279    }
280
281    public int SubtreeIndex { get { return Item1; } }
282    public int FragmentIndex { get { return Item2; } }
283    public int LastSubtreeIndex { get { return Item3; } }
284    public int LastFragmentIndex { get { return Item4; } }
285    public object Clone() {
286      return new TraceData(SubtreeIndex, FragmentIndex, LastSubtreeIndex, LastFragmentIndex);
287    }
288
289    public IDeepCloneable Clone(Cloner cloner) {
290      return cloner.Clone(this);
291    }
292  }
293
294  internal static class Util {
295    // shallow node copy (does not clone the data or the arcs)
296    #region some helper methods for shortening the tracing code
297    public static IGenealogyGraphNode<ISymbolicExpressionTree> Copy(this IGenealogyGraphNode<ISymbolicExpressionTree> node) {
298      return new GenealogyGraphNode<ISymbolicExpressionTree>(node.Data) { Rank = node.Rank, Quality = node.Quality };
299    }
300    #endregion
301  }
302}
Note: See TracBrowser for help on using the repository browser.