#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.EvolutionTracking;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Walks a genealogy graph and produces a trace of the specified subtree.
  /// The result is collected in <see cref="TraceGraph"/>.
  /// </summary>
  [Item("TraceCalculator", "Walks a genealogy graph and produces a trace of the specified subtree")]
  [StorableClass]
  public class TraceCalculator : Item {
    // trace data (subtree index, fragment index) recorded for every node added to the trace graph
    private Dictionary<IGenealogyGraphNode<ISymbolicExpressionTree>, TraceData> traceMap;
    // prefix-ordered node lists per tree, so that repeated index lookups do not re-iterate the tree
    private Dictionary<ISymbolicExpressionTree, List<ISymbolicExpressionTreeNode>> nodeListCache;
    // (parent, trace node, subtree index) combinations that have already been traced
    private HashSet<Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>> traceCache;

    /// <summary>The trace graph built by the last call(s) to <see cref="Trace"/>.</summary>
    public IGenealogyGraph<ISymbolicExpressionTree> TraceGraph { get; private set; }

    /// <summary>When true, the weight of a trace node is incremented each time a branch is traced through it.</summary>
    public bool UpdateVertexWeights { get; set; }

    /// <summary>When true, node weights and arc weights of matching sampled subtrees are incremented during tracing.</summary>
    public bool UpdateSubtreeWeights { get; set; }

    /// <summary>When true, (parent, trace node, subtree index) combinations that were already traced are not traced again.</summary>
    public bool CacheTraceNodes { get; set; }

    public TraceCalculator() {
      ResetState();
    }

    /// <summary>
    /// Cloning constructor. Copies the option flags and starts with empty tracing state,
    /// so that a clone is usable even when <see cref="Trace"/> is called with resetState = false.
    /// </summary>
    protected TraceCalculator(TraceCalculator original, Cloner cloner)
      : base(original, cloner) {
      UpdateVertexWeights = original.UpdateVertexWeights;
      UpdateSubtreeWeights = original.UpdateSubtreeWeights;
      CacheTraceNodes = original.CacheTraceNodes;
      ResetState();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new TraceCalculator(this, cloner);
    }

    /// <summary>
    /// Discards the current trace graph and all internal caches and replaces them with empty ones.
    /// </summary>
    public void ResetState() {
      TraceGraph = new GenealogyGraph<ISymbolicExpressionTree>();
      traceMap = new Dictionary<IGenealogyGraphNode<ISymbolicExpressionTree>, TraceData>();
      nodeListCache = new Dictionary<ISymbolicExpressionTree, List<ISymbolicExpressionTreeNode>>();
      traceCache = new HashSet<Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>>();
    }

    /// <summary>
    /// Convenience method: creates a new calculator with the given options, traces the subtree and returns the trace graph.
    /// </summary>
    /// <param name="node">The genealogy graph node from which tracing starts</param>
    /// <param name="subtreeIndex">The index of the traced subtree</param>
    /// <param name="updateVertexWeights">Value for <see cref="UpdateVertexWeights"/></param>
    /// <param name="updateSubtreeWeights">Value for <see cref="UpdateSubtreeWeights"/></param>
    /// <param name="cacheTraceNodes">Value for <see cref="CacheTraceNodes"/></param>
    /// <returns>The resulting trace graph</returns>
    public static IGenealogyGraph<ISymbolicExpressionTree> TraceSubtree(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, bool updateVertexWeights = false, bool updateSubtreeWeights = false, bool cacheTraceNodes = true) {
      var tc = new TraceCalculator {
        // each option is taken from its own parameter
        // (UpdateVertexWeights was previously initialized from updateSubtreeWeights by mistake)
        UpdateVertexWeights = updateVertexWeights,
        UpdateSubtreeWeights = updateSubtreeWeights,
        CacheTraceNodes = cacheTraceNodes
      };
      tc.Trace(node, subtreeIndex);
      return tc.TraceGraph;
    }

    /// <summary>
    /// Traces the subtree at <paramref name="subtreeIndex"/> starting from <paramref name="node"/>.
    /// </summary>
    /// <param name="node">The genealogy graph node from which tracing starts</param>
    /// <param name="subtreeIndex">The index of the traced subtree</param>
    /// <param name="resetState">When true, the trace graph and the caches are cleared before tracing</param>
    /// <returns>The trace graph (the same instance as <see cref="TraceGraph"/>)</returns>
    public IGenealogyGraph<ISymbolicExpressionTree> Trace(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, bool resetState = true) {
      if (resetState) ResetState();
      TraceRecursive(node, subtreeIndex);
      return TraceGraph;
    }

    /// <summary>
    /// This method starts from a given vertex in the genealogy graph and works its way
    /// up the ancestry trying to track the structure of the subtree given by subtreeIndex.
    /// This method will skip genealogy graph nodes that did not have an influence on the
    /// structure of the tracked subtree.
    ///
    /// Only genealogy nodes which did have an influence are added (as copies) to the trace
    /// and are consequently called 'trace nodes'.
    ///
    /// The arcs connecting trace nodes hold information about the locations of the subtrees
    /// and fragments that have been swapped in the form of a tuple (si, fi, lastSi, lastFi),
    /// where:
    /// - si is the subtree index in the current trace node
    /// - fi is the fragment index in the current trace node
    /// - lastSi is the subtree index in the previous trace node
    /// - lastFi is the fragment index in the previous trace node
    /// </summary>
    /// <param name="node">The current node in the genealogy graph</param>
    /// <param name="subtreeIndex">The index of the traced subtree</param>
    /// <param name="last">The last added node in the trace graph</param>
    /// <exception cref="InvalidOperationException">Thrown when a node with a fragment has neither one nor two incoming arcs</exception>
    private void TraceRecursive(IGenealogyGraphNode<ISymbolicExpressionTree> node, int subtreeIndex, IGenealogyGraphNode<ISymbolicExpressionTree> last = null) {
      var g = node;
      int si = subtreeIndex; // subtree index
      int fi = 0; // fragment index
      while (((List<IArc>)((IVertex)g).InArcs).Count > 0) {
        Debug.Assert(si < g.Data.Length);
        var inArcs = (List<IArc>)((IVertex)g).InArcs;
        var fragment = (IFragment<ISymbolicExpressionTreeNode>)((IGenealogyGraphArc)inArcs.Last()).Data;
        if (fragment == null) {
          // TODO: think about what the correct behavior should be here (seems good so far)
          // the node is either an elite node or (in rare cases) no fragment was transferred
          g = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
          continue;
        }
        fi = fragment.Index1; // fragment index
        int fl = fragment.Root.GetLength(); // fragment length
        int sl = NodeAt(g.Data, si).GetLength(); // subtree length

        #region trace crossover
        if (inArcs.Count == 2) {
          var parent0 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
          var parent1 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[1].Source;
          if (fi == si) {
            g = parent1;
            si = fragment.Index2;
            continue;
          }
          if (fi < si) {
            if (fi + fl > si) {
              // fragment contains subtree
              g = parent1;
              si += fragment.Index2 - fi;
            } else {
              // fragment distinct from subtree
              g = parent0;
              si += NodeAt(g.Data, fi).GetLength() - fl;
            }
            continue;
          }
          if (fi > si) {
            if (fi < si + sl) {
              // subtree contains fragment => branching point in the fragment graph
              var n = AddTraceNode(g, si, fi); // current node becomes "last" as we restart tracing from the parent
              var t0 = new Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>(parent0, n, si);
              if (!(CacheTraceNodes && traceCache.Contains(t0))) {
                TraceRecursive(parent0, si, n);
                traceCache.Add(t0);
              }
              if (UpdateVertexWeights)
                n.Weight++;
              var t1 = new Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>(parent1, n, fragment.Index2);
              if (!(CacheTraceNodes && traceCache.Contains(t1))) {
                TraceRecursive(parent1, fragment.Index2, n);
                traceCache.Add(t1);
              }
              // gather statistics about sampled individuals and sampled subtrees
              if (UpdateVertexWeights)
                n.Weight++;
              if (UpdateSubtreeWeights) {
                var arcs = (List<IArc>)((IVertex)n).InArcs; // at this moment n will have been added as a child to the next trace node
                // TODO: try to simplify the code below
                for (int i = 0; i < arcs.Count; ++i) {
                  var td = (TraceData)((IArc<IDeepCloneable>)arcs[i]).Data;
                  var p = (IGenealogyGraphNode<ISymbolicExpressionTree>)arcs[i].Source;
                  var s = NodeAt(p.Data, td.SubtreeIndex);
                  if (td.LastFragmentIndex == td.SubtreeIndex && fragment.Root.Difference(s) == null) {
                    foreach (var ss in s.IterateNodesPrefix())
                      ss.NodeWeight++; // the node weight will represent the total sample count for a given node
                    arcs[i].Weight++; // the arc weights (since there are multiple arcs) will sum up to the same count but give more detail
                    break;
                  }
                }
              }
              break;
            } else {
              // subtree and fragment are distinct.
              g = parent0;
              continue;
            }
          }
        }
        #endregion

        #region trace mutation
        // mutation is handled in a simple way: we branch every time there is an overlap between the subtree and the fragment
        // (since mutation effects can be quite unpredictable: replace branch, change node, shake tree, etc)
        if (inArcs.Count == 1) {
          var parent0 = (IGenealogyGraphNode<ISymbolicExpressionTree>)inArcs[0].Source;
          Debug.Assert(fragment.Index1 == fragment.Index2);
          // check if the subtree and the fragment overlap => branch out
          if ((si == fi) || (si < fi && fi < si + sl) || (fi < si && si < fi + fl)) {
            var n = AddTraceNode(g, si, fi); // current node becomes "last" as we restart tracing from the parent
            int i = si < fi ? si : fi;
            var t = new Tuple<IGenealogyGraphNode<ISymbolicExpressionTree>, IGenealogyGraphNode<ISymbolicExpressionTree>, int>(parent0, n, i);
            if (!(CacheTraceNodes && traceCache.Contains(t))) {
              TraceRecursive(parent0, i, n);
              traceCache.Add(t);
            }
            if (UpdateVertexWeights)
              n.Weight++;
            break;
          } else {
            // if they don't overlap, go up
            g = parent0;
            if (fi < si)
              si += NodeAt(g.Data, fi).GetLength() - fl;
            continue;
          }
        }
        #endregion

        throw new InvalidOperationException("A node cannot have more than two parents");
      }
      // when we are out of the while the last vertex must be connected with the current one
      // if there is no last vertex, it means the tracing reached the top of the genealogy graph
      var current = AddTraceNode(g, si, fi);
      if (last != null)
        ConnectLast(current, last, si, fi);
    }

    /// <summary>
    /// Get the trace node from the trace graph which corresponds to node g from the genealogy graph.
    /// If the trace graph does not contain such a node, one is created by performing a shallow copy of g, then inserted into the trace graph.
    /// </summary>
    /// <param name="g">The genealogy graph node</param>
    /// <param name="si">The subtree index</param>
    /// <param name="fi">The fragment index</param>
    /// <returns>The trace graph node holding the same content as g</returns>
    private IGenealogyGraphNode<ISymbolicExpressionTree> AddTraceNode(IGenealogyGraphNode<ISymbolicExpressionTree> g, int si, int fi) {
      var n = TraceGraph.GetByContent(g.Data);
      if (n == null) {
        n = g.Copy();
        TraceGraph.AddVertex(n);
        Debug.Assert(!traceMap.ContainsKey(n));
        traceMap[n] = new TraceData(si, fi, -1, -1); // only the first two fields are needed
      }
      return n;
    }

    // caching node lists brings ~2.5-2.7x speed improvement (since graph nodes are visited multiple times)
    // this caching will be even more effective with larger tree sizes
    /// <summary>
    /// Returns the node at position <paramref name="index"/> in the prefix iteration order of <paramref name="tree"/>.
    /// </summary>
    private ISymbolicExpressionTreeNode NodeAt(ISymbolicExpressionTree tree, int index) {
      List<ISymbolicExpressionTreeNode> list;
      if (!nodeListCache.TryGetValue(tree, out list)) {
        list = tree.IterateNodesPrefix().ToList();
        nodeListCache[tree] = list;
      }
      return list[index];
    }

    /// <summary>
    /// Connect the current node of the trace graph with the node that was previously added (@last).
    /// An arc from @current to @last carrying the trace data (si, fi, lastSi, lastFi) is added,
    /// unless @last already has an incoming arc from @current with equal trace data.
    /// </summary>
    /// <param name="current">The current node in the trace graph</param>
    /// <param name="last">The last added node in the trace graph</param>
    /// <param name="si">The index of the traced subtree</param>
    /// <param name="fi">The index of the fragment</param>
    private void ConnectLast(IGenealogyGraphNode<ISymbolicExpressionTree> current, IGenealogyGraphNode<ISymbolicExpressionTree> last, int si, int fi) {
      var lastTraceData = traceMap[last];
      int lastSi = lastTraceData.SubtreeIndex; // last subtree index (index of the traced subtree in the previous trace node)
      int lastFi = lastTraceData.FragmentIndex; // last fragment index (index of the fragment in the previous trace node)
      var td = new TraceData(si, fi, lastSi, lastFi); // trace data
      // using the inArcs seems to be slightly more efficient than using the outArcs
      // TODO: more testing
      var inArcs = (List<IArc>)((IVertex)last).InArcs;
      var arc = inArcs.FirstOrDefault(a => a.Source == current && ((IArc<IDeepCloneable>)a).Data.Equals(td));
      if (arc == null) {
        arc = new GenealogyGraphArc(current, last) { Data = td };
        TraceGraph.AddArc(arc);
      }
    }
  }

  /// <summary>
  /// Immutable 4-tuple (subtree index, fragment index, last subtree index, last fragment index)
  /// stored as data on the arcs of the trace graph.
  /// </summary>
  public class TraceData : Tuple<int, int, int, int>, IDeepCloneable {
    public TraceData(int currentSubtreeIndex, int currentFragmentIndex, int lastSubtreeIndex, int lastFragmentIndex)
      : base(currentSubtreeIndex, currentFragmentIndex, lastSubtreeIndex, lastFragmentIndex) {
    }

    public int SubtreeIndex { get { return Item1; } }
    public int FragmentIndex { get { return Item2; } }
    public int LastSubtreeIndex { get { return Item3; } }
    public int LastFragmentIndex { get { return Item4; } }

    /// <summary>Creates a new TraceData holding the same four values.</summary>
    public object Clone() {
      return new TraceData(SubtreeIndex, FragmentIndex, LastSubtreeIndex, LastFragmentIndex);
    }

    // NOTE(review): this delegates back to the cloner; if Cloner.Clone(obj) in turn calls obj.Clone(cloner)
    // for objects it has not seen yet, this would recurse without end -- confirm against the Cloner implementation.
    public IDeepCloneable Clone(Cloner cloner) {
      return cloner.Clone(this);
    }
  }

  internal static class Util {
    // shallow node copy (does not clone the data or the arcs)
    #region some helper methods for shortening the tracing code
    public static IGenealogyGraphNode<ISymbolicExpressionTree> Copy(this IGenealogyGraphNode<ISymbolicExpressionTree> node) {
      return new GenealogyGraphNode<ISymbolicExpressionTree>(node.Data) { Rank = node.Rank, Quality = node.Quality };
    }
    #endregion
  }
}