#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.EvolutionTracking; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [Item("TraceCalculator", "Walks a genealogy graph and produces a trace of the specified subtree")] [StorableClass] public class TraceCalculator : Item { private Dictionary> nodeListCache; // the trace cache consits of tuples of the current and last vertices, the subtree index to be traced in the current vertex, and the last subtree and fragment indexes private HashSet, int, IGenealogyGraphNode, int, int>> traceCache; public IGenealogyGraph TraceGraph { get; private set; } public bool UpdateVertexWeights { get; set; } public bool UpdateSubtreeWeights { get; set; } public bool CacheTraceNodes { get; set; } public int TraceCacheHits { get; set; } public TraceCalculator() { ResetState(); } [StorableConstructor] protected TraceCalculator(bool deserializing) : base(deserializing) { } protected TraceCalculator(TraceCalculator original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new TraceCalculator(this, cloner); } public void ResetState() { TraceGraph = new GenealogyGraph(); nodeListCache = new Dictionary>(); traceCache = new HashSet, int, IGenealogyGraphNode, int, int>>(); } public static IGenealogyGraph TraceSubtree(IGenealogyGraphNode node, int subtreeIndex, bool updateVertexWeights = false, bool updateSubtreeWeights = false, bool cacheTraceNodes = true) { var tc = new TraceCalculator { UpdateVertexWeights = updateSubtreeWeights, UpdateSubtreeWeights = updateSubtreeWeights, CacheTraceNodes = cacheTraceNodes }; tc.Trace(node, subtreeIndex); return tc.TraceGraph; } public IGenealogyGraph Trace(IGenealogyGraphNode node, int subtreeIndex, bool resetState = true) { if (resetState) ResetState(); TraceRecursive(node, subtreeIndex); return TraceGraph; } /// /// This method starts from a given vertex in the genealogy graph and works its way /// up the ancestry trying to track the structure of the subtree given by subtreeIndex. /// This method will skip genealogy graph nodes that did not have an influence on the /// structure of the tracked subtree. /// /// Only genealogy nodes which did have an influence are added (as copies) to the trace /// and are consequently called 'trace nodes'. /// /// The arcs connecting trace nodes hold information about the locations of the subtrees /// and fragments that have been swapped in the form of a tuple (si, fi, lastSi, lastFi), /// where: /// - si is the subtree index in the current trace node /// - fi is the fragment index in the current trace node /// - lastSi is the subtree index in the previous trace node /// - lastFi is the subtree index in the previous trace node /// /// The current node in the genealogy graph /// The index of the traced subtree /// The last added node in the trace graph /// The subtree index in the last added individual /// The fragment index in the last added individual private void TraceRecursive(IGenealogyGraphNode node, int subtreeIndex, IGenealogyGraphNode last = null, int lastSi = -1, int lastFi = -1) { var g = node; var si = subtreeIndex; // subtree index var fi = 0; // fragment index while (((List)((IVertex)g).InArcs).Count > 0) { if (!(si < g.Data.Length)) throw new ArgumentOutOfRangeException("The subtree index exceeds the size of the tree."); var inArcs = (List)((IVertex)g).InArcs; var fragment = (IFragment)((IGenealogyGraphArc)inArcs.Last()).Data; if (fragment == null) { // TODO: think about what the correct behavior should be here (seems good so far) // the node is either an elite node or (in rare cases) no fragment was transferred g = (IGenealogyGraphNode)inArcs[0].Source; continue; } fi = fragment.Index1; // fragment index int fl = fragment.Root.GetLength(); // fragment length int sl = NodeAt(g.Data, si).GetLength(); // subtree length #region trace crossover if (inArcs.Count == 2) { var parent0 = (IGenealogyGraphNode)inArcs[0].Source; var parent1 = (IGenealogyGraphNode)inArcs[1].Source; if (fi == si) { g = parent1; si = fragment.Index2; continue; } if (fi < si) { if (fi + fl > si) { // fragment contains subtree g = parent1; si += fragment.Index2 - fi; } else { // fragment distinct from subtree g = parent0; si += NodeAt(g.Data, fi).GetLength() - fl; } continue; } if (fi > si) { if (fi < si + sl) { // subtree contains fragment => branching point in the fragment graph var n = AddTraceNode(g); // current node becomes "last" as we restart tracing from the parent if (CacheTraceNodes) { var t0 = Tuple.Create(parent0, si, n, si, fi); var t1 = Tuple.Create(parent1, fragment.Index2, n, si, fi); // trace subtree if (traceCache.Add(t0)) TraceRecursive(parent0, si, n, si, fi); else TraceCacheHits++; // trace fragment if (traceCache.Add(t1)) TraceRecursive(parent1, fragment.Index2, n, si, fi); else TraceCacheHits++; } else { TraceRecursive(parent0, si, n, si, fi); TraceRecursive(parent1, fragment.Index2, n, si, fi); } break; } else { // subtree and fragment are distinct. g = parent0; continue; } } } #endregion #region trace mutation // mutation is handled in a simple way: we branch every time there is an overlap between the subtree and the fragment // (since mutation effects can be quite unpredictable: replace branch, change node, shake tree, etc) if (inArcs.Count == 1) { var parent0 = (IGenealogyGraphNode)inArcs[0].Source; Debug.Assert(fragment.Index1 == fragment.Index2); // check if the subtree and the fragment overlap => branch out // optionally we could ignore the case when the fragment contains the subtree: // ((si == fi) || fi < si && si < fi + fl) if ((si == fi) || (si < fi && fi < si + sl) || (fi < si && si < fi + fl)) { var n = AddTraceNode(g); // current node becomes "last" as we restart tracing from the parent int i = si < fi ? si : fi; var t = Tuple.Create(parent0, i, n, si, fi); if (CacheTraceNodes) { if (traceCache.Add(t)) TraceRecursive(parent0, i, n, si, fi); else TraceCacheHits++; } else { TraceRecursive(parent0, i, n, si, fi); } break; } else { // if they don't overlap, go up g = parent0; if (fi < si) si += NodeAt(g.Data, fi).GetLength() - fl; continue; } } #endregion throw new InvalidOperationException("A node cannot have more than two parents"); } // when we are out of the while the last vertex must be connected with the current one // if there is no last vertex, it means the tracing reached the top of the genealogy graph if (last != null) { var current = AddTraceNode(g); if (current.Rank.IsAlmost(0)) fi = -1; // if current is part of the initial population there will be no fragment var td = new TraceData(si, fi, lastSi, lastFi); #if DEBUG var currentLength = current.Data.Length; var lastLength = last.Data.Length; if (!(si < currentLength)) throw new ArgumentOutOfRangeException(string.Format("Subtree index {0} exceeds tree length ({1}", si, currentLength)); if (!(fi < currentLength)) throw new ArgumentOutOfRangeException(string.Format("Fragment index {0} exceeds tree length ({1}", fi, currentLength)); if (!(lastSi < lastLength)) throw new ArgumentOutOfRangeException(string.Format("Last subtree index {0} exceeds tree length ({1}", lastSi, lastLength)); if (!(lastFi < lastLength)) throw new ArgumentOutOfRangeException(string.Format("Last fragment index {0} exceeds tree length ({1}", lastFi, lastLength)); #endif ConnectLast(current, last, td); } } /// /// Get the trace node from the trace graph which corresponds to node g from the genealogy graph. /// If the trace graph does not contain such a node, one is created by performing a shallow copy of g, then inserted into the trace graph. /// /// The genealogy graph node /// private IGenealogyGraphNode AddTraceNode(IGenealogyGraphNode g) { var n = TraceGraph.GetByContent(g.Data); if (n == null) { n = g.Copy(); TraceGraph.AddVertex(n); } return n; } // caching node lists brings ~2.5-2.7x speed improvement (since graph nodes are visited multiple times) // this caching will be even more effective with larger tree sizes private ISymbolicExpressionTreeNode NodeAt(ISymbolicExpressionTree tree, int index) { List list; nodeListCache.TryGetValue(tree, out list); if (list == null) { list = tree.IterateNodesPrefix().ToList(); nodeListCache[tree] = list; } return list[index]; } /// /// Connect the current node of the trace graph with the node that was previously added (@last). The current node of the trace graph is determined by the content /// of the genealogy graph node @g. /// /// The current node in the genealogy graph /// The last added node in the trace graph /// The trace data specifying the preorder indices of the subtree and fragment in the @current and @last vertices private void ConnectLast(IGenealogyGraphNode current, IGenealogyGraphNode last, TraceData td) { // TODO: more testing var inArcs = (List)((IVertex)last).InArcs; // using the InArcs seems to be slightly more efficient than using the OutArcs var arc = inArcs.FirstOrDefault(a => a.Source == current && ((IArc)a).Data.Equals(td)); if (arc == null) { arc = new GenealogyGraphArc(current, last) { Data = td }; TraceGraph.AddArc(arc); } if (UpdateVertexWeights) { arc.Weight++; current.Weight++; } if (UpdateSubtreeWeights) { var subtree = NodeAt(current.Data, td.SubtreeIndex); foreach (var s in subtree.IterateNodesPrefix()) s.NodeWeight++; } } } public class TraceData : Tuple, IDeepCloneable { public TraceData(int currentSubtreeIndex, int currentFragmentIndex, int lastSubtreeIndex, int lastFragmentIndex) : base(currentSubtreeIndex, currentFragmentIndex, lastSubtreeIndex, lastFragmentIndex) { } public int SubtreeIndex { get { return Item1; } } public int FragmentIndex { get { return Item2; } } public int LastSubtreeIndex { get { return Item3; } } public int LastFragmentIndex { get { return Item4; } } public object Clone() { return new TraceData(SubtreeIndex, FragmentIndex, LastSubtreeIndex, LastFragmentIndex); } protected TraceData(TraceData original, Cloner cloner) : base(original.SubtreeIndex, original.FragmentIndex, original.LastFragmentIndex, original.LastFragmentIndex) { } public IDeepCloneable Clone(Cloner cloner) { return cloner.Clone(this); } } internal static class Util { // shallow node copy (does not clone the data or the arcs) #region some helper methods for shortening the tracing code public static IGenealogyGraphNode Copy(this IGenealogyGraphNode node) { return new GenealogyGraphNode(node.Data) { Rank = node.Rank, Quality = node.Quality }; } #endregion } }