#region License Information /* HeuristicLab * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Text; using System.Threading; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Operators; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Problems.DataAnalysis.Symbolic; using CloneMapType = HeuristicLab.Core.ItemDictionary; using TraceMapType = HeuristicLab.Core.ItemDictionary>; namespace HeuristicLab.EvolutionaryTracking { /// /// An operator that tracks the genealogy of every individual /// [Item("SymbolicExpressionTreeGenealogyAnalyzer", "An operator that tracks tree lengths of Symbolic Expression Trees")] [StorableClass] public sealed class SymbolicExpressionTreeGenealogyAnalyzer : SingleSuccessorOperator, IAnalyzer { private const string UpdateIntervalParameterName = "UpdateInterval"; private const string UpdateCounterParameterName = "UpdateCounter"; private const string ResultsParameterName = "Results"; private const string ElitesParameterName = "Elites"; private const string GenerationsParameterName = "Generations"; private const string MaximumGenerationsParameterName = "MaximumGenerations"; private const string MaximumSelectionPressureParameterName = "MaximumSelectionPressure"; private const string SelectionPressureParameterName = "SelectionPressure"; private const string GlobalTraceMapParameterName = "GlobalTraceMap"; private const string GlobalCloneMapParameterName = "GlobalCloneMap"; private const string PopulationGraphResultParameterName = "PopulationGraph"; private const string PopulationGraphResultParameterDescription = "Individual lineages"; private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; private const string SymbolicRegressionProblemDataParameterName = "ProblemData"; private const string SymbolicDataAnalysisProblemEvaluatorParameterName = "Evaluator"; #region Parameter properties public ValueParameter UpdateIntervalParameter { get { return (ValueParameter)Parameters[UpdateIntervalParameterName]; } } public ValueParameter UpdateCounterParameter { get { return (ValueParameter)Parameters[UpdateCounterParameterName]; } } public LookupParameter ResultsParameter { get { return (LookupParameter)Parameters[ResultsParameterName]; } } public LookupParameter ElitesParameter { get { return (LookupParameter)Parameters[ElitesParameterName]; } } public LookupParameter GenerationsParameter { get { return (LookupParameter)Parameters[GenerationsParameterName]; } } public LookupParameter MaximumGenerationsParameter { get { return (LookupParameter)Parameters[MaximumGenerationsParameterName]; } } public LookupParameter SelectionPressureParameter { get { return (LookupParameter)Parameters[SelectionPressureParameterName]; } } public LookupParameter MaximumSelectionPressureParameter { get { return (LookupParameter)Parameters[MaximumSelectionPressureParameterName]; } } // problem data, interpreter and evaluator public LookupParameter SymbolicExpressionInterpreterParameter { get { return (LookupParameter)Parameters[SymbolicExpressionInterpreterParameterName]; } } public LookupParameter SymbolicRegressionProblemDataParameter { get { return (LookupParameter)Parameters[SymbolicRegressionProblemDataParameterName]; } } public LookupParameter SymbolicDataAnalysisProblemEvaluatorParameter { get { return (LookupParameter)Parameters[SymbolicDataAnalysisProblemEvaluatorParameterName]; } } // genealogy global parameters public LookupParameter GlobalTraceMapParameter { get { return (LookupParameter)Parameters[GlobalTraceMapParameterName]; } } public LookupParameter GlobalCloneMapParameter { get { return (LookupParameter)Parameters[GlobalCloneMapParameterName]; } } #endregion #region Properties public bool EnabledByDefault { get { return true; } } public IntValue UpdateInterval { get { return UpdateIntervalParameter.Value; } } public IntValue UpdateCounter { get { return UpdateCounterParameter.Value; } } public ResultCollection Results { get { return ResultsParameter.ActualValue; } } public IntValue Elites { get { return ElitesParameter.ActualValue; } } public IntValue Generations { get { return GenerationsParameter.ActualValue; } } public IntValue MaximumGenerations { get { return MaximumGenerationsParameter.ActualValue; } } public DoubleValue SelectionPressure { get { return SelectionPressureParameter.ActualValue; } } public DoubleValue MaximumSelectionPressure { get { return MaximumSelectionPressureParameter.ActualValue; } } public CloneMapType GlobalCloneMap { get { return GlobalCloneMapParameter.ActualValue; } } public TraceMapType GlobalTraceMap { get { return GlobalTraceMapParameter.ActualValue; } } public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionInterpreter { get { return SymbolicExpressionInterpreterParameter.ActualValue; } } public RegressionProblemData SymbolicRegressionProblemData { get { return SymbolicRegressionProblemDataParameter.ActualValue; } } public IEvaluator SymbolicDataAnalysisEvaluator { get { return SymbolicDataAnalysisProblemEvaluatorParameter.ActualValue; } } #endregion [StorableConstructor] private SymbolicExpressionTreeGenealogyAnalyzer(bool deserializing) : base() { } private SymbolicExpressionTreeGenealogyAnalyzer(SymbolicExpressionTreeGenealogyAnalyzer original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeGenealogyAnalyzer(this, cloner); } public SymbolicExpressionTreeGenealogyAnalyzer() : base() { Parameters.Add(new LookupParameter(ElitesParameterName, "The number of elites.")); Parameters.Add(new LookupParameter(GenerationsParameterName, "The number of generations so far.")); Parameters.Add(new LookupParameter(MaximumGenerationsParameterName, "The maximum number of generations")); Parameters.Add(new LookupParameter(SelectionPressureParameterName, "The current selection (ony for OSGA)")); Parameters.Add(new LookupParameter(MaximumSelectionPressureParameterName, "Maximum allowed value of selection pressure.")); Parameters.Add(new ValueParameter(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1))); Parameters.Add(new ValueParameter(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0))); Parameters.Add(new LookupParameter(GlobalTraceMapParameterName, "A global cache containing the whole genealogy.")); Parameters.Add(new LookupParameter(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection).")); Parameters.Add(new ValueLookupParameter(ResultsParameterName, "The results collection where the analysis values should be stored.")); Parameters.Add(new LookupParameter(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees")); Parameters.Add(new LookupParameter(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem.")); Parameters.Add(new LookupParameter(SymbolicDataAnalysisProblemEvaluatorParameterName, "The fitness evaluator")); UpdateCounterParameter.Hidden = true; UpdateIntervalParameter.Hidden = true; } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { // check if all the parameters are present and accounted for if (!Parameters.ContainsKey(UpdateIntervalParameterName)) { Parameters.Add(new ValueParameter(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1))); } if (Parameters.ContainsKey(UpdateCounterParameterName)) return; Parameters.Add(new ValueParameter(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0))); UpdateCounterParameter.Hidden = true; } #region IStatefulItem members public override void InitializeState() { base.InitializeState(); UpdateCounter.Value = 0; } public override void ClearState() { base.ClearState(); UpdateCounter.Value = 0; } #endregion public override IOperation Apply() { UpdateCounter.Value++; // the analyzer runs periodically, every 'updateInterval' times if (UpdateCounter.Value == UpdateInterval.Value) { UpdateCounter.Value = 0; // reset counter var gScope = ExecutionContext.Scope; while (gScope.Parent != null) gScope = gScope.Parent; GenealogyGraph graph; if (!Results.ContainsKey(PopulationGraphResultParameterName)) { graph = new GenealogyGraph(); Results.Add(new Result(PopulationGraphResultParameterName, PopulationGraphResultParameterDescription, graph)); } else { graph = (GenealogyGraph)Results[PopulationGraphResultParameterName].Value; } // get tree quality values var qualities = (from s in gScope.SubScopes let individual = s.Variables.First().Value let quality = (DoubleValue)s.Variables["Quality"].Value orderby quality.Value descending select new Tuple(individual, quality.Value)).ToDictionary(t => t.Item1, t => t.Item2); // add all individuals to the evolutionary graph int generation = Generations.Value; int count = GlobalTraceMap.Count; string label; if (generation == 0) { // at generation 0 no trace map is present (since no reproduction has taken place yet), // so we only add the initial trees as nodes in the genealogy graph for (int i = 0; i != qualities.Count; ++i) { var tree = qualities.ElementAt(i).Key; label = (i + 1).ToString(CultureInfo.InvariantCulture); graph.AddNode(tree, qualities[tree], label, generation); } return base.Apply(); } // mark and add elites // elites do not appear in the trace map (because they are never the product of a genetic operation) var elites = qualities.OrderByDescending(x => x.Value).Take(Elites.Value).Select(x => x.Key).ToList(); for (int i = 0; i != Elites.Value; ++i) { label = (generation * count + i + 1).ToString(CultureInfo.InvariantCulture); var elite = elites[i]; if (!graph.HasNode(elite)) graph.AddNode(elite, qualities[elite], label, Generations.Value, true); else graph.GetNode(elite).Label += "\\n" + label; graph.GetNode(elite).Color = new Color { R = 0, G = 100, B = 150 }; } for (int i = 0; i != count; ++i) { var trace = GlobalTraceMap.ElementAt(i); var child = (ISymbolicExpressionTree)trace.Key; if (!graph.HasNode(child)) { // due to the structure of the trace map, qualities[child] will never throw an exception, so we use it directly label = (generation * count + i + 1 + Elites.Value).ToString(CultureInfo.InvariantCulture); graph.AddNode(child, qualities[child], label, generation); } var parents = trace.Value; foreach (var parent in parents) { if (!graph.HasNode(parent)) { // if the node is a clone introduced pre-mutation, then its quality value has to be evaluated if (!qualities.ContainsKey(parent)) qualities[parent] = Evaluate((ISymbolicExpressionTree)parent); label = ((generation - 1) * count + i + 1).ToString(CultureInfo.InvariantCulture); graph.AddNode(parent, qualities[parent], label, generation - 1); } graph.AddArc(parent, child); } } GlobalTraceMap.Clear(); // no need to check for null here (see line 212) // if we've reached the end of the run bool maxGenerationsReached = (Generations.Value == MaximumGenerations.Value); bool isOsga = (SelectionPressure != null && MaximumSelectionPressure != null); bool maxSelectionPressureReached = isOsga && (SelectionPressure.Value >= MaximumSelectionPressure.Value); #region end of the run if (maxGenerationsReached || maxSelectionPressureReached) { var path = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); // write whole graph to a dot file WriteDot(path + @"\lineage.dot", graph); // get genealogy of the best solution var bestSolution = (ISymbolicExpressionTree)qualities.First().Key; var genealogy = graph.GetNode(bestSolution).Genealogy(); WriteDot(path + @"\bestlineage.dot", genealogy); // write the direct root lineage of the best solution (is it useful?) // calculate impact values of nodes in the best solution, attempt to trace those with high impact to their origins //var impactValuesCalculator = new RegressionSolutionImpactValuesCalculator(); //var impactValues = impactValuesCalculator.CalculateImpactValues(bestSolution, SymbolicExpressionInterpreter, SymbolicRegressionProblemData); ////var impactValues = CalculateImpactValues(bestSolution); //foreach (var pair in impactValues.Where(pair => !(pair.Key is ConstantTreeNode || pair.Key is VariableTreeNode) && pair.Value > 0.9)) { // var node = pair.Key; // foreach (var ancestor in genealogy.Keys) { // graph.GetNode(ancestor).Color = ContainsSubtree(ancestor as ISymbolicExpressionTree, node) ? new Color { R = 0, G = 0, B = 150 } : new Color { R = 255, G = 255, B = 255 }; // } //} //WriteDot(path + @"\impactancestry.dot", genealogy); // trim the graph // exclude the individuals of the last generation var individuals = graph.Keys.Except(qualities.Select(x => x.Key)).ToList(); bool done = false; while (!done) { done = true; foreach (var ind in individuals) { // if node has no outgoing connections (absence of offspring), remove it from the graph var node = graph.GetNode(ind); if (node == null) continue; if (node.OutEdges == null) { done = false; // we still have "dead" nodes graph.RemoveNode(ind); } } } WriteDot(path + @"\trimmedlineage.dot", graph); } #endregion } return base.Apply(); } private double Evaluate(ISymbolicExpressionTree tree) { // we perform evaluation by adding a temporary subscope with the tree in it, and calling evaluator.Apply() var subScope = new Scope(); // inject expected variables into the subscope subScope.Variables.Add(new Core.Variable("SymbolicExpressionTree", tree)); ExecutionContext.Scope.SubScopes.Add(subScope); var context = new Core.ExecutionContext(ExecutionContext, SymbolicDataAnalysisEvaluator, subScope); SymbolicDataAnalysisEvaluator.Execute(context, new CancellationToken()); // get the quality double quality = ((DoubleValue)subScope.Variables["Quality"].Value).Value; // remove the subscope ExecutionContext.Scope.SubScopes.Remove(subScope); return quality; } #region Export to dot file private static void WriteDot(string path, GenealogyGraph graph) { using (var file = new System.IO.StreamWriter(path)) { string nl = Environment.NewLine; file.WriteLine("digraph \"lineage " + graph.AverageDegree.ToString(CultureInfo.InvariantCulture) + "\" {" + nl + "ratio=auto;" + nl + "mincross=2.0"); file.WriteLine("\tnode [shape=circle,label=\"\",style=filled]"); foreach (var node in graph.Values) { string fillColor = String.Format("#{0:x2}{1:x2}{2:x2}", node.Color.R, node.Color.G, node.Color.B); if (node.IsElite) fillColor = String.Format("#{0:x2}{1:x2}{2:x2}", node.Color.R, node.Color.G, 150); file.WriteLine("\t\"" + node.Id + "\" [fillcolor=\"" + fillColor + "\",label=\"" + node.Label + "\"];"); if (node.InEdges == null) continue; foreach (var edge in node.InEdges) { var edgeStyle = node.InEdges.Count == 1 ? "dashed" : String.Empty; file.WriteLine("\t\"" + edge.Target.Id + "\" -> \"" + node.Id + "\" [arrowsize=.5, color=\"" + fillColor + "\", style=\"" + edgeStyle + "\"];"); } } foreach (var g in graph.Values.GroupBy(x => x.Generation)) { var sb = new StringBuilder(); sb.Append("\t{rank=same;"); foreach (var node in g) sb.Append("\"" + node.Id + "\" "); sb.Append("}\n"); file.Write(sb.ToString()); } file.WriteLine("}"); } } #endregion #region Impact values (code for calculating to be moved in separate class) private Dictionary CalculateImpactValues(ISymbolicExpressionTree tree) { var interpreter = SymbolicExpressionInterpreter; var problemData = (IRegressionProblemData)SymbolicDataAnalysisEvaluator.Parameters["ProblemData"].ActualValue; var dataset = problemData.Dataset; var rows = problemData.TrainingIndizes; string targetVariable = problemData.TargetVariable; var impactValues = new Dictionary(); var nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix().ToList(); var originalOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows); var targetValues = dataset.GetDoubleValues(targetVariable, rows); OnlineCalculatorError errorState; double originalR2 = OnlinePearsonsRSquaredCalculator.Calculate(targetValues, originalOutput, out errorState); if (errorState != OnlineCalculatorError.None) originalR2 = 0.0; var constantNode = ((ConstantTreeNode)new Constant().CreateTreeNode()); var root = new ProgramRootSymbol().CreateTreeNode(); // root node var start = new StartSymbol().CreateTreeNode(); // start node root.AddSubtree(start); var tempTree = new SymbolicExpressionTree(root); foreach (ISymbolicExpressionTreeNode node in nodes) { var parent = node.Parent; constantNode.Value = CalculateReplacementValue(tempTree, node, tree); ISymbolicExpressionTreeNode replacementNode = constantNode; SwitchNode(parent, node, replacementNode); var newOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows); double newR2 = OnlinePearsonsRSquaredCalculator.Calculate(targetValues, newOutput, out errorState); if (errorState != OnlineCalculatorError.None) newR2 = 0.0; // impact = 0 if no change // impact < 0 if new solution is better // impact > 0 if new solution is worse impactValues[node] = originalR2 - newR2; SwitchNode(parent, replacementNode, node); } return impactValues; } private double CalculateReplacementValue(ISymbolicExpressionTree tempTree, ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree) { // remove old ADFs while (tempTree.Root.SubtreeCount > 1) tempTree.Root.RemoveSubtree(1); // clone ADFs of source tree for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) { tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone()); } var start = tempTree.Root.GetSubtree(0); while (start.SubtreeCount > 0) start.RemoveSubtree(0); start.AddSubtree((ISymbolicExpressionTreeNode)node.Clone()); var interpreter = SymbolicExpressionInterpreter; var rows = SymbolicRegressionProblemData.TrainingIndizes; return interpreter.GetSymbolicExpressionTreeValues(tempTree, SymbolicRegressionProblemData.Dataset, rows).Median(); } private static void SwitchNode(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode oldBranch, ISymbolicExpressionTreeNode newBranch) { for (int i = 0; i < root.SubtreeCount; i++) { if (root.GetSubtree(i) == oldBranch) { root.RemoveSubtree(i); root.InsertSubtree(i, newBranch); return; } } } #endregion #region Allele tracking private bool ContainsSubtree(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode node) { return tree.IterateNodesPostfix().Where(n => n.Symbol == node.Symbol && n.GetLength() == node.GetLength()) .Where(n => n.Subtrees.Any() && node.Subtrees.Any()) .Any(n => n.Subtrees.First().Symbol == node.Subtrees.First().Symbol); } #endregion #region Extra / not really needed private IEnumerable IterateNodes(ISymbolicExpressionTree tree) { return IterateNodes(tree.Root); } private static IEnumerable IterateNodes(ISymbolicExpressionTreeNode root) { var list = new List { root }; int offset = 0, count = 1; while (offset != count) { var c = count; for (int i = offset; i != count; ++i) { yield return list[i]; if (!list[i].Subtrees.Any()) continue; list.AddRange(list[i].Subtrees); } offset = c; count = list.Count; } } #endregion } }