#region License Information
/* HeuristicLab
* Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using CloneMapType = HeuristicLab.Core.ItemDictionary;
using TraceMapType = HeuristicLab.Core.ItemDictionary>;
namespace HeuristicLab.EvolutionaryTracking {
///
/// An operator that tracks the genealogy of every individual
///
[Item("SymbolicExpressionTreeGenealogyAnalyzer", "An operator that tracks tree lengths of Symbolic Expression Trees")]
[StorableClass]
public sealed class SymbolicExpressionTreeGenealogyAnalyzer : SingleSuccessorOperator, IAnalyzer {
private const string UpdateIntervalParameterName = "UpdateInterval";
private const string UpdateCounterParameterName = "UpdateCounter";
private const string ResultsParameterName = "Results";
private const string ElitesParameterName = "Elites";
private const string GenerationsParameterName = "Generations";
private const string MaximumGenerationsParameterName = "MaximumGenerations";
private const string MaximumSelectionPressureParameterName = "MaximumSelectionPressure";
private const string SelectionPressureParameterName = "SelectionPressure";
private const string GlobalTraceMapParameterName = "GlobalTraceMap";
private const string GlobalCloneMapParameterName = "GlobalCloneMap";
private const string PopulationGraphResultParameterName = "PopulationGraph";
private const string PopulationGraphResultParameterDescription = "Individual lineages";
private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
private const string SymbolicDataAnalysisProblemEvaluatorParameterName = "Evaluator";
#region Parameter properties
public ValueParameter UpdateIntervalParameter {
get { return (ValueParameter)Parameters[UpdateIntervalParameterName]; }
}
public ValueParameter UpdateCounterParameter {
get { return (ValueParameter)Parameters[UpdateCounterParameterName]; }
}
public LookupParameter ResultsParameter {
get { return (LookupParameter)Parameters[ResultsParameterName]; }
}
public LookupParameter ElitesParameter {
get { return (LookupParameter)Parameters[ElitesParameterName]; }
}
public LookupParameter GenerationsParameter {
get { return (LookupParameter)Parameters[GenerationsParameterName]; }
}
public LookupParameter MaximumGenerationsParameter {
get { return (LookupParameter)Parameters[MaximumGenerationsParameterName]; }
}
public LookupParameter SelectionPressureParameter {
get { return (LookupParameter)Parameters[SelectionPressureParameterName]; }
}
public LookupParameter MaximumSelectionPressureParameter {
get { return (LookupParameter)Parameters[MaximumSelectionPressureParameterName]; }
}
// problem data, interpreter and evaluator
public LookupParameter SymbolicExpressionInterpreterParameter {
get { return (LookupParameter)Parameters[SymbolicExpressionInterpreterParameterName]; }
}
public LookupParameter SymbolicRegressionProblemDataParameter {
get { return (LookupParameter)Parameters[SymbolicRegressionProblemDataParameterName]; }
}
public LookupParameter SymbolicDataAnalysisProblemEvaluatorParameter {
get { return (LookupParameter)Parameters[SymbolicDataAnalysisProblemEvaluatorParameterName]; }
}
// genealogy global parameters
public LookupParameter GlobalTraceMapParameter {
get { return (LookupParameter)Parameters[GlobalTraceMapParameterName]; }
}
public LookupParameter GlobalCloneMapParameter {
get { return (LookupParameter)Parameters[GlobalCloneMapParameterName]; }
}
#endregion
#region Properties
public bool EnabledByDefault {
get { return true; }
}
public IntValue UpdateInterval {
get { return UpdateIntervalParameter.Value; }
}
public IntValue UpdateCounter {
get { return UpdateCounterParameter.Value; }
}
public ResultCollection Results {
get { return ResultsParameter.ActualValue; }
}
public IntValue Elites {
get { return ElitesParameter.ActualValue; }
}
public IntValue Generations {
get { return GenerationsParameter.ActualValue; }
}
public IntValue MaximumGenerations {
get { return MaximumGenerationsParameter.ActualValue; }
}
public DoubleValue SelectionPressure {
get { return SelectionPressureParameter.ActualValue; }
}
public DoubleValue MaximumSelectionPressure {
get { return MaximumSelectionPressureParameter.ActualValue; }
}
public CloneMapType GlobalCloneMap {
get { return GlobalCloneMapParameter.ActualValue; }
}
public TraceMapType GlobalTraceMap {
get { return GlobalTraceMapParameter.ActualValue; }
}
public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionInterpreter {
get { return SymbolicExpressionInterpreterParameter.ActualValue; }
}
public RegressionProblemData SymbolicRegressionProblemData {
get { return SymbolicRegressionProblemDataParameter.ActualValue; }
}
public IEvaluator SymbolicDataAnalysisEvaluator {
get { return SymbolicDataAnalysisProblemEvaluatorParameter.ActualValue; }
}
#endregion
[StorableConstructor]
private SymbolicExpressionTreeGenealogyAnalyzer(bool deserializing)
: base() {
}
private SymbolicExpressionTreeGenealogyAnalyzer(SymbolicExpressionTreeGenealogyAnalyzer original, Cloner cloner)
: base(original, cloner) {
}
public override IDeepCloneable Clone(Cloner cloner) {
return new SymbolicExpressionTreeGenealogyAnalyzer(this, cloner);
}
public SymbolicExpressionTreeGenealogyAnalyzer()
: base() {
Parameters.Add(new LookupParameter(ElitesParameterName, "The number of elites."));
Parameters.Add(new LookupParameter(GenerationsParameterName, "The number of generations so far."));
Parameters.Add(new LookupParameter(MaximumGenerationsParameterName, "The maximum number of generations"));
Parameters.Add(new LookupParameter(SelectionPressureParameterName, "The current selection (ony for OSGA)"));
Parameters.Add(new LookupParameter(MaximumSelectionPressureParameterName, "Maximum allowed value of selection pressure."));
Parameters.Add(new ValueParameter(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
Parameters.Add(new ValueParameter(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
Parameters.Add(new LookupParameter(GlobalTraceMapParameterName, "A global cache containing the whole genealogy."));
Parameters.Add(new LookupParameter(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection)."));
Parameters.Add(new ValueLookupParameter(ResultsParameterName, "The results collection where the analysis values should be stored."));
Parameters.Add(new LookupParameter(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
Parameters.Add(new LookupParameter(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
Parameters.Add(new LookupParameter(SymbolicDataAnalysisProblemEvaluatorParameterName, "The fitness evaluator"));
UpdateCounterParameter.Hidden = true;
UpdateIntervalParameter.Hidden = true;
}
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
// check if all the parameters are present and accounted for
if (!Parameters.ContainsKey(UpdateIntervalParameterName)) {
Parameters.Add(new ValueParameter(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
}
if (Parameters.ContainsKey(UpdateCounterParameterName)) return;
Parameters.Add(new ValueParameter(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
UpdateCounterParameter.Hidden = true;
}
#region IStatefulItem members
public override void InitializeState() {
base.InitializeState();
UpdateCounter.Value = 0;
}
public override void ClearState() {
base.ClearState();
UpdateCounter.Value = 0;
}
#endregion
public override IOperation Apply() {
UpdateCounter.Value++;
// the analyzer runs periodically, every 'updateInterval' times
if (UpdateCounter.Value == UpdateInterval.Value) {
UpdateCounter.Value = 0; // reset counter
var gScope = ExecutionContext.Scope;
while (gScope.Parent != null) gScope = gScope.Parent;
GenealogyGraph graph;
if (!Results.ContainsKey(PopulationGraphResultParameterName)) {
graph = new GenealogyGraph();
Results.Add(new Result(PopulationGraphResultParameterName, PopulationGraphResultParameterDescription, graph));
} else {
graph = (GenealogyGraph)Results[PopulationGraphResultParameterName].Value;
}
// get tree quality values
var qualities = (from s in gScope.SubScopes
let individual = s.Variables.First().Value
let quality = (DoubleValue)s.Variables["Quality"].Value
orderby quality.Value descending
select new Tuple(individual, quality.Value)).ToDictionary(t => t.Item1, t => t.Item2);
// add all individuals to the evolutionary graph
int generation = Generations.Value;
int count = GlobalTraceMap.Count;
string label;
if (generation == 0) {
// at generation 0 no trace map is present (since no reproduction has taken place yet),
// so we only add the initial trees as nodes in the genealogy graph
for (int i = 0; i != qualities.Count; ++i) {
var tree = qualities.ElementAt(i).Key;
label = (i + 1).ToString(CultureInfo.InvariantCulture);
graph.AddNode(tree, qualities[tree], label, generation);
}
return base.Apply();
}
// mark and add elites
// elites do not appear in the trace map (because they are never the product of a genetic operation)
var elites = qualities.OrderByDescending(x => x.Value).Take(Elites.Value).Select(x => x.Key).ToList();
for (int i = 0; i != Elites.Value; ++i) {
label = (generation * count + i + 1).ToString(CultureInfo.InvariantCulture);
var elite = elites[i];
if (!graph.HasNode(elite))
graph.AddNode(elite, qualities[elite], label, Generations.Value, true);
else
graph.GetNode(elite).Label += "\\n" + label;
graph.GetNode(elite).Color = new Color { R = 0, G = 100, B = 150 };
}
for (int i = 0; i != count; ++i) {
var trace = GlobalTraceMap.ElementAt(i);
var child = (ISymbolicExpressionTree)trace.Key;
if (!graph.HasNode(child)) {
// due to the structure of the trace map, qualities[child] will never throw an exception, so we use it directly
label = (generation * count + i + 1 + Elites.Value).ToString(CultureInfo.InvariantCulture);
graph.AddNode(child, qualities[child], label, generation);
}
var parents = trace.Value;
foreach (var parent in parents) {
if (!graph.HasNode(parent)) {
// if the node is a clone introduced pre-mutation, then its quality value has to be evaluated
if (!qualities.ContainsKey(parent))
qualities[parent] = Evaluate((ISymbolicExpressionTree)parent);
label = ((generation - 1) * count + i + 1).ToString(CultureInfo.InvariantCulture);
graph.AddNode(parent, qualities[parent], label, generation - 1);
}
graph.AddArc(parent, child);
}
}
GlobalTraceMap.Clear(); // no need to check for null here (see line 212)
// if we've reached the end of the run
bool maxGenerationsReached = (Generations.Value == MaximumGenerations.Value);
bool isOsga = (SelectionPressure != null && MaximumSelectionPressure != null);
bool maxSelectionPressureReached = isOsga && (SelectionPressure.Value >= MaximumSelectionPressure.Value);
#region end of the run
if (maxGenerationsReached || maxSelectionPressureReached) {
var path = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
// write whole graph to a dot file
WriteDot(path + @"\lineage.dot", graph);
// get genealogy of the best solution
var bestSolution = (ISymbolicExpressionTree)qualities.First().Key;
var genealogy = graph.GetNode(bestSolution).Genealogy();
WriteDot(path + @"\bestlineage.dot", genealogy);
// write the direct root lineage of the best solution (is it useful?)
// calculate impact values of nodes in the best solution, attempt to trace those with high impact to their origins
//var impactValuesCalculator = new RegressionSolutionImpactValuesCalculator();
//var impactValues = impactValuesCalculator.CalculateImpactValues(bestSolution, SymbolicExpressionInterpreter, SymbolicRegressionProblemData);
////var impactValues = CalculateImpactValues(bestSolution);
//foreach (var pair in impactValues.Where(pair => !(pair.Key is ConstantTreeNode || pair.Key is VariableTreeNode) && pair.Value > 0.9)) {
// var node = pair.Key;
// foreach (var ancestor in genealogy.Keys) {
// graph.GetNode(ancestor).Color = ContainsSubtree(ancestor as ISymbolicExpressionTree, node) ? new Color { R = 0, G = 0, B = 150 } : new Color { R = 255, G = 255, B = 255 };
// }
//}
//WriteDot(path + @"\impactancestry.dot", genealogy);
// trim the graph
// exclude the individuals of the last generation
var individuals = graph.Keys.Except(qualities.Select(x => x.Key)).ToList();
bool done = false;
while (!done) {
done = true;
foreach (var ind in individuals) {
// if node has no outgoing connections (absence of offspring), remove it from the graph
var node = graph.GetNode(ind);
if (node == null) continue;
if (node.OutEdges == null) {
done = false; // we still have "dead" nodes
graph.RemoveNode(ind);
}
}
}
WriteDot(path + @"\trimmedlineage.dot", graph);
}
#endregion
}
return base.Apply();
}
private double Evaluate(ISymbolicExpressionTree tree) {
// we perform evaluation by adding a temporary subscope with the tree in it, and calling evaluator.Apply()
var subScope = new Scope();
// inject expected variables into the subscope
subScope.Variables.Add(new Core.Variable("SymbolicExpressionTree", tree));
ExecutionContext.Scope.SubScopes.Add(subScope);
var context = new Core.ExecutionContext(ExecutionContext, SymbolicDataAnalysisEvaluator, subScope);
SymbolicDataAnalysisEvaluator.Execute(context, new CancellationToken());
// get the quality
double quality = ((DoubleValue)subScope.Variables["Quality"].Value).Value;
// remove the subscope
ExecutionContext.Scope.SubScopes.Remove(subScope);
return quality;
}
#region Export to dot file
private static void WriteDot(string path, GenealogyGraph graph) {
using (var file = new System.IO.StreamWriter(path)) {
string nl = Environment.NewLine;
file.WriteLine("digraph \"lineage " + graph.AverageDegree.ToString(CultureInfo.InvariantCulture) + "\" {" + nl +
"ratio=auto;" + nl +
"mincross=2.0");
file.WriteLine("\tnode [shape=circle,label=\"\",style=filled]");
foreach (var node in graph.Values) {
string fillColor = String.Format("#{0:x2}{1:x2}{2:x2}", node.Color.R, node.Color.G, node.Color.B);
if (node.IsElite)
fillColor = String.Format("#{0:x2}{1:x2}{2:x2}", node.Color.R, node.Color.G, 150);
file.WriteLine("\t\"" + node.Id + "\" [fillcolor=\"" + fillColor + "\",label=\"" + node.Label + "\"];");
if (node.InEdges == null)
continue;
foreach (var edge in node.InEdges) {
var edgeStyle = node.InEdges.Count == 1 ? "dashed" : String.Empty;
file.WriteLine("\t\"" + edge.Target.Id + "\" -> \"" + node.Id + "\" [arrowsize=.5, color=\"" + fillColor + "\", style=\"" + edgeStyle + "\"];");
}
}
foreach (var g in graph.Values.GroupBy(x => x.Generation)) {
var sb = new StringBuilder();
sb.Append("\t{rank=same;");
foreach (var node in g)
sb.Append("\"" + node.Id + "\" ");
sb.Append("}\n");
file.Write(sb.ToString());
}
file.WriteLine("}");
}
}
#endregion
#region Impact values (code for calculating to be moved in separate class)
private Dictionary CalculateImpactValues(ISymbolicExpressionTree tree) {
var interpreter = SymbolicExpressionInterpreter;
var problemData = (IRegressionProblemData)SymbolicDataAnalysisEvaluator.Parameters["ProblemData"].ActualValue;
var dataset = problemData.Dataset;
var rows = problemData.TrainingIndizes;
string targetVariable = problemData.TargetVariable;
var impactValues = new Dictionary();
var nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix().ToList();
var originalOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows);
var targetValues = dataset.GetDoubleValues(targetVariable, rows);
OnlineCalculatorError errorState;
double originalR2 = OnlinePearsonsRSquaredCalculator.Calculate(targetValues, originalOutput, out errorState);
if (errorState != OnlineCalculatorError.None) originalR2 = 0.0;
var constantNode = ((ConstantTreeNode)new Constant().CreateTreeNode());
var root = new ProgramRootSymbol().CreateTreeNode(); // root node
var start = new StartSymbol().CreateTreeNode(); // start node
root.AddSubtree(start);
var tempTree = new SymbolicExpressionTree(root);
foreach (ISymbolicExpressionTreeNode node in nodes) {
var parent = node.Parent;
constantNode.Value = CalculateReplacementValue(tempTree, node, tree);
ISymbolicExpressionTreeNode replacementNode = constantNode;
SwitchNode(parent, node, replacementNode);
var newOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows);
double newR2 = OnlinePearsonsRSquaredCalculator.Calculate(targetValues, newOutput, out errorState);
if (errorState != OnlineCalculatorError.None) newR2 = 0.0;
// impact = 0 if no change
// impact < 0 if new solution is better
// impact > 0 if new solution is worse
impactValues[node] = originalR2 - newR2;
SwitchNode(parent, replacementNode, node);
}
return impactValues;
}
private double CalculateReplacementValue(ISymbolicExpressionTree tempTree, ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree) {
// remove old ADFs
while (tempTree.Root.SubtreeCount > 1) tempTree.Root.RemoveSubtree(1);
// clone ADFs of source tree
for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
}
var start = tempTree.Root.GetSubtree(0);
while (start.SubtreeCount > 0) start.RemoveSubtree(0);
start.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
var interpreter = SymbolicExpressionInterpreter;
var rows = SymbolicRegressionProblemData.TrainingIndizes;
return interpreter.GetSymbolicExpressionTreeValues(tempTree, SymbolicRegressionProblemData.Dataset, rows).Median();
}
private static void SwitchNode(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode oldBranch, ISymbolicExpressionTreeNode newBranch) {
for (int i = 0; i < root.SubtreeCount; i++) {
if (root.GetSubtree(i) == oldBranch) {
root.RemoveSubtree(i);
root.InsertSubtree(i, newBranch);
return;
}
}
}
#endregion
#region Allele tracking
private bool ContainsSubtree(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode node) {
return tree.IterateNodesPostfix().Where(n => n.Symbol == node.Symbol && n.GetLength() == node.GetLength())
.Where(n => n.Subtrees.Any() && node.Subtrees.Any())
.Any(n => n.Subtrees.First().Symbol == node.Subtrees.First().Symbol);
}
#endregion
#region Extra / not really needed
private IEnumerable IterateNodes(ISymbolicExpressionTree tree) {
return IterateNodes(tree.Root);
}
private static IEnumerable IterateNodes(ISymbolicExpressionTreeNode root) {
var list = new List { root };
int offset = 0, count = 1;
while (offset != count) {
var c = count;
for (int i = offset; i != count; ++i) {
yield return list[i];
if (!list[i].Subtrees.Any()) continue;
list.AddRange(list[i].Subtrees);
}
offset = c;
count = list.Count;
}
}
#endregion
}
}