#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion

using System.Linq;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  [Item("SymbolicDataAnalysisSolutionPruningOptimizer", "An operator which automatically removes nodes that have a negative impact from the tree model, optimizing the remaining constants.")]
  [StorableClass]
  public class SymbolicDataAnalysisRegressionSolutionPruningOptimizer : SymbolicDataAnalysisSolutionPruningOptimizer {
    public override ISymbolicDataAnalysisSolution PruneAndOptimizeSolution(ISymbolicDataAnalysisSolution solution) {
      var regressionSolution = (ISymbolicRegressionSolution)solution;
      return PruneAndOptimizeRegressionSolution(regressionSolution);
    }

    /// <summary>
    /// Walks all levels of the symbolic regression model, starting from the deepest level, and:
    /// - calculates the impact value of every node on that level
    /// - prunes (replaces with a constant) the node with the lowest negative impact value (nodes with zero or positive impact are left unchanged)
    /// - when no more nodes can be pruned on the current level, moves on to the next level up in the tree
    /// - if the pruned and optimized solution turns out worse than the original solution (which can happen), the original solution is returned
    /// </summary>
    /// <param name="solution">The symbolic regression solution to prune and optimize.</param>
    /// <returns>The pruned and optimized solution, or the original solution if pruning did not improve it.</returns>
    private ISymbolicRegressionSolution PruneAndOptimizeRegressionSolution(ISymbolicRegressionSolution solution) {
      var calculator = new SymbolicRegressionSolutionImpactValuesCalculator();
      var model = (ISymbolicRegressionModel)solution.Model;
      var problemData = solution.ProblemData;
      // get tree levels and iterate each level from the bottom up
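      // the tree root and its first child are the program root and start symbols; the actual model starts two levels down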
      var root = model.SymbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0);
      var levels = root.IterateNodesBreadth().GroupBy(root.GetBranchLevel).OrderByDescending(g => g.Key);

      OptimizeConstants(solution); // even if there are no negative impacts we still optimize the solution

      foreach (var level in levels) {
        var nodes = level.ToArray();
        double minImpact;
        do {
          minImpact = 0.0;
          int minImpactIndex = -1;
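          // find the node with the most negative (most harmful) impact on this level;
          // the impact value estimates how the model quality changes when the branch is replaced by a constant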
          for (int i = 0; i < nodes.Length; ++i) {
            if (nodes[i] is ConstantTreeNode) continue;
            var impact = calculator.CalculateImpactValue(model, nodes[i], problemData, problemData.TrainingIndices);
            if (impact < minImpact) {
              minImpact = impact;
              minImpactIndex = i;
            }
          }
          if (minImpact >= 0) continue;
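          // prune the most harmful branch: replace it with a constant node whose value stands in for the branch's output on the training rows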
          var node = nodes[minImpactIndex];
          var replacementValue = calculator.CalculateReplacementValue(model, node, problemData, problemData.TrainingIndices);
          var constantNode = MakeConstantTreeNode(replacementValue);
          ReplaceWithConstantNode(node, constantNode);
          nodes[minImpactIndex] = constantNode;
          OptimizeConstants(solution);
        } while (minImpact < 0);
      }
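      // keep the pruned model only if it actually improves the training R²; otherwise fall back to the original solution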
      var newSolution = (ISymbolicRegressionSolution)model.CreateRegressionSolution(problemData);
      return newSolution.TrainingRSquared > solution.TrainingRSquared ? newSolution : solution;
    }

    private static void OptimizeConstants(ISymbolicRegressionSolution solution) {
      var model = solution.Model;
      var problemData = solution.ProblemData;
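      // re-fit the model's numeric constants on the training partition; the last two arguments are assumed to enable linear scaling and cap the optimizer at 50 iterations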
      SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(model.Interpreter, model.SymbolicExpressionTree, problemData, problemData.TrainingIndices, true, 50);
    }
  }
}