#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Linq;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  [Item("SymbolicDataAnalysisSolutionPruningOptimizer", "An operator which automatically removes nodes that have a negative impact from the tree model and optimizes the remaining constants.")]
  [StorableClass]
  public class SymbolicDataAnalysisRegressionSolutionPruningOptimizer : SymbolicDataAnalysisSolutionPruningOptimizer {
    public override ISymbolicDataAnalysisSolution PruneAndOptimizeSolution(ISymbolicDataAnalysisSolution solution) {
      var regressionSolution = (ISymbolicRegressionSolution)solution;
      return PruneAndOptimizeRegressionSolution(regressionSolution);
    }

    /// <summary>
    /// This method walks all levels of the symbolic regression solution model, starting from the deepest level, and:
    /// - calculates the impact value of every node on that level
    /// - prunes (replaces with a constant) the node with the most negative impact value (nodes with zero or positive impact are left unchanged)
    /// - when no more nodes can be pruned on the current level, moves on to the next upper level of the tree
    /// - if the pruned and optimized solution is worse than the original solution (this can sometimes happen), the original solution is returned
    /// </summary>
    /// <param name="solution">The symbolic regression solution to be pruned and optimized.</param>
    /// <returns>The pruned and optimized solution, or the original solution if pruning did not improve it.</returns>
    private ISymbolicRegressionSolution PruneAndOptimizeRegressionSolution(ISymbolicRegressionSolution solution) {
      var calculator = new SymbolicRegressionSolutionImpactValuesCalculator();
      var model = (ISymbolicRegressionModel)solution.Model;
      var problemData = solution.ProblemData;

      // get tree levels and iterate each level from the bottom up
      var root = model.SymbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0);
      var levels = root.IterateNodesBreadth().GroupBy(root.GetBranchLevel).OrderByDescending(g => g.Key);

      OptimizeConstants(solution); // even if there are no negative impacts we still optimize the solution

      foreach (var level in levels) {
        var nodes = level.ToArray();
        double minImpact;
        do {
          minImpact = 0.0;
          int minImpactIndex = -1;
          for (int i = 0; i < nodes.Length; ++i) {
            if (nodes[i] is ConstantTreeNode) continue;
            var impact = calculator.CalculateImpactValue(model, nodes[i], problemData, problemData.TrainingIndices);
            if (impact < minImpact) {
              minImpact = impact;
              minImpactIndex = i;
            }
          }
          if (minImpact >= 0) continue;
          var node = nodes[minImpactIndex];
          var replacementValue = calculator.CalculateReplacementValue(model, node, problemData, problemData.TrainingIndices);
          var constantNode = MakeConstantTreeNode(replacementValue);
          ReplaceWithConstantNode(node, constantNode);
          nodes[minImpactIndex] = constantNode;
          OptimizeConstants(solution);
        } while (minImpact < 0);
      }

      var newSolution =
        (ISymbolicRegressionSolution)model.CreateRegressionSolution(problemData);
      return newSolution.TrainingRSquared > solution.TrainingRSquared ? newSolution : solution;
    }

    private static void OptimizeConstants(ISymbolicRegressionSolution solution) {
      var model = solution.Model;
      var problemData = solution.ProblemData;
      SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(model.Interpreter, model.SymbolicExpressionTree, problemData, problemData.TrainingIndices, true, 50);
    }
  }
}
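
// Usage sketch: a minimal illustration of how the operator could be applied, assuming it exposes a
// parameterless constructor (inherited from SymbolicDataAnalysisSolutionPruningOptimizer) and that
// "regressionSolution" is an existing ISymbolicRegressionSolution produced by a prior symbolic
// regression run; both names and the constructor are assumptions, not guaranteed by this file.
//
//   var pruningOptimizer = new SymbolicDataAnalysisRegressionSolutionPruningOptimizer();
//   var prunedSolution = pruningOptimizer.PruneAndOptimizeSolution(regressionSolution);
//   // PruneAndOptimizeSolution returns the original solution unchanged if the pruned and
//   // re-optimized model achieves a lower training R² than the original model.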