#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Evaluators;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the training best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator picks the tree with the best quality from the looked-up
  /// trees. If that quality improves on the stored best quality, the tree is linearly scaled
  /// (scaling parameters are calculated on the training rows), wrapped into a
  /// <see cref="SymbolicRegressionSolution"/> and published in the result collection, optionally
  /// together with complexity and accuracy figures.
  /// </remarks>
  [Item("TrainingBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the training best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class TrainingBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string QualityParameterName = "Quality";
    private const string MaximizationParameterName = "Maximization";
    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    private const string CalculateSolutionAccuracyParameterName = "CalculateSolutionAccuracy";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "DataAnalysisProblemData";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string BestSolutionParameterName = "Best training solution";
    private const string BestSolutionQualityParameterName = "Best training solution quality";
    private const string BestSolutionLengthParameterName = "Best training solution length";
    private const string BestSolutionHeightParameterName = "Best training solution height";
    private const string BestSolutionVariablesParameterName = "Best training solution variables";
    // The superscript two is written as \u00B2 so that it does not depend on the encoding
    // the source file is saved or read with.
    private const string BestSolutionTrainingRSquaredParameterName = "Best training solution R\u00B2 (training)";
    private const string BestSolutionTestRSquaredParameterName = "Best training solution R\u00B2 (test)";
    private const string BestSolutionTrainingMseParameterName = "Best training solution mean squared error (training)";
    private const string BestSolutionTestMseParameterName = "Best training solution mean squared error (test)";
    private const string BestSolutionTrainingRelativeErrorParameterName = "Best training solution relative error (training)";
    private const string BestSolutionTestRelativeErrorParameterName = "Best training solution relative error (test)";
    private const string ResultsParameterName = "Results";

    #region parameter properties
    // Typed accessors for the entries registered in the Parameters collection by the public constructor.
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    }
    public IValueParameter<BoolValue> CalculateSolutionAccuracyParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionAccuracyParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionLengthParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionHeightParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionVariablesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionVariablesParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingRSquaredParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingRSquaredParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestRSquaredParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestRSquaredParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingMseParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingMseParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestMseParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestMseParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingRelativeErrorParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingRelativeErrorParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestRelativeErrorParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestRelativeErrorParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    #endregion

    #region properties
    // Convenience accessors for the values behind the parameters above.
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ItemArray<DoubleValue> Quality {
      get { return QualityParameter.ActualValue; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public BoolValue CalculateSolutionComplexity {
      get { return CalculateSolutionComplexityParameter.Value; }
      set { CalculateSolutionComplexityParameter.Value = value; }
    }
    public BoolValue CalculateSolutionAccuracy {
      get { return CalculateSolutionAccuracyParameter.Value; }
      set { CalculateSolutionAccuracyParameter.Value = value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public SymbolicRegressionSolution BestSolution {
      get { return BestSolutionParameter.ActualValue; }
      set { BestSolutionParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
      set { BestSolutionQualityParameter.ActualValue = value; }
    }
    public IntValue BestSolutionLength {
      get { return BestSolutionLengthParameter.ActualValue; }
      set { BestSolutionLengthParameter.ActualValue = value; }
    }
    public IntValue BestSolutionHeight {
      get { return BestSolutionHeightParameter.ActualValue; }
      set { BestSolutionHeightParameter.ActualValue = value; }
    }
    public IntValue BestSolutionVariables {
      get { return BestSolutionVariablesParameter.ActualValue; }
      set { BestSolutionVariablesParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingRSquared {
      get { return BestSolutionTrainingRSquaredParameter.ActualValue; }
      set { BestSolutionTrainingRSquaredParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestRSquared {
      get { return BestSolutionTestRSquaredParameter.ActualValue; }
      set { BestSolutionTestRSquaredParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingMse {
      get { return BestSolutionTrainingMseParameter.ActualValue; }
      set { BestSolutionTrainingMseParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestMse {
      get { return BestSolutionTestMseParameter.ActualValue; }
      set { BestSolutionTestMseParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingRelativeError {
      get { return BestSolutionTrainingRelativeErrorParameter.ActualValue; }
      set { BestSolutionTrainingRelativeErrorParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestRelativeError {
      get { return BestSolutionTestRelativeErrorParameter.ActualValue; }
      set { BestSolutionTestRelativeErrorParameter.ActualValue = value; }
    }
    #endregion

    [StorableConstructor]
    private TrainingBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    private TrainingBestScaledSymbolicRegressionSolutionAnalyzer(TrainingBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the analyzer and registers all of its parameters. Both calculation switches
    /// (solution complexity and solution accuracy) default to <c>false</c>.
    /// </summary>
    public TrainingBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic expression trees to analyze."));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the training best solution should be calculated.", new BoolValue(false)));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionAccuracyParameterName, "Determines if the accuracy of the training best solution on the training and test set should be calculated.", new BoolValue(false)));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionVariablesParameterName, "The number of variables used by the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingRSquaredParameterName, "The R\u00B2 value on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestRSquaredParameterName, "The R\u00B2 value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingMseParameterName, "The mean squared error on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestMseParameterName, "The mean squared error value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingRelativeErrorParameterName, "The relative error on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestRelativeErrorParameterName, "The relative error value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    }

    /// <summary>
    /// Creates a copy of this analyzer through the cloning constructor.
    /// </summary>
    /// <param name="cloner">The cloner that is handed on to the cloning constructor.</param>
    /// <returns>The cloned analyzer.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new TrainingBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    // Intentionally empty; kept so the after-deserialization hook stays registered.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
    }

    /// <summary>
    /// Selects the tree with the best quality and, if it improves on the stored best quality,
    /// stores a linearly scaled solution built from it together with its quality and, depending
    /// on the two calculation switches, its complexity and accuracy figures.
    /// </summary>
    /// <remarks>
    /// Nothing is updated when no tree qualifies as best (no trees in scope, or no quality that
    /// compares better than the initial infinity, e.g. only NaN qualities).
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Read the looked-up values once instead of going through the parameter wrappers repeatedly.
      bool maximization = Maximization.Value;
      SymbolicExpressionTree[] trees = SymbolicExpressionTree.ToArray();
      double[] qualities = Quality.Select(x => x.Value).ToArray();

      int bestIndex = FindBestIndex(qualities, trees.Length, maximization);
      if (bestIndex < 0) {
        // no candidate => there is nothing that could be scaled or reported
        return base.Apply();
      }
      double bestQuality = qualities[bestIndex];
      SymbolicExpressionTree bestTree = trees[bestIndex];

      // if the best tree is better than the current best solution => update
      DoubleValue currentBestQuality = BestSolutionQuality;
      bool newBest = currentBestQuality == null ||
        (maximization && bestQuality > currentBestQuality.Value) ||
        (!maximization && bestQuality < currentBestQuality.Value);
      if (!newBest) {
        return base.Apply();
      }

      DataAnalysisProblemData problemData = ProblemData;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;
      double lowerEstimationLimit = LowerEstimationLimit.Value;
      double upperEstimationLimit = UpperEstimationLimit.Value;
      string targetVariable = problemData.TargetVariable.Value;

      // calculate scaling parameters only for the best tree, using the full training set
      double alpha, beta;
      SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, bestTree,
        lowerEstimationLimit, upperEstimationLimit,
        problemData.Dataset, targetVariable, problemData.TrainingIndizes, out beta, out alpha);

      // scale tree for solution; interpreter and problem data are cloned for the solution
      var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(), scaledTree);
      var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)problemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on training partition found over the whole run.";

      DoubleValue solutionQuality = new DoubleValue(bestQuality);
      BestSolution = solution;
      BestSolutionQuality = solutionQuality;

      ResultCollection results = Results;
      if (CalculateSolutionComplexity.Value) {
        UpdateComplexity(solution, results);
      }
      if (CalculateSolutionAccuracy.Value) {
        UpdateAccuracy(solution, problemData, targetVariable, results);
      }
      SetResult(results, BestSolutionQualityParameterName, solutionQuality);
      SetResult(results, BestSolutionParameterName, solution);

      return base.Apply();
    }

    // Returns the index of the first strictly best quality among the first 'count' entries, or -1 if there is none.
    private static int FindBestIndex(double[] qualities, int count, bool maximization) {
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      int bestIndex = -1;
      for (int i = 0; i < count; i++) {
        bool better = maximization ? qualities[i] > bestQuality : qualities[i] < bestQuality;
        if (better) {
          bestQuality = qualities[i];
          bestIndex = i;
        }
      }
      return bestIndex;
    }

    // Stores length, height and number of input variables of the solution's tree in the parameters and results.
    private void UpdateComplexity(SymbolicRegressionSolution solution, ResultCollection results) {
      IntValue length = new IntValue(solution.Model.SymbolicExpressionTree.Size);
      IntValue height = new IntValue(solution.Model.SymbolicExpressionTree.Height);
      IntValue variables = new IntValue(solution.Model.InputVariables.Count());
      BestSolutionLength = length;
      BestSolutionHeight = height;
      BestSolutionVariables = variables;

      SetResult(results, BestSolutionLengthParameterName, "Length of the best solution on the training set.", length);
      SetResult(results, BestSolutionHeightParameterName, "Height of the best solution on the training set.", height);
      SetResult(results, BestSolutionVariablesParameterName, "Number of variables used by the best solution on the training set.", variables);
    }

    // Stores R², MSE and relative error of the solution on the training and test rows in the parameters and results.
    private void UpdateAccuracy(SymbolicRegressionSolution solution, DataAnalysisProblemData problemData, string targetVariable, ResultCollection results) {
      IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariable, problemData.TrainingIndizes);
      IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariable, problemData.TestIndizes);

      double trainingR2, trainingMse, trainingRelError;
      CalculateAccuracy(trainingValues, solution.EstimatedTrainingValues, out trainingR2, out trainingMse, out trainingRelError);
      double testR2, testMse, testRelError;
      CalculateAccuracy(testValues, solution.EstimatedTestValues, out testR2, out testMse, out testRelError);

      DoubleValue trainingR2Value = new DoubleValue(trainingR2);
      DoubleValue testR2Value = new DoubleValue(testR2);
      DoubleValue trainingMseValue = new DoubleValue(trainingMse);
      DoubleValue testMseValue = new DoubleValue(testMse);
      DoubleValue trainingRelErrorValue = new DoubleValue(trainingRelError);
      DoubleValue testRelErrorValue = new DoubleValue(testRelError);

      BestSolutionTrainingRSquared = trainingR2Value;
      BestSolutionTestRSquared = testR2Value;
      BestSolutionTrainingMse = trainingMseValue;
      BestSolutionTestMse = testMseValue;
      BestSolutionTrainingRelativeError = trainingRelErrorValue;
      BestSolutionTestRelativeError = testRelErrorValue;

      SetResult(results, BestSolutionTrainingRSquaredParameterName, trainingR2Value);
      SetResult(results, BestSolutionTestRSquaredParameterName, testR2Value);
      SetResult(results, BestSolutionTrainingMseParameterName, trainingMseValue);
      SetResult(results, BestSolutionTestMseParameterName, testMseValue);
      SetResult(results, BestSolutionTrainingRelativeErrorParameterName, trainingRelErrorValue);
      SetResult(results, BestSolutionTestRelativeErrorParameterName, testRelErrorValue);
    }

    // Feeds pairs of original and estimated values into fresh online evaluators and reports their results.
    private static void CalculateAccuracy(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues,
      out double rSquared, out double meanSquaredError, out double relativeError) {
      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      // dispose both enumerators even if one of the evaluators throws
      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // non-short-circuit '&' on purpose: both sequences are always advanced together
        while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
          double original = originalEnumerator.Current;
          double estimated = estimatedEnumerator.Current;
          mseEvaluator.Add(original, estimated);
          r2Evaluator.Add(original, estimated);
          relErrorEvaluator.Add(original, estimated);
        }
      }

      rSquared = r2Evaluator.RSquared;
      meanSquaredError = mseEvaluator.MeanSquaredError;
      relativeError = relErrorEvaluator.MeanAbsolutePercentageError;
    }

    // Replaces the value of the result called 'name', or adds a new result without description if it is missing.
    private static void SetResult(ResultCollection results, string name, IItem value) {
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else {
        results.Add(new Result(name, value));
      }
    }

    // Replaces the value of the result called 'name', or adds a new result with the given description if it is missing.
    private static void SetResult(ResultCollection results, string name, string description, IItem value) {
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else {
        results.Add(new Result(name, description, value));
      }
    }
  }
}