#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Problems.DataAnalysis.Evaluators;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
/// <summary>
/// An operator for visualizing the best symbolic regression solution based on the validation set.
/// </summary>
/// <remarks>
/// Each application evaluates all symbolic expression trees found in the sub-scopes on the validation
/// partition and replaces the stored best-of-run solution when the best current tree has a lower
/// mean squared error on that partition.
/// </remarks>
[Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
[StorableClass]
public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
  // Keys under which the parameters are registered in (and looked up from) the Parameters collection.
  private const string EvaluatorParameterName = "Evaluator";
  private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
  private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
  private const string BestValidationSolutionParameterName = "BestValidationSolution";
  private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
  private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
  private const string QualityParameterName = "Quality";
  private const string ResultsParameterName = "Results";

  #region parameter properties
  /// <summary>
  /// Gets the lookup parameter for the symbolic regression evaluator.
  /// </summary>
  public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
  }
  /// <summary>
  /// Gets the parameter holding the start index of the validation partition.
  /// </summary>
  public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
    get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
  }
  /// <summary>
  /// Gets the parameter holding the end index of the validation partition.
  /// </summary>
  public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
    get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
  }
  /// <summary>
  /// Gets the parameter that collects the symbolic expression trees from the sub-scopes.
  /// </summary>
  public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
    get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
  }
  /// <summary>
  /// Gets the lookup parameter for the problem data on which the trees are evaluated.
  /// </summary>
  public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
    get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
  }
  /// <summary>
  /// Gets the lookup parameter that stores the best solution found so far on the validation partition.
  /// </summary>
  public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
    get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
  }
  // Explicit interface implementation: the visualization exposed to the framework is the best validation solution.
  ILookupParameter ISolutionsVisualizer.VisualizationParameter {
    get { return BestValidationSolutionParameter; }
  }
  /// <summary>
  /// Gets the parameter that collects the qualities of the solutions from the sub-scopes.
  /// </summary>
  public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
    get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
  }
  /// <summary>
  /// Gets the lookup parameter for the result collection of the algorithm.
  /// </summary>
  public ILookupParameter<ResultCollection> ResultParameter {
    get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
  }
  #endregion

  #region properties
  /// <summary>
  /// Gets the actual value of <see cref="EvaluatorParameter"/>.
  /// </summary>
  public ISymbolicRegressionEvaluator Evaluator {
    get { return EvaluatorParameter.ActualValue; }
  }
  /// <summary>
  /// Gets the actual value of <see cref="ValidationSamplesStartParameter"/>.
  /// </summary>
  public IntValue ValidationSamplesStart {
    get { return ValidationSamplesStartParameter.ActualValue; }
  }
  /// <summary>
  /// Gets the actual value of <see cref="ValidationSamplesEndParameter"/>.
  /// </summary>
  public IntValue ValidationSamplesEnd {
    get { return ValidationSamplesEndParameter.ActualValue; }
  }
  #endregion

  /// <summary>
  /// Creates the operator and registers all of its parameters.
  /// </summary>
  public BestValidationSymbolicRegressionSolutionVisualizer()
    : base() {
    Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
    Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
    Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
    Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
    Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
    Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
    Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    // EvaluatorParameter looks this key up, so it has to be registered as well.
    // It is appended last so that the order of the previously existing parameters is unchanged.
    Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator for symbolic regression solutions."));
  }

  /// <summary>
  /// Determines the expression with the lowest mean squared error on the validation partition and
  /// makes it the best-of-run solution if no such solution exists yet or if it is strictly better
  /// than the stored one.
  /// </summary>
  /// <returns>The operation returned by the base implementation.</returns>
  public override IOperation Apply() {
    ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    // Without any candidate there is nothing to rank or to update.
    if (!expressions.Any()) {
      return base.Apply();
    }

    DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;
    int validationSamplesStart = ValidationSamplesStart.Value;
    int validationSamplesEnd = ValidationSamplesEnd.Value;

    // Single pass minimum search. On equal qualities the first expression wins.
    // A NaN quality is ranked worse than any number so that it is only selected
    // when every candidate evaluates to NaN.
    SymbolicExpressionTree currentBestExpression = null;
    double currentBestQuality = double.NaN;
    bool hasCandidate = false;
    foreach (SymbolicExpressionTree expression in expressions) {
      double validationQuality = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(expression, problemData.Dataset, problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
      bool replacesNaN = double.IsNaN(currentBestQuality) && !double.IsNaN(validationQuality);
      if (!hasCandidate || replacesNaN || validationQuality < currentBestQuality) {
        currentBestExpression = expression;
        currentBestQuality = validationQuality;
        hasCandidate = true;
      }
    }

    SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
    if (bestOfRunSolution == null) {
      // no best of run solution yet -> make a solution from the current best expression
      UpdateBestOfRunSolution(problemData, currentBestExpression);
    } else {
      // compare quality of current best with best of run solution on the same validation rows
      var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
      var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
      var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
      if (bestOfRunValidationQuality > currentBestQuality) {
        UpdateBestOfRunSolution(problemData, currentBestExpression);
      }
    }
    return base.Apply();
  }

  /// <summary>
  /// Creates a solution from <paramref name="tree"/>, stores it as the best validation solution and
  /// writes its mean squared error, mean relative error and R² on the training and test partitions
  /// to the result collection.
  /// </summary>
  /// <param name="problemData">The problem data providing the dataset, target variable and partitions.</param>
  /// <param name="tree">The expression tree that becomes the new best-of-run solution.</param>
  private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree) {
    var newBestSolution = CreateDataAnalysisSolution(problemData, tree);
    BestValidationSolutionParameter.ActualValue = newBestSolution;

    var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
    var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);

    AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
    AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
    AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));

    AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
  }

  /// <summary>
  /// Sets the value of the result named <paramref name="resultName"/>, adding the result to the
  /// result collection if it does not exist yet.
  /// </summary>
  /// <param name="resultName">The name of the result entry.</param>
  /// <param name="value">The new value of the result entry.</param>
  private void AddResult(string resultName, IItem value) {
    var resultCollection = ResultParameter.ActualValue;
    if (resultCollection.ContainsKey(resultName)) {
      resultCollection[resultName].Value = value;
    } else {
      resultCollection.Add(new Result(resultName, value));
    }
  }

  /// <summary>
  /// Creates a model from the expression and the values of the problem's input variables.
  /// </summary>
  private SymbolicRegressionModel CreateModel(DataAnalysisProblemData problemData, SymbolicExpressionTree expression) {
    return new SymbolicRegressionModel(expression, problemData.InputVariables.Select(x => x.Value));
  }

  /// <summary>
  /// Creates a solution that combines the problem data with a model created from the expression.
  /// </summary>
  private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression) {
    return new SymbolicRegressionSolution(problemData, CreateModel(problemData, expression));
  }
}
}