[3442] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System.Linq;
|
---|
| 23 | using HeuristicLab.Common;
|
---|
| 24 | using HeuristicLab.Core;
|
---|
| 25 | using HeuristicLab.Data;
|
---|
| 26 | using HeuristicLab.Operators;
|
---|
| 27 | using HeuristicLab.Optimization;
|
---|
| 28 | using HeuristicLab.Parameters;
|
---|
| 29 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 30 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
[3452] | 31 | using HeuristicLab.Problems.DataAnalysis.Evaluators;
|
---|
| 32 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
[3442] | 33 |
|
---|
| 34 | namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
|
---|
/// <summary>
/// An operator for visualizing the best symbolic regression solution based on the validation set.
/// </summary>
/// <remarks>
/// Each application scores every symbolic expression tree found in the sub-scopes with the mean
/// squared error on the validation partition. The tree with the lowest error replaces the stored
/// best solution when no solution has been stored yet, or when the stored solution has a strictly
/// higher validation error. Whenever the stored solution is replaced, error measures for the
/// training and test partitions are written to the result collection.
/// </remarks>
// NOTE(review): the item name below differs from the class name; it is kept unchanged here because
// it is a runtime string - confirm whether it should read "BestValidationSymbolicRegressionSolutionVisualizer".
[Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
[StorableClass]
public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
  private const string EvaluatorParameterName = "Evaluator";
  private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
  private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
  private const string BestValidationSolutionParameterName = "BestValidationSolution";
  private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
  private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
  private const string QualityParameterName = "Quality";
  private const string ResultsParameterName = "Results";

  #region parameter properties
  /// <summary>Lookup parameter for the symbolic regression evaluator.</summary>
  public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
  }
  /// <summary>Parameter holding the start index of the validation partition.</summary>
  public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
    get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
  }
  /// <summary>Parameter holding the end index of the validation partition.</summary>
  public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
    get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
  }

  /// <summary>Lookup parameter for the symbolic expression trees of all sub-scopes.</summary>
  public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
    get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
  }
  /// <summary>Lookup parameter for the problem data the solutions are evaluated on.</summary>
  public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
    get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
  }
  /// <summary>Lookup parameter that stores the best solution found so far on the validation partition.</summary>
  public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
    get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
  }
  ILookupParameter ISolutionsVisualizer.VisualizationParameter {
    get { return BestValidationSolutionParameter; }
  }

  /// <summary>Lookup parameter for the quality values of all sub-scopes.</summary>
  public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
    get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
  }

  /// <summary>Lookup parameter for the result collection that receives the error measures.</summary>
  public ILookupParameter<ResultCollection> ResultParameter {
    get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
  }
  #endregion

  #region properties
  /// <summary>Actual value of <see cref="EvaluatorParameter"/>.</summary>
  public ISymbolicRegressionEvaluator Evaluator {
    get { return EvaluatorParameter.ActualValue; }
  }
  /// <summary>Actual value of <see cref="ValidationSamplesStartParameter"/>.</summary>
  public IntValue ValidationSamplesStart {
    get { return ValidationSamplesStartParameter.ActualValue; }
  }
  /// <summary>Actual value of <see cref="ValidationSamplesEndParameter"/>.</summary>
  public IntValue ValidationSamplesEnd {
    get { return ValidationSamplesEndParameter.ActualValue; }
  }
  #endregion

  /// <summary>
  /// Creates the operator and registers every parameter that the parameter properties of this class index.
  /// </summary>
  public BestValidationSymbolicRegressionSolutionVisualizer()
    : base() {
    Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
    Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
    Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
    Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
    Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
    Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
    Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    // EvaluatorParameter indexes Parameters by this name, so the parameter has to be registered;
    // it is appended last so that the order of the previously existing parameters is unchanged.
    Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator for symbolic regression solutions."));
  }

  /// <summary>
  /// Determines the expression with the lowest mean squared error on the validation partition and
  /// stores it as the best validation solution if it beats the solution stored so far.
  /// </summary>
  /// <returns>The operation returned by the base implementation.</returns>
  public override IOperation Apply() {
    ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;

    int validationSamplesStart = ValidationSamplesStart.Value;
    int validationSamplesEnd = ValidationSamplesEnd.Value;
    var targetVariable = problemData.TargetVariable.Value;

    // Rank all candidates by their validation error. FirstOrDefault yields null for an empty
    // candidate set instead of throwing, and keeps the ordering semantics of the sorted sequence.
    var currentBestExpression = expressions
      .Select(expression => new {
        Expression = expression,
        ValidationQuality = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(expression, problemData.Dataset, targetVariable, validationSamplesStart, validationSamplesEnd)
      })
      .OrderBy(x => x.ValidationQuality)
      .FirstOrDefault();

    if (currentBestExpression == null) {
      // no candidate solutions available -> nothing to compare or visualize
      return base.Apply();
    }

    SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
    bool replaceBestOfRun;
    if (bestOfRunSolution == null) {
      // no best of run solution yet -> make a solution from the currentBestExpression
      replaceBestOfRun = true;
    } else {
      // compare quality of current best with best of run solution on the same validation rows
      var validationValues = problemData.Dataset.GetVariableValues(targetVariable, validationSamplesStart, validationSamplesEnd);
      var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
      var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
      replaceBestOfRun = bestOfRunValidationQuality > currentBestExpression.ValidationQuality;
    }

    if (replaceBestOfRun) {
      UpdateBestOfRunSolution(problemData, currentBestExpression.Expression);
    }

    return base.Apply();
  }

  /// <summary>
  /// Wraps <paramref name="tree"/> in a new solution, stores it as the best validation solution and
  /// writes its error measures on the training and test partitions to the result collection.
  /// </summary>
  /// <param name="problemData">The problem data supplying the target values and partition bounds.</param>
  /// <param name="tree">The expression tree that becomes the new best solution.</param>
  private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree) {
    var newBestSolution = CreateDataAnalysisSolution(problemData, tree);
    BestValidationSolutionParameter.ActualValue = newBestSolution;

    var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
    var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);

    AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
    AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
    AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));

    AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
  }

  /// <summary>
  /// Stores <paramref name="value"/> under <paramref name="resultName"/> in the result collection,
  /// overwriting the value of an existing entry or adding a new entry.
  /// </summary>
  /// <param name="resultName">The name of the result entry.</param>
  /// <param name="value">The value to store.</param>
  private void AddResult(string resultName, IItem value) {
    var resultCollection = ResultParameter.ActualValue;
    if (resultCollection.ContainsKey(resultName)) {
      resultCollection[resultName].Value = value;
    } else {
      resultCollection.Add(new Result(resultName, value));
    }
  }

  /// <summary>
  /// Creates a regression model from <paramref name="expression"/> and the names of the input
  /// variables of <paramref name="problemData"/>.
  /// </summary>
  private SymbolicRegressionModel CreateModel(DataAnalysisProblemData problemData, SymbolicExpressionTree expression) {
    return new SymbolicRegressionModel(expression, problemData.InputVariables.Select(x => x.Value));
  }

  /// <summary>
  /// Creates a regression solution that combines <paramref name="problemData"/> with a model built
  /// from <paramref name="expression"/>.
  /// </summary>
  private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression) {
    return new SymbolicRegressionSolution(problemData, CreateModel(problemData, expression));
  }
}
|
---|
| 170 | }
|
---|