source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs @ 3452

Last change on this file since 3452 was 3452, checked in by gkronber, 11 years ago

Included tracking of best of run solution (based on validation set) and calculation of MSE, R² and rel. Error on training and test sets. #938 (Data types and operators for regression problems)

File size: 10.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Evaluators;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// An operator for visualizing the best symbolic regression solution based on the validation set.
  /// </summary>
  /// <remarks>
  /// On every application the operator evaluates each symbolic expression tree it looks up on the
  /// validation partition (mean squared error, lower is better) and compares the best one with the
  /// stored best-of-run solution. When the stored solution is replaced, the mean squared error, the
  /// mean relative error and R² on the training and test partitions are written to the result collection.
  /// NOTE(review): the item name in the <c>Item</c> attribute differs from the class name; confirm
  /// whether this is intentional before changing it.
  /// </remarks>
  [Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
  [StorableClass]
  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
    private const string EvaluatorParameterName = "Evaluator";
    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string BestValidationSolutionParameterName = "BestValidationSolution";
    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string ResultsParameterName = "Results";

    #region parameter properties
    /// <summary>
    /// Lookup parameter for the symbolic regression evaluator. It is not read by <see cref="Apply"/>.
    /// </summary>
    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    /// <summary>
    /// Parameter holding the start index of the validation partition.
    /// </summary>
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    /// <summary>
    /// Parameter holding the end index of the validation partition.
    /// </summary>
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }

    /// <summary>
    /// Lookup parameter for the symbolic expression trees among which the best one is searched.
    /// </summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
    }
    /// <summary>
    /// Lookup parameter for the problem data on which the trees are evaluated.
    /// </summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>
    /// Lookup parameter for the best-of-run solution; read and written by <see cref="Apply"/>.
    /// </summary>
    public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
    }
    /// <summary>
    /// Exposes <see cref="BestValidationSolutionParameter"/> as the visualization parameter.
    /// </summary>
    ILookupParameter ISolutionsVisualizer.VisualizationParameter {
      get { return BestValidationSolutionParameter; }
    }

    /// <summary>
    /// Lookup parameter for the qualities of the solutions. It is not read by <see cref="Apply"/>,
    /// which recomputes the quality on the validation partition instead.
    /// </summary>
    public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
    }

    /// <summary>
    /// Lookup parameter for the result collection that receives the training and test statistics.
    /// </summary>
    public ILookupParameter<ResultCollection> ResultParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>
    /// The actual value of <see cref="EvaluatorParameter"/>.
    /// </summary>
    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    /// <summary>
    /// The actual value of <see cref="ValidationSamplesStartParameter"/>.
    /// </summary>
    public IntValue ValidationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    /// <summary>
    /// The actual value of <see cref="ValidationSamplesEndParameter"/>.
    /// </summary>
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    #endregion

    /// <summary>
    /// Creates the operator and registers all of its parameters.
    /// </summary>
    public BestValidationSymbolicRegressionSolutionVisualizer()
      : base() {
      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
      // EvaluatorParameter indexes Parameters by this name, so it has to be registered as well;
      // without it the EvaluatorParameter and Evaluator properties cannot be read.
      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator of the symbolic regression problem."));
    }

    /// <summary>
    /// Determines the tree with the lowest mean squared error on the validation partition and
    /// stores it as the best-of-run solution if no solution is stored yet or if it is strictly
    /// better than the stored one.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;

      int validationSamplesStart = ValidationSamplesStart.Value;
      int validationSamplesEnd = ValidationSamplesEnd.Value;
      var targetVariable = problemData.TargetVariable.Value;

      // Single pass over the population instead of sorting it completely just to pick one element.
      // double.CompareTo orders values exactly like the default comparer (NaN sorts before every
      // other value) and "< 0" keeps the first of several equally good trees, so the selected tree
      // is the same one a stable ascending sort would put first.
      // NOTE(review): if the evaluator can return NaN, such a tree is selected here; confirm the
      // evaluator's contract.
      bool candidateFound = false;
      SymbolicExpressionTree currentBestExpression = null;
      double currentBestValidationQuality = 0.0;
      foreach (SymbolicExpressionTree expression in expressions) {
        double validationQuality = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(expression, problemData.Dataset, targetVariable, validationSamplesStart, validationSamplesEnd);
        if (!candidateFound || validationQuality.CompareTo(currentBestValidationQuality) < 0) {
          candidateFound = true;
          currentBestExpression = expression;
          currentBestValidationQuality = validationQuality;
        }
      }

      if (!candidateFound) {
        // empty population -> there is nothing that could become the best-of-run solution
        return base.Apply();
      }

      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
      if (bestOfRunSolution == null) {
        // no best of run solution yet -> make a solution from the current best expression
        UpdateBestOfRunSolution(problemData, currentBestExpression);
      } else {
        // compare quality of current best with best of run solution; the target values are only
        // needed in this branch, so they are fetched here
        var validationValues = problemData.Dataset.GetVariableValues(targetVariable, validationSamplesStart, validationSamplesEnd);
        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
        if (bestOfRunValidationQuality > currentBestValidationQuality) {
          UpdateBestOfRunSolution(problemData, currentBestExpression);
        }
      }

      return base.Apply();
    }

    /// <summary>
    /// Stores a new solution built from <paramref name="tree"/> as the best-of-run solution and
    /// writes its error measures on the training and test partitions to the result collection.
    /// </summary>
    /// <param name="problemData">The problem data supplying the target values and partition bounds.</param>
    /// <param name="tree">The symbolic expression tree of the new best-of-run solution.</param>
    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree) {
      var newBestSolution = CreateDataAnalysisSolution(problemData, tree);
      BestValidationSolutionParameter.ActualValue = newBestSolution;

      var targetVariable = problemData.TargetVariable.Value;
      var trainingValues = problemData.Dataset.GetVariableValues(targetVariable, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
      var testValues = problemData.Dataset.GetVariableValues(targetVariable, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);

      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));

      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    }

    /// <summary>
    /// Sets the value of the result named <paramref name="resultName"/>, adding a new result to the
    /// result collection if none with that name exists yet.
    /// </summary>
    /// <param name="resultName">The name of the result entry.</param>
    /// <param name="value">The value to store.</param>
    private void AddResult(string resultName, IItem value) {
      var resultCollection = ResultParameter.ActualValue;
      if (resultCollection.ContainsKey(resultName)) {
        resultCollection[resultName].Value = value;
      } else {
        resultCollection.Add(new Result(resultName, value));
      }
    }

    /// <summary>
    /// Creates a symbolic regression model from <paramref name="expression"/> and the names of the
    /// input variables of <paramref name="problemData"/>.
    /// </summary>
    private SymbolicRegressionModel CreateModel(DataAnalysisProblemData problemData, SymbolicExpressionTree expression) {
      return new SymbolicRegressionModel(expression, problemData.InputVariables.Select(x => x.Value));
    }

    /// <summary>
    /// Creates a symbolic regression solution that combines <paramref name="problemData"/> with the
    /// model built from <paramref name="expression"/>.
    /// </summary>
    private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression) {
      return new SymbolicRegressionSolution(problemData, CreateModel(problemData, expression));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.