source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs @ 3513

Last change on this file since 3513 was 3513, checked in by gkronber, 12 years ago

Added upper and lower estimation limits. #938 (Data types and operators for regression problems)

File size: 12.5 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Evaluators;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// An operator for visualizing the best symbolic regression solution based on the validation set.
  /// </summary>
  /// <remarks>
  /// On every application the operator evaluates the mean squared error of all symbolic expression
  /// trees found in the sub-scopes on the validation partition, picks the tree with the smallest
  /// error and, if it beats the currently stored best validation solution, stores it and refreshes
  /// the training/test result entries.
  /// NOTE(review): the item name ("BestSymbolicExpressionTreeVisualizer") does not match the class
  /// name; it is left unchanged because the name may already be referenced elsewhere - confirm.
  /// </remarks>
  [Item("BestSymbolicExpressionTreeVisualizer", "An operator for visualizing the best symbolic regression solution based on the validation set.")]
  [StorableClass]
  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string BestValidationSolutionParameterName = "BestValidationSolution";
    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string ResultsParameterName = "Results";

    #region parameter properties
    /// <summary>The interpreter used to calculate the output values of symbolic expression trees.</summary>
    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    /// <summary>The upper cut off value for the output values of symbolic expression trees.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>The lower cut off value for the output values of symbolic expression trees.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    /// <summary>The start index of the validation partition.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    /// <summary>The end index of the validation partition.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }

    /// <summary>The symbolic regression solutions (one tree per sub-scope) to choose the best from.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
    }
    /// <summary>The problem data on which the best solution is evaluated.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>The best solution found so far, judged on the validation data.</summary>
    public ILookupParameter<SymbolicRegressionSolution> BestValidationSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestValidationSolutionParameterName]; }
    }
    // The visualization produced by this operator is the best validation solution itself.
    ILookupParameter ISolutionsVisualizer.VisualizationParameter {
      get { return BestValidationSolutionParameter; }
    }

    /// <summary>The qualities of the symbolic regression solutions.</summary>
    public ILookupParameter<ItemArray<DoubleValue>> QualityParameter {
      get { return (ILookupParameter<ItemArray<DoubleValue>>)Parameters[QualityParameterName]; }
    }

    /// <summary>The result collection of the algorithm that receives the error measures.</summary>
    public ILookupParameter<ResultCollection> ResultParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    #endregion

    #region properties
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public IntValue ValidationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    #endregion

    /// <summary>
    /// Creates the operator and registers all of its parameters.
    /// </summary>
    public BestValidationSymbolicRegressionSolutionVisualizer()
      : base() {
      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicRegressionModelParameterName, "The symbolic regression solutions from which the best solution should be visualized."));
      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problem data on which the best solution should be evaluated."));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    }

    /// <summary>
    /// Determines the expression with the smallest mean squared error on the validation partition
    /// and makes it the best validation solution if no solution is stored yet or if it is better
    /// than the stored one.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;

      int validationSamplesStart = ValidationSamplesStart.Value;
      int validationSamplesEnd = ValidationSamplesEnd.Value;
      double upperEstimationLimit = UpperEstimationLimit.Value;
      double lowerEstimationLimit = LowerEstimationLimit.Value;
      var targetVariable = problemData.TargetVariable.Value;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;

      // Single pass minimum search instead of sorting the whole population. A strict comparison
      // keeps the first of several equally good expressions (same result as a stable sort), and
      // a NaN quality is never preferred over a real number (sorting would rank NaN first).
      SymbolicExpressionTree currentBestExpression = null;
      double currentBestQuality = double.NaN;
      bool candidateFound = false;
      foreach (SymbolicExpressionTree expression in expressions) {
        double validationQuality =
          SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(
            interpreter, expression,
            lowerEstimationLimit, upperEstimationLimit,
            problemData.Dataset, targetVariable,
            validationSamplesStart, validationSamplesEnd);
        if (!candidateFound || IsBetter(validationQuality, currentBestQuality)) {
          currentBestExpression = expression;
          currentBestQuality = validationQuality;
          candidateFound = true;
        }
      }

      // nothing to visualize -> leave the stored solution and the results untouched
      if (!candidateFound) return base.Apply();

      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
      bool replaceBestOfRun;
      if (bestOfRunSolution == null) {
        // no best of run solution yet -> make a solution from the current best expression
        replaceBestOfRun = true;
      } else {
        // compare quality of current best with best of run solution
        // (the target values are only needed for this comparison)
        var validationValues = problemData.Dataset.GetVariableValues(targetVariable, validationSamplesStart, validationSamplesEnd);
        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
        double bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
        replaceBestOfRun = IsBetter(currentBestQuality, bestOfRunValidationQuality);
      }

      if (replaceBestOfRun) {
        UpdateBestOfRunSolution(problemData, currentBestExpression, interpreter, lowerEstimationLimit, upperEstimationLimit);
      }

      return base.Apply();
    }

    /// <summary>
    /// Compares two error values where smaller is better and NaN is worse than any number.
    /// </summary>
    /// <param name="candidate">The error of the challenger.</param>
    /// <param name="incumbent">The error of the currently best item.</param>
    /// <returns><c>true</c> if <paramref name="candidate"/> is a number and either strictly smaller
    /// than <paramref name="incumbent"/> or <paramref name="incumbent"/> is NaN.</returns>
    private static bool IsBetter(double candidate, double incumbent) {
      if (double.IsNaN(candidate)) return false;
      return double.IsNaN(incumbent) || candidate < incumbent;
    }

    /// <summary>
    /// Stores a solution built from <paramref name="tree"/> as best validation solution and writes
    /// its training and test error measures to the result collection.
    /// </summary>
    /// <param name="problemData">The problem data the solution is created for.</param>
    /// <param name="tree">The symbolic expression tree of the new best solution.</param>
    /// <param name="interpreter">The interpreter used by the model of the new solution.</param>
    /// <param name="lowerEstimationLimit">The lower limit passed to the new solution.</param>
    /// <param name="upperEstimationLimit">The upper limit passed to the new solution.</param>
    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree, ISymbolicExpressionTreeInterpreter interpreter,
      double lowerEstimationLimit, double upperEstimationLimit) {
      var newBestSolution = CreateDataAnalysisSolution(problemData, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
      if (BestValidationSolutionParameter.ActualValue == null)
        BestValidationSolutionParameter.ActualValue = newBestSolution;
      else
        // only update model
        BestValidationSolutionParameter.ActualValue.Model = newBestSolution.Model;

      var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
      var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);

      // the error measures are always calculated from the freshly created solution
      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));

      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
    }

    /// <summary>
    /// Sets the value of the result named <paramref name="resultName"/>, adding a new result entry
    /// if the result collection does not contain one with that name yet.
    /// </summary>
    /// <param name="resultName">The name (key) of the result.</param>
    /// <param name="value">The value to store.</param>
    private void AddResult(string resultName, IItem value) {
      var resultCollection = ResultParameter.ActualValue;
      if (resultCollection.ContainsKey(resultName)) {
        resultCollection[resultName].Value = value;
      } else {
        resultCollection.Add(new Result(resultName, value));
      }
    }

    /// <summary>
    /// Wraps <paramref name="expression"/> into a model over the input variables of
    /// <paramref name="problemData"/> and creates a solution from that model.
    /// </summary>
    /// <returns>The newly created symbolic regression solution.</returns>
    private SymbolicRegressionSolution CreateDataAnalysisSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree expression, ISymbolicExpressionTreeInterpreter interpreter,
      double lowerEstimationLimit, double upperEstimationLimit) {
      var model = new SymbolicRegressionModel(interpreter, expression, problemData.InputVariables.Select(s => s.Value));
      return new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.