Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/TrainingBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 5304

Last change on this file since 5304 was 5259, checked in by gkronber, 14 years ago

Added calculation of accuracy of best training solution. #1369

File size: 23.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using HeuristicLab.Problems.DataAnalysis.Evaluators;
35
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the training best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator selects the tree with the best quality among the trees in scope.
  /// If that quality improves on the best quality stored so far, the tree is linearly scaled using the
  /// training partition, wrapped in a <see cref="SymbolicRegressionSolution"/> and published in the
  /// result collection. Complexity (length, height, number of variables) and accuracy (R², MSE,
  /// relative error on training and test partition) are published as well when the corresponding
  /// switches are enabled.
  /// </remarks>
  [Item("TrainingBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the training best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class TrainingBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    // Names of the parameters; the "Best training solution ..." names double as keys in the result collection.
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string QualityParameterName = "Quality";
    private const string MaximizationParameterName = "Maximization";
    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    private const string CalculateSolutionAccuracyParameterName = "CalculateSolutionAccuracy";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "DataAnalysisProblemData";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string BestSolutionParameterName = "Best training solution";
    private const string BestSolutionQualityParameterName = "Best training solution quality";
    private const string BestSolutionLengthParameterName = "Best training solution length";
    private const string BestSolutionHeightParameterName = "Best training solution height";
    private const string BestSolutionVariablesParameterName = "Best training solution variables";
    private const string BestSolutionTrainingRSquaredParameterName = "Best training solution R² (training)";
    private const string BestSolutionTestRSquaredParameterName = "Best training solution R² (test)";
    private const string BestSolutionTrainingMseParameterName = "Best training solution mean squared error (training)";
    private const string BestSolutionTestMseParameterName = "Best training solution mean squared error (test)";
    private const string BestSolutionTrainingRelativeErrorParameterName = "Best training solution relative error (training)";
    private const string BestSolutionTestRelativeErrorParameterName = "Best training solution relative error (test)";
    private const string ResultsParameterName = "Results";

    #region parameter properties
    // Typed accessors for the entries of the Parameters collection, looked up by the names defined above.
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    }
    public IValueParameter<BoolValue> CalculateSolutionAccuracyParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionAccuracyParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }

    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionLengthParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionHeightParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionVariablesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionVariablesParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingRSquaredParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingRSquaredParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestRSquaredParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestRSquaredParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingMseParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingMseParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestMseParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestMseParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingRelativeErrorParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingRelativeErrorParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestRelativeErrorParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestRelativeErrorParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    #endregion
    #region properties
    // Convenience accessors for the values behind the parameters above
    // (ActualValue for lookup parameters, Value for the two boolean switches).
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ItemArray<DoubleValue> Quality {
      get { return QualityParameter.ActualValue; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public BoolValue CalculateSolutionComplexity {
      get { return CalculateSolutionComplexityParameter.Value; }
      set { CalculateSolutionComplexityParameter.Value = value; }
    }
    public BoolValue CalculateSolutionAccuracy {
      get { return CalculateSolutionAccuracyParameter.Value; }
      set { CalculateSolutionAccuracyParameter.Value = value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public SymbolicRegressionSolution BestSolution {
      get { return BestSolutionParameter.ActualValue; }
      set { BestSolutionParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
      set { BestSolutionQualityParameter.ActualValue = value; }
    }
    public IntValue BestSolutionLength {
      get { return BestSolutionLengthParameter.ActualValue; }
      set { BestSolutionLengthParameter.ActualValue = value; }
    }
    public IntValue BestSolutionHeight {
      get { return BestSolutionHeightParameter.ActualValue; }
      set { BestSolutionHeightParameter.ActualValue = value; }
    }
    public IntValue BestSolutionVariables {
      get { return BestSolutionVariablesParameter.ActualValue; }
      set { BestSolutionVariablesParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingRSquared {
      get { return BestSolutionTrainingRSquaredParameter.ActualValue; }
      set { BestSolutionTrainingRSquaredParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestRSquared {
      get { return BestSolutionTestRSquaredParameter.ActualValue; }
      set { BestSolutionTestRSquaredParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingMse {
      get { return BestSolutionTrainingMseParameter.ActualValue; }
      set { BestSolutionTrainingMseParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestMse {
      get { return BestSolutionTestMseParameter.ActualValue; }
      set { BestSolutionTestMseParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingRelativeError {
      get { return BestSolutionTrainingRelativeErrorParameter.ActualValue; }
      set { BestSolutionTrainingRelativeErrorParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestRelativeError {
      get { return BestSolutionTestRelativeErrorParameter.ActualValue; }
      set { BestSolutionTestRelativeErrorParameter.ActualValue = value; }
    }
    #endregion

    [StorableConstructor]
    private TrainingBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    private TrainingBestScaledSymbolicRegressionSolutionAnalyzer(TrainingBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new analyzer and registers all of its parameters. Both optional calculations
    /// (complexity and accuracy) are switched off by default.
    /// </summary>
    public TrainingBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic expression trees to analyze."));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the training best solution should be calculated.", new BoolValue(false)));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionAccuracyParameterName, "Determines if the accuracy of the training best solution on the training and test set should be calculated.", new BoolValue(false)));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionVariablesParameterName, "The number of variables used by the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingRSquaredParameterName, "The R² value on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestRSquaredParameterName, "The R² value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingMseParameterName, "The mean squared error on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestMseParameterName, "The mean squared error value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingRelativeErrorParameterName, "The relative error on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestRelativeErrorParameterName, "The relative error value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new TrainingBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Adds the accuracy-related parameters when they are absent after deserialization.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility
      // NOTE(review): assumes instances persisted before the accuracy calculation was introduced
      // do not contain these parameters; without them Apply() fails on the parameter lookup.
      // Confirm against the persisted file versions that still have to be supported.
      AddParameterIfMissing(new ValueParameter<BoolValue>(CalculateSolutionAccuracyParameterName, "Determines if the accuracy of the training best solution on the training and test set should be calculated.", new BoolValue(false)));
      AddParameterIfMissing(new LookupParameter<DoubleValue>(BestSolutionTrainingRSquaredParameterName, "The R² value on the training set of the best symbolic regression solution."));
      AddParameterIfMissing(new LookupParameter<DoubleValue>(BestSolutionTestRSquaredParameterName, "The R² value on the test set of the best symbolic regression solution."));
      AddParameterIfMissing(new LookupParameter<DoubleValue>(BestSolutionTrainingMseParameterName, "The mean squared error on the training set of the best symbolic regression solution."));
      AddParameterIfMissing(new LookupParameter<DoubleValue>(BestSolutionTestMseParameterName, "The mean squared error value on the test set of the best symbolic regression solution."));
      AddParameterIfMissing(new LookupParameter<DoubleValue>(BestSolutionTrainingRelativeErrorParameterName, "The relative error on the training set of the best symbolic regression solution."));
      AddParameterIfMissing(new LookupParameter<DoubleValue>(BestSolutionTestRelativeErrorParameterName, "The relative error value on the test set of the best symbolic regression solution."));
      #endregion
    }

    /// <summary>
    /// Finds the best tree in scope and, if it improves on the stored best quality, publishes a
    /// scaled solution for it together with the optional complexity and accuracy results.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      bool maximization = Maximization.Value;

      #region find best tree
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;
      SymbolicExpressionTree[] tree = SymbolicExpressionTree.ToArray();
      double[] quality = Quality.Select(x => x.Value).ToArray();
      for (int i = 0; i < tree.Length; i++) {
        if (IsBetter(quality[i], bestQuality, maximization)) {
          bestQuality = quality[i];
          bestTree = tree[i];
        }
      }
      #endregion

      // No tree was selected: the scope holds no trees, or no quality compares strictly better than
      // the initial infinite bound (e.g. all qualities are NaN). There is nothing to scale or publish.
      if (bestTree == null) return base.Apply();

      #region update best solution
      // if the best tree is better than the current best solution => update
      DoubleValue currentBestQuality = BestSolutionQuality;
      bool newBest = currentBestQuality == null || IsBetter(bestQuality, currentBestQuality.Value, maximization);
      if (newBest) {
        double lowerEstimationLimit = LowerEstimationLimit.Value;
        double upperEstimationLimit = UpperEstimationLimit.Value;
        string targetVariable = ProblemData.TargetVariable.Value;

        // calculate scaling parameters and only for the best tree using the full training set
        double alpha, beta;
        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
          lowerEstimationLimit, upperEstimationLimit,
          ProblemData.Dataset, targetVariable,
          ProblemData.TrainingIndizes, out beta, out alpha);

        // scale tree for solution
        var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
          scaledTree);
        var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
        solution.Name = BestSolutionParameterName;
        solution.Description = "Best solution on training partition found over the whole run.";

        BestSolution = solution;
        BestSolutionQuality = new DoubleValue(bestQuality);

        if (CalculateSolutionComplexity.Value) {
          UpdateComplexityResults(solution);
        }

        if (CalculateSolutionAccuracy.Value) {
          UpdateAccuracyResults(solution);
        }

        SetResult(BestSolutionQualityParameterName, null, BestSolutionQuality);
        SetResult(BestSolutionParameterName, null, BestSolution);
      }
      #endregion
      return base.Apply();
    }

    /// <summary>
    /// Adds <paramref name="parameter"/> to the parameter collection unless a parameter with the same name exists.
    /// </summary>
    private void AddParameterIfMissing(IParameter parameter) {
      if (!Parameters.ContainsKey(parameter.Name)) {
        Parameters.Add(parameter);
      }
    }

    /// <summary>
    /// Strict comparison in the direction of optimization; always false when either value is NaN.
    /// </summary>
    private static bool IsBetter(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>
    /// Stores length, height and number of input variables of the solution's tree in the
    /// corresponding parameters and in the result collection.
    /// </summary>
    private void UpdateComplexityResults(SymbolicRegressionSolution solution) {
      BestSolutionLength = new IntValue(solution.Model.SymbolicExpressionTree.Size);
      BestSolutionHeight = new IntValue(solution.Model.SymbolicExpressionTree.Height);
      BestSolutionVariables = new IntValue(solution.Model.InputVariables.Count());

      SetResult(BestSolutionLengthParameterName, "Length of the best solution on the training set.", BestSolutionLength);
      SetResult(BestSolutionHeightParameterName, "Height of the best solution on the training set.", BestSolutionHeight);
      SetResult(BestSolutionVariablesParameterName, "Number of variables used by the best solution on the training set.", BestSolutionVariables);
    }

    /// <summary>
    /// Calculates R², mean squared error and relative error of the solution on the training and the
    /// test partition and stores them in the corresponding parameters and in the result collection.
    /// </summary>
    private void UpdateAccuracyResults(SymbolicRegressionSolution solution) {
      #region update R2,MSE, Rel Error
      string targetVariable = ProblemData.TargetVariable.Value;
      IEnumerable<double> trainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(targetVariable, ProblemData.TrainingIndizes);
      IEnumerable<double> testValues = ProblemData.Dataset.GetEnumeratedVariableValues(targetVariable, ProblemData.TestIndizes);

      double trainingR2, trainingMse, trainingRelError;
      CalculateAccuracy(trainingValues, solution.EstimatedTrainingValues, out trainingR2, out trainingMse, out trainingRelError);

      double testR2, testMse, testRelError;
      CalculateAccuracy(testValues, solution.EstimatedTestValues, out testR2, out testMse, out testRelError);

      BestSolutionTrainingRSquared = new DoubleValue(trainingR2);
      BestSolutionTestRSquared = new DoubleValue(testR2);
      BestSolutionTrainingMse = new DoubleValue(trainingMse);
      BestSolutionTestMse = new DoubleValue(testMse);
      BestSolutionTrainingRelativeError = new DoubleValue(trainingRelError);
      BestSolutionTestRelativeError = new DoubleValue(testRelError);

      SetResult(BestSolutionTrainingRSquaredParameterName, null, BestSolutionTrainingRSquared);
      SetResult(BestSolutionTestRSquaredParameterName, null, BestSolutionTestRSquared);
      SetResult(BestSolutionTrainingMseParameterName, null, BestSolutionTrainingMse);
      SetResult(BestSolutionTestMseParameterName, null, BestSolutionTestMse);
      SetResult(BestSolutionTrainingRelativeErrorParameterName, null, BestSolutionTrainingRelativeError);
      SetResult(BestSolutionTestRelativeErrorParameterName, null, BestSolutionTestRelativeError);
      #endregion
    }

    /// <summary>
    /// Feeds pairs of original and estimated values into fresh online evaluators and reads
    /// R², mean squared error and mean absolute percentage error from them.
    /// </summary>
    /// <param name="originalValues">The target values of the partition.</param>
    /// <param name="estimatedValues">The values estimated by the solution for the same partition.</param>
    /// <param name="rSquared">Receives the R² reported by the evaluator.</param>
    /// <param name="meanSquaredError">Receives the mean squared error reported by the evaluator.</param>
    /// <param name="relativeError">Receives the mean absolute percentage error reported by the evaluator.</param>
    private static void CalculateAccuracy(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues,
      out double rSquared, out double meanSquaredError, out double relativeError) {
      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      // the enumerators are disposable; release them even if an evaluator throws
      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // '&' (not '&&') is deliberate: both enumerators are advanced on every iteration
        while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
          double original = originalEnumerator.Current;
          double estimated = estimatedEnumerator.Current;
          mseEvaluator.Add(original, estimated);
          r2Evaluator.Add(original, estimated);
          relErrorEvaluator.Add(original, estimated);
        }
      }

      rSquared = r2Evaluator.RSquared;
      meanSquaredError = mseEvaluator.MeanSquaredError;
      relativeError = relErrorEvaluator.MeanAbsolutePercentageError;
    }

    /// <summary>
    /// Updates the result with the given name, or adds it when the result collection has no such entry yet.
    /// </summary>
    /// <param name="name">The key of the result.</param>
    /// <param name="description">The description used when the result is created; null for none.</param>
    /// <param name="value">The value to publish.</param>
    private void SetResult(string name, string description, IItem value) {
      ResultCollection results = Results;
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else if (description == null) {
        results.Add(new Result(name, value));
      } else {
        results.Add(new Result(name, description, value));
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.