Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/TrainingBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 10743

Last change on this file since 10743 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 23.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using HeuristicLab.Problems.DataAnalysis.Evaluators;
35
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the training best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every application the tree with the best quality is picked from the analyzed trees. If it is
  /// better than the best quality stored so far (or nothing has been stored yet), the tree is linearly
  /// scaled using the training partition, wrapped in a <see cref="SymbolicRegressionSolution"/> and
  /// written to the result collection, optionally together with complexity and accuracy figures.
  /// </remarks>
  [Item("TrainingBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the training best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class TrainingBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    // Names under which the parameters are registered. The "Best training solution ..." names are
    // additionally used as keys of the entries written to the result collection.
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string QualityParameterName = "Quality";
    private const string MaximizationParameterName = "Maximization";
    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    private const string CalculateSolutionAccuracyParameterName = "CalculateSolutionAccuracy";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "DataAnalysisProblemData";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string BestSolutionParameterName = "Best training solution";
    private const string BestSolutionQualityParameterName = "Best training solution quality";
    private const string BestSolutionLengthParameterName = "Best training solution length";
    private const string BestSolutionHeightParameterName = "Best training solution height";
    private const string BestSolutionVariablesParameterName = "Best training solution variables";
    private const string BestSolutionTrainingRSquaredParameterName = "Best training solution R² (training)";
    private const string BestSolutionTestRSquaredParameterName = "Best training solution R² (test)";
    private const string BestSolutionTrainingMseParameterName = "Best training solution mean squared error (training)";
    private const string BestSolutionTestMseParameterName = "Best training solution mean squared error (test)";
    private const string BestSolutionTrainingRelativeErrorParameterName = "Best training solution relative error (training)";
    private const string BestSolutionTestRelativeErrorParameterName = "Best training solution relative error (test)";
    private const string ResultsParameterName = "Results";

    #region parameter properties
    // Typed accessors for the parameters registered in the default constructor.
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    }
    public IValueParameter<BoolValue> CalculateSolutionAccuracyParameter {
      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionAccuracyParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }

    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionLengthParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionHeightParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    }
    public ILookupParameter<IntValue> BestSolutionVariablesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionVariablesParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingRSquaredParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingRSquaredParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestRSquaredParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestRSquaredParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingMseParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingMseParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestMseParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestMseParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTrainingRelativeErrorParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTrainingRelativeErrorParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionTestRelativeErrorParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionTestRelativeErrorParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    #endregion
    #region properties
    // Convenience accessors that read (and, where a setter exists, write) the values behind the
    // parameters above.
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ItemArray<DoubleValue> Quality {
      get { return QualityParameter.ActualValue; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public BoolValue CalculateSolutionComplexity {
      get { return CalculateSolutionComplexityParameter.Value; }
      set { CalculateSolutionComplexityParameter.Value = value; }
    }
    public BoolValue CalculateSolutionAccuracy {
      get { return CalculateSolutionAccuracyParameter.Value; }
      set { CalculateSolutionAccuracyParameter.Value = value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public SymbolicRegressionSolution BestSolution {
      get { return BestSolutionParameter.ActualValue; }
      set { BestSolutionParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
      set { BestSolutionQualityParameter.ActualValue = value; }
    }
    public IntValue BestSolutionLength {
      get { return BestSolutionLengthParameter.ActualValue; }
      set { BestSolutionLengthParameter.ActualValue = value; }
    }
    public IntValue BestSolutionHeight {
      get { return BestSolutionHeightParameter.ActualValue; }
      set { BestSolutionHeightParameter.ActualValue = value; }
    }
    public IntValue BestSolutionVariables {
      get { return BestSolutionVariablesParameter.ActualValue; }
      set { BestSolutionVariablesParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingRSquared {
      get { return BestSolutionTrainingRSquaredParameter.ActualValue; }
      set { BestSolutionTrainingRSquaredParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestRSquared {
      get { return BestSolutionTestRSquaredParameter.ActualValue; }
      set { BestSolutionTestRSquaredParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingMse {
      get { return BestSolutionTrainingMseParameter.ActualValue; }
      set { BestSolutionTrainingMseParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestMse {
      get { return BestSolutionTestMseParameter.ActualValue; }
      set { BestSolutionTestMseParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTrainingRelativeError {
      get { return BestSolutionTrainingRelativeErrorParameter.ActualValue; }
      set { BestSolutionTrainingRelativeErrorParameter.ActualValue = value; }
    }
    public DoubleValue BestSolutionTestRelativeError {
      get { return BestSolutionTestRelativeErrorParameter.ActualValue; }
      set { BestSolutionTestRelativeErrorParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>Constructor used by the persistence framework; forwards to the base class.</summary>
    [StorableConstructor]
    private TrainingBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor; forwards to the base class.</summary>
    private TrainingBestScaledSymbolicRegressionSolutionAnalyzer(TrainingBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the analyzer and registers all of its parameters. Both optional calculations
    /// (solution complexity and solution accuracy) are switched off by default.
    /// </summary>
    public TrainingBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic expression trees to analyze."));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the training best solution should be calculated.", new BoolValue(false)));
      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionAccuracyParameterName, "Determines if the accuracy of the training best solution on the training and test set should be calculated.", new BoolValue(false)));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(BestSolutionVariablesParameterName, "The number of variables used by the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingRSquaredParameterName, "The R² value on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestRSquaredParameterName, "The R² value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingMseParameterName, "The mean squared error on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestMseParameterName, "The mean squared error value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTrainingRelativeErrorParameterName, "The relative error on the training set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionTestRelativeErrorParameterName, "The relative error value on the test set of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    }

    /// <summary>Creates a copy of this analyzer through the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new TrainingBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
    }

    // Intentionally empty.
    // NOTE(review): presumably kept as the place for future backwards-compatibility fix-ups after
    // deserialization - confirm before removing.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() { }

    /// <summary>
    /// Finds the best tree among the analyzed trees and, if it improves on the best quality stored
    /// so far, stores a scaled solution built from it together with the optional statistics.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Resolve the optimization direction once instead of on every comparison.
      bool maximization = Maximization.Value;

      #region find best tree
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;
      SymbolicExpressionTree[] tree = SymbolicExpressionTree.ToArray();
      double[] quality = Quality.Select(x => x.Value).ToArray();
      for (int i = 0; i < tree.Length; i++) {
        if (IsBetter(quality[i], bestQuality, maximization)) {
          bestQuality = quality[i];
          bestTree = tree[i];
        }
      }
      #endregion

      // No tree was selected: either there are no trees at all, or no quality compared as better
      // than the initial infinity (e.g. NaN qualities). Without this guard the very first
      // application would pass a null tree on to scaling and fail there.
      if (bestTree == null) {
        return base.Apply();
      }

      #region update best solution
      // if the best tree is better than the current best solution => update
      DoubleValue storedBestQuality = BestSolutionQuality;
      bool newBest = storedBestQuality == null || IsBetter(bestQuality, storedBestQuality.Value, maximization);
      if (newBest) {
        DataAnalysisProblemData problemData = ProblemData;
        SymbolicRegressionSolution solution = CreateScaledSolution(bestTree, problemData);

        BestSolution = solution;
        BestSolutionQuality = new DoubleValue(bestQuality);

        if (CalculateSolutionComplexity.Value) {
          UpdateComplexityResults(solution);
        }
        if (CalculateSolutionAccuracy.Value) {
          UpdateAccuracyResults(solution, problemData);
        }

        AddOrUpdateResult(BestSolutionQualityParameterName, BestSolutionQuality);
        AddOrUpdateResult(BestSolutionParameterName, BestSolution);
      }
      #endregion
      return base.Apply();
    }

    /// <summary>
    /// Tells whether <paramref name="candidate"/> is strictly better than <paramref name="reference"/>
    /// for the given optimization direction (greater when maximizing, smaller when minimizing).
    /// </summary>
    private static bool IsBetter(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>
    /// Computes the linear scaling parameters of <paramref name="bestTree"/> on the training indices,
    /// scales the tree and wraps it, together with clones of the interpreter and the problem data,
    /// in a named solution.
    /// </summary>
    private SymbolicRegressionSolution CreateScaledSolution(SymbolicExpressionTree bestTree, DataAnalysisProblemData problemData) {
      double lowerEstimationLimit = LowerEstimationLimit.Value;
      double upperEstimationLimit = UpperEstimationLimit.Value;
      string targetVariable = problemData.TargetVariable.Value;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;

      // calculate scaling parameters and only for the best tree using the full training set
      // (the evaluator hands back beta first, then alpha; the scaler takes alpha first)
      double alpha, beta;
      SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, bestTree,
        lowerEstimationLimit, upperEstimationLimit,
        problemData.Dataset, targetVariable,
        problemData.TrainingIndizes, out beta, out alpha);

      // scale tree for solution
      var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(), scaledTree);
      var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)problemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on training partition found over the whole run.";
      return solution;
    }

    /// <summary>
    /// Stores length, height and number of input variables of the solution's model in the
    /// corresponding parameters and in the result collection.
    /// </summary>
    private void UpdateComplexityResults(SymbolicRegressionSolution solution) {
      BestSolutionLength = new IntValue(solution.Model.SymbolicExpressionTree.Size);
      BestSolutionHeight = new IntValue(solution.Model.SymbolicExpressionTree.Height);
      BestSolutionVariables = new IntValue(solution.Model.InputVariables.Count());

      AddOrUpdateResult(BestSolutionLengthParameterName, "Length of the best solution on the training set.", BestSolutionLength);
      AddOrUpdateResult(BestSolutionHeightParameterName, "Height of the best solution on the training set.", BestSolutionHeight);
      AddOrUpdateResult(BestSolutionVariablesParameterName, "Number of variables used by the best solution on the training set.", BestSolutionVariables);
    }

    /// <summary>
    /// Calculates R², mean squared error and mean absolute percentage error of the solution on the
    /// training and on the test indices and stores them in the corresponding parameters and in the
    /// result collection.
    /// </summary>
    private void UpdateAccuracyResults(SymbolicRegressionSolution solution, DataAnalysisProblemData problemData) {
      string targetVariable = problemData.TargetVariable.Value;
      IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariable, problemData.TrainingIndizes);
      IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariable, problemData.TestIndizes);

      double trainingR2, trainingMse, trainingRelError;
      CalculateAccuracy(trainingValues, solution.EstimatedTrainingValues, out trainingR2, out trainingMse, out trainingRelError);

      double testR2, testMse, testRelError;
      CalculateAccuracy(testValues, solution.EstimatedTestValues, out testR2, out testMse, out testRelError);

      BestSolutionTrainingRSquared = new DoubleValue(trainingR2);
      BestSolutionTestRSquared = new DoubleValue(testR2);
      BestSolutionTrainingMse = new DoubleValue(trainingMse);
      BestSolutionTestMse = new DoubleValue(testMse);
      BestSolutionTrainingRelativeError = new DoubleValue(trainingRelError);
      BestSolutionTestRelativeError = new DoubleValue(testRelError);

      AddOrUpdateResult(BestSolutionTrainingRSquaredParameterName, BestSolutionTrainingRSquared);
      AddOrUpdateResult(BestSolutionTestRSquaredParameterName, BestSolutionTestRSquared);
      AddOrUpdateResult(BestSolutionTrainingMseParameterName, BestSolutionTrainingMse);
      AddOrUpdateResult(BestSolutionTestMseParameterName, BestSolutionTestMse);
      AddOrUpdateResult(BestSolutionTrainingRelativeErrorParameterName, BestSolutionTrainingRelativeError);
      AddOrUpdateResult(BestSolutionTestRelativeErrorParameterName, BestSolutionTestRelativeError);
    }

    /// <summary>
    /// Feeds the pairs of original and estimated values into freshly created online evaluators and
    /// returns their R², mean squared error and mean absolute percentage error.
    /// </summary>
    /// <param name="originalValues">The target values.</param>
    /// <param name="estimatedValues">The values estimated by the solution, in the same order.</param>
    /// <param name="rSquared">Receives the R² reported by the evaluator.</param>
    /// <param name="meanSquaredError">Receives the mean squared error reported by the evaluator.</param>
    /// <param name="relativeError">Receives the mean absolute percentage error reported by the evaluator.</param>
    private static void CalculateAccuracy(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues,
      out double rSquared, out double meanSquaredError, out double relativeError) {
      var mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
      var relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
      var r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      // The enumerators are disposable; release them even if an evaluator throws.
      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // Non-short-circuit '&' on purpose: both enumerators are advanced on every step;
        // the loop ends as soon as either sequence is exhausted.
        while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
          double original = originalEnumerator.Current;
          double estimated = estimatedEnumerator.Current;
          mseEvaluator.Add(original, estimated);
          r2Evaluator.Add(original, estimated);
          relErrorEvaluator.Add(original, estimated);
        }
      }

      rSquared = r2Evaluator.RSquared;
      meanSquaredError = mseEvaluator.MeanSquaredError;
      relativeError = relErrorEvaluator.MeanAbsolutePercentageError;
    }

    /// <summary>
    /// Adds a result without description under <paramref name="name"/>, or replaces the value of the
    /// result that already exists under that name.
    /// </summary>
    private void AddOrUpdateResult(string name, IItem value) {
      ResultCollection results = Results;
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else {
        results.Add(new Result(name, value));
      }
    }

    /// <summary>
    /// Adds a result with the given description under <paramref name="name"/>, or replaces the value
    /// of the result that already exists under that name (its description is left untouched).
    /// </summary>
    private void AddOrUpdateResult(string name, string description, IItem value) {
      ResultCollection results = Results;
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else {
        results.Add(new Result(name, description, value));
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.