Free cookie consent management tool by TermsFeed Policy Generator

source: branches/ParameterBinding/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Analyzers/RegressionSolutionAnalyzer.cs @ 10015

Last change on this file since 10015 was 4722, checked in by swagner, 14 years ago

Merged cloning refactoring branch back into trunk (#922)

File size: 10.7 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// Base class for analyzers that publish the best regression solution, together with its
  /// R², mean squared error and average relative error on the training and test partitions,
  /// in a result collection. How the best solution is determined is left to subclasses
  /// (see <see cref="UpdateBestSolution"/>).
  /// </summary>
  [StorableClass]
  public abstract class RegressionSolutionAnalyzer : SingleSuccessorOperator {
    // Names under which the parameters are registered in the Parameters collection.
    private const string ProblemDataParameterName = "ProblemData";
    private const string QualityParameterName = "Quality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string BestSolutionQualityParameterName = "BestSolutionQuality";
    private const string GenerationsParameterName = "Generations";
    private const string ResultsParameterName = "Results";

    // Names of the entries written to the result collection.
    // NOTE(review): "validiation" is misspelled, but the string is the key of the result entry;
    // presumably other code or stored results look it up by this exact name, so it is kept as is.
    private const string BestSolutionResultName = "Best solution (on validiation set)";
    private const string BestSolutionTrainingRSquared = "Best solution R² (training)";
    private const string BestSolutionTestRSquared = "Best solution R² (test)";
    private const string BestSolutionTrainingMse = "Best solution mean squared error (training)";
    private const string BestSolutionTestMse = "Best solution mean squared error (test)";
    private const string BestSolutionTrainingRelativeError = "Best solution average relative error (training)";
    private const string BestSolutionTestRelativeError = "Best solution average relative error (test)";
    private const string BestSolutionGeneration = "Best solution generation";

    #region parameter properties
    /// <summary>Parameter holding the problem data for which the solution is analyzed.</summary>
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    /// <summary>Parameter holding the qualities of the solutions that should be analyzed.</summary>
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    /// <summary>Parameter holding the upper estimation limit that was set for the evaluation.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>Parameter holding the lower estimation limit that was set for the evaluation.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    /// <summary>Parameter holding the quality of the best regression solution.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>Parameter holding the result collection in which the best solution is stored.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Parameter holding the number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="UpperEstimationLimitParameter"/>.</summary>
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="LowerEstimationLimitParameter"/>.</summary>
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="QualityParameter"/>.</summary>
    public ItemArray<DoubleValue> Quality {
      get { return QualityParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ResultsParameter"/>.</summary>
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    #endregion


    [StorableConstructor]
    protected RegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    protected RegressionSolutionAnalyzer(RegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }
    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// </summary>
    public RegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic regression trees which should be analyzed."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    }

    /// <summary>
    /// Adds the generations parameter to instances that were stored without it.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // backwards compatibility
      if (!Parameters.ContainsKey(GenerationsParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      }
    }

    /// <summary>
    /// Lets the subclass update the best solution and refreshes the result entries if there was
    /// no best quality before or the previous best quality is greater than the new one.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Remember the quality item that was the best one before the subclass had a chance to change it.
      // NOTE(review): the comparison below only detects an improvement if UpdateBestSolution() replaces
      // the item stored in BestSolutionQualityParameter instead of changing its Value in place - confirm
      // against the subclasses.
      DoubleValue prevBestSolutionQuality = BestSolutionQualityParameter.ActualValue;
      var bestSolution = UpdateBestSolution();
      if (prevBestSolutionQuality == null || prevBestSolutionQuality.Value > BestSolutionQualityParameter.ActualValue.Value) {
        RegressionSolutionAnalyzer.UpdateBestSolutionResults(bestSolution, ProblemData, Results, GenerationsParameter.ActualValue);
      }

      return base.Apply();
    }

    /// <summary>
    /// Calculates R², mean squared error and average relative error of <paramref name="bestSolution"/>
    /// on the training and test partitions and stores them, together with the solution itself,
    /// in <paramref name="results"/>. Each entry is updated if it already exists and added otherwise.
    /// </summary>
    /// <param name="bestSolution">The solution whose estimated values are compared with the target values.</param>
    /// <param name="problemData">Supplies the dataset, the target variable and the training/test indexes.</param>
    /// <param name="results">The result collection that receives the entries.</param>
    /// <param name="CurrentGeneration">The current generation; may be null, in which case no generation entry is written.</param>
    public static void UpdateBestSolutionResults(DataAnalysisSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, IntValue CurrentGeneration) {
      #region update R2,MSE, Rel Error
      IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, problemData.TrainingIndizes);
      IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, problemData.TestIndizes);

      double trainingR2, trainingMse, trainingRelError;
      CalculateErrorMeasures(trainingValues, bestSolution.EstimatedTrainingValues, out trainingR2, out trainingMse, out trainingRelError);

      double testR2, testMse, testRelError;
      CalculateErrorMeasures(testValues, bestSolution.EstimatedTestValues, out testR2, out testMse, out testRelError);

      // Every entry is added or updated on its own, so a collection that contains only some of the
      // entries (e.g. no generation entry because an earlier call had no generation) is handled as well.
      SetResult(results, BestSolutionResultName, bestSolution);
      SetResult(results, BestSolutionTrainingRSquared, new DoubleValue(trainingR2));
      SetResult(results, BestSolutionTestRSquared, new DoubleValue(testR2));
      SetResult(results, BestSolutionTrainingMse, new DoubleValue(trainingMse));
      SetResult(results, BestSolutionTestMse, new DoubleValue(testMse));
      SetResult(results, BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError));
      SetResult(results, BestSolutionTestRelativeError, new DoubleValue(testRelError));
      if (CurrentGeneration != null) { // this check is needed because linear regression solutions do not have a generations parameter
        SetResult(results, BestSolutionGeneration, new IntValue(CurrentGeneration.Value));
      }
      #endregion
    }

    /// <summary>
    /// Feeds pairs of original and estimated values into fresh online evaluators and returns
    /// the resulting R², mean squared error and mean absolute percentage error.
    /// </summary>
    private static void CalculateErrorMeasures(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues, out double rSquared, out double meanSquaredError, out double relativeError) {
      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      // The enumerators are disposable and must be released even if an evaluator throws.
      using (IEnumerator<double> originalEnumerator = originalValues.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // The non-short-circuiting '&' advances both enumerators on every iteration;
        // the loop ends as soon as either sequence is exhausted.
        while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
          mseEvaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
          r2Evaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
          relErrorEvaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
        }
      }

      rSquared = r2Evaluator.RSquared;
      meanSquaredError = mseEvaluator.MeanSquaredError;
      relativeError = relErrorEvaluator.MeanAbsolutePercentageError;
    }

    /// <summary>
    /// Sets the value of the result called <paramref name="name"/>, adding the result if it does not exist yet.
    /// </summary>
    private static void SetResult(ResultCollection results, string name, IItem value) {
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else {
        results.Add(new Result(name, value));
      }
    }

    /// <summary>
    /// Determines the best solution. Implemented by subclasses; <see cref="Apply"/> reads
    /// <see cref="BestSolutionQualityParameter"/> after this call to decide whether the results are refreshed.
    /// </summary>
    /// <returns>The solution that is passed on to <see cref="UpdateBestSolutionResults"/>.</returns>
    protected abstract DataAnalysisSolution UpdateBestSolution();
  }
}
Note: See TracBrowser for help on using the repository browser.