Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Analyzers/RegressionSolutionAnalyzer.cs @ 3923

Last change on this file since 3923 was 3923, checked in by mkommend, 14 years ago

fixed bug in RegressionSolutionAnalyzer - linear regression does not contain a parameter generations (ticket #938)

File size: 9.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37
38namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
39  [StorableClass]
40  public abstract class RegressionSolutionAnalyzer : SingleSuccessorOperator {
41    private const string ProblemDataParameterName = "ProblemData";
42    private const string QualityParameterName = "Quality";
43    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
44    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
45    private const string BestSolutionQualityParameterName = "BestSolutionQuality";
46    private const string GenerationsParameterName = "Generations";
47    private const string ResultsParameterName = "Results";
48    private const string BestSolutionResultName = "Best solution (on validiation set)";
49    private const string BestSolutionTrainingRSquared = "Best solution R² (training)";
50    private const string BestSolutionTestRSquared = "Best solution R² (test)";
51    private const string BestSolutionTrainingMse = "Best solution mean squared error (training)";
52    private const string BestSolutionTestMse = "Best solution mean squared error (test)";
53    private const string BestSolutionTrainingRelativeError = "Best solution average relative error (training)";
54    private const string BestSolutionTestRelativeError = "Best solution average relative error (test)";
55    private const string BestSolutionGeneration = "Best solution generation";
56
57    #region parameter properties
58    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
59      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
60    }
61    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
62      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
63    }
64    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
65      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
66    }
67    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
68      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
69    }
70    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
71      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
72    }
73    public ILookupParameter<ResultCollection> ResultsParameter {
74      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
75    }
76    public ILookupParameter<IntValue> GenerationsParameter {
77      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
78    }
79    #endregion
80    #region properties
81    public DoubleValue UpperEstimationLimit {
82      get { return UpperEstimationLimitParameter.ActualValue; }
83    }
84    public DoubleValue LowerEstimationLimit {
85      get { return LowerEstimationLimitParameter.ActualValue; }
86    }
87    public ItemArray<DoubleValue> Quality {
88      get { return QualityParameter.ActualValue; }
89    }
90    public ResultCollection Results {
91      get { return ResultsParameter.ActualValue; }
92    }
93    public DataAnalysisProblemData ProblemData {
94      get { return ProblemDataParameter.ActualValue; }
95    }
96    #endregion
97
98    public RegressionSolutionAnalyzer()
99      : base() {
100      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
101      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
102      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
103      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic regression trees which should be analyzed."));
104      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best regression solution."));
105      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
106      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
107    }
108
109    [StorableHook(HookType.AfterDeserialization)]
110    public void Initialize() {
111      // backwards compatibility
112      if (!Parameters.ContainsKey(GenerationsParameterName)) {
113        Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
114      }
115    }
116
117    public override IOperation Apply() {
118      DoubleValue prevBestSolutionQuality = BestSolutionQualityParameter.ActualValue;
119      var bestSolution = UpdateBestSolution();
120      if (prevBestSolutionQuality == null || prevBestSolutionQuality.Value > BestSolutionQualityParameter.ActualValue.Value) {
121        UpdateBestSolutionResults(bestSolution);
122      }
123
124      return base.Apply();
125    }
126    private void UpdateBestSolutionResults(DataAnalysisSolution bestSolution) {
127      var solution = bestSolution;
128      #region update R2,MSE, Rel Error
129      double[] trainingValues = ProblemData.Dataset.GetVariableValues(
130        ProblemData.TargetVariable.Value,
131        ProblemData.TrainingSamplesStart.Value,
132        ProblemData.TrainingSamplesEnd.Value);
133      double[] testValues = ProblemData.Dataset.GetVariableValues(
134        ProblemData.TargetVariable.Value,
135        ProblemData.TestSamplesStart.Value,
136        ProblemData.TestSamplesEnd.Value);
137      double trainingR2 = SimpleRSquaredEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
138      double testR2 = SimpleRSquaredEvaluator.Calculate(testValues, solution.EstimatedTestValues);
139      double trainingMse = SimpleMSEEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
140      double testMse = SimpleMSEEvaluator.Calculate(testValues, solution.EstimatedTestValues);
141      double trainingRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
142      double testRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, solution.EstimatedTestValues);
143      if (Results.ContainsKey(BestSolutionResultName)) {
144        Results[BestSolutionResultName].Value = solution;
145        Results[BestSolutionTrainingRSquared].Value = new DoubleValue(trainingR2);
146        Results[BestSolutionTestRSquared].Value = new DoubleValue(testR2);
147        Results[BestSolutionTrainingMse].Value = new DoubleValue(trainingMse);
148        Results[BestSolutionTestMse].Value = new DoubleValue(testMse);
149        Results[BestSolutionTrainingRelativeError].Value = new DoubleValue(trainingRelError);
150        Results[BestSolutionTestRelativeError].Value = new DoubleValue(testRelError);
151        if (GenerationsParameter.ActualValue != null) // this check is needed because linear regression solutions do not have a generations parameter
152          Results[BestSolutionGeneration].Value = new IntValue(GenerationsParameter.ActualValue.Value);
153      } else {
154        Results.Add(new Result(BestSolutionResultName, solution));
155        Results.Add(new Result(BestSolutionTrainingRSquared, new DoubleValue(trainingR2)));
156        Results.Add(new Result(BestSolutionTestRSquared, new DoubleValue(testR2)));
157        Results.Add(new Result(BestSolutionTrainingMse, new DoubleValue(trainingMse)));
158        Results.Add(new Result(BestSolutionTestMse, new DoubleValue(testMse)));
159        Results.Add(new Result(BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError)));
160        Results.Add(new Result(BestSolutionTestRelativeError, new DoubleValue(testRelError)));
161        if (GenerationsParameter.ActualValue != null)
162          Results.Add(new Result(BestSolutionGeneration, new IntValue(GenerationsParameter.ActualValue.Value)));
163      }
164      #endregion
165    }
166
167    protected abstract DataAnalysisSolution UpdateBestSolution();
168  }
169}
Note: See TracBrowser for help on using the repository browser.