Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Analyzers/RegressionSolutionAnalyzer.cs @ 3997

Last change on this file since 3997 was 3997, checked in by gkronber, 14 years ago

Minor improvements concerning efficiency of symbolic expression tree encoding data structures and operators. #1073

File size: 10.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Evaluators;
37using System;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// Abstract base operator for analyzers of regression solutions. On every application it asks the
  /// concrete subclass for the current best solution (<see cref="UpdateBestSolution"/>) and, if the
  /// best-solution quality is new or has decreased, stores the solution together with its R²,
  /// mean squared error and mean absolute percentage error (training and test partition) in the
  /// result collection.
  /// </summary>
  [StorableClass]
  public abstract class RegressionSolutionAnalyzer : SingleSuccessorOperator {
    // Names of the operator parameters.
    private const string ProblemDataParameterName = "ProblemData";
    private const string QualityParameterName = "Quality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string BestSolutionQualityParameterName = "BestSolutionQuality";
    private const string GenerationsParameterName = "Generations";
    private const string ResultsParameterName = "Results";

    // Names of the entries written to the result collection.
    // NOTE: "validiation" is misspelled, but the string is the runtime key under which the result is
    // stored and looked up, so it is intentionally left unchanged.
    private const string BestSolutionResultName = "Best solution (on validiation set)";
    private const string BestSolutionTrainingRSquared = "Best solution R² (training)";
    private const string BestSolutionTestRSquared = "Best solution R² (test)";
    private const string BestSolutionTrainingMse = "Best solution mean squared error (training)";
    private const string BestSolutionTestMse = "Best solution mean squared error (test)";
    private const string BestSolutionTrainingRelativeError = "Best solution average relative error (training)";
    private const string BestSolutionTestRelativeError = "Best solution average relative error (test)";
    private const string BestSolutionGeneration = "Best solution generation";

    #region parameter properties
    /// <summary>Parameter holding the problem data for which the analyzed trees are solutions.</summary>
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    /// <summary>Parameter holding the qualities of the regression trees to analyze.</summary>
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    /// <summary>Parameter holding the upper estimation limit used during tree evaluation.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>Parameter holding the lower estimation limit used during tree evaluation.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    /// <summary>Parameter holding the quality of the best regression solution.</summary>
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    /// <summary>Parameter holding the result collection that receives the best solution.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Parameter holding the number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="UpperEstimationLimitParameter"/>.</summary>
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="LowerEstimationLimitParameter"/>.</summary>
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="QualityParameter"/>.</summary>
    public ItemArray<DoubleValue> Quality {
      get { return QualityParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ResultsParameter"/>.</summary>
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// </summary>
    public RegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The qualities of the symbolic regression trees which should be analyzed."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    }

    /// <summary>
    /// Hook executed after deserialization. Adds the generations parameter when the deserialized
    /// instance does not contain it yet (backwards compatibility).
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    public void Initialize() {
      // backwards compatibility
      if (!Parameters.ContainsKey(GenerationsParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      }
    }

    /// <summary>
    /// Lets the subclass update the best solution and refreshes the best-solution results when no
    /// best quality was known before or the previously known quality value is greater than the
    /// quality value after the update.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Snapshot the numeric value before UpdateBestSolution runs: should an implementation change
      // the existing DoubleValue in place instead of assigning a new instance, a comparison through
      // the retained reference would compare the value with itself and never detect an improvement.
      DoubleValue previousQualityItem = BestSolutionQualityParameter.ActualValue;
      bool hadPreviousQuality = previousQualityItem != null;
      double previousQuality = hadPreviousQuality ? previousQualityItem.Value : double.NaN;

      var bestSolution = UpdateBestSolution();

      if (!hadPreviousQuality || previousQuality > BestSolutionQualityParameter.ActualValue.Value) {
        RegressionSolutionAnalyzer.UpdateBestSolutionResults(bestSolution, ProblemData, Results, GenerationsParameter.ActualValue);
      }

      return base.Apply();
    }

    /// <summary>
    /// Calculates R², mean squared error and mean absolute percentage error of
    /// <paramref name="bestSolution"/> on the training and the test partition of
    /// <paramref name="problemData"/> and stores the solution and these values in
    /// <paramref name="results"/>. Every result entry is added if it is missing and overwritten
    /// otherwise.
    /// </summary>
    /// <param name="bestSolution">The solution whose estimated training/test values are evaluated and which is stored as best solution.</param>
    /// <param name="problemData">Supplies the dataset, the target variable and the training/test sample ranges.</param>
    /// <param name="results">The result collection that receives the entries.</param>
    /// <param name="CurrentGeneration">The current generation; may be null, in which case no generation entry is written.</param>
    public static void UpdateBestSolutionResults(DataAnalysisSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, IntValue CurrentGeneration) {
      #region update R2,MSE, Rel Error
      IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(
        problemData.TargetVariable.Value,
        problemData.TrainingSamplesStart.Value,
        problemData.TrainingSamplesEnd.Value);
      IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(
        problemData.TargetVariable.Value,
        problemData.TestSamplesStart.Value,
        problemData.TestSamplesEnd.Value);
      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();

      #region training
      AddSamples(trainingValues, bestSolution.EstimatedTrainingValues, mseEvaluator, r2Evaluator, relErrorEvaluator);
      double trainingR2 = r2Evaluator.RSquared;
      double trainingMse = mseEvaluator.MeanSquaredError;
      double trainingRelError = relErrorEvaluator.MeanAbsolutePercentageError;
      #endregion

      // the same evaluator instances are reused for the test partition
      mseEvaluator.Reset();
      relErrorEvaluator.Reset();
      r2Evaluator.Reset();

      #region test
      AddSamples(testValues, bestSolution.EstimatedTestValues, mseEvaluator, r2Evaluator, relErrorEvaluator);
      double testR2 = r2Evaluator.RSquared;
      double testMse = mseEvaluator.MeanSquaredError;
      double testRelError = relErrorEvaluator.MeanAbsolutePercentageError;
      #endregion

      // Each entry is handled on its own so that a collection which holds only some of the entries
      // (e.g. no generation entry because CurrentGeneration was null on an earlier call) is
      // completed instead of failing on a missing or duplicate key.
      SetResult(results, BestSolutionResultName, bestSolution);
      SetResult(results, BestSolutionTrainingRSquared, new DoubleValue(trainingR2));
      SetResult(results, BestSolutionTestRSquared, new DoubleValue(testR2));
      SetResult(results, BestSolutionTrainingMse, new DoubleValue(trainingMse));
      SetResult(results, BestSolutionTestMse, new DoubleValue(testMse));
      SetResult(results, BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError));
      SetResult(results, BestSolutionTestRelativeError, new DoubleValue(testRelError));
      if (CurrentGeneration != null) { // this check is needed because linear regression solutions do not have a generations parameter
        SetResult(results, BestSolutionGeneration, new IntValue(CurrentGeneration.Value));
      }
      #endregion
    }

    /// <summary>
    /// Feeds the pairs (original, estimated) into the three evaluators until one of the two
    /// sequences ends, and disposes both enumerators afterwards.
    /// </summary>
    private static void AddSamples(IEnumerable<double> original, IEnumerable<double> estimated,
      OnlineMeanSquaredErrorEvaluator mseEvaluator, OnlinePearsonsRSquaredEvaluator r2Evaluator,
      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator) {
      using (IEnumerator<double> originalEnumerator = original.GetEnumerator())
      using (IEnumerator<double> estimatedEnumerator = estimated.GetEnumerator()) {
        // '&' (not '&&') so that both enumerators are advanced on every iteration
        while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
          double originalValue = originalEnumerator.Current;
          double estimatedValue = estimatedEnumerator.Current;
          mseEvaluator.Add(originalValue, estimatedValue);
          r2Evaluator.Add(originalValue, estimatedValue);
          relErrorEvaluator.Add(originalValue, estimatedValue);
        }
      }
    }

    /// <summary>
    /// Overwrites the value of the result called <paramref name="name"/> if it exists, otherwise
    /// adds a new result with that name.
    /// </summary>
    private static void SetResult(ResultCollection results, string name, IItem value) {
      if (results.ContainsKey(name)) {
        results[name].Value = value;
      } else {
        results.Add(new Result(name, value));
      }
    }

    /// <summary>
    /// Implemented by subclasses to determine the current best solution. <see cref="Apply"/> reads
    /// <see cref="BestSolutionQualityParameter"/> after this call, so implementations are expected
    /// to have set its actual value.
    /// </summary>
    /// <returns>The solution that is stored in the results when the best quality improved.</returns>
    protected abstract DataAnalysisSolution UpdateBestSolution();
  }
}
Note: See TracBrowser for help on using the repository browser.