Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/PopulationSymbolicRegressionModelQualityAnalyzer.cs @ 3666

Last change on this file since 3666 was 3666, checked in by gkronber, 14 years ago

Implemented operators to analyze and track training/test mse/R²/rel. Error over time for symbolic regression problems. #999

File size: 21.5 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Optimization.Operators;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Evaluators;
using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator for analyzing the quality of symbolic regression solutions symbolic expression tree encoding.
  /// </summary>
  /// <remarks>
  /// The constructor builds an operator graph that
  /// (1) runs a training and a test <see cref="SymbolicRegressionModelQualityCalculator"/> below a
  ///     <see cref="UniformSubScopesProcessor"/>, writing mean squared error, average relative error and
  ///     R² values under the "Training…" / "Test…" names,
  /// (2) derives min/average/max values for each of those six quality values,
  /// (3) appends the min/average/max values to three data tables (MSE, R², relative error) and
  /// (4) hands the three data tables to a <see cref="ResultsCollector"/>.
  /// </remarks>
  [Item("PopulationSymbolicRegressionModelQualityAnalyzer", "An operator for analyzing the quality of symbolic regression solutions symbolic expression tree encoding.")]
  [StorableClass]
  public sealed class PopulationSymbolicRegressionModelQualityAnalyzer : AlgorithmOperator, ISymbolicRegressionSolutionPopulationAnalyzer {
    // Names of the parameters registered on this operator.
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ResultsParameterName = "Results";

    // Names under which the inner operators exchange training-partition quality values.
    private const string TrainingMeanSquaredErrorQualityParameterName = "TrainingMeanSquaredError";
    private const string MinTrainingMeanSquaredErrorQualityParameterName = "MinTrainingMeanSquaredError";
    private const string MaxTrainingMeanSquaredErrorQualityParameterName = "MaxTrainingMeanSquaredError";
    private const string AverageTrainingMeanSquaredErrorQualityParameterName = "AverageTrainingMeanSquaredError";

    private const string TrainingAverageRelativeErrorQualityParameterName = "TrainingAverageRelativeError";
    private const string MinTrainingAverageRelativeErrorQualityParameterName = "MinTrainingAverageRelativeError";
    private const string MaxTrainingAverageRelativeErrorQualityParameterName = "MaxTrainingAverageRelativeError";
    private const string AverageTrainingAverageRelativeErrorQualityParameterName = "AverageTrainingAverageRelativeError";

    private const string TrainingRSquaredQualityParameterName = "TrainingRSquared";
    private const string MinTrainingRSquaredQualityParameterName = "MinTrainingRSquared";
    private const string MaxTrainingRSquaredQualityParameterName = "MaxTrainingRSquared";
    private const string AverageTrainingRSquaredQualityParameterName = "AverageTrainingRSquared";

    // Names under which the inner operators exchange test-partition quality values.
    private const string TestMeanSquaredErrorQualityParameterName = "TestMeanSquaredError";
    private const string MinTestMeanSquaredErrorQualityParameterName = "MinTestMeanSquaredError";
    private const string MaxTestMeanSquaredErrorQualityParameterName = "MaxTestMeanSquaredError";
    private const string AverageTestMeanSquaredErrorQualityParameterName = "AverageTestMeanSquaredError";

    private const string TestAverageRelativeErrorQualityParameterName = "TestAverageRelativeError";
    private const string MinTestAverageRelativeErrorQualityParameterName = "MinTestAverageRelativeError";
    private const string MaxTestAverageRelativeErrorQualityParameterName = "MaxTestAverageRelativeError";
    private const string AverageTestAverageRelativeErrorQualityParameterName = "AverageTestAverageRelativeError";

    private const string TestRSquaredQualityParameterName = "TestRSquared";
    private const string MinTestRSquaredQualityParameterName = "MinTestRSquared";
    private const string MaxTestRSquaredQualityParameterName = "MaxTestRSquared";
    private const string AverageTestRSquaredQualityParameterName = "AverageTestRSquared";

    // Names of the three data tables that are filled and handed to the results collector.
    private const string RSquaredValuesParameterName = "R-squared Values";
    private const string MeanSquaredErrorValuesParameterName = "Mean Squared Error Values";
    private const string RelativeErrorValuesParameterName = "Average Relative Error Values";

    private const string TrainingSamplesStartParameterName = "TrainingSamplesStart";
    private const string TrainingSamplesEndParameterName = "TrainingSamplesEnd";
    private const string TestSamplesStartParameterName = "TestSamplesStart";
    private const string TestSamplesEndParameterName = "TestSamplesEnd";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";

    #region parameter properties
    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ILookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public IValueLookupParameter<IntValue> TrainingSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TrainingSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> TrainingSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TrainingSamplesEndParameterName]; }
    }
    public IValueLookupParameter<IntValue> TestSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TestSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> TestSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TestSamplesEndParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    #endregion

    /// <summary>
    /// Registers the operator's parameters and builds the inner operator graph.
    /// </summary>
    public PopulationSymbolicRegressionModelQualityAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic expression tree."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data containing the input varaibles for the symbolic regression problem."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TrainingSamplesStartParameterName, "The first index of the training data set partition on which the model quality values should be calculated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TrainingSamplesEndParameterName, "The last index of the training data set partition on which the model quality values should be calculated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TestSamplesStartParameterName, "The first index of the test data set partition on which the model quality values should be calculated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TestSamplesEndParameterName, "The last index of the test data set partition on which the model quality values should be calculated."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit that should be used as cut off value for the output values of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit that should be used as cut off value for the output values of symbolic expression trees."));
      Parameters.Add(new LookupParameter<DataTable>(MeanSquaredErrorValuesParameterName, "The data table to collect mean squared error values."));
      Parameters.Add(new LookupParameter<DataTable>(RSquaredValuesParameterName, "The data table to collect R² correlation coefficient values."));
      Parameters.Add(new LookupParameter<DataTable>(RelativeErrorValuesParameterName, "The data table to collect relative error values."));

      #region operator initialization and parameter wiring
      // The helpers below read this operator's parameters, so they must run after the Parameters.Add calls above.
      UniformSubScopesProcessor subScopesProcessor = new UniformSubScopesProcessor();

      SymbolicRegressionModelQualityCalculator trainingQualityCalculator = CreateQualityCalculator(
        TrainingSamplesStartParameter.ActualName,
        TrainingSamplesEndParameter.ActualName,
        TrainingAverageRelativeErrorQualityParameterName,
        TrainingMeanSquaredErrorQualityParameterName,
        TrainingRSquaredQualityParameterName);
      SymbolicRegressionModelQualityCalculator testQualityCalculator = CreateQualityCalculator(
        TestSamplesStartParameter.ActualName,
        TestSamplesEndParameter.ActualName,
        TestAverageRelativeErrorQualityParameterName,
        TestMeanSquaredErrorQualityParameterName,
        TestRSquaredQualityParameterName);

      #region training min/avg/max
      MinAverageMaxValueCalculator minAvgMaxTrainingMseCalculator = CreateMinAverageMaxCalculator(
        TrainingMeanSquaredErrorQualityParameterName,
        MinTrainingMeanSquaredErrorQualityParameterName,
        AverageTrainingMeanSquaredErrorQualityParameterName,
        MaxTrainingMeanSquaredErrorQualityParameterName);
      MinAverageMaxValueCalculator minAvgMaxTrainingR2Calculator = CreateMinAverageMaxCalculator(
        TrainingRSquaredQualityParameterName,
        MinTrainingRSquaredQualityParameterName,
        AverageTrainingRSquaredQualityParameterName,
        MaxTrainingRSquaredQualityParameterName);
      MinAverageMaxValueCalculator minAvgMaxTrainingRelErrorCalculator = CreateMinAverageMaxCalculator(
        TrainingAverageRelativeErrorQualityParameterName,
        MinTrainingAverageRelativeErrorQualityParameterName,
        AverageTrainingAverageRelativeErrorQualityParameterName,
        MaxTrainingAverageRelativeErrorQualityParameterName);
      #endregion

      #region test min/avg/max
      MinAverageMaxValueCalculator minAvgMaxTestMseCalculator = CreateMinAverageMaxCalculator(
        TestMeanSquaredErrorQualityParameterName,
        MinTestMeanSquaredErrorQualityParameterName,
        AverageTestMeanSquaredErrorQualityParameterName,
        MaxTestMeanSquaredErrorQualityParameterName);
      MinAverageMaxValueCalculator minAvgMaxTestR2Calculator = CreateMinAverageMaxCalculator(
        TestRSquaredQualityParameterName,
        MinTestRSquaredQualityParameterName,
        AverageTestRSquaredQualityParameterName,
        MaxTestRSquaredQualityParameterName);
      MinAverageMaxValueCalculator minAvgMaxTestRelErrorCalculator = CreateMinAverageMaxCalculator(
        TestAverageRelativeErrorQualityParameterName,
        MinTestAverageRelativeErrorQualityParameterName,
        AverageTestAverageRelativeErrorQualityParameterName,
        MaxTestAverageRelativeErrorQualityParameterName);
      #endregion

      // The order of the collected values is kept as average, max, min for training, then for test.
      DataTableValuesCollector mseDataTableValuesCollector = CreateDataTableValuesCollector(
        MeanSquaredErrorValuesParameterName,
        AverageTrainingMeanSquaredErrorQualityParameterName,
        MaxTrainingMeanSquaredErrorQualityParameterName,
        MinTrainingMeanSquaredErrorQualityParameterName,
        AverageTestMeanSquaredErrorQualityParameterName,
        MaxTestMeanSquaredErrorQualityParameterName,
        MinTestMeanSquaredErrorQualityParameterName);
      DataTableValuesCollector r2DataTableValuesCollector = CreateDataTableValuesCollector(
        RSquaredValuesParameterName,
        AverageTrainingRSquaredQualityParameterName,
        MaxTrainingRSquaredQualityParameterName,
        MinTrainingRSquaredQualityParameterName,
        AverageTestRSquaredQualityParameterName,
        MaxTestRSquaredQualityParameterName,
        MinTestRSquaredQualityParameterName);
      DataTableValuesCollector relErrorDataTableValuesCollector = CreateDataTableValuesCollector(
        RelativeErrorValuesParameterName,
        AverageTrainingAverageRelativeErrorQualityParameterName,
        MaxTrainingAverageRelativeErrorQualityParameterName,
        MinTrainingAverageRelativeErrorQualityParameterName,
        AverageTestAverageRelativeErrorQualityParameterName,
        MaxTestAverageRelativeErrorQualityParameterName,
        MinTestAverageRelativeErrorQualityParameterName);

      ResultsCollector resultsCollector = new ResultsCollector();
      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>(MeanSquaredErrorValuesParameterName));
      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>(RSquaredValuesParameterName));
      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>(RelativeErrorValuesParameterName));
      // NOTE(review): this line uses ResultsParameter.Name whereas the calculator wiring uses ActualName — confirm this is intended.
      resultsCollector.ResultsParameter.ActualName = ResultsParameter.Name;
      #endregion

      #region operator graph
      OperatorGraph.InitialOperator = subScopesProcessor;
      // Inner chain run by the sub-scopes processor: training qualities first, then test qualities.
      subScopesProcessor.Operator = trainingQualityCalculator;
      trainingQualityCalculator.Successor = testQualityCalculator;
      testQualityCalculator.Successor = null;
      // Outer chain: min/avg/max aggregation, data table collection, results collection.
      subScopesProcessor.Successor = minAvgMaxTrainingMseCalculator;
      minAvgMaxTrainingMseCalculator.Successor = minAvgMaxTestMseCalculator;
      minAvgMaxTestMseCalculator.Successor = minAvgMaxTrainingR2Calculator;
      minAvgMaxTrainingR2Calculator.Successor = minAvgMaxTestR2Calculator;
      minAvgMaxTestR2Calculator.Successor = minAvgMaxTrainingRelErrorCalculator;
      minAvgMaxTrainingRelErrorCalculator.Successor = minAvgMaxTestRelErrorCalculator;
      minAvgMaxTestRelErrorCalculator.Successor = mseDataTableValuesCollector;
      mseDataTableValuesCollector.Successor = r2DataTableValuesCollector;
      r2DataTableValuesCollector.Successor = relErrorDataTableValuesCollector;
      relErrorDataTableValuesCollector.Successor = resultsCollector;
      #endregion
    }

    /// <summary>
    /// Creates a quality calculator whose shared inputs (estimation limits, problem data, interpreter, tree)
    /// use the actual names of this operator's parameters and whose partition bounds and outputs use the given names.
    /// </summary>
    /// <param name="samplesStartName">Actual name for the calculator's samples start parameter.</param>
    /// <param name="samplesEndName">Actual name for the calculator's samples end parameter.</param>
    /// <param name="averageRelativeErrorName">Actual name for the average relative error output.</param>
    /// <param name="meanSquaredErrorName">Actual name for the mean squared error output.</param>
    /// <param name="rSquaredName">Actual name for the R² output.</param>
    /// <returns>The wired calculator.</returns>
    private SymbolicRegressionModelQualityCalculator CreateQualityCalculator(
      string samplesStartName,
      string samplesEndName,
      string averageRelativeErrorName,
      string meanSquaredErrorName,
      string rSquaredName) {
      SymbolicRegressionModelQualityCalculator calculator = new SymbolicRegressionModelQualityCalculator();
      calculator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.ActualName;
      calculator.ProblemDataParameter.ActualName = ProblemDataParameter.ActualName;
      calculator.SamplesStartParameter.ActualName = samplesStartName;
      calculator.SamplesEndParameter.ActualName = samplesEndName;
      calculator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.ActualName;
      calculator.SymbolicExpressionTreeParameter.ActualName = SymbolicExpressionTreeParameter.ActualName;
      calculator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.ActualName;
      calculator.AverageRelativeErrorQualityParameter.ActualName = averageRelativeErrorName;
      calculator.MeanSquaredErrorQualityParameter.ActualName = meanSquaredErrorName;
      calculator.RSquaredQualityParameter.ActualName = rSquaredName;
      return calculator;
    }

    /// <summary>
    /// Creates a min/average/max calculator that reads <paramref name="valueName"/> and writes
    /// its results under the given min, average and max names.
    /// </summary>
    private static MinAverageMaxValueCalculator CreateMinAverageMaxCalculator(
      string valueName,
      string minName,
      string averageName,
      string maxName) {
      MinAverageMaxValueCalculator calculator = new MinAverageMaxValueCalculator();
      calculator.AverageValueParameter.ActualName = averageName;
      calculator.MaxValueParameter.ActualName = maxName;
      calculator.MinValueParameter.ActualName = minName;
      calculator.ValueParameter.ActualName = valueName;
      return calculator;
    }

    /// <summary>
    /// Creates a data table values collector that targets the data table named <paramref name="dataTableName"/>
    /// and collects one <see cref="DoubleValue"/> lookup per entry of <paramref name="valueNames"/>, in the given order.
    /// </summary>
    private static DataTableValuesCollector CreateDataTableValuesCollector(string dataTableName, params string[] valueNames) {
      DataTableValuesCollector collector = new DataTableValuesCollector();
      foreach (string valueName in valueNames) {
        // Parameter name and actual name are identical; the description is left null as before.
        collector.CollectedValues.Add(new LookupParameter<DoubleValue>(valueName, null, valueName));
      }
      collector.DataTableParameter.ActualName = dataTableName;
      return collector;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.