source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/ValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 4468

Last change on this file since 4468 was 4468, checked in by mkommend, 9 years ago

Preparation for cross validation - removed the test samples from the training samples and added ValidationPercentage parameter (ticket #1199).

File size: 18.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Optimization.Operators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// The default constructor declares the lookup parameters, creates the inner operators,
  /// wires their parameter names to the names declared here and chains them into the
  /// operator graph. The depth of <see cref="SymbolicExpressionTreeParameter"/> is copied
  /// to the inner operators whenever it changes.
  /// </remarks>
  [Item("ValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  [Obsolete("This class should not be used anymore because of performance reasons and will therefore not be updated.")]
  public sealed class ValidationBestScaledSymbolicRegressionSolutionAnalyzer : AlgorithmOperator, ISymbolicRegressionAnalyzer {
    // Names of the parameters declared by this operator and of the variables
    // that the inner operators are wired to.
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string ScaledSymbolicExpressionTreeParameterName = "ScaledSymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string TrainingSamplesStartParameterName = "TrainingSamplesStart";
    private const string TrainingSamplesEndParameterName = "TrainingSamplesEnd";
    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    private const string TestSamplesStartParameterName = "TestSamplesStart";
    private const string TestSamplesEndParameterName = "TestSamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string ScaledQualityParameterName = "ScaledQuality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string CurrentAverageValidationQualityParameterName = "Current average validation quality";
    private const string CurrentWorstValidationQualityParameterName = "Current worst validation quality";
    // Used both as the data table name of the values collector and as a collected result.
    private const string ValidationQualityTableParameterName = "Validation quality";
    private const string ResultsParameterName = "Results";
    private const string BestKnownQualityParameterName = "BestKnownQuality";

    #region parameter properties
    // Typed accessors for the entries of the Parameters collection added in the constructor.
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> TrainingSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TrainingSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> TrainingSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TrainingSamplesEndParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueLookupParameter<IntValue> TestSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TestSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> TestSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[TestSamplesEndParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    #endregion

    #region inner operators
    // References to the operators that make up the operator graph. They are kept as
    // fields because the depth-changed handler has to reach them after construction.
    [Storable]
    private UniformSubScopesProcessor subScopesProcessor;
    [Storable]
    private SymbolicRegressionSolutionLinearScaler linearScaler;
    [Storable]
    private SymbolicRegressionModelQualityAnalyzer modelQualityAnalyzer;
    [Storable]
    private SymbolicRegressionMeanSquaredErrorEvaluator validationMseEvaluator;
    [Storable]
    private BestSymbolicRegressionSolutionAnalyzer bestSolutionAnalyzer;
    [Storable]
    private UniformSubScopesProcessor cleaningSubScopesProcessor;
    [Storable]
    private Assigner removeScaledExpressionTreeAssigner;
    [Storable]
    private BestQualityMemorizer bestKnownQualityMemorizer;
    [Storable]
    private BestAverageWorstQualityCalculator bestAvgWorstValidationQualityCalculator;
    [Storable]
    private DataTableValuesCollector validationValuesCollector;
    [Storable]
    private ResultsCollector resultsCollector;
    #endregion

    /// <summary>
    /// Creates the analyzer: declares its parameters, instantiates and wires the inner
    /// operators, builds the operator graph and subscribes to depth changes.
    /// </summary>
    public ValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TrainingSamplesStartParameterName, "The first index of the training partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TrainingSamplesEndParameterName, "The last index of the training partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TestSamplesStartParameterName, "The first index of the test partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(TestSamplesEndParameterName, "The last index of the test partition of the data set."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));

      #region operator initialization
      subScopesProcessor = new UniformSubScopesProcessor();
      linearScaler = new SymbolicRegressionSolutionLinearScaler();
      modelQualityAnalyzer = new SymbolicRegressionModelQualityAnalyzer();
      validationMseEvaluator = new SymbolicRegressionMeanSquaredErrorEvaluator();
      bestSolutionAnalyzer = new BestSymbolicRegressionSolutionAnalyzer();
      cleaningSubScopesProcessor = new UniformSubScopesProcessor();
      removeScaledExpressionTreeAssigner = new Assigner();
      bestKnownQualityMemorizer = new BestQualityMemorizer();
      bestAvgWorstValidationQualityCalculator = new BestAverageWorstQualityCalculator();
      validationValuesCollector = new DataTableValuesCollector();
      resultsCollector = new ResultsCollector();
      #endregion

      #region parameter wiring
      subScopesProcessor.Depth.Value = SymbolicExpressionTreeParameter.Depth;

      // The scaler reads the original tree and writes the scaled tree under its own name.
      linearScaler.AlphaParameter.ActualName = AlphaParameterName;
      linearScaler.BetaParameter.ActualName = BetaParameterName;
      linearScaler.SymbolicExpressionTreeParameter.ActualName = SymbolicExpressionTreeParameter.Name;
      linearScaler.ScaledSymbolicExpressionTreeParameter.ActualName = ScaledSymbolicExpressionTreeParameterName;

      modelQualityAnalyzer.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
      modelQualityAnalyzer.SymbolicExpressionTreeParameter.ActualName = ScaledSymbolicExpressionTreeParameterName;
      modelQualityAnalyzer.SymbolicExpressionTreeParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      modelQualityAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
      modelQualityAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
      modelQualityAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;

      // The evaluator works on the scaled tree, is restricted to the validation sample
      // range and stores its result under the scaled quality name.
      validationMseEvaluator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
      validationMseEvaluator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
      validationMseEvaluator.SymbolicExpressionTreeParameter.ActualName = ScaledSymbolicExpressionTreeParameterName;
      validationMseEvaluator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
      validationMseEvaluator.QualityParameter.ActualName = ScaledQualityParameterName;
      validationMseEvaluator.RegressionProblemDataParameter.ActualName = ProblemDataParameter.Name;
      validationMseEvaluator.SamplesStartParameter.ActualName = ValidationSamplesStartParameter.Name;
      validationMseEvaluator.SamplesEndParameter.ActualName = ValidationSamplesEndParameter.Name;

      bestSolutionAnalyzer.BestSolutionParameter.ActualName = BestSolutionParameter.Name;
      bestSolutionAnalyzer.BestSolutionQualityParameter.ActualName = BestSolutionQualityParameter.Name;
      bestSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
      bestSolutionAnalyzer.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
      bestSolutionAnalyzer.QualityParameter.ActualName = ScaledQualityParameterName;
      // Same initial wiring as the depth-changed handler applies later on.
      bestSolutionAnalyzer.QualityParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      bestSolutionAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
      bestSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
      bestSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = ScaledSymbolicExpressionTreeParameterName;
      bestSolutionAnalyzer.SymbolicExpressionTreeParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      bestSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;

      cleaningSubScopesProcessor.Depth.Value = SymbolicExpressionTreeParameter.Depth;

      // Overwrites the scaled tree variable with a fresh, empty tree.
      removeScaledExpressionTreeAssigner.LeftSideParameter.ActualName = ScaledSymbolicExpressionTreeParameterName;
      removeScaledExpressionTreeAssigner.RightSideParameter.Value = new SymbolicExpressionTree();

      bestAvgWorstValidationQualityCalculator.AverageQualityParameter.ActualName = CurrentAverageValidationQualityParameterName;
      bestAvgWorstValidationQualityCalculator.BestQualityParameter.ActualName = CurrentBestValidationQualityParameterName;
      bestAvgWorstValidationQualityCalculator.MaximizationParameter.Value = new BoolValue(false);
      bestAvgWorstValidationQualityCalculator.QualityParameter.ActualName = ScaledQualityParameterName;
      bestAvgWorstValidationQualityCalculator.QualityParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      bestAvgWorstValidationQualityCalculator.WorstQualityParameter.ActualName = CurrentWorstValidationQualityParameterName;

      bestKnownQualityMemorizer.BestQualityParameter.ActualName = BestKnownQualityParameterName;
      bestKnownQualityMemorizer.MaximizationParameter.Value = new BoolValue(false);
      bestKnownQualityMemorizer.QualityParameter.ActualName = QualityParameter.Name;
      bestKnownQualityMemorizer.QualityParameter.Depth = QualityParameter.Depth;

      validationValuesCollector.DataTableParameter.ActualName = ValidationQualityTableParameterName;
      validationValuesCollector.CollectedValues.Add(new LookupParameter<DoubleValue>(CurrentBestValidationQualityParameterName, null, CurrentBestValidationQualityParameterName));
      validationValuesCollector.CollectedValues.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameter.Name, null, BestSolutionQualityParameter.Name));

      resultsCollector.CollectedValues.Add(new LookupParameter<DoubleValue>(CurrentBestValidationQualityParameterName, null, CurrentBestValidationQualityParameterName));
      resultsCollector.CollectedValues.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameter.Name, null, BestSolutionQualityParameter.Name));
      resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>(ValidationQualityTableParameterName));
      resultsCollector.ResultsParameter.ActualName = ResultsParameter.Name;
      #endregion

      #region operator graph
      // subScopesProcessor applies linearScaler -> validationMseEvaluator as its operator;
      // its successor chain is modelQualityAnalyzer -> bestSolutionAnalyzer ->
      // cleaningSubScopesProcessor (operator: removeScaledExpressionTreeAssigner) ->
      // bestAvgWorstValidationQualityCalculator -> bestKnownQualityMemorizer ->
      // validationValuesCollector -> resultsCollector.
      OperatorGraph.InitialOperator = subScopesProcessor;
      subScopesProcessor.Operator = linearScaler;
      linearScaler.Successor = validationMseEvaluator;
      validationMseEvaluator.Successor = null;
      subScopesProcessor.Successor = modelQualityAnalyzer;
      modelQualityAnalyzer.Successor = bestSolutionAnalyzer;
      bestSolutionAnalyzer.Successor = cleaningSubScopesProcessor;
      cleaningSubScopesProcessor.Operator = removeScaledExpressionTreeAssigner;
      cleaningSubScopesProcessor.Successor = bestAvgWorstValidationQualityCalculator;
      bestAvgWorstValidationQualityCalculator.Successor = bestKnownQualityMemorizer;
      bestKnownQualityMemorizer.Successor = validationValuesCollector;
      validationValuesCollector.Successor = resultsCollector;
      resultsCollector.Successor = null;
      #endregion

      Initialize();
    }

    // NOTE(review): this chains to the parameterless base constructor; if the base class
    // offers a (bool deserializing) constructor, chaining to it is presumably intended.
    // Confirm against AlgorithmOperator before changing.
    [StorableConstructor]
    private ValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { }

    /// <summary>
    /// Subscribes to depth changes of <see cref="SymbolicExpressionTreeParameter"/>.
    /// Called by the default constructor, after deserialization (storable hook) and
    /// on freshly created clones.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void Initialize() {
      SymbolicExpressionTreeParameter.DepthChanged += SymbolicExpressionTreeParameter_DepthChanged;
    }

    /// <summary>
    /// Creates a deep clone of this analyzer.
    /// </summary>
    /// <param name="cloner">The cloner that tracks already cloned objects.</param>
    /// <returns>The cloned analyzer with its depth-changed subscription registered.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      ValidationBestScaledSymbolicRegressionSolutionAnalyzer clone = (ValidationBestScaledSymbolicRegressionSolutionAnalyzer)base.Clone(cloner);

      // Nothing else in this class copies the operator fields, yet the depth-changed
      // handler works exclusively through them. Re-point them at the cloned operators so
      // the handler of the clone does not touch stale (or null) references.
      // NOTE(review): relies on the cloner handing back the already created copy for
      // operators that were cloned as part of the operator graph - confirm against Cloner.
      clone.subScopesProcessor = (UniformSubScopesProcessor)cloner.Clone(subScopesProcessor);
      clone.linearScaler = (SymbolicRegressionSolutionLinearScaler)cloner.Clone(linearScaler);
      clone.modelQualityAnalyzer = (SymbolicRegressionModelQualityAnalyzer)cloner.Clone(modelQualityAnalyzer);
      clone.validationMseEvaluator = (SymbolicRegressionMeanSquaredErrorEvaluator)cloner.Clone(validationMseEvaluator);
      clone.bestSolutionAnalyzer = (BestSymbolicRegressionSolutionAnalyzer)cloner.Clone(bestSolutionAnalyzer);
      clone.cleaningSubScopesProcessor = (UniformSubScopesProcessor)cloner.Clone(cleaningSubScopesProcessor);
      clone.removeScaledExpressionTreeAssigner = (Assigner)cloner.Clone(removeScaledExpressionTreeAssigner);
      clone.bestKnownQualityMemorizer = (BestQualityMemorizer)cloner.Clone(bestKnownQualityMemorizer);
      clone.bestAvgWorstValidationQualityCalculator = (BestAverageWorstQualityCalculator)cloner.Clone(bestAvgWorstValidationQualityCalculator);
      clone.validationValuesCollector = (DataTableValuesCollector)cloner.Clone(validationValuesCollector);
      clone.resultsCollector = (ResultsCollector)cloner.Clone(resultsCollector);

      clone.Initialize();
      return clone;
    }

    /// <summary>
    /// Copies the current depth of <see cref="SymbolicExpressionTreeParameter"/> to every
    /// inner operator that has a depth setting of its own.
    /// </summary>
    private void SymbolicExpressionTreeParameter_DepthChanged(object sender, EventArgs e) {
      subScopesProcessor.Depth.Value = SymbolicExpressionTreeParameter.Depth;
      cleaningSubScopesProcessor.Depth.Value = SymbolicExpressionTreeParameter.Depth;
      bestSolutionAnalyzer.SymbolicExpressionTreeParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      bestSolutionAnalyzer.QualityParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      bestAvgWorstValidationQualityCalculator.QualityParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      bestKnownQualityMemorizer.QualityParameter.Depth = SymbolicExpressionTreeParameter.Depth;
      modelQualityAnalyzer.SymbolicExpressionTreeParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.