source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 4022

Last change on this file since 4022 was 4022, checked in by gkronber, 11 years ago

Worked on symbolic regression classes to prepare for time series prognosis plugin. #1081

File size: 19.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Analysis;
37using System;
38using HeuristicLab.Optimization.Operators;
39using HeuristicLab.Problems.DataAnalysis.Evaluators;
40
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator
  /// (1) applies linear scaling (alpha/beta) to the trees when scaling values are available for every tree,
  /// (2) forwards the trees to <c>SymbolicRegressionModelQualityAnalyzer.Analyze</c> for the training and test partitions,
  /// (3) determines the tree with the smallest mean squared error on the validation partition,
  /// (4) stores it as the best solution if it improves on the best validation quality found so far, and
  /// (5) records the best and the current validation quality in the result collection.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    // Names of the parameters of this operator.
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";

    // Names of the entries written to the result collection.
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";

    #region parameter properties
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> AlphaParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleValue> BetaParameter {
      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }

    #endregion
    #region properties
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ItemArray<DoubleValue> Quality {
      get { return QualityParameter.ActualValue; }
    }
    public ItemArray<DoubleValue> Alpha {
      get { return AlphaParameter.ActualValue; }
    }
    public ItemArray<DoubleValue> Beta {
      get { return BetaParameter.ActualValue; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    /// <summary>
    /// The first index of the validation partition of the data set.
    /// </summary>
    public IntValue ValidationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    /// <summary>
    /// Misspelled alias of <see cref="ValidationSamplesStart"/>; retained so that existing callers keep compiling.
    /// </summary>
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStart; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }

    #endregion

    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(AlphaParameterName, "The alpha parameter for linear scaling."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(BetaParameterName, "The beta parameter for linear scaling."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    // NOTE(review): this chains to base() instead of forwarding the deserialization flag;
    // confirm whether the base class offers a (bool deserializing) constructor that should be used.
    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { }

    /// <summary>
    /// Analyzes the current trees, updates the best validation solution if a better one was found
    /// and records the validation qualities in the result collection.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Resolve the looked-up values once instead of on every use.
      var problemData = ProblemData;
      var interpreter = SymbolicExpressionTreeInterpreter;
      var results = Results;

      // Missing estimation limits mean "unbounded"; the same values are used for all evaluations below.
      var upperLimit = UpperEstimationLimit;
      var lowerLimit = LowerEstimationLimit;
      double upperEstimationLimit = upperLimit != null ? upperLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = lowerLimit != null ? lowerLimit.Value : double.NegativeInfinity;

      // The scaled trees are enumerated several times, so they are created exactly once here.
      IEnumerable<SymbolicExpressionTree> scaledTrees = GetScaledTrees(SymbolicExpressionTree, Alpha, Beta);

      int trainingStart = problemData.TrainingSamplesStart.Value;
      int trainingEnd = problemData.TrainingSamplesEnd.Value;
      int testStart = problemData.TestSamplesStart.Value;
      int testEnd = problemData.TestSamplesEnd.Value;

      // Argument order (upper limit before lower limit) is kept exactly as in the original call.
      SymbolicRegressionModelQualityAnalyzer.Analyze(scaledTrees, interpreter,
        upperEstimationLimit, lowerEstimationLimit,
        problemData, trainingStart, trainingEnd, testStart, testEnd, results);

      #region validation best model
      string targetVariable = problemData.TargetVariable.Value;
      int validationStart = ValidationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      var dataset = problemData.Dataset;

      // Find the tree with the smallest mean squared error on the validation partition.
      // A NaN error never compares as smaller, so such trees are never selected.
      double bestValidationMse = double.MaxValue;
      SymbolicExpressionTree bestTree = null;
      foreach (var scaledTree in scaledTrees) {
        double validationMse = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(interpreter, scaledTree,
          lowerEstimationLimit, upperEstimationLimit,
          dataset, targetVariable,
          validationStart, validationEnd);

        if (validationMse < bestValidationMse) {
          bestValidationMse = validationMse;
          bestTree = scaledTree;
        }
      }

      // Only replace the stored best solution when a tree was actually selected and it is
      // better than the best validation quality found so far (or none has been stored yet).
      DoubleValue bestSolutionQuality = BestSolutionQualityParameter.ActualValue;
      if (bestTree != null && (bestSolutionQuality == null || bestSolutionQuality.Value > bestValidationMse)) {
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(), bestTree);
        var solution = new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
        solution.Name = BestSolutionParameterName;
        solution.Description = "Best solution on validation partition found over the whole run.";

        bestSolutionQuality = new DoubleValue(bestValidationMse);
        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = bestSolutionQuality;

        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, problemData, results, Generations, VariableFrequencies);
      }

      // No tree could be selected and no earlier best solution exists: there is nothing to report yet.
      if (bestSolutionQuality == null) {
        return base.Apply();
      }

      ReportValidationQualities(results, bestSolutionQuality.Value, bestValidationMse);
      #endregion
      return base.Apply();
    }

    /// <summary>
    /// Returns the linearly scaled trees, or the unchanged trees when alpha and beta values
    /// are not available for every tree.
    /// </summary>
    private static IEnumerable<SymbolicExpressionTree> GetScaledTrees(ItemArray<SymbolicExpressionTree> trees, ItemArray<DoubleValue> alphas, ItemArray<DoubleValue> betas) {
      if (alphas == null || betas == null || alphas.Length != trees.Length || betas.Length != trees.Length) {
        return trees;
      }
      var scaled = new SymbolicExpressionTree[trees.Length];
      for (int i = 0; i < scaled.Length; i++) {
        scaled[i] = SymbolicRegressionSolutionLinearScaler.Scale(trees[i], alphas[i].Value, betas[i].Value);
      }
      return scaled;
    }

    /// <summary>
    /// Writes the overall best and the current best validation quality to the result collection
    /// and appends both values to the validation quality data table, creating missing entries on demand.
    /// </summary>
    private static void ReportValidationQualities(ResultCollection results, double bestSolutionQuality, double currentBestQuality) {
      // Each entry is checked on its own so that a partially filled result collection is completed instead of failing.
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
      }
      if (!results.ContainsKey(BestSolutionQualityParameterName)) {
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
      }
      if (!results.ContainsKey(CurrentBestValidationQualityParameterName)) {
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      results[BestSolutionQualityParameterName].Value = new DoubleValue(bestSolutionQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(currentBestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, bestSolutionQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, currentBestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    }

    /// <summary>
    /// Adds parameters that are missing after deserialization, so that instances stored
    /// without them can still be used.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void Initialize() {
      if (!Parameters.ContainsKey(AlphaParameterName)) {
        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(AlphaParameterName, "The alpha parameter for linear scaling."));
      }
      if (!Parameters.ContainsKey(BetaParameterName)) {
        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(BetaParameterName, "The beta parameter for linear scaling."));
      }
      if (!Parameters.ContainsKey(VariableFrequenciesParameterName)) {
        Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
      }
      if (!Parameters.ContainsKey(GenerationsParameterName)) {
        Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      }
    }

    /// <summary>
    /// Appends a value to the row with the given name, creating the row first if the table does not contain it.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      if (table.Rows.TryGetValue(name, out row)) {
        row.Values.Add(data);
      } else {
        // A new row receives its first value before it is attached to the table.
        row = new DataRow(name, description);
        row.Values.Add(data);
        table.Rows.Add(row);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.