source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs @ 4191

Last change on this file since 4191 was 4191, checked in by gkronber, 10 years ago

Changed validation best solution analyzer and tournament pruning operator to use the evaluator specified in the problem parameters. #1117

File size: 19.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Problems.DataAnalysis.Evaluators;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic regression solution.
  /// </summary>
  /// <remarks>
  /// Each application evaluates all symbolic expression trees on a random sample of the
  /// validation partition with the configured evaluator. If the best tree of the current
  /// population improves on the stored best validation quality, that tree is linearly scaled
  /// on the training partition and stored as the new best solution. The best-so-far and the
  /// current validation quality are written to the result collection and appended to the
  /// "Validation Quality" data table.
  /// </remarks>
  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
  [StorableClass]
  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    // names of the parameters of this operator
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string EvaluatorParameterName = "Evaluator";
    private const string MaximizationParameterName = "Maximization";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    // names of result entries and data table rows written by Apply()
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";

    #region parameter properties
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }

    #endregion
    #region properties
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    // The misspelled name ("Validiation") is public API and is kept for compatibility with existing callers.
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }

    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }

    #endregion

    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// The relative number of evaluated samples defaults to <c>new PercentValue(1)</c>.
    /// </summary>
    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    }

    // NOTE(review): this chains to base() rather than base(deserializing); the base class is
    // not visible here, so confirm that this is what the persistence framework expects.
    [StorableConstructor]
    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { }

    /// <summary>
    /// Adds the evaluator and maximization parameters when they are missing after deserialization.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      #region compatibility remove before releasing 3.3.1
      if (!Parameters.ContainsKey(EvaluatorParameterName)) {
        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      }
      if (!Parameters.ContainsKey(MaximizationParameterName)) {
        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      }
      #endregion
    }

    /// <summary>
    /// Evaluates all trees on a random sample of the validation partition, updates the best
    /// validation solution if the best tree of this call improves on the stored quality, and
    /// records the best-so-far and current validation quality in the results.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // read every looked-up value once instead of on each use
      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
      DataAnalysisProblemData problemData = ProblemData;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;
      ISymbolicRegressionEvaluator evaluator = Evaluator;
      bool maximization = Maximization.Value;

      string targetVariable = problemData.TargetVariable.Value;

      // select a random subset of rows in the validation set
      int validationStart = ValidiationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      uint seed = (uint)Random.Next();
      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      if (count == 0) count = 1;
      // materialized once so that the sample is not regenerated for every tree
      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count).ToArray();

      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;

      // find the tree with the best quality on the sampled validation rows
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;
      foreach (var tree in trees) {
        double quality = evaluator.Evaluate(interpreter, tree,
          lowerEstimationLimit, upperEstimationLimit,
          problemData.Dataset, targetVariable,
          rows);

        if (IsBetter(quality, bestQuality, maximization)) {
          bestQuality = quality;
          bestTree = tree;
        }
      }

      // if the best validation tree is better than the current best solution => update
      // bestTree stays null when there are no trees or no quality compared as better
      // (e.g. all qualities are NaN); there is nothing to scale or store in that case
      DoubleValue previousBest = BestSolutionQuality;
      bool newBest = bestTree != null &&
        (previousBest == null || IsBetter(bestQuality, previousBest.Value, maximization));
      if (newBest) {
        SymbolicRegressionSolution solution = CreateScaledSolution(problemData, interpreter, bestTree,
          lowerEstimationLimit, upperEstimationLimit, targetVariable);

        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);

        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, problemData, Results, Generations, VariableFrequencies);
      }

      // without any best quality (neither stored earlier nor found now) there is nothing to report yet
      DoubleValue overallBest = BestSolutionQuality;
      if (overallBest == null) return base.Apply();

      UpdateQualityResults(Results, overallBest.Value, bestQuality);
      return base.Apply();
    }

    /// <summary>
    /// Returns true if <paramref name="quality"/> is strictly better than <paramref name="reference"/>
    /// for the given optimization direction. Always false if either value is NaN.
    /// </summary>
    private static bool IsBetter(double quality, double reference, bool maximization) {
      return maximization ? quality > reference : quality < reference;
    }

    /// <summary>
    /// Calculates the linear scaling parameters of <paramref name="tree"/> on the training
    /// partition and wraps the scaled tree into a named symbolic regression solution.
    /// </summary>
    private static SymbolicRegressionSolution CreateScaledSolution(DataAnalysisProblemData problemData,
      ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree tree,
      double lowerEstimationLimit, double upperEstimationLimit, string targetVariable) {
      // calculate the scaling parameters only for the best tree, using the full training set
      double alpha, beta;
      int trainingStart = problemData.TrainingSamplesStart.Value;
      int trainingEnd = problemData.TrainingSamplesEnd.Value;
      IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
      SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, tree,
        lowerEstimationLimit, upperEstimationLimit,
        problemData.Dataset, targetVariable,
        trainingRows, out beta, out alpha);

      // scale tree for solution
      var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(tree, alpha, beta);
      var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(),
        scaledTree);
      var solution = new SymbolicRegressionSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on validation partition found over the whole run.";
      return solution;
    }

    /// <summary>
    /// Writes the best-so-far and the current validation quality into <paramref name="results"/>
    /// (creating the entries on first use) and appends both values to the validation quality table.
    /// </summary>
    private static void UpdateQualityResults(ResultCollection results, double overallBestQuality, double currentBestQuality) {
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      results[BestSolutionQualityParameterName].Value = new DoubleValue(overallBestQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(currentBestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, overallBestQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, currentBestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/> of
    /// <paramref name="table"/>; the row is created and added to the table if it does not exist yet.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      if (row != null) {
        row.Values.Add(data);
        return;
      }
      // a new row receives its first value before it is added to the table
      row = new DataRow(name, description);
      row.Values.Add(data);
      table.Rows.Add(row);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.