Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis/3.3/Symbolic/Analyzer/ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer.cs @ 4475

Last change on this file since 4475 was 4475, checked in by gkronber, 13 years ago

Fixed bugs in time series prognosis classes #1142.

File size: 23.1 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using System;
37
38using HeuristicLab.Problems.DataAnalysis.Evaluators;
39using HeuristicLab.Problems.DataAnalysis.Regression;
40using HeuristicLab.Analysis;
41using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Evaluators;
42using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic;
43using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Interfaces;
44
namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic time series prognosis solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator evaluates all symbolic expression trees found in the scope tree on a
  /// sample of the validation partition, remembers the best tree seen so far (linearly scaled on the training
  /// partition) as a <see cref="SymbolicTimeSeriesPrognosisSolution"/>, and records the best-so-far and the
  /// current best validation quality in the result collection.
  /// </remarks>
  [Item("ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic time series prognosis solution.")]
  [StorableClass]
  public sealed class ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
    // Names of the parameters registered in the constructor.
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string PredictionHorizonParameterName = "PredictionHorizon";
    private const string ConditionVariableParameterName = "ConditionVariableName";
    private const string EvaluatorParameterName = "Evaluator";
    private const string MaximizationParameterName = "Maximization";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    // Names of the entries written to the result collection.
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    // NOTE: "validiation" is misspelled, but the literal is a result key that is written at runtime;
    // it is kept unchanged so that the key under which the result is stored does not change.
    private const string BestSolutionResultName = "Best solution (on validiation set)";

    #region parameter properties
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public OptionalValueParameter<StringValue> ConditionVariableNameParameter {
      get { return (OptionalValueParameter<StringValue>)Parameters[ConditionVariableParameterName]; }
    }
    public IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public ILookupParameter<ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<IntValue> PredictionHorizonParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[PredictionHorizonParameterName]; }
    }
    public ILookupParameter<SymbolicTimeSeriesPrognosisSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicTimeSeriesPrognosisSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    #endregion

    #region properties
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ISymbolicTimeSeriesExpressionInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public MultiVariateDataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    // The property name is misspelled ("Validiation"); it is public and therefore kept as is.
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public DoubleArray UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleArray LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public IntValue PredictionHorizon {
      get { return PredictionHorizonParameter.ActualValue; }
    }
    public StringValue ConditionVariableName {
      get { return ConditionVariableNameParameter.Value; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// </summary>
    public ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new OptionalValueParameter<StringValue>(ConditionVariableParameterName, "The name of the condition variable indicating if a row should be considered for evaluation or not."));
      Parameters.Add(new ValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<MultiVariateDataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(PredictionHorizonParameterName, "The number of time steps for which to create a forecast."));
      Parameters.Add(new LookupParameter<ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator>(EvaluatorParameterName, ""));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicTimeSeriesPrognosisSolution>(BestSolutionParameterName, "The best symbolic time series prognosis solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName));
    }

    [StorableConstructor]
    private ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Hook that runs after deserialization. It currently performs no work.
    /// </summary>
    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
    private void Initialize() {
      // Intentionally empty. A fix-up that re-added a missing "Evaluator" parameter was sketched here
      // in an earlier revision but was disabled; the hook is kept so it can be reinstated if needed.
    }

    /// <summary>
    /// Evaluates all trees on a sample of the validation partition, updates the best solution found so far
    /// and writes the validation qualities to the result collection.
    /// </summary>
    /// <remarks>
    /// If no tree yields a quality that beats the initial sentinel (for example because there are no trees,
    /// or every quality is infinite or NaN) and no best solution has been stored yet, nothing is written to
    /// the results and the operator simply continues with its successor.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
      ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator evaluator = EvaluatorParameter.ActualValue;
      MultiVariateDataAnalysisProblemData problemData = ProblemData;
      ISymbolicTimeSeriesExpressionInterpreter interpreter = SymbolicExpressionTreeInterpreter;
      int predictionHorizon = PredictionHorizon.Value;
      DoubleArray lowerEstimationLimit = LowerEstimationLimit;
      DoubleArray upperEstimationLimit = UpperEstimationLimit;
      bool maximization = Maximization.Value;
      string conditionalVariableName = GetConditionVariableNameOrNull();

      #region validation best model
      int validationStart = ValidiationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      int rowCount = (int)Math.Ceiling((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      IEnumerable<int> sampledRows = RandomEnumerable.SampleRandomNumbers(Random.Next(), validationStart, validationEnd, rowCount);
      // Materialize once: every tree must be scored on the same rows, and the sampling and filtering
      // should not be repeated for each tree in the population.
      int[] rows = FilterRowsByConditionVariable(problemData, conditionalVariableName, sampledRows).ToArray();

      double bestValidationQuality = maximization ? double.MinValue : double.MaxValue;
      SymbolicExpressionTree bestTree = null;
      foreach (var tree in trees) {
        double validationQuality = evaluator.Evaluate(tree, problemData, interpreter, rows,
          predictionHorizon, lowerEstimationLimit, upperEstimationLimit);
        if (IsBetter(validationQuality, bestValidationQuality, maximization)) {
          bestValidationQuality = validationQuality;
          bestTree = tree;
        }
      }

      DoubleValue bestQualitySoFar = BestSolutionQualityParameter.ActualValue;
      // bestTree is null when no tree beat the sentinel; in that case there is nothing to store.
      bool foundNewBest = bestTree != null &&
        (bestQualitySoFar == null || IsBetter(bestValidationQuality, bestQualitySoFar.Value, maximization));
      if (foundNewBest) {
        BestSolutionParameter.ActualValue = CreateSolution(bestTree, problemData, interpreter, predictionHorizon,
          conditionalVariableName, lowerEstimationLimit, upperEstimationLimit);
        bestQualitySoFar = new DoubleValue(bestValidationQuality);
        BestSolutionQualityParameter.ActualValue = bestQualitySoFar;
      }

      if (bestQualitySoFar == null) {
        // No solution has ever been found, so there is no quality to report yet.
        return base.Apply();
      }

      ResultCollection results = Results;
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionResultName, BestSolutionParameter.ActualValue));
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      results[BestSolutionResultName].Value = BestSolutionParameter.ActualValue;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(bestQualitySoFar.Value);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, bestQualitySoFar.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestValidationQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
      #endregion
      return base.Apply();
    }

    /// <summary>
    /// Wraps the scaled version of <paramref name="bestTree"/> into a named prognosis solution.
    /// The interpreter and the problem data are cloned before they are handed to the model and solution.
    /// </summary>
    private SymbolicTimeSeriesPrognosisSolution CreateSolution(SymbolicExpressionTree bestTree,
      MultiVariateDataAnalysisProblemData problemData, ISymbolicTimeSeriesExpressionInterpreter interpreter,
      int predictionHorizon, string conditionalVariableName, DoubleArray lowerEstimationLimit, DoubleArray upperEstimationLimit) {
      var scaledTree = GetScaledTree(bestTree);
      var model = new SymbolicTimeSeriesPrognosisModel((ISymbolicTimeSeriesExpressionInterpreter)interpreter.Clone(), scaledTree);
      model.Name = "Time series prognosis model";
      model.Description = "Best solution on validation partition found over the whole run.";

      var solution = new SymbolicTimeSeriesPrognosisSolution((MultiVariateDataAnalysisProblemData)problemData.Clone(), model,
        predictionHorizon, conditionalVariableName, lowerEstimationLimit.ToArray(), upperEstimationLimit.ToArray());
      solution.Name = BestSolutionParameterName;
      solution.Description = "Best solution on validation partition found over the whole run.";
      return solution;
    }

    /// <summary>
    /// Linearly scales <paramref name="tree"/> using scaling parameters that are calculated from its
    /// one-step predictions on the training partition.
    /// </summary>
    /// <param name="tree">The tree to scale.</param>
    /// <returns>The tree returned by <c>SymbolicVectorRegressionSolutionLinearScaler.Scale</c>.</returns>
    private SymbolicExpressionTree GetScaledTree(SymbolicExpressionTree tree) {
      MultiVariateDataAnalysisProblemData problemData = ProblemData;
      var dataset = problemData.Dataset;
      int trainingStart = problemData.TrainingSamplesStart.Value;
      int trainingEnd = problemData.TrainingSamplesEnd.Value;

      // Both sequences below are consumed several times, so they are materialized up front.
      int[] trainingRows = FilterRowsByConditionVariable(problemData, GetConditionVariableNameOrNull(),
        Enumerable.Range(trainingStart, trainingEnd - trainingStart)).ToArray();
      string[] selectedTargetVariables = (from item in problemData.TargetVariables
                                          where problemData.TargetVariables.ItemChecked(item)
                                          select item.Value).ToArray();
      int dimension = selectedTargetVariables.Length;

      // calculate scaling parameters based on one-step-predictions
      IEnumerable<double[]> oneStepPredictions =
        SymbolicExpressionTreeInterpreter.GetSymbolicExpressionTreeValues(tree, dataset, selectedTargetVariables, trainingRows, 1);
      IEnumerable<double[]> originalValues = from row in trainingRows
                                             select (from targetVariable in selectedTargetVariables
                                                     select dataset[targetVariable, row]).ToArray();
      double[] alpha = new double[dimension];
      double[] beta = new double[dimension];

      SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator.CalculateScalingParameters(originalValues, oneStepPredictions, ref beta, ref alpha);

      // scale tree for solution
      return SymbolicVectorRegressionSolutionLinearScaler.Scale(tree, beta, alpha);
    }

    /// <summary>
    /// Returns the configured condition variable name, or null if none is set.
    /// </summary>
    private string GetConditionVariableNameOrNull() {
      StringValue conditionVariableName = ConditionVariableName;
      return conditionVariableName == null ? null : conditionVariableName.Value;
    }

    /// <summary>
    /// Keeps only the rows whose condition variable value is not (almost) zero.
    /// Returns <paramref name="rows"/> unchanged when <paramref name="conditionVariableName"/> is null.
    /// </summary>
    private static IEnumerable<int> FilterRowsByConditionVariable(MultiVariateDataAnalysisProblemData problemData,
      string conditionVariableName, IEnumerable<int> rows) {
      if (conditionVariableName == null) return rows;
      var dataset = problemData.Dataset;
      return rows.Where(row => !dataset[conditionVariableName, row].IsAlmost(0.0));
    }

    /// <summary>
    /// Returns true if <paramref name="candidate"/> is strictly better than <paramref name="reference"/>
    /// for the given optimization direction. Comparisons involving NaN are never better.
    /// </summary>
    private static bool IsBetter(double candidate, double reference, bool maximization) {
      return maximization ? candidate > reference : candidate < reference;
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/> of <paramref name="table"/>;
    /// the row is created (with the given description) if the table does not contain it yet.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      if (!table.Rows.TryGetValue(name, out row) || row == null) {
        row = new DataRow(name, description);
        table.Rows.Add(row);
      }
      row.Values.Add(data);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.