Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis/3.3/Symbolic/Analyzer/ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer.cs @ 4113

Last change on this file since 4113 was 4113, checked in by gkronber, 14 years ago

Added plugin for time series prognosis. #1081

File size: 20.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using System;
37
38using HeuristicLab.Problems.DataAnalysis.Evaluators;
39using HeuristicLab.Problems.DataAnalysis.Regression;
40using HeuristicLab.Analysis;
41using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Evaluators;
42using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic;
43using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Interfaces;
44
45namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Analyzers {
46  /// <summary>
47  /// An operator that analyzes the validation best scaled symbolic time series prognosis solution.
48  /// </summary>
49  [Item("ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic time series prognosis solution.")]
50  [StorableClass]
51  public sealed class ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
52    private const string RandomParameterName = "Random";
53    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
54    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
55    private const string ProblemDataParameterName = "ProblemData";
56    private const string ValidationSamplesStartParameterName = "SamplesStart";
57    private const string ValidationSamplesEndParameterName = "SamplesEnd";
58    private const string QualityParameterName = "Quality";
59    private const string ScaledQualityParameterName = "ScaledQuality";
60    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
61    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
62    private const string PredictionHorizonParameterName = "PredictionHorizon";
63    private const string ConditionVariableParameterName = "ConditionVariableName";
64    private const string AlphaParameterName = "Alpha";
65    private const string BetaParameterName = "Beta";
66    private const string BestSolutionParameterName = "Best solution (validation)";
67    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
68    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
69    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
70    private const string ResultsParameterName = "Results";
71    private const string VariableFrequenciesParameterName = "VariableFrequencies";
72    private const string BestKnownQualityParameterName = "BestKnownQuality";
73    private const string GenerationsParameterName = "Generations";
74
75    private const string TrainingMeanSquaredErrorQualityParameterName = "Mean squared error (training)";
76    private const string MinTrainingMeanSquaredErrorQualityParameterName = "Min mean squared error (training)";
77    private const string MaxTrainingMeanSquaredErrorQualityParameterName = "Max mean squared error (training)";
78    private const string AverageTrainingMeanSquaredErrorQualityParameterName = "Average mean squared error (training)";
79    private const string BestTrainingMeanSquaredErrorQualityParameterName = "Best mean squared error (training)";
80
81    private const string TrainingAverageRelativeErrorQualityParameterName = "Average relative error (training)";
82    private const string MinTrainingAverageRelativeErrorQualityParameterName = "Min average relative error (training)";
83    private const string MaxTrainingAverageRelativeErrorQualityParameterName = "Max average relative error (training)";
84    private const string AverageTrainingAverageRelativeErrorQualityParameterName = "Average average relative error (training)";
85    private const string BestTrainingAverageRelativeErrorQualityParameterName = "Best average relative error (training)";
86
87    private const string TrainingRSquaredQualityParameterName = "R² (training)";
88    private const string MinTrainingRSquaredQualityParameterName = "Min R² (training)";
89    private const string MaxTrainingRSquaredQualityParameterName = "Max R² (training)";
90    private const string AverageTrainingRSquaredQualityParameterName = "Average R² (training)";
91    private const string BestTrainingRSquaredQualityParameterName = "Best R² (training)";
92
93    private const string TestMeanSquaredErrorQualityParameterName = "Mean squared error (test)";
94    private const string MinTestMeanSquaredErrorQualityParameterName = "Min mean squared error (test)";
95    private const string MaxTestMeanSquaredErrorQualityParameterName = "Max mean squared error (test)";
96    private const string AverageTestMeanSquaredErrorQualityParameterName = "Average mean squared error (test)";
97    private const string BestTestMeanSquaredErrorQualityParameterName = "Best mean squared error (test)";
98
99    private const string TestAverageRelativeErrorQualityParameterName = "Average relative error (test)";
100    private const string MinTestAverageRelativeErrorQualityParameterName = "Min average relative error (test)";
101    private const string MaxTestAverageRelativeErrorQualityParameterName = "Max average relative error (test)";
102    private const string AverageTestAverageRelativeErrorQualityParameterName = "Average average relative error (test)";
103    private const string BestTestAverageRelativeErrorQualityParameterName = "Best average relative error (test)";
104
105    private const string TestRSquaredQualityParameterName = "R² (test)";
106    private const string MinTestRSquaredQualityParameterName = "Min R² (test)";
107    private const string MaxTestRSquaredQualityParameterName = "Max R² (test)";
108    private const string AverageTestRSquaredQualityParameterName = "Average R² (test)";
109    private const string BestTestRSquaredQualityParameterName = "Best R² (test)";
110
111    private const string RSquaredValuesParameterName = "R²";
112    private const string MeanSquaredErrorValuesParameterName = "Mean squared error";
113    private const string RelativeErrorValuesParameterName = "Average relative error";
114    private const string BestSolutionResultName = "Best solution (on validiation set)";
115    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
116
117    #region parameter properties
118    public ILookupParameter<IRandom> RandomParameter {
119      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
120    }
121    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
122      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
123    }
124    public OptionalValueParameter<StringValue> ConditionVariableNameParameter {
125      get { return (OptionalValueParameter<StringValue>)Parameters[ConditionVariableParameterName]; }
126    }
127    public ScopeTreeLookupParameter<DoubleArray> AlphaParameter {
128      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[AlphaParameterName]; }
129    }
130    public ScopeTreeLookupParameter<DoubleArray> BetaParameter {
131      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[BetaParameterName]; }
132    }
133    public IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter> SymbolicExpressionTreeInterpreterParameter {
134      get { return (IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
135    }
136    public IValueLookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
137      get { return (IValueLookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
138    }
139    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
140      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
141    }
142    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
143      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
144    }
145    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
146      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
147    }
148    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
149      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
150    }
151    public IValueLookupParameter<IntValue> PredictionHorizonParameter {
152      get { return (IValueLookupParameter<IntValue>)Parameters[PredictionHorizonParameterName]; }
153    }
154    public ILookupParameter<SymbolicExpressionTree> BestSolutionParameter {
155      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[BestSolutionParameterName]; }
156    }
157    public ILookupParameter<IntValue> GenerationsParameter {
158      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
159    }
160    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
161      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
162    }
163    public ILookupParameter<ResultCollection> ResultsParameter {
164      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
165    }
166    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
167      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
168    }
169    public ILookupParameter<DataTable> VariableFrequenciesParameter {
170      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
171    }
172    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
173      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
174    }
175    #endregion
176    #region properties
177    public IRandom Random {
178      get { return RandomParameter.ActualValue; }
179    }
180    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
181      get { return SymbolicExpressionTreeParameter.ActualValue; }
182    }
183    public ItemArray<DoubleArray> Alpha {
184      get { return AlphaParameter.ActualValue; }
185    }
186    public ItemArray<DoubleArray> Beta {
187      get { return BetaParameter.ActualValue; }
188    }
189    public ISymbolicTimeSeriesExpressionInterpreter SymbolicExpressionTreeInterpreter {
190      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
191    }
192    public MultiVariateDataAnalysisProblemData ProblemData {
193      get { return ProblemDataParameter.ActualValue; }
194    }
195    public IntValue ValidiationSamplesStart {
196      get { return ValidationSamplesStartParameter.ActualValue; }
197    }
198    public IntValue ValidationSamplesEnd {
199      get { return ValidationSamplesEndParameter.ActualValue; }
200    }
201    public DoubleArray UpperEstimationLimit {
202      get { return UpperEstimationLimitParameter.ActualValue; }
203    }
204    public DoubleArray LowerEstimationLimit {
205      get { return LowerEstimationLimitParameter.ActualValue; }
206    }
207    public IntValue PredictionHorizon {
208      get { return PredictionHorizonParameter.ActualValue; }
209    }
210    public StringValue ConditionVariableName {
211      get { return ConditionVariableNameParameter.Value; }
212    }
213    public ResultCollection Results {
214      get { return ResultsParameter.ActualValue; }
215    }
216    public DataTable VariableFrequencies {
217      get { return VariableFrequenciesParameter.ActualValue; }
218    }
219    public IntValue Generations {
220      get { return GenerationsParameter.ActualValue; }
221    }
222    public PercentValue RelativeNumberOfEvaluatedSamples {
223      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
224    }
225    #endregion
226
227    public ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer()
228      : base() {
229      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
230      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
231      Parameters.Add(new OptionalValueParameter<StringValue>(ConditionVariableParameterName, "The name of the condition variable indicating if a row should be considered for evaluation or not."));
232      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(AlphaParameterName, "The alpha parameter for linear scaling."));
233      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(BetaParameterName, "The beta parameter for linear scaling."));
234      Parameters.Add(new ValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
235      Parameters.Add(new ValueLookupParameter<MultiVariateDataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
236      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
237      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
238      Parameters.Add(new ValueLookupParameter<IntValue>(PredictionHorizonParameterName, "The number of time steps for which to create a forecast."));
239      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
240      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
241      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(BestSolutionParameterName, "The best symbolic time series prognosis solution."));
242      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
243      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
244      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
245      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
246      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
247      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
248
249    }
250
251    [StorableConstructor]
252    private ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer(bool deserializing) : base(deserializing) { }
253
254    public override IOperation Apply() {
255      var alphas = Alpha;
256      var betas = Beta;
257      var trees = SymbolicExpressionTree;
258
259      IEnumerable<SymbolicExpressionTree> scaledTrees;
260      if (alphas.Length == trees.Length) {
261        scaledTrees = from i in Enumerable.Range(0, trees.Length)
262                      select SymbolicVectorRegressionSolutionLinearScaler.Scale(trees[i], betas[i].ToArray(), alphas[i].ToArray());
263      } else {
264        scaledTrees = trees;
265      }
266
267      int trainingStart = ProblemData.TrainingSamplesStart.Value;
268      int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
269      int testStart = ProblemData.TestSamplesStart.Value;
270      int testEnd = ProblemData.TestSamplesEnd.Value;
271
272      #region validation best model
273      int validationStart = ValidiationSamplesStart.Value;
274      int validationEnd = ValidationSamplesEnd.Value;
275      int rowCount = (int)Math.Ceiling((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
276      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers((uint)Random.Next(), validationStart, validationEnd, rowCount);
277      double bestValidationNmse = double.MaxValue;
278      SymbolicExpressionTree bestTree = null;
279      string conditionalVariableName = ConditionVariableName == null ? null : ConditionVariableName.Value;
280      foreach (var tree in scaledTrees) {
281        double validationNmse;
282        validationNmse = SymbolicTimeSeriesPrognosisNormalizedMseEvaluator.Evaluate(tree, ProblemData,
283          SymbolicExpressionTreeInterpreter, conditionalVariableName,
284          rows, PredictionHorizon.Value, LowerEstimationLimit, UpperEstimationLimit);
285        if (validationNmse < bestValidationNmse) {
286          bestValidationNmse = validationNmse;
287          bestTree = tree;
288        }
289      }
290
291
292      if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value > bestValidationNmse) {
293        var solution = bestTree;
294        //solution.Name = BestSolutionParameterName;
295        //solution.Description = "Best solution on validation partition found over the whole run.";
296
297        BestSolutionParameter.ActualValue = solution;
298        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationNmse);
299
300        // BestSymbolicTimeSeriesPrognosisSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
301      }
302
303      if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) {
304        Results.Add(new Result(BestSolutionResultName, BestSolutionParameter.ActualValue));
305        Results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
306        Results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
307        Results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
308      }
309      Results[BestSolutionResultName].Value = BestSolutionParameter.ActualValue;
310      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
311      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationNmse);
312
313      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
314      AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
315      AddValue(validationValues, bestValidationNmse, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
316      #endregion
317      return base.Apply();
318    }
319
320    [StorableHook(HookType.AfterDeserialization)]
321    private void Initialize() {
322    }
323
324    private static void AddValue(DataTable table, double data, string name, string description) {
325      DataRow row;
326      table.Rows.TryGetValue(name, out row);
327      if (row == null) {
328        row = new DataRow(name, description);
329        row.Values.Add(data);
330        table.Rows.Add(row);
331      } else {
332        row.Values.Add(data);
333      }
334    }
335  }
336}
Note: See TracBrowser for help on using the repository browser.