Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis/3.3/Symbolic/Analyzer/ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer.cs @ 4457

Last change on this file since 4457 was 4457, checked in by gkronber, 14 years ago

Made DataAnalysisProblem, DataAnalysisProblemData, and SymbolicTimeSeriesPrognosisSolution savable and added a field for conditional evaluation to Symbolic/SymbolicTimeSeriesPrognosisSolution. #1142

File size: 20.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using System;
37
38using HeuristicLab.Problems.DataAnalysis.Evaluators;
39using HeuristicLab.Problems.DataAnalysis.Regression;
40using HeuristicLab.Analysis;
41using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Evaluators;
42using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic;
43using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Interfaces;
44
namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic time series prognosis solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator evaluates all symbolic expression trees found in the scope tree
  /// on a random sample of the validation partition, remembers the tree with the smallest normalized
  /// mean squared error, and stores it as the best solution when it improves on the best quality
  /// stored so far. The best and the current validation quality are written to the result collection
  /// and appended to a data table.
  /// </remarks>
  [Item("ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic time series prognosis solution.")]
  [StorableClass]
  public sealed class ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
    // Names of the parameters registered in the constructor and of the entries written to the results.
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string QualityParameterName = "Quality";
    private const string ScaledQualityParameterName = "ScaledQuality";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string PredictionHorizonParameterName = "PredictionHorizon";
    private const string ConditionVariableParameterName = "ConditionVariableName";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string BestKnownQualityParameterName = "BestKnownQuality";
    private const string GenerationsParameterName = "Generations";

    // NOTE(review): the training/test quality names below (and Quality/ScaledQuality above as well as
    // the three "...ValuesParameterName" constants further down) are not referenced anywhere in this
    // class; they are kept unchanged in case they are needed again when detailed result reporting
    // is re-enabled.
    private const string TrainingMeanSquaredErrorQualityParameterName = "Mean squared error (training)";
    private const string MinTrainingMeanSquaredErrorQualityParameterName = "Min mean squared error (training)";
    private const string MaxTrainingMeanSquaredErrorQualityParameterName = "Max mean squared error (training)";
    private const string AverageTrainingMeanSquaredErrorQualityParameterName = "Average mean squared error (training)";
    private const string BestTrainingMeanSquaredErrorQualityParameterName = "Best mean squared error (training)";

    private const string TrainingAverageRelativeErrorQualityParameterName = "Average relative error (training)";
    private const string MinTrainingAverageRelativeErrorQualityParameterName = "Min average relative error (training)";
    private const string MaxTrainingAverageRelativeErrorQualityParameterName = "Max average relative error (training)";
    private const string AverageTrainingAverageRelativeErrorQualityParameterName = "Average average relative error (training)";
    private const string BestTrainingAverageRelativeErrorQualityParameterName = "Best average relative error (training)";

    private const string TrainingRSquaredQualityParameterName = "R² (training)";
    private const string MinTrainingRSquaredQualityParameterName = "Min R² (training)";
    private const string MaxTrainingRSquaredQualityParameterName = "Max R² (training)";
    private const string AverageTrainingRSquaredQualityParameterName = "Average R² (training)";
    private const string BestTrainingRSquaredQualityParameterName = "Best R² (training)";

    private const string TestMeanSquaredErrorQualityParameterName = "Mean squared error (test)";
    private const string MinTestMeanSquaredErrorQualityParameterName = "Min mean squared error (test)";
    private const string MaxTestMeanSquaredErrorQualityParameterName = "Max mean squared error (test)";
    private const string AverageTestMeanSquaredErrorQualityParameterName = "Average mean squared error (test)";
    private const string BestTestMeanSquaredErrorQualityParameterName = "Best mean squared error (test)";

    private const string TestAverageRelativeErrorQualityParameterName = "Average relative error (test)";
    private const string MinTestAverageRelativeErrorQualityParameterName = "Min average relative error (test)";
    private const string MaxTestAverageRelativeErrorQualityParameterName = "Max average relative error (test)";
    private const string AverageTestAverageRelativeErrorQualityParameterName = "Average average relative error (test)";
    private const string BestTestAverageRelativeErrorQualityParameterName = "Best average relative error (test)";

    private const string TestRSquaredQualityParameterName = "R² (test)";
    private const string MinTestRSquaredQualityParameterName = "Min R² (test)";
    private const string MaxTestRSquaredQualityParameterName = "Max R² (test)";
    private const string AverageTestRSquaredQualityParameterName = "Average R² (test)";
    private const string BestTestRSquaredQualityParameterName = "Best R² (test)";

    private const string RSquaredValuesParameterName = "R²";
    private const string MeanSquaredErrorValuesParameterName = "Mean squared error";
    private const string RelativeErrorValuesParameterName = "Average relative error";
    // The misspelling "validiation" is part of the result name written at runtime and is kept as is.
    private const string BestSolutionResultName = "Best solution (on validiation set)";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    #region parameter properties
    // Typed accessors for the parameters registered in the constructor.
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public OptionalValueParameter<StringValue> ConditionVariableNameParameter {
      get { return (OptionalValueParameter<StringValue>)Parameters[ConditionVariableParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleArray> AlphaParameter {
      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[AlphaParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleArray> BetaParameter {
      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[BetaParameterName]; }
    }
    public IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<IntValue> PredictionHorizonParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[PredictionHorizonParameterName]; }
    }
    public ILookupParameter<SymbolicTimeSeriesPrognosisSolution> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicTimeSeriesPrognosisSolution>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    public ILookupParameter<DataTable> VariableFrequenciesParameter {
      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
    }
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }
    #endregion
    #region properties
    // Convenience accessors returning the (actual) values of the parameters above.
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ItemArray<DoubleArray> Alpha {
      get { return AlphaParameter.ActualValue; }
    }
    public ItemArray<DoubleArray> Beta {
      get { return BetaParameter.ActualValue; }
    }
    public ISymbolicTimeSeriesExpressionInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public MultiVariateDataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    // The property name is misspelled ("Validiation"); it is public and therefore kept for compatibility.
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public DoubleArray UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleArray LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public IntValue PredictionHorizon {
      get { return PredictionHorizonParameter.ActualValue; }
    }
    public StringValue ConditionVariableName {
      get { return ConditionVariableNameParameter.Value; }
    }
    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }
    #endregion

    /// <summary>
    /// Creates a new analyzer and registers all of its parameters.
    /// The relative number of evaluated samples defaults to <c>new PercentValue(1)</c>.
    /// </summary>
    public ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new OptionalValueParameter<StringValue>(ConditionVariableParameterName, "The name of the condition variable indicating if a row should be considered for evaluation or not."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(AlphaParameterName, "The alpha parameter for linear scaling."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(BetaParameterName, "The beta parameter for linear scaling."));
      Parameters.Add(new ValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<MultiVariateDataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(PredictionHorizonParameterName, "The number of time steps for which to create a forecast."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicTimeSeriesPrognosisSolution>(BestSolutionParameterName, "The best symbolic time series prognosis solution."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
    }

    /// <summary>
    /// Constructor used by the persistence framework during deserialization.
    /// </summary>
    [StorableConstructor]
    private ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Evaluates all trees on a random sample of the validation partition, updates the best
    /// validation solution if a better tree was found, and records the best and the current
    /// validation quality in the result collection.
    /// </summary>
    /// <remarks>
    /// Trees are linearly scaled before evaluation only when both the alpha and the beta arrays
    /// contain one entry per tree; otherwise the unscaled trees are evaluated. When no tree could be
    /// evaluated and no best solution has been stored before, nothing is written to the results.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      var alphas = Alpha;
      var betas = Beta;
      var trees = SymbolicExpressionTree;

      IEnumerable<SymbolicExpressionTree> scaledTrees;
      // Both arrays are indexed per tree below, so both must match the number of trees.
      if (alphas.Length == trees.Length && betas.Length == trees.Length) {
        scaledTrees = from i in Enumerable.Range(0, trees.Length)
                      select SymbolicVectorRegressionSolutionLinearScaler.Scale(trees[i], betas[i].ToArray(), alphas[i].ToArray());
      } else {
        scaledTrees = trees;
      }

      #region validation best model
      // Look up loop-invariant values once instead of once per evaluated tree.
      var problemData = ProblemData;
      var interpreter = SymbolicExpressionTreeInterpreter;
      int predictionHorizon = PredictionHorizon.Value;
      var lowerEstimationLimit = LowerEstimationLimit;
      var upperEstimationLimit = UpperEstimationLimit;
      string conditionalVariableName = ConditionVariableName == null ? null : ConditionVariableName.Value;

      int validationStart = ValidiationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      int rowCount = (int)Math.Ceiling((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      // Materialize the sampled rows once: the sequence is consumed for every tree, and all trees
      // must be compared on exactly the same rows.
      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(Random.Next(), validationStart, validationEnd, rowCount).ToArray();

      double bestValidationNmse = double.MaxValue;
      SymbolicExpressionTree bestTree = null;
      foreach (var tree in scaledTrees) {
        double validationNmse = SymbolicTimeSeriesPrognosisNormalizedMseEvaluator.Evaluate(tree, problemData,
          interpreter, conditionalVariableName,
          rows, predictionHorizon, lowerEstimationLimit, upperEstimationLimit);
        if (validationNmse < bestValidationNmse) {
          bestValidationNmse = validationNmse;
          bestTree = tree;
        }
      }

      DoubleValue bestQuality = BestSolutionQualityParameter.ActualValue;
      // bestTree stays null when there are no trees or no tree produced a comparable quality;
      // never build a solution around a missing tree.
      if (bestTree != null && (bestQuality == null || bestQuality.Value > bestValidationNmse)) {
        var model = new SymbolicTimeSeriesPrognosisModel((ISymbolicTimeSeriesExpressionInterpreter)interpreter.Clone(), bestTree);
        model.Name = "Time series prognosis model";
        model.Description = "Best solution on validation partition found over the whole run.";

        var solution = new SymbolicTimeSeriesPrognosisSolution(problemData, model, predictionHorizon, conditionalVariableName);
        solution.Name = BestSolutionParameterName;
        solution.Description = "Best solution on validation partition found over the whole run.";

        bestQuality = new DoubleValue(bestValidationNmse);
        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = bestQuality;

        // Detailed result reporting is currently disabled:
        // BestSymbolicTimeSeriesPrognosisSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
      }

      if (bestQuality == null) {
        // Nothing has been evaluated successfully so far, so there is nothing to report.
        return base.Apply();
      }

      var results = Results;
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        // First application: create all result entries; their values are set right below.
        results.Add(new Result(BestSolutionResultName, BestSolutionParameter.ActualValue));
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      results[BestSolutionResultName].Value = BestSolutionParameter.ActualValue;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(bestQuality.Value);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationNmse);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, bestQuality.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestValidationNmse, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
      #endregion
      return base.Apply();
    }

    /// <summary>
    /// Hook invoked after deserialization; intentionally empty.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void Initialize() {
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/> of
    /// <paramref name="table"/>; the row is created (with <paramref name="description"/>) and added
    /// to the table when it does not exist yet.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      if (row != null) {
        row.Values.Add(data);
        return;
      }
      // The first value is added before the row is attached to the table.
      row = new DataRow(name, description);
      row.Values.Add(data);
      table.Rows.Add(row);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.