Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis/3.3/Symbolic/Analyzer/ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer.cs @ 5779

Last change on this file since 5779 was 5305, checked in by gkronber, 14 years ago

Worked on the data analysis feature exploration branch. #1142

File size: 37.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Linq;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
32using HeuristicLab.Problems.DataAnalysis.Symbolic;
33using System.Collections.Generic;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35using HeuristicLab.Problems.DataAnalysis;
36using System;
37
38using HeuristicLab.Problems.DataAnalysis.Evaluators;
39using HeuristicLab.Problems.DataAnalysis.Regression;
40using HeuristicLab.Analysis;
41using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Evaluators;
42using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic;
43using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Interfaces;
44
45namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Analyzers {
46  /// <summary>
47  /// An operator that analyzes the validation best scaled symbolic time series prognosis solution.
48  /// </summary>
49  [Item("ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic time series prognosis solution.")]
50  [StorableClass]
51  public sealed class ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
// Names under which this analyzer's input parameters are registered.
// NOTE(review): the validation partition bounds are registered under the generic names
// "SamplesStart"/"SamplesEnd" - presumably mapped to the validation range by the caller; confirm.
private const string RandomParameterName = "Random";
private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string EvaluatorParameterName = "Evaluator";
private const string MaximizationParameterName = "Maximization";
private const string ProblemDataParameterName = "ProblemData";
private const string ValidationSamplesStartParameterName = "SamplesStart";
private const string ValidationSamplesEndParameterName = "SamplesEnd";
private const string QualityParameterName = "Quality";
private const string ScaledQualityParameterName = "ScaledQuality";
private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
private const string ValidationPredictionHorizonParameterName = "ValidationPredictionHorizon";
private const string ModelPredictionHorizonParameterName = "ModelPredictionHorizon";
private const string ConditionVariableParameterName = "ConditionVariableName";
private const string ResultsParameterName = "Results";
private const string VariableFrequenciesParameterName = "VariableFrequencies";
private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

// Names related to the best validation solution and its quality.
// BestSolutionParameterName doubles as the display name of the created solution.
private const string BestSolutionParameterName = "Best solution (validation)";
private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
private const string BestSolutionQualityValuesParameterName = "Validation Quality";
private const string BestKnownQualityParameterName = "BestKnownQuality";
private const string GenerationsParameterName = "Generations";

// Names of the per-target-variable accuracy measures of the best validation solution.
// The same strings (suffixed with " (<target variable>)") are used as result collection keys.
private const string BestSolutionMeanSquaredErrorTrainingParameterName = "Best validation solution mean squared error (training)";
private const string BestSolutionMeanSquaredErrorTestParameterName = "Best validation solution mean squared error (test)";
private const string BestSolutionRSquaredTrainingParameterName = "Best validation solution R² (training)";
private const string BestSolutionRSquaredTestParameterName = "Best validation solution R² (test)";
private const string BestSolutionDirectionalSymmetryTrainingParameterName = "Best validation solution directional symmetry (training)";
private const string BestSolutionDirectionalSymmetryTestParameterName = "Best validation solution directional symmetry (test)";
private const string BestSolutionTheilsUTrainingParameterName = "Best validation solution Theil's U (training)";
private const string BestSolutionTheilsUTestParameterName = "Best validation solution Theil's U (test)";
private const string BestSolutionTheilsUTrendTrainingParameterName = "Best validation solution Theil's U with trend (training)";
private const string BestSolutionTheilsUTrendTestParameterName = "Best validation solution Theil's U with trend (test)";
88
#region parameter properties
// Typed accessors for the parameters registered in the default constructor.
// Each getter looks the parameter up by its name constant and casts it to the
// parameter type it was registered with; a mismatching type fails with an InvalidCastException.

/// <summary>Lookup parameter for the random number generator.</summary>
public ILookupParameter<IRandom> RandomParameter {
  get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
}
/// <summary>Scope tree lookup parameter for the symbolic expression trees to analyze.</summary>
public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
  get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
}
/// <summary>Optional parameter holding the name of the condition variable that decides whether a row is evaluated.</summary>
public OptionalValueParameter<StringValue> ConditionVariableNameParameter {
  get { return (OptionalValueParameter<StringValue>)Parameters[ConditionVariableParameterName]; }
}
/// <summary>Parameter for the interpreter used to analyze the symbolic expression trees.</summary>
public IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter> SymbolicExpressionTreeInterpreterParameter {
  get { return (IValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
}
/// <summary>Parameter for the problem data for which the trees are solutions.</summary>
public IValueLookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
  get { return (IValueLookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
}
/// <summary>Lookup parameter for the evaluator that computes the validation quality of a tree.</summary>
public ILookupParameter<ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator> EvaluatorParameter {
  get { return (ILookupParameter<ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator>)Parameters[EvaluatorParameterName]; }
}
/// <summary>Parameter for the first index of the validation partition.</summary>
public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
  get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
}
/// <summary>Parameter for the last index of the validation partition.</summary>
public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
  get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
}
/// <summary>Parameter for the upper estimation limits used when evaluating the trees.</summary>
public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
  get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
}
/// <summary>Parameter for the lower estimation limits used when evaluating the trees.</summary>
public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
  get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
}
/// <summary>Parameter for the number of time steps to forecast during validation.</summary>
public IValueLookupParameter<IntValue> ValidationPredictionHorizonParameter {
  get { return (IValueLookupParameter<IntValue>)Parameters[ValidationPredictionHorizonParameterName]; }
}
/// <summary>Parameter for the prediction horizon stored in the validation best model.</summary>
public IValueLookupParameter<IntValue> ModelPredictionHorizonParameter {
  get { return (IValueLookupParameter<IntValue>)Parameters[ModelPredictionHorizonParameterName]; }
}
/// <summary>Lookup parameter for the best solution found on the validation partition.</summary>
public ILookupParameter<SymbolicTimeSeriesPrognosisSolution> BestSolutionParameter {
  get { return (ILookupParameter<SymbolicTimeSeriesPrognosisSolution>)Parameters[BestSolutionParameterName]; }
}
/// <summary>Lookup parameter for the number of generations calculated so far.</summary>
public ILookupParameter<IntValue> GenerationsParameter {
  get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
}
/// <summary>Lookup parameter for the validation quality of the best solution.</summary>
public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
  get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
}
/// <summary>Lookup parameter for the result collection that receives the analysis results.</summary>
public ILookupParameter<ResultCollection> ResultsParameter {
  get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
}
/// <summary>Lookup parameter for the best known (validation) quality achieved on the data set.</summary>
public ILookupParameter<DoubleValue> BestKnownQualityParameter {
  get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
}
/// <summary>Lookup parameter for the variable frequencies table.</summary>
public ILookupParameter<DataTable> VariableFrequenciesParameter {
  get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
}
/// <summary>Parameter for the relative number of validation samples that are randomly chosen for evaluation.</summary>
public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
  get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
}
/// <summary>Lookup parameter for the flag telling whether the quality is maximized.</summary>
public ILookupParameter<BoolValue> MaximizationParameter {
  get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
}
/// <summary>Lookup parameter for the mean squared errors of the best validation solution on the training partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionMeanSquaredErrorTrainingParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionMeanSquaredErrorTrainingParameterName]; }
}
/// <summary>Lookup parameter for the mean squared errors of the best validation solution on the test partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionMeanSquaredErrorTestParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionMeanSquaredErrorTestParameterName]; }
}
/// <summary>Lookup parameter for the R² values of the best validation solution on the training partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionRSquaredTrainingParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionRSquaredTrainingParameterName]; }
}
/// <summary>Lookup parameter for the R² values of the best validation solution on the test partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionRSquaredTestParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionRSquaredTestParameterName]; }
}
/// <summary>Lookup parameter for the directional symmetry values of the best validation solution on the training partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionDirectionalSymmetryTrainingParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionDirectionalSymmetryTrainingParameterName]; }
}
/// <summary>Lookup parameter for the directional symmetry values of the best validation solution on the test partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionDirectionalSymmetryTestParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionDirectionalSymmetryTestParameterName]; }
}
/// <summary>Lookup parameter for the Theil's U values of the best validation solution on the training partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionTheilsUTrainingParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionTheilsUTrainingParameterName]; }
}
/// <summary>Lookup parameter for the Theil's U values of the best validation solution on the test partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionTheilsUTestParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionTheilsUTestParameterName]; }
}
/// <summary>Lookup parameter for the Theil's U (with trend) values of the best validation solution on the training partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionTheilsUTrendTrainingParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionTheilsUTrendTrainingParameterName]; }
}
/// <summary>Lookup parameter for the Theil's U (with trend) values of the best validation solution on the test partition.</summary>
public ILookupParameter<DoubleArray> BestSolutionTheilsUTrendTestParameter {
  get { return (ILookupParameter<DoubleArray>)Parameters[BestSolutionTheilsUTrendTestParameterName]; }
}
#endregion
#region properties
// Convenience accessors that unwrap the values of the parameters above.
// Lookup-style parameters are read through ActualValue; the two plain value
// parameters (condition variable name, relative number of samples) through Value.

/// <summary>The random number generator.</summary>
public IRandom Random {
  get { return RandomParameter.ActualValue; }
}
/// <summary>The symbolic expression trees to analyze.</summary>
public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
  get { return SymbolicExpressionTreeParameter.ActualValue; }
}
/// <summary>The interpreter used to analyze the symbolic expression trees.</summary>
public ISymbolicTimeSeriesExpressionInterpreter SymbolicExpressionTreeInterpreter {
  get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
}
/// <summary>The problem data for which the trees are solutions.</summary>
public MultiVariateDataAnalysisProblemData ProblemData {
  get { return ProblemDataParameter.ActualValue; }
}
/// <summary>The first index of the validation partition.</summary>
public IntValue ValidationSamplesStart {
  get { return ValidationSamplesStartParameter.ActualValue; }
}
/// <summary>
/// Misspelled alias of <see cref="ValidationSamplesStart"/>; kept so that existing callers keep compiling.
/// </summary>
public IntValue ValidiationSamplesStart {
  get { return ValidationSamplesStart; }
}
/// <summary>The last index of the validation partition.</summary>
public IntValue ValidationSamplesEnd {
  get { return ValidationSamplesEndParameter.ActualValue; }
}
/// <summary>The upper estimation limits used when evaluating the trees.</summary>
public DoubleArray UpperEstimationLimit {
  get { return UpperEstimationLimitParameter.ActualValue; }
}
/// <summary>The lower estimation limits used when evaluating the trees.</summary>
public DoubleArray LowerEstimationLimit {
  get { return LowerEstimationLimitParameter.ActualValue; }
}
/// <summary>The number of time steps to forecast during validation.</summary>
public IntValue ValidationPredictionHorizon {
  get { return ValidationPredictionHorizonParameter.ActualValue; }
}
/// <summary>The prediction horizon stored in the validation best model.</summary>
public IntValue ModelPredictionHorizon {
  get { return ModelPredictionHorizonParameter.ActualValue; }
}
/// <summary>The name of the condition variable; null when the optional parameter has no value.</summary>
public StringValue ConditionVariableName {
  get { return ConditionVariableNameParameter.Value; }
}
/// <summary>The result collection that receives the analysis results.</summary>
public ResultCollection Results {
  get { return ResultsParameter.ActualValue; }
}
/// <summary>The variable frequencies table.</summary>
public DataTable VariableFrequencies {
  get { return VariableFrequenciesParameter.ActualValue; }
}
/// <summary>The number of generations calculated so far.</summary>
public IntValue Generations {
  get { return GenerationsParameter.ActualValue; }
}
/// <summary>The relative number of validation samples that are randomly chosen for evaluation.</summary>
public PercentValue RelativeNumberOfEvaluatedSamples {
  get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
}
/// <summary>True if the quality is maximized, false if it is minimized.</summary>
public BoolValue Maximization {
  get { return MaximizationParameter.ActualValue; }
}
/// <summary>Mean squared errors of the best validation solution on the training partition.</summary>
public DoubleArray BestSolutionMeanSquaredErrorTraining {
  get { return BestSolutionMeanSquaredErrorTrainingParameter.ActualValue; }
  set { BestSolutionMeanSquaredErrorTrainingParameter.ActualValue = value; }
}
/// <summary>Mean squared errors of the best validation solution on the test partition.</summary>
public DoubleArray BestSolutionMeanSquaredErrorTest {
  get { return BestSolutionMeanSquaredErrorTestParameter.ActualValue; }
  set { BestSolutionMeanSquaredErrorTestParameter.ActualValue = value; }
}
/// <summary>R² values of the best validation solution on the training partition.</summary>
public DoubleArray BestSolutionRSquaredTraining {
  get { return BestSolutionRSquaredTrainingParameter.ActualValue; }
  set { BestSolutionRSquaredTrainingParameter.ActualValue = value; }
}
/// <summary>R² values of the best validation solution on the test partition.</summary>
public DoubleArray BestSolutionRSquaredTest {
  get { return BestSolutionRSquaredTestParameter.ActualValue; }
  set { BestSolutionRSquaredTestParameter.ActualValue = value; }
}
/// <summary>Directional symmetry values of the best validation solution on the training partition.</summary>
public DoubleArray BestSolutionDirectionalSymmetryTraining {
  get { return BestSolutionDirectionalSymmetryTrainingParameter.ActualValue; }
  set { BestSolutionDirectionalSymmetryTrainingParameter.ActualValue = value; }
}
/// <summary>Directional symmetry values of the best validation solution on the test partition.</summary>
public DoubleArray BestSolutionDirectionalSymmetryTest {
  get { return BestSolutionDirectionalSymmetryTestParameter.ActualValue; }
  set { BestSolutionDirectionalSymmetryTestParameter.ActualValue = value; }
}
/// <summary>Theil's U values of the best validation solution on the training partition.</summary>
public DoubleArray BestSolutionTheilsUTraining {
  get { return BestSolutionTheilsUTrainingParameter.ActualValue; }
  set { BestSolutionTheilsUTrainingParameter.ActualValue = value; }
}
/// <summary>Theil's U values of the best validation solution on the test partition.</summary>
public DoubleArray BestSolutionTheilsUTest {
  get { return BestSolutionTheilsUTestParameter.ActualValue; }
  set { BestSolutionTheilsUTestParameter.ActualValue = value; }
}
/// <summary>Theil's U (with trend) values of the best validation solution on the training partition.</summary>
public DoubleArray BestSolutionTheilsUTrendTraining {
  get { return BestSolutionTheilsUTrendTrainingParameter.ActualValue; }
  set { BestSolutionTheilsUTrendTrainingParameter.ActualValue = value; }
}
/// <summary>Theil's U (with trend) values of the best validation solution on the test partition.</summary>
public DoubleArray BestSolutionTheilsUTrendTest {
  get { return BestSolutionTheilsUTrendTestParameter.ActualValue; }
  set { BestSolutionTheilsUTrendTestParameter.ActualValue = value; }
}
#endregion
271
/// <summary>
/// Constructor marked for the persistence framework; it only forwards the flag to the base class.
/// </summary>
/// <param name="deserializing">Flag passed through to the base class constructor.</param>
// Private rather than protected: the class is sealed, so a protected member can never be
// reached from a derived type and only triggers compiler warning CS0628.
[StorableConstructor]
private ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer(bool deserializing) : base(deserializing) { }
/// <summary>
/// Cloning constructor; all copying is delegated to the base class.
/// </summary>
/// <param name="original">The analyzer to copy.</param>
/// <param name="cloner">The cloner that drives the deep-clone operation.</param>
// Private rather than protected: the class is sealed and the constructor is only invoked from Clone.
private ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer(ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer original, Cloner cloner)
  : base(original, cloner) {
}
/// <summary>
/// Creates the analyzer and registers all of its parameters.
/// The relative number of evaluated samples defaults to <c>new PercentValue(1)</c>;
/// every other parameter is created without a value.
/// </summary>
public ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer()
  : base() {
  // Inputs: trees, interpreter, evaluator, data partition and estimation limits.
  Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
  Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
  Parameters.Add(new OptionalValueParameter<StringValue>(ConditionVariableParameterName, "The name of the condition variable indicating if a row should be considered for evaluation or not."));
  Parameters.Add(new ValueLookupParameter<ISymbolicTimeSeriesExpressionInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
  Parameters.Add(new ValueLookupParameter<MultiVariateDataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
  Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
  Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
  Parameters.Add(new ValueLookupParameter<IntValue>(ValidationPredictionHorizonParameterName, "The number of time steps for which to create a forecast for the validation procedure."));
  Parameters.Add(new ValueLookupParameter<IntValue>(ModelPredictionHorizonParameterName, "The prediction horizon stored in the validation best model."));
  Parameters.Add(new LookupParameter<ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator>(EvaluatorParameterName, "The evaluator used to calculate the quality of the symbolic expression trees on the validation partition."));
  Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
  Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));

  // Best validation solution and bookkeeping.
  Parameters.Add(new LookupParameter<SymbolicTimeSeriesPrognosisSolution>(BestSolutionParameterName, "The best symbolic time series prognosis solution."));
  Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
  Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The validation quality of the best symbolic time series prognosis solution."));
  Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic time series prognosis solution should be stored."));
  Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
  Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts."));
  Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
  Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "True if the quality calculated by the evaluator should be maximized, false if it should be minimized."));

  // Accuracy measures of the best validation solution (one value per target variable).
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionMeanSquaredErrorTrainingParameterName, "The mean squared error of the best validation solution on the training partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionMeanSquaredErrorTestParameterName, "The mean squared error of the best validation solution on the test partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionRSquaredTrainingParameterName, "The R² of the best validation solution on the training partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionRSquaredTestParameterName, "The R² of the best validation solution on the test partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionDirectionalSymmetryTrainingParameterName, "The directional symmetry of the best validation solution on the training partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionDirectionalSymmetryTestParameterName, "The directional symmetry of the best validation solution on the test partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionTheilsUTrainingParameterName, "Theil's U of the best validation solution on the training partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionTheilsUTestParameterName, "Theil's U of the best validation solution on the test partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionTheilsUTrendTrainingParameterName, "Theil's U with trend of the best validation solution on the training partition for each target variable."));
  Parameters.Add(new LookupParameter<DoubleArray>(BestSolutionTheilsUTrendTestParameterName, "Theil's U with trend of the best validation solution on the test partition for each target variable."));
}
310
/// <summary>
/// Creates a copy of this analyzer by invoking the cloning constructor.
/// </summary>
/// <param name="cloner">The cloner that drives the deep-clone operation.</param>
/// <returns>The newly created analyzer.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  return new ValidationBestScaledSymbolicTimeSeriesPrognosisSolutionAnalyzer(this, cloner);
}
/// <summary>
/// Hook registered for the after-deserialization phase of the persistence framework.
/// </summary>
[StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
private void AfterDeserialization() {
  // Intentionally empty: there is currently nothing to fix up after loading.
}
318
319    public override IOperation Apply() {
320      var trees = SymbolicExpressionTree;
321
322      ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator evaluator = EvaluatorParameter.ActualValue;
323
324      int trainingStart = ProblemData.TrainingSamplesStart.Value;
325      int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
326      int testStart = ProblemData.TestSamplesStart.Value;
327      int testEnd = ProblemData.TestSamplesEnd.Value;
328
329      #region validation best model
330      int validationStart = ValidiationSamplesStart.Value;
331      int validationEnd = ValidationSamplesEnd.Value;
332      int rowCount = (int)Math.Ceiling((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
333      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(Random.Next(), validationStart, validationEnd, rowCount);
334      double bestValidationQuality = Maximization.Value ? double.MinValue : double.MaxValue;
335      SymbolicExpressionTree bestTree = null;
336      string conditionalVariableName = ConditionVariableName == null ? null : ConditionVariableName.Value;
337      if (conditionalVariableName != null) {
338        rows = from row in rows
339               where !ProblemData.Dataset[conditionalVariableName, row].IsAlmost(0.0)
340               select row;
341      }
342
343      foreach (var tree in trees) {
344        double validationQuality;
345        validationQuality = evaluator.Evaluate(tree, ProblemData,
346          SymbolicExpressionTreeInterpreter, rows, ValidationPredictionHorizon.Value, LowerEstimationLimit, UpperEstimationLimit);
347        if ((Maximization.Value && validationQuality > bestValidationQuality) ||
348           (!Maximization.Value && validationQuality < bestValidationQuality)) {
349          bestValidationQuality = validationQuality;
350          bestTree = tree;
351        }
352      }
353
354
355      if (BestSolutionQualityParameter.ActualValue == null ||
356        (Maximization.Value && BestSolutionQualityParameter.ActualValue.Value < bestValidationQuality) ||
357        (!Maximization.Value && BestSolutionQualityParameter.ActualValue.Value > bestValidationQuality)) {
358        var scaledTree = GetScaledTree(bestTree);
359        var model = new SymbolicTimeSeriesPrognosisModel((ISymbolicTimeSeriesExpressionInterpreter)SymbolicExpressionTreeInterpreter.Clone(), scaledTree);
360        model.Name = "Time series prognosis model";
361        model.Description = "Best solution on validation partition found over the whole run.";
362
363        var solution = new SymbolicTimeSeriesPrognosisSolution((MultiVariateDataAnalysisProblemData)ProblemData.Clone(), model, ModelPredictionHorizon.Value, conditionalVariableName, LowerEstimationLimit.ToArray(), UpperEstimationLimit.ToArray());
364        solution.Name = BestSolutionParameterName;
365        solution.Description = "Best solution on validation partition found over the whole run.";
366
367        BestSolutionParameter.ActualValue = solution;
368        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationQuality);
369
370        #region calculate accuracy
371        List<string> targetVariables = ProblemData.TargetVariables.CheckedItems.Select(x => x.Value.Value).ToList();
372
373        // create a list of time series evaluators for each target variable
374        Dictionary<string, List<IOnlineEvaluator>> trainingEvaluators =
375          new Dictionary<string, List<IOnlineEvaluator>>();
376        Dictionary<string, List<IOnlineEvaluator>> testEvaluators =
377          new Dictionary<string, List<IOnlineEvaluator>>();
378        foreach (string targetVariable in targetVariables) {
379          trainingEvaluators.Add(targetVariable, new List<IOnlineEvaluator>());
380          trainingEvaluators[targetVariable].Add(new OnlineMeanSquaredErrorEvaluator());
381          trainingEvaluators[targetVariable].Add(new OnlinePearsonsRSquaredEvaluator());
382          trainingEvaluators[targetVariable].Add(new OnlineDirectionalSymmetryEvaluator());
383          trainingEvaluators[targetVariable].Add(new OnlineTheilsUStatisticEvaluator());
384          trainingEvaluators[targetVariable].Add(new OnlineTheilsUStatisticEvaluator(10));
385
386          testEvaluators.Add(targetVariable, new List<IOnlineEvaluator>());
387          testEvaluators[targetVariable].Add(new OnlineMeanSquaredErrorEvaluator());
388          testEvaluators[targetVariable].Add(new OnlinePearsonsRSquaredEvaluator());
389          testEvaluators[targetVariable].Add(new OnlineDirectionalSymmetryEvaluator());
390          testEvaluators[targetVariable].Add(new OnlineTheilsUStatisticEvaluator());
391          testEvaluators[targetVariable].Add(new OnlineTheilsUStatisticEvaluator(10));
392        }
393
394        Evaluate(solution, solution.ProblemData.Dataset, targetVariables, conditionalVariableName, trainingStart, trainingEnd, trainingEvaluators);
395        Evaluate(solution, solution.ProblemData.Dataset, targetVariables, conditionalVariableName, testStart, testEnd, testEvaluators);
396        #endregion
397        BestSolutionMeanSquaredErrorTraining = new DoubleArray((from variable in targetVariables
398                                                                let eval = trainingEvaluators[variable].OfType<OnlineMeanSquaredErrorEvaluator>().Single()
399                                                                select TryGetValue(eval))
400                                                                .ToArray());
401        BestSolutionMeanSquaredErrorTest = new DoubleArray((from variable in targetVariables
402                                                            select TryGetValue(testEvaluators[variable].OfType<OnlineMeanSquaredErrorEvaluator>().Single()))
403                                                          .ToArray());
404        BestSolutionRSquaredTraining = new DoubleArray((from variable in targetVariables
405                                                        select TryGetValue(trainingEvaluators[variable].OfType<OnlinePearsonsRSquaredEvaluator>().Single()))
406                                                                .ToArray());
407        BestSolutionRSquaredTest = new DoubleArray((from variable in targetVariables
408                                                    select TryGetValue(testEvaluators[variable].OfType<OnlinePearsonsRSquaredEvaluator>().Single()))
409                                                          .ToArray());
410        BestSolutionDirectionalSymmetryTraining = new DoubleArray((from variable in targetVariables
411                                                                   select TryGetValue(trainingEvaluators[variable].OfType<OnlineDirectionalSymmetryEvaluator>().Single()))
412                                                                .ToArray());
413        BestSolutionDirectionalSymmetryTest = new DoubleArray((from variable in targetVariables
414                                                               select TryGetValue(testEvaluators[variable].OfType<OnlineDirectionalSymmetryEvaluator>().Single()))
415                                                          .ToArray());
416        BestSolutionTheilsUTraining = new DoubleArray((from variable in targetVariables
417                                                       select TryGetValue(trainingEvaluators[variable].OfType<OnlineTheilsUStatisticEvaluator>().First()))
418                                                                .ToArray());
419        BestSolutionTheilsUTest = new DoubleArray((from variable in targetVariables
420                                                   select TryGetValue(testEvaluators[variable].OfType<OnlineTheilsUStatisticEvaluator>().First()))
421                                                          .ToArray());
422        BestSolutionTheilsUTrendTraining = new DoubleArray((from variable in targetVariables
423                                                            select TryGetValue(trainingEvaluators[variable].OfType<OnlineTheilsUStatisticEvaluator>().Skip(1).First()))
424                                                                .ToArray());
425        BestSolutionTheilsUTrendTest = new DoubleArray((from variable in targetVariables
426                                                        select TryGetValue(testEvaluators[variable].OfType<OnlineTheilsUStatisticEvaluator>().Skip(1).First()))
427                                                          .ToArray());
428        if (!Results.ContainsKey(BestSolutionParameterName)) {
429          for (int i = 0; i < targetVariables.Count; i++) {
430            Results.Add(new Result(BestSolutionMeanSquaredErrorTrainingParameterName + " (" + targetVariables[i] + ")",
431              new DoubleValue(BestSolutionMeanSquaredErrorTraining[i])));
432            Results.Add(new Result(BestSolutionMeanSquaredErrorTestParameterName + " (" + targetVariables[i] + ")",
433              new DoubleValue(BestSolutionMeanSquaredErrorTest[i])));
434            Results.Add(new Result(BestSolutionRSquaredTrainingParameterName + " (" + targetVariables[i] + ")",
435              new DoubleValue(BestSolutionRSquaredTraining[i])));
436            Results.Add(new Result(BestSolutionRSquaredTestParameterName + " (" + targetVariables[i] + ")",
437              new DoubleValue(BestSolutionRSquaredTest[i])));
438            Results.Add(new Result(BestSolutionDirectionalSymmetryTrainingParameterName + " (" + targetVariables[i] + ")",
439              new DoubleValue(BestSolutionDirectionalSymmetryTraining[i])));
440            Results.Add(new Result(BestSolutionDirectionalSymmetryTestParameterName + " (" + targetVariables[i] + ")",
441              new DoubleValue(BestSolutionDirectionalSymmetryTest[i])));
442            Results.Add(new Result(BestSolutionTheilsUTrainingParameterName + " (" + targetVariables[i] + ")",
443              new DoubleValue(BestSolutionTheilsUTraining[i])));
444            Results.Add(new Result(BestSolutionTheilsUTestParameterName + " (" + targetVariables[i] + ")",
445              new DoubleValue(BestSolutionTheilsUTest[i])));
446            Results.Add(new Result(BestSolutionTheilsUTrendTrainingParameterName + " (" + targetVariables[i] + ")",
447              new DoubleValue(BestSolutionTheilsUTrendTraining[i])));
448            Results.Add(new Result(BestSolutionTheilsUTrendTestParameterName + " (" + targetVariables[i] + ")",
449              new DoubleValue(BestSolutionTheilsUTrendTest[i])));
450          }
451        } else {
452          for (int i = 0; i < targetVariables.Count; i++) {
453            Results[BestSolutionMeanSquaredErrorTrainingParameterName + " (" + targetVariables[i] + ")"].Value =
454              new DoubleValue(BestSolutionMeanSquaredErrorTraining[i]);
455            Results[BestSolutionMeanSquaredErrorTestParameterName + " (" + targetVariables[i] + ")"].Value =
456              new DoubleValue(BestSolutionMeanSquaredErrorTest[i]);
457            Results[BestSolutionRSquaredTrainingParameterName + " (" + targetVariables[i] + ")"].Value =
458              new DoubleValue(BestSolutionRSquaredTraining[i]);
459            Results[BestSolutionRSquaredTestParameterName + " (" + targetVariables[i] + ")"].Value =
460              new DoubleValue(BestSolutionRSquaredTest[i]);
461            Results[BestSolutionDirectionalSymmetryTrainingParameterName + " (" + targetVariables[i] + ")"].Value =
462              new DoubleValue(BestSolutionDirectionalSymmetryTraining[i]);
463            Results[BestSolutionDirectionalSymmetryTestParameterName + " (" + targetVariables[i] + ")"].Value =
464              new DoubleValue(BestSolutionDirectionalSymmetryTest[i]);
465            Results[BestSolutionTheilsUTrainingParameterName + " (" + targetVariables[i] + ")"].Value =
466              new DoubleValue(BestSolutionTheilsUTraining[i]);
467            Results[BestSolutionTheilsUTestParameterName + " (" + targetVariables[i] + ")"].Value =
468              new DoubleValue(BestSolutionTheilsUTest[i]);
469            Results[BestSolutionTheilsUTrendTrainingParameterName + " (" + targetVariables[i] + ")"].Value =
470              new DoubleValue(BestSolutionTheilsUTrendTraining[i]);
471            Results[BestSolutionTheilsUTrendTestParameterName + " (" + targetVariables[i] + ")"].Value =
472              new DoubleValue(BestSolutionTheilsUTrendTest[i]);
473          }
474        }
475      }
476
477      if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) {
478        Results.Add(new Result(BestSolutionParameterName, BestSolutionParameter.ActualValue));
479        Results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
480        Results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
481        Results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
482      }
483      Results[BestSolutionParameterName].Value = BestSolutionParameter.ActualValue;
484      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
485      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationQuality);
486
487      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
488      AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
489      AddValue(validationValues, bestValidationQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
490      #endregion
491      return base.Apply();
492    }
493
494    private double TryGetValue(IOnlineEvaluator eval) {
495      try {
496        return eval.Value;
497      }
498      catch {
499        return double.NaN;
500      }
501    }
502
503    private SymbolicExpressionTree GetScaledTree(SymbolicExpressionTree tree) {
504      double[] alpha, beta;
505      int trainingStart = ProblemData.TrainingSamplesStart.Value;
506      int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
507      IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
508      string conditionalVariableName = ConditionVariableName == null ? null : ConditionVariableName.Value;
509      if (conditionalVariableName != null) {
510        trainingRows = from row in trainingRows
511                       where !ProblemData.Dataset[conditionalVariableName, row].IsAlmost(0.0)
512                       select row;
513      }
514
515      // calculate scaling parameters based on one-step-predictions
516      IEnumerable<string> selectedTargetVariables = from item in ProblemData.TargetVariables
517                                                    where ProblemData.TargetVariables.ItemChecked(item)
518                                                    select item.Value;
519      int dimension = selectedTargetVariables.Count();
520
521      IEnumerable<IEnumerable<double>> oneStepPredictions =
522        SymbolicExpressionTreeInterpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, selectedTargetVariables, trainingRows, 1)
523        .Cast<IEnumerable<double>>();
524      IEnumerable<IEnumerable<double>> originalValues = from row in trainingRows
525                                                        select (from targetVariable in selectedTargetVariables
526                                                                select ProblemData.Dataset[targetVariable, row]);
527      alpha = new double[dimension];
528      beta = new double[dimension];
529
530      SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator.CalculateScalingParameters(originalValues, oneStepPredictions, ref beta, ref alpha);
531
532      // scale tree for solution
533      return SymbolicVectorRegressionSolutionLinearScaler.Scale(tree, beta, alpha);
534    }
535
536    private void Evaluate(SymbolicTimeSeriesPrognosisSolution solution, Dataset dataset, IEnumerable<string> targetVariables, string conditionalEvaluationVariable, int start, int end, Dictionary<string, List<IOnlineEvaluator>> evaluators) {
537
538      for (int row = start; row < end; row++) {
539        if (string.IsNullOrEmpty(conditionalEvaluationVariable) || dataset[conditionalEvaluationVariable, row] != 0) {
540          // prepare evaluators for each target variable for a new prediction window
541          foreach (var entry in evaluators) {
542            double referenceOriginalValue = dataset[entry.Key, row - 1];
543            foreach (IOnlineTimeSeriesPrognosisEvaluator evaluator in entry.Value.OfType<IOnlineTimeSeriesPrognosisEvaluator>()) {
544              evaluator.StartNewPredictionWindow(referenceOriginalValue);
545            }
546          }
547
548
549          if (string.IsNullOrEmpty(conditionalEvaluationVariable) ||
550            dataset[conditionalEvaluationVariable, row] > 0) {
551            int timestep = 0;
552            foreach (double[] x in solution.GetPrognosis(row)) {
553              int targetIndex = 0;
554              if (row + timestep < dataset.Rows) {
555                foreach (var targetVariable in targetVariables) {
556                  double originalValue = dataset[targetVariable, row + timestep];
557                  double estimatedValue = x[targetIndex];
558                  if (IsValidValue(originalValue) && IsValidValue(estimatedValue)) {
559                    foreach (IOnlineEvaluator evaluator in evaluators[targetVariable]) {
560                      evaluator.Add(originalValue, estimatedValue);
561                    }
562                  }
563                  targetIndex++;
564                }
565              }
566              timestep++;
567            }
568          }
569        }
570      }
571    }
572    private bool IsValidValue(double d) {
573      return !(double.IsNaN(d) || double.IsInfinity(d));
574    }
575
576    private static void AddValue(DataTable table, double data, string name, string description) {
577      DataRow row;
578      table.Rows.TryGetValue(name, out row);
579      if (row == null) {
580        row = new DataRow(name, description);
581        row.Values.Add(data);
582        table.Rows.Add(row);
583      } else {
584        row.Values.Add(data);
585      }
586    }
587  }
588}
Note: See TracBrowser for help on using the repository browser.