Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis/3.3/Symbolic/Evaluators/SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator.cs @ 4460

Last change on this file since 4460 was 4460, checked in by gkronber, 13 years ago

Fixed bug in scaled MSE evaluator for time series. #1142

File size: 17.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis.SupportVectorMachine;
29using HeuristicLab.Problems.DataAnalysis;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Optimization;
33using HeuristicLab.Operators;
34using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
35using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
36using System.Collections.Generic;
37using HeuristicLab.Problems.DataAnalysis.Regression;
38using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Interfaces;
39
40namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Evaluators {
41  [Item("SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator", "")]
42  [StorableClass]
43  public class SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator : SingleSuccessorOperator, ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator {
44    private const string RandomParameterName = "Random";
45    private const string DataAnalysisProblemDataParameterName = "MultiVariateDataAnalysisProblemData";
46    private const string TimeSeriesExpressionInterpreterParameterName = "TimeSeriesExpressionInterpreter";
47    private const string TimeSeriesPrognosisModelParameterName = "TimeSeriesPrognosisModel";
48    private const string PredictionHorizontParameterName = "PredictionHorizon";
49    private const string ConditionVariableParameterName = "ConditionVariableName";
50    private const string SamplesStartParameterName = "SamplesStart";
51    private const string SamplesEndParameterName = "SamplesEnd";
52    private const string AlphaParameterName = "Alpha";
53    private const string BetaParameterName = "Beta";
54    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
55    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
56    private const string QualityParameterName = "Quality";
57    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
58
59    #region parameter properties
60    public ILookupParameter<IRandom> RandomParameter {
61      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
62    }
63    public ILookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
64      get { return (ILookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
65    }
66    public ILookupParameter<ISymbolicTimeSeriesExpressionInterpreter> TimeSeriesExpressionInterpreterParameter {
67      get { return (ILookupParameter<ISymbolicTimeSeriesExpressionInterpreter>)Parameters[TimeSeriesExpressionInterpreterParameterName]; }
68    }
69    public IValueLookupParameter<IntValue> PredictionHorizonParameter {
70      get { return (IValueLookupParameter<IntValue>)Parameters[PredictionHorizontParameterName]; }
71    }
72    public OptionalValueParameter<StringValue> ConditionVariableNameParameter {
73      get { return (OptionalValueParameter<StringValue>)Parameters[ConditionVariableParameterName]; }
74    }
75    public IValueLookupParameter<IntValue> SamplesStartParameter {
76      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
77    }
78    public IValueLookupParameter<IntValue> SamplesEndParameter {
79      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
80    }
81    public ILookupParameter<SymbolicExpressionTree> TimeSeriesPrognosisModelParameter {
82      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[TimeSeriesPrognosisModelParameterName]; }
83    }
84    public ILookupParameter<DoubleValue> QualityParameter {
85      get { return (ILookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
86    }
87    public ILookupParameter<DoubleArray> AlphaParameter {
88      get { return (ILookupParameter<DoubleArray>)Parameters[AlphaParameterName]; }
89    }
90    public ILookupParameter<DoubleArray> BetaParameter {
91      get { return (ILookupParameter<DoubleArray>)Parameters[BetaParameterName]; }
92    }
93    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
94      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
95    }
96    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
97      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
98    }
99    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
100      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
101    }
102
103    #endregion
104    #region properties
105    public IRandom Random {
106      get { return RandomParameter.ActualValue; }
107    }
108    public MultiVariateDataAnalysisProblemData ProblemData {
109      get { return ProblemDataParameter.ActualValue; }
110    }
111    public ISymbolicTimeSeriesExpressionInterpreter TimeSeriesExpressionInterpreter {
112      get { return TimeSeriesExpressionInterpreterParameter.ActualValue; }
113    }
114    public IntValue PredictionHorizon {
115      get { return PredictionHorizonParameter.ActualValue; }
116    }
117    public StringValue ConditionVariableName {
118      get { return ConditionVariableNameParameter.Value; }
119    }
120    public IntValue SamplesStart {
121      get { return SamplesStartParameter.ActualValue; }
122    }
123    public IntValue SamplesEnd {
124      get { return SamplesEndParameter.ActualValue; }
125    }
126    public DoubleArray LowerEstimationLimit {
127      get { return LowerEstimationLimitParameter.ActualValue; }
128    }
129    public DoubleArray UpperEstimationLimit {
130      get { return UpperEstimationLimitParameter.ActualValue; }
131    }
132    public SymbolicExpressionTree TimeSeriesPrognosisModel {
133      get { return TimeSeriesPrognosisModelParameter.ActualValue; }
134    }
135    public PercentValue RelativeNumberOfEvaluatedSamples {
136      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
137    }
138    #endregion
139
140    public SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator()
141      : base() {
142      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
143      Parameters.Add(new LookupParameter<MultiVariateDataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The data analysis problem data to use for training."));
144      Parameters.Add(new LookupParameter<ISymbolicTimeSeriesExpressionInterpreter>(TimeSeriesExpressionInterpreterParameterName, "The interpreter that should be used to evaluate the time series model represented as a symbolic expression tree."));
145      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The first index of the data set partition to use for training."));
146      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The last index of the data set partition to use for training."));
147      Parameters.Add(new ValueLookupParameter<IntValue>(PredictionHorizontParameterName, "The number of time steps for which to create a forecast."));
148      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower limit for estimated values."));
149      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper limit for estimated values."));
150      Parameters.Add(new OptionalValueParameter<StringValue>(ConditionVariableParameterName, "The name of the condition variable indicating if a row should be considered for evaluation or not."));
151      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(TimeSeriesPrognosisModelParameterName, "The time series prognosis model encoded as a symbolic expression tree."));
152      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the time series prognosis model encoded as a symbolic expression tree."));
153      Parameters.Add(new LookupParameter<DoubleArray>(AlphaParameterName, "The alpha parameter for linear scaling based on one step predictions."));
154      Parameters.Add(new LookupParameter<DoubleArray>(BetaParameterName, "The beta parameter for linear scaling based on one step predictions."));
155      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
156    }
157
158    public override IOperation Apply() {
159      double[] alpha, beta;
160      double quality;
161      string conditionVariableName = ConditionVariableName == null ? null : ConditionVariableName.Value;
162      int nRows = (int)Math.Ceiling((SamplesEnd.Value - SamplesStart.Value) * RelativeNumberOfEvaluatedSamples.Value);
163
164      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(Random.Next(), SamplesStart.Value, SamplesEnd.Value, nRows);
165      CalculateScalingParameters(TimeSeriesPrognosisModel, ProblemData, TimeSeriesExpressionInterpreter,
166        conditionVariableName, rows,
167        out beta, out alpha);
168
169      quality = Evaluate(TimeSeriesPrognosisModel, ProblemData, TimeSeriesExpressionInterpreter,
170        conditionVariableName, rows, PredictionHorizon.Value,
171        LowerEstimationLimit, UpperEstimationLimit,
172        beta, alpha);
173      QualityParameter.ActualValue = new DoubleValue(quality);
174      AlphaParameter.ActualValue = new DoubleArray(alpha);
175      BetaParameter.ActualValue = new DoubleArray(beta);
176      return base.Apply();
177    }
178
179    public static double Evaluate(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
180      ISymbolicTimeSeriesExpressionInterpreter interpreter,
181      IEnumerable<int> rows, int predictionHorizon,
182      DoubleArray lowerEstimationLimit, DoubleArray upperEstimationLimit,
183      double[] beta, double[] alpha) {
184      return Evaluate(tree, problemData, interpreter, null, rows, predictionHorizon, lowerEstimationLimit, upperEstimationLimit, beta, alpha);
185    }
186
187    public static double Evaluate(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
188      ISymbolicTimeSeriesExpressionInterpreter interpreter, string conditionVariableName,
189      IEnumerable<int> rows, int predictionHorizon,
190      DoubleArray lowerEstimationLimit, DoubleArray upperEstimationLimit,
191      double[] beta, double[] alpha) {
192      if (conditionVariableName != null) {
193        rows = from row in rows
194               where !problemData.Dataset[conditionVariableName, row].IsAlmost(0.0)
195               select row;
196      }
197      IEnumerable<string> selectedTargetVariables = from targetVariable in problemData.TargetVariables
198                                                    where problemData.TargetVariables.ItemChecked(targetVariable)
199                                                    select targetVariable.Value;
200
201      IEnumerable<double[]> estimatedValues =
202        interpreter.GetScaledSymbolicExpressionTreeValues(tree, problemData.Dataset, selectedTargetVariables,
203        rows, predictionHorizon, beta, alpha);
204
205      IEnumerable<double[]> originalValues = from row in rows
206                                             from step in Enumerable.Range(0, predictionHorizon)
207                                             select (from targetVariable in selectedTargetVariables
208                                                     select problemData.Dataset[targetVariable, row + step]).ToArray();
209
210      List<OnlineNormalizedMeanSquaredErrorEvaluator> evaluators = new List<OnlineNormalizedMeanSquaredErrorEvaluator>();
211      foreach (string targetVariable in selectedTargetVariables)
212        evaluators.Add(new OnlineNormalizedMeanSquaredErrorEvaluator());
213
214      var estimatedValuesEnumerator = estimatedValues.GetEnumerator();
215      var originalValuesEnumerator = originalValues.GetEnumerator();
216      while (originalValuesEnumerator.MoveNext() & estimatedValuesEnumerator.MoveNext()) {
217        double[] original = originalValuesEnumerator.Current;
218        double[] estimated = estimatedValuesEnumerator.Current;
219        for (int i = 0; i < evaluators.Count; i++) {
220          if (double.IsNaN(estimated[i])) estimated[i] = upperEstimationLimit[i];
221          else estimated[i] = Math.Min(upperEstimationLimit[i], Math.Max(lowerEstimationLimit[i], estimated[i]));
222          evaluators[i].Add(original[i], estimated[i]);
223        }
224      }
225
226      double quality = evaluators.Select(x => x.NormalizedMeanSquaredError).Sum();
227      return quality;
228    }
229
230    public static void CalculateScalingParameters(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
231      ISymbolicTimeSeriesExpressionInterpreter interpreter,
232      IEnumerable<int> rows,
233      out double[] betas, out double[] alphas) {
234      CalculateScalingParameters(tree, problemData, interpreter, null, rows, out betas, out alphas);
235    }
236
237    public static void CalculateScalingParameters(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
238      ISymbolicTimeSeriesExpressionInterpreter interpreter, string conditionVariableName,
239      IEnumerable<int> rows, out double[] betas, out double[] alphas) {
240      IEnumerable<string> selectedTargetVariables = from item in problemData.TargetVariables
241                                                    where problemData.TargetVariables.ItemChecked(item)
242                                                    select item.Value;
243      int dimension = selectedTargetVariables.Count();
244
245      if (conditionVariableName != null) {
246        rows = from row in rows
247               where !problemData.Dataset[conditionVariableName, row].IsAlmost(0.0)
248               select row;
249      }
250
251      IEnumerable<double[]> oneStepPredictions =
252        interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, selectedTargetVariables, rows, 1);
253      IEnumerable<double[]> originalValues = from row in rows
254                                             select (from targetVariable in selectedTargetVariables
255                                                     select problemData.Dataset[targetVariable, row]).ToArray();
256
257      alphas = new double[dimension];
258      betas = new double[dimension];
259      int[] cnt = new int[dimension];
260      List<OnlineMeanAndVarianceCalculator> estimatedVarianceEvaluators = new List<OnlineMeanAndVarianceCalculator>();
261      List<OnlineCovarianceEvaluator> covarianceEvaluators = new List<OnlineCovarianceEvaluator>();
262      List<OnlineMeanAndVarianceCalculator> originalMeanCalculators = new List<OnlineMeanAndVarianceCalculator>();
263      foreach (var selectedTargetVariable in selectedTargetVariables) {
264        estimatedVarianceEvaluators.Add(new OnlineMeanAndVarianceCalculator());
265        covarianceEvaluators.Add(new OnlineCovarianceEvaluator());
266        originalMeanCalculators.Add(new OnlineMeanAndVarianceCalculator());
267      }
268      var estimatedEnumerator = oneStepPredictions.GetEnumerator();
269      var originalEnumerator = originalValues.GetEnumerator();
270      while (estimatedEnumerator.MoveNext() & originalEnumerator.MoveNext()) {
271        double[] original = originalEnumerator.Current;
272        double[] estimated = estimatedEnumerator.Current;
273        for (int component = 0; component < dimension; component++) {
274          if (IsValidValue(original[component]) && IsValidValue(estimated[component])) {
275            cnt[component]++;
276            estimatedVarianceEvaluators[component].Add(estimated[component]);
277            covarianceEvaluators[component].Add(original[component], estimated[component]);
278            originalMeanCalculators[component].Add(original[component]);
279          }
280        }
281      }
282      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext())
283        throw new InvalidOperationException("Number of elements in estimated and original series doesn't match.");
284      for (int component = 0; component < dimension; component++) {
285        if (cnt[component] < 2) {
286          alphas[component] = 0;
287          betas[component] = 1;
288        } else {
289          if (estimatedVarianceEvaluators[component].PopulationVariance.IsAlmost(0.0))
290            betas[component] = 1;
291          else
292            betas[component] = covarianceEvaluators[component].Covariance / estimatedVarianceEvaluators[component].PopulationVariance;
293
294          alphas[component] = originalMeanCalculators[component].Mean - betas[component] * estimatedVarianceEvaluators[component].Mean;
295        }
296      }
297    }
298
299    private static bool IsValidValue(double d) {
300      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -1.0E07 && d < 1.0E07;  // don't consider very large or very small values for scaling
301    }
302  }
303}
Note: See TracBrowser for help on using the repository browser.