Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis/3.3/Symbolic/Evaluators/SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator.cs @ 4401

Last change on this file since 4401 was 4401, checked in by gkronber, 14 years ago

Added model and solution classes for time series prognosis and added views for time series prognosis solutions. #1142

File size: 17.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis.SupportVectorMachine;
29using HeuristicLab.Problems.DataAnalysis;
30using HeuristicLab.Problems.DataAnalysis.Evaluators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Optimization;
33using HeuristicLab.Operators;
34using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
35using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
36using System.Collections.Generic;
37using HeuristicLab.Problems.DataAnalysis.Regression;
38using HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Interfaces;
39
40namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.TimeSeriesPrognosis.Symbolic.Evaluators {
41  [Item("SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator", "")]
42  [StorableClass]
43  public class SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator : SingleSuccessorOperator, ISingleObjectiveSymbolicTimeSeriesPrognosisEvaluator {
44    private const string RandomParameterName = "Random";
45    private const string DataAnalysisProblemDataParameterName = "MultiVariateDataAnalysisProblemData";
46    private const string TimeSeriesExpressionInterpreterParameterName = "TimeSeriesExpressionInterpreter";
47    private const string TimeSeriesPrognosisModelParameterName = "TimeSeriesPrognosisModel";
48    private const string PredictionHorizontParameterName = "PredictionHorizon";
49    private const string ConditionVariableParameterName = "ConditionVariableName";
50    private const string SamplesStartParameterName = "SamplesStart";
51    private const string SamplesEndParameterName = "SamplesEnd";
52    private const string AlphaParameterName = "Alpha";
53    private const string BetaParameterName = "Beta";
54    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
55    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
56    private const string QualityParameterName = "Quality";
57    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
58
59    #region parameter properties
60    public ILookupParameter<IRandom> RandomParameter {
61      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
62    }
63    public ILookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
64      get { return (ILookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
65    }
66    public ILookupParameter<ISymbolicTimeSeriesExpressionInterpreter> TimeSeriesExpressionInterpreterParameter {
67      get { return (ILookupParameter<ISymbolicTimeSeriesExpressionInterpreter>)Parameters[TimeSeriesExpressionInterpreterParameterName]; }
68    }
69    public IValueLookupParameter<IntValue> PredictionHorizonParameter {
70      get { return (IValueLookupParameter<IntValue>)Parameters[PredictionHorizontParameterName]; }
71    }
72    public OptionalValueParameter<StringValue> ConditionVariableNameParameter {
73      get { return (OptionalValueParameter<StringValue>)Parameters[ConditionVariableParameterName]; }
74    }
75    public IValueLookupParameter<IntValue> SamplesStartParameter {
76      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
77    }
78    public IValueLookupParameter<IntValue> SamplesEndParameter {
79      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
80    }
81    public ILookupParameter<SymbolicExpressionTree> TimeSeriesPrognosisModelParameter {
82      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[TimeSeriesPrognosisModelParameterName]; }
83    }
84    public ILookupParameter<DoubleValue> QualityParameter {
85      get { return (ILookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
86    }
87    public ILookupParameter<DoubleArray> AlphaParameter {
88      get { return (ILookupParameter<DoubleArray>)Parameters[AlphaParameterName]; }
89    }
90    public ILookupParameter<DoubleArray> BetaParameter {
91      get { return (ILookupParameter<DoubleArray>)Parameters[BetaParameterName]; }
92    }
93    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
94      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
95    }
96    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
97      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
98    }
99    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
100      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
101    }
102
103    #endregion
104    #region properties
105    public IRandom Random {
106      get { return RandomParameter.ActualValue; }
107    }
108    public MultiVariateDataAnalysisProblemData ProblemData {
109      get { return ProblemDataParameter.ActualValue; }
110    }
111    public ISymbolicTimeSeriesExpressionInterpreter TimeSeriesExpressionInterpreter {
112      get { return TimeSeriesExpressionInterpreterParameter.ActualValue; }
113    }
114    public IntValue PredictionHorizon {
115      get { return PredictionHorizonParameter.ActualValue; }
116    }
117    public StringValue ConditionVariableName {
118      get { return ConditionVariableNameParameter.Value; }
119    }
120    public IntValue SamplesStart {
121      get { return SamplesStartParameter.ActualValue; }
122    }
123    public IntValue SamplesEnd {
124      get { return SamplesEndParameter.ActualValue; }
125    }
126    public DoubleArray LowerEstimationLimit {
127      get { return LowerEstimationLimitParameter.ActualValue; }
128    }
129    public DoubleArray UpperEstimationLimit {
130      get { return UpperEstimationLimitParameter.ActualValue; }
131    }
132    public SymbolicExpressionTree TimeSeriesPrognosisModel {
133      get { return TimeSeriesPrognosisModelParameter.ActualValue; }
134    }
135    public PercentValue RelativeNumberOfEvaluatedSamples {
136      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
137    }
138    #endregion
139
140    public SymbolicTimeSeriesPrognosisScaledNormalizedMseEvaluator()
141      : base() {
142      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
143      Parameters.Add(new LookupParameter<MultiVariateDataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The data analysis problem data to use for training."));
144      Parameters.Add(new LookupParameter<ISymbolicTimeSeriesExpressionInterpreter>(TimeSeriesExpressionInterpreterParameterName, "The interpreter that should be used to evaluate the time series model represented as a symbolic expression tree."));
145      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The first index of the data set partition to use for training."));
146      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The last index of the data set partition to use for training."));
147      Parameters.Add(new ValueLookupParameter<IntValue>(PredictionHorizontParameterName, "The number of time steps for which to create a forecast."));
148      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower limit for estimated values."));
149      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper limit for estimated values."));
150      Parameters.Add(new OptionalValueParameter<StringValue>(ConditionVariableParameterName, "The name of the condition variable indicating if a row should be considered for evaluation or not."));
151      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(TimeSeriesPrognosisModelParameterName, "The time series prognosis model encoded as a symbolic expression tree."));
152      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the time series prognosis model encoded as a symbolic expression tree."));
153      Parameters.Add(new LookupParameter<DoubleArray>(AlphaParameterName, "The alpha parameter for linear scaling based on one step predictions."));
154      Parameters.Add(new LookupParameter<DoubleArray>(BetaParameterName, "The beta parameter for linear scaling based on one step predictions."));
155      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
156    }
157
158    public override IOperation Apply() {
159      double[] alpha, beta;
160      double quality;
161      string conditionVariableName = ConditionVariableName == null ? null : ConditionVariableName.Value;
162      int nRows = (int)Math.Ceiling((SamplesEnd.Value - SamplesStart.Value) * RelativeNumberOfEvaluatedSamples.Value);
163
164      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(Random.Next(), SamplesStart.Value, SamplesEnd.Value, nRows);
165      CalculateScalingParameters(TimeSeriesPrognosisModel, ProblemData, TimeSeriesExpressionInterpreter,
166        conditionVariableName, rows,
167        out beta, out alpha);
168
169      quality = Evaluate(TimeSeriesPrognosisModel, ProblemData, TimeSeriesExpressionInterpreter,
170        conditionVariableName, rows, PredictionHorizon.Value,
171        LowerEstimationLimit, UpperEstimationLimit,
172        beta, alpha);
173      QualityParameter.ActualValue = new DoubleValue(quality);
174      AlphaParameter.ActualValue = new DoubleArray(alpha);
175      BetaParameter.ActualValue = new DoubleArray(beta);
176      return base.Apply();
177    }
178
179    public static double Evaluate(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
180      ISymbolicTimeSeriesExpressionInterpreter interpreter,
181      IEnumerable<int> rows, int predictionHorizon,
182      DoubleArray lowerEstimationLimit, DoubleArray upperEstimationLimit,
183      double[] beta, double[] alpha) {
184      return Evaluate(tree, problemData, interpreter, null, rows, predictionHorizon, lowerEstimationLimit, upperEstimationLimit, beta, alpha);
185    }
186
187    public static double Evaluate(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
188      ISymbolicTimeSeriesExpressionInterpreter interpreter, string conditionVariableName,
189      IEnumerable<int> rows, int predictionHorizon,
190      DoubleArray lowerEstimationLimit, DoubleArray upperEstimationLimit,
191      double[] beta, double[] alpha) {
192      if (conditionVariableName != null) {
193        rows = from row in rows
194               where !problemData.Dataset[conditionVariableName, row].IsAlmost(0.0)
195               select row;
196      }
197      IEnumerable<string> selectedTargetVariables = from targetVariable in problemData.TargetVariables
198                                                    where problemData.TargetVariables.ItemChecked(targetVariable)
199                                                    select targetVariable.Value;
200
201      IEnumerable<double[]> estimatedValues =
202        interpreter.GetScaledSymbolicExpressionTreeValues(tree, problemData.Dataset, selectedTargetVariables,
203        rows, predictionHorizon, beta, alpha);
204
205      IEnumerable<double[]> originalValues = from row in rows
206                                             from step in Enumerable.Range(0, predictionHorizon)
207                                             select (from targetVariable in selectedTargetVariables
208                                                     select problemData.Dataset[targetVariable, row + step]).ToArray();
209
210      List<OnlineNormalizedMeanSquaredErrorEvaluator> evaluators = new List<OnlineNormalizedMeanSquaredErrorEvaluator>();
211      foreach (string targetVariable in selectedTargetVariables)
212        evaluators.Add(new OnlineNormalizedMeanSquaredErrorEvaluator());
213
214      var estimatedValuesEnumerator = estimatedValues.GetEnumerator();
215      var originalValuesEnumerator = originalValues.GetEnumerator();
216      while (originalValuesEnumerator.MoveNext() & estimatedValuesEnumerator.MoveNext()) {
217        double[] original = originalValuesEnumerator.Current;
218        double[] estimated = estimatedValuesEnumerator.Current;
219        for (int i = 0; i < evaluators.Count; i++) {
220          if (double.IsNaN(estimated[i])) estimated[i] = upperEstimationLimit[i];
221          else estimated[i] = Math.Min(upperEstimationLimit[i], Math.Max(lowerEstimationLimit[i], estimated[i]));
222          evaluators[i].Add(original[i], estimated[i]);
223        }
224      }
225
226      double quality = evaluators.Select(x => x.NormalizedMeanSquaredError).Sum();
227      return quality;
228    }
229
230    public static void CalculateScalingParameters(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
231      ISymbolicTimeSeriesExpressionInterpreter interpreter,
232      IEnumerable<int> rows,
233      out double[] betas, out double[] alphas) {
234      CalculateScalingParameters(tree, problemData, interpreter, null, rows, out betas, out alphas);
235    }
236
237    public static void CalculateScalingParameters(SymbolicExpressionTree tree, MultiVariateDataAnalysisProblemData problemData,
238      ISymbolicTimeSeriesExpressionInterpreter interpreter, string conditionVariableName,
239      IEnumerable<int> rows, out double[] betas, out double[] alphas) {
240      IEnumerable<string> selectedTargetVariables = from item in problemData.TargetVariables
241                                                    where problemData.TargetVariables.ItemChecked(item)
242                                                    select item.Value;
243      int dimension = selectedTargetVariables.Count();
244
245      if (conditionVariableName != null) {
246        rows = from row in rows
247               where !problemData.Dataset[conditionVariableName, row].IsAlmost(0.0)
248               select row;
249      }
250
251      IEnumerable<double[]> oneStepPredictions =
252        interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, selectedTargetVariables, rows, 1);
253      IEnumerable<double[]> originalValues = from row in rows
254                                             select (from targetVariable in selectedTargetVariables
255                                                     select problemData.Dataset[targetVariable, row]).ToArray();
256
257      alphas = new double[dimension];
258      betas = new double[dimension];
259      int[] cnt = new int[dimension];
260      List<OnlineMeanAndVarianceCalculator> estimatedVarianceEvaluators = new List<OnlineMeanAndVarianceCalculator>();
261      List<OnlineCovarianceEvaluator> covarianceEvaluators = new List<OnlineCovarianceEvaluator>();
262      List<OnlineMeanAndVarianceCalculator> originalMeanCalculators = new List<OnlineMeanAndVarianceCalculator>();
263      foreach (var selectedTargetVariable in selectedTargetVariables) {
264        estimatedVarianceEvaluators.Add(new OnlineMeanAndVarianceCalculator());
265        covarianceEvaluators.Add(new OnlineCovarianceEvaluator());
266        originalMeanCalculators.Add(new OnlineMeanAndVarianceCalculator());
267      }
268      var estimatedEnumerator = oneStepPredictions.GetEnumerator();
269      var originalEnumerator = originalValues.GetEnumerator();
270      while (estimatedEnumerator.MoveNext() & originalEnumerator.MoveNext()) {
271        double[] original = originalEnumerator.Current;
272        double[] estimated = estimatedEnumerator.Current;
273        for (int component = 0; component < dimension; component++) {
274          if (IsValidValue(original[component]) && IsValidValue(estimated[component])) {
275            cnt[component]++;
276            estimatedVarianceEvaluators[component].Add(estimated[component]);
277            covarianceEvaluators[component].Add(original[component], estimated[component]);
278            originalMeanCalculators[component].Add(original[component]);
279          }
280        }
281      }
282      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext())
283        throw new InvalidOperationException("Number of elements in estimated and original series doesn't match.");
284      for (int component = 0; component < dimension; component++) {
285        if (cnt[component] < 2) {
286          alphas[component] = 0;
287          betas[component] = 1;
288        } else {
289          if (estimatedVarianceEvaluators[component].Variance.IsAlmost(0.0))
290            betas[component] = 1;
291          else
292            betas[component] = covarianceEvaluators[component].Covariance / estimatedVarianceEvaluators[component].Variance;
293
294          alphas[component] = originalMeanCalculators[component].Mean - betas[component] * estimatedVarianceEvaluators[component].Mean;
295        }
296      }
297    }
298
299    private static bool IsValidValue(double d) {
300      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -1.0E07 && d < 1.0E07;  // don't consider very large or very small values for scaling
301    }
302  }
303}
Note: See TracBrowser for help on using the repository browser.