Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2789_MathNetNumerics-Exploration/HeuristicLab.Algorithms.DataAnalysis.Experimental/GaussianProcessModelMKL.cs @ 16799

Last change on this file since 16799 was 14991, checked in by gkronber, 8 years ago

#2789 implemented an alternative GaussianProcessModel which uses Math.Numerics and Intel MKL

File size: 14.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29using MathNet.Numerics.Data.Matlab;
30using MathNet.Numerics.LinearAlgebra;
31using MathNet.Numerics.LinearAlgebra.Double;
32using MathNet.Numerics.LinearAlgebra.Factorization;
33
34namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
35  // TODO: scale y
36  // TODO: remove dependence of scaling and export scaling parameters
37  // TODO: export / import all relevant data
38  [StorableClass]
39  [Item("GaussianProcessModelMKL", "Represents a Gaussian process posterior.")]
40  public sealed class GaussianProcessModelMKL : RegressionModel, IGaussianProcessModel {
41    public override IEnumerable<string> VariablesUsedForPrediction {
42      get { return allowedInputVariables; }
43    }
44
45    [Storable]
46    private double negativeLogLikelihood;
47    public double NegativeLogLikelihood {
48      get { return negativeLogLikelihood; }
49    }
50
51    [Storable]
52    private double negativeLooPredictiveProbability;
53    public double NegativeLooPredictiveProbability {
54      get { return negativeLooPredictiveProbability; }
55    }
56
57    [Storable]
58    private double[] hyperparameterGradients;
59    public double[] HyperparameterGradients {
60      get {
61        var copy = new double[hyperparameterGradients.Length];
62        Array.Copy(hyperparameterGradients, copy, copy.Length);
63        return copy;
64      }
65    }
66
67    [Storable]
68    private ICovarianceFunction covarianceFunction;
69    public ICovarianceFunction CovarianceFunction {
70      get { return covarianceFunction; }
71    }
72    [Storable]
73    private IMeanFunction meanFunction;
74    public IMeanFunction MeanFunction {
75      get { return meanFunction; }
76    }
77
78    [Storable]
79    private string[] allowedInputVariables;
80    public string[] AllowedInputVariables {
81      get { return allowedInputVariables; }
82    }
83
84    [Storable]
85    private Vector<double> alpha;
86    [Storable]
87    private double sqrSigmaNoise;
88    public double SigmaNoise {
89      get { return Math.Sqrt(sqrSigmaNoise); }
90    }
91
92    [Storable]
93    private double[] meanParameter;
94    [Storable]
95    private double[] covarianceParameter;
96
97    private Matrix<double> l;
98    private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
99
100
101    [Storable]
102    private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
103    [Storable]
104    private int[] trainingRows;
105
106    [Storable]
107    private Scaling inputScaling;
108
109
110    [StorableConstructor]
111    private GaussianProcessModelMKL(bool deserializing) : base(deserializing) { }
112    private GaussianProcessModelMKL(GaussianProcessModelMKL original, Cloner cloner)
113      : base(original, cloner) {
114      this.meanFunction = cloner.Clone(original.meanFunction);
115      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
116      if (original.inputScaling != null)
117        this.inputScaling = cloner.Clone(original.inputScaling);
118      this.trainingDataset = cloner.Clone(original.trainingDataset);
119      this.negativeLogLikelihood = original.negativeLogLikelihood;
120      this.negativeLooPredictiveProbability = original.negativeLooPredictiveProbability;
121      this.sqrSigmaNoise = original.sqrSigmaNoise;
122      if (original.meanParameter != null) {
123        this.meanParameter = (double[])original.meanParameter.Clone();
124      }
125      if (original.covarianceParameter != null) {
126        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
127      }
128
129      // shallow copies of arrays because they cannot be modified
130      this.trainingRows = original.trainingRows;
131      this.allowedInputVariables = original.allowedInputVariables;
132      this.alpha = original.alpha;
133      this.l = original.l;
134      this.x = original.x;
135    }
136    public GaussianProcessModelMKL(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
137      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
138      bool scaleInputs = true)
139      : base(targetVariable) {
140      // MathNet.Numerics.Control.UseNativeMKL();
141      // MathNet.Numerics.Control.UseSingleThread();
142      // this.Description += Control.LinearAlgebraProvider.ToString();
143      this.name = ItemName;
144      this.description = ItemDescription;
145      this.meanFunction = (IMeanFunction)meanFunction.Clone();
146      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
147      this.allowedInputVariables = allowedInputVariables.ToArray();
148
149
150      int nVariables = this.allowedInputVariables.Length;
151      meanParameter = hyp
152        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
153        .ToArray();
154
155      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
156                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
157                                             .ToArray();
158      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
159      CalculateModel(ds, rows, scaleInputs);
160    }
161
162    private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
163      this.trainingDataset = (IDataset)ds.Clone();
164      this.trainingRows = rows.ToArray();
165      this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;
166
167      x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
168
169      IEnumerable<double> y;
170      y = ds.GetDoubleValues(TargetVariable, rows);
171
172      int n = x.GetLength(0);
173
174      var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
175      // calculate cholesky decomposed (lower triangular) covariance matrix
176      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
177      var chol = CalculateL(x, cov, sqrSigmaNoise);
178      this.l = chol.Factor;
179
180      // calculate mean
181      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
182      double[] m = Enumerable.Range(0, x.GetLength(0))
183        .Select(r => mean.Mean(x, r))
184        .ToArray();
185
186
187      // solve for alpha
188      Vector<double> ym = DenseVector.OfEnumerable(y.Zip(m, (a, b) => a - b));
189
190
191      alpha = chol.Solve(ym);
192      alpha = alpha * 1.0 / sqrSigmaNoise;
193
194      // calculate sum of diagonal elements for likelihood
195      double diagSum = chol.DeterminantLn;
196      negativeLogLikelihood = 0.5 * ym.DotProduct(alpha) + 0.5 * diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
197
198      // derivatives
199      int nAllowedVariables = x.GetLength(1);
200
201      Matrix<double> lCopy;
202
203      lCopy = chol.Solve(DenseMatrix.CreateIdentity(n));
204
205      // LOOCV log predictive probability (GPML page 116 and 117)
206      var sumLoo = 0.0;
207      var ki = new DenseVector(n);
208      for (int i = 0; i < n; i++) {
209        for (int j = 0; j < n; j++) ki[j] = cov.Covariance(x, i, j);
210        var yi = ki.DotProduct(alpha);
211        var yi_loo = yi - alpha[i] / lCopy[i, i] / sqrSigmaNoise;
212        var s2_loo = sqrSigmaNoise / lCopy[i, i];
213        var err = ym[i] - yi_loo;
214        var nll_loo = Math.Log(s2_loo) + err * err / s2_loo;
215        sumLoo += nll_loo;
216      }
217      sumLoo += n * Math.Log(2 * Math.PI);
218      negativeLooPredictiveProbability = 0.5 * sumLoo;
219
220      for (int i = 0; i < n; i++) {
221        for (int j = 0; j <= i; j++)
222          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
223      }
224
225      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
226
227      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
228      for (int k = 0; k < meanGradients.Length; k++) {
229        var meanGrad = new DenseVector(alpha.Count);
230        for (int g = 0; g < meanGrad.Count; g++)
231          meanGrad[g] = mean.Gradient(x, g, k);
232
233        meanGradients[k] = -meanGrad.DotProduct(alpha);
234      }
235
236      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
237      if (covGradients.Length > 0) {
238        for (int i = 0; i < n; i++) {
239          for (int j = 0; j < i; j++) {
240            var g = cov.CovarianceGradient(x, i, j);
241            for (int k = 0; k < covGradients.Length; k++) {
242              covGradients[k] += lCopy[i, j] * g[k];
243            }
244          }
245
246          var gDiag = cov.CovarianceGradient(x, i, i);
247          for (int k = 0; k < covGradients.Length; k++) {
248            // diag
249            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
250          }
251        }
252      }
253
254      hyperparameterGradients =
255        meanGradients
256        .Concat(covGradients)
257        .Concat(new double[] { noiseGradient }).ToArray();
258
259    }
260
261    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
262      if (scaling != null) {
263        // BackwardsCompatibility3.3
264        #region Backwards compatible code, remove with 3.4
265        // TODO: completely remove Scaling class
266        List<string> variablesList = allowedInputs.ToList();
267        List<int> rowsList = rows.ToList();
268
269        double[,] matrix = new double[rowsList.Count, variablesList.Count];
270
271        int col = 0;
272        foreach (string column in variablesList) {
273          var values = scaling.GetScaledValues(ds, column, rowsList);
274          int row = 0;
275          foreach (var value in values) {
276            matrix[row, col] = value;
277            row++;
278          }
279          col++;
280        }
281        return matrix;
282        #endregion
283      } else {
284        return ds.ToArray(allowedInputs, rows);
285      }
286    }
287
288    private static Cholesky<double> CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
289      int n = x.GetLength(0);
290      var l = new DenseMatrix(n, n);
291
292      // calculate covariances
293      for (int i = 0; i < n; i++) {
294        for (int j = i; j < n; j++) {
295          l[j, i] = l[i, j] = cov.Covariance(x, i, j) / sqrSigmaNoise;
296          if (j == i) l[j, i] += 1.0;
297        }
298      }
299
300      // cholesky decomposition
301      return l.Cholesky();
302    }
303
304
305    public override IDeepCloneable Clone(Cloner cloner) {
306      return new GaussianProcessModelMKL(this, cloner);
307    }
308
309    // is called by the solution creator to set all parameter values of the covariance and mean function
310    // to the optimized values (necessary to make the values visible in the GUI)
311    public void FixParameters() {
312      covarianceFunction.SetParameter(covarianceParameter);
313      meanFunction.SetParameter(meanParameter);
314      covarianceParameter = new double[0];
315      meanParameter = new double[0];
316    }
317
318    #region IRegressionModel Members
319    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
320      return GetEstimatedValuesHelper(dataset, rows);
321    }
322    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
323      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
324    }
325    #endregion
326
327
328    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
329      if (x == null) {
330        x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
331      }
332      int n = x.GetLength(0);
333
334      double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
335      int newN = newX.GetLength(0);
336
337      var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
338      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
339      var ms = Enumerable.Range(0, newX.GetLength(0))
340      .Select(r => mean.Mean(newX, r))
341      .ToArray();
342      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
343      for (int i = 0; i < newN; i++) {
344        var Ks = new DenseVector(n);
345        for (int j = 0; j < n; j++) {
346          Ks[j] = cov.CrossCovariance(x, newX, j, i);
347        }
348        yield return ms[i] + Ks.DotProduct(alpha);
349      }
350    }
351
352    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
353      if (x == null) {
354        x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
355      }
356      int n = x.GetLength(0);
357
358      var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
359      int newN = newX.GetLength(0);
360      if (newN == 0) return Enumerable.Empty<double>();
361
362      var kss = new DenseVector(newN);
363      Matrix<double> sWKs = new DenseMatrix(n, newN);
364      var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
365      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
366
367      if (l == null) {
368        l = CalculateL(x, cov, sqrSigmaNoise).Factor;
369      }
370
371      // for stddev
372      for (int i = 0; i < newN; i++)
373        kss[i] = cov.Covariance(newX, i, i);
374
375      for (int i = 0; i < newN; i++) {
376        for (int j = 0; j < n; j++) {
377          sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
378        }
379      }
380
381      sWKs = l.Solve(sWKs);
382
383      for (int i = 0; i < newN; i++) {
384        var col = sWKs.Column(i);
385        var sumV = col.DotProduct(col);
386        kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)
387        kss[i] -= sumV;
388        if (kss[i] < 0) kss[i] = 0;
389      }
390      return kss;
391    }
392
393
394    public void Export(string fileName) {
395      MatlabWriter.Write<double>(fileName,
396        new Matrix<double>[] { DenseMatrix.OfArray(x), l, alpha.ToRowMatrix()},
397        new string[] { "x", "l", "alpha", }
398        );
399    }
400  }
401}
Note: See TracBrowser for help on using the repository browser.