Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs @ 15490

Last change on this file since 15490 was 15165, checked in by gkronber, 8 years ago

#2782: fixed calculation of log pseudo-likelihood by adding the noise term to the covariance function

File size: 16.5 KB
RevLine 
[8323]1#region License Information
2/* HeuristicLab
[14185]3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[8323]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
[8371]30namespace HeuristicLab.Algorithms.DataAnalysis {
[8323]31  /// <summary>
32  /// Represents a Gaussian process model.
33  /// </summary>
34  [StorableClass]
35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
[13941]36  public sealed class GaussianProcessModel : RegressionModel, IGaussianProcessModel {
37    public override IEnumerable<string> VariablesUsedForPrediction {
[13922]38      get { return allowedInputVariables; }
39    }
[13921]40
[8323]41    [Storable]
42    private double negativeLogLikelihood;
43    public double NegativeLogLikelihood {
44      get { return negativeLogLikelihood; }
45    }
46
47    [Storable]
[15160]48    private double loocvNegLogPseudoLikelihood;
49    public double LooCvNegativeLogPseudoLikelihood {
50      get { return loocvNegLogPseudoLikelihood; }
[14899]51    }
52
53    [Storable]
[8484]54    private double[] hyperparameterGradients;
55    public double[] HyperparameterGradients {
56      get {
57        var copy = new double[hyperparameterGradients.Length];
58        Array.Copy(hyperparameterGradients, copy, copy.Length);
59        return copy;
60      }
61    }
62
63    [Storable]
[8323]64    private ICovarianceFunction covarianceFunction;
65    public ICovarianceFunction CovarianceFunction {
66      get { return covarianceFunction; }
67    }
68    [Storable]
69    private IMeanFunction meanFunction;
70    public IMeanFunction MeanFunction {
71      get { return meanFunction; }
72    }
[13941]73
[8323]74    [Storable]
75    private string[] allowedInputVariables;
76    public string[] AllowedInputVariables {
77      get { return allowedInputVariables; }
78    }
79
80    [Storable]
81    private double[] alpha;
82    [Storable]
83    private double sqrSigmaNoise;
[8582]84    public double SigmaNoise {
85      get { return Math.Sqrt(sqrSigmaNoise); }
86    }
[8323]87
88    [Storable]
[8982]89    private double[] meanParameter;
90    [Storable]
91    private double[] covarianceParameter;
92
[12819]93    private double[,] l; // used to be storable in previous versions (is calculated lazily now)
94    private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
95
96    // BackwardsCompatibility3.4
97    #region Backwards compatible code, remove with 3.5
98    [Storable(Name = "l")] // restore if available but don't store anymore
99    private double[,] l_storable {
100      set { this.l = value; }
101      get {
102        if (trainingDataset == null) return l; // this model has been created with an old version
103        else return null; // if the training dataset is available l should not be serialized
104      }
105    }
106    [Storable(Name = "x")] // restore if available but don't store anymore
107    private double[,] x_storable {
108      set { this.x = value; }
109      get {
110        if (trainingDataset == null) return x; // this model has been created with an old version
111        else return null; // if the training dataset is available x should not be serialized
112      }
113    }
114    #endregion
115
116
[8982]117    [Storable]
[12819]118    private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
119    [Storable]
120    private int[] trainingRows;
[8323]121
122    [Storable]
[8463]123    private Scaling inputScaling;
[8323]124
125
126    [StorableConstructor]
127    private GaussianProcessModel(bool deserializing) : base(deserializing) { }
128    private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
129      : base(original, cloner) {
130      this.meanFunction = cloner.Clone(original.meanFunction);
131      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
[13118]132      if (original.inputScaling != null)
133        this.inputScaling = cloner.Clone(original.inputScaling);
[12819]134      this.trainingDataset = cloner.Clone(original.trainingDataset);
[8323]135      this.negativeLogLikelihood = original.negativeLogLikelihood;
[15160]136      this.loocvNegLogPseudoLikelihood = original.loocvNegLogPseudoLikelihood;
[8416]137      this.sqrSigmaNoise = original.sqrSigmaNoise;
[8982]138      if (original.meanParameter != null) {
139        this.meanParameter = (double[])original.meanParameter.Clone();
140      }
141      if (original.covarianceParameter != null) {
142        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
143      }
[8416]144
145      // shallow copies of arrays because they cannot be modified
[12819]146      this.trainingRows = original.trainingRows;
[8323]147      this.allowedInputVariables = original.allowedInputVariables;
148      this.alpha = original.alpha;
149      this.l = original.l;
150      this.x = original.x;
151    }
[12509]152    public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
[13118]153      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
154      bool scaleInputs = true)
[13941]155      : base(targetVariable) {
[8323]156      this.name = ItemName;
157      this.description = ItemDescription;
[8416]158      this.meanFunction = (IMeanFunction)meanFunction.Clone();
159      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
[8323]160      this.allowedInputVariables = allowedInputVariables.ToArray();
161
162
[8416]163      int nVariables = this.allowedInputVariables.Length;
[8982]164      meanParameter = hyp
[8416]165        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
[8982]166        .ToArray();
167
168      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
169                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
170                                             .ToArray();
[8473]171      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
[13160]172      try {
173        CalculateModel(ds, rows, scaleInputs);
[14843]174      } catch (alglib.alglibexception ae) {
[13160]175        // wrap exception so that calling code doesn't have to know about alglib implementation
176        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
177      }
[8323]178    }
179
[13118]180    private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
[12819]181      this.trainingDataset = (IDataset)ds.Clone();
182      this.trainingRows = rows.ToArray();
[13118]183      this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;
[8323]184
[13118]185      x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
186
187      IEnumerable<double> y;
[13941]188      y = ds.GetDoubleValues(TargetVariable, rows);
[13118]189
[8323]190      int n = x.GetLength(0);
191
[13721]192      var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
[12819]193      // calculate cholesky decomposed (lower triangular) covariance matrix
[13721]194      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
[12819]195      this.l = CalculateL(x, cov, sqrSigmaNoise);
196
197      // calculate mean
[13721]198      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
[8982]199      double[] m = Enumerable.Range(0, x.GetLength(0))
200        .Select(r => mean.Mean(x, r))
201        .ToArray();
202
[8323]203      // calculate sum of diagonal elements for likelihood
204      double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();
205
206      // solve for alpha
207      double[] ym = y.Zip(m, (a, b) => a - b).ToArray();
208
[12819]209      int info;
210      alglib.densesolverreport denseSolveRep;
211
[8323]212      alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
213      for (int i = 0; i < alpha.Length; i++)
214        alpha[i] = alpha[i] / sqrSigmaNoise;
215      negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
216
217      // derivatives
218      int nAllowedVariables = x.GetLength(1);
219
[8463]220      alglib.matinvreport matInvRep;
[8475]221      double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
222      Array.Copy(l, lCopy, lCopy.Length);
[8323]223
[8475]224      alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
[8463]225      if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
[14899]226
[15160]227      // LOOCV log pseudo-likelihood (or log predictive probability) (GPML page 116 and 117)
[14899]228      var sumLoo = 0.0;
229      var ki = new double[n];
[8323]230      for (int i = 0; i < n; i++) {
[14899]231        for (int j = 0; j < n; j++) ki[j] = cov.Covariance(x, i, j);
[15165]232        ki[i] += sqrSigmaNoise;
233
[14899]234        var yi = Util.ScalarProd(ki, alpha);
[15163]235        var yi_loo = yi - alpha[i] / (lCopy[i, i] / sqrSigmaNoise);
[15165]236        var s2_loo = 1.0 / (lCopy[i, i] / sqrSigmaNoise);
[14899]237        var err = ym[i] - yi_loo;
[15165]238        var nll_loo = 0.5 * Math.Log(2 * Math.PI * s2_loo) + 0.5 * err * err / s2_loo;
[14899]239        sumLoo += nll_loo;
240      }
[15165]241      loocvNegLogPseudoLikelihood = sumLoo;
[14899]242
243      for (int i = 0; i < n; i++) {
[8463]244        for (int j = 0; j <= i; j++)
[8475]245          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
[8323]246      }
247
[8475]248      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
[8323]249
250      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
[8982]251      for (int k = 0; k < meanGradients.Length; k++) {
[13721]252        var meanGrad = new double[alpha.Length];
253        for (int g = 0; g < meanGrad.Length; g++)
254          meanGrad[g] = mean.Gradient(x, g, k);
[8982]255        meanGradients[k] = -Util.ScalarProd(meanGrad, alpha);
[8323]256      }
257
258      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
[8366]259      if (covGradients.Length > 0) {
260        for (int i = 0; i < n; i++) {
[8484]261          for (int j = 0; j < i; j++) {
[13784]262            var g = cov.CovarianceGradient(x, i, j);
[8484]263            for (int k = 0; k < covGradients.Length; k++) {
264              covGradients[k] += lCopy[i, j] * g[k];
[8366]265            }
[8323]266          }
[8484]267
[13784]268          var gDiag = cov.CovarianceGradient(x, i, i);
[8484]269          for (int k = 0; k < covGradients.Length; k++) {
270            // diag
271            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
272          }
[8323]273        }
274      }
275
[8484]276      hyperparameterGradients =
[8473]277        meanGradients
278        .Concat(covGradients)
279        .Concat(new double[] { noiseGradient }).ToArray();
[8484]280
[8323]281    }
282
[13118]283    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
284      if (scaling != null) {
[14854]285        // BackwardsCompatibility3.3
286        #region Backwards compatible code, remove with 3.4
[14843]287        // TODO: completely remove Scaling class
[14854]288        List<string> variablesList = allowedInputs.ToList();
289        List<int> rowsList = rows.ToList();
[14843]290
[14854]291        double[,] matrix = new double[rowsList.Count, variablesList.Count];
292
293        int col = 0;
294        foreach (string column in variablesList) {
295          var values = scaling.GetScaledValues(ds, column, rowsList);
296          int row = 0;
297          foreach (var value in values) {
298            matrix[row, col] = value;
299            row++;
300          }
301          col++;
[14843]302        }
[14854]303        return matrix;
304        #endregion
[13118]305      } else {
[14843]306        return ds.ToArray(allowedInputs, rows);
[13118]307      }
[12819]308    }
[8323]309
[12819]310    private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
311      int n = x.GetLength(0);
312      var l = new double[n, n];
313
314      // calculate covariances
315      for (int i = 0; i < n; i++) {
316        for (int j = i; j < n; j++) {
317          l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;
318          if (j == i) l[j, i] += 1.0;
319        }
320      }
321
322      // cholesky decomposition
323      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
324      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
325      return l;
326    }
327
328
[8323]329    public override IDeepCloneable Clone(Cloner cloner) {
330      return new GaussianProcessModel(this, cloner);
331    }
332
[8982]333    // is called by the solution creator to set all parameter values of the covariance and mean function
334    // to the optimized values (necessary to make the values visible in the GUI)
335    public void FixParameters() {
336      covarianceFunction.SetParameter(covarianceParameter);
337      meanFunction.SetParameter(meanParameter);
338      covarianceParameter = new double[0];
339      meanParameter = new double[0];
340    }
341
[8323]342    #region IRegressionModel Members
[13941]343    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
[8323]344      return GetEstimatedValuesHelper(dataset, rows);
345    }
[13941]346    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
[8528]347      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
[8323]348    }
349    #endregion
350
[8623]351
[12509]352    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
[13160]353      try {
354        if (x == null) {
355          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
356        }
357        int n = x.GetLength(0);
[12819]358
[13160]359        double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
360        int newN = newX.GetLength(0);
[12819]361
[13721]362        var Ks = new double[newN][];
363        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
364        var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
[13160]365        var ms = Enumerable.Range(0, newX.GetLength(0))
366        .Select(r => mean.Mean(newX, r))
367        .ToArray();
[13721]368        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
[13160]369        for (int i = 0; i < newN; i++) {
[13721]370          Ks[i] = new double[n];
[13160]371          for (int j = 0; j < n; j++) {
[13721]372            Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
[13160]373          }
[8323]374        }
[13160]375
376        return Enumerable.Range(0, newN)
[13721]377          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
[14843]378      } catch (alglib.alglibexception ae) {
[13160]379        // wrap exception so that calling code doesn't have to know about alglib implementation
380        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
[8323]381      }
382    }
[8473]383
[14095]384    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
[13160]385      try {
386        if (x == null) {
387          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
388        }
389        int n = x.GetLength(0);
[12819]390
[13160]391        var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
392        int newN = newX.GetLength(0);
[8473]393
[13160]394        var kss = new double[newN];
395        double[,] sWKs = new double[n, newN];
[13721]396        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
397        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
[8473]398
[13160]399        if (l == null) {
400          l = CalculateL(x, cov, sqrSigmaNoise);
401        }
[12819]402
[13160]403        // for stddev
404        for (int i = 0; i < newN; i++)
405          kss[i] = cov.Covariance(newX, i, i);
[8473]406
[13160]407        for (int i = 0; i < newN; i++) {
408          for (int j = 0; j < n; j++) {
409            sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
410          }
[8473]411        }
412
[13160]413        // for stddev
414        alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
[8473]415
[13160]416        for (int i = 0; i < newN; i++) {
[13721]417          var col = Util.GetCol(sWKs, i).ToArray();
418          var sumV = Util.ScalarProd(col, col);
[13160]419          kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)
420          kss[i] -= sumV;
421          if (kss[i] < 0) kss[i] = 0;
422        }
423        return kss;
[14843]424      } catch (alglib.alglibexception ae) {
[13160]425        // wrap exception so that calling code doesn't have to know about alglib implementation
426        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
[8473]427      }
428    }
[13921]429
[8323]430  }
431}
Note: See TracBrowser for help on using the repository browser.