Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs @ 14888

Last change on this file since 14888 was 14854, checked in by gkronber, 8 years ago

#2697: introduced original code for scaling from AlglibUtil into GaussianProcessModel to guarantee backwards compatibility regarding unit test results

File size: 15.6 KB
RevLine 
[8323]1#region License Information
2/* HeuristicLab
[14185]3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[8323]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
[8371]30namespace HeuristicLab.Algorithms.DataAnalysis {
[8323]31  /// <summary>
32  /// Represents a Gaussian process model.
33  /// </summary>
34  [StorableClass]
35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
[13941]36  public sealed class GaussianProcessModel : RegressionModel, IGaussianProcessModel {
37    public override IEnumerable<string> VariablesUsedForPrediction {
[13922]38      get { return allowedInputVariables; }
39    }
[13921]40
[8323]41    [Storable]
42    private double negativeLogLikelihood;
43    public double NegativeLogLikelihood {
44      get { return negativeLogLikelihood; }
45    }
46
47    [Storable]
[8484]48    private double[] hyperparameterGradients;
49    public double[] HyperparameterGradients {
50      get {
51        var copy = new double[hyperparameterGradients.Length];
52        Array.Copy(hyperparameterGradients, copy, copy.Length);
53        return copy;
54      }
55    }
56
57    [Storable]
[8323]58    private ICovarianceFunction covarianceFunction;
59    public ICovarianceFunction CovarianceFunction {
60      get { return covarianceFunction; }
61    }
62    [Storable]
63    private IMeanFunction meanFunction;
64    public IMeanFunction MeanFunction {
65      get { return meanFunction; }
66    }
[13941]67
[8323]68    [Storable]
69    private string[] allowedInputVariables;
70    public string[] AllowedInputVariables {
71      get { return allowedInputVariables; }
72    }
73
74    [Storable]
75    private double[] alpha;
76    [Storable]
77    private double sqrSigmaNoise;
[8582]78    public double SigmaNoise {
79      get { return Math.Sqrt(sqrSigmaNoise); }
80    }
[8323]81
82    [Storable]
[8982]83    private double[] meanParameter;
84    [Storable]
85    private double[] covarianceParameter;
86
[12819]87    private double[,] l; // used to be storable in previous versions (is calculated lazily now)
88    private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
89
90    // BackwardsCompatibility3.4
91    #region Backwards compatible code, remove with 3.5
92    [Storable(Name = "l")] // restore if available but don't store anymore
93    private double[,] l_storable {
94      set { this.l = value; }
95      get {
96        if (trainingDataset == null) return l; // this model has been created with an old version
97        else return null; // if the training dataset is available l should not be serialized
98      }
99    }
100    [Storable(Name = "x")] // restore if available but don't store anymore
101    private double[,] x_storable {
102      set { this.x = value; }
103      get {
104        if (trainingDataset == null) return x; // this model has been created with an old version
105        else return null; // if the training dataset is available x should not be serialized
106      }
107    }
108    #endregion
109
110
[8982]111    [Storable]
[12819]112    private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
113    [Storable]
114    private int[] trainingRows;
[8323]115
116    [Storable]
[8463]117    private Scaling inputScaling;
[8323]118
119
120    [StorableConstructor]
121    private GaussianProcessModel(bool deserializing) : base(deserializing) { }
122    private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
123      : base(original, cloner) {
124      this.meanFunction = cloner.Clone(original.meanFunction);
125      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
[13118]126      if (original.inputScaling != null)
127        this.inputScaling = cloner.Clone(original.inputScaling);
[12819]128      this.trainingDataset = cloner.Clone(original.trainingDataset);
[8323]129      this.negativeLogLikelihood = original.negativeLogLikelihood;
[8416]130      this.sqrSigmaNoise = original.sqrSigmaNoise;
[8982]131      if (original.meanParameter != null) {
132        this.meanParameter = (double[])original.meanParameter.Clone();
133      }
134      if (original.covarianceParameter != null) {
135        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
136      }
[8416]137
138      // shallow copies of arrays because they cannot be modified
[12819]139      this.trainingRows = original.trainingRows;
[8323]140      this.allowedInputVariables = original.allowedInputVariables;
141      this.alpha = original.alpha;
142      this.l = original.l;
143      this.x = original.x;
144    }
[12509]145    public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
[13118]146      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
147      bool scaleInputs = true)
[13941]148      : base(targetVariable) {
[8323]149      this.name = ItemName;
150      this.description = ItemDescription;
[8416]151      this.meanFunction = (IMeanFunction)meanFunction.Clone();
152      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
[8323]153      this.allowedInputVariables = allowedInputVariables.ToArray();
154
155
[8416]156      int nVariables = this.allowedInputVariables.Length;
[8982]157      meanParameter = hyp
[8416]158        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
[8982]159        .ToArray();
160
161      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
162                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
163                                             .ToArray();
[8473]164      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
[13160]165      try {
166        CalculateModel(ds, rows, scaleInputs);
[14843]167      } catch (alglib.alglibexception ae) {
[13160]168        // wrap exception so that calling code doesn't have to know about alglib implementation
169        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
170      }
[8323]171    }
172
[13118]173    private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
[12819]174      this.trainingDataset = (IDataset)ds.Clone();
175      this.trainingRows = rows.ToArray();
[13118]176      this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;
[8323]177
[13118]178      x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
179
180      IEnumerable<double> y;
[13941]181      y = ds.GetDoubleValues(TargetVariable, rows);
[13118]182
[8323]183      int n = x.GetLength(0);
184
[13721]185      var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
[12819]186      // calculate cholesky decomposed (lower triangular) covariance matrix
[13721]187      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
[12819]188      this.l = CalculateL(x, cov, sqrSigmaNoise);
189
190      // calculate mean
[13721]191      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
[8982]192      double[] m = Enumerable.Range(0, x.GetLength(0))
193        .Select(r => mean.Mean(x, r))
194        .ToArray();
195
[8323]196      // calculate sum of diagonal elements for likelihood
197      double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();
198
199      // solve for alpha
200      double[] ym = y.Zip(m, (a, b) => a - b).ToArray();
201
[12819]202      int info;
203      alglib.densesolverreport denseSolveRep;
204
[8323]205      alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
206      for (int i = 0; i < alpha.Length; i++)
207        alpha[i] = alpha[i] / sqrSigmaNoise;
208      negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
209
210      // derivatives
211      int nAllowedVariables = x.GetLength(1);
212
[8463]213      alglib.matinvreport matInvRep;
[8475]214      double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
215      Array.Copy(l, lCopy, lCopy.Length);
[8323]216
[8475]217      alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
[8463]218      if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
[8323]219      for (int i = 0; i < n; i++) {
[8463]220        for (int j = 0; j <= i; j++)
[8475]221          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
[8323]222      }
223
[8475]224      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
[8323]225
226      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
[8982]227      for (int k = 0; k < meanGradients.Length; k++) {
[13721]228        var meanGrad = new double[alpha.Length];
229        for (int g = 0; g < meanGrad.Length; g++)
230          meanGrad[g] = mean.Gradient(x, g, k);
[8982]231        meanGradients[k] = -Util.ScalarProd(meanGrad, alpha);
[8323]232      }
233
234      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
[8366]235      if (covGradients.Length > 0) {
236        for (int i = 0; i < n; i++) {
[8484]237          for (int j = 0; j < i; j++) {
[13784]238            var g = cov.CovarianceGradient(x, i, j);
[8484]239            for (int k = 0; k < covGradients.Length; k++) {
240              covGradients[k] += lCopy[i, j] * g[k];
[8366]241            }
[8323]242          }
[8484]243
[13784]244          var gDiag = cov.CovarianceGradient(x, i, i);
[8484]245          for (int k = 0; k < covGradients.Length; k++) {
246            // diag
247            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
248          }
[8323]249        }
250      }
251
[8484]252      hyperparameterGradients =
[8473]253        meanGradients
254        .Concat(covGradients)
255        .Concat(new double[] { noiseGradient }).ToArray();
[8484]256
[8323]257    }
258
[13118]259    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
260      if (scaling != null) {
[14854]261        // BackwardsCompatibility3.3
262        #region Backwards compatible code, remove with 3.4
[14843]263        // TODO: completely remove Scaling class
[14854]264        List<string> variablesList = allowedInputs.ToList();
265        List<int> rowsList = rows.ToList();
[14843]266
[14854]267        double[,] matrix = new double[rowsList.Count, variablesList.Count];
268
269        int col = 0;
270        foreach (string column in variablesList) {
271          var values = scaling.GetScaledValues(ds, column, rowsList);
272          int row = 0;
273          foreach (var value in values) {
274            matrix[row, col] = value;
275            row++;
276          }
277          col++;
[14843]278        }
[14854]279        return matrix;
280        #endregion
[13118]281      } else {
[14843]282        return ds.ToArray(allowedInputs, rows);
[13118]283      }
[12819]284    }
[8323]285
[12819]286    private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
287      int n = x.GetLength(0);
288      var l = new double[n, n];
289
290      // calculate covariances
291      for (int i = 0; i < n; i++) {
292        for (int j = i; j < n; j++) {
293          l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;
294          if (j == i) l[j, i] += 1.0;
295        }
296      }
297
298      // cholesky decomposition
299      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
300      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
301      return l;
302    }
303
304
[8323]305    public override IDeepCloneable Clone(Cloner cloner) {
306      return new GaussianProcessModel(this, cloner);
307    }
308
[8982]309    // is called by the solution creator to set all parameter values of the covariance and mean function
310    // to the optimized values (necessary to make the values visible in the GUI)
311    public void FixParameters() {
312      covarianceFunction.SetParameter(covarianceParameter);
313      meanFunction.SetParameter(meanParameter);
314      covarianceParameter = new double[0];
315      meanParameter = new double[0];
316    }
317
[8323]318    #region IRegressionModel Members
[13941]319    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
[8323]320      return GetEstimatedValuesHelper(dataset, rows);
321    }
[13941]322    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
[8528]323      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
[8323]324    }
325    #endregion
326
[8623]327
[12509]328    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
[13160]329      try {
330        if (x == null) {
331          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
332        }
333        int n = x.GetLength(0);
[12819]334
[13160]335        double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
336        int newN = newX.GetLength(0);
[12819]337
[13721]338        var Ks = new double[newN][];
339        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
340        var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
[13160]341        var ms = Enumerable.Range(0, newX.GetLength(0))
342        .Select(r => mean.Mean(newX, r))
343        .ToArray();
[13721]344        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
[13160]345        for (int i = 0; i < newN; i++) {
[13721]346          Ks[i] = new double[n];
[13160]347          for (int j = 0; j < n; j++) {
[13721]348            Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
[13160]349          }
[8323]350        }
[13160]351
352        return Enumerable.Range(0, newN)
[13721]353          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
[14843]354      } catch (alglib.alglibexception ae) {
[13160]355        // wrap exception so that calling code doesn't have to know about alglib implementation
356        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
[8323]357      }
358    }
[8473]359
[14095]360    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
[13160]361      try {
362        if (x == null) {
363          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
364        }
365        int n = x.GetLength(0);
[12819]366
[13160]367        var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
368        int newN = newX.GetLength(0);
[8473]369
[13160]370        var kss = new double[newN];
371        double[,] sWKs = new double[n, newN];
[13721]372        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
373        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
[8473]374
[13160]375        if (l == null) {
376          l = CalculateL(x, cov, sqrSigmaNoise);
377        }
[12819]378
[13160]379        // for stddev
380        for (int i = 0; i < newN; i++)
381          kss[i] = cov.Covariance(newX, i, i);
[8473]382
[13160]383        for (int i = 0; i < newN; i++) {
384          for (int j = 0; j < n; j++) {
385            sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
386          }
[8473]387        }
388
[13160]389        // for stddev
390        alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
[8473]391
[13160]392        for (int i = 0; i < newN; i++) {
[13721]393          var col = Util.GetCol(sWKs, i).ToArray();
394          var sumV = Util.ScalarProd(col, col);
[13160]395          kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)
396          kss[i] -= sumV;
397          if (kss[i] < 0) kss[i] = 0;
398        }
399        return kss;
[14843]400      } catch (alglib.alglibexception ae) {
[13160]401        // wrap exception so that calling code doesn't have to know about alglib implementation
402        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
[8473]403      }
404    }
[13921]405
[8323]406  }
407}
Note: See TracBrowser for help on using the repository browser.