source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs @ 13784

Last change on this file since 13784 was 13784, checked in by pfleck, 5 years ago

#2591 Made the creation of a GaussianProcessModel faster by avoiding additional iterators during calculation of the hyperparameter gradients.
The gradients of the hyperparameters are now calculated in one sweep and returned as IList, instead of returning an iterator (with yield return).
This avoids a large amount of Move-calls of the iterator, especially for covariance functions with a lot of hyperparameters.
Besides, the signature of the CovarianceGradientFunctionDelegate is changed, to return an IList instead of an IEnumerable to avoid unnececary ToList or ToArray calls.

File size: 15.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
30namespace HeuristicLab.Algorithms.DataAnalysis {
31  /// <summary>
32  /// Represents a Gaussian process model.
33  /// </summary>
34  [StorableClass]
35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
36  public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {
37    [Storable]
38    private double negativeLogLikelihood;
39    public double NegativeLogLikelihood {
40      get { return negativeLogLikelihood; }
41    }
42
43    [Storable]
44    private double[] hyperparameterGradients;
45    public double[] HyperparameterGradients {
46      get {
47        var copy = new double[hyperparameterGradients.Length];
48        Array.Copy(hyperparameterGradients, copy, copy.Length);
49        return copy;
50      }
51    }
52
53    [Storable]
54    private ICovarianceFunction covarianceFunction;
55    public ICovarianceFunction CovarianceFunction {
56      get { return covarianceFunction; }
57    }
58    [Storable]
59    private IMeanFunction meanFunction;
60    public IMeanFunction MeanFunction {
61      get { return meanFunction; }
62    }
63    [Storable]
64    private string targetVariable;
65    public string TargetVariable {
66      get { return targetVariable; }
67    }
68    [Storable]
69    private string[] allowedInputVariables;
70    public string[] AllowedInputVariables {
71      get { return allowedInputVariables; }
72    }
73
74    [Storable]
75    private double[] alpha;
76    [Storable]
77    private double sqrSigmaNoise;
78    public double SigmaNoise {
79      get { return Math.Sqrt(sqrSigmaNoise); }
80    }
81
82    [Storable]
83    private double[] meanParameter;
84    [Storable]
85    private double[] covarianceParameter;
86
87    private double[,] l; // used to be storable in previous versions (is calculated lazily now)
88    private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
89
90    // BackwardsCompatibility3.4
91    #region Backwards compatible code, remove with 3.5
92    [Storable(Name = "l")] // restore if available but don't store anymore
93    private double[,] l_storable {
94      set { this.l = value; }
95      get {
96        if (trainingDataset == null) return l; // this model has been created with an old version
97        else return null; // if the training dataset is available l should not be serialized
98      }
99    }
100    [Storable(Name = "x")] // restore if available but don't store anymore
101    private double[,] x_storable {
102      set { this.x = value; }
103      get {
104        if (trainingDataset == null) return x; // this model has been created with an old version
105        else return null; // if the training dataset is available x should not be serialized
106      }
107    }
108    #endregion
109
110
111    [Storable]
112    private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
113    [Storable]
114    private int[] trainingRows;
115
116    [Storable]
117    private Scaling inputScaling;
118
119
120    [StorableConstructor]
121    private GaussianProcessModel(bool deserializing) : base(deserializing) { }
122    private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
123      : base(original, cloner) {
124      this.meanFunction = cloner.Clone(original.meanFunction);
125      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
126      if (original.inputScaling != null)
127        this.inputScaling = cloner.Clone(original.inputScaling);
128      this.trainingDataset = cloner.Clone(original.trainingDataset);
129      this.negativeLogLikelihood = original.negativeLogLikelihood;
130      this.targetVariable = original.targetVariable;
131      this.sqrSigmaNoise = original.sqrSigmaNoise;
132      if (original.meanParameter != null) {
133        this.meanParameter = (double[])original.meanParameter.Clone();
134      }
135      if (original.covarianceParameter != null) {
136        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
137      }
138
139      // shallow copies of arrays because they cannot be modified
140      this.trainingRows = original.trainingRows;
141      this.allowedInputVariables = original.allowedInputVariables;
142      this.alpha = original.alpha;
143      this.l = original.l;
144      this.x = original.x;
145    }
146    public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
147      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
148      bool scaleInputs = true)
149      : base() {
150      this.name = ItemName;
151      this.description = ItemDescription;
152      this.meanFunction = (IMeanFunction)meanFunction.Clone();
153      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
154      this.targetVariable = targetVariable;
155      this.allowedInputVariables = allowedInputVariables.ToArray();
156
157
158      int nVariables = this.allowedInputVariables.Length;
159      meanParameter = hyp
160        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
161        .ToArray();
162
163      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
164                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
165                                             .ToArray();
166      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
167      try {
168        CalculateModel(ds, rows, scaleInputs);
169      }
170      catch (alglib.alglibexception ae) {
171        // wrap exception so that calling code doesn't have to know about alglib implementation
172        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
173      }
174    }
175
176    private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
177      this.trainingDataset = (IDataset)ds.Clone();
178      this.trainingRows = rows.ToArray();
179      this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;
180
181      x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
182
183      IEnumerable<double> y;
184      y = ds.GetDoubleValues(targetVariable, rows);
185
186      int n = x.GetLength(0);
187
188      var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
189      // calculate cholesky decomposed (lower triangular) covariance matrix
190      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
191      this.l = CalculateL(x, cov, sqrSigmaNoise);
192
193      // calculate mean
194      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
195      double[] m = Enumerable.Range(0, x.GetLength(0))
196        .Select(r => mean.Mean(x, r))
197        .ToArray();
198
199      // calculate sum of diagonal elements for likelihood
200      double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();
201
202      // solve for alpha
203      double[] ym = y.Zip(m, (a, b) => a - b).ToArray();
204
205      int info;
206      alglib.densesolverreport denseSolveRep;
207
208      alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
209      for (int i = 0; i < alpha.Length; i++)
210        alpha[i] = alpha[i] / sqrSigmaNoise;
211      negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
212
213      // derivatives
214      int nAllowedVariables = x.GetLength(1);
215
216      alglib.matinvreport matInvRep;
217      double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
218      Array.Copy(l, lCopy, lCopy.Length);
219
220      alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
221      if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
222      for (int i = 0; i < n; i++) {
223        for (int j = 0; j <= i; j++)
224          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
225      }
226
227      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
228
229      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
230      for (int k = 0; k < meanGradients.Length; k++) {
231        var meanGrad = new double[alpha.Length];
232        for (int g = 0; g < meanGrad.Length; g++)
233          meanGrad[g] = mean.Gradient(x, g, k);
234        meanGradients[k] = -Util.ScalarProd(meanGrad, alpha);
235      }
236
237      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
238      if (covGradients.Length > 0) {
239        for (int i = 0; i < n; i++) {
240          for (int j = 0; j < i; j++) {
241            var g = cov.CovarianceGradient(x, i, j);
242            for (int k = 0; k < covGradients.Length; k++) {
243              covGradients[k] += lCopy[i, j] * g[k];
244            }
245          }
246
247          var gDiag = cov.CovarianceGradient(x, i, i);
248          for (int k = 0; k < covGradients.Length; k++) {
249            // diag
250            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
251          }
252        }
253      }
254
255      hyperparameterGradients =
256        meanGradients
257        .Concat(covGradients)
258        .Concat(new double[] { noiseGradient }).ToArray();
259
260    }
261
262    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
263      if (scaling != null) {
264        return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
265      } else {
266        return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
267      }
268    }
269
270    private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
271      int n = x.GetLength(0);
272      var l = new double[n, n];
273
274      // calculate covariances
275      for (int i = 0; i < n; i++) {
276        for (int j = i; j < n; j++) {
277          l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;
278          if (j == i) l[j, i] += 1.0;
279        }
280      }
281
282      // cholesky decomposition
283      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
284      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
285      return l;
286    }
287
288
289    public override IDeepCloneable Clone(Cloner cloner) {
290      return new GaussianProcessModel(this, cloner);
291    }
292
293    // is called by the solution creator to set all parameter values of the covariance and mean function
294    // to the optimized values (necessary to make the values visible in the GUI)
295    public void FixParameters() {
296      covarianceFunction.SetParameter(covarianceParameter);
297      meanFunction.SetParameter(meanParameter);
298      covarianceParameter = new double[0];
299      meanParameter = new double[0];
300    }
301
302    #region IRegressionModel Members
303    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
304      return GetEstimatedValuesHelper(dataset, rows);
305    }
306    public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
307      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
308    }
309    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
310      return CreateRegressionSolution(problemData);
311    }
312    #endregion
313
314
315    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
316      try {
317        if (x == null) {
318          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
319        }
320        int n = x.GetLength(0);
321
322        double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
323        int newN = newX.GetLength(0);
324
325        var Ks = new double[newN][];
326        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
327        var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
328        var ms = Enumerable.Range(0, newX.GetLength(0))
329        .Select(r => mean.Mean(newX, r))
330        .ToArray();
331        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
332        for (int i = 0; i < newN; i++) {
333          Ks[i] = new double[n];
334          for (int j = 0; j < n; j++) {
335            Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
336          }
337        }
338
339        return Enumerable.Range(0, newN)
340          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
341      }
342      catch (alglib.alglibexception ae) {
343        // wrap exception so that calling code doesn't have to know about alglib implementation
344        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
345      }
346    }
347
348    public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {
349      try {
350        if (x == null) {
351          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
352        }
353        int n = x.GetLength(0);
354
355        var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
356        int newN = newX.GetLength(0);
357
358        var kss = new double[newN];
359        double[,] sWKs = new double[n, newN];
360        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
361        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
362
363        if (l == null) {
364          l = CalculateL(x, cov, sqrSigmaNoise);
365        }
366
367        // for stddev
368        for (int i = 0; i < newN; i++)
369          kss[i] = cov.Covariance(newX, i, i);
370
371        for (int i = 0; i < newN; i++) {
372          for (int j = 0; j < n; j++) {
373            sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
374          }
375        }
376
377        // for stddev
378        alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
379
380        for (int i = 0; i < newN; i++) {
381          var col = Util.GetCol(sWKs, i).ToArray();
382          var sumV = Util.ScalarProd(col, col);
383          kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)
384          kss[i] -= sumV;
385          if (kss[i] < 0) kss[i] = 0;
386        }
387        return kss;
388      }
389      catch (alglib.alglibexception ae) {
390        // wrap exception so that calling code doesn't have to know about alglib implementation
391        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
392      }
393    }
394  }
395}
Note: See TracBrowser for help on using the repository browser.