Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs @ 13921

Last change on this file since 13921 was 13921, checked in by bburlacu, 8 years ago

#2604: Revert changes to DataAnalysisSolution and IDataAnalysisSolution and implement the desired properties in model classes that implement IDataAnalysisModel, IRegressionModel and IClassificationModel.

File size: 15.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
30namespace HeuristicLab.Algorithms.DataAnalysis {
31  /// <summary>
32  /// Represents a Gaussian process model.
33  /// </summary>
34  [StorableClass]
35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
36  public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {
37    public IEnumerable<string> VariablesUsedForPrediction { get; }
38
39    [Storable]
40    private double negativeLogLikelihood;
41    public double NegativeLogLikelihood {
42      get { return negativeLogLikelihood; }
43    }
44
45    [Storable]
46    private double[] hyperparameterGradients;
47    public double[] HyperparameterGradients {
48      get {
49        var copy = new double[hyperparameterGradients.Length];
50        Array.Copy(hyperparameterGradients, copy, copy.Length);
51        return copy;
52      }
53    }
54
55    [Storable]
56    private ICovarianceFunction covarianceFunction;
57    public ICovarianceFunction CovarianceFunction {
58      get { return covarianceFunction; }
59    }
60    [Storable]
61    private IMeanFunction meanFunction;
62    public IMeanFunction MeanFunction {
63      get { return meanFunction; }
64    }
65    [Storable]
66    private string targetVariable;
67    public string TargetVariable {
68      get { return targetVariable; }
69    }
70    [Storable]
71    private string[] allowedInputVariables;
72    public string[] AllowedInputVariables {
73      get { return allowedInputVariables; }
74    }
75
76    [Storable]
77    private double[] alpha;
78    [Storable]
79    private double sqrSigmaNoise;
80    public double SigmaNoise {
81      get { return Math.Sqrt(sqrSigmaNoise); }
82    }
83
84    [Storable]
85    private double[] meanParameter;
86    [Storable]
87    private double[] covarianceParameter;
88
89    private double[,] l; // used to be storable in previous versions (is calculated lazily now)
90    private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
91
92    // BackwardsCompatibility3.4
93    #region Backwards compatible code, remove with 3.5
94    [Storable(Name = "l")] // restore if available but don't store anymore
95    private double[,] l_storable {
96      set { this.l = value; }
97      get {
98        if (trainingDataset == null) return l; // this model has been created with an old version
99        else return null; // if the training dataset is available l should not be serialized
100      }
101    }
102    [Storable(Name = "x")] // restore if available but don't store anymore
103    private double[,] x_storable {
104      set { this.x = value; }
105      get {
106        if (trainingDataset == null) return x; // this model has been created with an old version
107        else return null; // if the training dataset is available x should not be serialized
108      }
109    }
110    #endregion
111
112
113    [Storable]
114    private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
115    [Storable]
116    private int[] trainingRows;
117
118    [Storable]
119    private Scaling inputScaling;
120
121
122    [StorableConstructor]
123    private GaussianProcessModel(bool deserializing) : base(deserializing) { }
124    private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
125      : base(original, cloner) {
126      this.meanFunction = cloner.Clone(original.meanFunction);
127      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
128      if (original.inputScaling != null)
129        this.inputScaling = cloner.Clone(original.inputScaling);
130      this.trainingDataset = cloner.Clone(original.trainingDataset);
131      this.negativeLogLikelihood = original.negativeLogLikelihood;
132      this.targetVariable = original.targetVariable;
133      this.sqrSigmaNoise = original.sqrSigmaNoise;
134      if (original.meanParameter != null) {
135        this.meanParameter = (double[])original.meanParameter.Clone();
136      }
137      if (original.covarianceParameter != null) {
138        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
139      }
140
141      // shallow copies of arrays because they cannot be modified
142      this.trainingRows = original.trainingRows;
143      this.allowedInputVariables = original.allowedInputVariables;
144      this.alpha = original.alpha;
145      this.l = original.l;
146      this.x = original.x;
147    }
148    public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
149      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
150      bool scaleInputs = true)
151      : base() {
152      this.name = ItemName;
153      this.description = ItemDescription;
154      this.meanFunction = (IMeanFunction)meanFunction.Clone();
155      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
156      this.targetVariable = targetVariable;
157      this.allowedInputVariables = allowedInputVariables.ToArray();
158
159
160      int nVariables = this.allowedInputVariables.Length;
161      meanParameter = hyp
162        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
163        .ToArray();
164
165      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
166                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
167                                             .ToArray();
168      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
169      try {
170        CalculateModel(ds, rows, scaleInputs);
171      }
172      catch (alglib.alglibexception ae) {
173        // wrap exception so that calling code doesn't have to know about alglib implementation
174        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
175      }
176    }
177
178    private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
179      this.trainingDataset = (IDataset)ds.Clone();
180      this.trainingRows = rows.ToArray();
181      this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;
182
183      x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
184
185      IEnumerable<double> y;
186      y = ds.GetDoubleValues(targetVariable, rows);
187
188      int n = x.GetLength(0);
189
190      var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
191      // calculate cholesky decomposed (lower triangular) covariance matrix
192      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
193      this.l = CalculateL(x, cov, sqrSigmaNoise);
194
195      // calculate mean
196      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
197      double[] m = Enumerable.Range(0, x.GetLength(0))
198        .Select(r => mean.Mean(x, r))
199        .ToArray();
200
201      // calculate sum of diagonal elements for likelihood
202      double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();
203
204      // solve for alpha
205      double[] ym = y.Zip(m, (a, b) => a - b).ToArray();
206
207      int info;
208      alglib.densesolverreport denseSolveRep;
209
210      alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
211      for (int i = 0; i < alpha.Length; i++)
212        alpha[i] = alpha[i] / sqrSigmaNoise;
213      negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
214
215      // derivatives
216      int nAllowedVariables = x.GetLength(1);
217
218      alglib.matinvreport matInvRep;
219      double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
220      Array.Copy(l, lCopy, lCopy.Length);
221
222      alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
223      if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
224      for (int i = 0; i < n; i++) {
225        for (int j = 0; j <= i; j++)
226          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
227      }
228
229      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
230
231      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
232      for (int k = 0; k < meanGradients.Length; k++) {
233        var meanGrad = new double[alpha.Length];
234        for (int g = 0; g < meanGrad.Length; g++)
235          meanGrad[g] = mean.Gradient(x, g, k);
236        meanGradients[k] = -Util.ScalarProd(meanGrad, alpha);
237      }
238
239      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
240      if (covGradients.Length > 0) {
241        for (int i = 0; i < n; i++) {
242          for (int j = 0; j < i; j++) {
243            var g = cov.CovarianceGradient(x, i, j);
244            for (int k = 0; k < covGradients.Length; k++) {
245              covGradients[k] += lCopy[i, j] * g[k];
246            }
247          }
248
249          var gDiag = cov.CovarianceGradient(x, i, i);
250          for (int k = 0; k < covGradients.Length; k++) {
251            // diag
252            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
253          }
254        }
255      }
256
257      hyperparameterGradients =
258        meanGradients
259        .Concat(covGradients)
260        .Concat(new double[] { noiseGradient }).ToArray();
261
262    }
263
264    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
265      if (scaling != null) {
266        return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
267      } else {
268        return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
269      }
270    }
271
272    private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
273      int n = x.GetLength(0);
274      var l = new double[n, n];
275
276      // calculate covariances
277      for (int i = 0; i < n; i++) {
278        for (int j = i; j < n; j++) {
279          l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;
280          if (j == i) l[j, i] += 1.0;
281        }
282      }
283
284      // cholesky decomposition
285      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
286      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
287      return l;
288    }
289
290
291    public override IDeepCloneable Clone(Cloner cloner) {
292      return new GaussianProcessModel(this, cloner);
293    }
294
295    // is called by the solution creator to set all parameter values of the covariance and mean function
296    // to the optimized values (necessary to make the values visible in the GUI)
297    public void FixParameters() {
298      covarianceFunction.SetParameter(covarianceParameter);
299      meanFunction.SetParameter(meanParameter);
300      covarianceParameter = new double[0];
301      meanParameter = new double[0];
302    }
303
304    #region IRegressionModel Members
305    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
306      return GetEstimatedValuesHelper(dataset, rows);
307    }
308    public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
309      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
310    }
311    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
312      return CreateRegressionSolution(problemData);
313    }
314    #endregion
315
316
317    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
318      try {
319        if (x == null) {
320          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
321        }
322        int n = x.GetLength(0);
323
324        double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
325        int newN = newX.GetLength(0);
326
327        var Ks = new double[newN][];
328        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
329        var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
330        var ms = Enumerable.Range(0, newX.GetLength(0))
331        .Select(r => mean.Mean(newX, r))
332        .ToArray();
333        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
334        for (int i = 0; i < newN; i++) {
335          Ks[i] = new double[n];
336          for (int j = 0; j < n; j++) {
337            Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
338          }
339        }
340
341        return Enumerable.Range(0, newN)
342          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
343      }
344      catch (alglib.alglibexception ae) {
345        // wrap exception so that calling code doesn't have to know about alglib implementation
346        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
347      }
348    }
349
350    public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {
351      try {
352        if (x == null) {
353          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
354        }
355        int n = x.GetLength(0);
356
357        var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
358        int newN = newX.GetLength(0);
359
360        var kss = new double[newN];
361        double[,] sWKs = new double[n, newN];
362        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
363        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
364
365        if (l == null) {
366          l = CalculateL(x, cov, sqrSigmaNoise);
367        }
368
369        // for stddev
370        for (int i = 0; i < newN; i++)
371          kss[i] = cov.Covariance(newX, i, i);
372
373        for (int i = 0; i < newN; i++) {
374          for (int j = 0; j < n; j++) {
375            sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
376          }
377        }
378
379        // for stddev
380        alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
381
382        for (int i = 0; i < newN; i++) {
383          var col = Util.GetCol(sWKs, i).ToArray();
384          var sumV = Util.ScalarProd(col, col);
385          kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)
386          kss[i] -= sumV;
387          if (kss[i] < 0) kss[i] = 0;
388        }
389        return kss;
390      }
391      catch (alglib.alglibexception ae) {
392        // wrap exception so that calling code doesn't have to know about alglib implementation
393        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
394      }
395    }
396
397  }
398}
Note: See TracBrowser for help on using the repository browser.