Free cookie consent management tool by TermsFeed Policy Generator

source: branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs @ 9674

Last change on this file since 9674 was 8982, checked in by gkronber, 12 years ago

#1902: removed class HyperParameter and changed implementations of covariance and mean functions to remove the parameter value caching and event handlers for parameter caching. Instead it is now possible to create the actual covariance and mean functions as Func from templates and specified parameter values. The instances of mean and covariance functions configured in the GUI are actually templates where the structure and fixed parameters can be specified.

File size: 11.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
30namespace HeuristicLab.Algorithms.DataAnalysis {
31  /// <summary>
32  /// Represents a Gaussian process model.
33  /// </summary>
34  [StorableClass]
35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
36  public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {
37    [Storable]
38    private double negativeLogLikelihood;
39    public double NegativeLogLikelihood {
40      get { return negativeLogLikelihood; }
41    }
42
43    [Storable]
44    private double[] hyperparameterGradients;
45    public double[] HyperparameterGradients {
46      get {
47        var copy = new double[hyperparameterGradients.Length];
48        Array.Copy(hyperparameterGradients, copy, copy.Length);
49        return copy;
50      }
51    }
52
53    [Storable]
54    private ICovarianceFunction covarianceFunction;
55    public ICovarianceFunction CovarianceFunction {
56      get { return covarianceFunction; }
57    }
58    [Storable]
59    private IMeanFunction meanFunction;
60    public IMeanFunction MeanFunction {
61      get { return meanFunction; }
62    }
63    [Storable]
64    private string targetVariable;
65    public string TargetVariable {
66      get { return targetVariable; }
67    }
68    [Storable]
69    private string[] allowedInputVariables;
70    public string[] AllowedInputVariables {
71      get { return allowedInputVariables; }
72    }
73
74    [Storable]
75    private double[] alpha;
76    [Storable]
77    private double sqrSigmaNoise;
78    public double SigmaNoise {
79      get { return Math.Sqrt(sqrSigmaNoise); }
80    }
81
82    [Storable]
83    private double[] meanParameter;
84    [Storable]
85    private double[] covarianceParameter;
86
87    [Storable]
88    private double[,] l;
89
90    [Storable]
91    private double[,] x;
92    [Storable]
93    private Scaling inputScaling;
94
95
96    [StorableConstructor]
97    private GaussianProcessModel(bool deserializing) : base(deserializing) { }
98    private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
99      : base(original, cloner) {
100      this.meanFunction = cloner.Clone(original.meanFunction);
101      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
102      this.inputScaling = cloner.Clone(original.inputScaling);
103      this.negativeLogLikelihood = original.negativeLogLikelihood;
104      this.targetVariable = original.targetVariable;
105      this.sqrSigmaNoise = original.sqrSigmaNoise;
106      if (original.meanParameter != null) {
107        this.meanParameter = (double[])original.meanParameter.Clone();
108      }
109      if (original.covarianceParameter != null) {
110        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
111      }
112
113      // shallow copies of arrays because they cannot be modified
114      this.allowedInputVariables = original.allowedInputVariables;
115      this.alpha = original.alpha;
116      this.l = original.l;
117      this.x = original.x;
118    }
119    public GaussianProcessModel(Dataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
120      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction)
121      : base() {
122      this.name = ItemName;
123      this.description = ItemDescription;
124      this.meanFunction = (IMeanFunction)meanFunction.Clone();
125      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
126      this.targetVariable = targetVariable;
127      this.allowedInputVariables = allowedInputVariables.ToArray();
128
129
130      int nVariables = this.allowedInputVariables.Length;
131      meanParameter = hyp
132        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
133        .ToArray();
134
135      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
136                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
137                                             .ToArray();
138      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
139
140      CalculateModel(ds, rows);
141    }
142
143    private void CalculateModel(Dataset ds, IEnumerable<int> rows) {
144      inputScaling = new Scaling(ds, allowedInputVariables, rows);
145      x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling);
146      var y = ds.GetDoubleValues(targetVariable, rows);
147
148      int n = x.GetLength(0);
149      l = new double[n, n];
150
151      // calculate means and covariances
152      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, x.GetLength(1)));
153      double[] m = Enumerable.Range(0, x.GetLength(0))
154        .Select(r => mean.Mean(x, r))
155        .ToArray();
156
157      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));
158      for (int i = 0; i < n; i++) {
159        for (int j = i; j < n; j++) {
160          l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;
161          if (j == i) l[j, i] += 1.0;
162        }
163      }
164
165
166      // cholesky decomposition
167      int info;
168      alglib.densesolverreport denseSolveRep;
169
170      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
171      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
172
173      // calculate sum of diagonal elements for likelihood
174      double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();
175
176      // solve for alpha
177      double[] ym = y.Zip(m, (a, b) => a - b).ToArray();
178
179      alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
180      for (int i = 0; i < alpha.Length; i++)
181        alpha[i] = alpha[i] / sqrSigmaNoise;
182      negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
183
184      // derivatives
185      int nAllowedVariables = x.GetLength(1);
186
187      alglib.matinvreport matInvRep;
188      double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
189      Array.Copy(l, lCopy, lCopy.Length);
190
191      alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
192      if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
193      for (int i = 0; i < n; i++) {
194        for (int j = 0; j <= i; j++)
195          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
196      }
197
198      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
199
200      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
201      for (int k = 0; k < meanGradients.Length; k++) {
202        var meanGrad = Enumerable.Range(0, alpha.Length)
203        .Select(r => mean.Gradient(x, r, k));
204        meanGradients[k] = -Util.ScalarProd(meanGrad, alpha);
205      }
206
207      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
208      if (covGradients.Length > 0) {
209        for (int i = 0; i < n; i++) {
210          for (int j = 0; j < i; j++) {
211            var g = cov.CovarianceGradient(x, i, j).ToArray();
212            for (int k = 0; k < covGradients.Length; k++) {
213              covGradients[k] += lCopy[i, j] * g[k];
214            }
215          }
216
217          var gDiag = cov.CovarianceGradient(x, i, i).ToArray();
218          for (int k = 0; k < covGradients.Length; k++) {
219            // diag
220            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
221          }
222        }
223      }
224
225      hyperparameterGradients =
226        meanGradients
227        .Concat(covGradients)
228        .Concat(new double[] { noiseGradient }).ToArray();
229
230    }
231
232
233    public override IDeepCloneable Clone(Cloner cloner) {
234      return new GaussianProcessModel(this, cloner);
235    }
236
237    // is called by the solution creator to set all parameter values of the covariance and mean function
238    // to the optimized values (necessary to make the values visible in the GUI)
239    public void FixParameters() {
240      covarianceFunction.SetParameter(covarianceParameter);
241      meanFunction.SetParameter(meanParameter);
242      covarianceParameter = new double[0];
243      meanParameter = new double[0];
244    }
245
246    #region IRegressionModel Members
247    public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
248      return GetEstimatedValuesHelper(dataset, rows);
249    }
250    public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
251      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
252    }
253    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
254      return CreateRegressionSolution(problemData);
255    }
256    #endregion
257
258
259    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
260      var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
261      int newN = newX.GetLength(0);
262      int n = x.GetLength(0);
263      var Ks = new double[newN, n];
264      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, newX.GetLength(1)));
265      var ms = Enumerable.Range(0, newX.GetLength(0))
266      .Select(r => mean.Mean(newX, r))
267      .ToArray();
268      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, newX.GetLength(1)));
269      for (int i = 0; i < newN; i++) {
270        for (int j = 0; j < n; j++) {
271          Ks[i, j] = cov.CrossCovariance(x, newX, j, i);
272        }
273      }
274
275      return Enumerable.Range(0, newN)
276        .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha));
277    }
278
279    public IEnumerable<double> GetEstimatedVariance(Dataset dataset, IEnumerable<int> rows) {
280      var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
281      int newN = newX.GetLength(0);
282      int n = x.GetLength(0);
283
284      var kss = new double[newN];
285      double[,] sWKs = new double[n, newN];
286      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, newX.GetLength(1)));
287
288      // for stddev
289      for (int i = 0; i < newN; i++)
290        kss[i] = cov.Covariance(newX, i, i);
291
292      for (int i = 0; i < newN; i++) {
293        for (int j = 0; j < n; j++) {
294          sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
295        }
296      }
297
298      // for stddev
299      alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
300
301      for (int i = 0; i < newN; i++) {
302        var sumV = Util.ScalarProd(Util.GetCol(sWKs, i), Util.GetCol(sWKs, i));
303        kss[i] -= sumV;
304        if (kss[i] < 0) kss[i] = 0;
305      }
306      return kss;
307    }
308  }
309}
Note: See TracBrowser for help on using the repository browser.