Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GaussianProcessTuning/HeuristicLab.Algorithms.DataAnalysis.Experimental/TunedGaussianProcessModel.cs @ 12186

Last change on this file since 12186 was 9562, checked in by gkronber, 12 years ago

#1967 worked on Gaussian process evolution.

File size: 13.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29using ILNumerics;
30
31namespace HeuristicLab.Algorithms.DataAnalysis {
32  /// <summary>
33  /// Represents a Gaussian process model.
34  /// </summary>
35  [StorableClass]
36  [Item("TunedGaussianProcessModel", "Gaussian process model implemented using ILNumerics.")]
37  public sealed class TunedGaussianProcessModel : NamedItem, IGaussianProcessModel {
38    [Storable]
39    private double negativeLogLikelihood;
40    public double NegativeLogLikelihood {
41      get { return negativeLogLikelihood; }
42    }
43
44    [Storable]
45    private double[] hyperparameterGradients;
46    public double[] HyperparameterGradients {
47      get {
48        var copy = new double[hyperparameterGradients.Length];
49        Array.Copy(hyperparameterGradients, copy, copy.Length);
50        return copy;
51      }
52    }
53
54    [Storable]
55    private ICovarianceFunction covarianceFunction;
56    public ICovarianceFunction CovarianceFunction {
57      get { return covarianceFunction; }
58    }
59    [Storable]
60    private IMeanFunction meanFunction;
61    public IMeanFunction MeanFunction {
62      get { return meanFunction; }
63    }
64    [Storable]
65    private string targetVariable;
66    public string TargetVariable {
67      get { return targetVariable; }
68    }
69    [Storable]
70    private string[] allowedInputVariables;
71    public string[] AllowedInputVariables {
72      get { return allowedInputVariables; }
73    }
74
75    [Storable]
76    private double[] alpha;
77
78
79    [Storable]
80    private double sqrSigmaNoise;
81    public double SigmaNoise {
82      get { return Math.Sqrt(sqrSigmaNoise); }
83    }
84
85    [Storable]
86    private double[] meanParameter;
87    [Storable]
88    private double[] covarianceParameter;
89
90    [Storable]
91    private double[] l;
92
93    [Storable]
94    private double[,] x;
95    [Storable]
96    private Scaling inputScaling;
97
98
99    [StorableConstructor]
100    private TunedGaussianProcessModel(bool deserializing) : base(deserializing) { }
101    private TunedGaussianProcessModel(TunedGaussianProcessModel original, Cloner cloner)
102      : base(original, cloner) {
103      this.meanFunction = cloner.Clone(original.meanFunction);
104      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
105      this.inputScaling = cloner.Clone(original.inputScaling);
106      this.negativeLogLikelihood = original.negativeLogLikelihood;
107      this.targetVariable = original.targetVariable;
108      this.sqrSigmaNoise = original.sqrSigmaNoise;
109      if (original.meanParameter != null) {
110        this.meanParameter = (double[])original.meanParameter.Clone();
111      }
112      if (original.covarianceParameter != null) {
113        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
114      }
115
116      // shallow copies of arrays because they cannot be modified
117      this.allowedInputVariables = original.allowedInputVariables;
118      this.alpha = original.alpha;
119      this.l = original.l;
120      this.x = original.x;
121    }
122    public TunedGaussianProcessModel(Dataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
123      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction)
124      : base() {
125      this.name = ItemName;
126      this.description = ItemDescription;
127      this.meanFunction = (IMeanFunction)meanFunction.Clone();
128      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
129      this.targetVariable = targetVariable;
130      this.allowedInputVariables = allowedInputVariables.ToArray();
131
132
133      int nVariables = this.allowedInputVariables.Length;
134      meanParameter = hyp
135        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
136        .ToArray();
137
138      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
139                                             .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
140                                             .ToArray();
141      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
142
143      CalculateModel(ds, rows);
144
145      //var cmpModel = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, rows, hyp,
146      //                                        (IMeanFunction)meanFunction.Clone(),
147      //                                        (ICovarianceFunction)covarianceFunction.Clone());
148      //if (!cmpModel.NegativeLogLikelihood.IsAlmost(NegativeLogLikelihood) ||
149      //  !cmpModel.HyperparameterGradients.Sum().IsAlmost(HyperparameterGradients.Sum())
150      //  )
151      //  throw new ArgumentException();
152    }
153
154    private void CalculateModel(Dataset ds, IEnumerable<int> rows) {
155      inputScaling = new Scaling(ds, allowedInputVariables, rows);
156      x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling);
157      var y = ds.GetDoubleValues(targetVariable, rows);
158      ILNumerics.Settings.MaxNumberThreads = Environment.ProcessorCount;
159      using (ILScope.Enter()) {
160        int n = x.GetLength(0);
161
162        // calculate means and covariances
163        var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, x.GetLength(1)));
164        double[] m = Enumerable.Range(0, x.GetLength(0))
165          .Select(r => mean.Mean(x, r))
166          .ToArray();
167
168        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));
169        ILArray<double> myL = ILMath.zeros<double>(n, n);
170
171        for (int i = 0; i < n; i++) {
172          for (int j = i; j < n; j++) {
173            double c = cov.Covariance(x, i, j) / sqrSigmaNoise;
174            if (i == j) {
175              myL.SetValue(c + 1, i, j);
176            } else {
177              myL.SetValue(c, j, i);
178              myL.SetValue(c, i, j);
179            }
180          }
181        }
182
183        // cholesky decomposition
184        ILArray<double> chol;
185        try {
186          chol = ILNumerics.ILMath.chol(myL, false);
187        }
188        catch (Exception e) {
189          throw new ArgumentException("covariance matrix not positive definite", e);
190        }
191        if (chol == null || chol.IsEmpty || !chol.Size.Equals(myL.Size))
192          throw new ArgumentException("covariance matrix not positive definite");
193        this.l = new double[n * n];
194        chol.ExportValues(ref l);
195        // calculate sum of diagonal elements for likelihood
196
197        double diagSum = ILMath.log(ILMath.diag(chol)).Sum();
198
199        // solve for alpha
200        ILArray<double> ym = ILMath.array(y.Zip(m, (a, b) => a - b).ToArray());
201
202        MatrixProperties lowerTriProps = MatrixProperties.LowerTriangular;
203        MatrixProperties upperTriProps = MatrixProperties.UpperTriangular;
204        ILArray<double> alpha;
205        try {
206          ILArray<double> t1 = ILMath.linsolve(chol.T, ym, ref lowerTriProps);
207          alpha = ILMath.linsolve(chol, t1, ref upperTriProps) / sqrSigmaNoise;
208        }
209        catch (Exception e) {
210          throw new ArgumentException("covariance matrix is not positive definite", e);
211        }
212        if (alpha == null || alpha.IsEmpty) throw new ArgumentException();
213        this.alpha = new double[alpha.Length];
214        alpha.ExportValues(ref this.alpha);
215
216        negativeLogLikelihood = 0.5 * (double)ILMath.multiply(ym.T, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
217
218        // derivatives
219        int nAllowedVariables = x.GetLength(1);
220
221        ILArray<double> lCopy;
222        try {
223          ILArray<double> t2 = ILMath.linsolve(chol.T, ILMath.eye<double>(n, n), ref lowerTriProps);
224          lCopy = ILMath.linsolve(chol, t2, ref upperTriProps) / sqrSigmaNoise - ILMath.multiply(alpha, alpha.T);
225        }
226        catch (Exception e) {
227          throw new ArgumentException("covariance matrix is not positive definite", e);
228        }
229
230        double noiseGradient = sqrSigmaNoise * ILMath.sumall(ILMath.diag(lCopy)).GetValue(0, 0);
231
232        double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
233        for (int k = 0; k < meanGradients.Length; k++) {
234          ILArray<double> meanGrad =
235            ILMath.array(
236              Enumerable.Range(0, alpha.Length)
237                .Select(r => mean.Gradient(x, r, k))
238                .ToArray());
239          meanGradients[k] = -(double)ILMath.multiply(meanGrad.T, alpha);
240        }
241
242        double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
243        if (covGradients.Length > 0) {
244          for (int i = 0; i < n; i++) {
245            for (int j = 0; j < i; j++) {
246              var g = cov.CovarianceGradient(x, i, j).ToArray();
247              for (int k = 0; k < covGradients.Length; k++) {
248                covGradients[k] += lCopy.GetValue(i, j) * g[k];
249              }
250            }
251
252            var gDiag = cov.CovarianceGradient(x, i, i).ToArray();
253            for (int k = 0; k < covGradients.Length; k++) {
254              // diag
255              covGradients[k] += 0.5 * lCopy.GetValue(i, i) * gDiag[k];
256            }
257          }
258        }
259
260        hyperparameterGradients =
261          meanGradients
262            .Concat(covGradients)
263            .Concat(new double[] { noiseGradient }).ToArray();
264      }
265    }
266
267
268    public override IDeepCloneable Clone(Cloner cloner) {
269      return new TunedGaussianProcessModel(this, cloner);
270    }
271
272    // is called by the solution creator to set all parameter values of the covariance and mean function
273    // to the optimized values (necessary to make the values visible in the GUI)
274    public void FixParameters() {
275      covarianceFunction.SetParameter(covarianceParameter);
276      meanFunction.SetParameter(meanParameter);
277      covarianceParameter = new double[0];
278      meanParameter = new double[0];
279    }
280
281    #region IRegressionModel Members
282    public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
283      return GetEstimatedValuesHelper(dataset, rows);
284    }
285    public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
286      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
287    }
288    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
289      return CreateRegressionSolution(problemData);
290    }
291    #endregion
292
293
294    private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
295      var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
296      int newN = newX.GetLength(0);
297      int n = x.GetLength(0);
298      var Ks = new double[newN, n];
299      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, newX.GetLength(1)));
300      var ms = Enumerable.Range(0, newX.GetLength(0))
301      .Select(r => mean.Mean(newX, r))
302      .ToArray();
303      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));
304      for (int i = 0; i < newN; i++) {
305        for (int j = 0; j < n; j++) {
306          Ks[i, j] = cov.CrossCovariance(x, newX, j, i);
307        }
308      }
309
310      return Enumerable.Range(0, newN)
311        .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha));
312    }
313
314    public IEnumerable<double> GetEstimatedVariance(Dataset dataset, IEnumerable<int> rows) {
315      var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
316      int newN = newX.GetLength(0);
317      int n = x.GetLength(0);
318      if (newN == 0) return Enumerable.Empty<double>();
319
320      var kss = new double[newN];
321      ILArray<double> sWKs = ILMath.zeros(n, newN);
322      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));
323
324      // for stddev
325      for (int i = 0; i < newN; i++)
326        kss[i] = cov.Covariance(newX, i, i);
327
328      for (int i = 0; i < newN; i++) {
329        for (int j = 0; j < n; j++) {
330          sWKs.SetValue(cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise), j, i);
331        }
332      }
333
334      // for stddev
335      var lowerTriangular = MatrixProperties.LowerTriangular;
336      ILArray<double> V = ILMath.linsolve(ILMath.array(l, n, n).T, sWKs, ref lowerTriangular);
337
338      // alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
339
340      for (int i = 0; i < newN; i++) {
341        double sumV = (double)ILMath.multiply(V[ILMath.full, i].T, V[ILMath.full, i]);
342        kss[i] -= sumV;
343        if (kss[i] < 0) kss[i] = 0;
344      }
345      return kss;
346    }
347  }
348}
Note: See TracBrowser for help on using the repository browser.