Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

GaussianProcessModelMKL.cs @ 16799

Visit:

Last change on this file since 16799 was 14991, checked in by gkronber, 8 years ago
#2789 implemented an alternative GaussianProcessModel which uses Math.Numerics and Intel MKL
File size: 14.7 KB

Line
1	#region License Information
2	/* HeuristicLab
3	* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4	*
5	* This file is part of HeuristicLab.
6	*
7	* HeuristicLab is free software: you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation, either version 3 of the License, or
10	* (at your option) any later version.
11	*
12	* HeuristicLab is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19	*/
20	#endregion
21
22	using System;
23	using System.Collections.Generic;
24	using System.Linq;
25	using HeuristicLab.Common;
26	using HeuristicLab.Core;
27	using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28	using HeuristicLab.Problems.DataAnalysis;
29	using MathNet.Numerics.Data.Matlab;
30	using MathNet.Numerics.LinearAlgebra;
31	using MathNet.Numerics.LinearAlgebra.Double;
32	using MathNet.Numerics.LinearAlgebra.Factorization;
33
34	namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
35	// TODO: scale y
36	// TODO: remove dependence of scaling and export scaling parameters
37	// TODO: export / import all relevant data
38	[StorableClass]
39	[Item("GaussianProcessModelMKL", "Represents a Gaussian process posterior.")]
40	public sealed class GaussianProcessModelMKL : RegressionModel, IGaussianProcessModel {
41	public override IEnumerable<string> VariablesUsedForPrediction {
42	get { return allowedInputVariables; }
43	}
44
45	[Storable]
46	private double negativeLogLikelihood;
47	public double NegativeLogLikelihood {
48	get { return negativeLogLikelihood; }
49	}
50
51	[Storable]
52	private double negativeLooPredictiveProbability;
53	public double NegativeLooPredictiveProbability {
54	get { return negativeLooPredictiveProbability; }
55	}
56
57	[Storable]
58	private double[] hyperparameterGradients;
59	public double[] HyperparameterGradients {
60	get {
61	var copy = new double[hyperparameterGradients.Length];
62	Array.Copy(hyperparameterGradients, copy, copy.Length);
63	return copy;
64	}
65	}
66
67	[Storable]
68	private ICovarianceFunction covarianceFunction;
69	public ICovarianceFunction CovarianceFunction {
70	get { return covarianceFunction; }
71	}
72	[Storable]
73	private IMeanFunction meanFunction;
74	public IMeanFunction MeanFunction {
75	get { return meanFunction; }
76	}
77
78	[Storable]
79	private string[] allowedInputVariables;
80	public string[] AllowedInputVariables {
81	get { return allowedInputVariables; }
82	}
83
84	[Storable]
85	private Vector<double> alpha;
86	[Storable]
87	private double sqrSigmaNoise;
88	public double SigmaNoise {
89	get { return Math.Sqrt(sqrSigmaNoise); }
90	}
91
92	[Storable]
93	private double[] meanParameter;
94	[Storable]
95	private double[] covarianceParameter;
96
97	private Matrix<double> l;
98	private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
99
100
101	[Storable]
102	private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
103	[Storable]
104	private int[] trainingRows;
105
106	[Storable]
107	private Scaling inputScaling;
108
109
110	[StorableConstructor]
111	private GaussianProcessModelMKL(bool deserializing) : base(deserializing) { }
112	private GaussianProcessModelMKL(GaussianProcessModelMKL original, Cloner cloner)
113	: base(original, cloner) {
114	this.meanFunction = cloner.Clone(original.meanFunction);
115	this.covarianceFunction = cloner.Clone(original.covarianceFunction);
116	if (original.inputScaling != null)
117	this.inputScaling = cloner.Clone(original.inputScaling);
118	this.trainingDataset = cloner.Clone(original.trainingDataset);
119	this.negativeLogLikelihood = original.negativeLogLikelihood;
120	this.negativeLooPredictiveProbability = original.negativeLooPredictiveProbability;
121	this.sqrSigmaNoise = original.sqrSigmaNoise;
122	if (original.meanParameter != null) {
123	this.meanParameter = (double[])original.meanParameter.Clone();
124	}
125	if (original.covarianceParameter != null) {
126	this.covarianceParameter = (double[])original.covarianceParameter.Clone();
127	}
128
129	// shallow copies of arrays because they cannot be modified
130	this.trainingRows = original.trainingRows;
131	this.allowedInputVariables = original.allowedInputVariables;
132	this.alpha = original.alpha;
133	this.l = original.l;
134	this.x = original.x;
135	}
136	public GaussianProcessModelMKL(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
137	IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
138	bool scaleInputs = true)
139	: base(targetVariable) {
140	// MathNet.Numerics.Control.UseNativeMKL();
141	// MathNet.Numerics.Control.UseSingleThread();
142	// this.Description += Control.LinearAlgebraProvider.ToString();
143	this.name = ItemName;
144	this.description = ItemDescription;
145	this.meanFunction = (IMeanFunction)meanFunction.Clone();
146	this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
147	this.allowedInputVariables = allowedInputVariables.ToArray();
148
149
150	int nVariables = this.allowedInputVariables.Length;
151	meanParameter = hyp
152	.Take(this.meanFunction.GetNumberOfParameters(nVariables))
153	.ToArray();
154
155	covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
156	.Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
157	.ToArray();
158	sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
159	CalculateModel(ds, rows, scaleInputs);
160	}
161
162	private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
163	this.trainingDataset = (IDataset)ds.Clone();
164	this.trainingRows = rows.ToArray();
165	this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;
166
167	x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
168
169	IEnumerable<double> y;
170	y = ds.GetDoubleValues(TargetVariable, rows);
171
172	int n = x.GetLength(0);
173
174	var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
175	// calculate cholesky decomposed (lower triangular) covariance matrix
176	var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
177	var chol = CalculateL(x, cov, sqrSigmaNoise);
178	this.l = chol.Factor;
179
180	// calculate mean
181	var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
182	double[] m = Enumerable.Range(0, x.GetLength(0))
183	.Select(r => mean.Mean(x, r))
184	.ToArray();
185
186
187	// solve for alpha
188	Vector<double> ym = DenseVector.OfEnumerable(y.Zip(m, (a, b) => a - b));
189
190
191	alpha = chol.Solve(ym);
192	alpha = alpha * 1.0 / sqrSigmaNoise;
193
194	// calculate sum of diagonal elements for likelihood
195	double diagSum = chol.DeterminantLn;
196	negativeLogLikelihood = 0.5 * ym.DotProduct(alpha) + 0.5 * diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);
197
198	// derivatives
199	int nAllowedVariables = x.GetLength(1);
200
201	Matrix<double> lCopy;
202
203	lCopy = chol.Solve(DenseMatrix.CreateIdentity(n));
204
205	// LOOCV log predictive probability (GPML page 116 and 117)
206	var sumLoo = 0.0;
207	var ki = new DenseVector(n);
208	for (int i = 0; i < n; i++) {
209	for (int j = 0; j < n; j++) ki[j] = cov.Covariance(x, i, j);
210	var yi = ki.DotProduct(alpha);
211	var yi_loo = yi - alpha[i] / lCopy[i, i] / sqrSigmaNoise;
212	var s2_loo = sqrSigmaNoise / lCopy[i, i];
213	var err = ym[i] - yi_loo;
214	var nll_loo = Math.Log(s2_loo) + err * err / s2_loo;
215	sumLoo += nll_loo;
216	}
217	sumLoo += n * Math.Log(2 * Math.PI);
218	negativeLooPredictiveProbability = 0.5 * sumLoo;
219
220	for (int i = 0; i < n; i++) {
221	for (int j = 0; j <= i; j++)
222	lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
223	}
224
225	double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();
226
227	double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
228	for (int k = 0; k < meanGradients.Length; k++) {
229	var meanGrad = new DenseVector(alpha.Count);
230	for (int g = 0; g < meanGrad.Count; g++)
231	meanGrad[g] = mean.Gradient(x, g, k);
232
233	meanGradients[k] = -meanGrad.DotProduct(alpha);
234	}
235
236	double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
237	if (covGradients.Length > 0) {
238	for (int i = 0; i < n; i++) {
239	for (int j = 0; j < i; j++) {
240	var g = cov.CovarianceGradient(x, i, j);
241	for (int k = 0; k < covGradients.Length; k++) {
242	covGradients[k] += lCopy[i, j] * g[k];
243	}
244	}
245
246	var gDiag = cov.CovarianceGradient(x, i, i);
247	for (int k = 0; k < covGradients.Length; k++) {
248	// diag
249	covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
250	}
251	}
252	}
253
254	hyperparameterGradients =
255	meanGradients
256	.Concat(covGradients)
257	.Concat(new double[] { noiseGradient }).ToArray();
258
259	}
260
261	private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
262	if (scaling != null) {
263	// BackwardsCompatibility3.3
264	#region Backwards compatible code, remove with 3.4
265	// TODO: completely remove Scaling class
266	List<string> variablesList = allowedInputs.ToList();
267	List<int> rowsList = rows.ToList();
268
269	double[,] matrix = new double[rowsList.Count, variablesList.Count];
270
271	int col = 0;
272	foreach (string column in variablesList) {
273	var values = scaling.GetScaledValues(ds, column, rowsList);
274	int row = 0;
275	foreach (var value in values) {
276	matrix[row, col] = value;
277	row++;
278	}
279	col++;
280	}
281	return matrix;
282	#endregion
283	} else {
284	return ds.ToArray(allowedInputs, rows);
285	}
286	}
287
288	private static Cholesky<double> CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
289	int n = x.GetLength(0);
290	var l = new DenseMatrix(n, n);
291
292	// calculate covariances
293	for (int i = 0; i < n; i++) {
294	for (int j = i; j < n; j++) {
295	l[j, i] = l[i, j] = cov.Covariance(x, i, j) / sqrSigmaNoise;
296	if (j == i) l[j, i] += 1.0;
297	}
298	}
299
300	// cholesky decomposition
301	return l.Cholesky();
302	}
303
304
305	public override IDeepCloneable Clone(Cloner cloner) {
306	return new GaussianProcessModelMKL(this, cloner);
307	}
308
309	// is called by the solution creator to set all parameter values of the covariance and mean function
310	// to the optimized values (necessary to make the values visible in the GUI)
311	public void FixParameters() {
312	covarianceFunction.SetParameter(covarianceParameter);
313	meanFunction.SetParameter(meanParameter);
314	covarianceParameter = new double[0];
315	meanParameter = new double[0];
316	}
317
318	#region IRegressionModel Members
319	public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
320	return GetEstimatedValuesHelper(dataset, rows);
321	}
322	public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
323	return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
324	}
325	#endregion
326
327
328	private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
329	if (x == null) {
330	x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
331	}
332	int n = x.GetLength(0);
333
334	double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
335	int newN = newX.GetLength(0);
336
337	var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
338	var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
339	var ms = Enumerable.Range(0, newX.GetLength(0))
340	.Select(r => mean.Mean(newX, r))
341	.ToArray();
342	var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
343	for (int i = 0; i < newN; i++) {
344	var Ks = new DenseVector(n);
345	for (int j = 0; j < n; j++) {
346	Ks[j] = cov.CrossCovariance(x, newX, j, i);
347	}
348	yield return ms[i] + Ks.DotProduct(alpha);
349	}
350	}
351
352	public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
353	if (x == null) {
354	x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
355	}
356	int n = x.GetLength(0);
357
358	var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
359	int newN = newX.GetLength(0);
360	if (newN == 0) return Enumerable.Empty<double>();
361
362	var kss = new DenseVector(newN);
363	Matrix<double> sWKs = new DenseMatrix(n, newN);
364	var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
365	var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
366
367	if (l == null) {
368	l = CalculateL(x, cov, sqrSigmaNoise).Factor;
369	}
370
371	// for stddev
372	for (int i = 0; i < newN; i++)
373	kss[i] = cov.Covariance(newX, i, i);
374
375	for (int i = 0; i < newN; i++) {
376	for (int j = 0; j < n; j++) {
377	sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
378	}
379	}
380
381	sWKs = l.Solve(sWKs);
382
383	for (int i = 0; i < newN; i++) {
384	var col = sWKs.Column(i);
385	var sumV = col.DotProduct(col);
386	kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)
387	kss[i] -= sumV;
388	if (kss[i] < 0) kss[i] = 0;
389	}
390	return kss;
391	}
392
393
394	public void Export(string fileName) {
395	MatlabWriter.Write<double>(fileName,
396	new Matrix<double>[] { DenseMatrix.OfArray(x), l, alpha.ToRowMatrix()},
397	new string[] { "x", "l", "alpha", }
398	);
399	}
400	}
401	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/2789_MathNetNumerics-Exploration/HeuristicLab.Algorithms.DataAnalysis.Experimental/GaussianProcessModelMKL.cs @ 16799

Download in other formats: