Context Navigation

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/StudentTProcessModel.cs @ 13777

Visit:

Last change on this file since 13777 was 13721, checked in by mkommend, 9 years ago
#2591: Changed all GP covariance and mean functions to use int[] for column indices instead of IEnumerable<int>. Changed GP utils, GPModel and StudentTProcessModel as well to use fewer iterators and adapted unit tests to the new interface.
File size: 15.7 KB

Line
1	#region License Information
2	/* HeuristicLab
3	* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4	*
5	* This file is part of HeuristicLab.
6	*
7	* HeuristicLab is free software: you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation, either version 3 of the License, or
10	* (at your option) any later version.
11	*
12	* HeuristicLab is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19	*/
20	#endregion
21
22	using System;
23	using System.Collections.Generic;
24	using System.Linq;
25	using HeuristicLab.Common;
26	using HeuristicLab.Core;
27	using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28	using HeuristicLab.Problems.DataAnalysis;
29
30	namespace HeuristicLab.Algorithms.DataAnalysis {
/// <summary>
/// Represents a Student-t process regression model.
/// The public constructor fits the model to the given training rows: it computes the Cholesky
/// factor of the training covariance matrix, the weight vector alpha, the negative log likelihood
/// and its gradients with respect to all hyperparameters. Afterwards the model can produce
/// estimated values and estimated variances for arbitrary rows of a dataset.
/// </summary>
[StorableClass]
[Item("StudentTProcessModel", "Represents a Student-t process posterior.")]
public sealed class StudentTProcessModel : NamedItem, IGaussianProcessModel {
  [Storable]
  private double negativeLogLikelihood;
  /// <summary>
  /// Negative log likelihood of the training targets, calculated when the model was built.
  /// </summary>
  public double NegativeLogLikelihood {
    get { return negativeLogLikelihood; }
  }

  [Storable]
  private double[] hyperparameterGradients;
  /// <summary>
  /// Gradients of the negative log likelihood in the order: mean function parameters,
  /// covariance function parameters, degrees-of-freedom hyperparameter.
  /// A fresh copy is returned on every access so that callers cannot modify the model state.
  /// </summary>
  public double[] HyperparameterGradients {
    get { return (double[])hyperparameterGradients.Clone(); }
  }

  [Storable]
  private ICovarianceFunction covarianceFunction;
  public ICovarianceFunction CovarianceFunction {
    get { return covarianceFunction; }
  }
  [Storable]
  private IMeanFunction meanFunction;
  public IMeanFunction MeanFunction {
    get { return meanFunction; }
  }
  [Storable]
  private string targetVariable;
  public string TargetVariable {
    get { return targetVariable; }
  }
  [Storable]
  private string[] allowedInputVariables;
  public string[] AllowedInputVariables {
    get { return allowedInputVariables; }
  }

  // alpha = K^-1 (y - m), obtained from the Cholesky factor of the training covariance matrix K
  [Storable]
  private double[] alpha;
  // beta = (y - m)^T alpha
  [Storable]
  private double beta;

  /// <summary>
  /// Always 0 for this model.
  /// </summary>
  public double SigmaNoise {
    get { return 0; }
  }

  [Storable]
  private double[] meanParameter;
  [Storable]
  private double[] covarianceParameter;
  // degrees of freedom; derived from the last hyperparameter h as exp(h) + 2
  [Storable]
  private double nu;

  private double[,] l; // used to be storable in previous versions (is calculated lazily now)
  private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)

  // BackwardsCompatibility3.4
  #region Backwards compatible code, remove with 3.5
  [Storable(Name = "l")] // restore if available but don't store anymore
  private double[,] l_storable {
    set { this.l = value; }
    get {
      if (trainingDataset == null) return l; // this model has been created with an old version
      else return null; // if the training dataset is available l should not be serialized
    }
  }
  [Storable(Name = "x")] // restore if available but don't store anymore
  private double[,] x_storable {
    set { this.x = value; }
    get {
      if (trainingDataset == null) return x; // this model has been created with an old version
      else return null; // if the training dataset is available x should not be serialized
    }
  }
  #endregion


  [Storable]
  private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
  [Storable]
  private int[] trainingRows;

  // null when the model was created with scaleInputs = false
  [Storable]
  private Scaling inputScaling;


  [StorableConstructor]
  private StudentTProcessModel(bool deserializing) : base(deserializing) { }

  // Copy constructor used by Clone(Cloner).
  private StudentTProcessModel(StudentTProcessModel original, Cloner cloner)
    : base(original, cloner) {
    this.meanFunction = cloner.Clone(original.meanFunction);
    this.covarianceFunction = cloner.Clone(original.covarianceFunction);
    if (original.inputScaling != null)
      this.inputScaling = cloner.Clone(original.inputScaling);
    this.trainingDataset = cloner.Clone(original.trainingDataset);
    this.negativeLogLikelihood = original.negativeLogLikelihood;
    this.targetVariable = original.targetVariable;
    if (original.meanParameter != null) {
      this.meanParameter = (double[])original.meanParameter.Clone();
    }
    if (original.covarianceParameter != null) {
      this.covarianceParameter = (double[])original.covarianceParameter.Clone();
    }
    this.nu = original.nu;

    // shallow copies of arrays because they cannot be modified
    this.trainingRows = original.trainingRows;
    this.allowedInputVariables = original.allowedInputVariables;
    this.alpha = original.alpha;
    this.beta = original.beta;
    this.l = original.l;
    this.x = original.x;
    // the gradients are only handed out as copies (see HyperparameterGradients), so sharing the
    // array is safe; without this assignment the property would fail on a cloned model
    this.hyperparameterGradients = original.hyperparameterGradients;
  }

  /// <summary>
  /// Creates a Student-t process model and fits it to the given training rows.
  /// </summary>
  /// <param name="ds">Dataset containing the target and input variables (a clone is stored in the model).</param>
  /// <param name="targetVariable">Name of the target variable.</param>
  /// <param name="allowedInputVariables">Names of the input variables.</param>
  /// <param name="rows">Training rows.</param>
  /// <param name="hyp">Hyperparameters in the order: mean function parameters, covariance function
  /// parameters, one value h from which the degrees of freedom are derived as exp(h) + 2.</param>
  /// <param name="meanFunction">Mean function (a clone is stored in the model).</param>
  /// <param name="covarianceFunction">Covariance function (a clone is stored in the model).</param>
  /// <param name="scaleInputs">If true, a Scaling is created from the training rows and applied to all inputs.</param>
  /// <exception cref="ArgumentException">Thrown when the model cannot be calculated
  /// (e.g. the Cholesky decomposition or the matrix inversion fails).</exception>
  public StudentTProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
    IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
    bool scaleInputs = true)
    : base() {
    this.name = ItemName;
    this.description = ItemDescription;
    this.meanFunction = (IMeanFunction)meanFunction.Clone();
    this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
    this.targetVariable = targetVariable;
    this.allowedInputVariables = allowedInputVariables.ToArray();

    int nVariables = this.allowedInputVariables.Length;

    // materialize the hyperparameters once; the sequence is sliced three times below
    double[] hypValues = hyp.ToArray();

    meanParameter = hypValues
      .Take(this.meanFunction.GetNumberOfParameters(nVariables))
      .ToArray();

    covarianceParameter = hypValues
      .Skip(meanParameter.Length)
      .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
      .ToArray();

    // the exp(.) + 2 transformation keeps nu > 2 for every real-valued hyperparameter
    nu = Math.Exp(hypValues.Skip(meanParameter.Length + covarianceParameter.Length).First()) + 2;

    try {
      CalculateModel(ds, rows, scaleInputs);
    }
    catch (alglib.alglibexception ae) {
      // wrap exception so that calling code doesn't have to know about alglib implementation
      throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
    }
  }

  // Fits the model: stores the training data, calculates the Cholesky factor l, alpha, beta,
  // the negative log likelihood and the gradients for all hyperparameters.
  private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
    this.trainingDataset = (IDataset)ds.Clone();
    // enumerate the rows exactly once and use the array for all further steps
    this.trainingRows = rows.ToArray();
    this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, trainingRows) : null;

    x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);

    IEnumerable<double> y = ds.GetDoubleValues(targetVariable, trainingRows);

    int n = x.GetLength(0);
    var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();

    // calculate cholesky decomposed (lower triangular) covariance matrix
    var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
    this.l = CalculateL(x, cov);

    // calculate mean
    var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
    double[] m = Enumerable.Range(0, n)
      .Select(r => mean.Mean(x, r))
      .ToArray();

    // sum of the log diagonal elements of the cholesky factor (= 0.5 * log det K) for the likelihood
    double diagSum = 0.0;
    for (int i = 0; i < n; i++)
      diagSum += Math.Log(l[i, i]);

    // solve for alpha
    double[] ym = y.Zip(m, (a, b) => a - b).ToArray();

    int info;
    alglib.densesolverreport denseSolveRep;

    alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
    // do not continue with a meaningless alpha when the solver reports a failure
    if (info != 1) throw new ArgumentException("Can't solve the linear system to calculate alpha.");

    beta = Util.ScalarProd(ym, alpha);
    double sign0, sign1;
    double lngamma0 = alglib.lngamma(0.5 * (nu + n), out sign0);
    lngamma0 *= sign0;
    double lngamma1 = alglib.lngamma(0.5 * nu, out sign1);
    lngamma1 *= sign1;
    negativeLogLikelihood =
      0.5 * n * Math.Log((nu - 2) * Math.PI) +
      diagSum +
      -lngamma0 + lngamma1 +
      0.5 * (nu + n) * Math.Log(1 + beta / (nu - 2));

    // derivatives
    int nAllowedVariables = x.GetLength(1);

    // invert K via its cholesky factor (on a copy, l itself is still needed for predictions)
    alglib.matinvreport matInvRep;
    double[,] lCopy = (double[,])l.Clone();

    alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
    if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");

    // c = 2 * dNLL/dbeta; it scales every gradient term that stems from beta
    double c = (nu + n) / (nu + beta - 2);

    // lower triangle of K^-1 - c * alpha * alpha^T
    for (int i = 0; i < n; i++) {
      for (int j = 0; j <= i; j++)
        lCopy[i, j] = lCopy[i, j] - c * alpha[i] * alpha[j];
    }

    // the mean only enters the likelihood via beta = (y - m)^T K^-1 (y - m):
    // dNLL/dtheta = (c / 2) * dbeta/dtheta = -c * (dm/dtheta)^T alpha
    double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
    for (int k = 0; k < meanGradients.Length; k++) {
      var meanGrad = new double[alpha.Length];
      for (int g = 0; g < meanGrad.Length; g++)
        meanGrad[g] = mean.Gradient(x, g, k);
      meanGradients[k] = -c * Util.ScalarProd(meanGrad, alpha);
    }

    // dNLL/dtheta = 0.5 * tr((K^-1 - c * alpha * alpha^T) * dK/dtheta);
    // off-diagonal elements are visited once (lower triangle) and therefore count fully,
    // diagonal elements are weighted with 0.5
    double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
    if (covGradients.Length > 0) {
      for (int i = 0; i < n; i++) {
        for (int j = 0; j < i; j++) {
          var g = cov.CovarianceGradient(x, i, j).ToArray();
          for (int k = 0; k < covGradients.Length; k++) {
            covGradients[k] += lCopy[i, j] * g[k];
          }
        }

        var gDiag = cov.CovarianceGradient(x, i, i).ToArray();
        for (int k = 0; k < covGradients.Length; k++) {
          // diag
          covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
        }
      }
    }

    // gradient w.r.t. the hyperparameter h with nu = exp(h) + 2,
    // i.e. dNLL/dnu multiplied with dnu/dh = (nu - 2)
    double nuGradient = 0.5 * n
      - 0.5 * (nu - 2) * alglib.psi((n + nu) / 2) + 0.5 * (nu - 2) * alglib.psi(nu / 2)
      + 0.5 * (nu - 2) * Math.Log(1 + beta / (nu - 2)) - beta * (n + nu) / (2 * (beta + (nu - 2)));

    hyperparameterGradients =
      meanGradients
        .Concat(covGradients)
        .Concat(new double[] { nuGradient }).ToArray();
  }

  // Builds the input matrix for the given rows; the scaling is applied only if one is given.
  private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
    if (scaling != null) {
      return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
    } else {
      return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
    }
  }

  // Calculates the covariance matrix for all rows of x and returns its lower triangular cholesky factor.
  // Throws an ArgumentException when the decomposition fails.
  private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov) {
    int n = x.GetLength(0);
    var l = new double[n, n];

    // calculate covariances (only the lower triangle is filled)
    for (int i = 0; i < n; i++) {
      for (int j = i; j < n; j++) {
        l[j, i] = cov.Covariance(x, i, j);
      }
    }

    // cholesky decomposition
    var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
    if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
    return l;
  }

  // Makes sure that the (scaled) training input matrix is available.
  // It is not persisted for current models and therefore recalculated on demand.
  private void EnsureTrainingInputs() {
    if (x == null) {
      x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
    }
  }


  public override IDeepCloneable Clone(Cloner cloner) {
    return new StudentTProcessModel(this, cloner);
  }

  // is called by the solution creator to set all parameter values of the covariance and mean function
  // to the optimized values (necessary to make the values visible in the GUI)
  public void FixParameters() {
    covarianceFunction.SetParameter(covarianceParameter);
    meanFunction.SetParameter(meanParameter);
    covarianceParameter = new double[0];
    meanParameter = new double[0];
  }

  #region IRegressionModel Members
  /// <summary>
  /// Returns the estimated target values for the given rows of the dataset.
  /// </summary>
  public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    return GetEstimatedValuesHelper(dataset, rows);
  }
  public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
  }
  IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
    return CreateRegressionSolution(problemData);
  }
  #endregion


  // Calculates the estimates ms[i] + Ks[i] * alpha, where Ks holds the cross covariances
  // between the training rows and the new rows. The returned sequence is evaluated lazily.
  private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
    try {
      EnsureTrainingInputs();
      int n = x.GetLength(0);

      double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
      int newN = newX.GetLength(0);
      var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();

      var Ks = new double[newN][];
      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
      var ms = Enumerable.Range(0, newN)
        .Select(r => mean.Mean(newX, r))
        .ToArray();
      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
      for (int i = 0; i < newN; i++) {
        Ks[i] = new double[n];
        for (int j = 0; j < n; j++) {
          Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
        }
      }

      return Enumerable.Range(0, newN)
        .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
    }
    catch (alglib.alglibexception ae) {
      // wrap exception so that calling code doesn't have to know about alglib implementation
      throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
    }
  }

  /// <summary>
  /// Returns the estimated variance for the given rows of the dataset. Negative values are clamped to 0.
  /// </summary>
  /// <exception cref="ArgumentException">Thrown when an alglib routine fails or the
  /// Cholesky factor cannot be recalculated.</exception>
  public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {
    try {
      EnsureTrainingInputs();
      int n = x.GetLength(0);

      var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
      int newN = newX.GetLength(0);

      var kss = new double[newN];
      double[,] sWKs = new double[n, newN];
      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)).ToArray());

      // the cholesky factor is not persisted for current models and recalculated on demand
      if (l == null) {
        l = CalculateL(x, cov);
      }

      // for stddev: prior variance of every new row
      for (int i = 0; i < newN; i++)
        kss[i] = cov.Covariance(newX, i, i);

      // cross covariances between training rows (matrix rows) and new rows (matrix columns)
      for (int i = 0; i < newN; i++) {
        for (int j = 0; j < n; j++) {
          sWKs[j, i] = cov.CrossCovariance(x, newX, j, i);
        }
      }

      // for stddev: triangular solve with the lower triangular factor l, result is written to sWKs
      alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);

      // scaling factor of the Student-t predictive variance, identical for all rows
      double scale = (nu + beta - 2) / (nu + n - 2);
      for (int i = 0; i < newN; i++) {
        var col = Util.GetCol(sWKs, i).ToArray();
        var sumV = Util.ScalarProd(col, col);
        kss[i] -= sumV;
        kss[i] *= scale;
        if (kss[i] < 0) kss[i] = 0;
      }
      return kss;
    }
    catch (alglib.alglibexception ae) {
      // wrap exception so that calling code doesn't have to know about alglib implementation
      throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
    }
  }
}
411	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences