#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Persistence;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Algorithms.DataAnalysis {
/// <summary>
/// Represents a Gaussian process model.
/// </summary>
[StorableType("36ca62fa-4766-4269-b5f1-3bc24c0ed2e1")]
[Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
public sealed class GaussianProcessModel : RegressionModel, IGaussianProcessModel {
/// <summary>
/// Gets the names of the input variables the model reads when predicting.
/// </summary>
/// <remarks>
/// The internal <c>allowedInputVariables</c> array is returned directly (no copy is made).
/// </remarks>
public override IEnumerable<string> VariablesUsedForPrediction {
  get { return allowedInputVariables; }
}
// Negative log likelihood of the training targets; assigned in CalculateModel.
[Storable]
private double negativeLogLikelihood;
/// <summary>
/// Gets the negative log likelihood that was computed while the model was built.
/// </summary>
public double NegativeLogLikelihood {
  get {
    return this.negativeLogLikelihood;
  }
}
// Negative leave-one-out log predictive probability; assigned in CalculateModel.
[Storable]
private double negativeLooPredictiveProbability;
/// <summary>
/// Gets the negative leave-one-out log predictive probability that was computed
/// while the model was built.
/// </summary>
public double NegativeLooPredictiveProbability {
  get {
    return this.negativeLooPredictiveProbability;
  }
}
// Gradients in the order: mean parameters, covariance parameters, noise (see CalculateModel).
[Storable]
private double[] hyperparameterGradients;
/// <summary>
/// Gets a fresh copy of the hyperparameter gradients, so callers cannot alter
/// the array held by the model.
/// </summary>
public double[] HyperparameterGradients {
  get {
    // Clone() yields a new one-dimensional array with the same elements
    return (double[])this.hyperparameterGradients.Clone();
  }
}
// Covariance function of the model; the public constructor stores a clone of the instance it receives.
[Storable]
private ICovarianceFunction covarianceFunction;
/// <summary>
/// Gets the covariance function held by this model (the stored instance, not a copy).
/// </summary>
public ICovarianceFunction CovarianceFunction {
  get {
    return this.covarianceFunction;
  }
}
// Mean function of the model; the public constructor stores a clone of the instance it receives.
[Storable]
private IMeanFunction meanFunction;
/// <summary>
/// Gets the mean function held by this model (the stored instance, not a copy).
/// </summary>
public IMeanFunction MeanFunction {
  get {
    return this.meanFunction;
  }
}
// Names of the input variables, materialized from the constructor argument.
[Storable]
private string[] allowedInputVariables;
/// <summary>
/// Gets the names of the input variables of the model.
/// The internal array is returned directly (no copy is made).
/// </summary>
public string[] AllowedInputVariables {
  get {
    return this.allowedInputVariables;
  }
}
// Weight vector computed in CalculateModel; predictions add dot(cross-covariances, alpha) to the mean.
[Storable]
private double[] alpha;
// Noise variance; the public constructor sets it to exp(2 * last hyperparameter).
[Storable]
private double sqrSigmaNoise;
/// <summary>
/// Gets the noise standard deviation, i.e. the square root of the stored noise variance.
/// </summary>
public double SigmaNoise {
  get {
    double noiseVariance = this.sqrSigmaNoise;
    return Math.Sqrt(noiseVariance);
  }
}
// Mean-function hyperparameters, taken from the head of the hyperparameter vector in the
// public constructor; replaced by an empty array in FixParameters.
[Storable]
private double[] meanParameter;
// Covariance-function hyperparameters, taken from the hyperparameter vector right after the
// mean parameters; replaced by an empty array in FixParameters.
[Storable]
private double[] covarianceParameter;
// Result of CalculateL (Cholesky factor); rebuilt in GetEstimatedVariances when it is null.
private double[,] l; // used to be storable in previous versions (is calculated lazily now)
// Result of GetData for the training rows; rebuilt in the prediction methods when it is null.
private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)
// BackwardsCompatibility3.4
#region Backwards compatible code, remove with 3.5
// Persistence hook for the legacy "l" entry: restore it if available but don't store it anymore.
[Storable(Name = "l")]
private double[,] l_storable {
  get {
    // without a training dataset this model has been created with an old version, so l must be kept;
    // if the training dataset is available l should not be serialized
    return trainingDataset == null ? l : null;
  }
  set {
    this.l = value;
  }
}
// Persistence hook for the legacy "x" entry: restore it if available but don't store it anymore.
[Storable(Name = "x")]
private double[,] x_storable {
  get {
    // without a training dataset this model has been created with an old version, so x must be kept;
    // if the training dataset is available x should not be serialized
    return trainingDataset == null ? x : null;
  }
  set {
    this.x = value;
  }
}
#endregion
// Clone of the dataset passed to CalculateModel; null for models created with an old version
// (see the backwards-compatibility properties l_storable / x_storable).
[Storable]
private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
// Row indices used for training, materialized from the rows passed to CalculateModel.
[Storable]
private int[] trainingRows;
// Input scaling built in CalculateModel; null when the model was created with scaleInputs == false.
[Storable]
private Scaling inputScaling;
/// <summary>
/// Constructor marked for the persistence framework; it only forwards the flag to the base class.
/// </summary>
[StorableConstructor]
private GaussianProcessModel(bool deserializing)
  : base(deserializing) {
}
/// <summary>
/// Copy constructor used by <c>Clone(Cloner)</c>.
/// </summary>
/// <param name="original">The model to copy.</param>
/// <param name="cloner">Cloner used for the deep-cloneable members.</param>
private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
  : base(original, cloner) {
  // deep-cloneable members go through the cloner
  this.meanFunction = cloner.Clone(original.meanFunction);
  this.covarianceFunction = cloner.Clone(original.covarianceFunction);
  if (original.inputScaling != null)
    this.inputScaling = cloner.Clone(original.inputScaling);
  this.trainingDataset = cloner.Clone(original.trainingDataset);

  // scalar results
  this.negativeLogLikelihood = original.negativeLogLikelihood;
  this.negativeLooPredictiveProbability = original.negativeLooPredictiveProbability;
  this.sqrSigmaNoise = original.sqrSigmaNoise;

  // the parameter arrays are replaced in FixParameters, so each clone gets its own copy
  if (original.meanParameter != null) {
    this.meanParameter = (double[])original.meanParameter.Clone();
  }
  if (original.covarianceParameter != null) {
    this.covarianceParameter = (double[])original.covarianceParameter.Clone();
  }

  // shallow copies of arrays because they cannot be modified
  this.trainingRows = original.trainingRows;
  this.allowedInputVariables = original.allowedInputVariables;
  this.alpha = original.alpha;
  this.l = original.l;
  this.x = original.x;
  // previously missing: without it HyperparameterGradients throws a NullReferenceException on clones.
  // A shallow copy is sufficient because the getter only hands out copies of the array.
  this.hyperparameterGradients = original.hyperparameterGradients;
}
/// <summary>
/// Creates a Gaussian process model and trains it on the given rows of the dataset.
/// </summary>
/// <param name="ds">Dataset that contains the input variables and the target variable.</param>
/// <param name="targetVariable">Name of the target variable.</param>
/// <param name="allowedInputVariables">Names of the input variables.</param>
/// <param name="rows">Rows of <paramref name="ds"/> used for training.</param>
/// <param name="hyp">
/// Hyperparameter vector: first the mean-function parameters, then the covariance-function
/// parameters; the last element is used as the noise parameter (noise variance = exp(2 * last)).
/// </param>
/// <param name="meanFunction">Mean function; a clone is stored.</param>
/// <param name="covarianceFunction">Covariance function; a clone is stored.</param>
/// <param name="scaleInputs">Whether a <c>Scaling</c> is created for the input variables.</param>
/// <exception cref="ArgumentException">
/// Thrown when the model calculation fails; alglib exceptions are wrapped as inner exception.
/// </exception>
public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
  IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
  bool scaleInputs = true)
  : base(targetVariable) {
  this.name = ItemName;
  this.description = ItemDescription;
  this.meanFunction = (IMeanFunction)meanFunction.Clone();
  this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
  this.allowedInputVariables = allowedInputVariables.ToArray();

  int nVariables = this.allowedInputVariables.Length;
  int nMeanParameters = this.meanFunction.GetNumberOfParameters(nVariables);
  int nCovarianceParameters = this.covarianceFunction.GetNumberOfParameters(nVariables);

  // materialize the hyperparameters once; the sequence is read three times below and
  // a deferred sequence would otherwise be re-evaluated on every pass
  double[] hypValues = hyp.ToArray();
  meanParameter = hypValues
    .Take(nMeanParameters)
    .ToArray();
  covarianceParameter = hypValues
    .Skip(nMeanParameters)
    .Take(nCovarianceParameters)
    .ToArray();
  sqrSigmaNoise = Math.Exp(2.0 * hypValues.Last());

  try {
    CalculateModel(ds, rows, scaleInputs);
  } catch (alglib.alglibexception ae) {
    // wrap exception so that calling code doesn't have to know about alglib implementation
    throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  }
}
/// <summary>
/// Trains the model on the given rows: builds the (optionally scaled) input matrix, factorizes the
/// covariance matrix, solves for <c>alpha</c> and computes the negative log likelihood, the negative
/// leave-one-out predictive probability and the hyperparameter gradients.
/// </summary>
/// <param name="ds">Dataset with the training data; a clone is stored in the model.</param>
/// <param name="rows">Training rows; materialized once into <c>trainingRows</c>.</param>
/// <param name="scaleInputs">Whether a <c>Scaling</c> is created for the input variables.</param>
/// <exception cref="ArgumentException">
/// Thrown when the Cholesky factorization (in CalculateL), the linear solve or the matrix inversion fails.
/// </exception>
private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
  this.trainingDataset = (IDataset)ds.Clone();
  // materialize the rows once and use the array everywhere below, so that a deferred sequence
  // cannot deliver different rows for scaling, inputs and targets
  this.trainingRows = rows.ToArray();
  this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, this.trainingRows) : null;

  x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);
  IEnumerable<double> y = ds.GetDoubleValues(TargetVariable, this.trainingRows);

  int n = x.GetLength(0);
  int nAllowedVariables = x.GetLength(1);
  var columns = Enumerable.Range(0, nAllowedVariables).ToArray();

  // calculate cholesky decomposed (lower triangular) covariance matrix
  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
  this.l = CalculateL(x, cov, sqrSigmaNoise);

  // calculate mean
  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
  double[] m = Enumerable.Range(0, n)
    .Select(r => mean.Mean(x, r))
    .ToArray();

  // calculate sum of diagonal elements for likelihood
  double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();

  // solve for alpha
  double[] ym = y.Zip(m, (a, b) => a - b).ToArray();

  int info;
  alglib.densesolverreport denseSolveRep;
  alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
  // the status of the solver was ignored before; alpha is not usable unless the solve succeeded
  if (info != 1) throw new ArgumentException("Can't solve the linear system to calculate alpha.");
  // l is the factor of (K / sigma^2 + I), so the solution has to be divided by sigma^2
  for (int i = 0; i < alpha.Length; i++)
    alpha[i] = alpha[i] / sqrSigmaNoise;
  negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);

  // derivatives
  // invert on a copy so that the Cholesky factor in l stays intact for later predictions
  alglib.matinvreport matInvRep;
  double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
  Array.Copy(l, lCopy, lCopy.Length);

  alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
  if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");

  // LOOCV log predictive probability (GPML page 116 and 117)
  // NOTE(review): GPML eq. 5.12 states mu_i = y_i - [K^-1 y]_i / [K^-1]_ii; the expression for
  // yi_loo below is kept exactly as it was - confirm that it is the intended form.
  var sumLoo = 0.0;
  var ki = new double[n];
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) ki[j] = cov.Covariance(x, i, j);
    var yi = Util.ScalarProd(ki, alpha);
    var yi_loo = yi - alpha[i] / lCopy[i, i] / sqrSigmaNoise;
    var s2_loo = sqrSigmaNoise / lCopy[i, i];
    var err = ym[i] - yi_loo;
    var nll_loo = Math.Log(s2_loo) + err * err / s2_loo;
    sumLoo += nll_loo;
  }
  sumLoo += n * Math.Log(2 * Math.PI);
  negativeLooPredictiveProbability = 0.5 * sumLoo;

  // only the lower triangle (j <= i) is updated; the gradient loops below read only that part
  for (int i = 0; i < n; i++) {
    for (int j = 0; j <= i; j++)
      lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
  }

  double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();

  double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
  for (int k = 0; k < meanGradients.Length; k++) {
    var meanGrad = new double[alpha.Length];
    for (int g = 0; g < meanGrad.Length; g++)
      meanGrad[g] = mean.Gradient(x, g, k);
    meanGradients[k] = -Util.ScalarProd(meanGrad, alpha);
  }

  double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
  if (covGradients.Length > 0) {
    for (int i = 0; i < n; i++) {
      // off-diagonal elements are visited once (j < i) with full weight
      for (int j = 0; j < i; j++) {
        var g = cov.CovarianceGradient(x, i, j);
        for (int k = 0; k < covGradients.Length; k++) {
          covGradients[k] += lCopy[i, j] * g[k];
        }
      }

      // diagonal elements are weighted with 0.5
      var gDiag = cov.CovarianceGradient(x, i, i);
      for (int k = 0; k < covGradients.Length; k++) {
        covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
      }
    }
  }

  // order: mean parameters, covariance parameters, noise
  hyperparameterGradients =
    meanGradients
      .Concat(covGradients)
      .Concat(new double[] { noiseGradient }).ToArray();
}
/// <summary>
/// Builds the input matrix (one row per requested row, one column per input variable).
/// </summary>
/// <param name="ds">Dataset to read from.</param>
/// <param name="allowedInputs">Names of the input variables; they define the column order.</param>
/// <param name="rows">Rows to read; they define the row order.</param>
/// <param name="scaling">Scaling to apply, or null to read the values unscaled.</param>
/// <returns>The matrix of (scaled) input values.</returns>
private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
  if (scaling != null) {
    // BackwardsCompatibility3.3
    #region Backwards compatible code, remove with 3.4
    // TODO: completely remove Scaling class
    // both sequences are materialized because they are used more than once below
    List<string> variablesList = allowedInputs.ToList();
    List<int> rowsList = rows.ToList();

    double[,] matrix = new double[rowsList.Count, variablesList.Count];

    int col = 0;
    foreach (string column in variablesList) {
      var values = scaling.GetScaledValues(ds, column, rowsList);
      int row = 0;
      foreach (var value in values) {
        matrix[row, col] = value;
        row++;
      }
      col++;
    }
    return matrix;
    #endregion
  } else {
    return ds.ToArray(allowedInputs, rows);
  }
}
/// <summary>
/// Builds the lower triangle of (K / sqrSigmaNoise + I) for the rows of <paramref name="x"/>
/// and replaces it by its Cholesky factor.
/// </summary>
/// <param name="x">Input matrix; its first dimension determines the matrix size.</param>
/// <param name="cov">Parameterized covariance function used to fill the matrix.</param>
/// <param name="sqrSigmaNoise">Noise variance used to scale the covariances.</param>
/// <returns>The matrix after the in-place Cholesky decomposition.</returns>
/// <exception cref="ArgumentException">Thrown when the Cholesky decomposition reports failure.</exception>
private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
  int size = x.GetLength(0);
  var factor = new double[size, size];

  // calculate covariances (only entries with row >= col are written)
  for (int col = 0; col < size; col++) {
    for (int row = col; row < size; row++) {
      double entry = cov.Covariance(x, col, row) / sqrSigmaNoise;
      factor[row, col] = row == col ? entry + 1.0 : entry;
    }
  }

  // cholesky decomposition
  bool succeeded = alglib.trfac.spdmatrixcholesky(ref factor, size, false);
  if (succeeded) return factor;
  throw new ArgumentException("Matrix is not positive semidefinite");
}
/// <summary>
/// Creates a clone of this model by invoking the copy constructor.
/// </summary>
/// <param name="cloner">Cloner that is handed on to the copy constructor.</param>
/// <returns>The cloned model.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new GaussianProcessModel(this, cloner);
  return copy;
}
/// <summary>
/// Writes the optimized parameter values into the covariance and mean function and then
/// empties the parameter arrays held by the model.
/// </summary>
/// <remarks>
/// Is called by the solution creator to set all parameter values of the covariance and mean
/// function to the optimized values (necessary to make the values visible in the GUI).
/// </remarks>
public void FixParameters() {
  // push the values into the functions first ...
  this.covarianceFunction.SetParameter(this.covarianceParameter);
  this.meanFunction.SetParameter(this.meanParameter);

  // ... then drop the local copies
  this.covarianceParameter = new double[0];
  this.meanParameter = new double[0];
}
#region IRegressionModel Members
/// <summary>
/// Returns the estimated target value for each requested row of the dataset.
/// </summary>
/// <param name="dataset">Dataset that contains the input variables.</param>
/// <param name="rows">Rows for which estimates are calculated.</param>
/// <returns>One estimated value per row.</returns>
public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
  return GetEstimatedValuesHelper(dataset, rows);
}
/// <summary>
/// Creates a regression solution for this model; the solution receives a new
/// <c>RegressionProblemData</c> built from the given problem data.
/// </summary>
/// <param name="problemData">Problem data the solution is created for.</param>
/// <returns>A new <c>GaussianProcessRegressionSolution</c> wrapping this model.</returns>
public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
  var problemDataCopy = new RegressionProblemData(problemData);
  return new GaussianProcessRegressionSolution(this, problemDataCopy);
}
#endregion
/// <summary>
/// Calculates the estimates for the given rows: mean(x*) + dot(cross-covariances to the
/// training inputs, alpha).
/// </summary>
/// <param name="dataset">Dataset that contains the input variables.</param>
/// <param name="rows">Rows for which estimates are calculated.</param>
/// <returns>
/// A deferred sequence with one estimate per row; the covariances are calculated eagerly,
/// only the final sums are evaluated on enumeration.
/// </returns>
/// <exception cref="ArgumentException">Wraps alglib exceptions raised during the calculation.</exception>
private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
  try {
    // the training inputs are not persisted anymore and are rebuilt on first use
    if (x == null) {
      x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
    }
    int n = x.GetLength(0);

    double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
    int newN = newX.GetLength(0);

    var Ks = new double[newN][];
    var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
    var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
    var ms = Enumerable.Range(0, newX.GetLength(0))
      .Select(r => mean.Mean(newX, r))
      .ToArray();
    var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
    // Ks[i][j]: covariance between new row i and training row j
    for (int i = 0; i < newN; i++) {
      Ks[i] = new double[n];
      for (int j = 0; j < n; j++) {
        Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
      }
    }

    return Enumerable.Range(0, newN)
      .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
  } catch (alglib.alglibexception ae) {
    // wrap exception so that calling code doesn't have to know about alglib implementation
    throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  }
}
/// <summary>
/// Calculates the predictive variance of the target for each requested row. The noise variance
/// is included and negative results are clamped to zero.
/// </summary>
/// <param name="dataset">Dataset that contains the input variables.</param>
/// <param name="rows">Rows for which variances are calculated.</param>
/// <returns>One variance per row.</returns>
/// <exception cref="ArgumentException">Wraps alglib exceptions raised during the calculation.</exception>
public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
  try {
    // the training inputs are not persisted anymore and are rebuilt on first use
    if (x == null) {
      x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
    }
    int n = x.GetLength(0);

    var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
    int newN = newX.GetLength(0);

    var kss = new double[newN];
    double[,] sWKs = new double[n, newN];
    var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
    var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);

    // the Cholesky factor is not persisted anymore and is rebuilt on first use
    if (l == null) {
      l = CalculateL(x, cov, sqrSigmaNoise);
    }

    // for stddev
    for (int i = 0; i < newN; i++)
      kss[i] = cov.Covariance(newX, i, i);

    // the noise standard deviation does not change inside the loops, so it is computed once
    double sigmaNoise = Math.Sqrt(sqrSigmaNoise);
    for (int i = 0; i < newN; i++) {
      for (int j = 0; j < n; j++) {
        sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / sigmaNoise;
      }
    }

    // for stddev
    alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);

    for (int i = 0; i < newN; i++) {
      var col = Util.GetCol(sWKs, i).ToArray();
      var sumV = Util.ScalarProd(col, col);
      kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distribution to get V(y)
      kss[i] -= sumV;
      // guard against small negative values
      if (kss[i] < 0) kss[i] = 0;
    }
    return kss;
  } catch (alglib.alglibexception ae) {
    // wrap exception so that calling code doesn't have to know about alglib implementation
    throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  }
}
}
}