1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


28  using HeuristicLab.Problems.DataAnalysis;


29 


30  namespace HeuristicLab.Algorithms.DataAnalysis {


31  /// <summary>


32  /// Represents a Gaussian process model.


33  /// </summary>


34  [StorableClass]


35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]


36  public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {


37  [Storable]


38  private double negativeLogLikelihood;


39  public double NegativeLogLikelihood {


40  get { return negativeLogLikelihood; }


41  }


42 


43  [Storable]


44  private double[] hyperparameterGradients;


45  public double[] HyperparameterGradients {


46  get {


47  var copy = new double[hyperparameterGradients.Length];


48  Array.Copy(hyperparameterGradients, copy, copy.Length);


49  return copy;


50  }


51  }


52 


53  [Storable]


54  private ICovarianceFunction covarianceFunction;


55  public ICovarianceFunction CovarianceFunction {


56  get { return covarianceFunction; }


57  }


58  [Storable]


59  private IMeanFunction meanFunction;


60  public IMeanFunction MeanFunction {


61  get { return meanFunction; }


62  }


63  [Storable]


64  private string targetVariable;


65  public string TargetVariable {


66  get { return targetVariable; }


67  }


68  [Storable]


69  private string[] allowedInputVariables;


70  public string[] AllowedInputVariables {


71  get { return allowedInputVariables; }


72  }


73 


74  [Storable]


75  private double[] alpha;


76  [Storable]


77  private double sqrSigmaNoise;


78  public double SigmaNoise {


79  get { return Math.Sqrt(sqrSigmaNoise); }


80  }


81 


82  [Storable]


83  private double[] meanParameter;


84  [Storable]


85  private double[] covarianceParameter;


86 


87  private double[,] l; // used to be storable in previous versions (is calculated lazily now)


88  private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)


89 


90  // BackwardsCompatibility3.4


91  #region Backwards compatible code, remove with 3.5


92  [Storable(Name = "l")] // restore if available but don't store anymore


93  private double[,] l_storable {


94  set { this.l = value; }


95  get {


96  if (trainingDataset == null) return l; // this model has been created with an old version


97  else return null; // if the training dataset is available l should not be serialized


98  }


99  }


100  [Storable(Name = "x")] // restore if available but don't store anymore


101  private double[,] x_storable {


102  set { this.x = value; }


103  get {


104  if (trainingDataset == null) return x; // this model has been created with an old version


105  else return null; // if the training dataset is available x should not be serialized


106  }


107  }


108  #endregion


109 


110 


111  [Storable]


112  private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence


113  [Storable]


114  private int[] trainingRows;


115 


116  [Storable]


117  private Scaling inputScaling;


118 


119 


120  [StorableConstructor]


121  private GaussianProcessModel(bool deserializing) : base(deserializing) { }


122  private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)


123  : base(original, cloner) {


124  this.meanFunction = cloner.Clone(original.meanFunction);


125  this.covarianceFunction = cloner.Clone(original.covarianceFunction);


126  if (original.inputScaling != null)


127  this.inputScaling = cloner.Clone(original.inputScaling);


128  this.trainingDataset = cloner.Clone(original.trainingDataset);


129  this.negativeLogLikelihood = original.negativeLogLikelihood;


130  this.targetVariable = original.targetVariable;


131  this.sqrSigmaNoise = original.sqrSigmaNoise;


132  if (original.meanParameter != null) {


133  this.meanParameter = (double[])original.meanParameter.Clone();


134  }


135  if (original.covarianceParameter != null) {


136  this.covarianceParameter = (double[])original.covarianceParameter.Clone();


137  }


138 


139  // shallow copies of arrays because they cannot be modified


140  this.trainingRows = original.trainingRows;


141  this.allowedInputVariables = original.allowedInputVariables;


142  this.alpha = original.alpha;


143  this.l = original.l;


144  this.x = original.x;


145  }


146  public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,


147  IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,


148  bool scaleInputs = true)


149  : base() {


150  this.name = ItemName;


151  this.description = ItemDescription;


152  this.meanFunction = (IMeanFunction)meanFunction.Clone();


153  this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();


154  this.targetVariable = targetVariable;


155  this.allowedInputVariables = allowedInputVariables.ToArray();


156 


157 


158  int nVariables = this.allowedInputVariables.Length;


159  meanParameter = hyp


160  .Take(this.meanFunction.GetNumberOfParameters(nVariables))


161  .ToArray();


162 


163  covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))


164  .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))


165  .ToArray();


166  sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());


167  try {


168  CalculateModel(ds, rows, scaleInputs);


169  }


170  catch (alglib.alglibexception ae) {


171  // wrap exception so that calling code doesn't have to know about alglib implementation


172  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


173  }


174  }


175 


176  private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {


177  this.trainingDataset = (IDataset)ds.Clone();


178  this.trainingRows = rows.ToArray();


179  this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;


180 


181  x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);


182 


183  IEnumerable<double> y;


184  y = ds.GetDoubleValues(targetVariable, rows);


185 


186  int n = x.GetLength(0);


187 


188  var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();


189  // calculate cholesky decomposed (lower triangular) covariance matrix


190  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


191  this.l = CalculateL(x, cov, sqrSigmaNoise);


192 


193  // calculate mean


194  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);


195  double[] m = Enumerable.Range(0, x.GetLength(0))


196  .Select(r => mean.Mean(x, r))


197  .ToArray();


198 


199  // calculate sum of diagonal elements for likelihood


200  double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();


201 


202  // solve for alpha


203  double[] ym = y.Zip(m, (a, b) => a  b).ToArray();


204 


205  int info;


206  alglib.densesolverreport denseSolveRep;


207 


208  alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);


209  for (int i = 0; i < alpha.Length; i++)


210  alpha[i] = alpha[i] / sqrSigmaNoise;


211  negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);


212 


213  // derivatives


214  int nAllowedVariables = x.GetLength(1);


215 


216  alglib.matinvreport matInvRep;


217  double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];


218  Array.Copy(l, lCopy, lCopy.Length);


219 


220  alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);


221  if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");


222  for (int i = 0; i < n; i++) {


223  for (int j = 0; j <= i; j++)


224  lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise  alpha[i] * alpha[j];


225  }


226 


227  double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();


228 


229  double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];


230  for (int k = 0; k < meanGradients.Length; k++) {


231  var meanGrad = new double[alpha.Length];


232  for (int g = 0; g < meanGrad.Length; g++)


233  meanGrad[g] = mean.Gradient(x, g, k);


234  meanGradients[k] = Util.ScalarProd(meanGrad, alpha);


235  }


236 


237  double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];


238  if (covGradients.Length > 0) {


239  for (int i = 0; i < n; i++) {


240  for (int j = 0; j < i; j++) {


241  var g = cov.CovarianceGradient(x, i, j);


242  for (int k = 0; k < covGradients.Length; k++) {


243  covGradients[k] += lCopy[i, j] * g[k];


244  }


245  }


246 


247  var gDiag = cov.CovarianceGradient(x, i, i);


248  for (int k = 0; k < covGradients.Length; k++) {


249  // diag


250  covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];


251  }


252  }


253  }


254 


255  hyperparameterGradients =


256  meanGradients


257  .Concat(covGradients)


258  .Concat(new double[] { noiseGradient }).ToArray();


259 


260  }


261 


262  private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {


263  if (scaling != null) {


264  return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);


265  } else {


266  return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);


267  }


268  }


269 


270  private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {


271  int n = x.GetLength(0);


272  var l = new double[n, n];


273 


274  // calculate covariances


275  for (int i = 0; i < n; i++) {


276  for (int j = i; j < n; j++) {


277  l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;


278  if (j == i) l[j, i] += 1.0;


279  }


280  }


281 


282  // cholesky decomposition


283  var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);


284  if (!res) throw new ArgumentException("Matrix is not positive semidefinite");


285  return l;


286  }


287 


288 


289  public override IDeepCloneable Clone(Cloner cloner) {


290  return new GaussianProcessModel(this, cloner);


291  }


292 


293  // is called by the solution creator to set all parameter values of the covariance and mean function


294  // to the optimized values (necessary to make the values visible in the GUI)


295  public void FixParameters() {


296  covarianceFunction.SetParameter(covarianceParameter);


297  meanFunction.SetParameter(meanParameter);


298  covarianceParameter = new double[0];


299  meanParameter = new double[0];


300  }


301 


302  #region IRegressionModel Members


303  public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {


304  return GetEstimatedValuesHelper(dataset, rows);


305  }


306  public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {


307  return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));


308  }


309  IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {


310  return CreateRegressionSolution(problemData);


311  }


312  #endregion


313 


314 


315  private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {


316  try {


317  if (x == null) {


318  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


319  }


320  int n = x.GetLength(0);


321 


322  double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


323  int newN = newX.GetLength(0);


324 


325  var Ks = new double[newN][];


326  var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();


327  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);


328  var ms = Enumerable.Range(0, newX.GetLength(0))


329  .Select(r => mean.Mean(newX, r))


330  .ToArray();


331  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


332  for (int i = 0; i < newN; i++) {


333  Ks[i] = new double[n];


334  for (int j = 0; j < n; j++) {


335  Ks[i][j] = cov.CrossCovariance(x, newX, j, i);


336  }


337  }


338 


339  return Enumerable.Range(0, newN)


340  .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));


341  }


342  catch (alglib.alglibexception ae) {


343  // wrap exception so that calling code doesn't have to know about alglib implementation


344  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


345  }


346  }


347 


348  public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {


349  try {


350  if (x == null) {


351  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


352  }


353  int n = x.GetLength(0);


354 


355  var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


356  int newN = newX.GetLength(0);


357 


358  var kss = new double[newN];


359  double[,] sWKs = new double[n, newN];


360  var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();


361  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


362 


363  if (l == null) {


364  l = CalculateL(x, cov, sqrSigmaNoise);


365  }


366 


367  // for stddev


368  for (int i = 0; i < newN; i++)


369  kss[i] = cov.Covariance(newX, i, i);


370 


371  for (int i = 0; i < newN; i++) {


372  for (int j = 0; j < n; j++) {


373  sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);


374  }


375  }


376 


377  // for stddev


378  alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);


379 


380  for (int i = 0; i < newN; i++) {


381  var col = Util.GetCol(sWKs, i).ToArray();


382  var sumV = Util.ScalarProd(col, col);


383  kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)


384  kss[i] = sumV;


385  if (kss[i] < 0) kss[i] = 0;


386  }


387  return kss;


388  }


389  catch (alglib.alglibexception ae) {


390  // wrap exception so that calling code doesn't have to know about alglib implementation


391  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


392  }


393  }


394  }


395  }

