#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Represents a Gaussian process model.
  /// </summary>
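  /// <remarks>
  /// The model caches the Cholesky factor of the covariance matrix and the weight
  /// vector alpha so that predictions only require inner products with the stored
  /// training data; cf. Rasmussen and Williams, "Gaussian Processes for Machine
  /// Learning", MIT Press, 2006, ch. 2.
  /// </remarks>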
  [StorableClass]
  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
  public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {
    public IEnumerable<string> VariablesUsedForPrediction {
      get { return allowedInputVariables; }
    }

    [Storable]
    private double negativeLogLikelihood;
    public double NegativeLogLikelihood {
      get { return negativeLogLikelihood; }
    }

    [Storable]
    private double[] hyperparameterGradients;
    public double[] HyperparameterGradients {
      get {
        var copy = new double[hyperparameterGradients.Length];
        Array.Copy(hyperparameterGradients, copy, copy.Length);
        return copy;
      }
    }

    [Storable]
    private ICovarianceFunction covarianceFunction;
    public ICovarianceFunction CovarianceFunction {
      get { return covarianceFunction; }
    }
    [Storable]
    private IMeanFunction meanFunction;
    public IMeanFunction MeanFunction {
      get { return meanFunction; }
    }
    [Storable]
    private string targetVariable;
    public string TargetVariable {
      get { return targetVariable; }
    }
    [Storable]
    private string[] allowedInputVariables;
    public string[] AllowedInputVariables {
      get { return allowedInputVariables; }
    }

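    // alpha caches the weight vector (K + sigma_n^2 I)^-1 (y - m) computed during training;
    // the posterior mean at a new point x* is m(x*) + k*' alpha (see GetEstimatedValuesHelper)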
    [Storable]
    private double[] alpha;
    [Storable]
    private double sqrSigmaNoise;
    public double SigmaNoise {
      get { return Math.Sqrt(sqrSigmaNoise); }
    }

    [Storable]
    private double[] meanParameter;
    [Storable]
    private double[] covarianceParameter;

    private double[,] l; // used to be storable in previous versions (is calculated lazily now)
    private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)

    // BackwardsCompatibility3.4
    #region Backwards compatible code, remove with 3.5
    [Storable(Name = "l")] // restore if available but don't store anymore
    private double[,] l_storable {
      set { this.l = value; }
      get {
        if (trainingDataset == null) return l; // this model has been created with an old version
        else return null; // if the training dataset is available l should not be serialized
      }
    }
    [Storable(Name = "x")] // restore if available but don't store anymore
    private double[,] x_storable {
      set { this.x = value; }
      get {
        if (trainingDataset == null) return x; // this model has been created with an old version
        else return null; // if the training dataset is available x should not be serialized
      }
    }
    #endregion

    [Storable]
    private IDataset trainingDataset; // the original training dataset is stored completely because this is more efficient for persistence
    [Storable]
    private int[] trainingRows;

    [Storable]
    private Scaling inputScaling;

    [StorableConstructor]
    private GaussianProcessModel(bool deserializing) : base(deserializing) { }
    private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
      : base(original, cloner) {
      this.meanFunction = cloner.Clone(original.meanFunction);
      this.covarianceFunction = cloner.Clone(original.covarianceFunction);
      if (original.inputScaling != null)
        this.inputScaling = cloner.Clone(original.inputScaling);
      this.trainingDataset = cloner.Clone(original.trainingDataset);
      this.negativeLogLikelihood = original.negativeLogLikelihood;
      this.targetVariable = original.targetVariable;
      this.sqrSigmaNoise = original.sqrSigmaNoise;
      if (original.meanParameter != null) {
        this.meanParameter = (double[])original.meanParameter.Clone();
      }
      if (original.covarianceParameter != null) {
        this.covarianceParameter = (double[])original.covarianceParameter.Clone();
      }

      // shallow copies of the arrays suffice because they are never modified
      this.trainingRows = original.trainingRows;
      this.allowedInputVariables = original.allowedInputVariables;
      this.alpha = original.alpha;
      this.l = original.l;
      this.x = original.x;
    }
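
    // The flat hyperparameter vector hyp is split positionally: the mean function
    // parameters come first, then the covariance function parameters; the last element
    // is log(sigma_n), so exp(2 * hyp.Last()) recovers the noise variance sigma_n^2.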
    public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,
      bool scaleInputs = true)
      : base() {
      this.name = ItemName;
      this.description = ItemDescription;
      this.meanFunction = (IMeanFunction)meanFunction.Clone();
      this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();
      this.targetVariable = targetVariable;
      this.allowedInputVariables = allowedInputVariables.ToArray();

      int nVariables = this.allowedInputVariables.Length;
      meanParameter = hyp
        .Take(this.meanFunction.GetNumberOfParameters(nVariables))
        .ToArray();

      covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
        .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
        .ToArray();
      sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());
      try {
        CalculateModel(ds, rows, scaleInputs);
      }
      catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
      }
    }

    private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {
      this.trainingDataset = (IDataset)ds.Clone();
      this.trainingRows = rows.ToArray();
      this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;

      x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);

      IEnumerable<double> y = ds.GetDoubleValues(targetVariable, rows);

      int n = x.GetLength(0);

      var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();
      // calculate the Cholesky-decomposed (lower triangular) covariance matrix
      var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
      this.l = CalculateL(x, cov, sqrSigmaNoise);

      // calculate mean
      var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
      double[] m = Enumerable.Range(0, x.GetLength(0))
        .Select(r => mean.Mean(x, r))
        .ToArray();

      // sum of the logs of the diagonal elements of L (needed for the likelihood)
      double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();

      // solve for alpha
      double[] ym = y.Zip(m, (a, b) => a - b).ToArray();

      int info;
      alglib.densesolverreport denseSolveRep;

      alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);
      for (int i = 0; i < alpha.Length; i++)
        alpha[i] = alpha[i] / sqrSigmaNoise;
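
      // negative log marginal likelihood (cf. Rasmussen and Williams 2006, eq. 2.30):
      // nll = 0.5 (y - m)' alpha + sum_i log L_ii + (n/2) log(2 pi sigma_n^2);
      // the sum over log L_ii together with the sigma_n^2 term accounts for the
      // log-determinant of K + sigma_n^2 I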
      negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);

      // derivatives
      int nAllowedVariables = x.GetLength(1);

      alglib.matinvreport matInvRep;
      double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
      Array.Copy(l, lCopy, lCopy.Length);

      alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
      if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
      for (int i = 0; i < n; i++) {
        for (int j = 0; j <= i; j++)
          lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
      }
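
      // the lower triangle of lCopy now holds Q = (K + sigma_n^2 I)^-1 - alpha alpha',
      // the matrix in the gradient formula d nll / d theta = 0.5 tr(Q dK/dtheta)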

      double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();

      double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
      for (int k = 0; k < meanGradients.Length; k++) {
        var meanGrad = new double[alpha.Length];
        for (int g = 0; g < meanGrad.Length; g++)
          meanGrad[g] = mean.Gradient(x, g, k);
        meanGradients[k] = Util.ScalarProd(meanGrad, alpha);
      }
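
      // accumulate 0.5 * tr(Q dK/dtheta_k) exploiting the symmetry of Q and dK/dtheta_k:
      // off-diagonal entries of the lower triangle count twice, diagonal entries once
      // (hence the factor 0.5 on the diagonal terms below)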
      double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];
      if (covGradients.Length > 0) {
        for (int i = 0; i < n; i++) {
          for (int j = 0; j < i; j++) {
            var g = cov.CovarianceGradient(x, i, j);
            for (int k = 0; k < covGradients.Length; k++) {
              covGradients[k] += lCopy[i, j] * g[k];
            }
          }

          var gDiag = cov.CovarianceGradient(x, i, i);
          for (int k = 0; k < covGradients.Length; k++) {
            // diag
            covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];
          }
        }
      }

      hyperparameterGradients =
        meanGradients
        .Concat(covGradients)
        .Concat(new double[] { noiseGradient }).ToArray();
    }

    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
      if (scaling != null) {
        return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
      } else {
        return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
      }
    }

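    // computes the lower Cholesky factor L of (K / sigma_n^2 + I), where K is the
    // covariance matrix of the (scaled) training inputs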
    private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {
      int n = x.GetLength(0);
      var l = new double[n, n];

      // calculate covariances
      for (int i = 0; i < n; i++) {
        for (int j = i; j < n; j++) {
          l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;
          if (j == i) l[j, i] += 1.0;
        }
      }

      // Cholesky decomposition
      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
      return l;
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new GaussianProcessModel(this, cloner);
    }

    // is called by the solution creator to set all parameter values of the covariance and mean function
    // to the optimized values (necessary to make the values visible in the GUI)
    public void FixParameters() {
      covarianceFunction.SetParameter(covarianceParameter);
      meanFunction.SetParameter(meanParameter);
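      // the optimized values are now stored in the functions themselves,
      // so the cached parameter arrays are cleared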
      covarianceParameter = new double[0];
      meanParameter = new double[0];
    }

    #region IRegressionModel Members
    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      return GetEstimatedValuesHelper(dataset, rows);
    }
    public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
    }
    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
      return CreateRegressionSolution(problemData);
    }
    #endregion

    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
      try {
        if (x == null) {
          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
        }
        int n = x.GetLength(0);

        double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
        int newN = newX.GetLength(0);

        var Ks = new double[newN][];
        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
        var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);
        var ms = Enumerable.Range(0, newX.GetLength(0))
          .Select(r => mean.Mean(newX, r))
          .ToArray();
        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);
        for (int i = 0; i < newN; i++) {
          Ks[i] = new double[n];
          for (int j = 0; j < n; j++) {
            Ks[i][j] = cov.CrossCovariance(x, newX, j, i);
          }
        }
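
        // posterior mean at each new point: m(x*) + k*' alpha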
        return Enumerable.Range(0, newN)
          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
      }
      catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
      }
    }

    public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {
      try {
        if (x == null) {
          x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);
        }
        int n = x.GetLength(0);

        var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);
        int newN = newX.GetLength(0);

        var kss = new double[newN];
        double[,] sWKs = new double[n, newN];
        var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();
        var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);

        if (l == null) {
          l = CalculateL(x, cov, sqrSigmaNoise);
        }

        // prior variances k(x*, x*) of the new points
        for (int i = 0; i < newN; i++)
          kss[i] = cov.Covariance(newX, i, i);

        // cross-covariances k* between training and new points, scaled by 1/sigma_n
        for (int i = 0; i < newN; i++) {
          for (int j = 0; j < n; j++) {
            sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);
          }
        }

        // triangular solve for the predictive standard deviation
        alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
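
        // each column of sWKs now holds v = L^-1 (k* / sigma_n), so that
        // k*' (K + sigma_n^2 I)^-1 k* = v'v and V(y*) = k(x*, x*) + sigma_n^2 - v'v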
        for (int i = 0; i < newN; i++) {
          var col = Util.GetCol(sWKs, i).ToArray();
          var sumV = Util.ScalarProd(col, col);
          kss[i] += sqrSigmaNoise; // kss is V(f), add the noise variance of the predictive distribution to get V(y)
          kss[i] -= sumV;
          if (kss[i] < 0) kss[i] = 0;
        }
        return kss;
      }
      catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
      }
    }
  }
}

