1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


28  using HeuristicLab.Problems.DataAnalysis;


29 


30  namespace HeuristicLab.Algorithms.DataAnalysis {


31  /// <summary>


32  /// Represents a Gaussian process model.


33  /// </summary>


34  [StorableClass]


35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]


36  public sealed class GaussianProcessModel : RegressionModel, IGaussianProcessModel {


37  public override IEnumerable<string> VariablesUsedForPrediction {


38  get { return allowedInputVariables; }


39  }


40 


41  [Storable]


42  private double negativeLogLikelihood;


43  public double NegativeLogLikelihood {


44  get { return negativeLogLikelihood; }


45  }


46 


47  [Storable]


48  private double[] hyperparameterGradients;


49  public double[] HyperparameterGradients {


50  get {


51  var copy = new double[hyperparameterGradients.Length];


52  Array.Copy(hyperparameterGradients, copy, copy.Length);


53  return copy;


54  }


55  }


56 


57  [Storable]


58  private ICovarianceFunction covarianceFunction;


59  public ICovarianceFunction CovarianceFunction {


60  get { return covarianceFunction; }


61  }


62  [Storable]


63  private IMeanFunction meanFunction;


64  public IMeanFunction MeanFunction {


65  get { return meanFunction; }


66  }


67 


68  [Storable]


69  private string[] allowedInputVariables;


70  public string[] AllowedInputVariables {


71  get { return allowedInputVariables; }


72  }


73 


74  [Storable]


75  private double[] alpha;


76  [Storable]


77  private double sqrSigmaNoise;


78  public double SigmaNoise {


79  get { return Math.Sqrt(sqrSigmaNoise); }


80  }


81 


82  [Storable]


83  private double[] meanParameter;


84  [Storable]


85  private double[] covarianceParameter;


86 


87  private double[,] l; // used to be storable in previous versions (is calculated lazily now)


88  private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)


89 


90  // BackwardsCompatibility3.4


91  #region Backwards compatible code, remove with 3.5


92  [Storable(Name = "l")] // restore if available but don't store anymore


93  private double[,] l_storable {


94  set { this.l = value; }


95  get {


96  if (trainingDataset == null) return l; // this model has been created with an old version


97  else return null; // if the training dataset is available l should not be serialized


98  }


99  }


100  [Storable(Name = "x")] // restore if available but don't store anymore


101  private double[,] x_storable {


102  set { this.x = value; }


103  get {


104  if (trainingDataset == null) return x; // this model has been created with an old version


105  else return null; // if the training dataset is available x should not be serialized


106  }


107  }


108  #endregion


109 


110 


111  [Storable]


112  private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence


113  [Storable]


114  private int[] trainingRows;


115 


116  [Storable]


117  private Scaling inputScaling;


118 


119 


120  [StorableConstructor]


121  private GaussianProcessModel(bool deserializing) : base(deserializing) { }


122  private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)


123  : base(original, cloner) {


124  this.meanFunction = cloner.Clone(original.meanFunction);


125  this.covarianceFunction = cloner.Clone(original.covarianceFunction);


126  if (original.inputScaling != null)


127  this.inputScaling = cloner.Clone(original.inputScaling);


128  this.trainingDataset = cloner.Clone(original.trainingDataset);


129  this.negativeLogLikelihood = original.negativeLogLikelihood;


130  this.sqrSigmaNoise = original.sqrSigmaNoise;


131  if (original.meanParameter != null) {


132  this.meanParameter = (double[])original.meanParameter.Clone();


133  }


134  if (original.covarianceParameter != null) {


135  this.covarianceParameter = (double[])original.covarianceParameter.Clone();


136  }


137 


138  // shallow copies of arrays because they cannot be modified


139  this.trainingRows = original.trainingRows;


140  this.allowedInputVariables = original.allowedInputVariables;


141  this.alpha = original.alpha;


142  this.l = original.l;


143  this.x = original.x;


144  }


145  public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,


146  IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,


147  bool scaleInputs = true)


148  : base(targetVariable) {


149  this.name = ItemName;


150  this.description = ItemDescription;


151  this.meanFunction = (IMeanFunction)meanFunction.Clone();


152  this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();


153  this.allowedInputVariables = allowedInputVariables.ToArray();


154 


155 


156  int nVariables = this.allowedInputVariables.Length;


157  meanParameter = hyp


158  .Take(this.meanFunction.GetNumberOfParameters(nVariables))


159  .ToArray();


160 


161  covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))


162  .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))


163  .ToArray();


164  sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());


165  try {


166  CalculateModel(ds, rows, scaleInputs);


167  } catch (alglib.alglibexception ae) {


168  // wrap exception so that calling code doesn't have to know about alglib implementation


169  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


170  }


171  }


172 


173  private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {


174  this.trainingDataset = (IDataset)ds.Clone();


175  this.trainingRows = rows.ToArray();


176  this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;


177 


178  x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);


179 


180  IEnumerable<double> y;


181  y = ds.GetDoubleValues(TargetVariable, rows);


182 


183  int n = x.GetLength(0);


184 


185  var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();


186  // calculate cholesky decomposed (lower triangular) covariance matrix


187  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


188  this.l = CalculateL(x, cov, sqrSigmaNoise);


189 


190  // calculate mean


191  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);


192  double[] m = Enumerable.Range(0, x.GetLength(0))


193  .Select(r => mean.Mean(x, r))


194  .ToArray();


195 


196  // calculate sum of diagonal elements for likelihood


197  double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();


198 


199  // solve for alpha


200  double[] ym = y.Zip(m, (a, b) => a  b).ToArray();


201 


202  int info;


203  alglib.densesolverreport denseSolveRep;


204 


205  alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);


206  for (int i = 0; i < alpha.Length; i++)


207  alpha[i] = alpha[i] / sqrSigmaNoise;


208  negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);


209 


210  // derivatives


211  int nAllowedVariables = x.GetLength(1);


212 


213  alglib.matinvreport matInvRep;


214  double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];


215  Array.Copy(l, lCopy, lCopy.Length);


216 


217  alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);


218  if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");


219  for (int i = 0; i < n; i++) {


220  for (int j = 0; j <= i; j++)


221  lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise  alpha[i] * alpha[j];


222  }


223 


224  double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();


225 


226  double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];


227  for (int k = 0; k < meanGradients.Length; k++) {


228  var meanGrad = new double[alpha.Length];


229  for (int g = 0; g < meanGrad.Length; g++)


230  meanGrad[g] = mean.Gradient(x, g, k);


231  meanGradients[k] = Util.ScalarProd(meanGrad, alpha);


232  }


233 


234  double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];


235  if (covGradients.Length > 0) {


236  for (int i = 0; i < n; i++) {


237  for (int j = 0; j < i; j++) {


238  var g = cov.CovarianceGradient(x, i, j);


239  for (int k = 0; k < covGradients.Length; k++) {


240  covGradients[k] += lCopy[i, j] * g[k];


241  }


242  }


243 


244  var gDiag = cov.CovarianceGradient(x, i, i);


245  for (int k = 0; k < covGradients.Length; k++) {


246  // diag


247  covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];


248  }


249  }


250  }


251 


252  hyperparameterGradients =


253  meanGradients


254  .Concat(covGradients)


255  .Concat(new double[] { noiseGradient }).ToArray();


256 


257  }


258 


259  private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {


260  if (scaling != null) {


261  // BackwardsCompatibility3.3


262  #region Backwards compatible code, remove with 3.4


263  // TODO: completely remove Scaling class


264  List<string> variablesList = allowedInputs.ToList();


265  List<int> rowsList = rows.ToList();


266 


267  double[,] matrix = new double[rowsList.Count, variablesList.Count];


268 


269  int col = 0;


270  foreach (string column in variablesList) {


271  var values = scaling.GetScaledValues(ds, column, rowsList);


272  int row = 0;


273  foreach (var value in values) {


274  matrix[row, col] = value;


275  row++;


276  }


277  col++;


278  }


279  return matrix;


280  #endregion


281  } else {


282  return ds.ToArray(allowedInputs, rows);


283  }


284  }


285 


286  private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {


287  int n = x.GetLength(0);


288  var l = new double[n, n];


289 


290  // calculate covariances


291  for (int i = 0; i < n; i++) {


292  for (int j = i; j < n; j++) {


293  l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;


294  if (j == i) l[j, i] += 1.0;


295  }


296  }


297 


298  // cholesky decomposition


299  var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);


300  if (!res) throw new ArgumentException("Matrix is not positive semidefinite");


301  return l;


302  }


303 


304 


305  public override IDeepCloneable Clone(Cloner cloner) {


306  return new GaussianProcessModel(this, cloner);


307  }


308 


309  // is called by the solution creator to set all parameter values of the covariance and mean function


310  // to the optimized values (necessary to make the values visible in the GUI)


311  public void FixParameters() {


312  covarianceFunction.SetParameter(covarianceParameter);


313  meanFunction.SetParameter(meanParameter);


314  covarianceParameter = new double[0];


315  meanParameter = new double[0];


316  }


317 


318  #region IRegressionModel Members


319  public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {


320  return GetEstimatedValuesHelper(dataset, rows);


321  }


322  public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {


323  return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));


324  }


325  #endregion


326 


327 


328  private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {


329  try {


330  if (x == null) {


331  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


332  }


333  int n = x.GetLength(0);


334 


335  double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


336  int newN = newX.GetLength(0);


337 


338  var Ks = new double[newN][];


339  var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();


340  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);


341  var ms = Enumerable.Range(0, newX.GetLength(0))


342  .Select(r => mean.Mean(newX, r))


343  .ToArray();


344  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


345  for (int i = 0; i < newN; i++) {


346  Ks[i] = new double[n];


347  for (int j = 0; j < n; j++) {


348  Ks[i][j] = cov.CrossCovariance(x, newX, j, i);


349  }


350  }


351 


352  return Enumerable.Range(0, newN)


353  .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));


354  } catch (alglib.alglibexception ae) {


355  // wrap exception so that calling code doesn't have to know about alglib implementation


356  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


357  }


358  }


359 


360  public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {


361  try {


362  if (x == null) {


363  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


364  }


365  int n = x.GetLength(0);


366 


367  var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


368  int newN = newX.GetLength(0);


369 


370  var kss = new double[newN];


371  double[,] sWKs = new double[n, newN];


372  var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();


373  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


374 


375  if (l == null) {


376  l = CalculateL(x, cov, sqrSigmaNoise);


377  }


378 


379  // for stddev


380  for (int i = 0; i < newN; i++)


381  kss[i] = cov.Covariance(newX, i, i);


382 


383  for (int i = 0; i < newN; i++) {


384  for (int j = 0; j < n; j++) {


385  sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);


386  }


387  }


388 


389  // for stddev


390  alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);


391 


392  for (int i = 0; i < newN; i++) {


393  var col = Util.GetCol(sWKs, i).ToArray();


394  var sumV = Util.ScalarProd(col, col);


395  kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)


396  kss[i] = sumV;


397  if (kss[i] < 0) kss[i] = 0;


398  }


399  return kss;


400  } catch (alglib.alglibexception ae) {


401  // wrap exception so that calling code doesn't have to know about alglib implementation


402  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


403  }


404  }


405 


406  }


407  }

