1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


28  using HeuristicLab.Problems.DataAnalysis;


29 


30  namespace HeuristicLab.Algorithms.DataAnalysis {


/// <summary>
/// Represents a Gaussian process model.
/// </summary>
[StorableClass]
[Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]
public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {
  // negative log marginal likelihood of the training data given the hyperparameters
  [Storable]
  private double negativeLogLikelihood;
  public double NegativeLogLikelihood {
    get { return negativeLogLikelihood; }
  }

  [Storable]
  private double[] hyperparameterGradients;
  // returns a defensive copy so callers cannot mutate the stored gradient vector
  public double[] HyperparameterGradients {
    get {
      var copy = new double[hyperparameterGradients.Length];
      Array.Copy(hyperparameterGradients, copy, copy.Length);
      return copy;
    }
  }

  [Storable]
  private ICovarianceFunction covarianceFunction;
  public ICovarianceFunction CovarianceFunction {
    get { return covarianceFunction; }
  }
  [Storable]
  private IMeanFunction meanFunction;
  public IMeanFunction MeanFunction {
    get { return meanFunction; }
  }
  [Storable]
  private string targetVariable;
  public string TargetVariable {
    get { return targetVariable; }
  }
  [Storable]
  private string[] allowedInputVariables;
  public string[] AllowedInputVariables {
    get { return allowedInputVariables; }
  }

  // weight vector of the posterior (K + sigma^2 I)^-1 (y - m)
  [Storable]
  private double[] alpha;
  // noise variance sigma_n^2; stored squared, exposed as sigma_n via SigmaNoise
  [Storable]
  private double sqrSigmaNoise;
  public double SigmaNoise {
    get { return Math.Sqrt(sqrSigmaNoise); }
  }

  [Storable]
  private double[] meanParameter;
  [Storable]
  private double[] covarianceParameter;

  private double[,] l; // used to be storable in previous versions (is calculated lazily now)
  private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)

  // BackwardsCompatibility3.4
  #region Backwards compatible code, remove with 3.5
  [Storable(Name = "l")] // restore if available but don't store anymore
  private double[,] l_storable {
    set { this.l = value; }
    get {
      if (trainingDataset == null) return l; // this model has been created with an old version
      else return null; // if the training dataset is available l should not be serialized
    }
  }
  [Storable(Name = "x")] // restore if available but don't store anymore
  private double[,] x_storable {
    set { this.x = value; }
    get {
      if (trainingDataset == null) return x; // this model has been created with an old version
      else return null; // if the training dataset is available x should not be serialized
    }
  }
  #endregion


  [Storable]
  private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
  [Storable]
  private int[] trainingRows;

  [Storable]
  private Scaling inputScaling;

  // deserialization constructor; fields are restored by the persistence framework
  [StorableConstructor]
  private GaussianProcessModel(bool deserializing) : base(deserializing) { }
  // deep-cloning constructor used by Clone(Cloner)
  private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)
    : base(original, cloner) {
    this.meanFunction = cloner.Clone(original.meanFunction);
    this.covarianceFunction = cloner.Clone(original.covarianceFunction);
    if (original.inputScaling != null)
      this.inputScaling = cloner.Clone(original.inputScaling);
    this.trainingDataset = cloner.Clone(original.trainingDataset);
    this.negativeLogLikelihood = original.negativeLogLikelihood;
    this.targetVariable = original.targetVariable;
    this.sqrSigmaNoise = original.sqrSigmaNoise;
    if (original.meanParameter != null) {
      this.meanParameter = (double[])original.meanParameter.Clone();
    }
    if (original.covarianceParameter != null) {
      this.covarianceParameter = (double[])original.covarianceParameter.Clone();
    }

    // shallow copies of arrays because they cannot be modified
    this.trainingRows = original.trainingRows;
    this.allowedInputVariables = original.allowedInputVariables;
    this.alpha = original.alpha;
    this.l = original.l;
    this.x = original.x;
  }

146  public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,


147  IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,


148  bool scaleInputs = true)


149  : base() {


150  this.name = ItemName;


151  this.description = ItemDescription;


152  this.meanFunction = (IMeanFunction)meanFunction.Clone();


153  this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();


154  this.targetVariable = targetVariable;


155  this.allowedInputVariables = allowedInputVariables.ToArray();


156 


157 


158  int nVariables = this.allowedInputVariables.Length;


159  meanParameter = hyp


160  .Take(this.meanFunction.GetNumberOfParameters(nVariables))


161  .ToArray();


162 


163  covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))


164  .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))


165  .ToArray();


166  sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());


167  try {


168  CalculateModel(ds, rows, scaleInputs);


169  } catch (alglib.alglibexception ae) {


170  // wrap exception so that calling code doesn't have to know about alglib implementation


171  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


172  }


173  }


174 


175  private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {


176  this.trainingDataset = (IDataset)ds.Clone();


177  this.trainingRows = rows.ToArray();


178  this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;


179 


180  x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);


181 


182  IEnumerable<double> y;


183  y = ds.GetDoubleValues(targetVariable, rows);


184 


185  int n = x.GetLength(0);


186 


187  // calculate cholesky decomposed (lower triangular) covariance matrix


188  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));


189  this.l = CalculateL(x, cov, sqrSigmaNoise);


190 


191  // calculate mean


192  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, x.GetLength(1)));


193  double[] m = Enumerable.Range(0, x.GetLength(0))


194  .Select(r => mean.Mean(x, r))


195  .ToArray();


196 


197  // calculate sum of diagonal elements for likelihood


198  double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();


199 


200  // solve for alpha


201  double[] ym = y.Zip(m, (a, b) => a  b).ToArray();


202 


203  int info;


204  alglib.densesolverreport denseSolveRep;


205 


206  alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);


207  for (int i = 0; i < alpha.Length; i++)


208  alpha[i] = alpha[i] / sqrSigmaNoise;


209  negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);


210 


211  // derivatives


212  int nAllowedVariables = x.GetLength(1);


213 


214  alglib.matinvreport matInvRep;


215  double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];


216  Array.Copy(l, lCopy, lCopy.Length);


217 


218  alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);


219  if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");


220  for (int i = 0; i < n; i++) {


221  for (int j = 0; j <= i; j++)


222  lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise  alpha[i] * alpha[j];


223  }


224 


225  double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();


226 


227  double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];


228  for (int k = 0; k < meanGradients.Length; k++) {


229  var meanGrad = Enumerable.Range(0, alpha.Length)


230  .Select(r => mean.Gradient(x, r, k));


231  meanGradients[k] = Util.ScalarProd(meanGrad, alpha);


232  }


233 


234  double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];


235  if (covGradients.Length > 0) {


236  for (int i = 0; i < n; i++) {


237  for (int j = 0; j < i; j++) {


238  var g = cov.CovarianceGradient(x, i, j).ToArray();


239  for (int k = 0; k < covGradients.Length; k++) {


240  covGradients[k] += lCopy[i, j] * g[k];


241  }


242  }


243 


244  var gDiag = cov.CovarianceGradient(x, i, i).ToArray();


245  for (int k = 0; k < covGradients.Length; k++) {


246  // diag


247  covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];


248  }


249  }


250  }


251 


252  hyperparameterGradients =


253  meanGradients


254  .Concat(covGradients)


255  .Concat(new double[] { noiseGradient }).ToArray();


256 


257  }


258 


259  private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {


260  if (scaling != null) {


261  return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);


262  } else {


263  return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);


264  }


265  }


266 


267  private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {


268  int n = x.GetLength(0);


269  var l = new double[n, n];


270 


271  // calculate covariances


272  for (int i = 0; i < n; i++) {


273  for (int j = i; j < n; j++) {


274  l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;


275  if (j == i) l[j, i] += 1.0;


276  }


277  }


278 


279  // cholesky decomposition


280  var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);


281  if (!res) throw new ArgumentException("Matrix is not positive semidefinite");


282  return l;


283  }


284 


285 


  // deep clone via the cloning constructor
  public override IDeepCloneable Clone(Cloner cloner) {
    return new GaussianProcessModel(this, cloner);
  }



289 


290  // is called by the solution creator to set all parameter values of the covariance and mean function


291  // to the optimized values (necessary to make the values visible in the GUI)


292  public void FixParameters() {


293  covarianceFunction.SetParameter(covarianceParameter);


294  meanFunction.SetParameter(meanParameter);


295  covarianceParameter = new double[0];


296  meanParameter = new double[0];


297  }


298 


  #region IRegressionModel Members
  // posterior mean predictions for the given rows
  public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    return GetEstimatedValuesHelper(dataset, rows);
  }
  public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));
  }
  // explicit interface implementation forwards to the strongly-typed overload
  IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
    return CreateRegressionSolution(problemData);
  }
  #endregion



310 


311 


312  private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {


313  try {


314  if (x == null) {


315  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


316  }


317  int n = x.GetLength(0);


318 


319  double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


320  int newN = newX.GetLength(0);


321 


322  var Ks = new double[newN, n];


323  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, newX.GetLength(1)));


324  var ms = Enumerable.Range(0, newX.GetLength(0))


325  .Select(r => mean.Mean(newX, r))


326  .ToArray();


327  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, newX.GetLength(1)));


328  for (int i = 0; i < newN; i++) {


329  for (int j = 0; j < n; j++) {


330  Ks[i, j] = cov.CrossCovariance(x, newX, j, i);


331  }


332  }


333 


334  return Enumerable.Range(0, newN)


335  .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha));


336  } catch (alglib.alglibexception ae) {


337  // wrap exception so that calling code doesn't have to know about alglib implementation


338  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


339  }


340  }


341 


342  public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {


343  try {


344  if (x == null) {


345  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


346  }


347  int n = x.GetLength(0);


348 


349  var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


350  int newN = newX.GetLength(0);


351 


352  var kss = new double[newN];


353  double[,] sWKs = new double[n, newN];


354  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));


355 


356  if (l == null) {


357  l = CalculateL(x, cov, sqrSigmaNoise);


358  }


359 


360  // for stddev


361  for (int i = 0; i < newN; i++)


362  kss[i] = cov.Covariance(newX, i, i);


363 


364  for (int i = 0; i < newN; i++) {


365  for (int j = 0; j < n; j++) {


366  sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);


367  }


368  }


369 


370  // for stddev


371  alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);


372 


373  for (int i = 0; i < newN; i++) {


374  var sumV = Util.ScalarProd(Util.GetCol(sWKs, i), Util.GetCol(sWKs, i));


375  kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)


376  kss[i] = sumV;


377  if (kss[i] < 0) kss[i] = 0;


378  }


379  return kss;


380  } catch (alglib.alglibexception ae) {


381  // wrap exception so that calling code doesn't have to know about alglib implementation


382  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


383  }


384  }


385  }


386  }

