1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


28  using HeuristicLab.Problems.DataAnalysis;


29 


30  namespace HeuristicLab.Algorithms.DataAnalysis {


31  /// <summary>


32  /// Represents a Gaussian process model.


33  /// </summary>


34  [StorableClass]


35  [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")]


36  public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel {


37  [Storable]


38  private double negativeLogLikelihood;


39  public double NegativeLogLikelihood {


40  get { return negativeLogLikelihood; }


41  }


42 


43  [Storable]


44  private double[] hyperparameterGradients;


45  public double[] HyperparameterGradients {


46  get {


47  var copy = new double[hyperparameterGradients.Length];


48  Array.Copy(hyperparameterGradients, copy, copy.Length);


49  return copy;


50  }


51  }


52 


53  [Storable]


54  private ICovarianceFunction covarianceFunction;


55  public ICovarianceFunction CovarianceFunction {


56  get { return covarianceFunction; }


57  }


58  [Storable]


59  private IMeanFunction meanFunction;


60  public IMeanFunction MeanFunction {


61  get { return meanFunction; }


62  }


63  [Storable]


64  private string targetVariable;


65  public string TargetVariable {


66  get { return targetVariable; }


67  }


68  [Storable]


69  private string[] allowedInputVariables;


70  public string[] AllowedInputVariables {


71  get { return allowedInputVariables; }


72  }


73 


74  [Storable]


75  private double[] alpha;


76  [Storable]


77  private double sqrSigmaNoise;


78  public double SigmaNoise {


79  get { return Math.Sqrt(sqrSigmaNoise); }


80  }


81 


82  [Storable]


83  private double[] meanParameter;


84  [Storable]


85  private double[] covarianceParameter;


86 


87  private double[,] l; // used to be storable in previous versions (is calculated lazily now)


88  private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)


89 


90  // BackwardsCompatibility3.4


91  #region Backwards compatible code, remove with 3.5


92  [Storable(Name = "l")] // restore if available but don't store anymore


93  private double[,] l_storable {


94  set { this.l = value; }


95  get {


96  if (trainingDataset == null) return l; // this model has been created with an old version


97  else return null; // if the training dataset is available l should not be serialized


98  }


99  }


100  [Storable(Name = "x")] // restore if available but don't store anymore


101  private double[,] x_storable {


102  set { this.x = value; }


103  get {


104  if (trainingDataset == null) return x; // this model has been created with an old version


105  else return null; // if the training dataset is available x should not be serialized


106  }


107  }


108  #endregion


109 


110 


111  [Storable]


112  private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence


113  [Storable]


114  private int[] trainingRows;


115 


116  [Storable]


117  private Scaling inputScaling;


118 


119 


120  [StorableConstructor]


121  private GaussianProcessModel(bool deserializing) : base(deserializing) { }


122  private GaussianProcessModel(GaussianProcessModel original, Cloner cloner)


123  : base(original, cloner) {


124  this.meanFunction = cloner.Clone(original.meanFunction);


125  this.covarianceFunction = cloner.Clone(original.covarianceFunction);


126  this.inputScaling = cloner.Clone(original.inputScaling);


127  this.trainingDataset = cloner.Clone(original.trainingDataset);


128  this.negativeLogLikelihood = original.negativeLogLikelihood;


129  this.targetVariable = original.targetVariable;


130  this.sqrSigmaNoise = original.sqrSigmaNoise;


131  if (original.meanParameter != null) {


132  this.meanParameter = (double[])original.meanParameter.Clone();


133  }


134  if (original.covarianceParameter != null) {


135  this.covarianceParameter = (double[])original.covarianceParameter.Clone();


136  }


137 


138  // shallow copies of arrays because they cannot be modified


139  this.trainingRows = original.trainingRows;


140  this.allowedInputVariables = original.allowedInputVariables;


141  this.alpha = original.alpha;


142  this.l = original.l;


143  this.x = original.x;


144  }


145  public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,


146  IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction)


147  : base() {


148  this.name = ItemName;


149  this.description = ItemDescription;


150  this.meanFunction = (IMeanFunction)meanFunction.Clone();


151  this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();


152  this.targetVariable = targetVariable;


153  this.allowedInputVariables = allowedInputVariables.ToArray();


154 


155 


156  int nVariables = this.allowedInputVariables.Length;


157  meanParameter = hyp


158  .Take(this.meanFunction.GetNumberOfParameters(nVariables))


159  .ToArray();


160 


161  covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))


162  .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))


163  .ToArray();


164  sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());


165  CalculateModel(ds, rows);


166  }


167 


168  private void CalculateModel(IDataset ds, IEnumerable<int> rows) {


169  this.trainingDataset = (IDataset)ds.Clone();


170  this.trainingRows = rows.ToArray();


171  this.inputScaling = new Scaling(trainingDataset, allowedInputVariables, rows);


172  this.x = CalculateX(trainingDataset, allowedInputVariables, rows, inputScaling);


173  var y = ds.GetDoubleValues(targetVariable, rows);


174 


175  int n = x.GetLength(0);


176 


177  // calculate cholesky decomposed (lower triangular) covariance matrix


178  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));


179  this.l = CalculateL(x, cov, sqrSigmaNoise);


180 


181  // calculate mean


182  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, x.GetLength(1)));


183  double[] m = Enumerable.Range(0, x.GetLength(0))


184  .Select(r => mean.Mean(x, r))


185  .ToArray();


186 


187 


188 


189  // calculate sum of diagonal elements for likelihood


190  double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();


191 


192  // solve for alpha


193  double[] ym = y.Zip(m, (a, b) => a  b).ToArray();


194 


195  int info;


196  alglib.densesolverreport denseSolveRep;


197 


198  alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);


199  for (int i = 0; i < alpha.Length; i++)


200  alpha[i] = alpha[i] / sqrSigmaNoise;


201  negativeLogLikelihood = 0.5 * Util.ScalarProd(ym, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise);


202 


203  // derivatives


204  int nAllowedVariables = x.GetLength(1);


205 


206  alglib.matinvreport matInvRep;


207  double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];


208  Array.Copy(l, lCopy, lCopy.Length);


209 


210  alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);


211  if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");


212  for (int i = 0; i < n; i++) {


213  for (int j = 0; j <= i; j++)


214  lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise  alpha[i] * alpha[j];


215  }


216 


217  double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();


218 


219  double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];


220  for (int k = 0; k < meanGradients.Length; k++) {


221  var meanGrad = Enumerable.Range(0, alpha.Length)


222  .Select(r => mean.Gradient(x, r, k));


223  meanGradients[k] = Util.ScalarProd(meanGrad, alpha);


224  }


225 


226  double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];


227  if (covGradients.Length > 0) {


228  for (int i = 0; i < n; i++) {


229  for (int j = 0; j < i; j++) {


230  var g = cov.CovarianceGradient(x, i, j).ToArray();


231  for (int k = 0; k < covGradients.Length; k++) {


232  covGradients[k] += lCopy[i, j] * g[k];


233  }


234  }


235 


236  var gDiag = cov.CovarianceGradient(x, i, i).ToArray();


237  for (int k = 0; k < covGradients.Length; k++) {


238  // diag


239  covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];


240  }


241  }


242  }


243 


244  hyperparameterGradients =


245  meanGradients


246  .Concat(covGradients)


247  .Concat(new double[] { noiseGradient }).ToArray();


248 


249  }


250 


251  private static double[,] CalculateX(IDataset ds, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows, Scaling inputScaling) {


252  return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling);


253  }


254 


255  private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov, double sqrSigmaNoise) {


256  int n = x.GetLength(0);


257  var l = new double[n, n];


258 


259  // calculate covariances


260  for (int i = 0; i < n; i++) {


261  for (int j = i; j < n; j++) {


262  l[j, i] = cov.Covariance(x, i, j) / sqrSigmaNoise;


263  if (j == i) l[j, i] += 1.0;


264  }


265  }


266 


267  // cholesky decomposition


268  var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);


269  if (!res) throw new ArgumentException("Matrix is not positive semidefinite");


270  return l;


271  }


272 


273 


274  public override IDeepCloneable Clone(Cloner cloner) {


275  return new GaussianProcessModel(this, cloner);


276  }


277 


278  // is called by the solution creator to set all parameter values of the covariance and mean function


279  // to the optimized values (necessary to make the values visible in the GUI)


280  public void FixParameters() {


281  covarianceFunction.SetParameter(covarianceParameter);


282  meanFunction.SetParameter(meanParameter);


283  covarianceParameter = new double[0];


284  meanParameter = new double[0];


285  }


286 


287  #region IRegressionModel Members


288  public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {


289  return GetEstimatedValuesHelper(dataset, rows);


290  }


291  public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {


292  return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));


293  }


294  IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {


295  return CreateRegressionSolution(problemData);


296  }


297  #endregion


298 


299 


300  private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {


301  if (x == null) {


302  this.x = CalculateX(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


303  }


304  int n = x.GetLength(0);


305 


306  var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);


307  int newN = newX.GetLength(0);


308 


309  var Ks = new double[newN, n];


310  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, newX.GetLength(1)));


311  var ms = Enumerable.Range(0, newX.GetLength(0))


312  .Select(r => mean.Mean(newX, r))


313  .ToArray();


314  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, newX.GetLength(1)));


315  for (int i = 0; i < newN; i++) {


316  for (int j = 0; j < n; j++) {


317  Ks[i, j] = cov.CrossCovariance(x, newX, j, i);


318  }


319  }


320 


321  return Enumerable.Range(0, newN)


322  .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha));


323  }


324 


325  public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {


326  if (x == null) {


327  this.x = CalculateX(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


328  }


329  int n = x.GetLength(0);


330 


331  var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);


332  int newN = newX.GetLength(0);


333 


334  var kss = new double[newN];


335  double[,] sWKs = new double[n, newN];


336  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));


337 


338  if (l == null) {


339  l = CalculateL(x, cov, sqrSigmaNoise);


340  }


341 


342  // for stddev


343  for (int i = 0; i < newN; i++)


344  kss[i] = cov.Covariance(newX, i, i);


345 


346  for (int i = 0; i < newN; i++) {


347  for (int j = 0; j < n; j++) {


348  sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise);


349  }


350  }


351 


352  // for stddev


353  alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);


354 


355  for (int i = 0; i < newN; i++) {


356  var sumV = Util.ScalarProd(Util.GetCol(sWKs, i), Util.GetCol(sWKs, i));


357  kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distibution to get V(y)


358  kss[i] = sumV;


359  if (kss[i] < 0) kss[i] = 0;


360  }


361  return kss;


362  }


363  }


364  }

