1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


28  using HeuristicLab.Problems.DataAnalysis;


29 


30  namespace HeuristicLab.Algorithms.DataAnalysis {


31  /// <summary>


32  /// Represents a Gaussian process model.


33  /// </summary>


34  [StorableClass]


35  [Item("StudentTProcessModel", "Represents a Studentt process posterior.")]


36  public sealed class StudentTProcessModel : RegressionModel, IGaussianProcessModel {


37  public override IEnumerable<string> VariablesUsedForPrediction {


38  get { return allowedInputVariables; }


39  }


40 


41  [Storable]


42  private double negativeLogLikelihood;


43  public double NegativeLogLikelihood {


44  get { return negativeLogLikelihood; }


45  }


46 


47  [Storable]


48  private double[] hyperparameterGradients;


49  public double[] HyperparameterGradients {


50  get {


51  var copy = new double[hyperparameterGradients.Length];


52  Array.Copy(hyperparameterGradients, copy, copy.Length);


53  return copy;


54  }


55  }


56 


57  [Storable]


58  private ICovarianceFunction covarianceFunction;


59  public ICovarianceFunction CovarianceFunction {


60  get { return covarianceFunction; }


61  }


62  [Storable]


63  private IMeanFunction meanFunction;


64  public IMeanFunction MeanFunction {


65  get { return meanFunction; }


66  }


67 


68  [Storable]


69  private string[] allowedInputVariables;


70  public string[] AllowedInputVariables {


71  get { return allowedInputVariables; }


72  }


73 


74  [Storable]


75  private double[] alpha;


76  [Storable]


77  private double beta;


78 


79  public double SigmaNoise {


80  get { return 0; }


81  }


82 


83  [Storable]


84  private double[] meanParameter;


85  [Storable]


86  private double[] covarianceParameter;


87  [Storable]


88  private double nu;


89 


90  private double[,] l; // used to be storable in previous versions (is calculated lazily now)


91  private double[,] x; // scaled training dataset, used to be storable in previous versions (is calculated lazily now)


92 


93  // BackwardsCompatibility3.4


94  #region Backwards compatible code, remove with 3.5


95  [Storable(Name = "l")] // restore if available but don't store anymore


96  private double[,] l_storable {


97  set { this.l = value; }


98  get {


99  if (trainingDataset == null) return l; // this model has been created with an old version


100  else return null; // if the training dataset is available l should not be serialized


101  }


102  }


103  [Storable(Name = "x")] // restore if available but don't store anymore


104  private double[,] x_storable {


105  set { this.x = value; }


106  get {


107  if (trainingDataset == null) return x; // this model has been created with an old version


108  else return null; // if the training dataset is available x should not be serialized


109  }


110  }


111  #endregion


112 


113 


114  [Storable]


115  private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence


116  [Storable]


117  private int[] trainingRows;


118 


119  [Storable]


120  private Scaling inputScaling;


121 


122 


123  [StorableConstructor]


124  private StudentTProcessModel(bool deserializing) : base(deserializing) { }


125  private StudentTProcessModel(StudentTProcessModel original, Cloner cloner)


126  : base(original, cloner) {


127  this.meanFunction = cloner.Clone(original.meanFunction);


128  this.covarianceFunction = cloner.Clone(original.covarianceFunction);


129  if (original.inputScaling != null)


130  this.inputScaling = cloner.Clone(original.inputScaling);


131  this.trainingDataset = cloner.Clone(original.trainingDataset);


132  this.negativeLogLikelihood = original.negativeLogLikelihood;


133  if (original.meanParameter != null) {


134  this.meanParameter = (double[])original.meanParameter.Clone();


135  }


136  if (original.covarianceParameter != null) {


137  this.covarianceParameter = (double[])original.covarianceParameter.Clone();


138  }


139  nu = original.nu;


140 


141  // shallow copies of arrays because they cannot be modified


142  this.trainingRows = original.trainingRows;


143  this.allowedInputVariables = original.allowedInputVariables;


144  this.alpha = original.alpha;


145  this.beta = original.beta;


146  this.l = original.l;


147  this.x = original.x;


148  }


149  public StudentTProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,


150  IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction,


151  bool scaleInputs = true)


152  : base(targetVariable) {


153  this.name = ItemName;


154  this.description = ItemDescription;


155  this.meanFunction = (IMeanFunction)meanFunction.Clone();


156  this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone();


157  this.allowedInputVariables = allowedInputVariables.ToArray();


158 


159 


160  int nVariables = this.allowedInputVariables.Length;


161  meanParameter = hyp


162  .Take(this.meanFunction.GetNumberOfParameters(nVariables))


163  .ToArray();


164 


165  covarianceParameter = hyp.Skip(meanParameter.Length)


166  .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))


167  .ToArray();


168  nu = Math.Exp(hyp.Skip(meanParameter.Length + covarianceParameter.Length).First()) + 2; //TODO check gradient


169  try {


170  CalculateModel(ds, rows, scaleInputs);


171  }


172  catch (alglib.alglibexception ae) {


173  // wrap exception so that calling code doesn't have to know about alglib implementation


174  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


175  }


176  }


177 


178  private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) {


179  this.trainingDataset = (IDataset)ds.Clone();


180  this.trainingRows = rows.ToArray();


181  this.inputScaling = scaleInputs ? new Scaling(ds, allowedInputVariables, rows) : null;


182 


183  x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling);


184 


185  IEnumerable<double> y;


186  y = ds.GetDoubleValues(TargetVariable, rows);


187 


188  int n = x.GetLength(0);


189  var columns = Enumerable.Range(0, x.GetLength(1)).ToArray();


190 


191  // calculate cholesky decomposed (lower triangular) covariance matrix


192  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


193  this.l = CalculateL(x, cov);


194 


195  // calculate mean


196  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);


197  double[] m = Enumerable.Range(0, x.GetLength(0))


198  .Select(r => mean.Mean(x, r))


199  .ToArray();


200 


201  // calculate sum of diagonal elements for likelihood


202  double diagSum = Enumerable.Range(0, n).Select(i => Math.Log(l[i, i])).Sum();


203 


204  // solve for alpha


205  double[] ym = y.Zip(m, (a, b) => a  b).ToArray();


206 


207  int info;


208  alglib.densesolverreport denseSolveRep;


209 


210  alglib.spdmatrixcholeskysolve(l, n, false, ym, out info, out denseSolveRep, out alpha);


211 


212  beta = Util.ScalarProd(ym, alpha);


213  double sign0, sign1;


214  double lngamma0 = alglib.lngamma(0.5 * (nu + n), out sign0);


215  lngamma0 *= sign0;


216  double lngamma1 = alglib.lngamma(0.5 * nu, out sign1);


217  lngamma1 *= sign1;


218  negativeLogLikelihood =


219  0.5 * n * Math.Log((nu  2) * Math.PI) +


220  diagSum +


221  lngamma0 + lngamma1 +


222  //Math.Log(alglib.gammafunction((n + nu) / 2) / alglib.gammafunction(nu / 2)) +


223  0.5 * (nu + n) * Math.Log(1 + beta / (nu  2));


224 


225  // derivatives


226  int nAllowedVariables = x.GetLength(1);


227 


228  alglib.matinvreport matInvRep;


229  double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];


230  Array.Copy(l, lCopy, lCopy.Length);


231 


232  alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);


233  double c = (nu + n) / (nu + beta  2);


234  if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");


235  for (int i = 0; i < n; i++) {


236  for (int j = 0; j <= i; j++)


237  lCopy[i, j] = lCopy[i, j]  c * alpha[i] * alpha[j];


238  }


239 


240  double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];


241  for (int k = 0; k < meanGradients.Length; k++) {


242  var meanGrad = new double[alpha.Length];


243  for (int g = 0; g < meanGrad.Length; g++)


244  meanGrad[g] = mean.Gradient(x, g, k);


245  meanGradients[k] = Util.ScalarProd(meanGrad, alpha);//TODO not working yet, try to fix with gradient check


246  }


247 


248  double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)];


249  if (covGradients.Length > 0) {


250  for (int i = 0; i < n; i++) {


251  for (int j = 0; j < i; j++) {


252  var g = cov.CovarianceGradient(x, i, j);


253  for (int k = 0; k < covGradients.Length; k++) {


254  covGradients[k] += lCopy[i, j] * g[k];


255  }


256  }


257 


258  var gDiag = cov.CovarianceGradient(x, i, i);


259  for (int k = 0; k < covGradients.Length; k++) {


260  // diag


261  covGradients[k] += 0.5 * lCopy[i, i] * gDiag[k];


262  }


263  }


264  }


265 


266  double nuGradient = 0.5 * n


267   0.5 * (nu  2) * alglib.psi((n + nu) / 2) + 0.5 * (nu  2) * alglib.psi(nu / 2)


268  + 0.5 * (nu  2) * Math.Log(1 + beta / (nu  2))  beta * (n + nu) / (2 * (beta + (nu  2)));


269 


270  //nuGradient = (nu2) * nuGradient;


271  hyperparameterGradients =


272  meanGradients


273  .Concat(covGradients)


274  .Concat(new double[] { nuGradient }).ToArray();


275 


276  }


277 


278  private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {


279  if (scaling != null) {


280  return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);


281  } else {


282  return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);


283  }


284  }


285 


286  private static double[,] CalculateL(double[,] x, ParameterizedCovarianceFunction cov) {


287  int n = x.GetLength(0);


288  var l = new double[n, n];


289 


290  // calculate covariances


291  for (int i = 0; i < n; i++) {


292  for (int j = i; j < n; j++) {


293  l[j, i] = cov.Covariance(x, i, j);


294  }


295  }


296 


297  // cholesky decomposition


298  var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);


299  if (!res) throw new ArgumentException("Matrix is not positive semidefinite");


300  return l;


301  }


302 


303 


304  public override IDeepCloneable Clone(Cloner cloner) {


305  return new StudentTProcessModel(this, cloner);


306  }


307 


308  // is called by the solution creator to set all parameter values of the covariance and mean function


309  // to the optimized values (necessary to make the values visible in the GUI)


310  public void FixParameters() {


311  covarianceFunction.SetParameter(covarianceParameter);


312  meanFunction.SetParameter(meanParameter);


313  covarianceParameter = new double[0];


314  meanParameter = new double[0];


315  }


316 


317  #region IRegressionModel Members


318  public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {


319  return GetEstimatedValuesHelper(dataset, rows);


320  }


321  public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {


322  return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData));


323  }


324  #endregion


325 


326 


327  private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {


328  try {


329  if (x == null) {


330  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


331  }


332  int n = x.GetLength(0);


333 


334  double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


335  int newN = newX.GetLength(0);


336  var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray();


337 


338  var Ks = new double[newN][];


339  var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns);


340  var ms = Enumerable.Range(0, newX.GetLength(0))


341  .Select(r => mean.Mean(newX, r))


342  .ToArray();


343  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns);


344  for (int i = 0; i < newN; i++) {


345  Ks[i] = new double[n];


346  for (int j = 0; j < n; j++) {


347  Ks[i][j] = cov.CrossCovariance(x, newX, j, i);


348  }


349  }


350 


351  return Enumerable.Range(0, newN)


352  .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));


353  }


354  catch (alglib.alglibexception ae) {


355  // wrap exception so that calling code doesn't have to know about alglib implementation


356  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


357  }


358  }


359 


360  public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {


361  try {


362  if (x == null) {


363  x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling);


364  }


365  int n = x.GetLength(0);


366 


367  var newX = GetData(dataset, allowedInputVariables, rows, inputScaling);


368  int newN = newX.GetLength(0);


369 


370  var kss = new double[newN];


371  double[,] sWKs = new double[n, newN];


372  var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)).ToArray());


373 


374  if (l == null) {


375  l = CalculateL(x, cov);


376  }


377 


378  // for stddev


379  for (int i = 0; i < newN; i++)


380  kss[i] = cov.Covariance(newX, i, i);


381 


382  for (int i = 0; i < newN; i++) {


383  for (int j = 0; j < n; j++) {


384  sWKs[j, i] = cov.CrossCovariance(x, newX, j, i);


385  }


386  }


387 


388  // for stddev


389  alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);


390 


391  for (int i = 0; i < newN; i++) {


392  var col = Util.GetCol(sWKs, i).ToArray();


393  var sumV = Util.ScalarProd(col, col);


394  kss[i] = sumV;


395  kss[i] *= (nu + beta  2) / (nu + n  2);


396  if (kss[i] < 0) kss[i] = 0;


397  }


398  return kss;


399  }


400  catch (alglib.alglibexception ae) {


401  // wrap exception so that calling code doesn't have to know about alglib implementation


402  throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);


403  }


404  }


405  }


406  }

