#region License Information /* HeuristicLab * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using ILNumerics; namespace HeuristicLab.Algorithms.DataAnalysis { /// /// Represents a Gaussian process model. /// [StorableClass] [Item("TunedGaussianProcessModel", "Gaussian process model implemented using ILNumerics.")] public sealed class TunedGaussianProcessModel : NamedItem, IGaussianProcessModel { [Storable] private double negativeLogLikelihood; public double NegativeLogLikelihood { get { return negativeLogLikelihood; } } [Storable] private double[] hyperparameterGradients; public double[] HyperparameterGradients { get { var copy = new double[hyperparameterGradients.Length]; Array.Copy(hyperparameterGradients, copy, copy.Length); return copy; } } [Storable] private ICovarianceFunction covarianceFunction; public ICovarianceFunction CovarianceFunction { get { return covarianceFunction; } } [Storable] private IMeanFunction meanFunction; public IMeanFunction MeanFunction { get { return meanFunction; } } [Storable] private string targetVariable; public string TargetVariable { get { return targetVariable; } } [Storable] private string[] allowedInputVariables; public string[] AllowedInputVariables { get { return allowedInputVariables; } } [Storable] private double[] alpha; [Storable] private double sqrSigmaNoise; public double SigmaNoise { get { return Math.Sqrt(sqrSigmaNoise); } } [Storable] private double[] meanParameter; [Storable] private double[] covarianceParameter; [Storable] private double[] l; [Storable] private double[,] x; [Storable] private Scaling inputScaling; [StorableConstructor] private TunedGaussianProcessModel(bool deserializing) : base(deserializing) { } private TunedGaussianProcessModel(TunedGaussianProcessModel original, Cloner cloner) : base(original, cloner) { this.meanFunction = cloner.Clone(original.meanFunction); this.covarianceFunction = cloner.Clone(original.covarianceFunction); this.inputScaling = cloner.Clone(original.inputScaling); this.negativeLogLikelihood = original.negativeLogLikelihood; this.targetVariable = original.targetVariable; this.sqrSigmaNoise = original.sqrSigmaNoise; if (original.meanParameter != null) { this.meanParameter = (double[])original.meanParameter.Clone(); } if (original.covarianceParameter != null) { this.covarianceParameter = (double[])original.covarianceParameter.Clone(); } // shallow copies of arrays because they cannot be modified this.allowedInputVariables = original.allowedInputVariables; this.alpha = original.alpha; this.l = original.l; this.x = original.x; } public TunedGaussianProcessModel(Dataset ds, string targetVariable, IEnumerable allowedInputVariables, IEnumerable rows, IEnumerable hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction) : base() { this.name = ItemName; this.description = ItemDescription; this.meanFunction = (IMeanFunction)meanFunction.Clone(); this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone(); this.targetVariable = targetVariable; this.allowedInputVariables = allowedInputVariables.ToArray(); int nVariables = this.allowedInputVariables.Length; meanParameter = hyp .Take(this.meanFunction.GetNumberOfParameters(nVariables)) .ToArray(); covarianceParameter = hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables)) .Take(this.covarianceFunction.GetNumberOfParameters(nVariables)) .ToArray(); sqrSigmaNoise = Math.Exp(2.0 * hyp.Last()); CalculateModel(ds, rows); //var cmpModel = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, rows, hyp, // (IMeanFunction)meanFunction.Clone(), // (ICovarianceFunction)covarianceFunction.Clone()); //if (!cmpModel.NegativeLogLikelihood.IsAlmost(NegativeLogLikelihood) || // !cmpModel.HyperparameterGradients.Sum().IsAlmost(HyperparameterGradients.Sum()) // ) // throw new ArgumentException(); } private void CalculateModel(Dataset ds, IEnumerable rows) { inputScaling = new Scaling(ds, allowedInputVariables, rows); x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling); var y = ds.GetDoubleValues(targetVariable, rows); ILNumerics.Settings.MaxNumberThreads = Environment.ProcessorCount; using (ILScope.Enter()) { int n = x.GetLength(0); // calculate means and covariances var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, x.GetLength(1))); double[] m = Enumerable.Range(0, x.GetLength(0)) .Select(r => mean.Mean(x, r)) .ToArray(); var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1))); ILArray myL = ILMath.zeros(n, n); for (int i = 0; i < n; i++) { for (int j = i; j < n; j++) { double c = cov.Covariance(x, i, j) / sqrSigmaNoise; if (i == j) { myL.SetValue(c + 1, i, j); } else { myL.SetValue(c, j, i); myL.SetValue(c, i, j); } } } // cholesky decomposition ILArray chol; try { chol = ILNumerics.ILMath.chol(myL, false); } catch (Exception e) { throw new ArgumentException("covariance matrix not positive definite", e); } if (chol == null || chol.IsEmpty || !chol.Size.Equals(myL.Size)) throw new ArgumentException("covariance matrix not positive definite"); this.l = new double[n * n]; chol.ExportValues(ref l); // calculate sum of diagonal elements for likelihood double diagSum = ILMath.log(ILMath.diag(chol)).Sum(); // solve for alpha ILArray ym = ILMath.array(y.Zip(m, (a, b) => a - b).ToArray()); MatrixProperties lowerTriProps = MatrixProperties.LowerTriangular; MatrixProperties upperTriProps = MatrixProperties.UpperTriangular; ILArray alpha; try { ILArray t1 = ILMath.linsolve(chol.T, ym, ref lowerTriProps); alpha = ILMath.linsolve(chol, t1, ref upperTriProps) / sqrSigmaNoise; } catch (Exception e) { throw new ArgumentException("covariance matrix is not positive definite", e); } if (alpha == null || alpha.IsEmpty) throw new ArgumentException(); this.alpha = new double[alpha.Length]; alpha.ExportValues(ref this.alpha); negativeLogLikelihood = 0.5 * (double)ILMath.multiply(ym.T, alpha) + diagSum + (n / 2.0) * Math.Log(2.0 * Math.PI * sqrSigmaNoise); // derivatives int nAllowedVariables = x.GetLength(1); ILArray lCopy; try { ILArray t2 = ILMath.linsolve(chol.T, ILMath.eye(n, n), ref lowerTriProps); lCopy = ILMath.linsolve(chol, t2, ref upperTriProps) / sqrSigmaNoise - ILMath.multiply(alpha, alpha.T); } catch (Exception e) { throw new ArgumentException("covariance matrix is not positive definite", e); } double noiseGradient = sqrSigmaNoise * ILMath.sumall(ILMath.diag(lCopy)).GetValue(0, 0); double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)]; for (int k = 0; k < meanGradients.Length; k++) { ILArray meanGrad = ILMath.array( Enumerable.Range(0, alpha.Length) .Select(r => mean.Gradient(x, r, k)) .ToArray()); meanGradients[k] = -(double)ILMath.multiply(meanGrad.T, alpha); } double[] covGradients = new double[covarianceFunction.GetNumberOfParameters(nAllowedVariables)]; if (covGradients.Length > 0) { for (int i = 0; i < n; i++) { for (int j = 0; j < i; j++) { var g = cov.CovarianceGradient(x, i, j).ToArray(); for (int k = 0; k < covGradients.Length; k++) { covGradients[k] += lCopy.GetValue(i, j) * g[k]; } } var gDiag = cov.CovarianceGradient(x, i, i).ToArray(); for (int k = 0; k < covGradients.Length; k++) { // diag covGradients[k] += 0.5 * lCopy.GetValue(i, i) * gDiag[k]; } } } hyperparameterGradients = meanGradients .Concat(covGradients) .Concat(new double[] { noiseGradient }).ToArray(); } } public override IDeepCloneable Clone(Cloner cloner) { return new TunedGaussianProcessModel(this, cloner); } // is called by the solution creator to set all parameter values of the covariance and mean function // to the optimized values (necessary to make the values visible in the GUI) public void FixParameters() { covarianceFunction.SetParameter(covarianceParameter); meanFunction.SetParameter(meanParameter); covarianceParameter = new double[0]; meanParameter = new double[0]; } #region IRegressionModel Members public IEnumerable GetEstimatedValues(Dataset dataset, IEnumerable rows) { return GetEstimatedValuesHelper(dataset, rows); } public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData)); } IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) { return CreateRegressionSolution(problemData); } #endregion private IEnumerable GetEstimatedValuesHelper(Dataset dataset, IEnumerable rows) { var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling); int newN = newX.GetLength(0); int n = x.GetLength(0); var Ks = new double[newN, n]; var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, newX.GetLength(1))); var ms = Enumerable.Range(0, newX.GetLength(0)) .Select(r => mean.Mean(newX, r)) .ToArray(); var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1))); for (int i = 0; i < newN; i++) { for (int j = 0; j < n; j++) { Ks[i, j] = cov.CrossCovariance(x, newX, j, i); } } return Enumerable.Range(0, newN) .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha)); } public IEnumerable GetEstimatedVariance(Dataset dataset, IEnumerable rows) { var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling); int newN = newX.GetLength(0); int n = x.GetLength(0); if (newN == 0) return Enumerable.Empty(); var kss = new double[newN]; ILArray sWKs = ILMath.zeros(n, newN); var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1))); // for stddev for (int i = 0; i < newN; i++) kss[i] = cov.Covariance(newX, i, i); for (int i = 0; i < newN; i++) { for (int j = 0; j < n; j++) { sWKs.SetValue(cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise), j, i); } } // for stddev var lowerTriangular = MatrixProperties.LowerTriangular; ILArray V = ILMath.linsolve(ILMath.array(l, n, n).T, sWKs, ref lowerTriangular); // alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0); for (int i = 0; i < newN; i++) { double sumV = (double)ILMath.multiply(V[ILMath.full, i].T, V[ILMath.full, i]); kss[i] -= sumV; if (kss[i] < 0) kss[i] = 0; } return kss; } } }