#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.DataAnalysis {
  [StorableClass]
  [Item("KernelRidgeRegressionModel", "A kernel ridge regression model")]
  public sealed class KernelRidgeRegressionModel : RegressionModel {
    public override IEnumerable<string> VariablesUsedForPrediction {
      get { return allowedInputVariables; }
    }

    [Storable]
    private readonly string[] allowedInputVariables;
    public string[] AllowedInputVariables {
      get { return allowedInputVariables.ToArray(); }
    }

    [Storable]
    public double LooCvRMSE { get; private set; }

    [Storable]
    private readonly double[] alpha;
    [Storable]
    private readonly double[,] trainX; // it is better to store the original training dataset completely because this is more efficient in persistence
    [Storable]
    private readonly ITransformation<double>[] scaling;
    [Storable]
    private readonly ICovarianceFunction kernel;
    [Storable]
    private readonly double lambda;
    [Storable]
    private readonly double yOffset; // implementation works for zero-mean, unit-variance target variables
    [Storable]
    private readonly double yScale;

    [StorableConstructor]
    private KernelRidgeRegressionModel(bool deserializing) : base(deserializing) { }
    private KernelRidgeRegressionModel(KernelRidgeRegressionModel original, Cloner cloner)
      : base(original, cloner) {
      // shallow copies of the arrays are sufficient because they are not modified after construction
      allowedInputVariables = original.allowedInputVariables;
      alpha = original.alpha;
      trainX = original.trainX;
      scaling = original.scaling;
      lambda = original.lambda;
      LooCvRMSE = original.LooCvRMSE;

      yOffset = original.yOffset;
      yScale = original.yScale;
      kernel = original.kernel;
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new KernelRidgeRegressionModel(this, cloner);
    }

    public static KernelRidgeRegressionModel Create(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) {
      var trainingRows = rows.ToArray();
      var model = new KernelRidgeRegressionModel(dataset, targetVariable, allowedInputVariables, trainingRows, scaleInputs, kernel, lambda);

      try {
        int info;
        int n = model.trainX.GetLength(0);
        alglib.densesolverreport denseSolveRep;
        var gram = BuildGramMatrix(model.trainX, lambda, kernel);
        var l = new double[n, n];
        Array.Copy(gram, l, l.Length);

        double[] alpha = new double[n];
        double[,] invG;
        var y = dataset.GetDoubleValues(targetVariable, trainingRows).ToArray();
        for (int i = 0; i < y.Length; i++) {
          y[i] -= model.yOffset;
          y[i] *= model.yScale;
        }
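        // The dual coefficients are found by solving the regularized linear system
        //   (K + lambda * I) * alpha = y
        // where K is the kernel (Gram) matrix over the training rows. The inverse
        // of the Gram matrix is also built because, for ridge-type models, the
        // leave-one-out residual of row i has the closed form alpha[i] / invG[i, i],
        // so the LOO-CV RMSE can be computed without refitting n models.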
        // Cholesky decomposition (the regularized Gram matrix is symmetric positive definite)
        var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
        if (res == false) { // fall back to LU decomposition if the Cholesky decomposition failed
          int[] pivots;
          var lua = new double[n, n];
          Array.Copy(gram, lua, lua.Length);
          alglib.rmatrixlu(ref lua, n, n, out pivots);
          alglib.rmatrixlusolve(lua, pivots, n, y, out info, out denseSolveRep, out alpha);
          if (info != 1) throw new ArgumentException("Could not create model.");
          alglib.matinvreport rep;
          invG = lua;  // rename
          alglib.rmatrixluinverse(ref invG, pivots, n, out info, out rep);
        } else {
          alglib.spdmatrixcholeskysolve(l, n, false, y, out info, out denseSolveRep, out alpha);
          if (info != 1) throw new ArgumentException("Could not create model.");
          // for LOO-CV we need to build the inverse of the Gram matrix
          alglib.matinvreport rep;
          invG = l;  // rename
          alglib.spdmatrixcholeskyinverse(ref invG, n, false, out info, out rep);
        }
        if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");

        var ssqLooError = 0.0;
        for (int i = 0; i < n; i++) {
          var pred_i = Util.ScalarProd(Util.GetRow(gram, i).ToArray(), alpha);
          var looPred_i = pred_i - alpha[i] / invG[i, i];
          var error = (y[i] - looPred_i) / model.yScale;
          ssqLooError += error * error;
        }

        Array.Copy(alpha, model.alpha, n);
        model.LooCvRMSE = Math.Sqrt(ssqLooError / n);
      } catch (alglib.alglibexception ae) {
        // wrap the exception so that calling code doesn't have to know about the alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the kernel ridge regression model", ae);
      }
      return model;
    }

    private KernelRidgeRegressionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int[] rows,
      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) : base(targetVariable) {
      this.allowedInputVariables = allowedInputVariables.ToArray();
      if (kernel.GetNumberOfParameters(this.allowedInputVariables.Length) > 0)
        throw new ArgumentException("All parameters in the kernel function must be specified.");
      name = ItemName;
      description = ItemDescription;

      this.kernel = (ICovarianceFunction)kernel.Clone();
      this.lambda = lambda;
      if (scaleInputs) scaling = CreateScaling(dataset, rows, this.allowedInputVariables);
      trainX = ExtractData(dataset, rows, this.allowedInputVariables, scaling);
      var y = dataset.GetDoubleValues(targetVariable, rows).ToArray();
      yOffset = y.Average();
      yScale = 1.0 / y.StandardDeviation();
      alpha = new double[trainX.GetLength(0)];
    }

    #region IRegressionModel Members
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      var newX = ExtractData(dataset, rows, allowedInputVariables, scaling);
      var dim = newX.GetLength(1);
      var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());

      var pred = new double[newX.GetLength(0)];
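      // Each prediction is a weighted sum of kernel evaluations between the new row
      // and all training rows, f(x) = sum_j alpha[j] * k(x_j, x); afterwards the
      // zero-mean, unit-variance scaling of the target variable is undone.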
      for (int i = 0; i < pred.Length; i++) {
        double sum = 0.0;
        for (int j = 0; j < alpha.Length; j++) {
          sum += alpha[j] * cov.CrossCovariance(trainX, newX, j, i);
        }
        pred[i] = sum / yScale + yOffset;
      }
      return pred;
    }

    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, new RegressionProblemData(problemData));
    }
    #endregion

    #region helpers
    private static double[,] BuildGramMatrix(double[,] data, double lambda, ICovarianceFunction kernel) {
      var n = data.GetLength(0);
      var dim = data.GetLength(1);
      var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());
      var gram = new double[n, n];
      // G = (K + λ I)
      for (var i = 0; i < n; i++) {
        for (var j = i; j < n; j++) {
          gram[i, j] = gram[j, i] = cov.Covariance(data, i, j); // symmetric matrix
        }
        gram[i, i] += lambda;
      }
      return gram;
    }

    private static ITransformation<double>[] CreateScaling(IDataset dataset, int[] rows, IReadOnlyCollection<string> allowedInputVariables) {
      var trans = new ITransformation<double>[allowedInputVariables.Count];
      int i = 0;
      foreach (var variable in allowedInputVariables) {
        var lin = new LinearTransformation(allowedInputVariables);
        var max = dataset.GetDoubleValues(variable, rows).Max();
        var min = dataset.GetDoubleValues(variable, rows).Min();
        // scale each input variable linearly to the range [0, 1] (assumes max > min)
        lin.Multiplier = 1.0 / (max - min);
        lin.Addend = -min / (max - min);
        trans[i] = lin;
        i++;
      }
      return trans;
    }

    private static double[,] ExtractData(IDataset dataset, IEnumerable<int> rows, IReadOnlyCollection<string> allowedInputVariables, ITransformation<double>[] scaling = null) {
      double[][] variables;
      if (scaling != null) {
        variables = allowedInputVariables.Select((var, i) => scaling[i].Apply(dataset.GetDoubleValues(var, rows)).ToArray())
          .ToArray();
      } else {
        variables = allowedInputVariables.Select(var => dataset.GetDoubleValues(var, rows).ToArray()).ToArray();
      }
      int n = variables.First().Length;
      var res = new double[n, variables.Length];
      for (int r = 0; r < n; r++)
        for (int c = 0; c < variables.Length; c++) {
          res[r, c] = variables[c][r];
        }
      return res;
    }
    #endregion
  }
}
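// Usage sketch (illustrative only, not part of the original file; problemData and
// someKernel are placeholders for caller-provided instances, and the kernel must be
// fully parameterized, i.e. kernel.GetNumberOfParameters(dim) == 0):
//
//   var model = KernelRidgeRegressionModel.Create(
//     problemData.Dataset, problemData.TargetVariable,
//     problemData.AllowedInputVariables, problemData.TrainingIndices,
//     scaleInputs: true, kernel: someKernel, lambda: 0.1);
//   Console.WriteLine("LOO-CV RMSE: " + model.LooCvRMSE);
//   var solution = model.CreateRegressionSolution(problemData);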