Changeset 15164


Ignore:
Timestamp:
07/06/17 15:56:54 (3 weeks ago)
Author:
bwerth
Message:

#2699 KRRModel: made helper functions static; made contents of "allowedInputVariables" immutable; made constructor private and added public "Create"-method that does most of the learning now;

Kernels: fixed inconsistency in error messages

Location:
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/PolysplineKernel.cs

    r15158 r15164  
    5757
    5858    protected override double Get(double norm) {
    59       if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
     59      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
    6060      var beta = Beta.Value;
    6161      if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/ThinPlatePolysplineKernel.cs

    r15158 r15164  
    6666    // (Degree/beta) * (norm/beta)^Degree * log(norm/beta)
    6767    protected override double GetGradient(double norm) {
    68       if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
     68      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
    6969      var beta = Beta.Value;
    7070      if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegression.cs

    r15156 r15164  
    123123
    124124    public static IRegressionSolution CreateRadialBasisRegressionSolution(IRegressionProblemData problemData, ICovarianceFunction kernel, double lambda, bool scaleInputs, out double rmsError, out double looCvRMSE) {
    125       var model = new KernelRidgeRegressionModel(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, scaleInputs, kernel, lambda);
     125      var model = KernelRidgeRegressionModel.Create(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, scaleInputs, kernel, lambda);
    126126      rmsError = double.NaN;
    127127      if (problemData.TestIndices.Any()) {
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegressionModel.cs

    r14936 r15164  
    3939    private readonly string[] allowedInputVariables;
    4040    public string[] AllowedInputVariables {
    41       get { return allowedInputVariables; }
     41      get { return allowedInputVariables.ToArray(); }
    4242    }
    4343
     
    8181      yOffset = original.yOffset;
    8282      yScale = original.yScale;
    83       if (original.kernel != null)
    84         kernel = cloner.Clone(original.kernel);
     83      kernel = original.kernel;
    8584    }
    8685    public override IDeepCloneable Clone(Cloner cloner) {
     
    8887    }
    8988
    90     public KernelRidgeRegressionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
    91       bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) : base(targetVariable) {
    92       if (kernel.GetNumberOfParameters(allowedInputVariables.Count()) > 0) throw new ArgumentException("All parameters in the kernel function must be specified.");
    93       name = ItemName;
    94       description = ItemDescription;
    95       this.allowedInputVariables = allowedInputVariables.ToArray();
     89    public static KernelRidgeRegressionModel Create(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
     90      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) {
    9691      var trainingRows = rows.ToArray();
    97       this.kernel = (ICovarianceFunction)kernel.Clone();
    98       this.lambda = lambda;
     92      var model = new KernelRidgeRegressionModel(dataset, targetVariable, allowedInputVariables, trainingRows, scaleInputs, kernel, lambda);
     93
    9994      try {
    100         if (scaleInputs)
    101           scaling = CreateScaling(dataset, trainingRows);
    102         trainX = ExtractData(dataset, trainingRows, scaling);
     95        int info;
     96        int n = model.trainX.GetLength(0);
     97        alglib.densesolverreport denseSolveRep;
     98        var gram = BuildGramMatrix(model.trainX, lambda, kernel);
     99        var l = new double[n, n];
     100        Array.Copy(gram, l, l.Length);
     101
     102        double[] alpha = new double[n];
     103        double[,] invG;
    103104        var y = dataset.GetDoubleValues(targetVariable, trainingRows).ToArray();
    104         yOffset = y.Average();
    105         yScale = 1.0 / y.StandardDeviation();
    106105        for (int i = 0; i < y.Length; i++) {
    107           y[i] -= yOffset;
    108           y[i] *= yScale;
    109         }
    110         int info;
    111         int n = trainX.GetLength(0);
    112         alglib.densesolverreport denseSolveRep;
    113         var gram = BuildGramMatrix(trainX, lambda);
    114         var l = new double[n, n]; Array.Copy(gram, l, l.Length);
    115 
    116         double[,] invG;
     106          y[i] -= model.yOffset;
     107          y[i] *= model.yScale;
     108        }
    117109        // cholesky decomposition
    118110        var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
    119         if (res == false) { //throw new ArgumentException("Could not decompose matrix. Is it quadratic symmetric positive definite?");
     111        if (res == false) { //try lua decomposition if cholesky faild
    120112          int[] pivots;
    121113          var lua = new double[n, n];
     
    127119          invG = lua;  // rename
    128120          alglib.rmatrixluinverse(ref invG, pivots, n, out info, out rep);
    129           if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");
    130121        } else {
    131122          alglib.spdmatrixcholeskysolve(l, n, false, y, out info, out denseSolveRep, out alpha);
     
    135126          invG = l;   // rename
    136127          alglib.spdmatrixcholeskyinverse(ref invG, n, false, out info, out rep);
    137           if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");
    138         }
     128        }
     129        if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");
    139130
    140131        var ssqLooError = 0.0;
     
    142133          var pred_i = Util.ScalarProd(Util.GetRow(gram, i).ToArray(), alpha);
    143134          var looPred_i = pred_i - alpha[i] / invG[i, i];
    144           var error = (y[i] - looPred_i) / yScale;
     135          var error = (y[i] - looPred_i) / model.yScale;
    145136          ssqLooError += error * error;
    146137        }
    147         LooCvRMSE = Math.Sqrt(ssqLooError / n);
     138
     139        Array.Copy(alpha, model.alpha, n);
     140        model.LooCvRMSE = Math.Sqrt(ssqLooError / n);
    148141      } catch (alglib.alglibexception ae) {
    149142        // wrap exception so that calling code doesn't have to know about alglib implementation
    150143        throw new ArgumentException("There was a problem in the calculation of the kernel ridge regression model", ae);
    151144      }
     145      return model;
     146    }
     147
     148    private KernelRidgeRegressionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int[] rows,
     149      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) : base(targetVariable) {
     150      this.allowedInputVariables = allowedInputVariables.ToArray();
     151      if (kernel.GetNumberOfParameters(this.allowedInputVariables.Length) > 0) throw new ArgumentException("All parameters in the kernel function must be specified.");
     152      name = ItemName;
     153      description = ItemDescription;
     154
     155      this.kernel = (ICovarianceFunction)kernel.Clone();
     156      this.lambda = lambda;
     157      if (scaleInputs) scaling = CreateScaling(dataset, rows, this.allowedInputVariables);
     158      trainX = ExtractData(dataset, rows, this.allowedInputVariables, scaling);
     159      var y = dataset.GetDoubleValues(targetVariable, rows).ToArray();
     160      yOffset = y.Average();
     161      yScale = 1.0 / y.StandardDeviation();
     162      alpha = new double[trainX.GetLength(0)];
    152163    }
    153164
     
    155166    #region IRegressionModel Members
    156167    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    157       var newX = ExtractData(dataset, rows, scaling);
     168      var newX = ExtractData(dataset, rows, allowedInputVariables, scaling);
    158169      var dim = newX.GetLength(1);
    159170      var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());
     
    175186
    176187    #region helpers
    177     private double[,] BuildGramMatrix(double[,] data, double lambda) {
     188    private static double[,] BuildGramMatrix(double[,] data, double lambda, ICovarianceFunction kernel) {
    178189      var n = data.GetLength(0);
    179190      var dim = data.GetLength(1);
     
    190201    }
    191202
    192     private ITransformation<double>[] CreateScaling(IDataset dataset, int[] rows) {
    193       var trans = new ITransformation<double>[allowedInputVariables.Length];
     203    private static ITransformation<double>[] CreateScaling(IDataset dataset, int[] rows, IReadOnlyCollection<string> allowedInputVariables) {
     204      var trans = new ITransformation<double>[allowedInputVariables.Count];
    194205      int i = 0;
    195206      foreach (var variable in allowedInputVariables) {
     
    205216    }
    206217
    207     private double[,] ExtractData(IDataset dataset, IEnumerable<int> rows, ITransformation<double>[] scaling = null) {
     218    private static double[,] ExtractData(IDataset dataset, IEnumerable<int> rows, IReadOnlyCollection<string> allowedInputVariables, ITransformation<double>[] scaling = null) {
    208219      double[][] variables;
    209220      if (scaling != null) {
Note: See TracChangeset for help on using the changeset viewer.