Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15450

Last change on this file since 15450 was 15450, checked in by gkronber, 6 years ago

#2789 more tests with CV and automatic determination of smoothing parameter

File size: 35.3 KB
RevLine 
[15352]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
[15450]24using System.Diagnostics;
[15352]25using System.Linq;
26using System.Threading;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
[15365]34using HeuristicLab.Random;
[15362]35using MathNet.Numerics.Interpolation;
[15364]36using MathNet.Numerics.LinearAlgebra.Double;
37using MathNet.Numerics.LinearAlgebra.Double.Solvers;
[15352]38
39namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
40  [Item("Splines", "")]
41  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
42  [StorableClass]
43  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
44    [StorableConstructor]
45    private Splines(bool deserializing) : base(deserializing) { }
46    [StorableHook(HookType.AfterDeserialization)]
47    private void AfterDeserialization() {
48    }
49
50    private Splines(Splines original, Cloner cloner)
51      : base(original, cloner) {
52    }
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new Splines(this, cloner);
55    }
56
57    public Splines()
58      : base() {
59      var validTypes = new ItemSet<StringValue>(
60        new[] {
[15362]61        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
62          "Cubic - Natural (Math.NET)",
63          "Polynomial (Math.NET)",
64          "Rational (Math.NET)",
65          "LogLinear (Math.NET)",
66          "Common (Math.NET)",
[15364]67          "Smoothing Spline (Mine)",
68          "Smoothing Spline (Reinsch)",
[15442]69          "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
[15369]70          "B-Spline Smoothing",
[15449]71          "Penalized Regression Spline (alglib)",
72          "CUBGCV"
[15352]73      }.Select(s => new StringValue(s)));
74
[15449]75      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
[15364]76      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
[15365]77      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
[15352]78      Problem = new RegressionProblem();
79    }
80
81
82    protected override void Run(CancellationToken cancellationToken) {
83      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
84      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
85        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
86
87      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
88      if (inputVars.Length > 3) throw new ArgumentException();
89
90      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
91      if (inputVars.Length == 1) {
92
93        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
94        alglib.spline1dinterpolant c;
95        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
96        switch (type) {
97          case "Monotone":
[15364]98            alglib.spline1dbuildmonotone(x, y, out c);
[15362]99            AddAlglibSplineResult(c, inputVars);
100            break;
[15352]101          case "Akima":
[15362]102            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
103            break;
104            ;
[15352]105          case "Catmull-Rom":
[15362]106            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
107            break;
108
[15352]109          case "Cubic":
[15362]110            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
111            break;
112
[15352]113          case "Linear":
[15362]114            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
115            break;
116          case "Cubic - Natural (Math.NET)": {
117              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
118              AddMathNetSplineResult(spline, inputVars);
119              break;
120            }
121          case "Common (Math.NET)": {
122              var spline = MathNet.Numerics.Interpolate.Common(x, y);
123              AddMathNetSplineResult(spline, inputVars);
124              break;
125            }
126          case "LogLinear (Math.NET)": {
127              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
128              AddMathNetSplineResult(spline, inputVars);
129              break;
130            }
131          case "Polynomial (Math.NET)": {
132              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
133              AddMathNetSplineResult(spline, inputVars);
134              break;
135            }
136          case "Rational (Math.NET)": {
137              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
138              AddMathNetSplineResult(spline, inputVars);
139              break;
140            }
[15364]141          case "Smoothing Spline (Mine)": {
142              CalculateSmoothingSpline(x, y, inputVars);
143              break;
144            }
145          case "Smoothing Spline (Reinsch)": {
146              CalculateSmoothingSplineReinsch(x, y, inputVars);
147              break;
148            }
[15442]149          case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
150              double optTol, looRMSE;
151              var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
152              Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
153              Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
154              Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
[15365]155              break;
156            }
[15449]157          case "CUBGCV": {
158              CubicSplineGCV.CubGcvReport report;
[15450]159              var model =
160                CubicSplineGCV.CalculateCubicSpline(x, y,
[15449]161                Problem.ProblemData.TargetVariable, inputVars, out report);
162              var targetVar = Problem.ProblemData.TargetVariable;
163              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
164              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
165              Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
166              Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
167              Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
[15450]168              Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
[15449]169              Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
170              Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
171              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
172              break;
173            }
[15365]174          case "B-Spline Smoothing": {
175              BSplineSmoothing(x, y, inputVars);
176              break;
177            }
[15442]178          case "Penalized Regression Spline (alglib)": {
[15369]179              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
180              var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
181              var targetVar = Problem.ProblemData.TargetVariable;
182              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
183              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
184
185              break;
[15442]186            }
[15352]187          default: throw new NotSupportedException();
188        }
189
190      }
191    }
[15362]192
[15369]193
[15442]194    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
[15369]195      int info;
196      alglib.spline1dinterpolant interpolant;
197      alglib.spline1dfitreport rep;
198      int n = x.Length;
199      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
200
201      alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
202      return new Spline1dModel(interpolant, targetVar, inputVars);
203    }
204
[15442]205    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
206      out double avgTrainRMSE,
[15450]207      out double cvRMSE) {
[15442]208      int info;
209      alglib.spline1dinterpolant interpolant;
210      alglib.spline1dfitreport rep;
211      int n = x.Length;
212      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
[15364]213
[15450]214      int folds = 10;
215      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
216      if (foldSize == 1) folds = x.Length;
[15365]217
[15450]218      double[] x_loo = new double[(folds - 1) * foldSize];
219      double[] y_loo = new double[(folds - 1) * foldSize];
220
[15442]221      var models = new List<IRegressionModel>();
[15450]222      var sumTrainSE = 0.0;
223      int nTrain = 0;
224      var sumTestSE = 0.0;
225      int nTest = 0;
[15365]226
[15450]227
228      for (int i = 0; i < folds; i++) {
229        if (i < folds - 1) {
230          //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
231          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
232          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
233        }
[15442]234        alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep);
[15450]235        Debug.Assert(y_loo.All(yi => yi != 0.0));
236        Debug.Assert(x_loo.All(xi => xi != 0.0));
[15365]237
[15450]238        // training error
239        for (int j = 0; j < x_loo.Length; j++) {
240          var y_pred = alglib.spline1dcalc(interpolant, x[i]);
241          double res = y[j] - y_pred;
242          sumTrainSE += res * res;
243          nTrain++;
244        }
245        // test
246        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
247          var y_pred = alglib.spline1dcalc(interpolant, x[i]);
248          double res = y[j] - y_pred;
249          sumTestSE += res * res;
250          nTest++;
251        }
252
253        models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
254
255        if (i < folds - 1) {
256          //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
257          // add current fold for next iteration
258          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
259          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
260        }
[15442]261      }
262
[15450]263      cvRMSE = Math.Sqrt(sumTestSE / nTest);
264      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
[15442]265
266      return new RegressionEnsembleModel(models);
[15365]267    }
268
[15442]269    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
270      Array.Sort(x, y);
271      CubicSpline2(x, y, inputVars);
272    }
273
[15365]274    private struct SplineData {
275      public double a, b, c, d, x, y;
276    };
277
278
279    /*
280     * The procedure Quincunx, takes as arguments
281       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
282       and the second supradiagonal of the banded matrix. The vector on
283       the LHS of the equation (80) is placed in q which contains the solution on the
284       completion of the procedure.
285    */
286    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
287      // { factorisation}
288      u[-1] = 0;
289      u[0] = 0;
290      for (int j = 1; j <= n - 1; j++) {
291        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
292        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
293        w[j] = w[j] / u[j];
[15364]294      }
[15365]295      // { forward substitution}
296      for (int j = 1; j <= n - 1; j++) {
297        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
298      }
299      for (int j = 1; j < n - 1; j++) {
300        q[j] = q[j] / u[j];
301      }
302      // { back substitution}
303      q[n + 1] = 0;
304      q[n] = 0;
305      for (int j = n - 1; j >= 1; j--) {
306        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
307      }
308    }
[15364]309
[15365]310    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
311      // see Pollock: Smoothing Splines
312      int n = x.Length;
313      double lambda = 1.0;
314      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
315      SplineData[] S = new SplineData[x.Length + 1];
316      for (int i = 0; i < x.Length; i++) {
317        S[i].x = x[i];
318        S[i].y = y[i];
[15364]319      }
[15365]320      S[x.Length].x = S[x.Length - 1].x;
321      S[x.Length].y = S[x.Length - 1].y;
322
323      // var
324      double[] h = new double[n],
325        r = new double[n],
326        f = new double[n],
327        p = new double[n];
328      var q = new MyArray<double>(-1, n + 3);
329      var u = new MyArray<double>(-1, n + 3);
330      var v = new MyArray<double>(-1, n + 3);
331      var w = new MyArray<double>(-1, n + 3);
332      double mu;
333
334      mu = 2 * (1 - lambda) / (3 * lambda);
335      h[0] = S[1].x - S[0].x;
336      r[0] = 3 / h[0];
337      for (int i = 1; i < n - 1; i++) {
338        h[i] = S[i + 1].x - S[i].x;
339        r[i] = 3 / h[i];
340        f[i] = -(r[i - 1] + r[i]);
341        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
342        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
[15364]343      }
[15365]344      for (int i = 1; i < n; i++) {
345        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
346        u[i] = mu * u[i] + p[i];
347        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
348        v[i] = mu * v[i] + h[i];
349        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
350      }
351      Quincunx(n, u, v, w, q);
352      // { Spline P arameters}
353      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
354      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
355      S[0].a = q[1] / (3 * h[0]);
356      S[0].b = 0;
357      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
358      r[0] = 0;
359      for (int j = 1; j < n; j++) {
360        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
361        S[j].b = q[j];
362        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
363        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
364        S[j].d = y[j] - mu * S[j].d * sigma[j];
365      }
[15364]366
[15365]367      //   { SmoothingSpline}
[15364]368    }
369
[15442]370    public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
371      double s = x.Length + 1;
[15365]372      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
[15364]373
[15442]374      var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
[15364]375
[15442]376      Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
377    }
[15364]378
379
[15442]380    public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
381      out double avgTrainRMSE,
[15450]382      out double cvRMSE) {
[15442]383      int info;
[15364]384
[15450]385      int folds = 10;
386      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
387      if (foldSize == 1) folds = x.Length;
[15365]388
[15450]389      double[] x_loo = new double[(folds - 1) * foldSize];
390      double[] y_loo = new double[(folds - 1) * foldSize];
391
[15442]392      var models = new List<IRegressionModel>();
[15450]393      var sumTrainSE = 0.0;
394      int nTrain = 0;
395      var sumTestSE = 0.0;
396      int nTest = 0;
[15365]397
398
[15450]399      for (int i = 0; i < folds; i++) {
400        if (i < folds - 1) {
401          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
402          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
403        }
404        var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
[15365]405
[15450]406        // training error
407        for(int j = 0;j<x_loo.Length;j++) {
408          var y_pred = model.GetEstimatedValue(x[j]);
409          double res = y[j] - y_pred;
410          sumTrainSE += res * res;
411          nTrain++;
412        }
413        // test
414        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
415          var y_pred = model.GetEstimatedValue(x[j]);
416          double res = y[j] - y_pred;
417          sumTestSE += res * res;
418          nTest++;
419        }
[15365]420
[15442]421        models.Add(model);
[15365]422
[15450]423        if (i < folds - 1) {
424          // add current fold for next iteration
425          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
426          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
[15442]427        }
428      }
429
[15450]430      cvRMSE = Math.Sqrt(sumTestSE / nTest);
431      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
[15442]432
433      return new RegressionEnsembleModel(models);
[15365]434    }
435
436
[15450]437    // automatically determines tolerance using loo cv, SLOW!
[15442]438    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
[15450]439      out double optTolerance, out double cvRMSE) {
[15442]440      var maxTolerance = y.StandardDeviation();
441      var minTolerance = maxTolerance * 1E-6;
442      optTolerance = maxTolerance;
443      var tol = maxTolerance;
[15450]444      cvRMSE = double.PositiveInfinity;
[15442]445      IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
446      while (tol > minTolerance) {
[15450]447        double curCVRMSE;
[15442]448        double avgTrainRmse;
449        var model = Splines.CalculateSmoothingSplineReinsch(
[15450]450          x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
[15442]451
[15450]452        if (curCVRMSE < cvRMSE) {
453          cvRMSE = curCVRMSE;
[15442]454          optTolerance = tol;
455          bestModel = model;
456        }
457        tol *= 0.85;
458      }
459      return bestModel;
460    }
461
462    public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
[15364]463      var minX = xOrig.Min();
464      var maxX = xOrig.Max();
465      var range = maxX - minX;
466
[15365]467      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
468      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
[15364]469
470      // See Smoothing by Spline Functions, Reinsch, 1967
471      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
472      int n = xOrig.Length;
473      var x = new MyArray<double>(1, xOrig);
474      var y = new MyArray<double>(1, yOrig);
[15365]475      var inv_dy = new MyArray<double>(1, w);
[15364]476
477      int n1 = 1;
[15365]478      int n2 = n;
[15364]479
480      // results
481      var a = new MyArray<double>(n1, n);
482      var b = new MyArray<double>(n1, n);
483      var c = new MyArray<double>(n1, n);
484      var d = new MyArray<double>(n1, n);
485
486      // smooth
487      {
488        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
489        MyArray<double>
490          r = new MyArray<double>(n1 - 1, n + 2),
491          r1 = new MyArray<double>(n1 - 1, n + 2),
492          r2 = new MyArray<double>(n1 - 1, n + 2),
493          t = new MyArray<double>(n1 - 1, n + 2),
494          t1 = new MyArray<double>(n1 - 1, n + 2),
495          u = new MyArray<double>(n1 - 1, n + 2),
496          v = new MyArray<double>(n1 - 1, n + 2);
497        m1 = n1 - 1;
498        m2 = n2 + 1;
499        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
500          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
501        m1 = n1 + 1;
502        m2 = n2 - 1;
503        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
504        for (i = m1; i <= m2; i++) {
505          g = h; h = x[i + 1] - x[i];
506          e = f; f = (y[i + 1] - y[i]) / h;
507          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
508          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
509          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
510        }
511        for (i = m1; i <= m2; i++) {
512          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
513          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
514          d[i] = r[i] * r2[i + 2];
515        }
516        f2 = -s;
517        next:
518        for (i = m1; i <= m2; i++) {
519          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
520          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
521          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
522          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
523        }
524        for (i = m2; i >= m1; i--) {
525          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
526        }
527        e = h = 0;
528        for (i = n1; i <= m2; i++) {
529          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
530          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
531        }
532        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
533        g = f2; f2 = e * p * p;
534        if (f2 >= s || f2 <= g) goto fin;
535        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
536        for (i = m1; i <= m2; i++) {
537          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
538          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
539          f = f + g * r[i] * g; r[i] = g;
540        }
541        h = e - p * f; if (h <= 0) goto fin;
542        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
543
544        fin:
545        for (i = n1; i <= n2; i++) {
546          a[i] = y[i] - p * v[i];
547          c[i] = u[i];
548        }
549        for (i = n1; i <= m2; i++) {
550          h = x[i + 1] - x[i];
551          d[i] = (c[i + 1] - c[i]) / (3 * h);
552          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
553        }
554      }
555
[15449]556      // extrapolate for xx > x[n2]
557      b[b.Length] = b[b.Length - 1];
558      d[1] = 0;
559
[15365]560      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
[15364]561    }
562
563    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
564      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
[15365]565      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
566      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
[15364]567      int n = x.Length;
568
569      SparseMatrix delta = new SparseMatrix(n - 2, n);
570      // double[,] delta = new double[n - 2, n];
571      //double[,] W = new double[n - 2, n - 2];
572      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
573      Matrix WInvD = new DenseMatrix(n - 2, n);
574
575      // double[,] W_inv_D = new double[n - 2, n];
576      // double[,] K = new double[n, n];
577
578      // go over successive knots to determine distances and fill Delta and W
579      for (int i = 0; i < n - 2; i++) {
580        double h = x[i + 1] - x[i];
581        double h_next = x[i + 2] - x[i + 1];
582        delta[i, i] = 1.0 / h;
583        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
584        delta[i, i + 2] = 1.0 / h_next;
585        W[i, i] = (h + h_next) / 3;
586        if (i > 0) {
587          W[i - 1, i] =
588           W[i, i - 1] = h / 6.0;
589        }
590      }
591
[15365]592      // WInvD = W.Cholesky().Solve(delta);
[15364]593      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
594
595      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
596      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
597
598      var K = delta.TransposeThisAndMultiply(WInvD);
599
600      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
601
602      for (int i = 0; i < n; i++) {
603        for (int j = 0; j < n; j++) {
604          K[i, j] *= lambda;
605          if (i == j) K[i, j] += 1;
606        }
607      }
608
609      // solve for y
610      // double[] s;
611      // int solverInfo;
612      // alglib.densesolverreport solverRep;
613      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
614
615      var s = K.Solve(new DenseVector(y)).ToArray();
616
617      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
[15365]618  (IRegressionProblemData)Problem.ProblemData.Clone())));
[15364]619    }
620
[15449]621    public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
[15365]622      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
623      // sort by x
624      Array.Sort(x, sortedIdx);
[15364]625
626      var xl = new List<double>();
627      var yl = new List<double>();
[15365]628      var wl = new List<double>();
[15364]629
630      int n = x.Length;
631      var range = x[n - 1] - x[0];
632      var binSize = range / n / 20;
633      {
634        // binning
635        int i = 0;
636        while (i < n) {
637          int k = 0;
638          int j = i;
639          double sumX = 0.0;
640          double sumY = 0.0;
[15365]641          double sumW = 0.0;
[15364]642          while (j < n && x[j] - x[i] <= binSize) {
643            k++;
644            sumX += x[j];
[15365]645            sumY += y[sortedIdx[j]];
646            sumW += w[sortedIdx[j]];
[15364]647            j++;
648          }
649          var avgX = sumX / k;
650          if (scaling) avgX = (avgX - x[0]) / range;
[15365]651          xl.Add(avgX);
[15364]652          yl.Add(sumY / k);
[15365]653          wl.Add(sumW);
[15364]654          i += k;
655        }
656      }
657
658      x2 = xl.ToArray();
659      y2 = yl.ToArray();
[15365]660      w2 = wl.ToArray();
[15364]661    }
662
[15362]663    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
664      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
665        (IRegressionProblemData)Problem.ProblemData.Clone())));
666
667    }
668    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
669      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
670        (IRegressionProblemData)Problem.ProblemData.Clone())));
[15364]671    }
672  }
[15362]673
[15364]674
[15365]675  // array with non-zero lower index
676  internal class MyArray<T> {
677    private T[] arr;
678    private int lowerBound;
[15364]679
[15365]680    public int Length { get { return arr.Length; } }
681
682    public T this[int key] {
683      get {
684        return arr[key - lowerBound];
685      }
686      set {
687        arr[key - lowerBound] = value;
688      }
689    }
690
691    public MyArray(int lowerBound, int numElements) {
692      this.lowerBound = lowerBound;
693      arr = new T[numElements];
694    }
695    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
696      Array.Copy(source, arr, source.Length);
697    }
698
699    public T[] ToArray() {
700      var res = new T[arr.Length];
701      Array.Copy(arr, res, res.Length);
702      return res;
703    }
704  }
705
706
[15364]707  // UNFINISHED
[15442]708  public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
[15365]709    private MyArray<double> a;
710    private MyArray<double> b;
711    private MyArray<double> c;
712    private MyArray<double> d;
713    private MyArray<double> x;
[15364]714    private double offset;
715    private double scale;
716
717    public string TargetVariable { get; set; }
718
719    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
720
721    public event EventHandler TargetVariableChanged;
722
723    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
724      this.TargetVariable = orig.TargetVariable;
725      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
726      this.a = orig.a;
727      this.b = orig.b;
728      this.c = orig.c;
729      this.d = orig.d;
730      this.x = orig.x;
731      this.scale = orig.scale;
732      this.offset = orig.offset;
[15362]733    }
[15442]734    internal ReinschSmoothingSplineModel(
[15365]735      MyArray<double> a,
736      MyArray<double> b,
737      MyArray<double> c,
738      MyArray<double> d,
739      MyArray<double> x,
740      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
[15364]741      this.a = a;
742      this.b = b;
743      this.c = c;
744      this.d = d;
745      this.x = x;
746      this.TargetVariable = targetVar;
747      this.VariablesUsedForPrediction = inputs;
748      this.scale = scale;
749      this.offset = offset;
750    }
751
752    public override IDeepCloneable Clone(Cloner cloner) {
753      return new ReinschSmoothingSplineModel(this, cloner);
754    }
755
756    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
757      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
758    }
759
[15442]760    public double GetEstimatedValue(double xx) {
[15449]761      int n = a.Length;
[15442]762      if (xx <= x[1]) {
763        double h = xx - x[1];
764        return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
765      } else if (xx >= x[n]) {
766        double h = xx - x[n];
767        return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
768      } else {
769        // binary search
770        int lower = 1;
771        int upper = n;
772        while (true) {
773          if (upper < lower) throw new InvalidProgramException();
774          int i = lower + (upper - lower) / 2;
775          if (x[i] <= xx && xx < x[i + 1]) {
776            double h = xx - x[i];
777            return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
778          } else if (xx < x[i]) {
779            upper = i - 1;
780          } else {
781            lower = i + 1;
[15364]782          }
783        }
784      }
785    }
[15442]786
787    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
788      int n = x.Length;
789      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
790      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
791        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
792      }
793      foreach (var xx in product.Select(xi => (xi - offset) * scale)) {
794        yield return GetEstimatedValue(xx);
795      }
796    }
[15352]797  }
798
[15364]799  // UNFINISHED
800  public class SmoothingSplineModel : NamedItem, IRegressionModel {
801    private double[] s;
802    private IInterpolation interpolation;
[15352]803
[15364]804    public string TargetVariable { get; set; }
805
806    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
807
808    public event EventHandler TargetVariableChanged;
809
810    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
811      this.TargetVariable = orig.TargetVariable;
812      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
813      this.s = orig.s; // TODO
814      this.interpolation = orig.interpolation;
815    }
816    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
817      this.s = s;
818      this.TargetVariable = targetVar;
819      this.VariablesUsedForPrediction = inputs;
820      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
821    }
822
823    public override IDeepCloneable Clone(Cloner cloner) {
824      return new SmoothingSplineModel(this, cloner);
825    }
826
827    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
828      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
829    }
830
831    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
832      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
833
834        yield return interpolation.Interpolate(x);
835
836      }
837    }
838  }
839
[15352]840  // UNFINISHED
841  public class Spline1dModel : NamedItem, IRegressionModel {
842    private alglib.spline1dinterpolant interpolant;
843
844    public string TargetVariable { get; set; }
845
846    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
847
848    public event EventHandler TargetVariableChanged;
849
850    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
851      this.TargetVariable = orig.TargetVariable;
852      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
853      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
854    }
855    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
856      this.interpolant = interpolant;
857      this.TargetVariable = targetVar;
858      this.VariablesUsedForPrediction = inputs;
859    }
860
861    public override IDeepCloneable Clone(Cloner cloner) {
862      return new Spline1dModel(this, cloner);
863    }
864
865    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
866      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
867    }
868
869    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
[15433]870      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
[15442]871      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
[15433]872        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
873      }
874
875      foreach (var x in product) {
[15352]876        yield return alglib.spline1dcalc(interpolant, x);
877      }
878    }
879  }
[15362]880
881
882  // UNFINISHED
883  public class MathNetSplineModel : NamedItem, IRegressionModel {
884    private IInterpolation interpolant;
885
886    public string TargetVariable { get; set; }
887
888    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
889
890    public event EventHandler TargetVariableChanged;
891
892    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
893      this.TargetVariable = orig.TargetVariable;
894      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
895      this.interpolant = orig.interpolant; // TODO COPY!
896    }
897    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
898      this.interpolant = interpolant;
899      this.TargetVariable = targetVar;
900      this.VariablesUsedForPrediction = inputs;
901    }
902
903    public override IDeepCloneable Clone(Cloner cloner) {
904      return new MathNetSplineModel(this, cloner);
905    }
906
907    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
908      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
909    }
910
911    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
912      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
913        yield return interpolant.Interpolate(x);
914      }
915    }
916  }
[15352]917}
Note: See TracBrowser for help on using the repository browser.