source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15450

Last change on this file since 15450 was 15450, checked in by gkronber, 4 years ago

#2789 more tests with CV and automatic determination of smoothing parameter

File size: 35.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using System.Threading;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34using HeuristicLab.Random;
35using MathNet.Numerics.Interpolation;
36using MathNet.Numerics.LinearAlgebra.Double;
37using MathNet.Numerics.LinearAlgebra.Double.Solvers;
38
39namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
40  [Item("Splines", "")]
41  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
42  [StorableClass]
43  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
44    [StorableConstructor]
45    private Splines(bool deserializing) : base(deserializing) { }
46    [StorableHook(HookType.AfterDeserialization)]
47    private void AfterDeserialization() {
48    }
49
50    private Splines(Splines original, Cloner cloner)
51      : base(original, cloner) {
52    }
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new Splines(this, cloner);
55    }
56
57    public Splines()
58      : base() {
59      var validTypes = new ItemSet<StringValue>(
60        new[] {
61        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
62          "Cubic - Natural (Math.NET)",
63          "Polynomial (Math.NET)",
64          "Rational (Math.NET)",
65          "LogLinear (Math.NET)",
66          "Common (Math.NET)",
67          "Smoothing Spline (Mine)",
68          "Smoothing Spline (Reinsch)",
69          "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
70          "B-Spline Smoothing",
71          "Penalized Regression Spline (alglib)",
72          "CUBGCV"
73      }.Select(s => new StringValue(s)));
74
75      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
76      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
77      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
78      Problem = new RegressionProblem();
79    }
80
81
82    protected override void Run(CancellationToken cancellationToken) {
83      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
84      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
85        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
86
87      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
88      if (inputVars.Length > 3) throw new ArgumentException();
89
90      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
91      if (inputVars.Length == 1) {
92
93        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
94        alglib.spline1dinterpolant c;
95        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
96        switch (type) {
97          case "Monotone":
98            alglib.spline1dbuildmonotone(x, y, out c);
99            AddAlglibSplineResult(c, inputVars);
100            break;
101          case "Akima":
102            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
103            break;
104            ;
105          case "Catmull-Rom":
106            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
107            break;
108
109          case "Cubic":
110            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
111            break;
112
113          case "Linear":
114            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
115            break;
116          case "Cubic - Natural (Math.NET)": {
117              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
118              AddMathNetSplineResult(spline, inputVars);
119              break;
120            }
121          case "Common (Math.NET)": {
122              var spline = MathNet.Numerics.Interpolate.Common(x, y);
123              AddMathNetSplineResult(spline, inputVars);
124              break;
125            }
126          case "LogLinear (Math.NET)": {
127              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
128              AddMathNetSplineResult(spline, inputVars);
129              break;
130            }
131          case "Polynomial (Math.NET)": {
132              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
133              AddMathNetSplineResult(spline, inputVars);
134              break;
135            }
136          case "Rational (Math.NET)": {
137              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
138              AddMathNetSplineResult(spline, inputVars);
139              break;
140            }
141          case "Smoothing Spline (Mine)": {
142              CalculateSmoothingSpline(x, y, inputVars);
143              break;
144            }
145          case "Smoothing Spline (Reinsch)": {
146              CalculateSmoothingSplineReinsch(x, y, inputVars);
147              break;
148            }
149          case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
150              double optTol, looRMSE;
151              var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
152              Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
153              Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
154              Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
155              break;
156            }
157          case "CUBGCV": {
158              CubicSplineGCV.CubGcvReport report;
159              var model =
160                CubicSplineGCV.CalculateCubicSpline(x, y,
161                Problem.ProblemData.TargetVariable, inputVars, out report);
162              var targetVar = Problem.ProblemData.TargetVariable;
163              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
164              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
165              Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
166              Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
167              Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
168              Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
169              Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
170              Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
171              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
172              break;
173            }
174          case "B-Spline Smoothing": {
175              BSplineSmoothing(x, y, inputVars);
176              break;
177            }
178          case "Penalized Regression Spline (alglib)": {
179              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
180              var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
181              var targetVar = Problem.ProblemData.TargetVariable;
182              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
183              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
184
185              break;
186            }
187          default: throw new NotSupportedException();
188        }
189
190      }
191    }
192
193
194    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
195      int info;
196      alglib.spline1dinterpolant interpolant;
197      alglib.spline1dfitreport rep;
198      int n = x.Length;
199      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
200
201      alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
202      return new Spline1dModel(interpolant, targetVar, inputVars);
203    }
204
205    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
206      out double avgTrainRMSE,
207      out double cvRMSE) {
208      int info;
209      alglib.spline1dinterpolant interpolant;
210      alglib.spline1dfitreport rep;
211      int n = x.Length;
212      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
213
214      int folds = 10;
215      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
216      if (foldSize == 1) folds = x.Length;
217
218      double[] x_loo = new double[(folds - 1) * foldSize];
219      double[] y_loo = new double[(folds - 1) * foldSize];
220
221      var models = new List<IRegressionModel>();
222      var sumTrainSE = 0.0;
223      int nTrain = 0;
224      var sumTestSE = 0.0;
225      int nTest = 0;
226
227
228      for (int i = 0; i < folds; i++) {
229        if (i < folds - 1) {
230          //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
231          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
232          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
233        }
234        alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep);
235        Debug.Assert(y_loo.All(yi => yi != 0.0));
236        Debug.Assert(x_loo.All(xi => xi != 0.0));
237
238        // training error
239        for (int j = 0; j < x_loo.Length; j++) {
240          var y_pred = alglib.spline1dcalc(interpolant, x[i]);
241          double res = y[j] - y_pred;
242          sumTrainSE += res * res;
243          nTrain++;
244        }
245        // test
246        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
247          var y_pred = alglib.spline1dcalc(interpolant, x[i]);
248          double res = y[j] - y_pred;
249          sumTestSE += res * res;
250          nTest++;
251        }
252
253        models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
254
255        if (i < folds - 1) {
256          //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
257          // add current fold for next iteration
258          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
259          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
260        }
261      }
262
263      cvRMSE = Math.Sqrt(sumTestSE / nTest);
264      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
265
266      return new RegressionEnsembleModel(models);
267    }
268
269    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
270      Array.Sort(x, y);
271      CubicSpline2(x, y, inputVars);
272    }
273
274    private struct SplineData {
275      public double a, b, c, d, x, y;
276    };
277
278
279    /*
280     * The procedure Quincunx, takes as arguments
281       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
282       and the second supradiagonal of the banded matrix. The vector on
283       the LHS of the equation (80) is placed in q which contains the solution on the
284       completion of the procedure.
285    */
286    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
287      // { factorisation}
288      u[-1] = 0;
289      u[0] = 0;
290      for (int j = 1; j <= n - 1; j++) {
291        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
292        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
293        w[j] = w[j] / u[j];
294      }
295      // { forward substitution}
296      for (int j = 1; j <= n - 1; j++) {
297        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
298      }
299      for (int j = 1; j < n - 1; j++) {
300        q[j] = q[j] / u[j];
301      }
302      // { back substitution}
303      q[n + 1] = 0;
304      q[n] = 0;
305      for (int j = n - 1; j >= 1; j--) {
306        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
307      }
308    }
309
310    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
311      // see Pollock: Smoothing Splines
312      int n = x.Length;
313      double lambda = 1.0;
314      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
315      SplineData[] S = new SplineData[x.Length + 1];
316      for (int i = 0; i < x.Length; i++) {
317        S[i].x = x[i];
318        S[i].y = y[i];
319      }
320      S[x.Length].x = S[x.Length - 1].x;
321      S[x.Length].y = S[x.Length - 1].y;
322
323      // var
324      double[] h = new double[n],
325        r = new double[n],
326        f = new double[n],
327        p = new double[n];
328      var q = new MyArray<double>(-1, n + 3);
329      var u = new MyArray<double>(-1, n + 3);
330      var v = new MyArray<double>(-1, n + 3);
331      var w = new MyArray<double>(-1, n + 3);
332      double mu;
333
334      mu = 2 * (1 - lambda) / (3 * lambda);
335      h[0] = S[1].x - S[0].x;
336      r[0] = 3 / h[0];
337      for (int i = 1; i < n - 1; i++) {
338        h[i] = S[i + 1].x - S[i].x;
339        r[i] = 3 / h[i];
340        f[i] = -(r[i - 1] + r[i]);
341        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
342        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
343      }
344      for (int i = 1; i < n; i++) {
345        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
346        u[i] = mu * u[i] + p[i];
347        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
348        v[i] = mu * v[i] + h[i];
349        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
350      }
351      Quincunx(n, u, v, w, q);
352      // { Spline P arameters}
353      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
354      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
355      S[0].a = q[1] / (3 * h[0]);
356      S[0].b = 0;
357      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
358      r[0] = 0;
359      for (int j = 1; j < n; j++) {
360        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
361        S[j].b = q[j];
362        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
363        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
364        S[j].d = y[j] - mu * S[j].d * sigma[j];
365      }
366
367      //   { SmoothingSpline}
368    }
369
370    public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
371      double s = x.Length + 1;
372      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
373
374      var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
375
376      Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
377    }
378
379
380    public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
381      out double avgTrainRMSE,
382      out double cvRMSE) {
383      int info;
384
385      int folds = 10;
386      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
387      if (foldSize == 1) folds = x.Length;
388
389      double[] x_loo = new double[(folds - 1) * foldSize];
390      double[] y_loo = new double[(folds - 1) * foldSize];
391
392      var models = new List<IRegressionModel>();
393      var sumTrainSE = 0.0;
394      int nTrain = 0;
395      var sumTestSE = 0.0;
396      int nTest = 0;
397
398
399      for (int i = 0; i < folds; i++) {
400        if (i < folds - 1) {
401          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
402          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
403        }
404        var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
405
406        // training error
407        for(int j = 0;j<x_loo.Length;j++) {
408          var y_pred = model.GetEstimatedValue(x[j]);
409          double res = y[j] - y_pred;
410          sumTrainSE += res * res;
411          nTrain++;
412        }
413        // test
414        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
415          var y_pred = model.GetEstimatedValue(x[j]);
416          double res = y[j] - y_pred;
417          sumTestSE += res * res;
418          nTest++;
419        }
420
421        models.Add(model);
422
423        if (i < folds - 1) {
424          // add current fold for next iteration
425          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
426          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
427        }
428      }
429
430      cvRMSE = Math.Sqrt(sumTestSE / nTest);
431      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
432
433      return new RegressionEnsembleModel(models);
434    }
435
436
437    // automatically determines tolerance using loo cv, SLOW!
438    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
439      out double optTolerance, out double cvRMSE) {
440      var maxTolerance = y.StandardDeviation();
441      var minTolerance = maxTolerance * 1E-6;
442      optTolerance = maxTolerance;
443      var tol = maxTolerance;
444      cvRMSE = double.PositiveInfinity;
445      IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
446      while (tol > minTolerance) {
447        double curCVRMSE;
448        double avgTrainRmse;
449        var model = Splines.CalculateSmoothingSplineReinsch(
450          x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
451
452        if (curCVRMSE < cvRMSE) {
453          cvRMSE = curCVRMSE;
454          optTolerance = tol;
455          bestModel = model;
456        }
457        tol *= 0.85;
458      }
459      return bestModel;
460    }
461
462    public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
463      var minX = xOrig.Min();
464      var maxX = xOrig.Max();
465      var range = maxX - minX;
466
467      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
468      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
469
470      // See Smoothing by Spline Functions, Reinsch, 1967
471      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
472      int n = xOrig.Length;
473      var x = new MyArray<double>(1, xOrig);
474      var y = new MyArray<double>(1, yOrig);
475      var inv_dy = new MyArray<double>(1, w);
476
477      int n1 = 1;
478      int n2 = n;
479
480      // results
481      var a = new MyArray<double>(n1, n);
482      var b = new MyArray<double>(n1, n);
483      var c = new MyArray<double>(n1, n);
484      var d = new MyArray<double>(n1, n);
485
486      // smooth
487      {
488        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
489        MyArray<double>
490          r = new MyArray<double>(n1 - 1, n + 2),
491          r1 = new MyArray<double>(n1 - 1, n + 2),
492          r2 = new MyArray<double>(n1 - 1, n + 2),
493          t = new MyArray<double>(n1 - 1, n + 2),
494          t1 = new MyArray<double>(n1 - 1, n + 2),
495          u = new MyArray<double>(n1 - 1, n + 2),
496          v = new MyArray<double>(n1 - 1, n + 2);
497        m1 = n1 - 1;
498        m2 = n2 + 1;
499        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
500          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
501        m1 = n1 + 1;
502        m2 = n2 - 1;
503        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
504        for (i = m1; i <= m2; i++) {
505          g = h; h = x[i + 1] - x[i];
506          e = f; f = (y[i + 1] - y[i]) / h;
507          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
508          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
509          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
510        }
511        for (i = m1; i <= m2; i++) {
512          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
513          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
514          d[i] = r[i] * r2[i + 2];
515        }
516        f2 = -s;
517        next:
518        for (i = m1; i <= m2; i++) {
519          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
520          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
521          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
522          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
523        }
524        for (i = m2; i >= m1; i--) {
525          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
526        }
527        e = h = 0;
528        for (i = n1; i <= m2; i++) {
529          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
530          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
531        }
532        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
533        g = f2; f2 = e * p * p;
534        if (f2 >= s || f2 <= g) goto fin;
535        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
536        for (i = m1; i <= m2; i++) {
537          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
538          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
539          f = f + g * r[i] * g; r[i] = g;
540        }
541        h = e - p * f; if (h <= 0) goto fin;
542        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
543
544        fin:
545        for (i = n1; i <= n2; i++) {
546          a[i] = y[i] - p * v[i];
547          c[i] = u[i];
548        }
549        for (i = n1; i <= m2; i++) {
550          h = x[i + 1] - x[i];
551          d[i] = (c[i + 1] - c[i]) / (3 * h);
552          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
553        }
554      }
555
556      // extrapolate for xx > x[n2]
557      b[b.Length] = b[b.Length - 1];
558      d[1] = 0;
559
560      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
561    }
562
563    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
564      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
565      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
566      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
567      int n = x.Length;
568
569      SparseMatrix delta = new SparseMatrix(n - 2, n);
570      // double[,] delta = new double[n - 2, n];
571      //double[,] W = new double[n - 2, n - 2];
572      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
573      Matrix WInvD = new DenseMatrix(n - 2, n);
574
575      // double[,] W_inv_D = new double[n - 2, n];
576      // double[,] K = new double[n, n];
577
578      // go over successive knots to determine distances and fill Delta and W
579      for (int i = 0; i < n - 2; i++) {
580        double h = x[i + 1] - x[i];
581        double h_next = x[i + 2] - x[i + 1];
582        delta[i, i] = 1.0 / h;
583        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
584        delta[i, i + 2] = 1.0 / h_next;
585        W[i, i] = (h + h_next) / 3;
586        if (i > 0) {
587          W[i - 1, i] =
588           W[i, i - 1] = h / 6.0;
589        }
590      }
591
592      // WInvD = W.Cholesky().Solve(delta);
593      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
594
595      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
596      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
597
598      var K = delta.TransposeThisAndMultiply(WInvD);
599
600      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
601
602      for (int i = 0; i < n; i++) {
603        for (int j = 0; j < n; j++) {
604          K[i, j] *= lambda;
605          if (i == j) K[i, j] += 1;
606        }
607      }
608
609      // solve for y
610      // double[] s;
611      // int solverInfo;
612      // alglib.densesolverreport solverRep;
613      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
614
615      var s = K.Solve(new DenseVector(y)).ToArray();
616
617      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
618  (IRegressionProblemData)Problem.ProblemData.Clone())));
619    }
620
621    public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
622      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
623      // sort by x
624      Array.Sort(x, sortedIdx);
625
626      var xl = new List<double>();
627      var yl = new List<double>();
628      var wl = new List<double>();
629
630      int n = x.Length;
631      var range = x[n - 1] - x[0];
632      var binSize = range / n / 20;
633      {
634        // binning
635        int i = 0;
636        while (i < n) {
637          int k = 0;
638          int j = i;
639          double sumX = 0.0;
640          double sumY = 0.0;
641          double sumW = 0.0;
642          while (j < n && x[j] - x[i] <= binSize) {
643            k++;
644            sumX += x[j];
645            sumY += y[sortedIdx[j]];
646            sumW += w[sortedIdx[j]];
647            j++;
648          }
649          var avgX = sumX / k;
650          if (scaling) avgX = (avgX - x[0]) / range;
651          xl.Add(avgX);
652          yl.Add(sumY / k);
653          wl.Add(sumW);
654          i += k;
655        }
656      }
657
658      x2 = xl.ToArray();
659      y2 = yl.ToArray();
660      w2 = wl.ToArray();
661    }
662
663    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
664      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
665        (IRegressionProblemData)Problem.ProblemData.Clone())));
666
667    }
668    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
669      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
670        (IRegressionProblemData)Problem.ProblemData.Clone())));
671    }
672  }
673
674
675  // array with non-zero lower index
676  internal class MyArray<T> {
677    private T[] arr;
678    private int lowerBound;
679
680    public int Length { get { return arr.Length; } }
681
682    public T this[int key] {
683      get {
684        return arr[key - lowerBound];
685      }
686      set {
687        arr[key - lowerBound] = value;
688      }
689    }
690
691    public MyArray(int lowerBound, int numElements) {
692      this.lowerBound = lowerBound;
693      arr = new T[numElements];
694    }
695    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
696      Array.Copy(source, arr, source.Length);
697    }
698
699    public T[] ToArray() {
700      var res = new T[arr.Length];
701      Array.Copy(arr, res, res.Length);
702      return res;
703    }
704  }
705
706
707  // UNFINISHED
708  public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
709    private MyArray<double> a;
710    private MyArray<double> b;
711    private MyArray<double> c;
712    private MyArray<double> d;
713    private MyArray<double> x;
714    private double offset;
715    private double scale;
716
717    public string TargetVariable { get; set; }
718
719    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
720
721    public event EventHandler TargetVariableChanged;
722
723    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
724      this.TargetVariable = orig.TargetVariable;
725      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
726      this.a = orig.a;
727      this.b = orig.b;
728      this.c = orig.c;
729      this.d = orig.d;
730      this.x = orig.x;
731      this.scale = orig.scale;
732      this.offset = orig.offset;
733    }
734    internal ReinschSmoothingSplineModel(
735      MyArray<double> a,
736      MyArray<double> b,
737      MyArray<double> c,
738      MyArray<double> d,
739      MyArray<double> x,
740      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
741      this.a = a;
742      this.b = b;
743      this.c = c;
744      this.d = d;
745      this.x = x;
746      this.TargetVariable = targetVar;
747      this.VariablesUsedForPrediction = inputs;
748      this.scale = scale;
749      this.offset = offset;
750    }
751
752    public override IDeepCloneable Clone(Cloner cloner) {
753      return new ReinschSmoothingSplineModel(this, cloner);
754    }
755
756    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
757      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
758    }
759
760    public double GetEstimatedValue(double xx) {
761      int n = a.Length;
762      if (xx <= x[1]) {
763        double h = xx - x[1];
764        return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
765      } else if (xx >= x[n]) {
766        double h = xx - x[n];
767        return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
768      } else {
769        // binary search
770        int lower = 1;
771        int upper = n;
772        while (true) {
773          if (upper < lower) throw new InvalidProgramException();
774          int i = lower + (upper - lower) / 2;
775          if (x[i] <= xx && xx < x[i + 1]) {
776            double h = xx - x[i];
777            return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
778          } else if (xx < x[i]) {
779            upper = i - 1;
780          } else {
781            lower = i + 1;
782          }
783        }
784      }
785    }
786
787    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
788      int n = x.Length;
789      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
790      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
791        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
792      }
793      foreach (var xx in product.Select(xi => (xi - offset) * scale)) {
794        yield return GetEstimatedValue(xx);
795      }
796    }
797  }
798
799  // UNFINISHED
800  public class SmoothingSplineModel : NamedItem, IRegressionModel {
801    private double[] s;
802    private IInterpolation interpolation;
803
804    public string TargetVariable { get; set; }
805
806    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
807
808    public event EventHandler TargetVariableChanged;
809
810    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
811      this.TargetVariable = orig.TargetVariable;
812      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
813      this.s = orig.s; // TODO
814      this.interpolation = orig.interpolation;
815    }
816    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
817      this.s = s;
818      this.TargetVariable = targetVar;
819      this.VariablesUsedForPrediction = inputs;
820      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
821    }
822
823    public override IDeepCloneable Clone(Cloner cloner) {
824      return new SmoothingSplineModel(this, cloner);
825    }
826
827    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
828      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
829    }
830
831    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
832      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
833
834        yield return interpolation.Interpolate(x);
835
836      }
837    }
838  }
839
840  // UNFINISHED
841  public class Spline1dModel : NamedItem, IRegressionModel {
842    private alglib.spline1dinterpolant interpolant;
843
844    public string TargetVariable { get; set; }
845
846    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
847
848    public event EventHandler TargetVariableChanged;
849
850    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
851      this.TargetVariable = orig.TargetVariable;
852      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
853      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
854    }
855    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
856      this.interpolant = interpolant;
857      this.TargetVariable = targetVar;
858      this.VariablesUsedForPrediction = inputs;
859    }
860
861    public override IDeepCloneable Clone(Cloner cloner) {
862      return new Spline1dModel(this, cloner);
863    }
864
865    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
866      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
867    }
868
869    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
870      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
871      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
872        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
873      }
874
875      foreach (var x in product) {
876        yield return alglib.spline1dcalc(interpolant, x);
877      }
878    }
879  }
880
881
882  // UNFINISHED
883  public class MathNetSplineModel : NamedItem, IRegressionModel {
884    private IInterpolation interpolant;
885
886    public string TargetVariable { get; set; }
887
888    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
889
890    public event EventHandler TargetVariableChanged;
891
892    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
893      this.TargetVariable = orig.TargetVariable;
894      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
895      this.interpolant = orig.interpolant; // TODO COPY!
896    }
897    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
898      this.interpolant = interpolant;
899      this.TargetVariable = targetVar;
900      this.VariablesUsedForPrediction = inputs;
901    }
902
903    public override IDeepCloneable Clone(Cloner cloner) {
904      return new MathNetSplineModel(this, cloner);
905    }
906
907    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
908      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
909    }
910
911    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
912      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
913        yield return interpolant.Interpolate(x);
914      }
915    }
916  }
917}
Note: See TracBrowser for help on using the repository browser.