source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15457

Last change on this file since 15457 was 15457, checked in by gkronber, 4 years ago

#2789 added Finbarr O'Sullivan smoothing spline code

File size: 36.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using System.Threading;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34using HeuristicLab.Random;
35using MathNet.Numerics.Interpolation;
36using MathNet.Numerics.LinearAlgebra.Double;
37using MathNet.Numerics.LinearAlgebra.Double.Solvers;
38
39namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
40  [Item("Splines", "")]
41  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
42  [StorableClass]
43  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
44    [StorableConstructor]
45    private Splines(bool deserializing) : base(deserializing) { }
46    [StorableHook(HookType.AfterDeserialization)]
47    private void AfterDeserialization() {
48    }
49
50    private Splines(Splines original, Cloner cloner)
51      : base(original, cloner) {
52    }
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new Splines(this, cloner);
55    }
56
57    public Splines()
58      : base() {
59      var validTypes = new ItemSet<StringValue>(
60        new[] {
61        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
62          "Cubic - Natural (Math.NET)",
63          "Polynomial (Math.NET)",
64          "Rational (Math.NET)",
65          "LogLinear (Math.NET)",
66          "Common (Math.NET)",
67          "Smoothing Spline (Mine)",
68          "Smoothing Spline (Reinsch)",
69          "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
70          "B-Spline Smoothing",
71          "Penalized Regression Spline (alglib)",
72          "CUBGCV",
73          "Finnbar O'Sullivan (sbart)"
74      }.Select(s => new StringValue(s)));
75
76      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
77      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
78      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
79      Problem = new RegressionProblem();
80    }
81
82
83    protected override void Run(CancellationToken cancellationToken) {
84      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
85      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
86        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
87
88      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
89      if (inputVars.Length > 3) throw new ArgumentException();
90
91      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
92      if (inputVars.Length == 1) {
93
94        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
95        alglib.spline1dinterpolant c;
96        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
97        switch (type) {
98          case "Monotone":
99            alglib.spline1dbuildmonotone(x, y, out c);
100            AddAlglibSplineResult(c, inputVars);
101            break;
102          case "Akima":
103            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
104            break;
105            ;
106          case "Catmull-Rom":
107            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
108            break;
109
110          case "Cubic":
111            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
112            break;
113
114          case "Linear":
115            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
116            break;
117          case "Cubic - Natural (Math.NET)": {
118              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
119              AddMathNetSplineResult(spline, inputVars);
120              break;
121            }
122          case "Common (Math.NET)": {
123              var spline = MathNet.Numerics.Interpolate.Common(x, y);
124              AddMathNetSplineResult(spline, inputVars);
125              break;
126            }
127          case "LogLinear (Math.NET)": {
128              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
129              AddMathNetSplineResult(spline, inputVars);
130              break;
131            }
132          case "Polynomial (Math.NET)": {
133              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
134              AddMathNetSplineResult(spline, inputVars);
135              break;
136            }
137          case "Rational (Math.NET)": {
138              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
139              AddMathNetSplineResult(spline, inputVars);
140              break;
141            }
142          case "Smoothing Spline (Mine)": {
143              CalculateSmoothingSpline(x, y, inputVars);
144              break;
145            }
146          case "Smoothing Spline (Reinsch)": {
147              CalculateSmoothingSplineReinsch(x, y, inputVars);
148              break;
149            }
150          case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
151              double optTol, looRMSE;
152              var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
153              Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
154              Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
155              Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
156              break;
157            }
158          case "CUBGCV": {
159              CubicSplineGCV.CubGcvReport report;
160              var model =
161                CubicSplineGCV.CalculateCubicSpline(x, y,
162                Problem.ProblemData.TargetVariable, inputVars, out report);
163              var targetVar = Problem.ProblemData.TargetVariable;
164              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
165              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
166              Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
167              Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
168              Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
169              Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
170              Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
171              Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
172              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
173              break;
174            }
175          case "Finnbar O'Sullivan (sbart)": {
176              SBART.SBART_Report report;
177              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
178              var model =
179                SBART.CalculateSBART(x, y,
180                Problem.ProblemData.TargetVariable, inputVars, (float)lambda, out report);
181              var targetVar = Problem.ProblemData.TargetVariable;
182              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
183              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
184              Results.Add(new Result("GCV", new DoubleValue(report.gcv)));
185              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
186              break;
187
188            }
189          case "B-Spline Smoothing": {
190              BSplineSmoothing(x, y, inputVars);
191              break;
192            }
193          case "Penalized Regression Spline (alglib)": {
194              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
195              var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
196              var targetVar = Problem.ProblemData.TargetVariable;
197              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
198              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
199
200              break;
201            }
202          default: throw new NotSupportedException();
203        }
204
205      }
206    }
207
208
209    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
210      int info;
211      alglib.spline1dinterpolant interpolant;
212      alglib.spline1dfitreport rep;
213      int n = x.Length;
214      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
215
216      alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
217      return new Spline1dModel(interpolant, targetVar, inputVars);
218    }
219
220    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
221      out double avgTrainRMSE,
222      out double cvRMSE, out double[] predictions) {
223      int info;
224      alglib.spline1dinterpolant interpolant;
225      alglib.spline1dfitreport rep;
226      int n = x.Length;
227      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
228
229      int folds = 10;
230      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
231      if (foldSize == 1) folds = x.Length;
232
233      double[] x_loo = new double[(folds - 1) * foldSize];
234      double[] y_loo = new double[(folds - 1) * foldSize];
235      double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray();
236
237      var models = new List<IRegressionModel>();
238      var sumTrainSE = 0.0;
239      int nTrain = 0;
240      var sumTestSE = 0.0;
241      int nTest = 0;
242
243      predictions = new double[y.Length];
244
245      for (int i = 0; i < folds; i++) {
246        if (i < folds - 1) {
247          //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
248          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
249          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
250        }
251        alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep);
252        //Debug.Assert(y_loo.All(yi => yi != 0.0));
253        //Debug.Assert(x_loo.All(xi => xi != 0.0));
254
255        // training error
256        for (int j = 0; j < x_loo.Length; j++) {
257          var y_pred = alglib.spline1dcalc(interpolant, x[j]);
258          double res = y[j] - y_pred;
259          sumTrainSE += res * res;
260          nTrain++;
261        }
262        // test
263        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
264          predictions[j] = alglib.spline1dcalc(interpolant, x[j]);
265          double res = y[j] - predictions[j];
266          sumTestSE += res * res;
267          nTest++;
268        }
269
270        models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
271
272        if (i < folds - 1) {
273          //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
274          // add current fold for next iteration
275          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
276          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
277        }
278      }
279
280      cvRMSE = Math.Sqrt(sumTestSE / nTest);
281      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
282
283      return new RegressionEnsembleModel(models);
284    }
285
286    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
287      Array.Sort(x, y);
288      CubicSpline2(x, y, inputVars);
289    }
290
291    private struct SplineData {
292      public double a, b, c, d, x, y;
293    };
294
295
296    /*
297     * The procedure Quincunx, takes as arguments
298       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
299       and the second supradiagonal of the banded matrix. The vector on
300       the LHS of the equation (80) is placed in q which contains the solution on the
301       completion of the procedure.
302    */
303    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
304      // { factorisation}
305      u[-1] = 0;
306      u[0] = 0;
307      for (int j = 1; j <= n - 1; j++) {
308        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
309        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
310        w[j] = w[j] / u[j];
311      }
312      // { forward substitution}
313      for (int j = 1; j <= n - 1; j++) {
314        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
315      }
316      for (int j = 1; j < n - 1; j++) {
317        q[j] = q[j] / u[j];
318      }
319      // { back substitution}
320      q[n + 1] = 0;
321      q[n] = 0;
322      for (int j = n - 1; j >= 1; j--) {
323        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
324      }
325    }
326
327    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
328      // see Pollock: Smoothing Splines
329      int n = x.Length;
330      double lambda = 1.0;
331      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
332      SplineData[] S = new SplineData[x.Length + 1];
333      for (int i = 0; i < x.Length; i++) {
334        S[i].x = x[i];
335        S[i].y = y[i];
336      }
337      S[x.Length].x = S[x.Length - 1].x;
338      S[x.Length].y = S[x.Length - 1].y;
339
340      // var
341      double[] h = new double[n],
342        r = new double[n],
343        f = new double[n],
344        p = new double[n];
345      var q = new MyArray<double>(-1, n + 3);
346      var u = new MyArray<double>(-1, n + 3);
347      var v = new MyArray<double>(-1, n + 3);
348      var w = new MyArray<double>(-1, n + 3);
349      double mu;
350
351      mu = 2 * (1 - lambda) / (3 * lambda);
352      h[0] = S[1].x - S[0].x;
353      r[0] = 3 / h[0];
354      for (int i = 1; i < n - 1; i++) {
355        h[i] = S[i + 1].x - S[i].x;
356        r[i] = 3 / h[i];
357        f[i] = -(r[i - 1] + r[i]);
358        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
359        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
360      }
361      for (int i = 1; i < n; i++) {
362        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
363        u[i] = mu * u[i] + p[i];
364        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
365        v[i] = mu * v[i] + h[i];
366        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
367      }
368      Quincunx(n, u, v, w, q);
369      // { Spline P arameters}
370      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
371      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
372      S[0].a = q[1] / (3 * h[0]);
373      S[0].b = 0;
374      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
375      r[0] = 0;
376      for (int j = 1; j < n; j++) {
377        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
378        S[j].b = q[j];
379        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
380        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
381        S[j].d = y[j] - mu * S[j].d * sigma[j];
382      }
383
384      //   { SmoothingSpline}
385    }
386
387    public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
388      double s = x.Length + 1;
389      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
390
391      var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
392
393      Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
394    }
395
396
397    public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
398      out double avgTrainRMSE,
399      out double cvRMSE) {
400      int info;
401
402      int folds = 10;
403      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
404      if (foldSize == 1) folds = x.Length;
405
406      double[] x_loo = new double[(folds - 1) * foldSize];
407      double[] y_loo = new double[(folds - 1) * foldSize];
408
409      var models = new List<IRegressionModel>();
410      var sumTrainSE = 0.0;
411      int nTrain = 0;
412      var sumTestSE = 0.0;
413      int nTest = 0;
414
415
416      for (int i = 0; i < folds; i++) {
417        if (i < folds - 1) {
418          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
419          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
420        }
421        var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
422
423        // training error
424        for(int j = 0;j<x_loo.Length;j++) {
425          var y_pred = model.GetEstimatedValue(x[j]);
426          double res = y[j] - y_pred;
427          sumTrainSE += res * res;
428          nTrain++;
429        }
430        // test
431        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
432          var y_pred = model.GetEstimatedValue(x[j]);
433          double res = y[j] - y_pred;
434          sumTestSE += res * res;
435          nTest++;
436        }
437
438        models.Add(model);
439
440        if (i < folds - 1) {
441          // add current fold for next iteration
442          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
443          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
444        }
445      }
446
447      cvRMSE = Math.Sqrt(sumTestSE / nTest);
448      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
449
450      return new RegressionEnsembleModel(models);
451    }
452
453
454    // automatically determines tolerance using loo cv, SLOW!
455    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
456      out double optTolerance, out double cvRMSE) {
457      var maxTolerance = y.StandardDeviation();
458      var minTolerance = maxTolerance * 1E-6;
459      optTolerance = maxTolerance;
460      var tol = maxTolerance;
461      cvRMSE = double.PositiveInfinity;
462      IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
463      while (tol > minTolerance) {
464        double curCVRMSE;
465        double avgTrainRmse;
466        var model = Splines.CalculateSmoothingSplineReinsch(
467          x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
468
469        if (curCVRMSE < cvRMSE) {
470          cvRMSE = curCVRMSE;
471          optTolerance = tol;
472          bestModel = model;
473        }
474        tol *= 0.85;
475      }
476      return bestModel;
477    }
478
479    public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
480      var minX = xOrig.Min();
481      var maxX = xOrig.Max();
482      var range = maxX - minX;
483
484      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
485      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
486
487      // See Smoothing by Spline Functions, Reinsch, 1967
488      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
489      int n = xOrig.Length;
490      var x = new MyArray<double>(1, xOrig);
491      var y = new MyArray<double>(1, yOrig);
492      var inv_dy = new MyArray<double>(1, w);
493
494      int n1 = 1;
495      int n2 = n;
496
497      // results
498      var a = new MyArray<double>(n1, n);
499      var b = new MyArray<double>(n1, n);
500      var c = new MyArray<double>(n1, n);
501      var d = new MyArray<double>(n1, n);
502
503      // smooth
504      {
505        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
506        MyArray<double>
507          r = new MyArray<double>(n1 - 1, n + 2),
508          r1 = new MyArray<double>(n1 - 1, n + 2),
509          r2 = new MyArray<double>(n1 - 1, n + 2),
510          t = new MyArray<double>(n1 - 1, n + 2),
511          t1 = new MyArray<double>(n1 - 1, n + 2),
512          u = new MyArray<double>(n1 - 1, n + 2),
513          v = new MyArray<double>(n1 - 1, n + 2);
514        m1 = n1 - 1;
515        m2 = n2 + 1;
516        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
517          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
518        m1 = n1 + 1;
519        m2 = n2 - 1;
520        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
521        for (i = m1; i <= m2; i++) {
522          g = h; h = x[i + 1] - x[i];
523          e = f; f = (y[i + 1] - y[i]) / h;
524          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
525          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
526          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
527        }
528        for (i = m1; i <= m2; i++) {
529          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
530          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
531          d[i] = r[i] * r2[i + 2];
532        }
533        f2 = -s;
534        next:
535        for (i = m1; i <= m2; i++) {
536          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
537          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
538          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
539          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
540        }
541        for (i = m2; i >= m1; i--) {
542          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
543        }
544        e = h = 0;
545        for (i = n1; i <= m2; i++) {
546          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
547          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
548        }
549        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
550        g = f2; f2 = e * p * p;
551        if (f2 >= s || f2 <= g) goto fin;
552        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
553        for (i = m1; i <= m2; i++) {
554          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
555          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
556          f = f + g * r[i] * g; r[i] = g;
557        }
558        h = e - p * f; if (h <= 0) goto fin;
559        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
560
561        fin:
562        for (i = n1; i <= n2; i++) {
563          a[i] = y[i] - p * v[i];
564          c[i] = u[i];
565        }
566        for (i = n1; i <= m2; i++) {
567          h = x[i + 1] - x[i];
568          d[i] = (c[i + 1] - c[i]) / (3 * h);
569          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
570        }
571      }
572
573      // extrapolate for xx > x[n2]
574      b[b.Length] = b[b.Length - 1];
575      d[1] = 0;
576
577      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
578    }
579
580    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
581      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
582      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
583      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
584      int n = x.Length;
585
586      SparseMatrix delta = new SparseMatrix(n - 2, n);
587      // double[,] delta = new double[n - 2, n];
588      //double[,] W = new double[n - 2, n - 2];
589      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
590      Matrix WInvD = new DenseMatrix(n - 2, n);
591
592      // double[,] W_inv_D = new double[n - 2, n];
593      // double[,] K = new double[n, n];
594
595      // go over successive knots to determine distances and fill Delta and W
596      for (int i = 0; i < n - 2; i++) {
597        double h = x[i + 1] - x[i];
598        double h_next = x[i + 2] - x[i + 1];
599        delta[i, i] = 1.0 / h;
600        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
601        delta[i, i + 2] = 1.0 / h_next;
602        W[i, i] = (h + h_next) / 3;
603        if (i > 0) {
604          W[i - 1, i] =
605           W[i, i - 1] = h / 6.0;
606        }
607      }
608
609      // WInvD = W.Cholesky().Solve(delta);
610      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
611
612      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
613      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
614
615      var K = delta.TransposeThisAndMultiply(WInvD);
616
617      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
618
619      for (int i = 0; i < n; i++) {
620        for (int j = 0; j < n; j++) {
621          K[i, j] *= lambda;
622          if (i == j) K[i, j] += 1;
623        }
624      }
625
626      // solve for y
627      // double[] s;
628      // int solverInfo;
629      // alglib.densesolverreport solverRep;
630      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
631
632      var s = K.Solve(new DenseVector(y)).ToArray();
633
634      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
635  (IRegressionProblemData)Problem.ProblemData.Clone())));
636    }
637
638    public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
639      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
640      // sort by x
641      Array.Sort(x, sortedIdx);
642
643      var xl = new List<double>();
644      var yl = new List<double>();
645      var wl = new List<double>();
646
647      int n = x.Length;
648      var range = x[n - 1] - x[0];
649      var binSize = range / n / 10;
650      {
651        // binning
652        int i = 0;
653        while (i < n) {
654          int k = 0;
655          int j = i;
656          double sumX = 0.0;
657          double sumY = 0.0;
658          double sumW = 0.0;
659          while (j < n && x[j] - x[i] <= binSize) {
660            k++;
661            sumX += x[j];
662            sumY += y[sortedIdx[j]];
663            sumW += w[sortedIdx[j]];
664            j++;
665          }
666          var avgX = sumX / k;
667          if (scaling) avgX = (avgX - x[0]) / range;
668          xl.Add(avgX);
669          yl.Add(sumY / k);
670          wl.Add(sumW);
671          i += k;
672        }
673      }
674
675      x2 = xl.ToArray();
676      y2 = yl.ToArray();
677      w2 = wl.ToArray();
678    }
679
680    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
681      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
682        (IRegressionProblemData)Problem.ProblemData.Clone())));
683
684    }
685    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
686      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
687        (IRegressionProblemData)Problem.ProblemData.Clone())));
688    }
689  }
690
691
692  // array with non-zero lower index
693  internal class MyArray<T> {
694    private T[] arr;
695    private int lowerBound;
696
697    public int Length { get { return arr.Length; } }
698
699    public T this[int key] {
700      get {
701        return arr[key - lowerBound];
702      }
703      set {
704        arr[key - lowerBound] = value;
705      }
706    }
707
708    public MyArray(int lowerBound, int numElements) {
709      this.lowerBound = lowerBound;
710      arr = new T[numElements];
711    }
712    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
713      Array.Copy(source, arr, source.Length);
714    }
715
716    public T[] ToArray() {
717      var res = new T[arr.Length];
718      Array.Copy(arr, res, res.Length);
719      return res;
720    }
721  }
722
723
724  // UNFINISHED
725  public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
726    private MyArray<double> a;
727    private MyArray<double> b;
728    private MyArray<double> c;
729    private MyArray<double> d;
730    private MyArray<double> x;
731    private double offset;
732    private double scale;
733
734    public string TargetVariable { get; set; }
735
736    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
737
738    public event EventHandler TargetVariableChanged;
739
740    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
741      this.TargetVariable = orig.TargetVariable;
742      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
743      this.a = orig.a;
744      this.b = orig.b;
745      this.c = orig.c;
746      this.d = orig.d;
747      this.x = orig.x;
748      this.scale = orig.scale;
749      this.offset = orig.offset;
750    }
751    internal ReinschSmoothingSplineModel(
752      MyArray<double> a,
753      MyArray<double> b,
754      MyArray<double> c,
755      MyArray<double> d,
756      MyArray<double> x,
757      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
758      this.a = a;
759      this.b = b;
760      this.c = c;
761      this.d = d;
762      this.x = x;
763      this.TargetVariable = targetVar;
764      this.VariablesUsedForPrediction = inputs;
765      this.scale = scale;
766      this.offset = offset;
767    }
768
769    public override IDeepCloneable Clone(Cloner cloner) {
770      return new ReinschSmoothingSplineModel(this, cloner);
771    }
772
773    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
774      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
775    }
776
777    public double GetEstimatedValue(double xx) {
778      xx = (xx - offset) * scale;
779      int n = a.Length;
780      if (xx <= x[1]) {
781        double h = xx - x[1];
782        return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
783      } else if (xx >= x[n]) {
784        double h = xx - x[n];
785        return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
786      } else {
787        // binary search
788        int lower = 1;
789        int upper = n;
790        while (true) {
791          if (upper < lower) throw new InvalidProgramException();
792          int i = lower + (upper - lower) / 2;
793          if (x[i] <= xx && xx < x[i + 1]) {
794            double h = xx - x[i];
795            return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
796          } else if (xx < x[i]) {
797            upper = i - 1;
798          } else {
799            lower = i + 1;
800          }
801        }
802      }
803    }
804
805    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
806      int n = x.Length;
807      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
808      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
809        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
810      }
811      foreach (var xx in product) {
812        yield return GetEstimatedValue(xx);
813      }
814    }
815  }
816
817  // UNFINISHED
818  public class SmoothingSplineModel : NamedItem, IRegressionModel {
819    private double[] s;
820    private IInterpolation interpolation;
821
822    public string TargetVariable { get; set; }
823
824    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
825
826    public event EventHandler TargetVariableChanged;
827
828    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
829      this.TargetVariable = orig.TargetVariable;
830      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
831      this.s = orig.s; // TODO
832      this.interpolation = orig.interpolation;
833    }
834    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
835      this.s = s;
836      this.TargetVariable = targetVar;
837      this.VariablesUsedForPrediction = inputs;
838      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
839    }
840
841    public override IDeepCloneable Clone(Cloner cloner) {
842      return new SmoothingSplineModel(this, cloner);
843    }
844
845    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
846      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
847    }
848
849    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
850      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
851
852        yield return interpolation.Interpolate(x);
853
854      }
855    }
856  }
857
858  // UNFINISHED
859  public class Spline1dModel : NamedItem, IRegressionModel {
860    private alglib.spline1dinterpolant interpolant;
861
862    public string TargetVariable { get; set; }
863
864    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
865
866    public event EventHandler TargetVariableChanged;
867
868    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
869      this.TargetVariable = orig.TargetVariable;
870      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
871      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
872    }
873    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
874      this.interpolant = interpolant;
875      this.TargetVariable = targetVar;
876      this.VariablesUsedForPrediction = inputs;
877    }
878
879    public override IDeepCloneable Clone(Cloner cloner) {
880      return new Spline1dModel(this, cloner);
881    }
882
883    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
884      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
885    }
886
887    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
888      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
889      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
890        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
891      }
892
893      foreach (var x in product) {
894        yield return alglib.spline1dcalc(interpolant, x);
895      }
896    }
897  }
898
899
900  // UNFINISHED
901  public class MathNetSplineModel : NamedItem, IRegressionModel {
902    private IInterpolation interpolant;
903
904    public string TargetVariable { get; set; }
905
906    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
907
908    public event EventHandler TargetVariableChanged;
909
910    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
911      this.TargetVariable = orig.TargetVariable;
912      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
913      this.interpolant = orig.interpolant; // TODO COPY!
914    }
915    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
916      this.interpolant = interpolant;
917      this.TargetVariable = targetVar;
918      this.VariablesUsedForPrediction = inputs;
919    }
920
921    public override IDeepCloneable Clone(Cloner cloner) {
922      return new MathNetSplineModel(this, cloner);
923    }
924
925    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
926      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
927    }
928
929    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
930      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
931        yield return interpolant.Interpolate(x);
932      }
933    }
934  }
935}
Note: See TracBrowser for help on using the repository browser.