source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15449

Last change on this file since 15449 was 15449, checked in by gkronber, 4 years ago

#2789 created x64 build of CUBGCV algorithm, fixed some bugs

File size: 33.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Problems.DataAnalysis;
33using HeuristicLab.Random;
34using MathNet.Numerics.Interpolation;
35using MathNet.Numerics.LinearAlgebra.Double;
36using MathNet.Numerics.LinearAlgebra.Double.Solvers;
37
38namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
39  [Item("Splines", "")]
40  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
41  [StorableClass]
42  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
43    [StorableConstructor]
44    private Splines(bool deserializing) : base(deserializing) { }
45    [StorableHook(HookType.AfterDeserialization)]
46    private void AfterDeserialization() {
47    }
48
49    private Splines(Splines original, Cloner cloner)
50      : base(original, cloner) {
51    }
52    public override IDeepCloneable Clone(Cloner cloner) {
53      return new Splines(this, cloner);
54    }
55
56    public Splines()
57      : base() {
58      var validTypes = new ItemSet<StringValue>(
59        new[] {
60        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
61          "Cubic - Natural (Math.NET)",
62          "Polynomial (Math.NET)",
63          "Rational (Math.NET)",
64          "LogLinear (Math.NET)",
65          "Common (Math.NET)",
66          "Smoothing Spline (Mine)",
67          "Smoothing Spline (Reinsch)",
68          "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
69          "B-Spline Smoothing",
70          "Penalized Regression Spline (alglib)",
71          "CUBGCV"
72      }.Select(s => new StringValue(s)));
73
74      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
75      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
76      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
77      Problem = new RegressionProblem();
78    }
79
80
81    protected override void Run(CancellationToken cancellationToken) {
82      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
83      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
84        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
85
86      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
87      if (inputVars.Length > 3) throw new ArgumentException();
88
89      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
90      if (inputVars.Length == 1) {
91
92        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
93        alglib.spline1dinterpolant c;
94        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
95        switch (type) {
96          case "Monotone":
97            alglib.spline1dbuildmonotone(x, y, out c);
98            AddAlglibSplineResult(c, inputVars);
99            break;
100          case "Akima":
101            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
102            break;
103            ;
104          case "Catmull-Rom":
105            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
106            break;
107
108          case "Cubic":
109            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
110            break;
111
112          case "Linear":
113            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
114            break;
115          case "Cubic - Natural (Math.NET)": {
116              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
117              AddMathNetSplineResult(spline, inputVars);
118              break;
119            }
120          case "Common (Math.NET)": {
121              var spline = MathNet.Numerics.Interpolate.Common(x, y);
122              AddMathNetSplineResult(spline, inputVars);
123              break;
124            }
125          case "LogLinear (Math.NET)": {
126              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
127              AddMathNetSplineResult(spline, inputVars);
128              break;
129            }
130          case "Polynomial (Math.NET)": {
131              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
132              AddMathNetSplineResult(spline, inputVars);
133              break;
134            }
135          case "Rational (Math.NET)": {
136              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
137              AddMathNetSplineResult(spline, inputVars);
138              break;
139            }
140          case "Smoothing Spline (Mine)": {
141              CalculateSmoothingSpline(x, y, inputVars);
142              break;
143            }
144          case "Smoothing Spline (Reinsch)": {
145              CalculateSmoothingSplineReinsch(x, y, inputVars);
146              break;
147            }
148          case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
149              double optTol, looRMSE;
150              var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
151              Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
152              Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
153              Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
154              break;
155            }
156          case "CUBGCV": {
157              CubicSplineGCV.CubGcvReport report;
158              var model =
159                CubicSplineGCV.CalculateCubicSpline(x, y,
160                Problem.ProblemData.TargetVariable, inputVars, out report);
161              var targetVar = Problem.ProblemData.TargetVariable;
162              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
163              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
164              Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
165              Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
166              Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
167              Results.Add(new Result("Estimated treu mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
168              Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
169              Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
170              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
171              break;
172            }
173          case "B-Spline Smoothing": {
174              BSplineSmoothing(x, y, inputVars);
175              break;
176            }
177          case "Penalized Regression Spline (alglib)": {
178              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
179              var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
180              var targetVar = Problem.ProblemData.TargetVariable;
181              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
182              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
183
184              break;
185            }
186          default: throw new NotSupportedException();
187        }
188
189      }
190    }
191
192
193    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
194      int info;
195      alglib.spline1dinterpolant interpolant;
196      alglib.spline1dfitreport rep;
197      int n = x.Length;
198      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
199
200      alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
201      return new Spline1dModel(interpolant, targetVar, inputVars);
202    }
203
204    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
205      out double avgTrainRMSE,
206      out double loocvRMSE) {
207      int info;
208      alglib.spline1dinterpolant interpolant;
209      alglib.spline1dfitreport rep;
210      int n = x.Length;
211      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
212
213      double[] x_loo = new double[x.Length - 1];
214      double[] y_loo = new double[y.Length - 1];
215
216      var models = new List<IRegressionModel>();
217      var sumTrainRmse = 0.0;
218      var sse = 0.0;
219
220      for (int i = 0; i < x.Length; i++) {
221        Array.Copy(x, 0, x_loo, 0, i);
222        Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1));
223        Array.Copy(y, 0, y_loo, 0, i);
224        Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1));
225        alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep);
226        var y_pred = alglib.spline1dcalc(interpolant, x[i]);
227        double res = y[i] - y_pred;
228        sse += res * res;
229        sumTrainRmse += rep.rmserror;
230
231        var model = new Spline1dModel(interpolant, targetVar, inputVars);
232        models.Add(model);
233      }
234
235      loocvRMSE = Math.Sqrt(sse / x.Length);
236      avgTrainRMSE = sumTrainRmse / x.Length;
237
238      return new RegressionEnsembleModel(models);
239    }
240
241    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
242      Array.Sort(x, y);
243      CubicSpline2(x, y, inputVars);
244    }
245
246    private struct SplineData {
247      public double a, b, c, d, x, y;
248    };
249
250
251    /*
252     * The procedure Quincunx, takes as arguments
253       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
254       and the second supradiagonal of the banded matrix. The vector on
255       the LHS of the equation (80) is placed in q which contains the solution on the
256       completion of the procedure.
257    */
258    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
259      // { factorisation}
260      u[-1] = 0;
261      u[0] = 0;
262      for (int j = 1; j <= n - 1; j++) {
263        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
264        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
265        w[j] = w[j] / u[j];
266      }
267      // { forward substitution}
268      for (int j = 1; j <= n - 1; j++) {
269        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
270      }
271      for (int j = 1; j < n - 1; j++) {
272        q[j] = q[j] / u[j];
273      }
274      // { back substitution}
275      q[n + 1] = 0;
276      q[n] = 0;
277      for (int j = n - 1; j >= 1; j--) {
278        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
279      }
280    }
281
282    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
283      // see Pollock: Smoothing Splines
284      int n = x.Length;
285      double lambda = 1.0;
286      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
287      SplineData[] S = new SplineData[x.Length + 1];
288      for (int i = 0; i < x.Length; i++) {
289        S[i].x = x[i];
290        S[i].y = y[i];
291      }
292      S[x.Length].x = S[x.Length - 1].x;
293      S[x.Length].y = S[x.Length - 1].y;
294
295      // var
296      double[] h = new double[n],
297        r = new double[n],
298        f = new double[n],
299        p = new double[n];
300      var q = new MyArray<double>(-1, n + 3);
301      var u = new MyArray<double>(-1, n + 3);
302      var v = new MyArray<double>(-1, n + 3);
303      var w = new MyArray<double>(-1, n + 3);
304      double mu;
305
306      mu = 2 * (1 - lambda) / (3 * lambda);
307      h[0] = S[1].x - S[0].x;
308      r[0] = 3 / h[0];
309      for (int i = 1; i < n - 1; i++) {
310        h[i] = S[i + 1].x - S[i].x;
311        r[i] = 3 / h[i];
312        f[i] = -(r[i - 1] + r[i]);
313        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
314        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
315      }
316      for (int i = 1; i < n; i++) {
317        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
318        u[i] = mu * u[i] + p[i];
319        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
320        v[i] = mu * v[i] + h[i];
321        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
322      }
323      Quincunx(n, u, v, w, q);
324      // { Spline P arameters}
325      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
326      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
327      S[0].a = q[1] / (3 * h[0]);
328      S[0].b = 0;
329      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
330      r[0] = 0;
331      for (int j = 1; j < n; j++) {
332        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
333        S[j].b = q[j];
334        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
335        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
336        S[j].d = y[j] - mu * S[j].d * sigma[j];
337      }
338
339      //   { SmoothingSpline}
340    }
341
342    public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
343      double s = x.Length + 1;
344      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
345
346      var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
347
348      Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
349    }
350
351
352    public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
353      out double avgTrainRMSE,
354      out double loocvRMSE) {
355      int info;
356
357      double[] x_loo = new double[x.Length - 1];
358      double[] y_loo = new double[y.Length - 1];
359
360      var models = new List<IRegressionModel>();
361      var sumTrainRmse = 0.0;
362      var sse = 0.0;
363
364      for (int i = 0; i < x.Length; i++) {
365        Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1));
366        Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1));
367
368        var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, x_loo.Length + 1, targetVar);
369
370        var y_pred = model.GetEstimatedValue(x[i]);
371        double res = y[i] - y_pred;
372        sse += res * res;
373
374        models.Add(model);
375
376        // add current row for next iteration
377        if (i < x_loo.Length) {
378          x_loo[i] = x[i];
379          y_loo[i] = y[i];
380        }
381      }
382
383      loocvRMSE = Math.Sqrt(sse / x.Length);
384      avgTrainRMSE = sumTrainRmse / x.Length;
385
386      return new RegressionEnsembleModel(models);
387    }
388
389
390    // automatically determines tolerance using loo cv
391    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
392      out double optTolerance, out double looRMSE) {
393      var maxTolerance = y.StandardDeviation();
394      var minTolerance = maxTolerance * 1E-6;
395      optTolerance = maxTolerance;
396      var tol = maxTolerance;
397      looRMSE = double.PositiveInfinity;
398      IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
399      while (tol > minTolerance) {
400        double loo_rmse;
401        double avgTrainRmse;
402        var model = Splines.CalculateSmoothingSplineReinsch(
403          x, y, tol, targetVar, inputVars, out avgTrainRmse, out loo_rmse);
404
405        if (loo_rmse < looRMSE) {
406          looRMSE = loo_rmse;
407          optTolerance = tol;
408          bestModel = model;
409        }
410        tol *= 0.85;
411      }
412      return bestModel;
413    }
414
415    public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
416      var minX = xOrig.Min();
417      var maxX = xOrig.Max();
418      var range = maxX - minX;
419
420      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
421      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
422
423      // See Smoothing by Spline Functions, Reinsch, 1967
424      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
425      int n = xOrig.Length;
426      var x = new MyArray<double>(1, xOrig);
427      var y = new MyArray<double>(1, yOrig);
428      var inv_dy = new MyArray<double>(1, w);
429
430      int n1 = 1;
431      int n2 = n;
432
433      // results
434      var a = new MyArray<double>(n1, n);
435      var b = new MyArray<double>(n1, n);
436      var c = new MyArray<double>(n1, n);
437      var d = new MyArray<double>(n1, n);
438
439      // smooth
440      {
441        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
442        MyArray<double>
443          r = new MyArray<double>(n1 - 1, n + 2),
444          r1 = new MyArray<double>(n1 - 1, n + 2),
445          r2 = new MyArray<double>(n1 - 1, n + 2),
446          t = new MyArray<double>(n1 - 1, n + 2),
447          t1 = new MyArray<double>(n1 - 1, n + 2),
448          u = new MyArray<double>(n1 - 1, n + 2),
449          v = new MyArray<double>(n1 - 1, n + 2);
450        m1 = n1 - 1;
451        m2 = n2 + 1;
452        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
453          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
454        m1 = n1 + 1;
455        m2 = n2 - 1;
456        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
457        for (i = m1; i <= m2; i++) {
458          g = h; h = x[i + 1] - x[i];
459          e = f; f = (y[i + 1] - y[i]) / h;
460          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
461          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
462          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
463        }
464        for (i = m1; i <= m2; i++) {
465          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
466          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
467          d[i] = r[i] * r2[i + 2];
468        }
469        f2 = -s;
470        next:
471        for (i = m1; i <= m2; i++) {
472          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
473          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
474          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
475          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
476        }
477        for (i = m2; i >= m1; i--) {
478          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
479        }
480        e = h = 0;
481        for (i = n1; i <= m2; i++) {
482          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
483          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
484        }
485        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
486        g = f2; f2 = e * p * p;
487        if (f2 >= s || f2 <= g) goto fin;
488        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
489        for (i = m1; i <= m2; i++) {
490          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
491          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
492          f = f + g * r[i] * g; r[i] = g;
493        }
494        h = e - p * f; if (h <= 0) goto fin;
495        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
496
497        fin:
498        for (i = n1; i <= n2; i++) {
499          a[i] = y[i] - p * v[i];
500          c[i] = u[i];
501        }
502        for (i = n1; i <= m2; i++) {
503          h = x[i + 1] - x[i];
504          d[i] = (c[i + 1] - c[i]) / (3 * h);
505          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
506        }
507      }
508
509      // extrapolate for xx > x[n2]
510      b[b.Length] = b[b.Length - 1];
511      d[1] = 0;
512
513      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
514    }
515
516    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
517      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
518      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
519      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
520      int n = x.Length;
521
522      SparseMatrix delta = new SparseMatrix(n - 2, n);
523      // double[,] delta = new double[n - 2, n];
524      //double[,] W = new double[n - 2, n - 2];
525      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
526      Matrix WInvD = new DenseMatrix(n - 2, n);
527
528      // double[,] W_inv_D = new double[n - 2, n];
529      // double[,] K = new double[n, n];
530
531      // go over successive knots to determine distances and fill Delta and W
532      for (int i = 0; i < n - 2; i++) {
533        double h = x[i + 1] - x[i];
534        double h_next = x[i + 2] - x[i + 1];
535        delta[i, i] = 1.0 / h;
536        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
537        delta[i, i + 2] = 1.0 / h_next;
538        W[i, i] = (h + h_next) / 3;
539        if (i > 0) {
540          W[i - 1, i] =
541           W[i, i - 1] = h / 6.0;
542        }
543      }
544
545      // WInvD = W.Cholesky().Solve(delta);
546      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
547
548      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
549      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
550
551      var K = delta.TransposeThisAndMultiply(WInvD);
552
553      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
554
555      for (int i = 0; i < n; i++) {
556        for (int j = 0; j < n; j++) {
557          K[i, j] *= lambda;
558          if (i == j) K[i, j] += 1;
559        }
560      }
561
562      // solve for y
563      // double[] s;
564      // int solverInfo;
565      // alglib.densesolverreport solverRep;
566      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
567
568      var s = K.Solve(new DenseVector(y)).ToArray();
569
570      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
571  (IRegressionProblemData)Problem.ProblemData.Clone())));
572    }
573
574    public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
575      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
576      // sort by x
577      Array.Sort(x, sortedIdx);
578
579      var xl = new List<double>();
580      var yl = new List<double>();
581      var wl = new List<double>();
582
583      int n = x.Length;
584      var range = x[n - 1] - x[0];
585      var binSize = range / n / 20;
586      {
587        // binning
588        int i = 0;
589        while (i < n) {
590          int k = 0;
591          int j = i;
592          double sumX = 0.0;
593          double sumY = 0.0;
594          double sumW = 0.0;
595          while (j < n && x[j] - x[i] <= binSize) {
596            k++;
597            sumX += x[j];
598            sumY += y[sortedIdx[j]];
599            sumW += w[sortedIdx[j]];
600            j++;
601          }
602          var avgX = sumX / k;
603          if (scaling) avgX = (avgX - x[0]) / range;
604          xl.Add(avgX);
605          yl.Add(sumY / k);
606          wl.Add(sumW);
607          i += k;
608        }
609      }
610
611      x2 = xl.ToArray();
612      y2 = yl.ToArray();
613      w2 = wl.ToArray();
614    }
615
616    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
617      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
618        (IRegressionProblemData)Problem.ProblemData.Clone())));
619
620    }
621    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
622      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
623        (IRegressionProblemData)Problem.ProblemData.Clone())));
624    }
625  }
626
627
628  // array with non-zero lower index
629  internal class MyArray<T> {
630    private T[] arr;
631    private int lowerBound;
632
633    public int Length { get { return arr.Length; } }
634
635    public T this[int key] {
636      get {
637        return arr[key - lowerBound];
638      }
639      set {
640        arr[key - lowerBound] = value;
641      }
642    }
643
644    public MyArray(int lowerBound, int numElements) {
645      this.lowerBound = lowerBound;
646      arr = new T[numElements];
647    }
648    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
649      Array.Copy(source, arr, source.Length);
650    }
651
652    public T[] ToArray() {
653      var res = new T[arr.Length];
654      Array.Copy(arr, res, res.Length);
655      return res;
656    }
657  }
658
659
660  // UNFINISHED
661  public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
662    private MyArray<double> a;
663    private MyArray<double> b;
664    private MyArray<double> c;
665    private MyArray<double> d;
666    private MyArray<double> x;
667    private double offset;
668    private double scale;
669
670    public string TargetVariable { get; set; }
671
672    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
673
674    public event EventHandler TargetVariableChanged;
675
676    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
677      this.TargetVariable = orig.TargetVariable;
678      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
679      this.a = orig.a;
680      this.b = orig.b;
681      this.c = orig.c;
682      this.d = orig.d;
683      this.x = orig.x;
684      this.scale = orig.scale;
685      this.offset = orig.offset;
686    }
687    internal ReinschSmoothingSplineModel(
688      MyArray<double> a,
689      MyArray<double> b,
690      MyArray<double> c,
691      MyArray<double> d,
692      MyArray<double> x,
693      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
694      this.a = a;
695      this.b = b;
696      this.c = c;
697      this.d = d;
698      this.x = x;
699      this.TargetVariable = targetVar;
700      this.VariablesUsedForPrediction = inputs;
701      this.scale = scale;
702      this.offset = offset;
703    }
704
705    public override IDeepCloneable Clone(Cloner cloner) {
706      return new ReinschSmoothingSplineModel(this, cloner);
707    }
708
709    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
710      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
711    }
712
713    public double GetEstimatedValue(double xx) {
714      int n = a.Length;
715      if (xx <= x[1]) {
716        double h = xx - x[1];
717        return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
718      } else if (xx >= x[n]) {
719        double h = xx - x[n];
720        return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
721      } else {
722        // binary search
723        int lower = 1;
724        int upper = n;
725        while (true) {
726          if (upper < lower) throw new InvalidProgramException();
727          int i = lower + (upper - lower) / 2;
728          if (x[i] <= xx && xx < x[i + 1]) {
729            double h = xx - x[i];
730            return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
731          } else if (xx < x[i]) {
732            upper = i - 1;
733          } else {
734            lower = i + 1;
735          }
736        }
737      }
738    }
739
740    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
741      int n = x.Length;
742      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
743      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
744        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
745      }
746      foreach (var xx in product.Select(xi => (xi - offset) * scale)) {
747        yield return GetEstimatedValue(xx);
748      }
749    }
750  }
751
752  // UNFINISHED
753  public class SmoothingSplineModel : NamedItem, IRegressionModel {
754    private double[] s;
755    private IInterpolation interpolation;
756
757    public string TargetVariable { get; set; }
758
759    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
760
761    public event EventHandler TargetVariableChanged;
762
763    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
764      this.TargetVariable = orig.TargetVariable;
765      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
766      this.s = orig.s; // TODO
767      this.interpolation = orig.interpolation;
768    }
769    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
770      this.s = s;
771      this.TargetVariable = targetVar;
772      this.VariablesUsedForPrediction = inputs;
773      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
774    }
775
776    public override IDeepCloneable Clone(Cloner cloner) {
777      return new SmoothingSplineModel(this, cloner);
778    }
779
780    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
781      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
782    }
783
784    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
785      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
786
787        yield return interpolation.Interpolate(x);
788
789      }
790    }
791  }
792
793  // UNFINISHED
794  public class Spline1dModel : NamedItem, IRegressionModel {
795    private alglib.spline1dinterpolant interpolant;
796
797    public string TargetVariable { get; set; }
798
799    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
800
801    public event EventHandler TargetVariableChanged;
802
803    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
804      this.TargetVariable = orig.TargetVariable;
805      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
806      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
807    }
808    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
809      this.interpolant = interpolant;
810      this.TargetVariable = targetVar;
811      this.VariablesUsedForPrediction = inputs;
812    }
813
814    public override IDeepCloneable Clone(Cloner cloner) {
815      return new Spline1dModel(this, cloner);
816    }
817
818    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
819      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
820    }
821
822    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
823      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
824      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
825        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
826      }
827
828      foreach (var x in product) {
829        yield return alglib.spline1dcalc(interpolant, x);
830      }
831    }
832  }
833
834
835  // UNFINISHED
836  public class MathNetSplineModel : NamedItem, IRegressionModel {
837    private IInterpolation interpolant;
838
839    public string TargetVariable { get; set; }
840
841    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
842
843    public event EventHandler TargetVariableChanged;
844
845    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
846      this.TargetVariable = orig.TargetVariable;
847      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
848      this.interpolant = orig.interpolant; // TODO COPY!
849    }
850    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
851      this.interpolant = interpolant;
852      this.TargetVariable = targetVar;
853      this.VariablesUsedForPrediction = inputs;
854    }
855
856    public override IDeepCloneable Clone(Cloner cloner) {
857      return new MathNetSplineModel(this, cloner);
858    }
859
860    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
861      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
862    }
863
864    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
865      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
866        yield return interpolant.Interpolate(x);
867      }
868    }
869  }
870}
Note: See TracBrowser for help on using the repository browser.