source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15369

Last change on this file since 15369 was 15369, checked in by gkronber, 5 years ago

#2789 added penalized regression splines as implemented in alglib.

File size: 30.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Concurrent;
24using System.Collections.Generic;
25using System.Linq;
26using System.Threading;
27using System.Threading.Tasks;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Data;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Symbolic;
37using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
38using HeuristicLab.Random;
39using MathNet.Numerics.Interpolation;
40using MathNet.Numerics.LinearAlgebra.Double;
41using MathNet.Numerics.LinearAlgebra.Double.Solvers;
42
43namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
44  [Item("Splines", "")]
45  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
46  [StorableClass]
47  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
48    [StorableConstructor]
49    private Splines(bool deserializing) : base(deserializing) { }
50    [StorableHook(HookType.AfterDeserialization)]
51    private void AfterDeserialization() {
52    }
53
54    private Splines(Splines original, Cloner cloner)
55      : base(original, cloner) {
56    }
57    public override IDeepCloneable Clone(Cloner cloner) {
58      return new Splines(this, cloner);
59    }
60
61    public Splines()
62      : base() {
63      var validTypes = new ItemSet<StringValue>(
64        new[] {
65        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
66          "Cubic - Natural (Math.NET)",
67          "Polynomial (Math.NET)",
68          "Rational (Math.NET)",
69          "LogLinear (Math.NET)",
70          "Common (Math.NET)",
71          "Smoothing Spline (Mine)",
72          "Smoothing Spline (Reinsch)",
73          "Smoothing Spline (Reinsch with automatic tolerance determination using CV)",
74          "B-Spline Smoothing",
75          "Penalized Regression Spline (alglib)"
76      }.Select(s => new StringValue(s)));
77
78      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.First()));
79      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
80      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
81      Problem = new RegressionProblem();
82    }
83
84
85    protected override void Run(CancellationToken cancellationToken) {
86      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
87      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
88        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
89
90      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
91      if (inputVars.Length > 3) throw new ArgumentException();
92
93      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
94      if (inputVars.Length == 1) {
95
96        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
97        alglib.spline1dinterpolant c;
98        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
99        switch (type) {
100          case "Monotone":
101            alglib.spline1dbuildmonotone(x, y, out c);
102            AddAlglibSplineResult(c, inputVars);
103            break;
104          case "Akima":
105            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
106            break;
107            ;
108          case "Catmull-Rom":
109            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
110            break;
111
112          case "Cubic":
113            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
114            break;
115
116          case "Linear":
117            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
118            break;
119          case "Cubic - Natural (Math.NET)": {
120              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
121              AddMathNetSplineResult(spline, inputVars);
122              break;
123            }
124          case "Common (Math.NET)": {
125              var spline = MathNet.Numerics.Interpolate.Common(x, y);
126              AddMathNetSplineResult(spline, inputVars);
127              break;
128            }
129          case "LogLinear (Math.NET)": {
130              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
131              AddMathNetSplineResult(spline, inputVars);
132              break;
133            }
134          case "Polynomial (Math.NET)": {
135              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
136              AddMathNetSplineResult(spline, inputVars);
137              break;
138            }
139          case "Rational (Math.NET)": {
140              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
141              AddMathNetSplineResult(spline, inputVars);
142              break;
143            }
144          case "Smoothing Spline (Mine)": {
145              CalculateSmoothingSpline(x, y, inputVars);
146              break;
147            }
148          case "Smoothing Spline (Reinsch)": {
149              CalculateSmoothingSplineReinsch(x, y, inputVars);
150              break;
151            }
152          case "Smoothing Spline (Reinsch with automatic tolerance determination using CV)": {
153              var model = CalculateSmoothingSplineReinschWithAutomaticTolerance(x, y, inputVars, Problem.ProblemData.TargetVariable);
154              Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone())));
155              break;
156            }
157          case "B-Spline Smoothing": {
158              BSplineSmoothing(x, y, inputVars);
159              break;
160            }
161          case "Penalized Regression Spline (alglib)":
162          {
163              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
164              var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
165              var targetVar = Problem.ProblemData.TargetVariable;
166              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
167              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
168
169              break;
170          }
171          default: throw new NotSupportedException();
172        }
173
174      }
175    }
176
177
178
179    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars)
180    {
181      int info;
182      alglib.spline1dinterpolant interpolant;
183      alglib.spline1dfitreport rep;
184      int n = x.Length;
185      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
186
187      alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
188      return new Spline1dModel(interpolant, targetVar, inputVars);
189    }
190
191    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
192      Array.Sort(x, y);
193
194      CubicSpline2(x, y, inputVars);
195
196      //  // see Elements of Statistical Learning Appendix: Computations for Splines
197      //  int M = 4; // order of spline (cubic)
198      //  int K = x.Length;
199      //  int n = x.Length;
200      //
201      //  double[] t = new double[K + 2 * M];
202      //  for (int i = 0; i < M; i++) t[i] = x[0];
203      //  for (int i = K; i < K + 2 * M; i++) t[i] = x[K - 1];
204      //
205      //  double[,] B = new double[n, n + 4]; // basis function matrix
206      //  double[,] W = new double[n + 4, n + 4]; // penalty matrix (W[j,k] =
207      //  for (int i = 0; i < M; i++) B[i] = new double[K + 2 * M];
208      //  for (int j = 0; j < K; j++) {
209      //    for (int i = 0; i < K + 2M - 1; i++) {
210      //      if (t[i] <= x[j] && x[j] < t[i + 1])
211      //        B[1][i] = 1.0;
212      //    }
213      //  }
214
215
216    }
217
218    private struct SplineData {
219      public double a, b, c, d, x, y;
220    };
221
222
223    /*
224     * The procedure Quincunx, takes as arguments
225       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
226       and the second supradiagonal of the banded matrix. The vector on
227       the LHS of the equation (80) is placed in q which contains the solution on the
228       completion of the procedure.
229    */
230    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
231      // { factorisation}
232      u[-1] = 0;
233      u[0] = 0;
234      for (int j = 1; j <= n - 1; j++) {
235        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
236        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
237        w[j] = w[j] / u[j];
238      }
239      // { forward substitution}
240      for (int j = 1; j <= n - 1; j++) {
241        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
242      }
243      for (int j = 1; j < n - 1; j++) {
244        q[j] = q[j] / u[j];
245      }
246      // { back substitution}
247      q[n + 1] = 0;
248      q[n] = 0;
249      for (int j = n - 1; j >= 1; j--) {
250        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
251      }
252    }
253
254    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
255      // see Pollock: Smoothing Splines
256      int n = x.Length;
257      double lambda = 1.0;
258      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
259      SplineData[] S = new SplineData[x.Length + 1];
260      for (int i = 0; i < x.Length; i++) {
261        S[i].x = x[i];
262        S[i].y = y[i];
263      }
264      S[x.Length].x = S[x.Length - 1].x;
265      S[x.Length].y = S[x.Length - 1].y;
266
267      // var
268      double[] h = new double[n],
269        r = new double[n],
270        f = new double[n],
271        p = new double[n];
272      var q = new MyArray<double>(-1, n + 3);
273      var u = new MyArray<double>(-1, n + 3);
274      var v = new MyArray<double>(-1, n + 3);
275      var w = new MyArray<double>(-1, n + 3);
276      double mu;
277
278      mu = 2 * (1 - lambda) / (3 * lambda);
279      h[0] = S[1].x - S[0].x;
280      r[0] = 3 / h[0];
281      for (int i = 1; i < n - 1; i++) {
282        h[i] = S[i + 1].x - S[i].x;
283        r[i] = 3 / h[i];
284        f[i] = -(r[i - 1] + r[i]);
285        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
286        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
287      }
288      for (int i = 1; i < n; i++) {
289        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
290        u[i] = mu * u[i] + p[i];
291        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
292        v[i] = mu * v[i] + h[i];
293        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
294      }
295      Quincunx(n, u, v, w, q);
296      // { Spline P arameters}
297      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
298      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
299      S[0].a = q[1] / (3 * h[0]);
300      S[0].b = 0;
301      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
302      r[0] = 0;
303      for (int j = 1; j < n; j++) {
304        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
305        S[j].b = q[j];
306        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
307        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
308        S[j].d = y[j] - mu * S[j].d * sigma[j];
309      }
310
311      //   { SmoothingSpline}
312    }
313
314    public void CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars) {
315      double s = xOrig.Length + 1;
316      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
317
318      var model = CalculateSmoothingSplineReinsch(xOrig, yOrig, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
319
320      Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone())));
321
322
323    }
324
325
326
327    public static IRegressionModel CalculateSmoothingSplineReinschWithAutomaticTolerance(double[] xOrig, double[] yOrig, string[] inputVars, string targetVar) {
328      int n = xOrig.Length;
329      var rows = Enumerable.Range(0, n);
330      var rand = new FastRandom(1234);
331      var trainRows = rows.Shuffle(rand).Take((int)(n * 0.66)).ToArray();
332      var testRows = rows.Except(trainRows).ToArray();
333
334      var trainX = trainRows.Select(i => xOrig[i]).ToArray();
335      var trainY = trainRows.Select(i => yOrig[i]).ToArray();
336      var testX = testRows.Select(i => xOrig[i]).ToArray();
337      var testY = testRows.Select(i => yOrig[i]).ToArray();
338
339      var shuffledDs = new Dataset(inputVars.Concat(new string[] { targetVar }), new[] { trainX.Concat(testX).ToArray(), trainY.Concat(testY).ToArray() });
340      var shuffeledProblemData = new RegressionProblemData(shuffledDs, inputVars, targetVar);
341      shuffeledProblemData.TrainingPartition.Start = 0;
342      shuffeledProblemData.TrainingPartition.End = trainX.Length;
343      shuffeledProblemData.TestPartition.Start = trainX.Length;
344      shuffeledProblemData.TestPartition.End = shuffledDs.Rows + 1;
345
346      double curTol = trainY.StandardDeviation();
347      double prevTestRMSE = double.PositiveInfinity;
348      double testRMSE = double.PositiveInfinity;
349      IRegressionModel prevModel = null;
350      IRegressionModel model = null;
351      do {                                     
352        prevModel = model;
353        prevTestRMSE = testRMSE;
354        model = CalculateSmoothingSplineReinsch(trainX, trainY, inputVars, curTol, s: trainX.Length + 1, targetVar: targetVar);
355
356        var sol = model.CreateRegressionSolution(shuffeledProblemData);
357        var trainRMSE = sol.TrainingRootMeanSquaredError;
358        testRMSE = sol.TestRootMeanSquaredError;
359        curTol = Math.Max(curTol * 0.5, 1e-12 * trainY.StandardDeviation());
360      } while (testRMSE < prevTestRMSE);
361      return prevModel;
362    }
363
364
365    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
366      var minX = xOrig.Min();
367      var maxX = xOrig.Max();
368      var range = maxX - minX;
369
370      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
371      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
372
373      // See Smoothing by Spline Functions, Reinsch, 1967
374      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
375      int n = xOrig.Length;
376      var x = new MyArray<double>(1, xOrig);
377      var y = new MyArray<double>(1, yOrig);
378      var inv_dy = new MyArray<double>(1, w);
379
380      int n1 = 1;
381      int n2 = n;
382
383      // results
384      var a = new MyArray<double>(n1, n);
385      var b = new MyArray<double>(n1, n);
386      var c = new MyArray<double>(n1, n);
387      var d = new MyArray<double>(n1, n);
388
389      // smooth
390      {
391        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
392        MyArray<double>
393          r = new MyArray<double>(n1 - 1, n + 2),
394          r1 = new MyArray<double>(n1 - 1, n + 2),
395          r2 = new MyArray<double>(n1 - 1, n + 2),
396          t = new MyArray<double>(n1 - 1, n + 2),
397          t1 = new MyArray<double>(n1 - 1, n + 2),
398          u = new MyArray<double>(n1 - 1, n + 2),
399          v = new MyArray<double>(n1 - 1, n + 2);
400        m1 = n1 - 1;
401        m2 = n2 + 1;
402        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
403          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
404        m1 = n1 + 1;
405        m2 = n2 - 1;
406        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
407        for (i = m1; i <= m2; i++) {
408          g = h; h = x[i + 1] - x[i];
409          e = f; f = (y[i + 1] - y[i]) / h;
410          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
411          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
412          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
413        }
414        for (i = m1; i <= m2; i++) {
415          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
416          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
417          d[i] = r[i] * r2[i + 2];
418        }
419        f2 = -s;
420        next:
421        for (i = m1; i <= m2; i++) {
422          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
423          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
424          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
425          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
426        }
427        for (i = m2; i >= m1; i--) {
428          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
429        }
430        e = h = 0;
431        for (i = n1; i <= m2; i++) {
432          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
433          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
434        }
435        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
436        g = f2; f2 = e * p * p;
437        if (f2 >= s || f2 <= g) goto fin;
438        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
439        for (i = m1; i <= m2; i++) {
440          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
441          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
442          f = f + g * r[i] * g; r[i] = g;
443        }
444        h = e - p * f; if (h <= 0) goto fin;
445        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
446
447        fin:
448        for (i = n1; i <= n2; i++) {
449          a[i] = y[i] - p * v[i];
450          c[i] = u[i];
451        }
452        for (i = n1; i <= m2; i++) {
453          h = x[i + 1] - x[i];
454          d[i] = (c[i + 1] - c[i]) / (3 * h);
455          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
456        }
457      }
458
459      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
460    }
461
462    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
463      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
464      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
465      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
466      int n = x.Length;
467
468      SparseMatrix delta = new SparseMatrix(n - 2, n);
469      // double[,] delta = new double[n - 2, n];
470      //double[,] W = new double[n - 2, n - 2];
471      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
472      Matrix WInvD = new DenseMatrix(n - 2, n);
473
474      // double[,] W_inv_D = new double[n - 2, n];
475      // double[,] K = new double[n, n];
476
477      // go over successive knots to determine distances and fill Delta and W
478      for (int i = 0; i < n - 2; i++) {
479        double h = x[i + 1] - x[i];
480        double h_next = x[i + 2] - x[i + 1];
481        delta[i, i] = 1.0 / h;
482        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
483        delta[i, i + 2] = 1.0 / h_next;
484        W[i, i] = (h + h_next) / 3;
485        if (i > 0) {
486          W[i - 1, i] =
487           W[i, i - 1] = h / 6.0;
488        }
489      }
490
491      // WInvD = W.Cholesky().Solve(delta);
492      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
493
494      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
495      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
496
497      var K = delta.TransposeThisAndMultiply(WInvD);
498
499      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
500
501      for (int i = 0; i < n; i++) {
502        for (int j = 0; j < n; j++) {
503          K[i, j] *= lambda;
504          if (i == j) K[i, j] += 1;
505        }
506      }
507
508      // solve for y
509      // double[] s;
510      // int solverInfo;
511      // alglib.densesolverreport solverRep;
512      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
513
514      var s = K.Solve(new DenseVector(y)).ToArray();
515
516      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
517  (IRegressionProblemData)Problem.ProblemData.Clone())));
518    }
519
520    private static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
521      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
522      // sort by x
523      Array.Sort(x, sortedIdx);
524
525      var xl = new List<double>();
526      var yl = new List<double>();
527      var wl = new List<double>();
528
529      int n = x.Length;
530      var range = x[n - 1] - x[0];
531      var binSize = range / n / 20;
532      {
533        // binning
534        int i = 0;
535        while (i < n) {
536          int k = 0;
537          int j = i;
538          double sumX = 0.0;
539          double sumY = 0.0;
540          double sumW = 0.0;
541          while (j < n && x[j] - x[i] <= binSize) {
542            k++;
543            sumX += x[j];
544            sumY += y[sortedIdx[j]];
545            sumW += w[sortedIdx[j]];
546            j++;
547          }
548          var avgX = sumX / k;
549          if (scaling) avgX = (avgX - x[0]) / range;
550          xl.Add(avgX);
551          yl.Add(sumY / k);
552          wl.Add(sumW);
553          i += k;
554        }
555      }
556
557      x2 = xl.ToArray();
558      y2 = yl.ToArray();
559      w2 = wl.ToArray();
560    }
561
562    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
563      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
564        (IRegressionProblemData)Problem.ProblemData.Clone())));
565
566    }
567    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
568      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
569        (IRegressionProblemData)Problem.ProblemData.Clone())));
570    }
571  }
572
573
574  // array with non-zero lower index
575  internal class MyArray<T> {
576    private T[] arr;
577    private int lowerBound;
578
579    public int Length { get { return arr.Length; } }
580
581    public T this[int key] {
582      get {
583        return arr[key - lowerBound];
584      }
585      set {
586        arr[key - lowerBound] = value;
587      }
588    }
589
590    public MyArray(int lowerBound, int numElements) {
591      this.lowerBound = lowerBound;
592      arr = new T[numElements];
593    }
594    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
595      Array.Copy(source, arr, source.Length);
596    }
597
598    public T[] ToArray() {
599      var res = new T[arr.Length];
600      Array.Copy(arr, res, res.Length);
601      return res;
602    }
603  }
604
605
606  // UNFINISHED
607  internal class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
608    private MyArray<double> a;
609    private MyArray<double> b;
610    private MyArray<double> c;
611    private MyArray<double> d;
612    private MyArray<double> x;
613    private double offset;
614    private double scale;
615
616    public string TargetVariable { get; set; }
617
618    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
619
620    public event EventHandler TargetVariableChanged;
621
622    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
623      this.TargetVariable = orig.TargetVariable;
624      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
625      this.a = orig.a;
626      this.b = orig.b;
627      this.c = orig.c;
628      this.d = orig.d;
629      this.x = orig.x;
630      this.scale = orig.scale;
631      this.offset = orig.offset;
632    }
633    public ReinschSmoothingSplineModel(
634      MyArray<double> a,
635      MyArray<double> b,
636      MyArray<double> c,
637      MyArray<double> d,
638      MyArray<double> x,
639      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
640      this.a = a;
641      this.b = b;
642      this.c = c;
643      this.d = d;
644      this.x = x;
645      this.TargetVariable = targetVar;
646      this.VariablesUsedForPrediction = inputs;
647      this.scale = scale;
648      this.offset = offset;
649
650      // extrapolate for xx > x[n2]
651      b[b.Length] = b[b.Length - 1];
652      d[b.Length] = d[d.Length - 1];
653    }
654
655    public override IDeepCloneable Clone(Cloner cloner) {
656      return new ReinschSmoothingSplineModel(this, cloner);
657    }
658
659    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
660      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
661    }
662
663    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
664      int n = x.Length;
665      foreach (var xx in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).Select(xi => (xi - offset) * scale)) {
666        if (xx <= x[1]) {
667          double h = xx - x[1];
668          yield return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
669        } else if (xx >= x[n]) {
670          double h = xx - x[n];
671          yield return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
672        } else {
673          // binary search
674          int lower = 1;
675          int upper = n;
676          while (true) {
677            if (upper < lower) throw new InvalidProgramException();
678            int i = lower + (upper - lower) / 2;
679            if (x[i] <= xx && xx < x[i + 1]) {
680              double h = xx - x[i];
681              yield return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
682              break;
683            } else if (xx < x[i]) {
684              upper = i - 1;
685            } else {
686              lower = i + 1;
687            }
688          }
689        }
690      }
691    }
692  }
693
694  // UNFINISHED
695  public class SmoothingSplineModel : NamedItem, IRegressionModel {
696    private double[] s;
697    private IInterpolation interpolation;
698
699    public string TargetVariable { get; set; }
700
701    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
702
703    public event EventHandler TargetVariableChanged;
704
705    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
706      this.TargetVariable = orig.TargetVariable;
707      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
708      this.s = orig.s; // TODO
709      this.interpolation = orig.interpolation;
710    }
711    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
712      this.s = s;
713      this.TargetVariable = targetVar;
714      this.VariablesUsedForPrediction = inputs;
715      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
716    }
717
718    public override IDeepCloneable Clone(Cloner cloner) {
719      return new SmoothingSplineModel(this, cloner);
720    }
721
722    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
723      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
724    }
725
726    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
727      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
728
729        yield return interpolation.Interpolate(x);
730
731      }
732    }
733  }
734
735  // UNFINISHED
736  public class Spline1dModel : NamedItem, IRegressionModel {
737    private alglib.spline1dinterpolant interpolant;
738
739    public string TargetVariable { get; set; }
740
741    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
742
743    public event EventHandler TargetVariableChanged;
744
745    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
746      this.TargetVariable = orig.TargetVariable;
747      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
748      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
749    }
750    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
751      this.interpolant = interpolant;
752      this.TargetVariable = targetVar;
753      this.VariablesUsedForPrediction = inputs;
754    }
755
756    public override IDeepCloneable Clone(Cloner cloner) {
757      return new Spline1dModel(this, cloner);
758    }
759
760    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
761      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
762    }
763
764    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
765      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
766        yield return alglib.spline1dcalc(interpolant, x);
767      }
768    }
769  }
770
771
772  // UNFINISHED
773  public class MathNetSplineModel : NamedItem, IRegressionModel {
774    private IInterpolation interpolant;
775
776    public string TargetVariable { get; set; }
777
778    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
779
780    public event EventHandler TargetVariableChanged;
781
782    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
783      this.TargetVariable = orig.TargetVariable;
784      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
785      this.interpolant = orig.interpolant; // TODO COPY!
786    }
787    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
788      this.interpolant = interpolant;
789      this.TargetVariable = targetVar;
790      this.VariablesUsedForPrediction = inputs;
791    }
792
793    public override IDeepCloneable Clone(Cloner cloner) {
794      return new MathNetSplineModel(this, cloner);
795    }
796
797    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
798      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
799    }
800
801    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
802      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
803        yield return interpolant.Interpolate(x);
804      }
805    }
806  }
807}
Note: See TracBrowser for help on using the repository browser.