Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15365

Last change on this file since 15365 was 15365, checked in by gkronber, 7 years ago

#2789 experiments with spline smoothing and additive models

File size: 29.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Concurrent;
24using System.Collections.Generic;
25using System.Linq;
26using System.Threading;
27using System.Threading.Tasks;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Data;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Symbolic;
37using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
38using HeuristicLab.Random;
39using MathNet.Numerics.Interpolation;
40using MathNet.Numerics.LinearAlgebra.Double;
41using MathNet.Numerics.LinearAlgebra.Double.Solvers;
42
43namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
44  [Item("Splines", "")]
45  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
46  [StorableClass]
47  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
48    [StorableConstructor]
49    private Splines(bool deserializing) : base(deserializing) { }
50    [StorableHook(HookType.AfterDeserialization)]
51    private void AfterDeserialization() {
52    }
53
54    private Splines(Splines original, Cloner cloner)
55      : base(original, cloner) {
56    }
57    public override IDeepCloneable Clone(Cloner cloner) {
58      return new Splines(this, cloner);
59    }
60
61    public Splines()
62      : base() {
63      var validTypes = new ItemSet<StringValue>(
64        new[] {
65        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
66          "Cubic - Natural (Math.NET)",
67          "Polynomial (Math.NET)",
68          "Rational (Math.NET)",
69          "LogLinear (Math.NET)",
70          "Common (Math.NET)",
71          "Smoothing Spline (Mine)",
72          "Smoothing Spline (Reinsch)",
73          "Smoothing Spline (Reinsch with automatic tolerance determination using CV)",
74          "B-Spline Smoothing"
75      }.Select(s => new StringValue(s)));
76
77      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.First()));
78      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
79      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
80      Problem = new RegressionProblem();
81    }
82
83
84    protected override void Run(CancellationToken cancellationToken) {
85      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
86      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
87        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
88
89      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
90      if (inputVars.Length > 3) throw new ArgumentException();
91
92      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
93      if (inputVars.Length == 1) {
94
95        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
96        alglib.spline1dinterpolant c;
97        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
98        switch (type) {
99          case "Monotone":
100            alglib.spline1dbuildmonotone(x, y, out c);
101            AddAlglibSplineResult(c, inputVars);
102            break;
103          case "Akima":
104            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
105            break;
106            ;
107          case "Catmull-Rom":
108            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
109            break;
110
111          case "Cubic":
112            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
113            break;
114
115          case "Linear":
116            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
117            break;
118          case "Cubic - Natural (Math.NET)": {
119              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
120              AddMathNetSplineResult(spline, inputVars);
121              break;
122            }
123          case "Common (Math.NET)": {
124              var spline = MathNet.Numerics.Interpolate.Common(x, y);
125              AddMathNetSplineResult(spline, inputVars);
126              break;
127            }
128          case "LogLinear (Math.NET)": {
129              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
130              AddMathNetSplineResult(spline, inputVars);
131              break;
132            }
133          case "Polynomial (Math.NET)": {
134              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
135              AddMathNetSplineResult(spline, inputVars);
136              break;
137            }
138          case "Rational (Math.NET)": {
139              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
140              AddMathNetSplineResult(spline, inputVars);
141              break;
142            }
143          case "Smoothing Spline (Mine)": {
144              CalculateSmoothingSpline(x, y, inputVars);
145              break;
146            }
147          case "Smoothing Spline (Reinsch)": {
148              CalculateSmoothingSplineReinsch(x, y, inputVars);
149              break;
150            }
151          case "Smoothing Spline (Reinsch with automatic tolerance determination using CV)": {
152              var model = CalculateSmoothingSplineReinschWithAutomaticTolerance(x, y, inputVars, Problem.ProblemData.TargetVariable);
153              Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone())));
154              break;
155            }
156          case "B-Spline Smoothing": {
157              BSplineSmoothing(x, y, inputVars);
158              break;
159            }
160          default: throw new NotSupportedException();
161        }
162
163      }
164    }
165
166    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
167      Array.Sort(x, y);
168
169      CubicSpline2(x, y, inputVars);
170
171      //  // see Elements of Statistical Learning Appendix: Computations for Splines
172      //  int M = 4; // order of spline (cubic)
173      //  int K = x.Length;
174      //  int n = x.Length;
175      //
176      //  double[] t = new double[K + 2 * M];
177      //  for (int i = 0; i < M; i++) t[i] = x[0];
178      //  for (int i = K; i < K + 2 * M; i++) t[i] = x[K - 1];
179      //
180      //  double[,] B = new double[n, n + 4]; // basis function matrix
181      //  double[,] W = new double[n + 4, n + 4]; // penalty matrix (W[j,k] =
182      //  for (int i = 0; i < M; i++) B[i] = new double[K + 2 * M];
183      //  for (int j = 0; j < K; j++) {
184      //    for (int i = 0; i < K + 2M - 1; i++) {
185      //      if (t[i] <= x[j] && x[j] < t[i + 1])
186      //        B[1][i] = 1.0;
187      //    }
188      //  }
189
190
191    }
192
193    private struct SplineData {
194      public double a, b, c, d, x, y;
195    };
196
197
198    /*
199     * The procedure Quincunx, takes as arguments
200       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
201       and the second supradiagonal of the banded matrix. The vector on
202       the LHS of the equation (80) is placed in q which contains the solution on the
203       completion of the procedure.
204    */
205    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
206      // { factorisation}
207      u[-1] = 0;
208      u[0] = 0;
209      for (int j = 1; j <= n - 1; j++) {
210        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
211        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
212        w[j] = w[j] / u[j];
213      }
214      // { forward substitution}
215      for (int j = 1; j <= n - 1; j++) {
216        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
217      }
218      for (int j = 1; j < n - 1; j++) {
219        q[j] = q[j] / u[j];
220      }
221      // { back substitution}
222      q[n + 1] = 0;
223      q[n] = 0;
224      for (int j = n - 1; j >= 1; j--) {
225        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
226      }
227    }
228
229    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
230      // see Pollock: Smoothing Splines
231      int n = x.Length;
232      double lambda = 1.0;
233      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
234      SplineData[] S = new SplineData[x.Length + 1];
235      for (int i = 0; i < x.Length; i++) {
236        S[i].x = x[i];
237        S[i].y = y[i];
238      }
239      S[x.Length].x = S[x.Length - 1].x;
240      S[x.Length].y = S[x.Length - 1].y;
241
242      // var
243      double[] h = new double[n],
244        r = new double[n],
245        f = new double[n],
246        p = new double[n];
247      var q = new MyArray<double>(-1, n + 3);
248      var u = new MyArray<double>(-1, n + 3);
249      var v = new MyArray<double>(-1, n + 3);
250      var w = new MyArray<double>(-1, n + 3);
251      double mu;
252
253      mu = 2 * (1 - lambda) / (3 * lambda);
254      h[0] = S[1].x - S[0].x;
255      r[0] = 3 / h[0];
256      for (int i = 1; i < n - 1; i++) {
257        h[i] = S[i + 1].x - S[i].x;
258        r[i] = 3 / h[i];
259        f[i] = -(r[i - 1] + r[i]);
260        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
261        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
262      }
263      for (int i = 1; i < n; i++) {
264        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
265        u[i] = mu * u[i] + p[i];
266        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
267        v[i] = mu * v[i] + h[i];
268        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
269      }
270      Quincunx(n, u, v, w, q);
271      // { Spline P arameters}
272      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
273      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
274      S[0].a = q[1] / (3 * h[0]);
275      S[0].b = 0;
276      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
277      r[0] = 0;
278      for (int j = 1; j < n; j++) {
279        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
280        S[j].b = q[j];
281        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
282        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
283        S[j].d = y[j] - mu * S[j].d * sigma[j];
284      }
285
286      //   { SmoothingSpline}
287    }
288
289    public void CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars) {
290      double s = xOrig.Length + 1;
291      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
292
293      var model = CalculateSmoothingSplineReinsch(xOrig, yOrig, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
294
295      Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone())));
296
297
298    }
299
300
301
302    public static IRegressionModel CalculateSmoothingSplineReinschWithAutomaticTolerance(double[] xOrig, double[] yOrig, string[] inputVars, string targetVar) {
303      int n = xOrig.Length;
304      var rows = Enumerable.Range(0, n);
305      var rand = new FastRandom(1234);
306      var trainRows = rows.Shuffle(rand).Take((int)(n * 0.66)).ToArray();
307      var testRows = rows.Except(trainRows).ToArray();
308
309      var trainX = trainRows.Select(i => xOrig[i]).ToArray();
310      var trainY = trainRows.Select(i => yOrig[i]).ToArray();
311      var testX = testRows.Select(i => xOrig[i]).ToArray();
312      var testY = testRows.Select(i => yOrig[i]).ToArray();
313
314      var shuffledDs = new Dataset(inputVars.Concat(new string[] { targetVar }), new[] { trainX.Concat(testX).ToArray(), trainY.Concat(testY).ToArray() });
315      var shuffeledProblemData = new RegressionProblemData(shuffledDs, inputVars, targetVar);
316      shuffeledProblemData.TrainingPartition.Start = 0;
317      shuffeledProblemData.TrainingPartition.End = trainX.Length;
318      shuffeledProblemData.TestPartition.Start = trainX.Length;
319      shuffeledProblemData.TestPartition.End = shuffledDs.Rows + 1;
320
321      double curTol = trainY.StandardDeviation();
322      double prevTestRMSE = double.PositiveInfinity;
323      double testRMSE = double.PositiveInfinity;
324      IRegressionModel prevModel = null;
325      IRegressionModel model = null;
326      do {                                     
327        prevModel = model;
328        prevTestRMSE = testRMSE;
329        model = CalculateSmoothingSplineReinsch(trainX, trainY, inputVars, curTol, s: trainX.Length + 1, targetVar: targetVar);
330
331        var sol = model.CreateRegressionSolution(shuffeledProblemData);
332        var trainRMSE = sol.TrainingRootMeanSquaredError;
333        testRMSE = sol.TestRootMeanSquaredError;
334        curTol = Math.Max(curTol * 0.5, 1e-12 * trainY.StandardDeviation());
335      } while (testRMSE < prevTestRMSE);
336      return prevModel;
337    }
338
339
340    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
341      var minX = xOrig.Min();
342      var maxX = xOrig.Max();
343      var range = maxX - minX;
344
345      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
346      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
347
348      // See Smoothing by Spline Functions, Reinsch, 1967
349      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
350      int n = xOrig.Length;
351      var x = new MyArray<double>(1, xOrig);
352      var y = new MyArray<double>(1, yOrig);
353      var inv_dy = new MyArray<double>(1, w);
354
355      int n1 = 1;
356      int n2 = n;
357
358      // results
359      var a = new MyArray<double>(n1, n);
360      var b = new MyArray<double>(n1, n);
361      var c = new MyArray<double>(n1, n);
362      var d = new MyArray<double>(n1, n);
363
364      // smooth
365      {
366        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
367        MyArray<double>
368          r = new MyArray<double>(n1 - 1, n + 2),
369          r1 = new MyArray<double>(n1 - 1, n + 2),
370          r2 = new MyArray<double>(n1 - 1, n + 2),
371          t = new MyArray<double>(n1 - 1, n + 2),
372          t1 = new MyArray<double>(n1 - 1, n + 2),
373          u = new MyArray<double>(n1 - 1, n + 2),
374          v = new MyArray<double>(n1 - 1, n + 2);
375        m1 = n1 - 1;
376        m2 = n2 + 1;
377        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
378          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
379        m1 = n1 + 1;
380        m2 = n2 - 1;
381        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
382        for (i = m1; i <= m2; i++) {
383          g = h; h = x[i + 1] - x[i];
384          e = f; f = (y[i + 1] - y[i]) / h;
385          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
386          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
387          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
388        }
389        for (i = m1; i <= m2; i++) {
390          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
391          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
392          d[i] = r[i] * r2[i + 2];
393        }
394        f2 = -s;
395        next:
396        for (i = m1; i <= m2; i++) {
397          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
398          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
399          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
400          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
401        }
402        for (i = m2; i >= m1; i--) {
403          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
404        }
405        e = h = 0;
406        for (i = n1; i <= m2; i++) {
407          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
408          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
409        }
410        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
411        g = f2; f2 = e * p * p;
412        if (f2 >= s || f2 <= g) goto fin;
413        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
414        for (i = m1; i <= m2; i++) {
415          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
416          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
417          f = f + g * r[i] * g; r[i] = g;
418        }
419        h = e - p * f; if (h <= 0) goto fin;
420        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
421
422        fin:
423        for (i = n1; i <= n2; i++) {
424          a[i] = y[i] - p * v[i];
425          c[i] = u[i];
426        }
427        for (i = n1; i <= m2; i++) {
428          h = x[i + 1] - x[i];
429          d[i] = (c[i + 1] - c[i]) / (3 * h);
430          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
431        }
432      }
433
434      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
435    }
436
437    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
438      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
439      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
440      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
441      int n = x.Length;
442
443      SparseMatrix delta = new SparseMatrix(n - 2, n);
444      // double[,] delta = new double[n - 2, n];
445      //double[,] W = new double[n - 2, n - 2];
446      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
447      Matrix WInvD = new DenseMatrix(n - 2, n);
448
449      // double[,] W_inv_D = new double[n - 2, n];
450      // double[,] K = new double[n, n];
451
452      // go over successive knots to determine distances and fill Delta and W
453      for (int i = 0; i < n - 2; i++) {
454        double h = x[i + 1] - x[i];
455        double h_next = x[i + 2] - x[i + 1];
456        delta[i, i] = 1.0 / h;
457        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
458        delta[i, i + 2] = 1.0 / h_next;
459        W[i, i] = (h + h_next) / 3;
460        if (i > 0) {
461          W[i - 1, i] =
462           W[i, i - 1] = h / 6.0;
463        }
464      }
465
466      // WInvD = W.Cholesky().Solve(delta);
467      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
468
469      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
470      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
471
472      var K = delta.TransposeThisAndMultiply(WInvD);
473
474      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
475
476      for (int i = 0; i < n; i++) {
477        for (int j = 0; j < n; j++) {
478          K[i, j] *= lambda;
479          if (i == j) K[i, j] += 1;
480        }
481      }
482
483      // solve for y
484      // double[] s;
485      // int solverInfo;
486      // alglib.densesolverreport solverRep;
487      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
488
489      var s = K.Solve(new DenseVector(y)).ToArray();
490
491      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
492  (IRegressionProblemData)Problem.ProblemData.Clone())));
493    }
494
495    private static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
496      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
497      // sort by x
498      Array.Sort(x, sortedIdx);
499
500      var xl = new List<double>();
501      var yl = new List<double>();
502      var wl = new List<double>();
503
504      int n = x.Length;
505      var range = x[n - 1] - x[0];
506      var binSize = range / n / 20;
507      {
508        // binning
509        int i = 0;
510        while (i < n) {
511          int k = 0;
512          int j = i;
513          double sumX = 0.0;
514          double sumY = 0.0;
515          double sumW = 0.0;
516          while (j < n && x[j] - x[i] <= binSize) {
517            k++;
518            sumX += x[j];
519            sumY += y[sortedIdx[j]];
520            sumW += w[sortedIdx[j]];
521            j++;
522          }
523          var avgX = sumX / k;
524          if (scaling) avgX = (avgX - x[0]) / range;
525          xl.Add(avgX);
526          yl.Add(sumY / k);
527          wl.Add(sumW);
528          i += k;
529        }
530      }
531
532      x2 = xl.ToArray();
533      y2 = yl.ToArray();
534      w2 = wl.ToArray();
535    }
536
537    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
538      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
539        (IRegressionProblemData)Problem.ProblemData.Clone())));
540
541    }
542    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
543      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
544        (IRegressionProblemData)Problem.ProblemData.Clone())));
545    }
546  }
547
548
549  // array with non-zero lower index
550  internal class MyArray<T> {
551    private T[] arr;
552    private int lowerBound;
553
554    public int Length { get { return arr.Length; } }
555
556    public T this[int key] {
557      get {
558        return arr[key - lowerBound];
559      }
560      set {
561        arr[key - lowerBound] = value;
562      }
563    }
564
565    public MyArray(int lowerBound, int numElements) {
566      this.lowerBound = lowerBound;
567      arr = new T[numElements];
568    }
569    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
570      Array.Copy(source, arr, source.Length);
571    }
572
573    public T[] ToArray() {
574      var res = new T[arr.Length];
575      Array.Copy(arr, res, res.Length);
576      return res;
577    }
578  }
579
580
581  // UNFINISHED
582  internal class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
583    private MyArray<double> a;
584    private MyArray<double> b;
585    private MyArray<double> c;
586    private MyArray<double> d;
587    private MyArray<double> x;
588    private double offset;
589    private double scale;
590
591    public string TargetVariable { get; set; }
592
593    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
594
595    public event EventHandler TargetVariableChanged;
596
597    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
598      this.TargetVariable = orig.TargetVariable;
599      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
600      this.a = orig.a;
601      this.b = orig.b;
602      this.c = orig.c;
603      this.d = orig.d;
604      this.x = orig.x;
605      this.scale = orig.scale;
606      this.offset = orig.offset;
607    }
608    public ReinschSmoothingSplineModel(
609      MyArray<double> a,
610      MyArray<double> b,
611      MyArray<double> c,
612      MyArray<double> d,
613      MyArray<double> x,
614      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
615      this.a = a;
616      this.b = b;
617      this.c = c;
618      this.d = d;
619      this.x = x;
620      this.TargetVariable = targetVar;
621      this.VariablesUsedForPrediction = inputs;
622      this.scale = scale;
623      this.offset = offset;
624
625      // extrapolate for xx > x[n2]
626      b[b.Length] = b[b.Length - 1];
627      d[b.Length] = d[d.Length - 1];
628    }
629
630    public override IDeepCloneable Clone(Cloner cloner) {
631      return new ReinschSmoothingSplineModel(this, cloner);
632    }
633
634    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
635      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
636    }
637
638    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
639      int n = x.Length;
640      foreach (var xx in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).Select(xi => (xi - offset) * scale)) {
641        if (xx <= x[1]) {
642          double h = xx - x[1];
643          yield return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
644        } else if (xx >= x[n]) {
645          double h = xx - x[n];
646          yield return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
647        } else {
648          // binary search
649          int lower = 1;
650          int upper = n;
651          while (true) {
652            if (upper < lower) throw new InvalidProgramException();
653            int i = lower + (upper - lower) / 2;
654            if (x[i] <= xx && xx < x[i + 1]) {
655              double h = xx - x[i];
656              yield return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
657              break;
658            } else if (xx < x[i]) {
659              upper = i - 1;
660            } else {
661              lower = i + 1;
662            }
663          }
664        }
665      }
666    }
667  }
668
669  // UNFINISHED
670  public class SmoothingSplineModel : NamedItem, IRegressionModel {
671    private double[] s;
672    private IInterpolation interpolation;
673
674    public string TargetVariable { get; set; }
675
676    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
677
678    public event EventHandler TargetVariableChanged;
679
680    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
681      this.TargetVariable = orig.TargetVariable;
682      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
683      this.s = orig.s; // TODO
684      this.interpolation = orig.interpolation;
685    }
686    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
687      this.s = s;
688      this.TargetVariable = targetVar;
689      this.VariablesUsedForPrediction = inputs;
690      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
691    }
692
693    public override IDeepCloneable Clone(Cloner cloner) {
694      return new SmoothingSplineModel(this, cloner);
695    }
696
697    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
698      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
699    }
700
701    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
702      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
703
704        yield return interpolation.Interpolate(x);
705
706      }
707    }
708  }
709
710  // UNFINISHED
711  public class Spline1dModel : NamedItem, IRegressionModel {
712    private alglib.spline1dinterpolant interpolant;
713
714    public string TargetVariable { get; set; }
715
716    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
717
718    public event EventHandler TargetVariableChanged;
719
720    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
721      this.TargetVariable = orig.TargetVariable;
722      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
723      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
724    }
725    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
726      this.interpolant = interpolant;
727      this.TargetVariable = targetVar;
728      this.VariablesUsedForPrediction = inputs;
729    }
730
731    public override IDeepCloneable Clone(Cloner cloner) {
732      return new Spline1dModel(this, cloner);
733    }
734
735    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
736      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
737    }
738
739    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
740      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
741        yield return alglib.spline1dcalc(interpolant, x);
742      }
743    }
744  }
745
746
747  // UNFINISHED
748  public class MathNetSplineModel : NamedItem, IRegressionModel {
749    private IInterpolation interpolant;
750
751    public string TargetVariable { get; set; }
752
753    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
754
755    public event EventHandler TargetVariableChanged;
756
757    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
758      this.TargetVariable = orig.TargetVariable;
759      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
760      this.interpolant = orig.interpolant; // TODO COPY!
761    }
762    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
763      this.interpolant = interpolant;
764      this.TargetVariable = targetVar;
765      this.VariablesUsedForPrediction = inputs;
766    }
767
768    public override IDeepCloneable Clone(Cloner cloner) {
769      return new MathNetSplineModel(this, cloner);
770    }
771
772    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
773      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
774    }
775
776    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
777      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
778        yield return interpolant.Interpolate(x);
779      }
780    }
781  }
782}
Note: See TracBrowser for help on using the repository browser.