Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs @ 15457

Last change on this file since 15457 was 15457, checked in by gkronber, 6 years ago

#2789 added Finbarr O'Sullivan smoothing spline code

File size: 36.3 KB
RevLine 
[15352]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
[15450]24using System.Diagnostics;
[15352]25using System.Linq;
26using System.Threading;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
[15365]34using HeuristicLab.Random;
[15362]35using MathNet.Numerics.Interpolation;
[15364]36using MathNet.Numerics.LinearAlgebra.Double;
37using MathNet.Numerics.LinearAlgebra.Double.Solvers;
[15352]38
39namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
40  [Item("Splines", "")]
41  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
42  [StorableClass]
43  public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
44    [StorableConstructor]
45    private Splines(bool deserializing) : base(deserializing) { }
46    [StorableHook(HookType.AfterDeserialization)]
47    private void AfterDeserialization() {
48    }
49
50    private Splines(Splines original, Cloner cloner)
51      : base(original, cloner) {
52    }
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new Splines(this, cloner);
55    }
56
57    public Splines()
58      : base() {
59      var validTypes = new ItemSet<StringValue>(
60        new[] {
[15362]61        "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
62          "Cubic - Natural (Math.NET)",
63          "Polynomial (Math.NET)",
64          "Rational (Math.NET)",
65          "LogLinear (Math.NET)",
66          "Common (Math.NET)",
[15364]67          "Smoothing Spline (Mine)",
68          "Smoothing Spline (Reinsch)",
[15442]69          "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
[15369]70          "B-Spline Smoothing",
[15449]71          "Penalized Regression Spline (alglib)",
[15457]72          "CUBGCV",
73          "Finnbar O'Sullivan (sbart)"
[15352]74      }.Select(s => new StringValue(s)));
75
[15449]76      Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
[15364]77      Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
[15365]78      Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
[15352]79      Problem = new RegressionProblem();
80    }
81
82
83    protected override void Run(CancellationToken cancellationToken) {
84      double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
85      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
86        throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
87
88      var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
89      if (inputVars.Length > 3) throw new ArgumentException();
90
91      var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
92      if (inputVars.Length == 1) {
93
94        var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
95        alglib.spline1dinterpolant c;
96        var type = ((StringValue)Parameters["Type"].ActualValue).Value;
97        switch (type) {
98          case "Monotone":
[15364]99            alglib.spline1dbuildmonotone(x, y, out c);
[15362]100            AddAlglibSplineResult(c, inputVars);
101            break;
[15352]102          case "Akima":
[15362]103            alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
104            break;
105            ;
[15352]106          case "Catmull-Rom":
[15362]107            alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
108            break;
109
[15352]110          case "Cubic":
[15362]111            alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
112            break;
113
[15352]114          case "Linear":
[15362]115            alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
116            break;
117          case "Cubic - Natural (Math.NET)": {
118              var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
119              AddMathNetSplineResult(spline, inputVars);
120              break;
121            }
122          case "Common (Math.NET)": {
123              var spline = MathNet.Numerics.Interpolate.Common(x, y);
124              AddMathNetSplineResult(spline, inputVars);
125              break;
126            }
127          case "LogLinear (Math.NET)": {
128              var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
129              AddMathNetSplineResult(spline, inputVars);
130              break;
131            }
132          case "Polynomial (Math.NET)": {
133              var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
134              AddMathNetSplineResult(spline, inputVars);
135              break;
136            }
137          case "Rational (Math.NET)": {
138              var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
139              AddMathNetSplineResult(spline, inputVars);
140              break;
141            }
[15364]142          case "Smoothing Spline (Mine)": {
143              CalculateSmoothingSpline(x, y, inputVars);
144              break;
145            }
146          case "Smoothing Spline (Reinsch)": {
147              CalculateSmoothingSplineReinsch(x, y, inputVars);
148              break;
149            }
[15442]150          case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
151              double optTol, looRMSE;
152              var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
153              Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
154              Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
155              Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
[15365]156              break;
157            }
[15449]158          case "CUBGCV": {
159              CubicSplineGCV.CubGcvReport report;
[15450]160              var model =
161                CubicSplineGCV.CalculateCubicSpline(x, y,
[15449]162                Problem.ProblemData.TargetVariable, inputVars, out report);
163              var targetVar = Problem.ProblemData.TargetVariable;
164              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
165              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
166              Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
167              Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
168              Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
[15450]169              Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
[15449]170              Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
171              Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
172              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
173              break;
174            }
[15457]175          case "Finnbar O'Sullivan (sbart)": {
176              SBART.SBART_Report report;
177              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
178              var model =
179                SBART.CalculateSBART(x, y,
180                Problem.ProblemData.TargetVariable, inputVars, (float)lambda, out report);
181              var targetVar = Problem.ProblemData.TargetVariable;
182              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
183              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
184              Results.Add(new Result("GCV", new DoubleValue(report.gcv)));
185              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
186              break;
187
188            }
[15365]189          case "B-Spline Smoothing": {
190              BSplineSmoothing(x, y, inputVars);
191              break;
192            }
[15442]193          case "Penalized Regression Spline (alglib)": {
[15369]194              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
195              var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
196              var targetVar = Problem.ProblemData.TargetVariable;
197              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
198              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
199
200              break;
[15442]201            }
[15352]202          default: throw new NotSupportedException();
203        }
204
205      }
206    }
[15362]207
[15369]208
[15442]209    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
[15369]210      int info;
211      alglib.spline1dinterpolant interpolant;
212      alglib.spline1dfitreport rep;
213      int n = x.Length;
214      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
215
216      alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
217      return new Spline1dModel(interpolant, targetVar, inputVars);
218    }
219
[15442]220    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
221      out double avgTrainRMSE,
[15457]222      out double cvRMSE, out double[] predictions) {
[15442]223      int info;
224      alglib.spline1dinterpolant interpolant;
225      alglib.spline1dfitreport rep;
226      int n = x.Length;
227      n = (int)Math.Max(50, 3 * Math.Sqrt(n));
[15364]228
[15450]229      int folds = 10;
230      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
231      if (foldSize == 1) folds = x.Length;
[15365]232
[15450]233      double[] x_loo = new double[(folds - 1) * foldSize];
234      double[] y_loo = new double[(folds - 1) * foldSize];
[15457]235      double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray();
[15450]236
[15442]237      var models = new List<IRegressionModel>();
[15450]238      var sumTrainSE = 0.0;
239      int nTrain = 0;
240      var sumTestSE = 0.0;
241      int nTest = 0;
[15365]242
[15457]243      predictions = new double[y.Length];
[15450]244
245      for (int i = 0; i < folds; i++) {
246        if (i < folds - 1) {
247          //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
248          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
249          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
250        }
[15457]251        alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep);
252        //Debug.Assert(y_loo.All(yi => yi != 0.0));
253        //Debug.Assert(x_loo.All(xi => xi != 0.0));
[15365]254
[15450]255        // training error
256        for (int j = 0; j < x_loo.Length; j++) {
[15457]257          var y_pred = alglib.spline1dcalc(interpolant, x[j]);
[15450]258          double res = y[j] - y_pred;
259          sumTrainSE += res * res;
260          nTrain++;
261        }
262        // test
263        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
[15457]264          predictions[j] = alglib.spline1dcalc(interpolant, x[j]);
265          double res = y[j] - predictions[j];
[15450]266          sumTestSE += res * res;
267          nTest++;
268        }
269
270        models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
271
272        if (i < folds - 1) {
273          //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
274          // add current fold for next iteration
275          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
276          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
277        }
[15442]278      }
279
[15450]280      cvRMSE = Math.Sqrt(sumTestSE / nTest);
281      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
[15442]282
283      return new RegressionEnsembleModel(models);
[15365]284    }
285
[15442]286    public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
287      Array.Sort(x, y);
288      CubicSpline2(x, y, inputVars);
289    }
290
[15365]291    private struct SplineData {
292      public double a, b, c, d, x, y;
293    };
294
295
296    /*
297     * The procedure Quincunx, takes as arguments
298       the vectors u, v and w which are respectively the diagonal, the first supradiagonal
299       and the second supradiagonal of the banded matrix. The vector on
300       the LHS of the equation (80) is placed in q which contains the solution on the
301       completion of the procedure.
302    */
303    private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
304      // { factorisation}
305      u[-1] = 0;
306      u[0] = 0;
307      for (int j = 1; j <= n - 1; j++) {
308        u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
309        v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
310        w[j] = w[j] / u[j];
[15364]311      }
[15365]312      // { forward substitution}
313      for (int j = 1; j <= n - 1; j++) {
314        q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
315      }
316      for (int j = 1; j < n - 1; j++) {
317        q[j] = q[j] / u[j];
318      }
319      // { back substitution}
320      q[n + 1] = 0;
321      q[n] = 0;
322      for (int j = n - 1; j >= 1; j--) {
323        q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
324      }
325    }
[15364]326
[15365]327    public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
328      // see Pollock: Smoothing Splines
329      int n = x.Length;
330      double lambda = 1.0;
331      double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
332      SplineData[] S = new SplineData[x.Length + 1];
333      for (int i = 0; i < x.Length; i++) {
334        S[i].x = x[i];
335        S[i].y = y[i];
[15364]336      }
[15365]337      S[x.Length].x = S[x.Length - 1].x;
338      S[x.Length].y = S[x.Length - 1].y;
339
340      // var
341      double[] h = new double[n],
342        r = new double[n],
343        f = new double[n],
344        p = new double[n];
345      var q = new MyArray<double>(-1, n + 3);
346      var u = new MyArray<double>(-1, n + 3);
347      var v = new MyArray<double>(-1, n + 3);
348      var w = new MyArray<double>(-1, n + 3);
349      double mu;
350
351      mu = 2 * (1 - lambda) / (3 * lambda);
352      h[0] = S[1].x - S[0].x;
353      r[0] = 3 / h[0];
354      for (int i = 1; i < n - 1; i++) {
355        h[i] = S[i + 1].x - S[i].x;
356        r[i] = 3 / h[i];
357        f[i] = -(r[i - 1] + r[i]);
358        p[i] = 2 * (S[i + 1].x - S[i - 1].x);
359        q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
[15364]360      }
[15365]361      for (int i = 1; i < n; i++) {
362        u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1];    // TODO Sigma 1..n
363        u[i] = mu * u[i] + p[i];
364        v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
365        v[i] = mu * v[i] + h[i];
366        w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
367      }
368      Quincunx(n, u, v, w, q);
369      // { Spline P arameters}
370      S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
371      S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
372      S[0].a = q[1] / (3 * h[0]);
373      S[0].b = 0;
374      S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
375      r[0] = 0;
376      for (int j = 1; j < n; j++) {
377        S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
378        S[j].b = q[j];
379        S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
380        S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
381        S[j].d = y[j] - mu * S[j].d * sigma[j];
382      }
[15364]383
[15365]384      //   { SmoothingSpline}
[15364]385    }
386
[15442]387    public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
388      double s = x.Length + 1;
[15365]389      double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
[15364]390
[15442]391      var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
[15364]392
[15442]393      Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
394    }
[15364]395
396
[15442]397    public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
398      out double avgTrainRMSE,
[15450]399      out double cvRMSE) {
[15442]400      int info;
[15364]401
[15450]402      int folds = 10;
403      int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
404      if (foldSize == 1) folds = x.Length;
[15365]405
[15450]406      double[] x_loo = new double[(folds - 1) * foldSize];
407      double[] y_loo = new double[(folds - 1) * foldSize];
408
[15442]409      var models = new List<IRegressionModel>();
[15450]410      var sumTrainSE = 0.0;
411      int nTrain = 0;
412      var sumTestSE = 0.0;
413      int nTest = 0;
[15365]414
415
[15450]416      for (int i = 0; i < folds; i++) {
417        if (i < folds - 1) {
418          Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
419          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
420        }
421        var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
[15365]422
[15450]423        // training error
424        for(int j = 0;j<x_loo.Length;j++) {
425          var y_pred = model.GetEstimatedValue(x[j]);
426          double res = y[j] - y_pred;
427          sumTrainSE += res * res;
428          nTrain++;
429        }
430        // test
431        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
432          var y_pred = model.GetEstimatedValue(x[j]);
433          double res = y[j] - y_pred;
434          sumTestSE += res * res;
435          nTest++;
436        }
[15365]437
[15442]438        models.Add(model);
[15365]439
[15450]440        if (i < folds - 1) {
441          // add current fold for next iteration
442          Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
443          Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
[15442]444        }
445      }
446
[15450]447      cvRMSE = Math.Sqrt(sumTestSE / nTest);
448      avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
[15442]449
450      return new RegressionEnsembleModel(models);
[15365]451    }
452
453
[15450]454    // automatically determines tolerance using loo cv, SLOW!
[15442]455    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
[15450]456      out double optTolerance, out double cvRMSE) {
[15442]457      var maxTolerance = y.StandardDeviation();
458      var minTolerance = maxTolerance * 1E-6;
459      optTolerance = maxTolerance;
460      var tol = maxTolerance;
[15450]461      cvRMSE = double.PositiveInfinity;
[15442]462      IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
463      while (tol > minTolerance) {
[15450]464        double curCVRMSE;
[15442]465        double avgTrainRmse;
466        var model = Splines.CalculateSmoothingSplineReinsch(
[15450]467          x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
[15442]468
[15450]469        if (curCVRMSE < cvRMSE) {
470          cvRMSE = curCVRMSE;
[15442]471          optTolerance = tol;
472          bestModel = model;
473        }
474        tol *= 0.85;
475      }
476      return bestModel;
477    }
478
479    public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
[15364]480      var minX = xOrig.Min();
481      var maxX = xOrig.Max();
482      var range = maxX - minX;
483
[15365]484      double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
485      SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
[15364]486
487      // See Smoothing by Spline Functions, Reinsch, 1967
488      // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
489      int n = xOrig.Length;
490      var x = new MyArray<double>(1, xOrig);
491      var y = new MyArray<double>(1, yOrig);
[15365]492      var inv_dy = new MyArray<double>(1, w);
[15364]493
494      int n1 = 1;
[15365]495      int n2 = n;
[15364]496
497      // results
498      var a = new MyArray<double>(n1, n);
499      var b = new MyArray<double>(n1, n);
500      var c = new MyArray<double>(n1, n);
501      var d = new MyArray<double>(n1, n);
502
503      // smooth
504      {
505        int i, m1, m2; double e, f, f2, g = 0.0, h, p;
506        MyArray<double>
507          r = new MyArray<double>(n1 - 1, n + 2),
508          r1 = new MyArray<double>(n1 - 1, n + 2),
509          r2 = new MyArray<double>(n1 - 1, n + 2),
510          t = new MyArray<double>(n1 - 1, n + 2),
511          t1 = new MyArray<double>(n1 - 1, n + 2),
512          u = new MyArray<double>(n1 - 1, n + 2),
513          v = new MyArray<double>(n1 - 1, n + 2);
514        m1 = n1 - 1;
515        m2 = n2 + 1;
516        r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
517          u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
518        m1 = n1 + 1;
519        m2 = n2 - 1;
520        h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
521        for (i = m1; i <= m2; i++) {
522          g = h; h = x[i + 1] - x[i];
523          e = f; f = (y[i + 1] - y[i]) / h;
524          a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
525          r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
526          r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
527        }
528        for (i = m1; i <= m2; i++) {
529          b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
530          c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
531          d[i] = r[i] * r2[i + 2];
532        }
533        f2 = -s;
534        next:
535        for (i = m1; i <= m2; i++) {
536          r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
537          r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
538          u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
539          f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
540        }
541        for (i = m2; i >= m1; i--) {
542          u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
543        }
544        e = h = 0;
545        for (i = n1; i <= m2; i++) {
546          g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
547          v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
548        }
549        g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
550        g = f2; f2 = e * p * p;
551        if (f2 >= s || f2 <= g) goto fin;
552        f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
553        for (i = m1; i <= m2; i++) {
554          g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
555          g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
556          f = f + g * r[i] * g; r[i] = g;
557        }
558        h = e - p * f; if (h <= 0) goto fin;
559        p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
560
561        fin:
562        for (i = n1; i <= n2; i++) {
563          a[i] = y[i] - p * v[i];
564          c[i] = u[i];
565        }
566        for (i = n1; i <= m2; i++) {
567          h = x[i + 1] - x[i];
568          d[i] = (c[i + 1] - c[i]) / (3 * h);
569          b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
570        }
571      }
572
[15449]573      // extrapolate for xx > x[n2]
574      b[b.Length] = b[b.Length - 1];
575      d[1] = 0;
576
[15365]577      return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
[15364]578    }
579
580    private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
581      // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
[15365]582      double[] w = Enumerable.Repeat(1.0, x.Length).ToArray();  // weights necessary for sortAndBin but are ignored below (TODO)
583      SortAndBin(x, y, w, out x, out y, out w, scaling: false);
[15364]584      int n = x.Length;
585
586      SparseMatrix delta = new SparseMatrix(n - 2, n);
587      // double[,] delta = new double[n - 2, n];
588      //double[,] W = new double[n - 2, n - 2];
589      SparseMatrix W = new SparseMatrix(n - 2, n - 2);
590      Matrix WInvD = new DenseMatrix(n - 2, n);
591
592      // double[,] W_inv_D = new double[n - 2, n];
593      // double[,] K = new double[n, n];
594
595      // go over successive knots to determine distances and fill Delta and W
596      for (int i = 0; i < n - 2; i++) {
597        double h = x[i + 1] - x[i];
598        double h_next = x[i + 2] - x[i + 1];
599        delta[i, i] = 1.0 / h;
600        delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
601        delta[i, i + 2] = 1.0 / h_next;
602        W[i, i] = (h + h_next) / 3;
603        if (i > 0) {
604          W[i - 1, i] =
605           W[i, i - 1] = h / 6.0;
606        }
607      }
608
[15365]609      // WInvD = W.Cholesky().Solve(delta);
[15364]610      var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
611
612      // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)   
613      // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
614
615      var K = delta.TransposeThisAndMultiply(WInvD);
616
617      double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
618
619      for (int i = 0; i < n; i++) {
620        for (int j = 0; j < n; j++) {
621          K[i, j] *= lambda;
622          if (i == j) K[i, j] += 1;
623        }
624      }
625
626      // solve for y
627      // double[] s;
628      // int solverInfo;
629      // alglib.densesolverreport solverRep;
630      // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
631
632      var s = K.Solve(new DenseVector(y)).ToArray();
633
634      Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
[15365]635  (IRegressionProblemData)Problem.ProblemData.Clone())));
[15364]636    }
637
[15449]638    public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
[15365]639      var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
640      // sort by x
641      Array.Sort(x, sortedIdx);
[15364]642
643      var xl = new List<double>();
644      var yl = new List<double>();
[15365]645      var wl = new List<double>();
[15364]646
647      int n = x.Length;
648      var range = x[n - 1] - x[0];
[15457]649      var binSize = range / n / 10;
[15364]650      {
651        // binning
652        int i = 0;
653        while (i < n) {
654          int k = 0;
655          int j = i;
656          double sumX = 0.0;
657          double sumY = 0.0;
[15365]658          double sumW = 0.0;
[15364]659          while (j < n && x[j] - x[i] <= binSize) {
660            k++;
661            sumX += x[j];
[15365]662            sumY += y[sortedIdx[j]];
663            sumW += w[sortedIdx[j]];
[15364]664            j++;
665          }
666          var avgX = sumX / k;
667          if (scaling) avgX = (avgX - x[0]) / range;
[15365]668          xl.Add(avgX);
[15364]669          yl.Add(sumY / k);
[15365]670          wl.Add(sumW);
[15364]671          i += k;
672        }
673      }
674
675      x2 = xl.ToArray();
676      y2 = yl.ToArray();
[15365]677      w2 = wl.ToArray();
[15364]678    }
679
[15362]680    private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
681      Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
682        (IRegressionProblemData)Problem.ProblemData.Clone())));
683
684    }
685    private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
686      Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
687        (IRegressionProblemData)Problem.ProblemData.Clone())));
[15364]688    }
689  }
[15362]690
[15364]691
[15365]692  // array with non-zero lower index
693  internal class MyArray<T> {
694    private T[] arr;
695    private int lowerBound;
[15364]696
[15365]697    public int Length { get { return arr.Length; } }
698
699    public T this[int key] {
700      get {
701        return arr[key - lowerBound];
702      }
703      set {
704        arr[key - lowerBound] = value;
705      }
706    }
707
708    public MyArray(int lowerBound, int numElements) {
709      this.lowerBound = lowerBound;
710      arr = new T[numElements];
711    }
712    public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
713      Array.Copy(source, arr, source.Length);
714    }
715
716    public T[] ToArray() {
717      var res = new T[arr.Length];
718      Array.Copy(arr, res, res.Length);
719      return res;
720    }
721  }
722
723
[15364]724  // UNFINISHED
[15442]725  public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
[15365]726    private MyArray<double> a;
727    private MyArray<double> b;
728    private MyArray<double> c;
729    private MyArray<double> d;
730    private MyArray<double> x;
[15364]731    private double offset;
732    private double scale;
733
734    public string TargetVariable { get; set; }
735
736    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
737
738    public event EventHandler TargetVariableChanged;
739
740    public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
741      this.TargetVariable = orig.TargetVariable;
742      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
743      this.a = orig.a;
744      this.b = orig.b;
745      this.c = orig.c;
746      this.d = orig.d;
747      this.x = orig.x;
748      this.scale = orig.scale;
749      this.offset = orig.offset;
[15362]750    }
[15442]751    internal ReinschSmoothingSplineModel(
[15365]752      MyArray<double> a,
753      MyArray<double> b,
754      MyArray<double> c,
755      MyArray<double> d,
756      MyArray<double> x,
757      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
[15364]758      this.a = a;
759      this.b = b;
760      this.c = c;
761      this.d = d;
762      this.x = x;
763      this.TargetVariable = targetVar;
764      this.VariablesUsedForPrediction = inputs;
765      this.scale = scale;
766      this.offset = offset;
767    }
768
769    public override IDeepCloneable Clone(Cloner cloner) {
770      return new ReinschSmoothingSplineModel(this, cloner);
771    }
772
773    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
774      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
775    }
776
[15442]777    public double GetEstimatedValue(double xx) {
[15457]778      xx = (xx - offset) * scale;
[15449]779      int n = a.Length;
[15442]780      if (xx <= x[1]) {
781        double h = xx - x[1];
782        return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
783      } else if (xx >= x[n]) {
784        double h = xx - x[n];
785        return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
786      } else {
787        // binary search
788        int lower = 1;
789        int upper = n;
790        while (true) {
791          if (upper < lower) throw new InvalidProgramException();
792          int i = lower + (upper - lower) / 2;
793          if (x[i] <= xx && xx < x[i + 1]) {
794            double h = xx - x[i];
795            return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
796          } else if (xx < x[i]) {
797            upper = i - 1;
798          } else {
799            lower = i + 1;
[15364]800          }
801        }
802      }
803    }
[15442]804
805    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
806      int n = x.Length;
807      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
808      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
809        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
810      }
[15457]811      foreach (var xx in product) {
[15442]812        yield return GetEstimatedValue(xx);
813      }
814    }
[15352]815  }
816
[15364]817  // UNFINISHED
818  public class SmoothingSplineModel : NamedItem, IRegressionModel {
819    private double[] s;
820    private IInterpolation interpolation;
[15352]821
[15364]822    public string TargetVariable { get; set; }
823
824    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
825
826    public event EventHandler TargetVariableChanged;
827
828    public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
829      this.TargetVariable = orig.TargetVariable;
830      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
831      this.s = orig.s; // TODO
832      this.interpolation = orig.interpolation;
833    }
834    public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
835      this.s = s;
836      this.TargetVariable = targetVar;
837      this.VariablesUsedForPrediction = inputs;
838      this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
839    }
840
841    public override IDeepCloneable Clone(Cloner cloner) {
842      return new SmoothingSplineModel(this, cloner);
843    }
844
845    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
846      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
847    }
848
849    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
850      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
851
852        yield return interpolation.Interpolate(x);
853
854      }
855    }
856  }
857
[15352]858  // UNFINISHED
859  public class Spline1dModel : NamedItem, IRegressionModel {
860    private alglib.spline1dinterpolant interpolant;
861
862    public string TargetVariable { get; set; }
863
864    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
865
866    public event EventHandler TargetVariableChanged;
867
868    public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
869      this.TargetVariable = orig.TargetVariable;
870      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
871      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
872    }
873    public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
874      this.interpolant = interpolant;
875      this.TargetVariable = targetVar;
876      this.VariablesUsedForPrediction = inputs;
877    }
878
879    public override IDeepCloneable Clone(Cloner cloner) {
880      return new Spline1dModel(this, cloner);
881    }
882
883    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
884      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
885    }
886
887    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
[15433]888      var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
[15442]889      foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
[15433]890        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
891      }
892
893      foreach (var x in product) {
[15352]894        yield return alglib.spline1dcalc(interpolant, x);
895      }
896    }
897  }
[15362]898
899
900  // UNFINISHED
901  public class MathNetSplineModel : NamedItem, IRegressionModel {
902    private IInterpolation interpolant;
903
904    public string TargetVariable { get; set; }
905
906    public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
907
908    public event EventHandler TargetVariableChanged;
909
910    public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
911      this.TargetVariable = orig.TargetVariable;
912      this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
913      this.interpolant = orig.interpolant; // TODO COPY!
914    }
915    public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
916      this.interpolant = interpolant;
917      this.TargetVariable = targetVar;
918      this.VariablesUsedForPrediction = inputs;
919    }
920
921    public override IDeepCloneable Clone(Cloner cloner) {
922      return new MathNetSplineModel(this, cloner);
923    }
924
925    public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
926      return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
927    }
928
929    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
930      foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
931        yield return interpolant.Interpolate(x);
932      }
933    }
934  }
[15352]935}
Note: See TracBrowser for help on using the repository browser.