#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Threading; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Random; using MathNet.Numerics.Interpolation; using MathNet.Numerics.LinearAlgebra.Double; using MathNet.Numerics.LinearAlgebra.Double.Solvers; /* * Experimenting with various spline implementations. * * There are many different variants of using splines. * - Spline Interpolation * - Natural Spline Interpolation * - Smoothing Splines (Reinsch) * - P-Splines * - B-Splines * - Regression Splines * - Penalized Regression Splines * - * * * Generally, a spline is a piecewise function whereby each piece connects two knots. * In almost all cases splines are univariate. There are extensions to more dimensions * but this is much less convenient. * A linear spline is a piecewise linear function. Usually the following constaints are enforced * for each knot point k and the two piecewise functions fl and fr * fl(k) = fr(k), fl(k)' = fr(k)', fl(k)''=0, fr(k)''=0, additionally constraints for * the boundary knots are often enforced. * Spline Interpolation solves the problem of finding a smooth interpolating function * which goes through all knots exactly. * * In spline smoothing one tries to find a function minimizing the squared residuals * and maximizing smoothness. For the smoothing spline g the optimization objective is * || y - g(x) ||² + \lambda integrate (xmin : xmax) g(x)'' dx. * The roughness penality is the integral over the second derivative of the smoothing spline * over the whole domain of x. Lambda is used to balance the amount of smoothing. * If lambda is set to zero, the solution is an interpolating spline. If lambda is set to * infinity the solution is a maximally smooth function (= line). * Interestingly, the estimation of a smoothing spline is a linear operation. As a consequence * a lot of theory from linear models can be reused (e.g. cross-validation and generalized * cross-validation) with minimal additional effort. GCV can also be used to tune the * smoothing parameter automatically. Tuned algorithms are available to solve smoothing * spline estimation efficiently O(n) or O(m²n) where m is the number of knots and n the * number of data points. * It is possible to calculate confidence intervals for the smoothing spline and given * an error model subsequently also confidence intervals for predictions. * * We are primarily interested in spline smoothing / regression. * Alglib and Math.NET provide several different implementations for interpolation * including polynomial (cubic), Hermite, Akima, Linear, Catmull-Rom, Rational, ... * * For spline smoothing or regression there are also many differen variants. * * Smoothing Splines by Reinsch (Reinsch, Smoothing by Spline Functions, 1967). * Pseudo-code for the algorithm is given in the paper. A small change in the algorithm * to improve convergence is discussed in (Reinsch, Smoothing by Spline Functions II, 1970) * The algorithm uses the properties of the smoothing matrix (bandedness) to calculate the * smoothing spline in O(n). Two parameters have to be set (S and rho). If the noise in the * data is known then both parameter can be set directly, otherwise S should be set to n-1 * and rho should be found via cross-validation. * It is not easy to calculate a the CV (PRESS) or GCV in the approach by Reinsch and * therefore the algorithm is not ideal. * Wahba and colleagues (1990) introduced CV and GCV estimation. * Hutchinson and colleages improved the algorithm (Fortran implementation available as * algorithm 642.f from ACM). * * CUBGCV (Algorithm 642 Hutchingson) * Several improvments over the Reinsch algorithm. In particular efficient calculation * of the * Finbarr O'Sullivan BART * ... * * B-Splines * ... * * P-Splines (Eilers and Marx), use the B-Spline basis in combination with a penality * for large variations of subsequent B-Spline coefficients. Conceptually, if the * coefficients of neighbouring B-Splines are almost the same then the resulting smoothing * spline is less wiggly. It has been critizized that this is rather subjective. * P-Splines also allow calculation of CV and GCV for a given smoothing parameter which * can be used for optimization of lambda. * It is easy to use P-Splines within GAMs (no backfitting necessary). * The paper contains MATLAB code also for the calculation of the B-Spline basis. * The computational effort is O(?). The authors argue that it is efficient. * The number of knots is not critical it is possible to use every data point as knot. * * Alglib: penalized regression spline * ... * * Alglib: penalized regression spline with support for weights * ... */ namespace HeuristicLab.Algorithms.DataAnalysis.Experimental { [Item("Splines", "")] [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)] [StorableClass] public sealed class Splines : FixedDataAnalysisAlgorithm { [StorableConstructor] private Splines(bool deserializing) : base(deserializing) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { } private Splines(Splines original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new Splines(this, cloner); } public Splines() : base() { var validTypes = new ItemSet( new[] { "Smoothing Spline (Reinsch)", "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)", "B-Spline Smoothing", "Penalized Regression Spline (alglib)", "CUBGCV", "Finnbar O'Sullivan (sbart)" }.Select(s => new StringValue(s))); Parameters.Add(new ConstrainedValueParameter("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last())); Parameters.Add(new ValueParameter("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100))); Parameters.Add(new ValueParameter("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1))); Problem = new RegressionProblem(); } protected override void Run(CancellationToken cancellationToken) { double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices); if (inputMatrix.Cast().Any(x => double.IsNaN(x) || double.IsInfinity(x))) throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset."); var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray(); if (inputVars.Length > 3) throw new ArgumentException(); var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray(); if (inputVars.Length == 1) { var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray(); alglib.spline1dinterpolant c; var type = ((StringValue)Parameters["Type"].ActualValue).Value; switch (type) { case "Smoothing Spline (Reinsch)": { CalculateSmoothingSplineReinsch(x, y, inputVars); break; } case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": { double optTol, looRMSE; var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE); Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone()))); Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol))); Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE))); break; } case "CUBGCV": { CubicSplineGCV.CubGcvReport report; var model = CubicSplineGCV.CalculateCubicSpline(x, y, Problem.ProblemData.TargetVariable, inputVars, out report); var targetVar = Problem.ProblemData.TargetVariable; var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation))); Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance))); Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom))); Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints))); Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf))); Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal))); Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter))); break; } case "Finnbar O'Sullivan (sbart)": { SBART.SBART_Report report; var model = SBART.CalculateSBART(x, y, Problem.ProblemData.TargetVariable, inputVars, out report); var targetVar = Problem.ProblemData.TargetVariable; var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); Results.Add(new Result("GCV", new DoubleValue(report.gcv))); Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter))); break; } case "Penalized Regression Spline (alglib)": { var lambda = ((IValueParameter)Parameters["Lambda"]).Value.Value; // controls extent of smoothing var model = CalculatePenalizedRegressionSpline(x, y, lambda, x.Length, Problem.ProblemData.TargetVariable, inputVars); var targetVar = Problem.ProblemData.TargetVariable; var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); break; } default: throw new NotSupportedException(); } } } public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, int numKnots, string targetVar, string[] inputVars) { int info; alglib.spline1dinterpolant interpolant; alglib.spline1dfitreport rep; alglib.spline1dfitpenalized(x, y, numKnots, lambda, out info, out interpolant, out rep); return new Spline1dModel(interpolant, targetVar, inputVars); } public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars, out double avgTrainRMSE, out double cvRMSE, out double[] predictions) { int info; alglib.spline1dinterpolant interpolant; alglib.spline1dfitreport rep; int n = x.Length; n = (int)Math.Max(50, 3 * Math.Sqrt(n)); int folds = 10; int foldSize = x.Length <= 30 ? 1 : x.Length / folds; if (foldSize == 1) folds = x.Length; double[] x_loo = new double[(folds - 1) * foldSize]; double[] y_loo = new double[(folds - 1) * foldSize]; double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray(); var models = new List(); var sumTrainSE = 0.0; int nTrain = 0; var sumTestSE = 0.0; int nTest = 0; predictions = new double[y.Length]; for (int i = 0; i < folds; i++) { if (i < folds - 1) { //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize); Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize); Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize); } alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep); //Debug.Assert(y_loo.All(yi => yi != 0.0)); //Debug.Assert(x_loo.All(xi => xi != 0.0)); // training error for (int j = 0; j < x_loo.Length; j++) { var y_pred = alglib.spline1dcalc(interpolant, x[j]); double res = y[j] - y_pred; sumTrainSE += res * res; nTrain++; } // test for (int j = i * foldSize; j < (i + 1) * foldSize; j++) { predictions[j] = alglib.spline1dcalc(interpolant, x[j]); double res = y[j] - predictions[j]; sumTestSE += res * res; nTest++; } models.Add(new Spline1dModel(interpolant, targetVar, inputVars)); if (i < folds - 1) { //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize); // add current fold for next iteration Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize); Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize); } } cvRMSE = Math.Sqrt(sumTestSE / nTest); avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain); return new RegressionEnsembleModel(models); } private struct SplineData { public double a, b, c, d, x, y; }; /* * The procedure Quincunx, takes as arguments the vectors u, v and w which are respectively the diagonal, the first supradiagonal and the second supradiagonal of the banded matrix. The vector on the LHS of the equation (80) is placed in q which contains the solution on the completion of the procedure. */ private void Quincunx(int n, MyArray u, MyArray v, MyArray w, MyArray q) { // { factorisation} u[-1] = 0; u[0] = 0; for (int j = 1; j <= n - 1; j++) { u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2); v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j]; w[j] = w[j] / u[j]; } // { forward substitution} for (int j = 1; j <= n - 1; j++) { q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2]; } for (int j = 1; j < n - 1; j++) { q[j] = q[j] / u[j]; } // { back substitution} q[n + 1] = 0; q[n] = 0; for (int j = n - 1; j >= 1; j--) { q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2]; } } public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) { double s = x.Length + 1; double stdTol = ((IValueParameter)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable); Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone()))); } public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars, out double avgTrainRMSE, out double cvRMSE) { int info; int folds = 10; int foldSize = x.Length <= 30 ? 1 : x.Length / folds; if (foldSize == 1) folds = x.Length; double[] x_loo = new double[(folds - 1) * foldSize]; double[] y_loo = new double[(folds - 1) * foldSize]; var models = new List(); var sumTrainSE = 0.0; int nTrain = 0; var sumTestSE = 0.0; int nTest = 0; for (int i = 0; i < folds; i++) { if (i < folds - 1) { Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1); Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1); } var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar); // training error for(int j = 0;j minTolerance) { double curCVRMSE; double avgTrainRmse; var model = Splines.CalculateSmoothingSplineReinsch( x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE); if (curCVRMSE < cvRMSE) { cvRMSE = curCVRMSE; optTolerance = tol; bestModel = model; } tol *= 0.85; } return bestModel; } public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) { var minX = xOrig.Min(); var maxX = xOrig.Max(); var range = maxX - minX; double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray(); SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false); // See Smoothing by Spline Functions, Reinsch, 1967 // move x and y into an array indexed with 1..n to match indexing in Reinsch paper int n = xOrig.Length; var x = new MyArray(1, xOrig); var y = new MyArray(1, yOrig); var inv_dy = new MyArray(1, w); int n1 = 1; int n2 = n; // results var a = new MyArray(n1, n); var b = new MyArray(n1, n); var c = new MyArray(n1, n); var d = new MyArray(n1, n); // smooth { int i, m1, m2; double e, f, f2, g = 0.0, h, p; MyArray r = new MyArray(n1 - 1, n + 2), r1 = new MyArray(n1 - 1, n + 2), r2 = new MyArray(n1 - 1, n + 2), t = new MyArray(n1 - 1, n + 2), t1 = new MyArray(n1 - 1, n + 2), u = new MyArray(n1 - 1, n + 2), v = new MyArray(n1 - 1, n + 2); m1 = n1 - 1; m2 = n2 + 1; r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] = u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0; m1 = n1 + 1; m2 = n2 - 1; h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h; for (i = m1; i <= m2; i++) { g = h; h = x[i + 1] - x[i]; e = f; f = (y[i + 1] - y[i]) / h; a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3; r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h; r1[i] = -inv_dy[i] / g - inv_dy[i] / h; } for (i = m1; i <= m2; i++) { b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i]; c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1]; d[i] = r[i] * r2[i + 2]; } f2 = -s; next: for (i = m1; i <= m2; i++) { r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2]; r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]); u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2]; f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p; } for (i = m2; i >= m1; i--) { u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2]; } e = h = 0; for (i = n1; i <= m2; i++) { g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]); v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g); } g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h; g = f2; f2 = e * p * p; if (f2 >= s || f2 <= g) goto fin; f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]); for (i = m1; i <= m2; i++) { g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]); g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2]; f = f + g * r[i] * g; r[i] = g; } h = e - p * f; if (h <= 0) goto fin; p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next; fin: for (i = n1; i <= n2; i++) { a[i] = y[i] - p * v[i]; c[i] = u[i]; } for (i = n1; i <= m2; i++) { h = x[i + 1] - x[i]; d[i] = (c[i + 1] - c[i]) / (3 * h); b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h; } } // extrapolate for xx > x[n2] b[b.Length] = b[b.Length - 1]; d[1] = 0; return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars); } public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) { var sortedIdx = Enumerable.Range(0, x.Length).ToArray(); // sort by x Array.Sort(x, sortedIdx); var xl = new List(); var yl = new List(); var wl = new List(); int n = x.Length; var range = x[n - 1] - x[0]; var binSize = range / n / 10; { // binning int i = 0; while (i < n) { int k = 0; int j = i; double sumX = 0.0; double sumY = 0.0; double sumW = 0.0; while (j < n && x[j] - x[i] <= binSize) { k++; sumX += x[j]; sumY += y[sortedIdx[j]]; sumW += w[sortedIdx[j]]; j++; } var avgX = sumX / k; if (scaling) avgX = (avgX - x[0]) / range; xl.Add(avgX); yl.Add(sumY / k); wl.Add(sumW); i += k; } } x2 = xl.ToArray(); y2 = yl.ToArray(); w2 = wl.ToArray(); } } // array with non-zero lower index internal class MyArray { private T[] arr; private int lowerBound; public int Length { get { return arr.Length; } } public T this[int key] { get { return arr[key - lowerBound]; } set { arr[key - lowerBound] = value; } } public MyArray(int lowerBound, int numElements) { this.lowerBound = lowerBound; arr = new T[numElements]; } public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) { Array.Copy(source, arr, source.Length); } public T[] ToArray() { var res = new T[arr.Length]; Array.Copy(arr, res, res.Length); return res; } } // UNFINISHED public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel { private MyArray a; private MyArray b; private MyArray c; private MyArray d; private MyArray x; private double offset; private double scale; public string TargetVariable { get; set; } public IEnumerable VariablesUsedForPrediction { get; private set; } public event EventHandler TargetVariableChanged; public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) { this.TargetVariable = orig.TargetVariable; this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); this.a = orig.a; this.b = orig.b; this.c = orig.c; this.d = orig.d; this.x = orig.x; this.scale = orig.scale; this.offset = orig.offset; } internal ReinschSmoothingSplineModel( MyArray a, MyArray b, MyArray c, MyArray d, MyArray x, string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") { this.a = a; this.b = b; this.c = c; this.d = d; this.x = x; this.TargetVariable = targetVar; this.VariablesUsedForPrediction = inputs; this.scale = scale; this.offset = offset; } public override IDeepCloneable Clone(Cloner cloner) { return new ReinschSmoothingSplineModel(this, cloner); } public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); } public double GetEstimatedValue(double xx) { xx = (xx - offset) * scale; int n = a.Length; if (xx <= x[1]) { double h = xx - x[1]; return a[1] + h * (b[1] + h * (c[1] + h * d[1])); } else if (xx >= x[n]) { double h = xx - x[n]; return a[n] + h * (b[n] + h * (c[n] + h * d[n])); } else { // binary search int lower = 1; int upper = n; while (true) { if (upper < lower) throw new InvalidProgramException(); int i = lower + (upper - lower) / 2; if (x[i] <= xx && xx < x[i + 1]) { double h = xx - x[i]; return a[i] + h * (b[i] + h * (c[i] + h * d[i])); } else if (xx < x[i]) { upper = i - 1; } else { lower = i + 1; } } } } public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) { int n = x.Length; var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray(); foreach (var factor in VariablesUsedForPrediction.Skip(1)) { product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); } foreach (var xx in product) { yield return GetEstimatedValue(xx); } } } // UNFINISHED public class Spline1dModel : NamedItem, IRegressionModel { alglib.spline1dinterpolant interpolant; public string TargetVariable { get; set; } public IEnumerable VariablesUsedForPrediction { get; private set; } public event EventHandler TargetVariableChanged; public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) { this.TargetVariable = orig.TargetVariable; this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy(); } public Spline1dModel( alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") { this.interpolant = (alglib.spline1dinterpolant)interpolant.make_copy(); this.TargetVariable = targetVar; this.VariablesUsedForPrediction = inputs; } public override IDeepCloneable Clone(Cloner cloner) { return new Spline1dModel(this, cloner); } public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); } public double GetEstimatedValue(double xx) { return alglib.spline1dcalc(interpolant, xx); } public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) { var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray(); foreach (var factor in VariablesUsedForPrediction.Skip(1)) { product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); } foreach (var xx in product) { yield return GetEstimatedValue(xx); } } } }