#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Threading; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Random; using MathNet.Numerics.Interpolation; using MathNet.Numerics.LinearAlgebra.Double; using MathNet.Numerics.LinearAlgebra.Double.Solvers; namespace HeuristicLab.Algorithms.DataAnalysis.Experimental { [Item("Splines", "")] [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)] [StorableClass] public sealed class Splines : FixedDataAnalysisAlgorithm { [StorableConstructor] private Splines(bool deserializing) : base(deserializing) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { } private Splines(Splines original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new Splines(this, cloner); } public Splines() : base() { var validTypes = new ItemSet( new[] { "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear", "Cubic - Natural (Math.NET)", "Polynomial (Math.NET)", "Rational (Math.NET)", "LogLinear (Math.NET)", "Common (Math.NET)", "Smoothing Spline (Mine)", "Smoothing Spline (Reinsch)", "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)", "B-Spline Smoothing", "Penalized Regression Spline (alglib)", "CUBGCV", "Finnbar O'Sullivan (sbart)" }.Select(s => new StringValue(s))); Parameters.Add(new ConstrainedValueParameter("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last())); Parameters.Add(new ValueParameter("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100))); Parameters.Add(new ValueParameter("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1))); Problem = new RegressionProblem(); } protected override void Run(CancellationToken cancellationToken) { double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices); if (inputMatrix.Cast().Any(x => double.IsNaN(x) || double.IsInfinity(x))) throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset."); var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray(); if (inputVars.Length > 3) throw new ArgumentException(); var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray(); if (inputVars.Length == 1) { var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray(); alglib.spline1dinterpolant c; var type = ((StringValue)Parameters["Type"].ActualValue).Value; switch (type) { case "Monotone": alglib.spline1dbuildmonotone(x, y, out c); AddAlglibSplineResult(c, inputVars); break; case "Akima": alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars); break; ; case "Catmull-Rom": alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars); break; case "Cubic": alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars); break; case "Linear": alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars); break; case "Cubic - Natural (Math.NET)": { var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y); AddMathNetSplineResult(spline, inputVars); break; } case "Common (Math.NET)": { var spline = MathNet.Numerics.Interpolate.Common(x, y); AddMathNetSplineResult(spline, inputVars); break; } case "LogLinear (Math.NET)": { var spline = MathNet.Numerics.Interpolate.LogLinear(x, y); AddMathNetSplineResult(spline, inputVars); break; } case "Polynomial (Math.NET)": { var spline = MathNet.Numerics.Interpolate.Polynomial(x, y); AddMathNetSplineResult(spline, inputVars); break; } case "Rational (Math.NET)": { var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y); AddMathNetSplineResult(spline, inputVars); break; } case "Smoothing Spline (Mine)": { CalculateSmoothingSpline(x, y, inputVars); break; } case "Smoothing Spline (Reinsch)": { CalculateSmoothingSplineReinsch(x, y, inputVars); break; } case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": { double optTol, looRMSE; var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE); Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone()))); Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol))); Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE))); break; } case "CUBGCV": { CubicSplineGCV.CubGcvReport report; var model = CubicSplineGCV.CalculateCubicSpline(x, y, Problem.ProblemData.TargetVariable, inputVars, out report); var targetVar = Problem.ProblemData.TargetVariable; var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation))); Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance))); Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom))); Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints))); Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf))); Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal))); Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter))); break; } case "Finnbar O'Sullivan (sbart)": { SBART.SBART_Report report; var lambda = ((IValueParameter)Parameters["Lambda"]).Value.Value; // controls extent of smoothing var model = SBART.CalculateSBART(x, y, Problem.ProblemData.TargetVariable, inputVars, (float)lambda, out report); var targetVar = Problem.ProblemData.TargetVariable; var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); Results.Add(new Result("GCV", new DoubleValue(report.gcv))); Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter))); break; } case "B-Spline Smoothing": { BSplineSmoothing(x, y, inputVars); break; } case "Penalized Regression Spline (alglib)": { var lambda = ((IValueParameter)Parameters["Lambda"]).Value.Value; // controls extent of smoothing var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars); var targetVar = Problem.ProblemData.TargetVariable; var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); break; } default: throw new NotSupportedException(); } } } public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) { int info; alglib.spline1dinterpolant interpolant; alglib.spline1dfitreport rep; int n = x.Length; n = (int)Math.Max(50, 3 * Math.Sqrt(n)); alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep); return new Spline1dModel(interpolant, targetVar, inputVars); } public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars, out double avgTrainRMSE, out double cvRMSE, out double[] predictions) { int info; alglib.spline1dinterpolant interpolant; alglib.spline1dfitreport rep; int n = x.Length; n = (int)Math.Max(50, 3 * Math.Sqrt(n)); int folds = 10; int foldSize = x.Length <= 30 ? 1 : x.Length / folds; if (foldSize == 1) folds = x.Length; double[] x_loo = new double[(folds - 1) * foldSize]; double[] y_loo = new double[(folds - 1) * foldSize]; double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray(); var models = new List(); var sumTrainSE = 0.0; int nTrain = 0; var sumTestSE = 0.0; int nTest = 0; predictions = new double[y.Length]; for (int i = 0; i < folds; i++) { if (i < folds - 1) { //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize); Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize); Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize); } alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep); //Debug.Assert(y_loo.All(yi => yi != 0.0)); //Debug.Assert(x_loo.All(xi => xi != 0.0)); // training error for (int j = 0; j < x_loo.Length; j++) { var y_pred = alglib.spline1dcalc(interpolant, x[j]); double res = y[j] - y_pred; sumTrainSE += res * res; nTrain++; } // test for (int j = i * foldSize; j < (i + 1) * foldSize; j++) { predictions[j] = alglib.spline1dcalc(interpolant, x[j]); double res = y[j] - predictions[j]; sumTestSE += res * res; nTest++; } models.Add(new Spline1dModel(interpolant, targetVar, inputVars)); if (i < folds - 1) { //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize); // add current fold for next iteration Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize); Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize); } } cvRMSE = Math.Sqrt(sumTestSE / nTest); avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain); return new RegressionEnsembleModel(models); } public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) { Array.Sort(x, y); CubicSpline2(x, y, inputVars); } private struct SplineData { public double a, b, c, d, x, y; }; /* * The procedure Quincunx, takes as arguments the vectors u, v and w which are respectively the diagonal, the first supradiagonal and the second supradiagonal of the banded matrix. The vector on the LHS of the equation (80) is placed in q which contains the solution on the completion of the procedure. */ private void Quincunx(int n, MyArray u, MyArray v, MyArray w, MyArray q) { // { factorisation} u[-1] = 0; u[0] = 0; for (int j = 1; j <= n - 1; j++) { u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2); v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j]; w[j] = w[j] / u[j]; } // { forward substitution} for (int j = 1; j <= n - 1; j++) { q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2]; } for (int j = 1; j < n - 1; j++) { q[j] = q[j] / u[j]; } // { back substitution} q[n + 1] = 0; q[n] = 0; for (int j = n - 1; j >= 1; j--) { q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2]; } } public void CubicSpline2(double[] x, double[] y, string[] inputVars) { // see Pollock: Smoothing Splines int n = x.Length; double lambda = 1.0; double[] sigma = Enumerable.Repeat(1.0, n).ToArray(); SplineData[] S = new SplineData[x.Length + 1]; for (int i = 0; i < x.Length; i++) { S[i].x = x[i]; S[i].y = y[i]; } S[x.Length].x = S[x.Length - 1].x; S[x.Length].y = S[x.Length - 1].y; // var double[] h = new double[n], r = new double[n], f = new double[n], p = new double[n]; var q = new MyArray(-1, n + 3); var u = new MyArray(-1, n + 3); var v = new MyArray(-1, n + 3); var w = new MyArray(-1, n + 3); double mu; mu = 2 * (1 - lambda) / (3 * lambda); h[0] = S[1].x - S[0].x; r[0] = 3 / h[0]; for (int i = 1; i < n - 1; i++) { h[i] = S[i + 1].x - S[i].x; r[i] = 3 / h[i]; f[i] = -(r[i - 1] + r[i]); p[i] = 2 * (S[i + 1].x - S[i - 1].x); q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1]; } for (int i = 1; i < n; i++) { u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1]; // TODO Sigma 1..n u[i] = mu * u[i] + p[i]; v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1]; v[i] = mu * v[i] + h[i]; w[i] = mu * r[i] * r[i + 1] * sigma[i + 1]; } Quincunx(n, u, v, w, q); // { Spline P arameters} S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0]; S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0]; S[0].a = q[1] / (3 * h[0]); S[0].b = 0; S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3; r[0] = 0; for (int j = 1; j < n; j++) { S[j].a = (q[j + 1] - q[j]) / (3 * h[j]); S[j].b = q[j]; S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c; S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1]; S[j].d = y[j] - mu * S[j].d * sigma[j]; } // { SmoothingSpline} } public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) { double s = x.Length + 1; double stdTol = ((IValueParameter)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable); Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone()))); } public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars, out double avgTrainRMSE, out double cvRMSE) { int info; int folds = 10; int foldSize = x.Length <= 30 ? 1 : x.Length / folds; if (foldSize == 1) folds = x.Length; double[] x_loo = new double[(folds - 1) * foldSize]; double[] y_loo = new double[(folds - 1) * foldSize]; var models = new List(); var sumTrainSE = 0.0; int nTrain = 0; var sumTestSE = 0.0; int nTest = 0; for (int i = 0; i < folds; i++) { if (i < folds - 1) { Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1); Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1); } var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar); // training error for(int j = 0;j minTolerance) { double curCVRMSE; double avgTrainRmse; var model = Splines.CalculateSmoothingSplineReinsch( x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE); if (curCVRMSE < cvRMSE) { cvRMSE = curCVRMSE; optTolerance = tol; bestModel = model; } tol *= 0.85; } return bestModel; } public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) { var minX = xOrig.Min(); var maxX = xOrig.Max(); var range = maxX - minX; double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray(); SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false); // See Smoothing by Spline Functions, Reinsch, 1967 // move x and y into an array indexed with 1..n to match indexing in Reinsch paper int n = xOrig.Length; var x = new MyArray(1, xOrig); var y = new MyArray(1, yOrig); var inv_dy = new MyArray(1, w); int n1 = 1; int n2 = n; // results var a = new MyArray(n1, n); var b = new MyArray(n1, n); var c = new MyArray(n1, n); var d = new MyArray(n1, n); // smooth { int i, m1, m2; double e, f, f2, g = 0.0, h, p; MyArray r = new MyArray(n1 - 1, n + 2), r1 = new MyArray(n1 - 1, n + 2), r2 = new MyArray(n1 - 1, n + 2), t = new MyArray(n1 - 1, n + 2), t1 = new MyArray(n1 - 1, n + 2), u = new MyArray(n1 - 1, n + 2), v = new MyArray(n1 - 1, n + 2); m1 = n1 - 1; m2 = n2 + 1; r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] = u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0; m1 = n1 + 1; m2 = n2 - 1; h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h; for (i = m1; i <= m2; i++) { g = h; h = x[i + 1] - x[i]; e = f; f = (y[i + 1] - y[i]) / h; a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3; r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h; r1[i] = -inv_dy[i] / g - inv_dy[i] / h; } for (i = m1; i <= m2; i++) { b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i]; c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1]; d[i] = r[i] * r2[i + 2]; } f2 = -s; next: for (i = m1; i <= m2; i++) { r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2]; r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]); u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2]; f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p; } for (i = m2; i >= m1; i--) { u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2]; } e = h = 0; for (i = n1; i <= m2; i++) { g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]); v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g); } g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h; g = f2; f2 = e * p * p; if (f2 >= s || f2 <= g) goto fin; f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]); for (i = m1; i <= m2; i++) { g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]); g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2]; f = f + g * r[i] * g; r[i] = g; } h = e - p * f; if (h <= 0) goto fin; p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next; fin: for (i = n1; i <= n2; i++) { a[i] = y[i] - p * v[i]; c[i] = u[i]; } for (i = n1; i <= m2; i++) { h = x[i + 1] - x[i]; d[i] = (c[i + 1] - c[i]) / (3 * h); b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h; } } // extrapolate for xx > x[n2] b[b.Length] = b[b.Length - 1]; d[1] = 0; return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars); } private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) { // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1 double[] w = Enumerable.Repeat(1.0, x.Length).ToArray(); // weights necessary for sortAndBin but are ignored below (TODO) SortAndBin(x, y, w, out x, out y, out w, scaling: false); int n = x.Length; SparseMatrix delta = new SparseMatrix(n - 2, n); // double[,] delta = new double[n - 2, n]; //double[,] W = new double[n - 2, n - 2]; SparseMatrix W = new SparseMatrix(n - 2, n - 2); Matrix WInvD = new DenseMatrix(n - 2, n); // double[,] W_inv_D = new double[n - 2, n]; // double[,] K = new double[n, n]; // go over successive knots to determine distances and fill Delta and W for (int i = 0; i < n - 2; i++) { double h = x[i + 1] - x[i]; double h_next = x[i + 2] - x[i + 1]; delta[i, i] = 1.0 / h; delta[i, i + 1] = -1.0 / h - 1.0 / h_next; delta[i, i + 2] = 1.0 / h_next; W[i, i] = (h + h_next) / 3; if (i > 0) { W[i - 1, i] = W[i, i - 1] = h / 6.0; } } // WInvD = W.Cholesky().Solve(delta); var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab()); // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n) // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2) var K = delta.TransposeThisAndMultiply(WInvD); double lambda = ((IValueParameter)Parameters["Lambda"]).Value.Value; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { K[i, j] *= lambda; if (i == j) K[i, j] += 1; } } // solve for y // double[] s; // int solverInfo; // alglib.densesolverreport solverRep; // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s); var s = K.Solve(new DenseVector(y)).ToArray(); Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars), (IRegressionProblemData)Problem.ProblemData.Clone()))); } public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) { var sortedIdx = Enumerable.Range(0, x.Length).ToArray(); // sort by x Array.Sort(x, sortedIdx); var xl = new List(); var yl = new List(); var wl = new List(); int n = x.Length; var range = x[n - 1] - x[0]; var binSize = range / n / 10; { // binning int i = 0; while (i < n) { int k = 0; int j = i; double sumX = 0.0; double sumY = 0.0; double sumW = 0.0; while (j < n && x[j] - x[i] <= binSize) { k++; sumX += x[j]; sumY += y[sortedIdx[j]]; sumW += w[sortedIdx[j]]; j++; } var avgX = sumX / k; if (scaling) avgX = (avgX - x[0]) / range; xl.Add(avgX); yl.Add(sumY / k); wl.Add(sumW); i += k; } } x2 = xl.ToArray(); y2 = yl.ToArray(); w2 = wl.ToArray(); } private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) { Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars), (IRegressionProblemData)Problem.ProblemData.Clone()))); } private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) { Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars), (IRegressionProblemData)Problem.ProblemData.Clone()))); } } // array with non-zero lower index internal class MyArray { private T[] arr; private int lowerBound; public int Length { get { return arr.Length; } } public T this[int key] { get { return arr[key - lowerBound]; } set { arr[key - lowerBound] = value; } } public MyArray(int lowerBound, int numElements) { this.lowerBound = lowerBound; arr = new T[numElements]; } public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) { Array.Copy(source, arr, source.Length); } public T[] ToArray() { var res = new T[arr.Length]; Array.Copy(arr, res, res.Length); return res; } } // UNFINISHED public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel { private MyArray a; private MyArray b; private MyArray c; private MyArray d; private MyArray x; private double offset; private double scale; public string TargetVariable { get; set; } public IEnumerable VariablesUsedForPrediction { get; private set; } public event EventHandler TargetVariableChanged; public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) { this.TargetVariable = orig.TargetVariable; this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); this.a = orig.a; this.b = orig.b; this.c = orig.c; this.d = orig.d; this.x = orig.x; this.scale = orig.scale; this.offset = orig.offset; } internal ReinschSmoothingSplineModel( MyArray a, MyArray b, MyArray c, MyArray d, MyArray x, string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") { this.a = a; this.b = b; this.c = c; this.d = d; this.x = x; this.TargetVariable = targetVar; this.VariablesUsedForPrediction = inputs; this.scale = scale; this.offset = offset; } public override IDeepCloneable Clone(Cloner cloner) { return new ReinschSmoothingSplineModel(this, cloner); } public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); } public double GetEstimatedValue(double xx) { xx = (xx - offset) * scale; int n = a.Length; if (xx <= x[1]) { double h = xx - x[1]; return a[1] + h * (b[1] + h * (c[1] + h * d[1])); } else if (xx >= x[n]) { double h = xx - x[n]; return a[n] + h * (b[n] + h * (c[n] + h * d[n])); } else { // binary search int lower = 1; int upper = n; while (true) { if (upper < lower) throw new InvalidProgramException(); int i = lower + (upper - lower) / 2; if (x[i] <= xx && xx < x[i + 1]) { double h = xx - x[i]; return a[i] + h * (b[i] + h * (c[i] + h * d[i])); } else if (xx < x[i]) { upper = i - 1; } else { lower = i + 1; } } } } public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) { int n = x.Length; var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray(); foreach (var factor in VariablesUsedForPrediction.Skip(1)) { product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); } foreach (var xx in product) { yield return GetEstimatedValue(xx); } } } // UNFINISHED public class SmoothingSplineModel : NamedItem, IRegressionModel { private double[] s; private IInterpolation interpolation; public string TargetVariable { get; set; } public IEnumerable VariablesUsedForPrediction { get; private set; } public event EventHandler TargetVariableChanged; public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) { this.TargetVariable = orig.TargetVariable; this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); this.s = orig.s; // TODO this.interpolation = orig.interpolation; } public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") { this.s = s; this.TargetVariable = targetVar; this.VariablesUsedForPrediction = inputs; this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s); } public override IDeepCloneable Clone(Cloner cloner) { return new SmoothingSplineModel(this, cloner); } public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); } public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) { foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) { yield return interpolation.Interpolate(x); } } } // UNFINISHED public class Spline1dModel : NamedItem, IRegressionModel { private alglib.spline1dinterpolant interpolant; public string TargetVariable { get; set; } public IEnumerable VariablesUsedForPrediction { get; private set; } public event EventHandler TargetVariableChanged; public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) { this.TargetVariable = orig.TargetVariable; this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy(); } public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") { this.interpolant = interpolant; this.TargetVariable = targetVar; this.VariablesUsedForPrediction = inputs; } public override IDeepCloneable Clone(Cloner cloner) { return new Spline1dModel(this, cloner); } public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); } public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) { var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray(); foreach (var factor in VariablesUsedForPrediction.Skip(1)) { product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); } foreach (var x in product) { yield return alglib.spline1dcalc(interpolant, x); } } } // UNFINISHED public class MathNetSplineModel : NamedItem, IRegressionModel { private IInterpolation interpolant; public string TargetVariable { get; set; } public IEnumerable VariablesUsedForPrediction { get; private set; } public event EventHandler TargetVariableChanged; public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) { this.TargetVariable = orig.TargetVariable; this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); this.interpolant = orig.interpolant; // TODO COPY! } public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") { this.interpolant = interpolant; this.TargetVariable = targetVar; this.VariablesUsedForPrediction = inputs; } public override IDeepCloneable Clone(Cloner cloner) { return new MathNetSplineModel(this, cloner); } public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); } public IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) { foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) { yield return interpolant.Interpolate(x); } } } }