[15352] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
[15450] | 24 | using System.Diagnostics;
|
---|
[15352] | 25 | using System.Linq;
|
---|
| 26 | using System.Threading;
|
---|
| 27 | using HeuristicLab.Common;
|
---|
| 28 | using HeuristicLab.Core;
|
---|
| 29 | using HeuristicLab.Data;
|
---|
| 30 | using HeuristicLab.Optimization;
|
---|
| 31 | using HeuristicLab.Parameters;
|
---|
| 32 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 33 | using HeuristicLab.Problems.DataAnalysis;
|
---|
[15365] | 34 | using HeuristicLab.Random;
|
---|
[15362] | 35 | using MathNet.Numerics.Interpolation;
|
---|
[15364] | 36 | using MathNet.Numerics.LinearAlgebra.Double;
|
---|
| 37 | using MathNet.Numerics.LinearAlgebra.Double.Solvers;
|
---|
[15352] | 38 |
|
---|
| 39 | namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
|
---|
| 40 | [Item("Splines", "")]
|
---|
| 41 | [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
|
---|
| 42 | [StorableClass]
|
---|
| 43 | public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
|
---|
| 44 | [StorableConstructor]
|
---|
| 45 | private Splines(bool deserializing) : base(deserializing) { }
|
---|
| 46 | [StorableHook(HookType.AfterDeserialization)]
|
---|
| 47 | private void AfterDeserialization() {
|
---|
| 48 | }
|
---|
| 49 |
|
---|
| 50 | private Splines(Splines original, Cloner cloner)
|
---|
| 51 | : base(original, cloner) {
|
---|
| 52 | }
|
---|
| 53 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 54 | return new Splines(this, cloner);
|
---|
| 55 | }
|
---|
| 56 |
|
---|
| 57 | public Splines()
|
---|
| 58 | : base() {
|
---|
| 59 | var validTypes = new ItemSet<StringValue>(
|
---|
| 60 | new[] {
|
---|
[15362] | 61 | "Monotone", "Akima", "Catmull-Rom", "Cubic", "Linear",
|
---|
| 62 | "Cubic - Natural (Math.NET)",
|
---|
| 63 | "Polynomial (Math.NET)",
|
---|
| 64 | "Rational (Math.NET)",
|
---|
| 65 | "LogLinear (Math.NET)",
|
---|
| 66 | "Common (Math.NET)",
|
---|
[15364] | 67 | "Smoothing Spline (Mine)",
|
---|
| 68 | "Smoothing Spline (Reinsch)",
|
---|
[15442] | 69 | "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
|
---|
[15369] | 70 | "B-Spline Smoothing",
|
---|
[15449] | 71 | "Penalized Regression Spline (alglib)",
|
---|
| 72 | "CUBGCV"
|
---|
[15352] | 73 | }.Select(s => new StringValue(s)));
|
---|
| 74 |
|
---|
[15449] | 75 | Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
|
---|
[15364] | 76 | Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
|
---|
[15365] | 77 | Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
|
---|
[15352] | 78 | Problem = new RegressionProblem();
|
---|
| 79 | }
|
---|
| 80 |
|
---|
| 81 |
|
---|
| 82 | protected override void Run(CancellationToken cancellationToken) {
|
---|
| 83 | double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
|
---|
| 84 | if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
|
---|
| 85 | throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
|
---|
| 86 |
|
---|
| 87 | var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
|
---|
| 88 | if (inputVars.Length > 3) throw new ArgumentException();
|
---|
| 89 |
|
---|
| 90 | var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
|
---|
| 91 | if (inputVars.Length == 1) {
|
---|
| 92 |
|
---|
| 93 | var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
|
---|
| 94 | alglib.spline1dinterpolant c;
|
---|
| 95 | var type = ((StringValue)Parameters["Type"].ActualValue).Value;
|
---|
| 96 | switch (type) {
|
---|
| 97 | case "Monotone":
|
---|
[15364] | 98 | alglib.spline1dbuildmonotone(x, y, out c);
|
---|
[15362] | 99 | AddAlglibSplineResult(c, inputVars);
|
---|
| 100 | break;
|
---|
[15352] | 101 | case "Akima":
|
---|
[15362] | 102 | alglib.spline1dbuildakima(x, y, out c); AddAlglibSplineResult(c, inputVars);
|
---|
| 103 | break;
|
---|
| 104 | ;
|
---|
[15352] | 105 | case "Catmull-Rom":
|
---|
[15362] | 106 | alglib.spline1dbuildcatmullrom(x, y, out c); AddAlglibSplineResult(c, inputVars);
|
---|
| 107 | break;
|
---|
| 108 |
|
---|
[15352] | 109 | case "Cubic":
|
---|
[15362] | 110 | alglib.spline1dbuildcubic(x, y, out c); AddAlglibSplineResult(c, inputVars);
|
---|
| 111 | break;
|
---|
| 112 |
|
---|
[15352] | 113 | case "Linear":
|
---|
[15362] | 114 | alglib.spline1dbuildlinear(x, y, out c); AddAlglibSplineResult(c, inputVars);
|
---|
| 115 | break;
|
---|
| 116 | case "Cubic - Natural (Math.NET)": {
|
---|
| 117 | var spline = MathNet.Numerics.Interpolation.CubicSpline.InterpolateNatural(x, y);
|
---|
| 118 | AddMathNetSplineResult(spline, inputVars);
|
---|
| 119 | break;
|
---|
| 120 | }
|
---|
| 121 | case "Common (Math.NET)": {
|
---|
| 122 | var spline = MathNet.Numerics.Interpolate.Common(x, y);
|
---|
| 123 | AddMathNetSplineResult(spline, inputVars);
|
---|
| 124 | break;
|
---|
| 125 | }
|
---|
| 126 | case "LogLinear (Math.NET)": {
|
---|
| 127 | var spline = MathNet.Numerics.Interpolate.LogLinear(x, y);
|
---|
| 128 | AddMathNetSplineResult(spline, inputVars);
|
---|
| 129 | break;
|
---|
| 130 | }
|
---|
| 131 | case "Polynomial (Math.NET)": {
|
---|
| 132 | var spline = MathNet.Numerics.Interpolate.Polynomial(x, y);
|
---|
| 133 | AddMathNetSplineResult(spline, inputVars);
|
---|
| 134 | break;
|
---|
| 135 | }
|
---|
| 136 | case "Rational (Math.NET)": {
|
---|
| 137 | var spline = MathNet.Numerics.Interpolate.RationalWithoutPoles(x, y);
|
---|
| 138 | AddMathNetSplineResult(spline, inputVars);
|
---|
| 139 | break;
|
---|
| 140 | }
|
---|
[15364] | 141 | case "Smoothing Spline (Mine)": {
|
---|
| 142 | CalculateSmoothingSpline(x, y, inputVars);
|
---|
| 143 | break;
|
---|
| 144 | }
|
---|
| 145 | case "Smoothing Spline (Reinsch)": {
|
---|
| 146 | CalculateSmoothingSplineReinsch(x, y, inputVars);
|
---|
| 147 | break;
|
---|
| 148 | }
|
---|
[15442] | 149 | case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
|
---|
| 150 | double optTol, looRMSE;
|
---|
| 151 | var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
|
---|
| 152 | Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
| 153 | Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
|
---|
| 154 | Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
|
---|
[15365] | 155 | break;
|
---|
| 156 | }
|
---|
[15449] | 157 | case "CUBGCV": {
|
---|
| 158 | CubicSplineGCV.CubGcvReport report;
|
---|
[15450] | 159 | var model =
|
---|
| 160 | CubicSplineGCV.CalculateCubicSpline(x, y,
|
---|
[15449] | 161 | Problem.ProblemData.TargetVariable, inputVars, out report);
|
---|
| 162 | var targetVar = Problem.ProblemData.TargetVariable;
|
---|
| 163 | var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
|
---|
| 164 | Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
|
---|
| 165 | Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
|
---|
| 166 | Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
|
---|
| 167 | Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
|
---|
[15450] | 168 | Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
|
---|
[15449] | 169 | Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
|
---|
| 170 | Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
|
---|
| 171 | Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
|
---|
| 172 | break;
|
---|
| 173 | }
|
---|
[15365] | 174 | case "B-Spline Smoothing": {
|
---|
| 175 | BSplineSmoothing(x, y, inputVars);
|
---|
| 176 | break;
|
---|
| 177 | }
|
---|
[15442] | 178 | case "Penalized Regression Spline (alglib)": {
|
---|
[15369] | 179 | var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
|
---|
| 180 | var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
|
---|
| 181 | var targetVar = Problem.ProblemData.TargetVariable;
|
---|
| 182 | var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
|
---|
| 183 | Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
|
---|
| 184 |
|
---|
| 185 | break;
|
---|
[15442] | 186 | }
|
---|
[15352] | 187 | default: throw new NotSupportedException();
|
---|
| 188 | }
|
---|
| 189 |
|
---|
| 190 | }
|
---|
| 191 | }
|
---|
[15362] | 192 |
|
---|
[15369] | 193 |
|
---|
[15442] | 194 | public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
|
---|
[15369] | 195 | int info;
|
---|
| 196 | alglib.spline1dinterpolant interpolant;
|
---|
| 197 | alglib.spline1dfitreport rep;
|
---|
| 198 | int n = x.Length;
|
---|
| 199 | n = (int)Math.Max(50, 3 * Math.Sqrt(n));
|
---|
| 200 |
|
---|
| 201 | alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
|
---|
| 202 | return new Spline1dModel(interpolant, targetVar, inputVars);
|
---|
| 203 | }
|
---|
| 204 |
|
---|
[15442] | 205 | public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
|
---|
| 206 | out double avgTrainRMSE,
|
---|
[15450] | 207 | out double cvRMSE) {
|
---|
[15442] | 208 | int info;
|
---|
| 209 | alglib.spline1dinterpolant interpolant;
|
---|
| 210 | alglib.spline1dfitreport rep;
|
---|
| 211 | int n = x.Length;
|
---|
| 212 | n = (int)Math.Max(50, 3 * Math.Sqrt(n));
|
---|
[15364] | 213 |
|
---|
[15450] | 214 | int folds = 10;
|
---|
| 215 | int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
|
---|
| 216 | if (foldSize == 1) folds = x.Length;
|
---|
[15365] | 217 |
|
---|
[15450] | 218 | double[] x_loo = new double[(folds - 1) * foldSize];
|
---|
| 219 | double[] y_loo = new double[(folds - 1) * foldSize];
|
---|
| 220 |
|
---|
[15442] | 221 | var models = new List<IRegressionModel>();
|
---|
[15450] | 222 | var sumTrainSE = 0.0;
|
---|
| 223 | int nTrain = 0;
|
---|
| 224 | var sumTestSE = 0.0;
|
---|
| 225 | int nTest = 0;
|
---|
[15365] | 226 |
|
---|
[15450] | 227 |
|
---|
| 228 | for (int i = 0; i < folds; i++) {
|
---|
| 229 | if (i < folds - 1) {
|
---|
| 230 | //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
|
---|
| 231 | Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
|
---|
| 232 | Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
|
---|
| 233 | }
|
---|
[15442] | 234 | alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep);
|
---|
[15450] | 235 | Debug.Assert(y_loo.All(yi => yi != 0.0));
|
---|
| 236 | Debug.Assert(x_loo.All(xi => xi != 0.0));
|
---|
[15365] | 237 |
|
---|
[15450] | 238 | // training error
|
---|
| 239 | for (int j = 0; j < x_loo.Length; j++) {
|
---|
| 240 | var y_pred = alglib.spline1dcalc(interpolant, x[i]);
|
---|
| 241 | double res = y[j] - y_pred;
|
---|
| 242 | sumTrainSE += res * res;
|
---|
| 243 | nTrain++;
|
---|
| 244 | }
|
---|
| 245 | // test
|
---|
| 246 | for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
|
---|
| 247 | var y_pred = alglib.spline1dcalc(interpolant, x[i]);
|
---|
| 248 | double res = y[j] - y_pred;
|
---|
| 249 | sumTestSE += res * res;
|
---|
| 250 | nTest++;
|
---|
| 251 | }
|
---|
| 252 |
|
---|
| 253 | models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
|
---|
| 254 |
|
---|
| 255 | if (i < folds - 1) {
|
---|
| 256 | //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
|
---|
| 257 | // add current fold for next iteration
|
---|
| 258 | Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
|
---|
| 259 | Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
|
---|
| 260 | }
|
---|
[15442] | 261 | }
|
---|
| 262 |
|
---|
[15450] | 263 | cvRMSE = Math.Sqrt(sumTestSE / nTest);
|
---|
| 264 | avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
|
---|
[15442] | 265 |
|
---|
| 266 | return new RegressionEnsembleModel(models);
|
---|
[15365] | 267 | }
|
---|
| 268 |
|
---|
[15442] | 269 | public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) {
|
---|
| 270 | Array.Sort(x, y);
|
---|
| 271 | CubicSpline2(x, y, inputVars);
|
---|
| 272 | }
|
---|
| 273 |
|
---|
[15365] | 274 | private struct SplineData {
|
---|
| 275 | public double a, b, c, d, x, y;
|
---|
| 276 | };
|
---|
| 277 |
|
---|
| 278 |
|
---|
| 279 | /*
|
---|
| 280 | * The procedure Quincunx, takes as arguments
|
---|
| 281 | the vectors u, v and w which are respectively the diagonal, the first supradiagonal
|
---|
| 282 | and the second supradiagonal of the banded matrix. The vector on
|
---|
| 283 | the LHS of the equation (80) is placed in q which contains the solution on the
|
---|
| 284 | completion of the procedure.
|
---|
| 285 | */
|
---|
| 286 | private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
|
---|
| 287 | // { factorisation}
|
---|
| 288 | u[-1] = 0;
|
---|
| 289 | u[0] = 0;
|
---|
| 290 | for (int j = 1; j <= n - 1; j++) {
|
---|
| 291 | u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
|
---|
| 292 | v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
|
---|
| 293 | w[j] = w[j] / u[j];
|
---|
[15364] | 294 | }
|
---|
[15365] | 295 | // { forward substitution}
|
---|
| 296 | for (int j = 1; j <= n - 1; j++) {
|
---|
| 297 | q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
|
---|
| 298 | }
|
---|
| 299 | for (int j = 1; j < n - 1; j++) {
|
---|
| 300 | q[j] = q[j] / u[j];
|
---|
| 301 | }
|
---|
| 302 | // { back substitution}
|
---|
| 303 | q[n + 1] = 0;
|
---|
| 304 | q[n] = 0;
|
---|
| 305 | for (int j = n - 1; j >= 1; j--) {
|
---|
| 306 | q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
|
---|
| 307 | }
|
---|
| 308 | }
|
---|
[15364] | 309 |
|
---|
[15365] | 310 | public void CubicSpline2(double[] x, double[] y, string[] inputVars) {
|
---|
| 311 | // see Pollock: Smoothing Splines
|
---|
| 312 | int n = x.Length;
|
---|
| 313 | double lambda = 1.0;
|
---|
| 314 | double[] sigma = Enumerable.Repeat(1.0, n).ToArray();
|
---|
| 315 | SplineData[] S = new SplineData[x.Length + 1];
|
---|
| 316 | for (int i = 0; i < x.Length; i++) {
|
---|
| 317 | S[i].x = x[i];
|
---|
| 318 | S[i].y = y[i];
|
---|
[15364] | 319 | }
|
---|
[15365] | 320 | S[x.Length].x = S[x.Length - 1].x;
|
---|
| 321 | S[x.Length].y = S[x.Length - 1].y;
|
---|
| 322 |
|
---|
| 323 | // var
|
---|
| 324 | double[] h = new double[n],
|
---|
| 325 | r = new double[n],
|
---|
| 326 | f = new double[n],
|
---|
| 327 | p = new double[n];
|
---|
| 328 | var q = new MyArray<double>(-1, n + 3);
|
---|
| 329 | var u = new MyArray<double>(-1, n + 3);
|
---|
| 330 | var v = new MyArray<double>(-1, n + 3);
|
---|
| 331 | var w = new MyArray<double>(-1, n + 3);
|
---|
| 332 | double mu;
|
---|
| 333 |
|
---|
| 334 | mu = 2 * (1 - lambda) / (3 * lambda);
|
---|
| 335 | h[0] = S[1].x - S[0].x;
|
---|
| 336 | r[0] = 3 / h[0];
|
---|
| 337 | for (int i = 1; i < n - 1; i++) {
|
---|
| 338 | h[i] = S[i + 1].x - S[i].x;
|
---|
| 339 | r[i] = 3 / h[i];
|
---|
| 340 | f[i] = -(r[i - 1] + r[i]);
|
---|
| 341 | p[i] = 2 * (S[i + 1].x - S[i - 1].x);
|
---|
| 342 | q[i] = 3 * (S[i + 1].y - S[i].y) / h[i] - 3 * (S[i].y - S[i - 1].y) / h[i - 1];
|
---|
[15364] | 343 | }
|
---|
[15365] | 344 | for (int i = 1; i < n; i++) {
|
---|
| 345 | u[i] = Math.Pow(r[i - 1], 2) * sigma[i - 1] + Math.Pow(f[i], 2) * sigma[i] + Math.Pow(r[i], 2) * sigma[i + 1]; // TODO Sigma 1..n
|
---|
| 346 | u[i] = mu * u[i] + p[i];
|
---|
| 347 | v[i] = f[i] * r[i] * sigma[i] + r[i] * f[i + 1] * sigma[i + 1];
|
---|
| 348 | v[i] = mu * v[i] + h[i];
|
---|
| 349 | w[i] = mu * r[i] * r[i + 1] * sigma[i + 1];
|
---|
| 350 | }
|
---|
| 351 | Quincunx(n, u, v, w, q);
|
---|
| 352 | // { Spline P arameters}
|
---|
| 353 | S[0].d = S[0].y - mu * r[0] * q[1] * sigma[0];
|
---|
| 354 | S[1].d = S[1].y - mu * (f[1] * q[1] + r[1] * q[2]) * sigma[0];
|
---|
| 355 | S[0].a = q[1] / (3 * h[0]);
|
---|
| 356 | S[0].b = 0;
|
---|
| 357 | S[0].c = (S[1].d - S[0].d) / h[0] - q[1] * h[0] / 3;
|
---|
| 358 | r[0] = 0;
|
---|
| 359 | for (int j = 1; j < n; j++) {
|
---|
| 360 | S[j].a = (q[j + 1] - q[j]) / (3 * h[j]);
|
---|
| 361 | S[j].b = q[j];
|
---|
| 362 | S[j].c = (q[j] + q[j - 1]) * h[j - 1] + S[j - 1].c;
|
---|
| 363 | S[j].d = r[j - 1] * q[j - 1] + f[j] * q[j] + r[j] * q[j + 1];
|
---|
| 364 | S[j].d = y[j] - mu * S[j].d * sigma[j];
|
---|
| 365 | }
|
---|
[15364] | 366 |
|
---|
[15365] | 367 | // { SmoothingSpline}
|
---|
[15364] | 368 | }
|
---|
| 369 |
|
---|
[15442] | 370 | public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
|
---|
| 371 | double s = x.Length + 1;
|
---|
[15365] | 372 | double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
|
---|
[15364] | 373 |
|
---|
[15442] | 374 | var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
|
---|
[15364] | 375 |
|
---|
[15442] | 376 | Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
| 377 | }
|
---|
[15364] | 378 |
|
---|
| 379 |
|
---|
[15442] | 380 | public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
|
---|
| 381 | out double avgTrainRMSE,
|
---|
[15450] | 382 | out double cvRMSE) {
|
---|
[15442] | 383 | int info;
|
---|
[15364] | 384 |
|
---|
[15450] | 385 | int folds = 10;
|
---|
| 386 | int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
|
---|
| 387 | if (foldSize == 1) folds = x.Length;
|
---|
[15365] | 388 |
|
---|
[15450] | 389 | double[] x_loo = new double[(folds - 1) * foldSize];
|
---|
| 390 | double[] y_loo = new double[(folds - 1) * foldSize];
|
---|
| 391 |
|
---|
[15442] | 392 | var models = new List<IRegressionModel>();
|
---|
[15450] | 393 | var sumTrainSE = 0.0;
|
---|
| 394 | int nTrain = 0;
|
---|
| 395 | var sumTestSE = 0.0;
|
---|
| 396 | int nTest = 0;
|
---|
[15365] | 397 |
|
---|
| 398 |
|
---|
[15450] | 399 | for (int i = 0; i < folds; i++) {
|
---|
| 400 | if (i < folds - 1) {
|
---|
| 401 | Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
|
---|
| 402 | Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
|
---|
| 403 | }
|
---|
| 404 | var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
|
---|
[15365] | 405 |
|
---|
[15450] | 406 | // training error
|
---|
| 407 | for(int j = 0;j<x_loo.Length;j++) {
|
---|
| 408 | var y_pred = model.GetEstimatedValue(x[j]);
|
---|
| 409 | double res = y[j] - y_pred;
|
---|
| 410 | sumTrainSE += res * res;
|
---|
| 411 | nTrain++;
|
---|
| 412 | }
|
---|
| 413 | // test
|
---|
| 414 | for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
|
---|
| 415 | var y_pred = model.GetEstimatedValue(x[j]);
|
---|
| 416 | double res = y[j] - y_pred;
|
---|
| 417 | sumTestSE += res * res;
|
---|
| 418 | nTest++;
|
---|
| 419 | }
|
---|
[15365] | 420 |
|
---|
[15442] | 421 | models.Add(model);
|
---|
[15365] | 422 |
|
---|
[15450] | 423 | if (i < folds - 1) {
|
---|
| 424 | // add current fold for next iteration
|
---|
| 425 | Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
|
---|
| 426 | Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
|
---|
[15442] | 427 | }
|
---|
| 428 | }
|
---|
| 429 |
|
---|
[15450] | 430 | cvRMSE = Math.Sqrt(sumTestSE / nTest);
|
---|
| 431 | avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
|
---|
[15442] | 432 |
|
---|
| 433 | return new RegressionEnsembleModel(models);
|
---|
[15365] | 434 | }
|
---|
| 435 |
|
---|
| 436 |
|
---|
[15450] | 437 | // automatically determines tolerance using loo cv, SLOW!
|
---|
[15442] | 438 | public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
|
---|
[15450] | 439 | out double optTolerance, out double cvRMSE) {
|
---|
[15442] | 440 | var maxTolerance = y.StandardDeviation();
|
---|
| 441 | var minTolerance = maxTolerance * 1E-6;
|
---|
| 442 | optTolerance = maxTolerance;
|
---|
| 443 | var tol = maxTolerance;
|
---|
[15450] | 444 | cvRMSE = double.PositiveInfinity;
|
---|
[15442] | 445 | IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
|
---|
| 446 | while (tol > minTolerance) {
|
---|
[15450] | 447 | double curCVRMSE;
|
---|
[15442] | 448 | double avgTrainRmse;
|
---|
| 449 | var model = Splines.CalculateSmoothingSplineReinsch(
|
---|
[15450] | 450 | x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
|
---|
[15442] | 451 |
|
---|
[15450] | 452 | if (curCVRMSE < cvRMSE) {
|
---|
| 453 | cvRMSE = curCVRMSE;
|
---|
[15442] | 454 | optTolerance = tol;
|
---|
| 455 | bestModel = model;
|
---|
| 456 | }
|
---|
| 457 | tol *= 0.85;
|
---|
| 458 | }
|
---|
| 459 | return bestModel;
|
---|
| 460 | }
|
---|
| 461 |
|
---|
| 462 | public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
|
---|
[15364] | 463 | var minX = xOrig.Min();
|
---|
| 464 | var maxX = xOrig.Max();
|
---|
| 465 | var range = maxX - minX;
|
---|
| 466 |
|
---|
[15365] | 467 | double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
|
---|
| 468 | SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
|
---|
[15364] | 469 |
|
---|
| 470 | // See Smoothing by Spline Functions, Reinsch, 1967
|
---|
| 471 | // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
|
---|
| 472 | int n = xOrig.Length;
|
---|
| 473 | var x = new MyArray<double>(1, xOrig);
|
---|
| 474 | var y = new MyArray<double>(1, yOrig);
|
---|
[15365] | 475 | var inv_dy = new MyArray<double>(1, w);
|
---|
[15364] | 476 |
|
---|
| 477 | int n1 = 1;
|
---|
[15365] | 478 | int n2 = n;
|
---|
[15364] | 479 |
|
---|
| 480 | // results
|
---|
| 481 | var a = new MyArray<double>(n1, n);
|
---|
| 482 | var b = new MyArray<double>(n1, n);
|
---|
| 483 | var c = new MyArray<double>(n1, n);
|
---|
| 484 | var d = new MyArray<double>(n1, n);
|
---|
| 485 |
|
---|
| 486 | // smooth
|
---|
| 487 | {
|
---|
| 488 | int i, m1, m2; double e, f, f2, g = 0.0, h, p;
|
---|
| 489 | MyArray<double>
|
---|
| 490 | r = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 491 | r1 = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 492 | r2 = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 493 | t = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 494 | t1 = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 495 | u = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 496 | v = new MyArray<double>(n1 - 1, n + 2);
|
---|
| 497 | m1 = n1 - 1;
|
---|
| 498 | m2 = n2 + 1;
|
---|
| 499 | r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
|
---|
| 500 | u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
|
---|
| 501 | m1 = n1 + 1;
|
---|
| 502 | m2 = n2 - 1;
|
---|
| 503 | h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
|
---|
| 504 | for (i = m1; i <= m2; i++) {
|
---|
| 505 | g = h; h = x[i + 1] - x[i];
|
---|
| 506 | e = f; f = (y[i + 1] - y[i]) / h;
|
---|
| 507 | a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
|
---|
| 508 | r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
|
---|
| 509 | r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
|
---|
| 510 | }
|
---|
| 511 | for (i = m1; i <= m2; i++) {
|
---|
| 512 | b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
|
---|
| 513 | c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
|
---|
| 514 | d[i] = r[i] * r2[i + 2];
|
---|
| 515 | }
|
---|
| 516 | f2 = -s;
|
---|
| 517 | next:
|
---|
| 518 | for (i = m1; i <= m2; i++) {
|
---|
| 519 | r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
|
---|
| 520 | r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
|
---|
| 521 | u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
|
---|
| 522 | f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
|
---|
| 523 | }
|
---|
| 524 | for (i = m2; i >= m1; i--) {
|
---|
| 525 | u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
|
---|
| 526 | }
|
---|
| 527 | e = h = 0;
|
---|
| 528 | for (i = n1; i <= m2; i++) {
|
---|
| 529 | g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
|
---|
| 530 | v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
|
---|
| 531 | }
|
---|
| 532 | g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
|
---|
| 533 | g = f2; f2 = e * p * p;
|
---|
| 534 | if (f2 >= s || f2 <= g) goto fin;
|
---|
| 535 | f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
|
---|
| 536 | for (i = m1; i <= m2; i++) {
|
---|
| 537 | g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
|
---|
| 538 | g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
|
---|
| 539 | f = f + g * r[i] * g; r[i] = g;
|
---|
| 540 | }
|
---|
| 541 | h = e - p * f; if (h <= 0) goto fin;
|
---|
| 542 | p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
|
---|
| 543 |
|
---|
| 544 | fin:
|
---|
| 545 | for (i = n1; i <= n2; i++) {
|
---|
| 546 | a[i] = y[i] - p * v[i];
|
---|
| 547 | c[i] = u[i];
|
---|
| 548 | }
|
---|
| 549 | for (i = n1; i <= m2; i++) {
|
---|
| 550 | h = x[i + 1] - x[i];
|
---|
| 551 | d[i] = (c[i + 1] - c[i]) / (3 * h);
|
---|
| 552 | b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
|
---|
| 553 | }
|
---|
| 554 | }
|
---|
| 555 |
|
---|
[15449] | 556 | // extrapolate for xx > x[n2]
|
---|
| 557 | b[b.Length] = b[b.Length - 1];
|
---|
| 558 | d[1] = 0;
|
---|
| 559 |
|
---|
[15365] | 560 | return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
|
---|
[15364] | 561 | }
|
---|
| 562 |
|
---|
| 563 | private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) {
|
---|
| 564 | // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1
|
---|
[15365] | 565 | double[] w = Enumerable.Repeat(1.0, x.Length).ToArray(); // weights necessary for sortAndBin but are ignored below (TODO)
|
---|
| 566 | SortAndBin(x, y, w, out x, out y, out w, scaling: false);
|
---|
[15364] | 567 | int n = x.Length;
|
---|
| 568 |
|
---|
| 569 | SparseMatrix delta = new SparseMatrix(n - 2, n);
|
---|
| 570 | // double[,] delta = new double[n - 2, n];
|
---|
| 571 | //double[,] W = new double[n - 2, n - 2];
|
---|
| 572 | SparseMatrix W = new SparseMatrix(n - 2, n - 2);
|
---|
| 573 | Matrix WInvD = new DenseMatrix(n - 2, n);
|
---|
| 574 |
|
---|
| 575 | // double[,] W_inv_D = new double[n - 2, n];
|
---|
| 576 | // double[,] K = new double[n, n];
|
---|
| 577 |
|
---|
| 578 | // go over successive knots to determine distances and fill Delta and W
|
---|
| 579 | for (int i = 0; i < n - 2; i++) {
|
---|
| 580 | double h = x[i + 1] - x[i];
|
---|
| 581 | double h_next = x[i + 2] - x[i + 1];
|
---|
| 582 | delta[i, i] = 1.0 / h;
|
---|
| 583 | delta[i, i + 1] = -1.0 / h - 1.0 / h_next;
|
---|
| 584 | delta[i, i + 2] = 1.0 / h_next;
|
---|
| 585 | W[i, i] = (h + h_next) / 3;
|
---|
| 586 | if (i > 0) {
|
---|
| 587 | W[i - 1, i] =
|
---|
| 588 | W[i, i - 1] = h / 6.0;
|
---|
| 589 | }
|
---|
| 590 | }
|
---|
| 591 |
|
---|
[15365] | 592 | // WInvD = W.Cholesky().Solve(delta);
|
---|
[15364] | 593 | var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab());
|
---|
| 594 |
|
---|
| 595 | // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n)
|
---|
| 596 | // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2)
|
---|
| 597 |
|
---|
| 598 | var K = delta.TransposeThisAndMultiply(WInvD);
|
---|
| 599 |
|
---|
| 600 | double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value;
|
---|
| 601 |
|
---|
| 602 | for (int i = 0; i < n; i++) {
|
---|
| 603 | for (int j = 0; j < n; j++) {
|
---|
| 604 | K[i, j] *= lambda;
|
---|
| 605 | if (i == j) K[i, j] += 1;
|
---|
| 606 | }
|
---|
| 607 | }
|
---|
| 608 |
|
---|
| 609 | // solve for y
|
---|
| 610 | // double[] s;
|
---|
| 611 | // int solverInfo;
|
---|
| 612 | // alglib.densesolverreport solverRep;
|
---|
| 613 | // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s);
|
---|
| 614 |
|
---|
| 615 | var s = K.Solve(new DenseVector(y)).ToArray();
|
---|
| 616 |
|
---|
| 617 | Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars),
|
---|
[15365] | 618 | (IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
[15364] | 619 | }
|
---|
| 620 |
|
---|
[15449] | 621 | public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
|
---|
[15365] | 622 | var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
|
---|
| 623 | // sort by x
|
---|
| 624 | Array.Sort(x, sortedIdx);
|
---|
[15364] | 625 |
|
---|
| 626 | var xl = new List<double>();
|
---|
| 627 | var yl = new List<double>();
|
---|
[15365] | 628 | var wl = new List<double>();
|
---|
[15364] | 629 |
|
---|
| 630 | int n = x.Length;
|
---|
| 631 | var range = x[n - 1] - x[0];
|
---|
| 632 | var binSize = range / n / 20;
|
---|
| 633 | {
|
---|
| 634 | // binning
|
---|
| 635 | int i = 0;
|
---|
| 636 | while (i < n) {
|
---|
| 637 | int k = 0;
|
---|
| 638 | int j = i;
|
---|
| 639 | double sumX = 0.0;
|
---|
| 640 | double sumY = 0.0;
|
---|
[15365] | 641 | double sumW = 0.0;
|
---|
[15364] | 642 | while (j < n && x[j] - x[i] <= binSize) {
|
---|
| 643 | k++;
|
---|
| 644 | sumX += x[j];
|
---|
[15365] | 645 | sumY += y[sortedIdx[j]];
|
---|
| 646 | sumW += w[sortedIdx[j]];
|
---|
[15364] | 647 | j++;
|
---|
| 648 | }
|
---|
| 649 | var avgX = sumX / k;
|
---|
| 650 | if (scaling) avgX = (avgX - x[0]) / range;
|
---|
[15365] | 651 | xl.Add(avgX);
|
---|
[15364] | 652 | yl.Add(sumY / k);
|
---|
[15365] | 653 | wl.Add(sumW);
|
---|
[15364] | 654 | i += k;
|
---|
| 655 | }
|
---|
| 656 | }
|
---|
| 657 |
|
---|
| 658 | x2 = xl.ToArray();
|
---|
| 659 | y2 = yl.ToArray();
|
---|
[15365] | 660 | w2 = wl.ToArray();
|
---|
[15364] | 661 | }
|
---|
| 662 |
|
---|
[15362] | 663 | private void AddAlglibSplineResult(alglib.spline1dinterpolant c, string[] inputVars) {
|
---|
| 664 | Results.Add(new Result("Solution", new RegressionSolution(new Spline1dModel(c, Problem.ProblemData.TargetVariable, inputVars),
|
---|
| 665 | (IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
| 666 |
|
---|
| 667 | }
|
---|
| 668 | private void AddMathNetSplineResult(IInterpolation c, string[] inputVars) {
|
---|
| 669 | Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars),
|
---|
| 670 | (IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
[15364] | 671 | }
|
---|
| 672 | }
|
---|
[15362] | 673 |
|
---|
[15364] | 674 |
|
---|
[15365] | 675 | // array with non-zero lower index
|
---|
| 676 | internal class MyArray<T> {
|
---|
| 677 | private T[] arr;
|
---|
| 678 | private int lowerBound;
|
---|
[15364] | 679 |
|
---|
[15365] | 680 | public int Length { get { return arr.Length; } }
|
---|
| 681 |
|
---|
| 682 | public T this[int key] {
|
---|
| 683 | get {
|
---|
| 684 | return arr[key - lowerBound];
|
---|
| 685 | }
|
---|
| 686 | set {
|
---|
| 687 | arr[key - lowerBound] = value;
|
---|
| 688 | }
|
---|
| 689 | }
|
---|
| 690 |
|
---|
| 691 | public MyArray(int lowerBound, int numElements) {
|
---|
| 692 | this.lowerBound = lowerBound;
|
---|
| 693 | arr = new T[numElements];
|
---|
| 694 | }
|
---|
| 695 | public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
|
---|
| 696 | Array.Copy(source, arr, source.Length);
|
---|
| 697 | }
|
---|
| 698 |
|
---|
| 699 | public T[] ToArray() {
|
---|
| 700 | var res = new T[arr.Length];
|
---|
| 701 | Array.Copy(arr, res, res.Length);
|
---|
| 702 | return res;
|
---|
| 703 | }
|
---|
| 704 | }
|
---|
| 705 |
|
---|
| 706 |
|
---|
[15364] | 707 | // UNFINISHED
|
---|
[15442] | 708 | public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
|
---|
[15365] | 709 | private MyArray<double> a;
|
---|
| 710 | private MyArray<double> b;
|
---|
| 711 | private MyArray<double> c;
|
---|
| 712 | private MyArray<double> d;
|
---|
| 713 | private MyArray<double> x;
|
---|
[15364] | 714 | private double offset;
|
---|
| 715 | private double scale;
|
---|
| 716 |
|
---|
| 717 | public string TargetVariable { get; set; }
|
---|
| 718 |
|
---|
| 719 | public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
|
---|
| 720 |
|
---|
| 721 | public event EventHandler TargetVariableChanged;
|
---|
| 722 |
|
---|
| 723 | public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
|
---|
| 724 | this.TargetVariable = orig.TargetVariable;
|
---|
| 725 | this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
|
---|
| 726 | this.a = orig.a;
|
---|
| 727 | this.b = orig.b;
|
---|
| 728 | this.c = orig.c;
|
---|
| 729 | this.d = orig.d;
|
---|
| 730 | this.x = orig.x;
|
---|
| 731 | this.scale = orig.scale;
|
---|
| 732 | this.offset = orig.offset;
|
---|
[15362] | 733 | }
|
---|
[15442] | 734 | internal ReinschSmoothingSplineModel(
|
---|
[15365] | 735 | MyArray<double> a,
|
---|
| 736 | MyArray<double> b,
|
---|
| 737 | MyArray<double> c,
|
---|
| 738 | MyArray<double> d,
|
---|
| 739 | MyArray<double> x,
|
---|
| 740 | string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
|
---|
[15364] | 741 | this.a = a;
|
---|
| 742 | this.b = b;
|
---|
| 743 | this.c = c;
|
---|
| 744 | this.d = d;
|
---|
| 745 | this.x = x;
|
---|
| 746 | this.TargetVariable = targetVar;
|
---|
| 747 | this.VariablesUsedForPrediction = inputs;
|
---|
| 748 | this.scale = scale;
|
---|
| 749 | this.offset = offset;
|
---|
| 750 | }
|
---|
| 751 |
|
---|
| 752 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 753 | return new ReinschSmoothingSplineModel(this, cloner);
|
---|
| 754 | }
|
---|
| 755 |
|
---|
| 756 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
| 757 | return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
|
---|
| 758 | }
|
---|
| 759 |
|
---|
[15442] | 760 | public double GetEstimatedValue(double xx) {
|
---|
[15449] | 761 | int n = a.Length;
|
---|
[15442] | 762 | if (xx <= x[1]) {
|
---|
| 763 | double h = xx - x[1];
|
---|
| 764 | return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
|
---|
| 765 | } else if (xx >= x[n]) {
|
---|
| 766 | double h = xx - x[n];
|
---|
| 767 | return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
|
---|
| 768 | } else {
|
---|
| 769 | // binary search
|
---|
| 770 | int lower = 1;
|
---|
| 771 | int upper = n;
|
---|
| 772 | while (true) {
|
---|
| 773 | if (upper < lower) throw new InvalidProgramException();
|
---|
| 774 | int i = lower + (upper - lower) / 2;
|
---|
| 775 | if (x[i] <= xx && xx < x[i + 1]) {
|
---|
| 776 | double h = xx - x[i];
|
---|
| 777 | return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
|
---|
| 778 | } else if (xx < x[i]) {
|
---|
| 779 | upper = i - 1;
|
---|
| 780 | } else {
|
---|
| 781 | lower = i + 1;
|
---|
[15364] | 782 | }
|
---|
| 783 | }
|
---|
| 784 | }
|
---|
| 785 | }
|
---|
[15442] | 786 |
|
---|
| 787 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
| 788 | int n = x.Length;
|
---|
| 789 | var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
|
---|
| 790 | foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
|
---|
| 791 | product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
|
---|
| 792 | }
|
---|
| 793 | foreach (var xx in product.Select(xi => (xi - offset) * scale)) {
|
---|
| 794 | yield return GetEstimatedValue(xx);
|
---|
| 795 | }
|
---|
| 796 | }
|
---|
[15352] | 797 | }
|
---|
| 798 |
|
---|
[15364] | 799 | // UNFINISHED
|
---|
| 800 | public class SmoothingSplineModel : NamedItem, IRegressionModel {
|
---|
| 801 | private double[] s;
|
---|
| 802 | private IInterpolation interpolation;
|
---|
[15352] | 803 |
|
---|
[15364] | 804 | public string TargetVariable { get; set; }
|
---|
| 805 |
|
---|
| 806 | public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
|
---|
| 807 |
|
---|
| 808 | public event EventHandler TargetVariableChanged;
|
---|
| 809 |
|
---|
| 810 | public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
|
---|
| 811 | this.TargetVariable = orig.TargetVariable;
|
---|
| 812 | this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
|
---|
| 813 | this.s = orig.s; // TODO
|
---|
| 814 | this.interpolation = orig.interpolation;
|
---|
| 815 | }
|
---|
| 816 | public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
|
---|
| 817 | this.s = s;
|
---|
| 818 | this.TargetVariable = targetVar;
|
---|
| 819 | this.VariablesUsedForPrediction = inputs;
|
---|
| 820 | this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s);
|
---|
| 821 | }
|
---|
| 822 |
|
---|
| 823 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 824 | return new SmoothingSplineModel(this, cloner);
|
---|
| 825 | }
|
---|
| 826 |
|
---|
| 827 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
| 828 | return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
|
---|
| 829 | }
|
---|
| 830 |
|
---|
| 831 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
| 832 | foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
|
---|
| 833 |
|
---|
| 834 | yield return interpolation.Interpolate(x);
|
---|
| 835 |
|
---|
| 836 | }
|
---|
| 837 | }
|
---|
| 838 | }
|
---|
| 839 |
|
---|
[15352] | 840 | // UNFINISHED
|
---|
| 841 | public class Spline1dModel : NamedItem, IRegressionModel {
|
---|
| 842 | private alglib.spline1dinterpolant interpolant;
|
---|
| 843 |
|
---|
| 844 | public string TargetVariable { get; set; }
|
---|
| 845 |
|
---|
| 846 | public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
|
---|
| 847 |
|
---|
| 848 | public event EventHandler TargetVariableChanged;
|
---|
| 849 |
|
---|
| 850 | public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
|
---|
| 851 | this.TargetVariable = orig.TargetVariable;
|
---|
| 852 | this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
|
---|
| 853 | this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
|
---|
| 854 | }
|
---|
| 855 | public Spline1dModel(alglib.spline1dinterpolant interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
|
---|
| 856 | this.interpolant = interpolant;
|
---|
| 857 | this.TargetVariable = targetVar;
|
---|
| 858 | this.VariablesUsedForPrediction = inputs;
|
---|
| 859 | }
|
---|
| 860 |
|
---|
| 861 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 862 | return new Spline1dModel(this, cloner);
|
---|
| 863 | }
|
---|
| 864 |
|
---|
| 865 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
| 866 | return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
|
---|
| 867 | }
|
---|
| 868 |
|
---|
| 869 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
[15433] | 870 | var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
|
---|
[15442] | 871 | foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
|
---|
[15433] | 872 | product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
|
---|
| 873 | }
|
---|
| 874 |
|
---|
| 875 | foreach (var x in product) {
|
---|
[15352] | 876 | yield return alglib.spline1dcalc(interpolant, x);
|
---|
| 877 | }
|
---|
| 878 | }
|
---|
| 879 | }
|
---|
[15362] | 880 |
|
---|
| 881 |
|
---|
| 882 | // UNFINISHED
|
---|
| 883 | public class MathNetSplineModel : NamedItem, IRegressionModel {
|
---|
| 884 | private IInterpolation interpolant;
|
---|
| 885 |
|
---|
| 886 | public string TargetVariable { get; set; }
|
---|
| 887 |
|
---|
| 888 | public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
|
---|
| 889 |
|
---|
| 890 | public event EventHandler TargetVariableChanged;
|
---|
| 891 |
|
---|
| 892 | public MathNetSplineModel(MathNetSplineModel orig, Cloner cloner) : base(orig, cloner) {
|
---|
| 893 | this.TargetVariable = orig.TargetVariable;
|
---|
| 894 | this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
|
---|
| 895 | this.interpolant = orig.interpolant; // TODO COPY!
|
---|
| 896 | }
|
---|
| 897 | public MathNetSplineModel(IInterpolation interpolant, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") {
|
---|
| 898 | this.interpolant = interpolant;
|
---|
| 899 | this.TargetVariable = targetVar;
|
---|
| 900 | this.VariablesUsedForPrediction = inputs;
|
---|
| 901 | }
|
---|
| 902 |
|
---|
| 903 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 904 | return new MathNetSplineModel(this, cloner);
|
---|
| 905 | }
|
---|
| 906 |
|
---|
| 907 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
| 908 | return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
|
---|
| 909 | }
|
---|
| 910 |
|
---|
| 911 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
| 912 | foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
|
---|
| 913 | yield return interpolant.Interpolate(x);
|
---|
| 914 | }
|
---|
| 915 | }
|
---|
| 916 | }
|
---|
[15352] | 917 | }
|
---|