[15352] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
[15450] | 24 | using System.Diagnostics;
|
---|
[15352] | 25 | using System.Linq;
|
---|
| 26 | using System.Threading;
|
---|
| 27 | using HeuristicLab.Common;
|
---|
| 28 | using HeuristicLab.Core;
|
---|
| 29 | using HeuristicLab.Data;
|
---|
| 30 | using HeuristicLab.Optimization;
|
---|
| 31 | using HeuristicLab.Parameters;
|
---|
| 32 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 33 | using HeuristicLab.Problems.DataAnalysis;
|
---|
[15365] | 34 | using HeuristicLab.Random;
|
---|
[15362] | 35 | using MathNet.Numerics.Interpolation;
|
---|
[15364] | 36 | using MathNet.Numerics.LinearAlgebra.Double;
|
---|
| 37 | using MathNet.Numerics.LinearAlgebra.Double.Solvers;
|
---|
[15352] | 38 |
|
---|
[15465] | 39 |
|
---|
| 40 | /*
|
---|
| 41 | * Experimenting with various spline implementations.
|
---|
| 42 | *
|
---|
| 43 | * There are many different variants of using splines.
|
---|
| 44 | * - Spline Interpolation
|
---|
| 45 | * - Natural Spline Interpolation
|
---|
| 46 | * - Smoothing Splines (Reinsch)
|
---|
| 47 | * - P-Splines
|
---|
| 48 | * - B-Splines
|
---|
| 49 | * - Regression Splines
|
---|
| 50 | * - Penalized Regression Splines
|
---|
| 51 | * -
|
---|
| 52 | *
|
---|
| 53 | *
|
---|
| 54 | * Generally, a spline is a piecewise function whereby each piece connects two knots.
|
---|
| 55 | * In almost all cases splines are univariate. There are extensions to more dimensions
|
---|
| 56 | * but this is much less convenient.
|
---|
| 57 | * A linear spline is a piecewise linear function. Usually the following constaints are enforced
|
---|
| 58 | * for each knot point k and the two piecewise functions fl and fr
|
---|
| 59 | * fl(k) = fr(k), fl(k)' = fr(k)', fl(k)''=0, fr(k)''=0, additionally constraints for
|
---|
| 60 | * the boundary knots are often enforced.
|
---|
| 61 |
|
---|
| 62 | * Spline Interpolation solves the problem of finding a smooth interpolating function
|
---|
| 63 | * which goes through all knots exactly.
|
---|
| 64 | *
|
---|
| 65 | * In spline smoothing one tries to find a function minimizing the squared residuals
|
---|
| 66 | * and maximizing smoothness. For the smoothing spline g the optimization objective is
|
---|
| 67 | * || y - g(x) ||² + \lambda integrate (xmin : xmax) g(x)'' dx.
|
---|
| 68 | * The roughness penality is the integral over the second derivative of the smoothing spline
|
---|
| 69 | * over the whole domain of x. Lambda is used to balance the amount of smoothing.
|
---|
| 70 | * If lambda is set to zero, the solution is an interpolating spline. If lambda is set to
|
---|
| 71 | * infinity the solution is a maximally smooth function (= line).
|
---|
| 72 | * Interestingly, the estimation of a smoothing spline is a linear operation. As a consequence
|
---|
| 73 | * a lot of theory from linear models can be reused (e.g. cross-validation and generalized
|
---|
| 74 | * cross-validation) with minimal additional effort. GCV can also be used to tune the
|
---|
| 75 | * smoothing parameter automatically. Tuned algorithms are available to solve smoothing
|
---|
| 76 | * spline estimation efficiently O(n) or O(m²n) where m is the number of knots and n the
|
---|
| 77 | * number of data points.
|
---|
| 78 | * It is possible to calculate confidence intervals for the smoothing spline and given
|
---|
| 79 | * an error model subsequently also confidence intervals for predictions.
|
---|
| 80 | *
|
---|
| 81 | * We are primarily interested in spline smoothing / regression.
|
---|
| 82 | * Alglib and Math.NET provide several different implementations for interpolation
|
---|
| 83 | * including polynomial (cubic), Hermite, Akima, Linear, Catmull-Rom, Rational, ...
|
---|
| 84 | *
|
---|
| 85 | * For spline smoothing or regression there are also many differen variants.
|
---|
| 86 | *
|
---|
| 87 | * Smoothing Splines by Reinsch (Reinsch, Smoothing by Spline Functions, 1967).
|
---|
| 88 | * Pseudo-code for the algorithm is given in the paper. A small change in the algorithm
|
---|
| 89 | * to improve convergence is discussed in (Reinsch, Smoothing by Spline Functions II, 1970)
|
---|
| 90 | * The algorithm uses the properties of the smoothing matrix (bandedness) to calculate the
|
---|
| 91 | * smoothing spline in O(n). Two parameters have to be set (S and rho). If the noise in the
|
---|
| 92 | * data is known then both parameter can be set directly, otherwise S should be set to n-1
|
---|
| 93 | * and rho should be found via cross-validation.
|
---|
| 94 | * It is not easy to calculate a the CV (PRESS) or GCV in the approach by Reinsch and
|
---|
| 95 | * therefore the algorithm is not ideal.
|
---|
| 96 | * Wahba and colleagues (1990) introduced CV and GCV estimation.
|
---|
| 97 | * Hutchinson and colleages improved the algorithm (Fortran implementation available as
|
---|
| 98 | * algorithm 642.f from ACM).
|
---|
| 99 | *
|
---|
| 100 | * CUBGCV (Algorithm 642 Hutchingson)
|
---|
| 101 | * Several improvments over the Reinsch algorithm. In particular efficient calculation
|
---|
| 102 | * of the
|
---|
| 103 |
|
---|
| 104 | * Finbarr O'Sullivan BART
|
---|
| 105 | * ...
|
---|
| 106 | *
|
---|
| 107 | * B-Splines
|
---|
| 108 | * ...
|
---|
| 109 | *
|
---|
| 110 | * P-Splines (Eilers and Marx), use the B-Spline basis in combination with a penality
|
---|
| 111 | * for large variations of subsequent B-Spline coefficients. Conceptually, if the
|
---|
| 112 | * coefficients of neighbouring B-Splines are almost the same then the resulting smoothing
|
---|
| 113 | * spline is less wiggly. It has been critizized that this is rather subjective.
|
---|
| 114 | * P-Splines also allow calculation of CV and GCV for a given smoothing parameter which
|
---|
| 115 | * can be used for optimization of lambda.
|
---|
| 116 | * It is easy to use P-Splines within GAMs (no backfitting necessary).
|
---|
| 117 | * The paper contains MATLAB code also for the calculation of the B-Spline basis.
|
---|
| 118 | * The computational effort is O(?). The authors argue that it is efficient.
|
---|
| 119 | * The number of knots is not critical it is possible to use every data point as knot.
|
---|
| 120 | *
|
---|
| 121 | * Alglib: penalized regression spline
|
---|
| 122 | * ...
|
---|
| 123 | *
|
---|
| 124 | * Alglib: penalized regression spline with support for weights
|
---|
| 125 | * ...
|
---|
| 126 | */
|
---|
| 127 |
|
---|
[15352] | 128 | namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
|
---|
| 129 | [Item("Splines", "")]
|
---|
| 130 | [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 102)]
|
---|
| 131 | [StorableClass]
|
---|
| 132 | public sealed class Splines : FixedDataAnalysisAlgorithm<IRegressionProblem> {
|
---|
| 133 | [StorableConstructor]
|
---|
| 134 | private Splines(bool deserializing) : base(deserializing) { }
|
---|
| 135 | [StorableHook(HookType.AfterDeserialization)]
|
---|
| 136 | private void AfterDeserialization() {
|
---|
| 137 | }
|
---|
| 138 |
|
---|
| 139 | private Splines(Splines original, Cloner cloner)
|
---|
| 140 | : base(original, cloner) {
|
---|
| 141 | }
|
---|
| 142 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 143 | return new Splines(this, cloner);
|
---|
| 144 | }
|
---|
| 145 |
|
---|
| 146 | public Splines()
|
---|
| 147 | : base() {
|
---|
| 148 | var validTypes = new ItemSet<StringValue>(
|
---|
| 149 | new[] {
|
---|
[15364] | 150 | "Smoothing Spline (Reinsch)",
|
---|
[15442] | 151 | "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)",
|
---|
[15369] | 152 | "B-Spline Smoothing",
|
---|
[15449] | 153 | "Penalized Regression Spline (alglib)",
|
---|
[15457] | 154 | "CUBGCV",
|
---|
| 155 | "Finnbar O'Sullivan (sbart)"
|
---|
[15352] | 156 | }.Select(s => new StringValue(s)));
|
---|
| 157 |
|
---|
[15449] | 158 | Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.Last()));
|
---|
[15364] | 159 | Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100)));
|
---|
[15365] | 160 | Parameters.Add(new ValueParameter<DoubleValue>("StdDev (noise)", "Known error in y values. Used only be Reinsch Smoothing Splines", new DoubleValue(0.1)));
|
---|
[15352] | 161 | Problem = new RegressionProblem();
|
---|
| 162 | }
|
---|
| 163 |
|
---|
| 164 |
|
---|
| 165 | protected override void Run(CancellationToken cancellationToken) {
|
---|
| 166 | double[,] inputMatrix = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
|
---|
| 167 | if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
|
---|
| 168 | throw new NotSupportedException("Splines does not support NaN or infinity values in the input dataset.");
|
---|
| 169 |
|
---|
| 170 | var inputVars = Problem.ProblemData.AllowedInputVariables.ToArray();
|
---|
| 171 | if (inputVars.Length > 3) throw new ArgumentException();
|
---|
| 172 |
|
---|
| 173 | var y = Problem.ProblemData.TargetVariableTrainingValues.ToArray();
|
---|
| 174 | if (inputVars.Length == 1) {
|
---|
| 175 |
|
---|
| 176 | var x = Problem.ProblemData.Dataset.GetDoubleValues(inputVars.First(), Problem.ProblemData.TrainingIndices).ToArray();
|
---|
| 177 | alglib.spline1dinterpolant c;
|
---|
| 178 | var type = ((StringValue)Parameters["Type"].ActualValue).Value;
|
---|
| 179 | switch (type) {
|
---|
[15364] | 180 | case "Smoothing Spline (Reinsch)": {
|
---|
| 181 | CalculateSmoothingSplineReinsch(x, y, inputVars);
|
---|
| 182 | break;
|
---|
| 183 | }
|
---|
[15442] | 184 | case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": {
|
---|
| 185 | double optTol, looRMSE;
|
---|
| 186 | var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE);
|
---|
| 187 | Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
| 188 | Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol)));
|
---|
| 189 | Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE)));
|
---|
[15365] | 190 | break;
|
---|
| 191 | }
|
---|
[15449] | 192 | case "CUBGCV": {
|
---|
| 193 | CubicSplineGCV.CubGcvReport report;
|
---|
[15450] | 194 | var model =
|
---|
| 195 | CubicSplineGCV.CalculateCubicSpline(x, y,
|
---|
[15449] | 196 | Problem.ProblemData.TargetVariable, inputVars, out report);
|
---|
| 197 | var targetVar = Problem.ProblemData.TargetVariable;
|
---|
| 198 | var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
|
---|
| 199 | Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
|
---|
| 200 | Results.Add(new Result("GCV", new DoubleValue(report.generalizedCrossValidation)));
|
---|
| 201 | Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
|
---|
| 202 | Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
|
---|
[15450] | 203 | Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
|
---|
[15449] | 204 | Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
|
---|
| 205 | Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
|
---|
| 206 | Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
|
---|
| 207 | break;
|
---|
| 208 | }
|
---|
[15457] | 209 | case "Finnbar O'Sullivan (sbart)": {
|
---|
| 210 | SBART.SBART_Report report;
|
---|
[15468] | 211 |
|
---|
[15457] | 212 | var model =
|
---|
| 213 | SBART.CalculateSBART(x, y,
|
---|
[15468] | 214 | Problem.ProblemData.TargetVariable, inputVars, out report);
|
---|
[15457] | 215 | var targetVar = Problem.ProblemData.TargetVariable;
|
---|
| 216 | var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
|
---|
| 217 | Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
|
---|
| 218 | Results.Add(new Result("GCV", new DoubleValue(report.gcv)));
|
---|
| 219 | Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
|
---|
| 220 | break;
|
---|
| 221 |
|
---|
| 222 | }
|
---|
[15442] | 223 | case "Penalized Regression Spline (alglib)": {
|
---|
[15369] | 224 | var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
|
---|
| 225 | var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
|
---|
| 226 | var targetVar = Problem.ProblemData.TargetVariable;
|
---|
| 227 | var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
|
---|
| 228 | Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
|
---|
| 229 |
|
---|
| 230 | break;
|
---|
[15442] | 231 | }
|
---|
[15352] | 232 | default: throw new NotSupportedException();
|
---|
| 233 | }
|
---|
| 234 |
|
---|
| 235 | }
|
---|
| 236 | }
|
---|
[15362] | 237 |
|
---|
[15369] | 238 |
|
---|
[15442] | 239 | public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
|
---|
[15369] | 240 | int info;
|
---|
| 241 | alglib.spline1dinterpolant interpolant;
|
---|
| 242 | alglib.spline1dfitreport rep;
|
---|
| 243 | int n = x.Length;
|
---|
| 244 | n = (int)Math.Max(50, 3 * Math.Sqrt(n));
|
---|
| 245 |
|
---|
| 246 | alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
|
---|
| 247 | return new Spline1dModel(interpolant, targetVar, inputVars);
|
---|
| 248 | }
|
---|
| 249 |
|
---|
[15442] | 250 | public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
|
---|
| 251 | out double avgTrainRMSE,
|
---|
[15457] | 252 | out double cvRMSE, out double[] predictions) {
|
---|
[15442] | 253 | int info;
|
---|
| 254 | alglib.spline1dinterpolant interpolant;
|
---|
| 255 | alglib.spline1dfitreport rep;
|
---|
| 256 | int n = x.Length;
|
---|
| 257 | n = (int)Math.Max(50, 3 * Math.Sqrt(n));
|
---|
[15364] | 258 |
|
---|
[15450] | 259 | int folds = 10;
|
---|
| 260 | int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
|
---|
| 261 | if (foldSize == 1) folds = x.Length;
|
---|
[15365] | 262 |
|
---|
[15450] | 263 | double[] x_loo = new double[(folds - 1) * foldSize];
|
---|
| 264 | double[] y_loo = new double[(folds - 1) * foldSize];
|
---|
[15457] | 265 | double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray();
|
---|
[15450] | 266 |
|
---|
[15442] | 267 | var models = new List<IRegressionModel>();
|
---|
[15450] | 268 | var sumTrainSE = 0.0;
|
---|
| 269 | int nTrain = 0;
|
---|
| 270 | var sumTestSE = 0.0;
|
---|
| 271 | int nTest = 0;
|
---|
[15365] | 272 |
|
---|
[15457] | 273 | predictions = new double[y.Length];
|
---|
[15450] | 274 |
|
---|
| 275 | for (int i = 0; i < folds; i++) {
|
---|
| 276 | if (i < folds - 1) {
|
---|
| 277 | //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
|
---|
| 278 | Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
|
---|
| 279 | Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
|
---|
| 280 | }
|
---|
[15457] | 281 | alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep);
|
---|
| 282 | //Debug.Assert(y_loo.All(yi => yi != 0.0));
|
---|
| 283 | //Debug.Assert(x_loo.All(xi => xi != 0.0));
|
---|
[15365] | 284 |
|
---|
[15450] | 285 | // training error
|
---|
| 286 | for (int j = 0; j < x_loo.Length; j++) {
|
---|
[15457] | 287 | var y_pred = alglib.spline1dcalc(interpolant, x[j]);
|
---|
[15450] | 288 | double res = y[j] - y_pred;
|
---|
| 289 | sumTrainSE += res * res;
|
---|
| 290 | nTrain++;
|
---|
| 291 | }
|
---|
| 292 | // test
|
---|
| 293 | for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
|
---|
[15457] | 294 | predictions[j] = alglib.spline1dcalc(interpolant, x[j]);
|
---|
| 295 | double res = y[j] - predictions[j];
|
---|
[15450] | 296 | sumTestSE += res * res;
|
---|
| 297 | nTest++;
|
---|
| 298 | }
|
---|
| 299 |
|
---|
| 300 | models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
|
---|
| 301 |
|
---|
| 302 | if (i < folds - 1) {
|
---|
| 303 | //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
|
---|
| 304 | // add current fold for next iteration
|
---|
| 305 | Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
|
---|
| 306 | Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
|
---|
| 307 | }
|
---|
[15442] | 308 | }
|
---|
| 309 |
|
---|
[15450] | 310 | cvRMSE = Math.Sqrt(sumTestSE / nTest);
|
---|
| 311 | avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
|
---|
[15442] | 312 |
|
---|
| 313 | return new RegressionEnsembleModel(models);
|
---|
[15365] | 314 | }
|
---|
| 315 |
|
---|
| 316 | private struct SplineData {
|
---|
| 317 | public double a, b, c, d, x, y;
|
---|
| 318 | };
|
---|
| 319 |
|
---|
| 320 |
|
---|
| 321 | /*
|
---|
| 322 | * The procedure Quincunx, takes as arguments
|
---|
| 323 | the vectors u, v and w which are respectively the diagonal, the first supradiagonal
|
---|
| 324 | and the second supradiagonal of the banded matrix. The vector on
|
---|
| 325 | the LHS of the equation (80) is placed in q which contains the solution on the
|
---|
| 326 | completion of the procedure.
|
---|
| 327 | */
|
---|
| 328 | private void Quincunx(int n, MyArray<double> u, MyArray<double> v, MyArray<double> w, MyArray<double> q) {
|
---|
| 329 | // { factorisation}
|
---|
| 330 | u[-1] = 0;
|
---|
| 331 | u[0] = 0;
|
---|
| 332 | for (int j = 1; j <= n - 1; j++) {
|
---|
| 333 | u[j] = u[j] - u[j - 2] * Math.Pow(w[j - 2], 2) - u[j - 1] * Math.Pow(v[j - 1], 2);
|
---|
| 334 | v[j] = (v[j] - u[j - 1] * v[j - 1] * w[j - 1]) / u[j];
|
---|
| 335 | w[j] = w[j] / u[j];
|
---|
[15364] | 336 | }
|
---|
[15365] | 337 | // { forward substitution}
|
---|
| 338 | for (int j = 1; j <= n - 1; j++) {
|
---|
| 339 | q[j] = q[j] - v[j - 1] * q[j - 1] - w[j - 2] * q[j - 2];
|
---|
| 340 | }
|
---|
| 341 | for (int j = 1; j < n - 1; j++) {
|
---|
| 342 | q[j] = q[j] / u[j];
|
---|
| 343 | }
|
---|
| 344 | // { back substitution}
|
---|
| 345 | q[n + 1] = 0;
|
---|
| 346 | q[n] = 0;
|
---|
| 347 | for (int j = n - 1; j >= 1; j--) {
|
---|
| 348 | q[j] = q[j] - v[j] * q[j + 1] - w[j] * q[j + 2];
|
---|
| 349 | }
|
---|
| 350 | }
|
---|
[15364] | 351 |
|
---|
[15442] | 352 | public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) {
|
---|
| 353 | double s = x.Length + 1;
|
---|
[15365] | 354 | double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing
|
---|
[15364] | 355 |
|
---|
[15442] | 356 | var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable);
|
---|
[15364] | 357 |
|
---|
[15442] | 358 | Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone())));
|
---|
| 359 | }
|
---|
[15364] | 360 |
|
---|
| 361 |
|
---|
[15442] | 362 | public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
|
---|
| 363 | out double avgTrainRMSE,
|
---|
[15450] | 364 | out double cvRMSE) {
|
---|
[15442] | 365 | int info;
|
---|
[15364] | 366 |
|
---|
[15450] | 367 | int folds = 10;
|
---|
| 368 | int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
|
---|
| 369 | if (foldSize == 1) folds = x.Length;
|
---|
[15365] | 370 |
|
---|
[15450] | 371 | double[] x_loo = new double[(folds - 1) * foldSize];
|
---|
| 372 | double[] y_loo = new double[(folds - 1) * foldSize];
|
---|
| 373 |
|
---|
[15442] | 374 | var models = new List<IRegressionModel>();
|
---|
[15450] | 375 | var sumTrainSE = 0.0;
|
---|
| 376 | int nTrain = 0;
|
---|
| 377 | var sumTestSE = 0.0;
|
---|
| 378 | int nTest = 0;
|
---|
[15365] | 379 |
|
---|
| 380 |
|
---|
[15450] | 381 | for (int i = 0; i < folds; i++) {
|
---|
| 382 | if (i < folds - 1) {
|
---|
| 383 | Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
|
---|
| 384 | Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
|
---|
| 385 | }
|
---|
| 386 | var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
|
---|
[15365] | 387 |
|
---|
[15450] | 388 | // training error
|
---|
| 389 | for(int j = 0;j<x_loo.Length;j++) {
|
---|
| 390 | var y_pred = model.GetEstimatedValue(x[j]);
|
---|
| 391 | double res = y[j] - y_pred;
|
---|
| 392 | sumTrainSE += res * res;
|
---|
| 393 | nTrain++;
|
---|
| 394 | }
|
---|
| 395 | // test
|
---|
| 396 | for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
|
---|
| 397 | var y_pred = model.GetEstimatedValue(x[j]);
|
---|
| 398 | double res = y[j] - y_pred;
|
---|
| 399 | sumTestSE += res * res;
|
---|
| 400 | nTest++;
|
---|
| 401 | }
|
---|
[15365] | 402 |
|
---|
[15442] | 403 | models.Add(model);
|
---|
[15365] | 404 |
|
---|
[15450] | 405 | if (i < folds - 1) {
|
---|
| 406 | // add current fold for next iteration
|
---|
| 407 | Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
|
---|
| 408 | Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
|
---|
[15442] | 409 | }
|
---|
| 410 | }
|
---|
| 411 |
|
---|
[15450] | 412 | cvRMSE = Math.Sqrt(sumTestSE / nTest);
|
---|
| 413 | avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);
|
---|
[15442] | 414 |
|
---|
| 415 | return new RegressionEnsembleModel(models);
|
---|
[15365] | 416 | }
|
---|
| 417 |
|
---|
| 418 |
|
---|
[15468] | 419 | // automatically determines tolerance using cv, SLOW!
|
---|
[15442] | 420 | public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
|
---|
[15450] | 421 | out double optTolerance, out double cvRMSE) {
|
---|
[15442] | 422 | var maxTolerance = y.StandardDeviation();
|
---|
| 423 | var minTolerance = maxTolerance * 1E-6;
|
---|
| 424 | optTolerance = maxTolerance;
|
---|
| 425 | var tol = maxTolerance;
|
---|
[15450] | 426 | cvRMSE = double.PositiveInfinity;
|
---|
[15442] | 427 | IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
|
---|
| 428 | while (tol > minTolerance) {
|
---|
[15450] | 429 | double curCVRMSE;
|
---|
[15442] | 430 | double avgTrainRmse;
|
---|
| 431 | var model = Splines.CalculateSmoothingSplineReinsch(
|
---|
[15450] | 432 | x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
|
---|
[15442] | 433 |
|
---|
[15450] | 434 | if (curCVRMSE < cvRMSE) {
|
---|
| 435 | cvRMSE = curCVRMSE;
|
---|
[15442] | 436 | optTolerance = tol;
|
---|
| 437 | bestModel = model;
|
---|
| 438 | }
|
---|
| 439 | tol *= 0.85;
|
---|
| 440 | }
|
---|
| 441 | return bestModel;
|
---|
| 442 | }
|
---|
| 443 |
|
---|
| 444 | public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) {
|
---|
[15364] | 445 | var minX = xOrig.Min();
|
---|
| 446 | var maxX = xOrig.Max();
|
---|
| 447 | var range = maxX - minX;
|
---|
| 448 |
|
---|
[15365] | 449 | double[] w = Enumerable.Repeat(stdTol, xOrig.Length).ToArray();
|
---|
| 450 | SortAndBin((double[])xOrig.Clone(), (double[])yOrig.Clone(), w, out xOrig, out yOrig, out w, scaling: false);
|
---|
[15364] | 451 |
|
---|
| 452 | // See Smoothing by Spline Functions, Reinsch, 1967
|
---|
| 453 | // move x and y into an array indexed with 1..n to match indexing in Reinsch paper
|
---|
| 454 | int n = xOrig.Length;
|
---|
| 455 | var x = new MyArray<double>(1, xOrig);
|
---|
| 456 | var y = new MyArray<double>(1, yOrig);
|
---|
[15365] | 457 | var inv_dy = new MyArray<double>(1, w);
|
---|
[15364] | 458 |
|
---|
| 459 | int n1 = 1;
|
---|
[15365] | 460 | int n2 = n;
|
---|
[15364] | 461 |
|
---|
| 462 | // results
|
---|
| 463 | var a = new MyArray<double>(n1, n);
|
---|
| 464 | var b = new MyArray<double>(n1, n);
|
---|
| 465 | var c = new MyArray<double>(n1, n);
|
---|
| 466 | var d = new MyArray<double>(n1, n);
|
---|
| 467 |
|
---|
| 468 | // smooth
|
---|
| 469 | {
|
---|
| 470 | int i, m1, m2; double e, f, f2, g = 0.0, h, p;
|
---|
| 471 | MyArray<double>
|
---|
| 472 | r = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 473 | r1 = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 474 | r2 = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 475 | t = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 476 | t1 = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 477 | u = new MyArray<double>(n1 - 1, n + 2),
|
---|
| 478 | v = new MyArray<double>(n1 - 1, n + 2);
|
---|
| 479 | m1 = n1 - 1;
|
---|
| 480 | m2 = n2 + 1;
|
---|
| 481 | r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] =
|
---|
| 482 | u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0;
|
---|
| 483 | m1 = n1 + 1;
|
---|
| 484 | m2 = n2 - 1;
|
---|
| 485 | h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h;
|
---|
| 486 | for (i = m1; i <= m2; i++) {
|
---|
| 487 | g = h; h = x[i + 1] - x[i];
|
---|
| 488 | e = f; f = (y[i + 1] - y[i]) / h;
|
---|
| 489 | a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3;
|
---|
| 490 | r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h;
|
---|
| 491 | r1[i] = -inv_dy[i] / g - inv_dy[i] / h;
|
---|
| 492 | }
|
---|
| 493 | for (i = m1; i <= m2; i++) {
|
---|
| 494 | b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i];
|
---|
| 495 | c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1];
|
---|
| 496 | d[i] = r[i] * r2[i + 2];
|
---|
| 497 | }
|
---|
| 498 | f2 = -s;
|
---|
| 499 | next:
|
---|
| 500 | for (i = m1; i <= m2; i++) {
|
---|
| 501 | r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2];
|
---|
| 502 | r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]);
|
---|
| 503 | u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2];
|
---|
| 504 | f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p;
|
---|
| 505 | }
|
---|
| 506 | for (i = m2; i >= m1; i--) {
|
---|
| 507 | u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2];
|
---|
| 508 | }
|
---|
| 509 | e = h = 0;
|
---|
| 510 | for (i = n1; i <= m2; i++) {
|
---|
| 511 | g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]);
|
---|
| 512 | v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g);
|
---|
| 513 | }
|
---|
| 514 | g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h;
|
---|
| 515 | g = f2; f2 = e * p * p;
|
---|
| 516 | if (f2 >= s || f2 <= g) goto fin;
|
---|
| 517 | f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]);
|
---|
| 518 | for (i = m1; i <= m2; i++) {
|
---|
| 519 | g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]);
|
---|
| 520 | g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2];
|
---|
| 521 | f = f + g * r[i] * g; r[i] = g;
|
---|
| 522 | }
|
---|
| 523 | h = e - p * f; if (h <= 0) goto fin;
|
---|
| 524 | p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next;
|
---|
| 525 |
|
---|
| 526 | fin:
|
---|
| 527 | for (i = n1; i <= n2; i++) {
|
---|
| 528 | a[i] = y[i] - p * v[i];
|
---|
| 529 | c[i] = u[i];
|
---|
| 530 | }
|
---|
| 531 | for (i = n1; i <= m2; i++) {
|
---|
| 532 | h = x[i + 1] - x[i];
|
---|
| 533 | d[i] = (c[i + 1] - c[i]) / (3 * h);
|
---|
| 534 | b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h;
|
---|
| 535 | }
|
---|
| 536 | }
|
---|
| 537 |
|
---|
[15449] | 538 | // extrapolate for xx > x[n2]
|
---|
| 539 | b[b.Length] = b[b.Length - 1];
|
---|
| 540 | d[1] = 0;
|
---|
| 541 |
|
---|
[15365] | 542 | return new ReinschSmoothingSplineModel(a, b, c, d, x, targetVar, inputVars);
|
---|
[15364] | 543 | }
|
---|
| 544 |
|
---|
[15449] | 545 | public static void SortAndBin(double[] x, double[] y, double[] w, out double[] x2, out double[] y2, out double[] w2, bool scaling = false) {
|
---|
[15365] | 546 | var sortedIdx = Enumerable.Range(0, x.Length).ToArray();
|
---|
| 547 | // sort by x
|
---|
| 548 | Array.Sort(x, sortedIdx);
|
---|
[15364] | 549 |
|
---|
| 550 | var xl = new List<double>();
|
---|
| 551 | var yl = new List<double>();
|
---|
[15365] | 552 | var wl = new List<double>();
|
---|
[15364] | 553 |
|
---|
| 554 | int n = x.Length;
|
---|
| 555 | var range = x[n - 1] - x[0];
|
---|
[15457] | 556 | var binSize = range / n / 10;
|
---|
[15364] | 557 | {
|
---|
| 558 | // binning
|
---|
| 559 | int i = 0;
|
---|
| 560 | while (i < n) {
|
---|
| 561 | int k = 0;
|
---|
| 562 | int j = i;
|
---|
| 563 | double sumX = 0.0;
|
---|
| 564 | double sumY = 0.0;
|
---|
[15365] | 565 | double sumW = 0.0;
|
---|
[15364] | 566 | while (j < n && x[j] - x[i] <= binSize) {
|
---|
| 567 | k++;
|
---|
| 568 | sumX += x[j];
|
---|
[15365] | 569 | sumY += y[sortedIdx[j]];
|
---|
| 570 | sumW += w[sortedIdx[j]];
|
---|
[15364] | 571 | j++;
|
---|
| 572 | }
|
---|
| 573 | var avgX = sumX / k;
|
---|
| 574 | if (scaling) avgX = (avgX - x[0]) / range;
|
---|
[15365] | 575 | xl.Add(avgX);
|
---|
[15364] | 576 | yl.Add(sumY / k);
|
---|
[15365] | 577 | wl.Add(sumW);
|
---|
[15364] | 578 | i += k;
|
---|
| 579 | }
|
---|
| 580 | }
|
---|
| 581 |
|
---|
| 582 | x2 = xl.ToArray();
|
---|
| 583 | y2 = yl.ToArray();
|
---|
[15365] | 584 | w2 = wl.ToArray();
|
---|
[15364] | 585 | }
|
---|
| 586 | }
|
---|
[15362] | 587 |
|
---|
[15364] | 588 |
|
---|
[15365] | 589 | // array with non-zero lower index
|
---|
| 590 | internal class MyArray<T> {
|
---|
| 591 | private T[] arr;
|
---|
| 592 | private int lowerBound;
|
---|
[15364] | 593 |
|
---|
[15365] | 594 | public int Length { get { return arr.Length; } }
|
---|
| 595 |
|
---|
| 596 | public T this[int key] {
|
---|
| 597 | get {
|
---|
| 598 | return arr[key - lowerBound];
|
---|
| 599 | }
|
---|
| 600 | set {
|
---|
| 601 | arr[key - lowerBound] = value;
|
---|
| 602 | }
|
---|
| 603 | }
|
---|
| 604 |
|
---|
| 605 | public MyArray(int lowerBound, int numElements) {
|
---|
| 606 | this.lowerBound = lowerBound;
|
---|
| 607 | arr = new T[numElements];
|
---|
| 608 | }
|
---|
| 609 | public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) {
|
---|
| 610 | Array.Copy(source, arr, source.Length);
|
---|
| 611 | }
|
---|
| 612 |
|
---|
| 613 | public T[] ToArray() {
|
---|
| 614 | var res = new T[arr.Length];
|
---|
| 615 | Array.Copy(arr, res, res.Length);
|
---|
| 616 | return res;
|
---|
| 617 | }
|
---|
| 618 | }
|
---|
| 619 |
|
---|
| 620 |
|
---|
[15364] | 621 | // UNFINISHED
|
---|
[15442] | 622 | public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel {
|
---|
[15365] | 623 | private MyArray<double> a;
|
---|
| 624 | private MyArray<double> b;
|
---|
| 625 | private MyArray<double> c;
|
---|
| 626 | private MyArray<double> d;
|
---|
| 627 | private MyArray<double> x;
|
---|
[15364] | 628 | private double offset;
|
---|
| 629 | private double scale;
|
---|
| 630 |
|
---|
| 631 | public string TargetVariable { get; set; }
|
---|
| 632 |
|
---|
| 633 | public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
|
---|
| 634 |
|
---|
| 635 | public event EventHandler TargetVariableChanged;
|
---|
| 636 |
|
---|
| 637 | public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) {
|
---|
| 638 | this.TargetVariable = orig.TargetVariable;
|
---|
| 639 | this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
|
---|
| 640 | this.a = orig.a;
|
---|
| 641 | this.b = orig.b;
|
---|
| 642 | this.c = orig.c;
|
---|
| 643 | this.d = orig.d;
|
---|
| 644 | this.x = orig.x;
|
---|
| 645 | this.scale = orig.scale;
|
---|
| 646 | this.offset = orig.offset;
|
---|
[15362] | 647 | }
|
---|
[15442] | 648 | internal ReinschSmoothingSplineModel(
|
---|
[15365] | 649 | MyArray<double> a,
|
---|
| 650 | MyArray<double> b,
|
---|
| 651 | MyArray<double> c,
|
---|
| 652 | MyArray<double> d,
|
---|
| 653 | MyArray<double> x,
|
---|
| 654 | string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
|
---|
[15364] | 655 | this.a = a;
|
---|
| 656 | this.b = b;
|
---|
| 657 | this.c = c;
|
---|
| 658 | this.d = d;
|
---|
| 659 | this.x = x;
|
---|
| 660 | this.TargetVariable = targetVar;
|
---|
| 661 | this.VariablesUsedForPrediction = inputs;
|
---|
| 662 | this.scale = scale;
|
---|
| 663 | this.offset = offset;
|
---|
| 664 | }
|
---|
| 665 |
|
---|
| 666 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 667 | return new ReinschSmoothingSplineModel(this, cloner);
|
---|
| 668 | }
|
---|
| 669 |
|
---|
| 670 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
| 671 | return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
|
---|
| 672 | }
|
---|
| 673 |
|
---|
[15442] | 674 | public double GetEstimatedValue(double xx) {
|
---|
[15457] | 675 | xx = (xx - offset) * scale;
|
---|
[15449] | 676 | int n = a.Length;
|
---|
[15442] | 677 | if (xx <= x[1]) {
|
---|
| 678 | double h = xx - x[1];
|
---|
| 679 | return a[1] + h * (b[1] + h * (c[1] + h * d[1]));
|
---|
| 680 | } else if (xx >= x[n]) {
|
---|
| 681 | double h = xx - x[n];
|
---|
| 682 | return a[n] + h * (b[n] + h * (c[n] + h * d[n]));
|
---|
| 683 | } else {
|
---|
| 684 | // binary search
|
---|
| 685 | int lower = 1;
|
---|
| 686 | int upper = n;
|
---|
| 687 | while (true) {
|
---|
| 688 | if (upper < lower) throw new InvalidProgramException();
|
---|
| 689 | int i = lower + (upper - lower) / 2;
|
---|
| 690 | if (x[i] <= xx && xx < x[i + 1]) {
|
---|
| 691 | double h = xx - x[i];
|
---|
| 692 | return a[i] + h * (b[i] + h * (c[i] + h * d[i]));
|
---|
| 693 | } else if (xx < x[i]) {
|
---|
| 694 | upper = i - 1;
|
---|
| 695 | } else {
|
---|
| 696 | lower = i + 1;
|
---|
[15364] | 697 | }
|
---|
| 698 | }
|
---|
| 699 | }
|
---|
| 700 | }
|
---|
[15442] | 701 |
|
---|
| 702 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
| 703 | int n = x.Length;
|
---|
| 704 | var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
|
---|
| 705 | foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
|
---|
| 706 | product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
|
---|
| 707 | }
|
---|
[15457] | 708 | foreach (var xx in product) {
|
---|
[15442] | 709 | yield return GetEstimatedValue(xx);
|
---|
| 710 | }
|
---|
| 711 | }
|
---|
[15352] | 712 | }
|
---|
| 713 |
|
---|
[15364] | 714 | // UNFINISHED
|
---|
[15352] | 715 | public class Spline1dModel : NamedItem, IRegressionModel {
|
---|
[15468] | 716 | alglib.spline1dinterpolant interpolant;
|
---|
[15352] | 717 |
|
---|
| 718 | public string TargetVariable { get; set; }
|
---|
| 719 |
|
---|
| 720 | public IEnumerable<string> VariablesUsedForPrediction { get; private set; }
|
---|
| 721 |
|
---|
| 722 | public event EventHandler TargetVariableChanged;
|
---|
| 723 |
|
---|
| 724 | public Spline1dModel(Spline1dModel orig, Cloner cloner) : base(orig, cloner) {
|
---|
| 725 | this.TargetVariable = orig.TargetVariable;
|
---|
| 726 | this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray();
|
---|
[15468] | 727 |
|
---|
[15352] | 728 | this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
|
---|
| 729 | }
|
---|
[15468] | 730 | internal Spline1dModel(
|
---|
| 731 | alglib.spline1dinterpolant interpolant,
|
---|
| 732 | string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
|
---|
| 733 | this.interpolant = (alglib.spline1dinterpolant)interpolant.make_copy();
|
---|
[15352] | 734 | this.TargetVariable = targetVar;
|
---|
| 735 | this.VariablesUsedForPrediction = inputs;
|
---|
| 736 | }
|
---|
| 737 |
|
---|
| 738 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 739 | return new Spline1dModel(this, cloner);
|
---|
| 740 | }
|
---|
| 741 |
|
---|
| 742 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
| 743 | return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone());
|
---|
| 744 | }
|
---|
| 745 |
|
---|
[15468] | 746 | public double GetEstimatedValue(double xx) {
|
---|
| 747 | return alglib.spline1dcalc(interpolant, xx);
|
---|
| 748 | }
|
---|
| 749 |
|
---|
[15352] | 750 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
[15433] | 751 | var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray();
|
---|
[15442] | 752 | foreach (var factor in VariablesUsedForPrediction.Skip(1)) {
|
---|
[15433] | 753 | product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
|
---|
| 754 | }
|
---|
[15468] | 755 | foreach (var xx in product) {
|
---|
| 756 | yield return GetEstimatedValue(xx);
|
---|
[15352] | 757 | }
|
---|
| 758 | }
|
---|
| 759 | }
|
---|
| 760 | }
|
---|