Changeset 15450 for branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs
- Timestamp:
- 11/06/17 13:12:41 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs
r15449 r15450 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Diagnostics; 24 25 using System.Linq; 25 26 using System.Threading; … … 156 157 case "CUBGCV": { 157 158 CubicSplineGCV.CubGcvReport report; 158 var model = 159 CubicSplineGCV.CalculateCubicSpline(x, y, 159 var model = 160 CubicSplineGCV.CalculateCubicSpline(x, y, 160 161 Problem.ProblemData.TargetVariable, inputVars, out report); 161 162 var targetVar = Problem.ProblemData.TargetVariable; … … 165 166 Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance))); 166 167 Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom))); 167 Results.Add(new Result("Estimated tr eumean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));168 Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints))); 168 169 Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf))); 169 170 Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal))); … … 204 205 public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars, 205 206 out double avgTrainRMSE, 206 out double loocvRMSE) {207 out double cvRMSE) { 207 208 int info; 208 209 alglib.spline1dinterpolant interpolant; … … 211 212 n = (int)Math.Max(50, 3 * Math.Sqrt(n)); 212 213 213 double[] x_loo = new double[x.Length - 1]; 214 double[] y_loo = new double[y.Length - 1]; 214 int folds = 10; 215 int foldSize = x.Length <= 30 ? 1 : x.Length / folds; 216 if (foldSize == 1) folds = x.Length; 217 218 double[] x_loo = new double[(folds - 1) * foldSize]; 219 double[] y_loo = new double[(folds - 1) * foldSize]; 215 220 216 221 var models = new List<IRegressionModel>(); 217 var sumTrainRmse = 0.0; 218 var sse = 0.0; 219 220 for (int i = 0; i < x.Length; i++) { 221 Array.Copy(x, 0, x_loo, 0, i); 222 Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1)); 223 Array.Copy(y, 0, y_loo, 0, i); 224 Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1)); 222 var sumTrainSE = 0.0; 223 int nTrain = 0; 224 var sumTestSE = 0.0; 225 int nTest = 0; 226 227 228 for (int i = 0; i < folds; i++) { 229 if (i < folds - 1) { 230 //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize); 231 Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize); 232 Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize); 233 } 225 234 alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep); 226 var y_pred = alglib.spline1dcalc(interpolant, x[i]); 227 double res = y[i] - y_pred; 228 sse += res * res; 229 sumTrainRmse += rep.rmserror; 230 231 var model = new Spline1dModel(interpolant, targetVar, inputVars); 232 models.Add(model); 233 } 234 235 loocvRMSE = Math.Sqrt(sse / x.Length); 236 avgTrainRMSE = sumTrainRmse / x.Length; 235 Debug.Assert(y_loo.All(yi => yi != 0.0)); 236 Debug.Assert(x_loo.All(xi => xi != 0.0)); 237 238 // training error 239 for (int j = 0; j < x_loo.Length; j++) { 240 var y_pred = alglib.spline1dcalc(interpolant, x[i]); 241 double res = y[j] - y_pred; 242 sumTrainSE += res * res; 243 nTrain++; 244 } 245 // test 246 for (int j = i * foldSize; j < (i + 1) * foldSize; j++) { 247 var y_pred = alglib.spline1dcalc(interpolant, x[i]); 248 double res = y[j] - y_pred; 249 sumTestSE += res * res; 250 nTest++; 251 } 252 253 models.Add(new Spline1dModel(interpolant, targetVar, inputVars)); 254 255 if (i < folds - 1) { 256 //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize); 257 // add current fold for next iteration 258 Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize); 259 Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize); 260 } 261 } 262 263 cvRMSE = Math.Sqrt(sumTestSE / nTest); 264 avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain); 237 265 238 266 return new RegressionEnsembleModel(models); … … 352 380 public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars, 353 381 out double avgTrainRMSE, 354 out double loocvRMSE) {382 out double cvRMSE) { 355 383 int info; 356 384 357 double[] x_loo = new double[x.Length - 1]; 358 double[] y_loo = new double[y.Length - 1]; 385 int folds = 10; 386 int foldSize = x.Length <= 30 ? 1 : x.Length / folds; 387 if (foldSize == 1) folds = x.Length; 388 389 double[] x_loo = new double[(folds - 1) * foldSize]; 390 double[] y_loo = new double[(folds - 1) * foldSize]; 359 391 360 392 var models = new List<IRegressionModel>(); 361 var sumTrainRmse = 0.0; 362 var sse = 0.0; 363 364 for (int i = 0; i < x.Length; i++) { 365 Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1)); 366 Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1)); 367 368 var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, x_loo.Length + 1, targetVar); 369 370 var y_pred = model.GetEstimatedValue(x[i]); 371 double res = y[i] - y_pred; 372 sse += res * res; 393 var sumTrainSE = 0.0; 394 int nTrain = 0; 395 var sumTestSE = 0.0; 396 int nTest = 0; 397 398 399 for (int i = 0; i < folds; i++) { 400 if (i < folds - 1) { 401 Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1); 402 Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1); 403 } 404 var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar); 405 406 // training error 407 for(int j = 0;j<x_loo.Length;j++) { 408 var y_pred = model.GetEstimatedValue(x[j]); 409 double res = y[j] - y_pred; 410 sumTrainSE += res * res; 411 nTrain++; 412 } 413 // test 414 for (int j = i * foldSize; j < (i + 1) * foldSize; j++) { 415 var y_pred = model.GetEstimatedValue(x[j]); 416 double res = y[j] - y_pred; 417 sumTestSE += res * res; 418 nTest++; 419 } 373 420 374 421 models.Add(model); 375 422 376 // add current row for next iteration377 if (i < x_loo.Length) {378 x_loo[i] = x[i];379 y_loo[i] = y[i];380 } 381 } 382 383 loocvRMSE = Math.Sqrt(sse / x.Length);384 avgTrainRMSE = sumTrainRmse / x.Length;423 if (i < folds - 1) { 424 // add current fold for next iteration 425 Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize); 426 Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize); 427 } 428 } 429 430 cvRMSE = Math.Sqrt(sumTestSE / nTest); 431 avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain); 385 432 386 433 return new RegressionEnsembleModel(models); … … 388 435 389 436 390 // automatically determines tolerance using loo cv 437 // automatically determines tolerance using loo cv, SLOW! 391 438 public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar, 392 out double optTolerance, out double looRMSE) {439 out double optTolerance, out double cvRMSE) { 393 440 var maxTolerance = y.StandardDeviation(); 394 441 var minTolerance = maxTolerance * 1E-6; 395 442 optTolerance = maxTolerance; 396 443 var tol = maxTolerance; 397 looRMSE = double.PositiveInfinity;444 cvRMSE = double.PositiveInfinity; 398 445 IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar); 399 446 while (tol > minTolerance) { 400 double loo_rmse;447 double curCVRMSE; 401 448 double avgTrainRmse; 402 449 var model = Splines.CalculateSmoothingSplineReinsch( 403 x, y, tol, targetVar, inputVars, out avgTrainRmse, out loo_rmse);404 405 if ( loo_rmse < looRMSE) {406 looRMSE = loo_rmse;450 x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE); 451 452 if (curCVRMSE < cvRMSE) { 453 cvRMSE = curCVRMSE; 407 454 optTolerance = tol; 408 455 bestModel = model;
Note: See TracChangeset
for help on using the changeset viewer.