Changeset 15442 for branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental
- Timestamp:
- 10/31/17 20:01:02 (7 years ago)
- Location:
- branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/GAM.cs
r15436 r15442 218 218 product = product.Zip(problemData.Dataset.GetDoubleValues(inputVars[i], problemData.TrainingIndices), (pi, vi) => pi * vi).ToArray(); 219 219 } 220 // Umständlich!221 return Splines.Calculate PenalizedRegressionSpline(220 double optTolerance, looRMSE; 221 return Splines.CalculateSmoothingSplineReinsch( 222 222 product, 223 (double[])target.Clone(), lambda, 224 problemData.TargetVariable, inputVars 223 (double[])target.Clone(), inputVars, 224 problemData.TargetVariable, 225 out optTolerance, out looRMSE 225 226 ); 226 227 } else return new ConstantModel(target.Average(), problemData.TargetVariable); -
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs
r15433 r15442 21 21 22 22 using System; 23 using System.Collections.Concurrent;24 23 using System.Collections.Generic; 25 24 using System.Linq; 26 25 using System.Threading; 27 using System.Threading.Tasks;28 26 using HeuristicLab.Common; 29 27 using HeuristicLab.Core; 30 28 using HeuristicLab.Data; 31 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;32 29 using HeuristicLab.Optimization; 33 30 using HeuristicLab.Parameters; 34 31 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 35 32 using HeuristicLab.Problems.DataAnalysis; 36 using HeuristicLab.Problems.DataAnalysis.Symbolic;37 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;38 33 using HeuristicLab.Random; 39 34 using MathNet.Numerics.Interpolation; … … 71 66 "Smoothing Spline (Mine)", 72 67 "Smoothing Spline (Reinsch)", 73 "Smoothing Spline (Reinsch with automatic tolerance determination using CV)",68 "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)", 74 69 "B-Spline Smoothing", 75 70 "Penalized Regression Spline (alglib)" … … 150 145 break; 151 146 } 152 case "Smoothing Spline (Reinsch with automatic tolerance determination using CV)": { 153 var model = CalculateSmoothingSplineReinschWithAutomaticTolerance(x, y, inputVars, Problem.ProblemData.TargetVariable); 154 Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone()))); 147 case "Smoothing Spline (Reinsch with automatic tolerance determination using LOOCV)": { 148 double optTol, looRMSE; 149 var model = CalculateSmoothingSplineReinsch(x, y, inputVars, Problem.ProblemData.TargetVariable, out optTol, out looRMSE); 150 Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone()))); 151 Results.Add(new Result("Optimal tolerance", new DoubleValue(optTol))); 152 Results.Add(new Result("RMSE (LOO-CV)", new DoubleValue(looRMSE))); 155 153 break; 156 154 } … … 159 157 break; 160 158 } 161 case "Penalized Regression Spline (alglib)": 162 { 159 case "Penalized Regression Spline (alglib)": { 163 160 var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing 164 161 var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars); … … 168 165 169 166 break; 170 }167 } 171 168 default: throw new NotSupportedException(); 172 169 } … … 176 173 177 174 178 179 public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) 180 { 175 public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) { 181 176 int info; 182 177 alglib.spline1dinterpolant interpolant; … … 189 184 } 190 185 186 public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars, 187 out double avgTrainRMSE, 188 out double loocvRMSE) { 189 int info; 190 alglib.spline1dinterpolant interpolant; 191 alglib.spline1dfitreport rep; 192 int n = x.Length; 193 n = (int)Math.Max(50, 3 * Math.Sqrt(n)); 194 195 double[] x_loo = new double[x.Length - 1]; 196 double[] y_loo = new double[y.Length - 1]; 197 198 var models = new List<IRegressionModel>(); 199 var sumTrainRmse = 0.0; 200 var sse = 0.0; 201 202 for (int i = 0; i < x.Length; i++) { 203 Array.Copy(x, 0, x_loo, 0, i); 204 Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1)); 205 Array.Copy(y, 0, y_loo, 0, i); 206 Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1)); 207 alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep); 208 var y_pred = alglib.spline1dcalc(interpolant, x[i]); 209 double res = y[i] - y_pred; 210 sse += res * res; 211 sumTrainRmse += rep.rmserror; 212 213 var model = new Spline1dModel(interpolant, targetVar, inputVars); 214 models.Add(model); 215 } 216 217 loocvRMSE = Math.Sqrt(sse / x.Length); 218 avgTrainRMSE = sumTrainRmse / x.Length; 219 220 return new RegressionEnsembleModel(models); 221 } 222 191 223 public void BSplineSmoothing(double[] x, double[] y, string[] inputVars) { 192 224 Array.Sort(x, y); 193 194 225 CubicSpline2(x, y, inputVars); 195 196 // // see Elements of Statistical Learning Appendix: Computations for Splines197 // int M = 4; // order of spline (cubic)198 // int K = x.Length;199 // int n = x.Length;200 //201 // double[] t = new double[K + 2 * M];202 // for (int i = 0; i < M; i++) t[i] = x[0];203 // for (int i = K; i < K + 2 * M; i++) t[i] = x[K - 1];204 //205 // double[,] B = new double[n, n + 4]; // basis function matrix206 // double[,] W = new double[n + 4, n + 4]; // penalty matrix (W[j,k] =207 // for (int i = 0; i < M; i++) B[i] = new double[K + 2 * M];208 // for (int j = 0; j < K; j++) {209 // for (int i = 0; i < K + 2M - 1; i++) {210 // if (t[i] <= x[j] && x[j] < t[i + 1])211 // B[1][i] = 1.0;212 // }213 // }214 215 216 226 } 217 227 … … 312 322 } 313 323 314 public void CalculateSmoothingSplineReinsch(double[] x Orig, double[] yOrig, string[] inputVars) {315 double s = x Orig.Length + 1;324 public void CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars) { 325 double s = x.Length + 1; 316 326 double stdTol = ((IValueParameter<DoubleValue>)Parameters["StdDev (noise)"]).Value.Value; // controls extent of smoothing 317 327 318 var model = CalculateSmoothingSplineReinsch(xOrig, yOrig, inputVars, stdTol, s, Problem.ProblemData.TargetVariable); 319 320 Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone()))); 321 322 323 } 324 325 326 327 public static IRegressionModel CalculateSmoothingSplineReinschWithAutomaticTolerance(double[] xOrig, double[] yOrig, string[] inputVars, string targetVar) { 328 int n = xOrig.Length; 329 var rows = Enumerable.Range(0, n); 330 var rand = new FastRandom(1234); 331 var trainRows = rows.Shuffle(rand).Take((int)(n * 0.66)).ToArray(); 332 var testRows = rows.Except(trainRows).ToArray(); 333 334 var trainX = trainRows.Select(i => xOrig[i]).ToArray(); 335 var trainY = trainRows.Select(i => yOrig[i]).ToArray(); 336 var testX = testRows.Select(i => xOrig[i]).ToArray(); 337 var testY = testRows.Select(i => yOrig[i]).ToArray(); 338 339 var shuffledDs = new Dataset(inputVars.Concat(new string[] { targetVar }), new[] { trainX.Concat(testX).ToArray(), trainY.Concat(testY).ToArray() }); 340 var shuffeledProblemData = new RegressionProblemData(shuffledDs, inputVars, targetVar); 341 shuffeledProblemData.TrainingPartition.Start = 0; 342 shuffeledProblemData.TrainingPartition.End = trainX.Length; 343 shuffeledProblemData.TestPartition.Start = trainX.Length; 344 shuffeledProblemData.TestPartition.End = shuffledDs.Rows + 1; 345 346 double curTol = trainY.StandardDeviation(); 347 double prevTestRMSE = double.PositiveInfinity; 348 double testRMSE = double.PositiveInfinity; 349 IRegressionModel prevModel = null; 350 IRegressionModel model = null; 351 do { 352 prevModel = model; 353 prevTestRMSE = testRMSE; 354 model = CalculateSmoothingSplineReinsch(trainX, trainY, inputVars, curTol, s: trainX.Length + 1, targetVar: targetVar); 355 356 var sol = model.CreateRegressionSolution(shuffeledProblemData); 357 var trainRMSE = sol.TrainingRootMeanSquaredError; 358 testRMSE = sol.TestRootMeanSquaredError; 359 curTol = Math.Max(curTol * 0.5, 1e-12 * trainY.StandardDeviation()); 360 } while (testRMSE < prevTestRMSE); 361 return prevModel; 362 } 363 364 365 public static IRegressionModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) { 328 var model = CalculateSmoothingSplineReinsch(x, y, inputVars, stdTol, s, Problem.ProblemData.TargetVariable); 329 330 Results.Add(new Result("Solution", model.CreateRegressionSolution((IRegressionProblemData)Problem.ProblemData.Clone()))); 331 } 332 333 334 public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars, 335 out double avgTrainRMSE, 336 out double loocvRMSE) { 337 int info; 338 339 double[] x_loo = new double[x.Length - 1]; 340 double[] y_loo = new double[y.Length - 1]; 341 342 var models = new List<IRegressionModel>(); 343 var sumTrainRmse = 0.0; 344 var sse = 0.0; 345 346 for (int i = 0; i < x.Length; i++) { 347 Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1)); 348 Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1)); 349 350 var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, x_loo.Length + 1, targetVar); 351 352 var y_pred = model.GetEstimatedValue(x[i]); 353 double res = y[i] - y_pred; 354 sse += res * res; 355 356 models.Add(model); 357 358 // add current row for next iteration 359 if (i < x_loo.Length) { 360 x_loo[i] = x[i]; 361 y_loo[i] = y[i]; 362 } 363 } 364 365 loocvRMSE = Math.Sqrt(sse / x.Length); 366 avgTrainRMSE = sumTrainRmse / x.Length; 367 368 return new RegressionEnsembleModel(models); 369 } 370 371 372 // automatically determines tolerance using loo cv 373 public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar, 374 out double optTolerance, out double looRMSE) { 375 var maxTolerance = y.StandardDeviation(); 376 var minTolerance = maxTolerance * 1E-6; 377 optTolerance = maxTolerance; 378 var tol = maxTolerance; 379 looRMSE = double.PositiveInfinity; 380 IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar); 381 while (tol > minTolerance) { 382 double loo_rmse; 383 double avgTrainRmse; 384 var model = Splines.CalculateSmoothingSplineReinsch( 385 x, y, tol, targetVar, inputVars, out avgTrainRmse, out loo_rmse); 386 387 if (loo_rmse < looRMSE) { 388 looRMSE = loo_rmse; 389 optTolerance = tol; 390 bestModel = model; 391 } 392 tol *= 0.85; 393 } 394 return bestModel; 395 } 396 397 public static ReinschSmoothingSplineModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double stdTol, double s, string targetVar) { 366 398 var minX = xOrig.Min(); 367 399 var maxX = xOrig.Max(); … … 605 637 606 638 // UNFINISHED 607 internalclass ReinschSmoothingSplineModel : NamedItem, IRegressionModel {639 public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel { 608 640 private MyArray<double> a; 609 641 private MyArray<double> b; … … 631 663 this.offset = orig.offset; 632 664 } 633 publicReinschSmoothingSplineModel(665 internal ReinschSmoothingSplineModel( 634 666 MyArray<double> a, 635 667 MyArray<double> b, … … 650 682 // extrapolate for xx > x[n2] 651 683 b[b.Length] = b[b.Length - 1]; 652 d[b.Length] = d[d.Length - 1]; 684 d[1] = 0; 685 // d[b.Length] = -d[d.Length - 1]; 653 686 } 654 687 … … 661 694 } 662 695 696 public double GetEstimatedValue(double xx) { 697 int n = x.Length; 698 if (xx <= x[1]) { 699 double h = xx - x[1]; 700 return a[1] + h * (b[1] + h * (c[1] + h * d[1])); 701 } else if (xx >= x[n]) { 702 double h = xx - x[n]; 703 return a[n] + h * (b[n] + h * (c[n] + h * d[n])); 704 } else { 705 // binary search 706 int lower = 1; 707 int upper = n; 708 while (true) { 709 if (upper < lower) throw new InvalidProgramException(); 710 int i = lower + (upper - lower) / 2; 711 if (x[i] <= xx && xx < x[i + 1]) { 712 double h = xx - x[i]; 713 return a[i] + h * (b[i] + h * (c[i] + h * d[i])); 714 } else if (xx < x[i]) { 715 upper = i - 1; 716 } else { 717 lower = i + 1; 718 } 719 } 720 } 721 } 722 663 723 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 664 724 int n = x.Length; 665 foreach (var xx in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).Select(xi => (xi - offset) * scale)) { 666 if (xx <= x[1]) { 667 double h = xx - x[1]; 668 yield return a[1] + h * (b[1] + h * (c[1] + h * d[1])); 669 } else if (xx >= x[n]) { 670 double h = xx - x[n]; 671 yield return a[n] + h * (b[n] + h * (c[n] + h * d[n])); 672 } else { 673 // binary search 674 int lower = 1; 675 int upper = n; 676 while (true) { 677 if (upper < lower) throw new InvalidProgramException(); 678 int i = lower + (upper - lower) / 2; 679 if (x[i] <= xx && xx < x[i + 1]) { 680 double h = xx - x[i]; 681 yield return a[i] + h * (b[i] + h * (c[i] + h * d[i])); 682 break; 683 } else if (xx < x[i]) { 684 upper = i - 1; 685 } else { 686 lower = i + 1; 687 } 688 } 689 } 725 var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray(); 726 foreach (var factor in VariablesUsedForPrediction.Skip(1)) { 727 product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); 728 } 729 foreach (var xx in product.Select(xi => (xi - offset) * scale)) { 730 yield return GetEstimatedValue(xx); 690 731 } 691 732 } … … 764 805 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 765 806 var product = dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).ToArray(); 766 foreach (var factor in VariablesUsedForPrediction.Skip(1)) {807 foreach (var factor in VariablesUsedForPrediction.Skip(1)) { 767 808 product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); 768 809 }
Note: See TracChangeset
for help on using the changeset viewer.