Changeset 15450
Timestamp: 11/06/17 13:12:41 (7 years ago)
Location: branches/MathNetNumerics-Exploration-2789
Files: 6 edited
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/GAM.cs
r15449 → r15450:

        nTerms += inputVars.Combinations(i).Count();
      }
+
      IRegressionModel[] f = new IRegressionModel[nTerms];
      for (int i = 0; i < f.Length; i++) {
…
      string[] terms = new string[f.Length];
      Results.Add(new Result("RSS Values", typeof(DoubleMatrix)));
+
+     var combinations = new List<string[]>();
+     for (int i = 1; i <= maxInteractions; i++)
+       combinations.AddRange(HeuristicLab.Common.EnumerableExtensions.Combinations(inputVars, i).Select(c => c.ToArray()));
+     // combinations.Add(new string[] { "X1", "X2" });
+     // combinations.Add(new string[] { "X3", "X4" });
+     // combinations.Add(new string[] { "X5", "X6" });
+     // combinations.Add(new string[] { "X1", "X7", "X9" });
+     // combinations.Add(new string[] { "X3", "X6", "X10" });
+

      // until convergence
…
      //}

-     for (int interaction = 1; interaction <= maxInteractions; interaction++) {
-       var selectedVars = HeuristicLab.Common.EnumerableExtensions.Combinations(inputVars, interaction);
-
-       foreach (var element in selectedVars) {
-         var res = CalculateResiduals(problemData, f, j, avgY, problemData.TrainingIndices);
-         rss[j] = res.Variance();
-         terms[j] = string.Format("f({0})", string.Join(",", element));
-         f[j] = RegressSpline(problemData, element.ToArray(), res, lambda);
-         j++;
-       }
+
+     foreach (var element in combinations) {
+       var res = CalculateResiduals(problemData, f, j, avgY, problemData.TrainingIndices);
+       rss[j] = res.Variance();
+       terms[j] = string.Format("f({0})", string.Join(",", element));
+       f[j] = RegressSpline(problemData, element.ToArray(), res, lambda);
+       j++;
      }
      }
…
        product = product.Zip(problemData.Dataset.GetDoubleValues(inputVars[i], problemData.TrainingIndices), (pi, vi) => pi * vi).ToArray();
      }
-     CubicSplineGCV.CubGcvReport report;
-     return CubicSplineGCV.CalculateCubicSpline(
-       product,
-       (double[])target.Clone(),
-       problemData.TargetVariable, inputVars, out report
-     );
+     // CubicSplineGCV.CubGcvReport report;
+     // return CubicSplineGCV.CalculateCubicSpline(
+     //   product,
+     //   (double[])target.Clone(),
+     //   problemData.TargetVariable, inputVars, out report
+     // );
+
+     double optTolerance; double cvRMSE;
+     // find tolerance
+     // var ensemble = Splines.CalculateSmoothingSplineReinsch(product, (double[])target.Clone(), inputVars, problemData.TargetVariable, out optTolerance, out cvRMSE);
+     // // train on whole data
+     // return Splines.CalculateSmoothingSplineReinsch(product, (double[])target.Clone(), inputVars, optTolerance, product.Length - 1, problemData.TargetVariable);
+
+     // find tolerance
+     var bestLambda = -5.0;
+     double bestCVRMSE = double.PositiveInfinity;
+     double avgTrainRMSE = double.PositiveInfinity;
+     for (double curLambda = -5.0; curLambda <= 6.0; curLambda += 1.0) {
+       var ensemble = Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), curLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE);
+       Console.Write("{0} {1} {2}", curLambda, avgTrainRMSE, cvRMSE);
+       if (bestCVRMSE > cvRMSE) {
+         Console.Write(" *");
+         bestCVRMSE = cvRMSE;
+         bestLambda = curLambda;
+       }
+       Console.WriteLine();
+     }
+     // train on whole data
+     return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), bestLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE);
+
    } else return new ConstantModel(target.Average(), problemData.TargetVariable);
  }
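The new RegressSpline code path above boils down to a standard model-selection pattern: sweep the penalty parameter over a coarse grid, score each candidate by cross-validated RMSE, remember the best candidate, and refit on the full training data with it. Below is a minimal standalone sketch of that pattern; the SelectLambda helper, the FitAndScore delegate, and the toy objective are illustrative placeholders and not part of the changeset (in the commit this role is played by Splines.CalculatePenalizedRegressionSpline and the curLambda loop).

using System;

class LambdaGridSearchSketch {
  // Fits a model for a given penalty and reports its cross-validated RMSE.
  delegate double FitAndScore(double lambda);

  // Returns the penalty with the lowest CV RMSE on a coarse grid.
  static double SelectLambda(FitAndScore fitAndScore, double lambdaMin, double lambdaMax, double step) {
    var bestLambda = lambdaMin;
    var bestCvRmse = double.PositiveInfinity;
    for (double lambda = lambdaMin; lambda <= lambdaMax; lambda += step) {
      var cvRmse = fitAndScore(lambda);   // cross-validated error for this candidate
      if (cvRmse < bestCvRmse) {          // keep the candidate with the lowest CV error
        bestCvRmse = cvRmse;
        bestLambda = lambda;
      }
    }
    return bestLambda;                    // the caller refits on the full data with this value
  }

  static void Main() {
    // Toy objective: pretend the CV error is minimized at lambda = 2.
    var best = SelectLambda(lambda => Math.Abs(lambda - 2.0) + 0.1, -5.0, 6.0, 1.0);
    Console.WriteLine("selected lambda = {0}", best);
  }
}

Printing each (lambda, train RMSE, CV RMSE) triple during the sweep, as the committed loop does, also makes it easy to spot when the grid bounds need widening.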
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs
r15449 → r15450:

  using System;
  using System.Collections.Generic;
+ using System.Diagnostics;
  using System.Linq;
  using System.Threading;
…
        case "CUBGCV": {
            CubicSplineGCV.CubGcvReport report;
-           var model = 
-             CubicSplineGCV.CalculateCubicSpline(x, y, 
+           var model =
+             CubicSplineGCV.CalculateCubicSpline(x, y,
                Problem.ProblemData.TargetVariable, inputVars, out report);
            var targetVar = Problem.ProblemData.TargetVariable;
…
            Results.Add(new Result("Estimated error variance", new DoubleValue(report.estimatedErrorVariance)));
            Results.Add(new Result("Estimated RSS degrees of freedom", new DoubleValue(report.estimatedRSSDegreesOfFreedom)));
-           Results.Add(new Result("Estimated tr eumean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
+           Results.Add(new Result("Estimated true mean squared error at data points", new DoubleValue(report.estimatedTrueMeanSquaredErrorAtDataPoints)));
            Results.Add(new Result("Mean square of DF", new DoubleValue(report.meanSquareOfDf)));
            Results.Add(new Result("Mean square residual", new DoubleValue(report.meanSquareResiudal)));
…
    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
      out double avgTrainRMSE,
-     out double loocvRMSE) {
+     out double cvRMSE) {
      int info;
      alglib.spline1dinterpolant interpolant;
…
      n = (int)Math.Max(50, 3 * Math.Sqrt(n));

-     double[] x_loo = new double[x.Length - 1];
-     double[] y_loo = new double[y.Length - 1];
+     int folds = 10;
+     int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
+     if (foldSize == 1) folds = x.Length;
+
+     double[] x_loo = new double[(folds - 1) * foldSize];
+     double[] y_loo = new double[(folds - 1) * foldSize];

      var models = new List<IRegressionModel>();
-     var sumTrainRmse = 0.0;
-     var sse = 0.0;
-
-     for (int i = 0; i < x.Length; i++) {
-       Array.Copy(x, 0, x_loo, 0, i);
-       Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1));
-       Array.Copy(y, 0, y_loo, 0, i);
-       Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1));
+     var sumTrainSE = 0.0;
+     int nTrain = 0;
+     var sumTestSE = 0.0;
+     int nTest = 0;
+
+     for (int i = 0; i < folds; i++) {
+       if (i < folds - 1) {
+         //Console.WriteLine("Copy from {0} to {1} n = {2}", (i + 1) * foldSize, i * foldSize, (folds - i - 1) * foldSize);
+         Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, (folds - i - 1) * foldSize);
+         Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
+       }
        alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep);
-       var y_pred = alglib.spline1dcalc(interpolant, x[i]);
-       double res = y[i] - y_pred;
-       sse += res * res;
-       sumTrainRmse += rep.rmserror;
-
-       var model = new Spline1dModel(interpolant, targetVar, inputVars);
-       models.Add(model);
-     }
-
-     loocvRMSE = Math.Sqrt(sse / x.Length);
-     avgTrainRMSE = sumTrainRmse / x.Length;
+       Debug.Assert(y_loo.All(yi => yi != 0.0));
+       Debug.Assert(x_loo.All(xi => xi != 0.0));
+
+       // training error
+       for (int j = 0; j < x_loo.Length; j++) {
+         var y_pred = alglib.spline1dcalc(interpolant, x[i]);
+         double res = y[j] - y_pred;
+         sumTrainSE += res * res;
+         nTrain++;
+       }
+       // test
+       for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
+         var y_pred = alglib.spline1dcalc(interpolant, x[i]);
+         double res = y[j] - y_pred;
+         sumTestSE += res * res;
+         nTest++;
+       }
+
+       models.Add(new Spline1dModel(interpolant, targetVar, inputVars));
+
+       if (i < folds - 1) {
+         //Console.WriteLine("Copy from {0} to {1} n = {2}", i * foldSize, i * foldSize, foldSize);
+         // add current fold for next iteration
+         Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
+         Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
+       }
+     }
+
+     cvRMSE = Math.Sqrt(sumTestSE / nTest);
+     avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);

      return new RegressionEnsembleModel(models);
…
    public static IRegressionEnsembleModel CalculateSmoothingSplineReinsch(double[] x, double[] y, double tol, string targetVar, string[] inputVars,
      out double avgTrainRMSE,
-     out double loocvRMSE) {
+     out double cvRMSE) {
      int info;

-     double[] x_loo = new double[x.Length - 1];
-     double[] y_loo = new double[y.Length - 1];
+     int folds = 10;
+     int foldSize = x.Length <= 30 ? 1 : x.Length / folds;
+     if (foldSize == 1) folds = x.Length;
+
+     double[] x_loo = new double[(folds - 1) * foldSize];
+     double[] y_loo = new double[(folds - 1) * foldSize];

      var models = new List<IRegressionModel>();
-     var sumTrainRmse = 0.0;
-     var sse = 0.0;
-
-     for (int i = 0; i < x.Length; i++) {
-       Array.Copy(x, i + 1, x_loo, i, x.Length - (i + 1));
-       Array.Copy(y, i + 1, y_loo, i, y.Length - (i + 1));
-
-       var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, x_loo.Length + 1, targetVar);
-
-       var y_pred = model.GetEstimatedValue(x[i]);
-       double res = y[i] - y_pred;
-       sse += res * res;
+     var sumTrainSE = 0.0;
+     int nTrain = 0;
+     var sumTestSE = 0.0;
+     int nTest = 0;
+
+     for (int i = 0; i < folds; i++) {
+       if (i < folds - 1) {
+         Array.Copy(x, (i + 1) * foldSize, x_loo, i * foldSize, x.Length - ((i + 1) * foldSize) - 1);
+         Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, y.Length - ((i + 1) * foldSize) - 1);
+       }
+       var model = CalculateSmoothingSplineReinsch(x_loo, y_loo, inputVars, tol, ((folds - 1) * foldSize) - 1, targetVar);
+
+       // training error
+       for (int j = 0; j < x_loo.Length; j++) {
+         var y_pred = model.GetEstimatedValue(x[j]);
+         double res = y[j] - y_pred;
+         sumTrainSE += res * res;
+         nTrain++;
+       }
+       // test
+       for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
+         var y_pred = model.GetEstimatedValue(x[j]);
+         double res = y[j] - y_pred;
+         sumTestSE += res * res;
+         nTest++;
+       }

        models.Add(model);

-       // add current row for next iteration
-       if (i < x_loo.Length) {
-         x_loo[i] = x[i];
-         y_loo[i] = y[i];
-       }
-     }
-
-     loocvRMSE = Math.Sqrt(sse / x.Length);
-     avgTrainRMSE = sumTrainRmse / x.Length;
+       if (i < folds - 1) {
+         // add current fold for next iteration
+         Array.Copy(x, i * foldSize, x_loo, i * foldSize, foldSize);
+         Array.Copy(y, i * foldSize, y_loo, i * foldSize, foldSize);
+       }
+     }
+
+     cvRMSE = Math.Sqrt(sumTestSE / nTest);
+     avgTrainRMSE = Math.Sqrt(sumTrainSE / nTrain);

      return new RegressionEnsembleModel(models);
…
-   // automatically determines tolerance using loo cv
+   // automatically determines tolerance using loo cv, SLOW!
    public static IRegressionModel CalculateSmoothingSplineReinsch(double[] x, double[] y, string[] inputVars, string targetVar,
-     out double optTolerance, out double looRMSE) {
+     out double optTolerance, out double cvRMSE) {
      var maxTolerance = y.StandardDeviation();
      var minTolerance = maxTolerance * 1E-6;
      optTolerance = maxTolerance;
      var tol = maxTolerance;
-     looRMSE = double.PositiveInfinity;
+     cvRMSE = double.PositiveInfinity;
      IRegressionModel bestModel = new ConstantModel(y.Average(), targetVar);
      while (tol > minTolerance) {
-       double loo_rmse;
+       double curCVRMSE;
        double avgTrainRmse;
        var model = Splines.CalculateSmoothingSplineReinsch(
-         x, y, tol, targetVar, inputVars, out avgTrainRmse, out loo_rmse);
-
-       if (loo_rmse < looRMSE) {
-         looRMSE = loo_rmse;
+         x, y, tol, targetVar, inputVars, out avgTrainRmse, out curCVRMSE);
+
+       if (curCVRMSE < cvRMSE) {
+         cvRMSE = curCVRMSE;
          optTolerance = tol;
          bestModel = model;
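Both CalculatePenalizedRegressionSpline and CalculateSmoothingSplineReinsch now replace the leave-one-out scheme with cross-validation over contiguous folds: the training arrays x_loo/y_loo are kept current by copying the folds behind the held-out block forward and writing the current fold back before the next iteration. (Note that the committed inner loops evaluate the spline at x[i] while indexing the residual with y[j]; the sketch below assumes the intent is per-point evaluation at the held-out x[j].) The following self-contained sketch only illustrates the fold layout and the CV RMSE computation; the fold-sizing rule is taken from the changeset, while the Fit placeholder and the synthetic data are made up for the example.

using System;
using System.Linq;

class ContiguousKFoldSketch {
  static void Main() {
    // Synthetic data for illustration only.
    var x = Enumerable.Range(0, 100).Select(i => (double)i).ToArray();
    var y = x.Select(xi => Math.Sin(xi / 10.0)).ToArray();

    int folds = 10;
    int foldSize = x.Length <= 30 ? 1 : x.Length / folds;   // same sizing rule as the changeset
    if (foldSize == 1) folds = x.Length;                     // degenerate case: leave-one-out

    double sumTestSE = 0.0;
    int nTest = 0;
    for (int i = 0; i < folds; i++) {
      int testStart = i * foldSize;
      int testEnd = (i + 1) * foldSize;                      // exclusive

      // training data = everything outside the held-out block [testStart, testEnd)
      var xTrain = x.Where((_, idx) => idx < testStart || idx >= testEnd).ToArray();
      var yTrain = y.Where((_, idx) => idx < testStart || idx >= testEnd).ToArray();

      Func<double, double> model = Fit(xTrain, yTrain);      // stand-in for the spline fit

      for (int j = testStart; j < testEnd; j++) {
        var res = y[j] - model(x[j]);                        // evaluate at the held-out point x[j]
        sumTestSE += res * res;
        nTest++;
      }
    }
    Console.WriteLine("CV RMSE = {0}", Math.Sqrt(sumTestSE / nTest));
  }

  // Placeholder "model": predicts the training mean.
  static Func<double, double> Fit(double[] x, double[] y) {
    var mean = y.Average();
    return _ => mean;
  }
}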
branches/MathNetNumerics-Exploration-2789/Main/Main.csproj
r15449 → r15450:

      <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath>
    </Reference>
+   <Reference Include="HeuristicLab.Algorithms.DataAnalysis-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
+     <SpecificVersion>False</SpecificVersion>
+     <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.DataAnalysis-3.4.dll</HintPath>
+   </Reference>
+   <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" />
    <Reference Include="HeuristicLab.Common-3.3">
      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath>
    </Reference>
    <Reference Include="HeuristicLab.Core-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" />
+   <Reference Include="HeuristicLab.Optimization-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" />
    <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
      <SpecificVersion>False</SpecificVersion>
      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>
+   </Reference>
+   <Reference Include="HeuristicLab.Problems.Instances-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
+     <SpecificVersion>False</SpecificVersion>
+     <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
+   </Reference>
+   <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3">
+     <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath>
    </Reference>
    <Reference Include="System" />
branches/MathNetNumerics-Exploration-2789/Main/Program.cs
r15449 → r15450:

  class Program {
    static void Main(string[] args) {
-     var xs = HeuristicLab.Common.SequenceGenerator.GenerateSteps(-3.5, 3.5, 0.1, includeEnd: true).ToList();
-     var ys = xs.Select(xi => 1.0 / Math.Sqrt(2 * Math.PI) * Math.Exp(-0.5 * xi * xi)).ToArray(); // 1.0 / (Math.Sqrt(2 * Math.PI) * Math.Exp(-0.5 * xi * xi))).ToArray();
+     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider();
+     var problemData = provider.LoadData(provider.GetDataDescriptors().First(dd => dd.Name.Contains("Poly")));
+     // var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
+     // var problemData = provider.LoadData(provider.GetDataDescriptors().First(dd => dd.Name.Contains("Chem")));

-     int n = xs.Count();
-     alglib.hqrndstate state;
-     alglib.hqrndseed(1234, 5678, out state);
-     var ys_noise = ys.Select(yi => yi + alglib.hqrndnormal(state) * 0.1).ToList();
+     var gam = new GAM();
+     gam.MaxIterations = 10;
+     gam.MaxInteractions = 3;
+     gam.Problem.ProblemData = problemData;
+     gam.Start();

-     CubicSplineGCV.CubGcvReport report;
-     var model = CubicSplineGCV.CalculateCubicSpline(
-       xs.ToArray(), ys_noise.ToArray(), "y", new string[] { "x" }, out report);
+     var solution = (IRegressionSolution)gam.Results["Ensemble solution"].Value;

-     Console.WriteLine("Smoothing Parameter (= RHO/(RHO + 1) {0}", report.smoothingParameter);
-     Console.WriteLine("Estimated DOF of RSS {0}", report.estimatedRSSDegreesOfFreedom);
-     Console.WriteLine("GCV {0}", report.generalizedCrossValidation);
-     Console.WriteLine("Mean squared residual {0}", report.meanSquareResiudal);
-     Console.WriteLine("Estimate of true MSE at data points {0}", report.estimatedTrueMeanSquaredErrorAtDataPoints);
-     Console.WriteLine("Estimate of error variance {0}", report.estimatedErrorVariance);
-     Console.WriteLine("Mean square value of DF(I) {0}", report.meanSquareOfDf);
-
-     OnlineCalculatorError error;
-     var ys_smoothed = xs.Select(xi => model.GetEstimatedValue(xi)).ToArray();
-     var mse = OnlineMeanSquaredErrorCalculator.Calculate(ys, ys_smoothed, out error);
-     Console.WriteLine("MSE(ys, ys_smooth) = {0}", mse);
-     mse = OnlineMeanSquaredErrorCalculator.Calculate(ys, ys_noise, out error);
-     Console.WriteLine("MSE(ys, ys_noise) = {0}", mse);
-
-     Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
-
-     for (int i = 0; i < n; i++) {
-       Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
-         xs[i], ys[i], ys_smoothed[i], ys_noise[i], ys_smoothed[i] + 1.96 * report.se[i], ys_smoothed[i] - 1.96 * report.se[i]);
-     }
+     Console.WriteLine("RMSE (train) {0}", solution.TrainingRootMeanSquaredError);
+     Console.WriteLine("RMSE (test) {0}", solution.TestRootMeanSquaredError);
    }
  }
branches/MathNetNumerics-Exploration-2789/Test/Test.csproj
r15443 → r15450:

      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.GeneticAlgorithm-3.3.dll</HintPath>
    </Reference>
+   <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" />
    <Reference Include="HeuristicLab.Common-3.3">
      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath>
…
    <Reference Include="HeuristicLab.Problems.Instances-3.3">
      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
+   </Reference>
+   <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3">
+     <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath>
    </Reference>
    <Reference Include="HeuristicLab.SequentialEngine-3.3">
branches/MathNetNumerics-Exploration-2789/Test/UnitTest1.cs
r15443 → r15450:

      Console.WriteLine("Best tolerance {0}, RMSE (LOO): {1}", optTol, looRMSE);
    }
+
+   [TestMethod]
+   public void TestGAM() {
+     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider();
+     var problemData = provider.LoadData(provider.GetDataDescriptors().First(dd => dd.Name.Contains("Poly")));
+     // var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
+     // var problemData = provider.LoadData(provider.GetDataDescriptors().First(dd => dd.Name.Contains("Chem")));
+
+     var gam = new GAM();
+     gam.MaxIterations = 10;
+     gam.MaxInteractions = 3;
+     gam.Problem.ProblemData = problemData;
+     gam.Start();
+
+     var solution = (IRegressionSolution)gam.Results["Ensemble solution"].Value;
+
+     Console.WriteLine("RMSE (train) {0}", solution.TrainingRootMeanSquaredError);
+     Console.WriteLine("RMSE (test) {0}", solution.TestRootMeanSquaredError);
+   }

  }
}