Changeset 14225
 Timestamp:
 08/02/16 16:03:36 (3 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs
r13961 r14225 2 2 using System.Linq; 3 3 using System.Runtime.InteropServices; 4 using HeuristicLab.Algorithms.DataAnalysis;5 4 using HeuristicLab.Analysis; 6 5 using HeuristicLab.Common; … … 15 14 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; 16 15 17 namespace HeuristicLab. LibGlmNet {16 namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet { 18 17 [Item("Elasticnet Linear Regression (LR)", "Linear regression with elasticnet regularization (wrapper for glmnet)")] 19 18 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 110)] … … 69 68 70 69 private void CreateSolution(double logLambda) { 71 double train Rsq;72 double test Rsq;73 var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out train Rsq, out testRsq);74 Results.Add(new Result(" R² (train)", new DoubleValue(trainRsq)));75 Results.Add(new Result(" R² (test)", new DoubleValue(testRsq)));70 double trainNMSE; 71 double testNMSE; 72 var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out trainNMSE, out testNMSE); 73 Results.Add(new Result("NMSE (train)", new DoubleValue(trainNMSE))); 74 Results.Add(new Result("NMSE (test)", new DoubleValue(testNMSE))); 76 75 77 76 // copied from LR => TODO: reuse code (but skip coefficients = 0.0) … … 100 99 101 100 SymbolicRegressionSolution solution = new SymbolicRegressionSolution( 102 new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), 101 new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), 103 102 (IRegressionProblemData)Problem.ProblemData.Clone()); 104 103 solution.Model.Name = "Elasticnet Linear Regression Model"; … … 110 109 private void CreateSolutionPath() { 111 110 double[] lambda; 112 double[] train Rsq;113 double[] test Rsq;111 double[] trainNMSE; 112 double[] testNMSE; 
114 113 double[,] coeff; 115 114 double[] intercept; 116 RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out train Rsq, out testRsq, out coeff, out intercept);115 RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept); 117 116 118 117 var coeffTable = new DataTable("Coefficient Paths", "The paths of standarized coefficient values over different lambda values"); … … 131 130 Results.Add(new Result(coeffTable.Name, coeffTable.Description, coeffTable)); 132 131 133 var rsqPlot = new ScatterPlot("RSquared", "Path of R²values over different lambda values");134 rsqPlot.VisualProperties.YAxisMaximumAuto = false;135 rsqPlot.VisualProperties.YAxisMinimumAuto = false;136 rsqPlot.VisualProperties.XAxisMaximumAuto = false;137 rsqPlot.VisualProperties.XAxisMinimumAuto = false;138 139 rsqPlot.VisualProperties.YAxisMinimumFixedValue = 0;140 rsqPlot.VisualProperties.YAxisMaximumFixedValue = 1.0;141 rsqPlot.VisualProperties.XAxisTitle = "Log10(Lambda)";142 rsqPlot.VisualProperties.YAxisTitle = "R²";143 rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (train)", "Path of R² values over different lambda values", lambda.Zip(trainRsq, (l, r) => new Point2D<double>(Math.Log10(l), r))));144 rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (test)", "Path of R² values over different lambda values", lambda.Zip(testRsq, (l, r) => new Point2D<double>(Math.Log10(l), r))));132 var nmsePlot = new ScatterPlot("NMSE", "Path of NMSE values over different lambda values"); 133 nmsePlot.VisualProperties.YAxisMaximumAuto = false; 134 nmsePlot.VisualProperties.YAxisMinimumAuto = false; 135 nmsePlot.VisualProperties.XAxisMaximumAuto = false; 136 nmsePlot.VisualProperties.XAxisMinimumAuto = false; 137 138 nmsePlot.VisualProperties.YAxisMinimumFixedValue = 0; 139 nmsePlot.VisualProperties.YAxisMaximumFixedValue = 1.0; 140 nmsePlot.VisualProperties.XAxisTitle = "Log10(Lambda)"; 141 nmsePlot.VisualProperties.YAxisTitle = 
"Normalized mean of squared errors (NMSE)"; 142 nmsePlot.Rows.Add(new ScatterPlotDataRow("NMSE (train)", "Path of NMSE values over different lambda values", lambda.Zip(trainNMSE, (l, v) => new Point2D<double>(Math.Log10(l), v)))); 143 nmsePlot.Rows.Add(new ScatterPlotDataRow("NMSE (test)", "Path of NMSE values over different lambda values", lambda.Zip(testNMSE, (l, v) => new Point2D<double>(Math.Log10(l), v)))); 145 144 if (lambda.Length > 2) { 146 rsqPlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last()));147 rsqPlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First()));148 } 149 rsqPlot.Rows["R²(train)"].VisualProperties.PointSize = 5;150 rsqPlot.Rows["R²(test)"].VisualProperties.PointSize = 5;151 152 Results.Add(new Result( rsqPlot.Name, rsqPlot.Description, rsqPlot));145 nmsePlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last())); 146 nmsePlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First())); 147 } 148 nmsePlot.Rows["NMSE (train)"].VisualProperties.PointSize = 5; 149 nmsePlot.Rows["NMSE (test)"].VisualProperties.PointSize = 5; 150 151 Results.Add(new Result(nmsePlot.Name, nmsePlot.Description, nmsePlot)); 153 152 } 154 153 155 154 public static double[] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double lambda, 156 out double train Rsq, out double testRsq,155 out double trainNMSE, out double testNMSE, 157 156 double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity) { 158 double[] train Rsqs;159 double[] test Rsqs;157 double[] trainNMSEs; 158 double[] testNMSEs; 160 159 // run for exactly one lambda 161 var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out train Rsqs, out testRsqs, coeffLowerBound, coeffUpperBound);162 train Rsq = trainRsqs[0];163 test Rsq = testRsqs[0];160 var coeffs = 
CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainNMSEs, out testNMSEs, coeffLowerBound, coeffUpperBound); 161 trainNMSE = trainNMSEs[0]; 162 testNMSE = testNMSEs[0]; 164 163 return coeffs[0]; 165 164 } … … 187 186 188 187 public static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty, 189 out double[] lambda, out double[] train Rsq, out double[] testRsq, out double[,] coeff, out double[] intercept,188 out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept, 190 189 double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity, 191 190 int maxVars = 1 … … 193 192 double[] userLambda = new double[0]; 194 193 // automatically determine lambda values (maximum 100 different lambda values) 195 RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out train Rsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);194 RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars); 196 195 } 197 196 … … 205 204 /// <param name="ulam">User supplied lambda values</param> 206 205 /// <param name="lambda">Output lambda values</param> 207 /// <param name="train Rsq">Vector of R²values on the training set for each set of coefficients along the path</param>208 /// <param name="test Rsq">Vector of R²values on the test set for each set of coefficients along the path</param>206 /// <param name="trainNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the training set for each set of coefficients along the path</param> 207 /// <param name="testNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the test set for each set of coefficients along 
the path</param> 209 208 /// <param name="coeff">Vector of coefficient vectors for each solution along the path</param> 210 209 /// <param name="intercept">Vector of intercepts for each solution along the path</param> … … 213 212 /// <param name="maxVars">Maximum allowed number of variables in each solution along the path (1 => all variables are allowed)</param> 214 213 private static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty, 215 int nlam, double flmin, double[] ulam, out double[] lambda, out double[] train Rsq, out double[] testRsq, out double[,] coeff, out double[] intercept,214 int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept, 216 215 double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity, 217 216 int maxVars = 1 … … 252 251 int nlp = 99; 253 252 int jerr = 99; 254 255 elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainRsq, out lambda, out nlp, out jerr); 256 257 testRsq = new double[lmu]; 253 double[] trainR2; 254 elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr); 255 256 trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE 257 testNMSE = new double[lmu]; 258 258 coeff = new double[lmu, numVars]; 259 259 for (int solIdx = 0; solIdx < lmu; solIdx++) { 260 trainNMSE[solIdx] = 1.0 - trainR2[solIdx]; 261 260 262 // uncompress coefficients of solution 261 263 int selectedNin = nin[solIdx]; … … 266 268 } 267 269 268 // apply to test set to calculate test R²values for each lambda step270 // apply to test set to calculate test NMSE values for each lambda step 269 271 double[] fn; 270 272 
modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn); 271 273 OnlineCalculatorError error; 272 var r = OnlinePearsonsRCalculator.Calculate(testY, fn, out error);273 if (error != OnlineCalculatorError.None) r = 0;274 test Rsq[solIdx] = r * r;274 var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error); 275 if (error != OnlineCalculatorError.None) nmse = double.MaxValue; 276 testNMSE[solIdx] = nmse; 275 277 276 278 // uncompress coefficients
Note: See TracChangeset
for help on using the changeset viewer.