Changeset 14225

Ignore:
Timestamp:
08/02/16 16:03:36 (4 years ago)
Message:

#745: used NMSE instead of the squared Pearson correlation coefficient for the reported results.

File:
1 edited

Unmodified
Removed
• branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs

 r13961 using System.Linq; using System.Runtime.InteropServices; using HeuristicLab.Algorithms.DataAnalysis; using HeuristicLab.Analysis; using HeuristicLab.Common; using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; namespace HeuristicLab.LibGlmNet { namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet { [Item("Elastic-net Linear Regression (LR)", "Linear regression with elastic-net regularization (wrapper for glmnet)")] [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 110)] private void CreateSolution(double logLambda) { double trainRsq; double testRsq; var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out trainRsq, out testRsq); Results.Add(new Result("R² (train)", new DoubleValue(trainRsq))); Results.Add(new Result("R² (test)", new DoubleValue(testRsq))); double trainNMSE; double testNMSE; var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out trainNMSE, out testNMSE); Results.Add(new Result("NMSE (train)", new DoubleValue(trainNMSE))); Results.Add(new Result("NMSE (test)", new DoubleValue(testNMSE))); // copied from LR => TODO: reuse code (but skip coefficients = 0.0) SymbolicRegressionSolution solution = new SymbolicRegressionSolution( new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)Problem.ProblemData.Clone()); solution.Model.Name = "Elastic-net Linear Regression Model"; private void CreateSolutionPath() { double[] lambda; double[] trainRsq; double[] testRsq; double[] trainNMSE; double[] testNMSE; double[,] coeff; double[] intercept; RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainRsq, out testRsq, out coeff, out intercept); 
RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept); var coeffTable = new DataTable("Coefficient Paths", "The paths of standarized coefficient values over different lambda values"); Results.Add(new Result(coeffTable.Name, coeffTable.Description, coeffTable)); var rsqPlot = new ScatterPlot("R-Squared", "Path of R² values over different lambda values"); rsqPlot.VisualProperties.YAxisMaximumAuto = false; rsqPlot.VisualProperties.YAxisMinimumAuto = false; rsqPlot.VisualProperties.XAxisMaximumAuto = false; rsqPlot.VisualProperties.XAxisMinimumAuto = false; rsqPlot.VisualProperties.YAxisMinimumFixedValue = 0; rsqPlot.VisualProperties.YAxisMaximumFixedValue = 1.0; rsqPlot.VisualProperties.XAxisTitle = "Log10(Lambda)"; rsqPlot.VisualProperties.YAxisTitle = "R²"; rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (train)", "Path of R² values over different lambda values", lambda.Zip(trainRsq, (l, r) => new Point2D(Math.Log10(l), r)))); rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (test)", "Path of R² values over different lambda values", lambda.Zip(testRsq, (l, r) => new Point2D(Math.Log10(l), r)))); var nmsePlot = new ScatterPlot("NMSE", "Path of NMSE values over different lambda values"); nmsePlot.VisualProperties.YAxisMaximumAuto = false; nmsePlot.VisualProperties.YAxisMinimumAuto = false; nmsePlot.VisualProperties.XAxisMaximumAuto = false; nmsePlot.VisualProperties.XAxisMinimumAuto = false; nmsePlot.VisualProperties.YAxisMinimumFixedValue = 0; nmsePlot.VisualProperties.YAxisMaximumFixedValue = 1.0; nmsePlot.VisualProperties.XAxisTitle = "Log10(Lambda)"; nmsePlot.VisualProperties.YAxisTitle = "Normalized mean of squared errors (NMSE)"; nmsePlot.Rows.Add(new ScatterPlotDataRow("NMSE (train)", "Path of NMSE values over different lambda values", lambda.Zip(trainNMSE, (l, v) => new Point2D(Math.Log10(l), v)))); nmsePlot.Rows.Add(new ScatterPlotDataRow("NMSE (test)", "Path of NMSE values over different lambda 
values", lambda.Zip(testNMSE, (l, v) => new Point2D(Math.Log10(l), v)))); if (lambda.Length > 2) { rsqPlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last())); rsqPlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First())); } rsqPlot.Rows["R² (train)"].VisualProperties.PointSize = 5; rsqPlot.Rows["R² (test)"].VisualProperties.PointSize = 5; Results.Add(new Result(rsqPlot.Name, rsqPlot.Description, rsqPlot)); nmsePlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last())); nmsePlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First())); } nmsePlot.Rows["NMSE (train)"].VisualProperties.PointSize = 5; nmsePlot.Rows["NMSE (test)"].VisualProperties.PointSize = 5; Results.Add(new Result(nmsePlot.Name, nmsePlot.Description, nmsePlot)); } public static double[] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double lambda, out double trainRsq, out double testRsq, out double trainNMSE, out double testNMSE, double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity) { double[] trainRsqs; double[] testRsqs; double[] trainNMSEs; double[] testNMSEs; // run for exactly one lambda var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainRsqs, out testRsqs, coeffLowerBound, coeffUpperBound); trainRsq = trainRsqs[0]; testRsq = testRsqs[0]; var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainNMSEs, out testNMSEs, coeffLowerBound, coeffUpperBound); trainNMSE = trainNMSEs[0]; testNMSE = testNMSEs[0]; return coeffs[0]; } public static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty, out double[] lambda, out double[] trainRsq, out double[] testRsq, out double[,] coeff, out double[] intercept, out double[] lambda, out double[] 
trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept, double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity, int maxVars = -1 double[] userLambda = new double[0]; // automatically determine lambda values (maximum 100 different lambda values) RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainRsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars); RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars); } /// User supplied lambda values /// Output lambda values /// Vector of R² values on the training set for each set of coefficients along the path /// Vector of R² values on the test set for each set of coefficients along the path /// Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the training set for each set of coefficients along the path /// Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the test set for each set of coefficients along the path /// Vector of coefficient vectors for each solution along the path /// Vector of intercepts for each solution along the path /// Maximum allowed number of variables in each solution along the path (-1 => all variables are allowed) private static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty, int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainRsq, out double[] testRsq, out double[,] coeff, out double[] intercept, int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept, double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity, int maxVars = -1 int nlp = -99; int jerr = -99; 
elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainRsq, out lambda, out nlp, out jerr); testRsq = new double[lmu]; double[] trainR2; elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr); trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE testNMSE = new double[lmu]; coeff = new double[lmu, numVars]; for (int solIdx = 0; solIdx < lmu; solIdx++) { trainNMSE[solIdx] = 1.0 - trainR2[solIdx]; // uncompress coefficients of solution int selectedNin = nin[solIdx]; } // apply to test set to calculate test R² values for each lambda step // apply to test set to calculate test NMSE values for each lambda step double[] fn; modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn); OnlineCalculatorError error; var r = OnlinePearsonsRCalculator.Calculate(testY, fn, out error); if (error != OnlineCalculatorError.None) r = 0; testRsq[solIdx] = r * r; var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error); if (error != OnlineCalculatorError.None) nmse = double.MaxValue; testNMSE[solIdx] = nmse; // uncompress coefficients
Note: See TracChangeset for help on using the changeset viewer.