
Changeset 14225


Timestamp: 08/02/16 16:03:36
Author: gkronber
Message: #745: used NMSE instead of the squared Pearson's correlation coefficient as results.
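
For context on the change: the squared Pearson correlation coefficient only measures how strongly predictions and targets co-vary, so a model with a constant offset or a wrong scale can still score close to 1.0, whereas the NMSE (variance of the residuals divided by the variance of the targets, as defined in the doc comments of this file) also penalizes such errors and is 0 for a perfect fit. A minimal sketch contrasting the two measures; the class and method names are illustrative only and are not part of HeuristicLab or of this changeset:

  using System;
  using System.Linq;

  // Illustrative helpers, not HeuristicLab code.
  static class RegressionMeasures {
    // Old result value: squared Pearson correlation; insensitive to bias and scale of the predictions.
    public static double SquaredPearsonsR(double[] y, double[] yPred) {
      double my = y.Average(), mp = yPred.Average();
      double cov = y.Zip(yPred, (a, b) => (a - my) * (b - mp)).Sum();
      double denom = Math.Sqrt(y.Sum(a => (a - my) * (a - my)) * yPred.Sum(b => (b - mp) * (b - mp)));
      double r = cov / denom;
      return r * r;
    }

    // New result value: NMSE = Variance(residuals) / Variance(targets);
    // 0 is a perfect fit, values around 1 are no better than predicting the mean.
    public static double NMSE(double[] y, double[] yPred) {
      double[] res = y.Zip(yPred, (a, b) => a - b).ToArray();
      return Variance(res) / Variance(y);
    }

    private static double Variance(double[] xs) {
      double m = xs.Average();
      return xs.Sum(x => (x - m) * (x - m)) / xs.Length;
    }
  }

Because an NMSE of roughly 1 already means "no better than predicting the mean", the new scatter plot added below fixes its Y axis to the range [0, 1].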

File: 1 edited

Legend: lines prefixed with '-' were removed and lines prefixed with '+' were added; unprefixed lines are unchanged context.
  • branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs

--- ElasticNetLinearRegression.cs (r13961)
+++ ElasticNetLinearRegression.cs (r14225)

 using System.Linq;
 using System.Runtime.InteropServices;
-using HeuristicLab.Algorithms.DataAnalysis;
 using HeuristicLab.Analysis;
 using HeuristicLab.Common;
…
 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

-namespace HeuristicLab.LibGlmNet {
+namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
   [Item("Elastic-net Linear Regression (LR)", "Linear regression with elastic-net regularization (wrapper for glmnet)")]
   [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 110)]
…

     private void CreateSolution(double logLambda) {
-      double trainRsq;
-      double testRsq;
-      var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out trainRsq, out testRsq);
-      Results.Add(new Result("R² (train)", new DoubleValue(trainRsq)));
-      Results.Add(new Result("R² (test)", new DoubleValue(testRsq)));
+      double trainNMSE;
+      double testNMSE;
+      var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out trainNMSE, out testNMSE);
+      Results.Add(new Result("NMSE (train)", new DoubleValue(trainNMSE)));
+      Results.Add(new Result("NMSE (test)", new DoubleValue(testNMSE)));

       // copied from LR => TODO: reuse code (but skip coefficients = 0.0)
…

       SymbolicRegressionSolution solution = new SymbolicRegressionSolution(
-        new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), 
+        new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()),
         (IRegressionProblemData)Problem.ProblemData.Clone());
       solution.Model.Name = "Elastic-net Linear Regression Model";
…
     private void CreateSolutionPath() {
       double[] lambda;
-      double[] trainRsq;
-      double[] testRsq;
+      double[] trainNMSE;
+      double[] testNMSE;
       double[,] coeff;
       double[] intercept;
-      RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainRsq, out testRsq, out coeff, out intercept);
+      RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept);

       var coeffTable = new DataTable("Coefficient Paths", "The paths of standarized coefficient values over different lambda values");
…
       Results.Add(new Result(coeffTable.Name, coeffTable.Description, coeffTable));

-      var rsqPlot = new ScatterPlot("R-Squared", "Path of R² values over different lambda values");
-      rsqPlot.VisualProperties.YAxisMaximumAuto = false;
-      rsqPlot.VisualProperties.YAxisMinimumAuto = false;
-      rsqPlot.VisualProperties.XAxisMaximumAuto = false;
-      rsqPlot.VisualProperties.XAxisMinimumAuto = false;
-
-      rsqPlot.VisualProperties.YAxisMinimumFixedValue = 0;
-      rsqPlot.VisualProperties.YAxisMaximumFixedValue = 1.0;
-      rsqPlot.VisualProperties.XAxisTitle = "Log10(Lambda)";
-      rsqPlot.VisualProperties.YAxisTitle = "R²";
-      rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (train)", "Path of R² values over different lambda values", lambda.Zip(trainRsq, (l, r) => new Point2D<double>(Math.Log10(l), r))));
-      rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (test)", "Path of R² values over different lambda values", lambda.Zip(testRsq, (l, r) => new Point2D<double>(Math.Log10(l), r))));
+      var nmsePlot = new ScatterPlot("NMSE", "Path of NMSE values over different lambda values");
+      nmsePlot.VisualProperties.YAxisMaximumAuto = false;
+      nmsePlot.VisualProperties.YAxisMinimumAuto = false;
+      nmsePlot.VisualProperties.XAxisMaximumAuto = false;
+      nmsePlot.VisualProperties.XAxisMinimumAuto = false;
+
+      nmsePlot.VisualProperties.YAxisMinimumFixedValue = 0;
+      nmsePlot.VisualProperties.YAxisMaximumFixedValue = 1.0;
+      nmsePlot.VisualProperties.XAxisTitle = "Log10(Lambda)";
+      nmsePlot.VisualProperties.YAxisTitle = "Normalized mean of squared errors (NMSE)";
+      nmsePlot.Rows.Add(new ScatterPlotDataRow("NMSE (train)", "Path of NMSE values over different lambda values", lambda.Zip(trainNMSE, (l, v) => new Point2D<double>(Math.Log10(l), v))));
+      nmsePlot.Rows.Add(new ScatterPlotDataRow("NMSE (test)", "Path of NMSE values over different lambda values", lambda.Zip(testNMSE, (l, v) => new Point2D<double>(Math.Log10(l), v))));
       if (lambda.Length > 2) {
-        rsqPlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last()));
-        rsqPlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First()));
-      }
-      rsqPlot.Rows["R² (train)"].VisualProperties.PointSize = 5;
-      rsqPlot.Rows["R² (test)"].VisualProperties.PointSize = 5;
-
-      Results.Add(new Result(rsqPlot.Name, rsqPlot.Description, rsqPlot));
+        nmsePlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last()));
+        nmsePlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First()));
+      }
+      nmsePlot.Rows["NMSE (train)"].VisualProperties.PointSize = 5;
+      nmsePlot.Rows["NMSE (test)"].VisualProperties.PointSize = 5;
+
+      Results.Add(new Result(nmsePlot.Name, nmsePlot.Description, nmsePlot));
     }

     public static double[] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double lambda,
-            out double trainRsq, out double testRsq,
+            out double trainNMSE, out double testNMSE,
             double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity) {
-      double[] trainRsqs;
-      double[] testRsqs;
+      double[] trainNMSEs;
+      double[] testNMSEs;
       // run for exactly one lambda
-      var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainRsqs, out testRsqs, coeffLowerBound, coeffUpperBound);
-      trainRsq = trainRsqs[0];
-      testRsq = testRsqs[0];
+      var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainNMSEs, out testNMSEs, coeffLowerBound, coeffUpperBound);
+      trainNMSE = trainNMSEs[0];
+      testNMSE = testNMSEs[0];
       return coeffs[0];
     }
…

     public static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty,
-      out double[] lambda, out double[] trainRsq, out double[] testRsq, out double[,] coeff, out double[] intercept,
+      out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept,
       double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
       int maxVars = -1
…
       double[] userLambda = new double[0];
       // automatically determine lambda values (maximum 100 different lambda values)
-      RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainRsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
+      RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
     }

…
     /// <param name="ulam">User supplied lambda values</param>
     /// <param name="lambda">Output lambda values</param>
-    /// <param name="trainRsq">Vector of R² values on the training set for each set of coefficients along the path</param>
-    /// <param name="testRsq">Vector of R² values on the test set for each set of coefficients along the path</param>
+    /// <param name="trainNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the training set for each set of coefficients along the path</param>
+    /// <param name="testNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the test set for each set of coefficients along the path</param>
     /// <param name="coeff">Vector of coefficient vectors for each solution along the path</param>
     /// <param name="intercept">Vector of intercepts for each solution along the path</param>
…
     /// <param name="maxVars">Maximum allowed number of variables in each solution along the path (-1 => all variables are allowed)</param>
     private static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty,
-  int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainRsq, out double[] testRsq, out double[,] coeff, out double[] intercept,
+  int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept,
   double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
   int maxVars = -1
…
       int nlp = -99;
       int jerr = -99;
-
-      elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainRsq, out lambda, out nlp, out jerr);
-
-      testRsq = new double[lmu];
+      double[] trainR2;
+      elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr);
+
+      trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE
+      testNMSE = new double[lmu];
       coeff = new double[lmu, numVars];
       for (int solIdx = 0; solIdx < lmu; solIdx++) {
+        trainNMSE[solIdx] = 1.0 - trainR2[solIdx];
+
         // uncompress coefficients of solution
         int selectedNin = nin[solIdx];
…
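The training-side values come from the R² vector reported by the native elnet routine; as the new comment notes, elnet returns R² = 1 - NMSE, so the wrapper only has to invert the value for each lambda step. A minimal sketch of that conversion under this assumption (the class and method names are illustrative, not part of the changeset):

  // Illustrative sketch: convert the per-lambda R² values reported by glmnet's elnet routine
  // into NMSE values, assuming R² = 1 - NMSE on the training partition as stated above.
  static class TrainNmseConversion {
    public static double[] ToTrainNMSE(double[] rsqFromElnet) {
      var nmse = new double[rsqFromElnet.Length];
      for (int i = 0; i < rsqFromElnet.Length; i++)
        nmse[i] = 1.0 - rsqFromElnet[i];
      return nmse;
    }
  }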
     
         }

-        // apply to test set to calculate test values for each lambda step
+        // apply to test set to calculate test NMSE values for each lambda step
         double[] fn;
         modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);
         OnlineCalculatorError error;
-        var r = OnlinePearsonsRCalculator.Calculate(testY, fn, out error);
-        if (error != OnlineCalculatorError.None) r = 0;
-        testRsq[solIdx] = r * r;
+        var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error);
+        if (error != OnlineCalculatorError.None) nmse = double.MaxValue;
+        testNMSE[solIdx] = nmse;

         // uncompress coefficients
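
On the test side the changeset replaces OnlinePearsonsRCalculator with OnlineNormalizedMeanSquaredErrorCalculator and stores double.MaxValue when the calculator reports an error, so failed evaluations are ranked as worst instead of best. The following self-contained sketch shows the quantity that call is expected to produce for each lambda step, assuming the calculator follows the NMSE definition from the doc comments above; it is a re-implementation for illustration, not the HeuristicLab code:

  using System;
  using System.Linq;

  static class TestNmseSketch {
    // NMSE on the test partition: Variance(testY - prediction) / Variance(testY).
    // Degenerate inputs fall back to double.MaxValue, mirroring the error handling in the diff.
    public static double TestNMSE(double[] testY, double[] predicted) {
      if (testY.Length == 0 || testY.Length != predicted.Length) return double.MaxValue;
      double[] res = testY.Zip(predicted, (y, f) => y - f).ToArray();
      double varY = Variance(testY);
      return varY > 0 ? Variance(res) / varY : double.MaxValue;
    }

    private static double Variance(double[] xs) {
      double m = xs.Average();
      return xs.Sum(x => (x - m) * (x - m)) / xs.Length;
    }
  }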