[14370] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
[13927] | 23 | using System.Linq;
|
---|
[14674] | 24 | using System.Threading;
|
---|
[13927] | 25 | using HeuristicLab.Analysis;
|
---|
| 26 | using HeuristicLab.Common;
|
---|
| 27 | using HeuristicLab.Core;
|
---|
| 28 | using HeuristicLab.Data;
|
---|
| 29 | using HeuristicLab.Optimization;
|
---|
| 30 | using HeuristicLab.Parameters;
|
---|
| 31 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 32 | using HeuristicLab.Problems.DataAnalysis;
|
---|
[13930] | 33 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
| 34 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
|
---|
[13927] | 35 |
|
---|
[14225] | 36 | namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
|
---|
[13927] | 37 | [Item("Elastic-net Linear Regression (LR)", "Linear regression with elastic-net regularization (wrapper for glmnet)")]
|
---|
| 38 | [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 110)]
|
---|
| 39 | [StorableClass]
|
---|
| 40 | public sealed class ElasticNetLinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
|
---|
| 41 | private const string PenalityParameterName = "Penality";
|
---|
[14395] | 42 | private const string LambdaParameterName = "Lambda";
|
---|
[13927] | 43 | #region parameters
|
---|
| 44 | public IFixedValueParameter<DoubleValue> PenalityParameter {
|
---|
| 45 | get { return (IFixedValueParameter<DoubleValue>)Parameters[PenalityParameterName]; }
|
---|
| 46 | }
|
---|
[14395] | 47 | public IValueParameter<DoubleValue> LambdaParameter {
|
---|
| 48 | get { return (IValueParameter<DoubleValue>)Parameters[LambdaParameterName]; }
|
---|
[13930] | 49 | }
|
---|
[13927] | 50 | #endregion
|
---|
| 51 | #region properties
|
---|
| 52 | public double Penality {
|
---|
| 53 | get { return PenalityParameter.Value.Value; }
|
---|
| 54 | set { PenalityParameter.Value.Value = value; }
|
---|
| 55 | }
|
---|
[14395] | 56 | public DoubleValue Lambda {
|
---|
| 57 | get { return LambdaParameter.Value; }
|
---|
| 58 | set { LambdaParameter.Value = value; }
|
---|
[13930] | 59 | }
|
---|
[13927] | 60 | #endregion
|
---|
| 61 |
|
---|
| 62 | [StorableConstructor]
|
---|
| 63 | private ElasticNetLinearRegression(bool deserializing) : base(deserializing) { }
|
---|
| 64 | private ElasticNetLinearRegression(ElasticNetLinearRegression original, Cloner cloner)
|
---|
| 65 | : base(original, cloner) {
|
---|
| 66 | }
|
---|
[13940] | 67 | public ElasticNetLinearRegression()
|
---|
| 68 | : base() {
|
---|
[13927] | 69 | Problem = new RegressionProblem();
|
---|
[14377] | 70 | Parameters.Add(new FixedValueParameter<DoubleValue>(PenalityParameterName, "Penalty factor (alpha) for balancing between ridge (0.0) and lasso (1.0) regression", new DoubleValue(0.5)));
|
---|
[14395] | 71 | Parameters.Add(new OptionalValueParameter<DoubleValue>(LambdaParameterName, "Optional: the value of lambda for which to calculate an elastic-net solution. lambda == null => calculate the whole path of all lambdas"));
|
---|
[13927] | 72 | }
|
---|
| 73 |
|
---|
| 74 | [StorableHook(HookType.AfterDeserialization)]
|
---|
| 75 | private void AfterDeserialization() { }
|
---|
| 76 |
|
---|
| 77 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
| 78 | return new ElasticNetLinearRegression(this, cloner);
|
---|
| 79 | }
|
---|
| 80 |
|
---|
[14674] | 81 | protected override void Run(CancellationToken cancellationToken) {
|
---|
[14395] | 82 | if (Lambda == null) {
|
---|
[13930] | 83 | CreateSolutionPath();
|
---|
| 84 | } else {
|
---|
[14395] | 85 | CreateSolution(Lambda.Value);
|
---|
[13930] | 86 | }
|
---|
| 87 | }
|
---|
| 88 |
|
---|
[14395] | 89 | private void CreateSolution(double lambda) {
|
---|
[14225] | 90 | double trainNMSE;
|
---|
| 91 | double testNMSE;
|
---|
[14395] | 92 | var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, lambda, out trainNMSE, out testNMSE);
|
---|
[14225] | 93 | Results.Add(new Result("NMSE (train)", new DoubleValue(trainNMSE)));
|
---|
| 94 | Results.Add(new Result("NMSE (test)", new DoubleValue(testNMSE)));
|
---|
[13930] | 95 |
|
---|
[14395] | 96 | var allVariables = Problem.ProblemData.AllowedInputVariables.ToArray();
|
---|
[13930] | 97 |
|
---|
[14395] | 98 | var remainingVars = Enumerable.Range(0, allVariables.Length)
|
---|
| 99 | .Where(idx => !coeff[idx].IsAlmost(0.0)).Select(idx => allVariables[idx])
|
---|
| 100 | .ToArray();
|
---|
| 101 | var remainingCoeff = Enumerable.Range(0, allVariables.Length)
|
---|
| 102 | .Select(idx => coeff[idx])
|
---|
| 103 | .Where(c => !c.IsAlmost(0.0))
|
---|
| 104 | .ToArray();
|
---|
[13930] | 105 |
|
---|
[14395] | 106 | var tree = LinearModelToTreeConverter.CreateTree(remainingVars, remainingCoeff, coeff.Last());
|
---|
[13930] | 107 |
|
---|
[14395] | 108 |
|
---|
[13961] | 109 | SymbolicRegressionSolution solution = new SymbolicRegressionSolution(
|
---|
[14225] | 110 | new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()),
|
---|
[13961] | 111 | (IRegressionProblemData)Problem.ProblemData.Clone());
|
---|
[13930] | 112 | solution.Model.Name = "Elastic-net Linear Regression Model";
|
---|
| 113 | solution.Name = "Elastic-net Linear Regression Solution";
|
---|
| 114 |
|
---|
| 115 | Results.Add(new Result(solution.Name, solution.Description, solution));
|
---|
| 116 | }
|
---|
| 117 |
|
---|
| 118 | private void CreateSolutionPath() {
|
---|
[13927] | 119 | double[] lambda;
|
---|
[14225] | 120 | double[] trainNMSE;
|
---|
| 121 | double[] testNMSE;
|
---|
[13927] | 122 | double[,] coeff;
|
---|
| 123 | double[] intercept;
|
---|
[14225] | 124 | RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept);
|
---|
[13927] | 125 |
|
---|
[14375] | 126 | var coeffTable = new IndexedDataTable<double>("Coefficients", "The paths of standarized coefficient values over different lambda values");
|
---|
[14374] | 127 | coeffTable.VisualProperties.YAxisMaximumAuto = false;
|
---|
| 128 | coeffTable.VisualProperties.YAxisMinimumAuto = false;
|
---|
| 129 | coeffTable.VisualProperties.XAxisMaximumAuto = false;
|
---|
| 130 | coeffTable.VisualProperties.XAxisMinimumAuto = false;
|
---|
| 131 |
|
---|
| 132 | coeffTable.VisualProperties.XAxisLogScale = true;
|
---|
[14395] | 133 | coeffTable.VisualProperties.XAxisTitle = "Lambda";
|
---|
[14375] | 134 | coeffTable.VisualProperties.YAxisTitle = "Coefficients";
|
---|
[14374] | 135 | coeffTable.VisualProperties.SecondYAxisTitle = "Number of variables";
|
---|
| 136 |
|
---|
[13927] | 137 | var nLambdas = lambda.Length;
|
---|
| 138 | var nCoeff = coeff.GetLength(1);
|
---|
[14374] | 139 | var dataRows = new IndexedDataRow<double>[nCoeff];
|
---|
[13928] | 140 | var allowedVars = Problem.ProblemData.AllowedInputVariables.ToArray();
|
---|
[14373] | 141 | var numNonZeroCoeffs = new int[nLambdas];
|
---|
[13927] | 142 | for (int i = 0; i < nCoeff; i++) {
|
---|
[13928] | 143 | var coeffId = allowedVars[i];
|
---|
[13940] | 144 | double sigma = Problem.ProblemData.Dataset.GetDoubleValues(coeffId).StandardDeviation();
|
---|
[14374] | 145 | var path = Enumerable.Range(0, nLambdas).Select(r => Tuple.Create(lambda[r], coeff[r, i] * sigma)).ToArray();
|
---|
| 146 | dataRows[i] = new IndexedDataRow<double>(coeffId, coeffId, path);
|
---|
[13927] | 147 | }
|
---|
[14844] | 148 | // add to coeffTable by total weight (larger area under the curve => more important);
|
---|
| 149 | foreach (var r in dataRows.OrderByDescending(r => r.Values.Select(t => t.Item2).Sum(x => Math.Abs(x)))) {
|
---|
| 150 | coeffTable.Rows.Add(r);
|
---|
| 151 | }
|
---|
| 152 |
|
---|
[14373] | 153 | for (int i = 0; i < coeff.GetLength(0); i++) {
|
---|
| 154 | for (int j = 0; j < coeff.GetLength(1); j++) {
|
---|
| 155 | if (!coeff[i, j].IsAlmost(0.0)) {
|
---|
| 156 | numNonZeroCoeffs[i]++;
|
---|
| 157 | }
|
---|
| 158 | }
|
---|
| 159 | }
|
---|
[14374] | 160 | if (lambda.Length > 2) {
|
---|
| 161 | coeffTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
|
---|
| 162 | coeffTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
|
---|
| 163 | }
|
---|
| 164 | coeffTable.Rows.Add(new IndexedDataRow<double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));
|
---|
| 165 | coeffTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
|
---|
| 166 | coeffTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;
|
---|
[13927] | 167 |
|
---|
| 168 | Results.Add(new Result(coeffTable.Name, coeffTable.Description, coeffTable));
|
---|
| 169 |
|
---|
[14375] | 170 | var errorTable = new IndexedDataTable<double>("NMSE", "Path of NMSE values over different lambda values");
|
---|
| 171 | errorTable.VisualProperties.YAxisMaximumAuto = false;
|
---|
| 172 | errorTable.VisualProperties.YAxisMinimumAuto = false;
|
---|
| 173 | errorTable.VisualProperties.XAxisMaximumAuto = false;
|
---|
| 174 | errorTable.VisualProperties.XAxisMinimumAuto = false;
|
---|
[13940] | 175 |
|
---|
[14375] | 176 | errorTable.VisualProperties.YAxisMinimumFixedValue = 0;
|
---|
| 177 | errorTable.VisualProperties.YAxisMaximumFixedValue = 1.0;
|
---|
| 178 | errorTable.VisualProperties.XAxisLogScale = true;
|
---|
[14395] | 179 | errorTable.VisualProperties.XAxisTitle = "Lambda";
|
---|
[14375] | 180 | errorTable.VisualProperties.YAxisTitle = "Normalized mean of squared errors (NMSE)";
|
---|
[14395] | 181 | errorTable.VisualProperties.SecondYAxisTitle = "Number of variables";
|
---|
[14375] | 182 | errorTable.Rows.Add(new IndexedDataRow<double>("NMSE (train)", "Path of NMSE values over different lambda values", lambda.Zip(trainNMSE, (l, v) => Tuple.Create(l, v))));
|
---|
| 183 | errorTable.Rows.Add(new IndexedDataRow<double>("NMSE (test)", "Path of NMSE values over different lambda values", lambda.Zip(testNMSE, (l, v) => Tuple.Create(l, v))));
|
---|
| 184 | errorTable.Rows.Add(new IndexedDataRow<double>("Number of variables", "The number of non-zero coefficients for each step in the path", lambda.Zip(numNonZeroCoeffs, (l, v) => Tuple.Create(l, (double)v))));
|
---|
[13940] | 185 | if (lambda.Length > 2) {
|
---|
[14375] | 186 | errorTable.VisualProperties.XAxisMinimumFixedValue = Math.Pow(10, Math.Floor(Math.Log10(lambda.Last())));
|
---|
| 187 | errorTable.VisualProperties.XAxisMaximumFixedValue = Math.Pow(10, Math.Ceiling(Math.Log10(lambda.Skip(1).First())));
|
---|
[13940] | 188 | }
|
---|
[14375] | 189 | errorTable.Rows["NMSE (train)"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
|
---|
| 190 | errorTable.Rows["NMSE (test)"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
|
---|
| 191 | errorTable.Rows["Number of variables"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
|
---|
| 192 | errorTable.Rows["Number of variables"].VisualProperties.SecondYAxis = true;
|
---|
[14395] | 193 |
|
---|
[14375] | 194 | Results.Add(new Result(errorTable.Name, errorTable.Description, errorTable));
|
---|
[13927] | 195 | }
|
---|
| 196 |
|
---|
| 197 | public static double[] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double lambda,
|
---|
[14225] | 198 | out double trainNMSE, out double testNMSE,
|
---|
[13927] | 199 | double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity) {
|
---|
[14225] | 200 | double[] trainNMSEs;
|
---|
| 201 | double[] testNMSEs;
|
---|
[13927] | 202 | // run for exactly one lambda
|
---|
[14225] | 203 | var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainNMSEs, out testNMSEs, coeffLowerBound, coeffUpperBound);
|
---|
| 204 | trainNMSE = trainNMSEs[0];
|
---|
| 205 | testNMSE = testNMSEs[0];
|
---|
[13927] | 206 | return coeffs[0];
|
---|
| 207 | }
|
---|
| 208 | public static double[][] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double[] lambda,
|
---|
[14370] | 209 | out double[] trainNMSEs, out double[] testNMSEs,
|
---|
[13927] | 210 | double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
|
---|
| 211 | int maxVars = -1) {
|
---|
| 212 | // run for multiple user-supplied lambdas
|
---|
| 213 | double[,] coeff;
|
---|
| 214 | double[] intercept;
|
---|
[14370] | 215 | RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out trainNMSEs, out testNMSEs, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
|
---|
[13927] | 216 |
|
---|
| 217 | int nRows = intercept.Length;
|
---|
| 218 | int nCols = coeff.GetLength(1) + 1;
|
---|
| 219 | double[][] sols = new double[nRows][];
|
---|
| 220 | for (int solIdx = 0; solIdx < nRows; solIdx++) {
|
---|
| 221 | sols[solIdx] = new double[nCols];
|
---|
| 222 | for (int cIdx = 0; cIdx < nCols - 1; cIdx++) {
|
---|
| 223 | sols[solIdx][cIdx] = coeff[solIdx, cIdx];
|
---|
| 224 | }
|
---|
| 225 | sols[solIdx][nCols - 1] = intercept[solIdx];
|
---|
| 226 | }
|
---|
| 227 | return sols;
|
---|
| 228 | }
|
---|
| 229 |
|
---|
| 230 | public static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty,
|
---|
[14225] | 231 | out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept,
|
---|
[13927] | 232 | double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
|
---|
| 233 | int maxVars = -1
|
---|
| 234 | ) {
|
---|
| 235 | double[] userLambda = new double[0];
|
---|
| 236 | // automatically determine lambda values (maximum 100 different lambda values)
|
---|
[14225] | 237 | RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainNMSE, out testNMSE, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
|
---|
[13927] | 238 | }
|
---|
| 239 |
|
---|
| 240 | /// <summary>
|
---|
| 241 | /// Elastic net with squared-error-loss for dense predictor matrix, runs the full path of all lambdas
|
---|
| 242 | /// </summary>
|
---|
| 243 | /// <param name="problemData">Predictor target matrix x and target vector y</param>
|
---|
| 244 | /// <param name="penalty">Penalty for balance between ridge (0.0) and lasso (1.0) regression</param>
|
---|
| 245 | /// <param name="nlam">Maximum number of lambda values (default 100)</param>
|
---|
| 246 | /// <param name="flmin">User control of lambda values (<1.0 => minimum lambda = flmin * (largest lambda value), >= 1.0 => use supplied lambda values</param>
|
---|
| 247 | /// <param name="ulam">User supplied lambda values</param>
|
---|
| 248 | /// <param name="lambda">Output lambda values</param>
|
---|
[14225] | 249 | /// <param name="trainNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the training set for each set of coefficients along the path</param>
|
---|
| 250 | /// <param name="testNMSE">Vector of normalized mean of squared error (NMSE = Variance(res) / Variance(y)) values on the test set for each set of coefficients along the path</param>
|
---|
[13927] | 251 | /// <param name="coeff">Vector of coefficient vectors for each solution along the path</param>
|
---|
| 252 | /// <param name="intercept">Vector of intercepts for each solution along the path</param>
|
---|
| 253 | /// <param name="coeffLowerBound">Optional lower bound for all coefficients</param>
|
---|
| 254 | /// <param name="coeffUpperBound">Optional upper bound for all coefficients</param>
|
---|
| 255 | /// <param name="maxVars">Maximum allowed number of variables in each solution along the path (-1 => all variables are allowed)</param>
|
---|
| 256 | private static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty,
|
---|
[14225] | 257 | int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainNMSE, out double[] testNMSE, out double[,] coeff, out double[] intercept,
|
---|
[13927] | 258 | double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
|
---|
| 259 | int maxVars = -1
|
---|
| 260 | ) {
|
---|
| 261 | if (penalty < 0.0 || penalty > 1.0) throw new ArgumentException("0 <= penalty <= 1", "penalty");
|
---|
| 262 |
|
---|
[13929] | 263 | double[,] trainX;
|
---|
| 264 | double[,] testX;
|
---|
| 265 | double[] trainY;
|
---|
| 266 | double[] testY;
|
---|
[13927] | 267 |
|
---|
[14395] | 268 | PrepareData(problemData, out trainX, out trainY, out testX, out testY);
|
---|
| 269 | var numTrainObs = trainX.GetLength(1);
|
---|
| 270 | var numTestObs = testX.GetLength(1);
|
---|
| 271 | var numVars = trainX.GetLength(0);
|
---|
| 272 |
|
---|
[13927] | 273 | int ka = 1; // => covariance updating algorithm
|
---|
| 274 | double parm = penalty;
|
---|
[13929] | 275 | double[] w = Enumerable.Repeat(1.0, numTrainObs).ToArray(); // all observations have the same weight
|
---|
[13927] | 276 | int[] jd = new int[1]; // do not force to use any of the variables
|
---|
| 277 | double[] vp = Enumerable.Repeat(1.0, numVars).ToArray(); // all predictor variables are unpenalized
|
---|
| 278 | double[,] cl = new double[numVars, 2]; // use the same bounds for all coefficients
|
---|
| 279 | for (int i = 0; i < numVars; i++) {
|
---|
| 280 | cl[i, 0] = coeffLowerBound;
|
---|
| 281 | cl[i, 1] = coeffUpperBound;
|
---|
| 282 | }
|
---|
| 283 |
|
---|
| 284 | int ne = maxVars > 0 ? maxVars : numVars;
|
---|
| 285 | int nx = numVars;
|
---|
| 286 | double thr = 1.0e-5; // default value as recommended in glmnet
|
---|
[13940] | 287 | int isd = 1; // => regression on standardized predictor variables
|
---|
[13927] | 288 | int intr = 1; // => do include intercept in model
|
---|
| 289 | int maxit = 100000; // default value as recommended in glmnet
|
---|
| 290 | // outputs
|
---|
| 291 | int lmu = -1;
|
---|
| 292 | double[,] ca;
|
---|
| 293 | int[] ia;
|
---|
| 294 | int[] nin;
|
---|
| 295 | int nlp = -99;
|
---|
| 296 | int jerr = -99;
|
---|
[14225] | 297 | double[] trainR2;
|
---|
[14370] | 298 | Glmnet.elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr);
|
---|
[13927] | 299 |
|
---|
[14225] | 300 | trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE
|
---|
| 301 | testNMSE = new double[lmu];
|
---|
[13927] | 302 | coeff = new double[lmu, numVars];
|
---|
| 303 | for (int solIdx = 0; solIdx < lmu; solIdx++) {
|
---|
[14225] | 304 | trainNMSE[solIdx] = 1.0 - trainR2[solIdx];
|
---|
| 305 |
|
---|
[13927] | 306 | // uncompress coefficients of solution
|
---|
| 307 | int selectedNin = nin[solIdx];
|
---|
| 308 | double[] coefficients;
|
---|
| 309 | double[] selectedCa = new double[nx];
|
---|
[13929] | 310 | for (int i = 0; i < nx; i++) {
|
---|
| 311 | selectedCa[i] = ca[solIdx, i];
|
---|
| 312 | }
|
---|
[13927] | 313 |
|
---|
[14225] | 314 | // apply to test set to calculate test NMSE values for each lambda step
|
---|
[13929] | 315 | double[] fn;
|
---|
[14370] | 316 | Glmnet.modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);
|
---|
[13929] | 317 | OnlineCalculatorError error;
|
---|
[14225] | 318 | var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error);
|
---|
[14461] | 319 | if (error != OnlineCalculatorError.None) nmse = double.NaN;
|
---|
[14225] | 320 | testNMSE[solIdx] = nmse;
|
---|
[13929] | 321 |
|
---|
| 322 | // uncompress coefficients
|
---|
[14370] | 323 | Glmnet.uncomp(numVars, selectedCa, ia, selectedNin, out coefficients);
|
---|
[13927] | 324 | for (int i = 0; i < coefficients.Length; i++) {
|
---|
| 325 | coeff[solIdx, i] = coefficients[i];
|
---|
| 326 | }
|
---|
| 327 | }
|
---|
| 328 | }
|
---|
| 329 |
|
---|
[14395] | 330 | private static void PrepareData(IRegressionProblemData problemData, out double[,] trainX, out double[] trainY,
|
---|
| 331 | out double[,] testX, out double[] testY) {
|
---|
[13927] | 332 |
|
---|
| 333 | var ds = problemData.Dataset;
|
---|
[14395] | 334 | trainX = ds.ToArray(problemData.AllowedInputVariables, problemData.TrainingIndices);
|
---|
| 335 | trainX = trainX.Transpose();
|
---|
| 336 | trainY = problemData.Dataset.GetDoubleValues(problemData.TargetVariable,
|
---|
| 337 | problemData.TrainingIndices)
|
---|
| 338 | .ToArray();
|
---|
| 339 | testX = ds.ToArray(problemData.AllowedInputVariables, problemData.TestIndices);
|
---|
| 340 | testX = testX.Transpose();
|
---|
| 341 | testY = problemData.Dataset.GetDoubleValues(problemData.TargetVariable,
|
---|
| 342 | problemData.TestIndices)
|
---|
| 343 | .ToArray();
|
---|
[13927] | 344 | }
|
---|
| 345 | }
|
---|
| 346 | }
|
---|