- Timestamp:
- 11/04/16 17:33:51 (8 years ago)
- Location:
- branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4
- Files:
-
- 1 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs
r14225 r14370 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * 5 * This file is part of HeuristicLab. 6 * 7 * HeuristicLab is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * HeuristicLab is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 #endregion 21 22 using System; 2 23 using System.Linq; 3 using System.Runtime.InteropServices;4 24 using HeuristicLab.Analysis; 5 25 using HeuristicLab.Common; … … 164 184 } 165 185 public static double[][] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double[] lambda, 166 out double[] train Rsq, out double[] testRsq,186 out double[] trainNMSEs, out double[] testNMSEs, 167 187 double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity, 168 188 int maxVars = -1) { … … 170 190 double[,] coeff; 171 191 double[] intercept; 172 RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out train Rsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);192 RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out trainNMSEs, out testNMSEs, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars); 173 193 174 194 int nRows = intercept.Length; … … 252 272 int jerr = -99; 253 273 double[] trainR2; 254 elnet(ka, 
parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr);274 Glmnet.elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr); 255 275 256 276 trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE … … 270 290 // apply to test set to calculate test NMSE values for each lambda step 271 291 double[] fn; 272 modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);292 Glmnet.modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn); 273 293 OnlineCalculatorError error; 274 294 var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error); … … 277 297 278 298 // uncompress coefficients 279 uncomp(numVars, selectedCa, ia, selectedNin, out coefficients);299 Glmnet.uncomp(numVars, selectedCa, ia, selectedNin, out coefficients); 280 300 for (int i = 0; i < coefficients.Length; i++) { 281 301 coeff[solIdx, i] = coefficients[i]; … … 320 340 } 321 341 322 323 #region dllimport324 /// <summary>Wrapper for elnet procedure in glmnet library</summary>325 /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)326 ///327 /// ka = algorithm flag328 /// ka=1 => covariance updating algorithm329 /// ka=2 => naive algorithm330 /// parm = penalty member index(0<= parm <= 1)331 /// = 0.0 => ridge332 /// = 1.0 => lasso333 /// no = number of observations334 /// ni = number of predictor variables335 /// y(no) = response vector(overwritten)336 /// w(no)= observation weights(overwritten)337 /// jd(jd(1)+1) = predictor variable deletion flag338 /// jd(1) = 0 => use all variables339 /// jd(1) != 0 => do not use variables jd(2)...jd(jd(1)+1)340 /// vp(ni) = relative penalties for each predictor variable341 /// 
vp(j) = 0 => jth variable unpenalized342 /// cl(2, ni) = interval constraints on coefficient values(overwritten)343 /// cl(1, j) = lower bound for jth coefficient value(<= 0.0)344 /// cl(2, j) = upper bound for jth coefficient value(>= 0.0)345 /// ne = maximum number of variables allowed to enter largest model346 /// (stopping criterion)347 /// nx = maximum number of variables allowed to enter all models348 /// along path(memory allocation, nx > ne).349 /// nlam = (maximum)number of lamda values350 /// flmin = user control of lamda values(>=0)351 /// flmin< 1.0 => minimum lamda = flmin * (largest lamda value)352 /// flmin >= 1.0 => use supplied lamda values(see below)353 /// ulam(nlam) = user supplied lamda values(ignored if flmin< 1.0)354 /// thr = convergence threshold for each lamda solution.355 /// iterations stop when the maximum reduction in the criterion value356 /// as a result of each parameter update over a single pass357 /// is less than thr times the null criterion value.358 /// (suggested value, thr= 1.0e-5)359 /// isd = predictor variable standardization flag:360 /// isd = 0 => regression on original predictor variables361 /// isd = 1 => regression on standardized predictor variables362 /// Note: output solutions always reference original363 /// variables locations and scales.364 /// intr = intercept flag365 /// intr = 0 / 1 => don't/do include intercept in model366 /// maxit = maximum allowed number of passes over the data for all lambda367 /// values (suggested values, maxit = 100000)368 ///369 /// output:370 ///371 /// lmu = actual number of lamda values(solutions)372 /// a0(lmu) = intercept values for each solution373 /// ca(nx, lmu) = compressed coefficient values for each solution374 /// ia(nx) = pointers to compressed coefficients375 /// nin(lmu) = number of compressed coefficients for each solution376 /// rsq(lmu) = R**2 values for each solution377 /// alm(lmu) = lamda values corresponding to each solution378 /// nlp = actual number of passes 
over the data for all lamda values379 /// jerr = error flag:380 /// jerr = 0 => no error381 /// jerr > 0 => fatal error - no output returned382 /// jerr< 7777 => memory allocation error383 /// jerr = 7777 => all used predictors have zero variance384 /// jerr = 10000 => maxval(vp) <= 0.0385 /// jerr< 0 => non fatal error - partial output:386 /// Solutions for larger lamdas (1:(k-1)) returned.387 /// jerr = -k => convergence for kth lamda value not reached388 /// after maxit(see above) iterations.389 /// jerr = -10000 - k => number of non zero coefficients along path390 /// exceeds nx(see above) at kth lamda value.391 ///392 private static void elnet(393 int ka,394 double parm,395 int no,396 int ni,397 double[,] x,398 double[] y,399 double[] w,400 int[] jd,401 double[] vp,402 double[,] cl,403 int ne,404 int nx,405 int nlam,406 double flmin,407 double[] ulam,408 double thr,409 int isd,410 int intr,411 int maxit,412 // outputs413 out int lmu,414 out double[] a0,415 out double[,] ca,416 out int[] ia,417 out int[] nin,418 out double[] rsq,419 out double[] alm,420 out int nlp,421 out int jerr422 ) {423 // initialize output values and allocate arrays big enough424 a0 = new double[nlam];425 ca = new double[nlam, nx];426 ia = new int[nx];427 nin = new int[nlam];428 rsq = new double[nlam];429 alm = new double[nlam];430 nlp = -1;431 jerr = -1;432 lmu = -1;433 434 // load correct version of native dll based on process (x86/x64)435 if (Environment.Is64BitProcess) {436 elnet_x64(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);437 } else {438 elnet_x86(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);439 }440 // jerr = error flag:441 // jerr = 0 => no error442 // jerr > 0 => fatal error -no output 
returned443 // jerr < 7777 => memory allocation error444 // jerr = 7777 => all used predictors have zero variance445 // jerr = 10000 => maxval(vp) <= 0.0446 // jerr < 0 => non fatal error - partial output:447 // c Solutions for larger lamdas (1:(k - 1)) returned.448 // jerr = -k => convergence for kth lamda value not reached449 // after maxit(see above) iterations.450 // jerr = -10000 - k => number of non zero coefficients along path451 // exceeds nx(see above) at kth lamda value.452 if (jerr != 0) {453 if (jerr > 0 && jerr < 7777) throw new InvalidOperationException("glmnet: memory allocation error");454 else if (jerr == 7777) throw new InvalidOperationException("glmnet: all used predictors have zero variance");455 else if (jerr == 10000) throw new InvalidOperationException("glmnet: maxval(vp) <= 0.0");456 else if (jerr < 0 && jerr > -1000) throw new InvalidOperationException(string.Format("glmnet: convergence for {0}th lamda value not reached after maxit iterations ", -jerr));457 else if (jerr <= -10000) throw new InvalidOperationException(string.Format("glmnet: number of non zero coefficients along path exceeds number of maximally allowed variables (nx) at {0}th lamda value", -jerr - 10000));458 else throw new InvalidOperationException(string.Format("glmnet: error {0}", jerr));459 }460 461 462 // resize arrays to the capacity that is acutally necessary for the results463 Array.Resize(ref a0, lmu);464 Array.Resize(ref nin, lmu);465 Array.Resize(ref rsq, lmu);466 Array.Resize(ref alm, lmu);467 }468 469 [DllImport("glmnet-x86.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]470 private static extern void elnet_x86(471 ref int ka,472 ref double parm,473 ref int no,474 ref int ni,475 double[,] x,476 double[] y,477 double[] w,478 int[] jd,479 double[] vp,480 double[,] cl,481 ref int ne,482 ref int nx,483 ref int nlam,484 ref double flmin,485 double[] ulam,486 ref double thr,487 ref int isd,488 ref int intr,489 ref int maxit,490 // outputs:491 
ref int lmu,492 [Out] double[] a0,493 [Out] double[,] ca,494 [Out] int[] ia,495 [Out] int[] nin,496 [Out] double[] rsq,497 [Out] double[] alm,498 ref int nlp,499 ref int jerr500 );501 [DllImport("glmnet-x64.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]502 private static extern void elnet_x64(503 ref int ka,504 ref double parm,505 ref int no,506 ref int ni,507 double[,] x,508 double[] y,509 double[] w,510 int[] jd,511 double[] vp,512 double[,] cl,513 ref int ne,514 ref int nx,515 ref int nlam,516 ref double flmin,517 double[] ulam,518 ref double thr,519 ref int isd,520 ref int intr,521 ref int maxit,522 // outputs:523 ref int lmu,524 [Out] double[] a0,525 [Out] double[,] ca,526 [Out] int[] ia,527 [Out] int[] nin,528 [Out] double[] rsq,529 [Out] double[] alm,530 ref int nlp,531 ref int jerr532 );533 534 535 /// <summary>Wrapper for uncompress coefficient vector for particular solution in glmnet</summary>536 /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)537 ///538 /// call uncomp(ni, ca, ia, nin, a)539 ///540 /// input:541 ///542 /// ni = total number of predictor variables543 /// ca(nx) = compressed coefficient values for the solution544 /// ia(nx) = pointers to compressed coefficients545 /// nin = number of compressed coefficients for the solution546 ///547 /// output:548 ///549 /// a(ni) = uncompressed coefficient vector550 /// referencing original variables551 ///552 private static void uncomp(int numVars, double[] ca, int[] ia, int nin, out double[] a) {553 a = new double[numVars];554 // load correct version of native dll based on process (x86/x64)555 if (Environment.Is64BitProcess) {556 uncomp_x64(ref numVars, ca, ia, ref nin, a);557 } else {558 uncomp_x86(ref numVars, ca, ia, ref nin, a);559 }560 }561 562 [DllImport("glmnet-x86.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]563 private static extern void uncomp_x86(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);564 
[DllImport("glmnet-x64.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]565 private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);566 567 private static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {568 fn = new double[numObs];569 if (Environment.Is64BitProcess) {570 modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);571 } else {572 modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);573 }574 }575 // evaluate linear model from compressed coefficients and576 // uncompressed predictor matrix:577 //578 // call modval(a0, ca, ia, nin, n, x, f);579 // c580 // c input:581 //582 // a0 = intercept583 // ca(nx) = compressed coefficient values for a solution584 // ia(nx) = pointers to compressed coefficients585 // nin = number of compressed coefficients for solution586 // n = number of predictor vectors(observations)587 // x(n, ni) = full(uncompressed) predictor matrix588 //589 // output:590 //591 // f(n) = model predictions592 [DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]593 private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);594 [DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]595 private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);596 597 #endregion598 342 } 599 343 } -
branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/HeuristicLab.Algorithms.DataAnalysis.Glmnet.csproj
r13931 r14370 153 153 <ItemGroup> 154 154 <Compile Include="ElasticNetLinearRegression.cs" /> 155 <Compile Include="Glmnet.cs" /> 155 156 <Compile Include="Plugin.cs" /> 156 157 <Compile Include="Properties\AssemblyInfo.cs" /> -
branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/Plugin.cs.frame
r13930 r14370 44 44 [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic", "3.4")] 45 45 [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression", "3.4")] 46 [PluginDependency("HeuristicLab.Problems.Instances", "3.3")]47 46 public class HeuristicLabAlgorithmsDataAnalysisGlmnetPlugin : PluginBase { 48 47 }
Note: See TracChangeset
for help on using the changeset viewer.