Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/04/16 17:33:51 (7 years ago)
Author:
mkommend
Message:

#745: Addressed some easy to implement review comments:

  • missing license header
  • renaming of variables
  • extracted DLLImports into a separate file
  • corrected plugin dependencies
Location:
branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs

    r14225 r14370  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
    223using System.Linq;
    3 using System.Runtime.InteropServices;
    424using HeuristicLab.Analysis;
    525using HeuristicLab.Common;
     
    164184    }
    165185    public static double[][] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double[] lambda,
    166             out double[] trainRsq, out double[] testRsq,
     186            out double[] trainNMSEs, out double[] testNMSEs,
    167187            double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
    168188            int maxVars = -1) {
     
    170190      double[,] coeff;
    171191      double[] intercept;
    172       RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out trainRsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
     192      RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out trainNMSEs, out testNMSEs, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
    173193
    174194      int nRows = intercept.Length;
     
    252272      int jerr = -99;
    253273      double[] trainR2;
    254       elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr);
     274      Glmnet.elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainR2, out lambda, out nlp, out jerr);
    255275
    256276      trainNMSE = new double[lmu]; // elnet returns R**2 as 1 - NMSE
     
    270290        // apply to test set to calculate test NMSE values for each lambda step
    271291        double[] fn;
    272         modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);
     292        Glmnet.modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);
    273293        OnlineCalculatorError error;
    274294        var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(testY, fn, out error);
     
    277297
    278298        // uncompress coefficients
    279         uncomp(numVars, selectedCa, ia, selectedNin, out coefficients);
     299        Glmnet.uncomp(numVars, selectedCa, ia, selectedNin, out coefficients);
    280300        for (int i = 0; i < coefficients.Length; i++) {
    281301          coeff[solIdx, i] = coefficients[i];
     
    320340    }
    321341
    322 
    323     #region dllimport
    324     /// <summary>Wrapper for elnet procedure in glmnet library</summary>
    325     /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
    326     ///
    327     ///  ka = algorithm flag
    328     ///       ka=1 => covariance updating algorithm
    329     ///       ka=2 => naive algorithm
    330     ///  parm = penalty member index(0&lt;= parm &lt;= 1)
    331     ///         = 0.0 => ridge
    332     ///  = 1.0 => lasso
    333     ///    no = number of observations
    334     ///    ni = number of predictor variables
    335     ///  y(no) = response vector(overwritten)
    336     ///  w(no)= observation weights(overwritten)
    337     ///  jd(jd(1)+1) = predictor variable deletion flag
    338     ///  jd(1) = 0  => use all variables
    339     ///       jd(1) != 0 => do not use variables jd(2)...jd(jd(1)+1)
    340     ///  vp(ni) = relative penalties for each predictor variable
    341     ///       vp(j) = 0 => jth variable unpenalized
    342     ///    cl(2, ni) = interval constraints on coefficient values(overwritten)
    343     ///  cl(1, j) = lower bound for jth coefficient value(&lt;= 0.0)
    344     ///  cl(2, j) = upper bound for jth coefficient value(>= 0.0)
    345     ///  ne = maximum number of variables allowed to enter largest model
    346     /// (stopping criterion)
    347     ///  nx = maximum number of variables allowed to enter all models
    348     ///  along path(memory allocation, nx > ne).
    349     ///  nlam = (maximum)number of lamda values
    350     ///    flmin = user control of lamda values(>=0)
    351     ///  flmin&lt; 1.0 => minimum lamda = flmin * (largest lamda value)
    352     ///  flmin >= 1.0 => use supplied lamda values(see below)
    353     ///  ulam(nlam) = user supplied lamda values(ignored if flmin&lt; 1.0)
    354     ///  thr = convergence threshold for each lamda solution.
    355     ///  iterations stop when the maximum reduction in the criterion value
    356     ///       as a result of each parameter update over a single pass
    357     ///       is less than thr times the null criterion value.
    358     /// (suggested value, thr= 1.0e-5)
    359     ///  isd = predictor variable standardization flag:
    360     ///  isd = 0 => regression on original predictor variables
    361     ///       isd = 1 => regression on standardized predictor variables
    362     ///       Note: output solutions always reference original
    363     ///             variables locations and scales.
    364     ///    intr = intercept flag
    365     ///       intr = 0 / 1 => don't/do include intercept in model
    366     ///  maxit = maximum allowed number of passes over the data for all lambda
    367     ///  values (suggested values, maxit = 100000)
    368     ///
    369     ///  output:
    370     ///
    371     ///    lmu = actual number of lamda values(solutions)
    372     ///  a0(lmu) = intercept values for each solution
    373     ///  ca(nx, lmu) = compressed coefficient values for each solution
    374     ///  ia(nx) = pointers to compressed coefficients
    375     ///  nin(lmu) = number of compressed coefficients for each solution
    376     ///  rsq(lmu) = R**2 values for each solution
    377     ///  alm(lmu) = lamda values corresponding to each solution
    378     ///  nlp = actual number of passes over the data for all lamda values
    379     ///    jerr = error flag:
    380     ///  jerr = 0 => no error
    381     ///  jerr > 0 => fatal error - no output returned
    382     ///          jerr&lt; 7777 => memory allocation error
    383     ///          jerr = 7777 => all used predictors have zero variance
    384     ///          jerr = 10000 => maxval(vp) &lt;= 0.0
    385     ///  jerr&lt; 0 => non fatal error - partial output:
    386     ///  Solutions for larger lamdas (1:(k-1)) returned.
    387     ///  jerr = -k => convergence for kth lamda value not reached
    388     ///             after maxit(see above) iterations.
    389     ///  jerr = -10000 - k => number of non zero coefficients along path
    390     ///             exceeds nx(see above) at kth lamda value.
    391     ///
    392     private static void elnet(
    393       int ka,
    394       double parm,
    395       int no,
    396       int ni,
    397       double[,] x,
    398       double[] y,
    399       double[] w,
    400       int[] jd,
    401       double[] vp,
    402       double[,] cl,
    403       int ne,
    404       int nx,
    405       int nlam,
    406       double flmin,
    407       double[] ulam,
    408       double thr,
    409       int isd,
    410       int intr,
    411       int maxit,
    412       // outputs
    413       out int lmu,
    414       out double[] a0,
    415       out double[,] ca,
    416       out int[] ia,
    417       out int[] nin,
    418       out double[] rsq,
    419       out double[] alm,
    420       out int nlp,
    421       out int jerr
    422       ) {
    423       // initialize output values and allocate arrays big enough
    424       a0 = new double[nlam];
    425       ca = new double[nlam, nx];
    426       ia = new int[nx];
    427       nin = new int[nlam];
    428       rsq = new double[nlam];
    429       alm = new double[nlam];
    430       nlp = -1;
    431       jerr = -1;
    432       lmu = -1;
    433 
    434       // load correct version of native dll based on process (x86/x64)
    435       if (Environment.Is64BitProcess) {
    436         elnet_x64(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
    437       } else {
    438         elnet_x86(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
    439       }
    440       //  jerr = error flag:
    441       //  jerr = 0 => no error
    442       //  jerr > 0 => fatal error -no output returned
    443       //  jerr < 7777 => memory allocation error
    444       //          jerr = 7777 => all used predictors have zero variance
    445       //  jerr = 10000 => maxval(vp) <= 0.0
    446       //  jerr < 0 => non fatal error - partial output:
    447       //      c Solutions for larger lamdas (1:(k - 1)) returned.
    448       //  jerr = -k => convergence for kth lamda value not reached
    449       //             after maxit(see above) iterations.
    450       //          jerr = -10000 - k => number of non zero coefficients along path
    451       //             exceeds nx(see above) at kth lamda value.
    452       if (jerr != 0) {
    453         if (jerr > 0 && jerr < 7777) throw new InvalidOperationException("glmnet: memory allocation error");
    454         else if (jerr == 7777) throw new InvalidOperationException("glmnet: all used predictors have zero variance");
    455         else if (jerr == 10000) throw new InvalidOperationException("glmnet: maxval(vp) <= 0.0");
    456         else if (jerr < 0 && jerr > -1000) throw new InvalidOperationException(string.Format("glmnet: convergence for {0}th lamda value not reached after maxit iterations ", -jerr));
    457         else if (jerr <= -10000) throw new InvalidOperationException(string.Format("glmnet: number of non zero coefficients along path exceeds number of maximally allowed variables (nx) at {0}th lamda value", -jerr - 10000));
    458         else throw new InvalidOperationException(string.Format("glmnet: error {0}", jerr));
    459       }
    460 
    461 
    462       // resize arrays to the capacity that is actually necessary for the results
    463       Array.Resize(ref a0, lmu);
    464       Array.Resize(ref nin, lmu);
    465       Array.Resize(ref rsq, lmu);
    466       Array.Resize(ref alm, lmu);
    467     }
    468 
    469     [DllImport("glmnet-x86.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
    470     private static extern void elnet_x86(
    471       ref int ka,
    472       ref double parm,
    473       ref int no,
    474       ref int ni,
    475       double[,] x,
    476       double[] y,
    477       double[] w,
    478       int[] jd,
    479       double[] vp,
    480       double[,] cl,
    481       ref int ne,
    482       ref int nx,
    483       ref int nlam,
    484       ref double flmin,
    485       double[] ulam,
    486       ref double thr,
    487       ref int isd,
    488       ref int intr,
    489       ref int maxit,
    490       // outputs:
    491       ref int lmu,
    492       [Out] double[] a0,
    493       [Out] double[,] ca,
    494       [Out] int[] ia,
    495       [Out] int[] nin,
    496       [Out] double[] rsq,
    497       [Out] double[] alm,
    498       ref int nlp,
    499       ref int jerr
    500       );
    501     [DllImport("glmnet-x64.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
    502     private static extern void elnet_x64(
    503       ref int ka,
    504       ref double parm,
    505       ref int no,
    506       ref int ni,
    507       double[,] x,
    508       double[] y,
    509       double[] w,
    510       int[] jd,
    511       double[] vp,
    512       double[,] cl,
    513       ref int ne,
    514       ref int nx,
    515       ref int nlam,
    516       ref double flmin,
    517       double[] ulam,
    518       ref double thr,
    519       ref int isd,
    520       ref int intr,
    521       ref int maxit,
    522       // outputs:
    523       ref int lmu,
    524       [Out] double[] a0,
    525       [Out] double[,] ca,
    526       [Out] int[] ia,
    527       [Out] int[] nin,
    528       [Out] double[] rsq,
    529       [Out] double[] alm,
    530       ref int nlp,
    531       ref int jerr
    532       );
    533 
    534 
    535     /// <summary>Wrapper for uncompress coefficient vector for particular solution in glmnet</summary>
    536     /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
    537     ///
    538     /// call uncomp(ni, ca, ia, nin, a)
    539     ///
    540     /// input:
    541     ///
    542     ///    ni = total number of predictor variables
    543     ///    ca(nx) = compressed coefficient values for the solution
    544     /// ia(nx) = pointers to compressed coefficients
    545     /// nin = number of compressed coefficients for the solution
    546     ///
    547     /// output:
    548     ///
    549     ///    a(ni) =  uncompressed coefficient vector
    550     ///             referencing original variables
    551     ///
    552     private static void uncomp(int numVars, double[] ca, int[] ia, int nin, out double[] a) {
    553       a = new double[numVars];
    554       // load correct version of native dll based on process (x86/x64)
    555       if (Environment.Is64BitProcess) {
    556         uncomp_x64(ref numVars, ca, ia, ref nin, a);
    557       } else {
    558         uncomp_x86(ref numVars, ca, ia, ref nin, a);
    559       }
    560     }
    561 
    562     [DllImport("glmnet-x86.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
    563     private static extern void uncomp_x86(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
    564     [DllImport("glmnet-x64.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
    565     private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
    566 
    567     private static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {
    568       fn = new double[numObs];
    569       if (Environment.Is64BitProcess) {
    570         modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);
    571       } else {
    572         modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);
    573       }
    574     }
    575     // evaluate linear model from compressed coefficients and
    576     // uncompressed predictor matrix:
    577     //
    578     // call modval(a0, ca, ia, nin, n, x, f);
    579     //   c
    580     //   c input:
    581     //
    582     //    a0 = intercept
    583     //    ca(nx) = compressed coefficient values for a solution
    584     // ia(nx) = pointers to compressed coefficients
    585     // nin = number of compressed coefficients for solution
    586     //    n = number of predictor vectors(observations)
    587     // x(n, ni) = full(uncompressed) predictor matrix
    588     //
    589     // output:
    590     //
    591     //    f(n) = model predictions
    592     [DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
    593     private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
    594     [DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
    595     private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
    596 
    597     #endregion
    598342  }
    599343}
  • branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/HeuristicLab.Algorithms.DataAnalysis.Glmnet.csproj

    r13931 r14370  
    153153  <ItemGroup>
    154154    <Compile Include="ElasticNetLinearRegression.cs" />
     155    <Compile Include="Glmnet.cs" />
    155156    <Compile Include="Plugin.cs" />
    156157    <Compile Include="Properties\AssemblyInfo.cs" />
  • branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/Plugin.cs.frame

    r13930 r14370  
    4444  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic", "3.4")]
    4545  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression", "3.4")]
    46   [PluginDependency("HeuristicLab.Problems.Instances", "3.3")]
    4746  public class HeuristicLabAlgorithmsDataAnalysisGlmnetPlugin : PluginBase {
    4847  }
Note: See TracChangeset for help on using the changeset viewer.