Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2695_dataset-ids/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/Glmnet.cs @ 16724

Last change on this file since 16724 was 14370, checked in by mkommend, 8 years ago

#745: Addressed some easy to implement review comments:

  • missing license header
  • renaming of variables
  • extracted DLLImports into a separate file
  • corrected plugin dependencies
File size: 12.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Runtime.InteropServices;
24
25namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
26  public static class Glmnet {
27    /// <summary>Wrapper for elnet procedure in glmnet library</summary>
28    /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
29    ///
30    ///  ka = algorithm flag
31    ///       ka=1 => covariance updating algorithm
32    ///       ka=2 => naive algorithm
33    ///  parm = penalty member index(0&lt;= parm &lt;= 1)
34    ///         = 0.0 => ridge
35    ///  = 1.0 => lasso
36    ///    no = number of observations
37    ///    ni = number of predictor variables
38    ///  y(no) = response vector(overwritten)
39    ///  w(no)= observation weights(overwritten)
40    ///  jd(jd(1)+1) = predictor variable deletion flag
41    ///  jd(1) = 0  => use all variables
42    ///       jd(1) != 0 => do not use variables jd(2)...jd(jd(1)+1)
43    ///  vp(ni) = relative penalties for each predictor variable
44    ///       vp(j) = 0 => jth variable unpenalized
45    ///    cl(2, ni) = interval constraints on coefficient values(overwritten)
46    ///  cl(1, j) = lower bound for jth coefficient value(&lt;= 0.0)
47    ///  cl(2, j) = upper bound for jth coefficient value(>= 0.0)
48    ///  ne = maximum number of variables allowed to enter largest model
49    /// (stopping criterion)
50    ///  nx = maximum number of variables allowed to enter all modesl
51    ///  along path(memory allocation, nx > ne).
52    ///  nlam = (maximum)number of lamda values
53    ///    flmin = user control of lamda values(>=0)
54    ///  flmin&lt; 1.0 => minimum lamda = flmin * (largest lamda value)
55    ///  flmin >= 1.0 => use supplied lamda values(see below)
56    ///  ulam(nlam) = user supplied lamda values(ignored if flmin&lt; 1.0)
57    ///  thr = convergence threshold for each lamda solution.
58    ///  iterations stop when the maximum reduction in the criterion value
59    ///       as a result of each parameter update over a single pass
60    ///       is less than thr times the null criterion value.
61    /// (suggested value, thr= 1.0e-5)
62    ///  isd = predictor variable standarization flag:
63    ///  isd = 0 => regression on original predictor variables
64    ///       isd = 1 => regression on standardized predictor variables
65    ///       Note: output solutions always reference original
66    ///             variables locations and scales.
67    ///    intr = intercept flag
68    ///       intr = 0 / 1 => don't/do include intercept in model
69    ///  maxit = maximum allowed number of passes over the data for all lambda
70    ///  values (suggested values, maxit = 100000)
71    ///
72    ///  output:
73    ///
74    ///    lmu = actual number of lamda values(solutions)
75    ///  a0(lmu) = intercept values for each solution
76    ///  ca(nx, lmu) = compressed coefficient values for each solution
77    ///  ia(nx) = pointers to compressed coefficients
78    ///  nin(lmu) = number of compressed coefficients for each solution
79    ///  rsq(lmu) = R**2 values for each solution
80    ///  alm(lmu) = lamda values corresponding to each solution
81    ///  nlp = actual number of passes over the data for all lamda values
82    ///    jerr = error flag:
83    ///  jerr = 0 => no error
84    ///  jerr > 0 => fatal error - no output returned
85    ///          jerr&lt; 7777 => memory allocation error
86    ///          jerr = 7777 => all used predictors have zero variance
87    ///          jerr = 10000 => maxval(vp) &lt;= 0.0
88    ///  jerr&lt; 0 => non fatal error - partial output:
89    ///  Solutions for larger lamdas (1:(k-1)) returned.
90    ///  jerr = -k => convergence for kth lamda value not reached
91    ///             after maxit(see above) iterations.
92    ///  jerr = -10000 - k => number of non zero coefficients along path
93    ///             exceeds nx(see above) at kth lamda value.
94    ///
95    public static void elnet(
96      int ka,
97      double parm,
98      int no,
99      int ni,
100      double[,] x,
101      double[] y,
102      double[] w,
103      int[] jd,
104      double[] vp,
105      double[,] cl,
106      int ne,
107      int nx,
108      int nlam,
109      double flmin,
110      double[] ulam,
111      double thr,
112      int isd,
113      int intr,
114      int maxit,
115      // outputs
116      out int lmu,
117      out double[] a0,
118      out double[,] ca,
119      out int[] ia,
120      out int[] nin,
121      out double[] rsq,
122      out double[] alm,
123      out int nlp,
124      out int jerr
125      ) {
126      // initialize output values and allocate arrays big enough
127      a0 = new double[nlam];
128      ca = new double[nlam, nx];
129      ia = new int[nx];
130      nin = new int[nlam];
131      rsq = new double[nlam];
132      alm = new double[nlam];
133      nlp = -1;
134      jerr = -1;
135      lmu = -1;
136
137      // load correct version of native dll based on process (x86/x64)
138      if (Environment.Is64BitProcess) {
139        elnet_x64(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
140      } else {
141        elnet_x86(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
142      }
143      //  jerr = error flag:
144      //  jerr = 0 => no error
145      //  jerr > 0 => fatal error -no output returned
146      //  jerr < 7777 => memory allocation error
147      //          jerr = 7777 => all used predictors have zero variance
148      //  jerr = 10000 => maxval(vp) <= 0.0
149      //  jerr < 0 => non fatal error - partial output:
150      //      c Solutions for larger lamdas (1:(k - 1)) returned.
151      //  jerr = -k => convergence for kth lamda value not reached
152      //             after maxit(see above) iterations.
153      //          jerr = -10000 - k => number of non zero coefficients along path
154      //             exceeds nx(see above) at kth lamda value.
155      if (jerr != 0) {
156        if (jerr > 0 && jerr < 7777) throw new InvalidOperationException("glmnet: memory allocation error");
157        else if (jerr == 7777) throw new InvalidOperationException("glmnet: all used predictors have zero variance");
158        else if (jerr == 10000) throw new InvalidOperationException("glmnet: maxval(vp) <= 0.0");
159        else if (jerr < 0 && jerr > -1000) throw new InvalidOperationException(string.Format("glmnet: convergence for {0}th lamda value not reached after maxit iterations ", -jerr));
160        else if (jerr <= -10000) throw new InvalidOperationException(string.Format("glmnet: number of non zero coefficients along path exceeds number of maximally allowed variables (nx) at {0}th lamda value", -jerr - 10000));
161        else throw new InvalidOperationException(string.Format("glmnet: error {0}", jerr));
162      }
163
164
165      // resize arrays to the capacity that is acutally necessary for the results
166      Array.Resize(ref a0, lmu);
167      Array.Resize(ref nin, lmu);
168      Array.Resize(ref rsq, lmu);
169      Array.Resize(ref alm, lmu);
170    }
171
172    [DllImport("glmnet-x86.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
173    private static extern void elnet_x86(
174      ref int ka,
175      ref double parm,
176      ref int no,
177      ref int ni,
178      double[,] x,
179      double[] y,
180      double[] w,
181      int[] jd,
182      double[] vp,
183      double[,] cl,
184      ref int ne,
185      ref int nx,
186      ref int nlam,
187      ref double flmin,
188      double[] ulam,
189      ref double thr,
190      ref int isd,
191      ref int intr,
192      ref int maxit,
193      // outputs:
194      ref int lmu,
195      [Out] double[] a0,
196      [Out] double[,] ca,
197      [Out] int[] ia,
198      [Out] int[] nin,
199      [Out] double[] rsq,
200      [Out] double[] alm,
201      ref int nlp,
202      ref int jerr
203      );
204    [DllImport("glmnet-x64.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
205    private static extern void elnet_x64(
206      ref int ka,
207      ref double parm,
208      ref int no,
209      ref int ni,
210      double[,] x,
211      double[] y,
212      double[] w,
213      int[] jd,
214      double[] vp,
215      double[,] cl,
216      ref int ne,
217      ref int nx,
218      ref int nlam,
219      ref double flmin,
220      double[] ulam,
221      ref double thr,
222      ref int isd,
223      ref int intr,
224      ref int maxit,
225      // outputs:
226      ref int lmu,
227      [Out] double[] a0,
228      [Out] double[,] ca,
229      [Out] int[] ia,
230      [Out] int[] nin,
231      [Out] double[] rsq,
232      [Out] double[] alm,
233      ref int nlp,
234      ref int jerr
235      );
236
237
238    /// <summary>Wrapper for uncompress coefficient vector for particular solution in glmnet</summary>
239    /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
240    ///
241    /// call uncomp(ni, ca, ia, nin, a)
242    ///
243    /// input:
244    ///
245    ///    ni = total number of predictor variables
246    ///    ca(nx) = compressed coefficient values for the solution
247    /// ia(nx) = pointers to compressed coefficients
248    /// nin = number of compressed coefficients for the solution
249    ///
250    /// output:
251    ///
252    ///    a(ni) =  uncompressed coefficient vector
253    ///             referencing original variables
254    ///
255    public static void uncomp(int numVars, double[] ca, int[] ia, int nin, out double[] a) {
256      a = new double[numVars];
257      // load correct version of native dll based on process (x86/x64)
258      if (Environment.Is64BitProcess) {
259        uncomp_x64(ref numVars, ca, ia, ref nin, a);
260      } else {
261        uncomp_x86(ref numVars, ca, ia, ref nin, a);
262      }
263    }
264
265    [DllImport("glmnet-x86.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
266    private static extern void uncomp_x86(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
267    [DllImport("glmnet-x64.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
268    private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
269
270    public static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {
271      fn = new double[numObs];
272      if (Environment.Is64BitProcess) {
273        modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);
274      } else {
275        modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);
276      }
277    }
278    // evaluate linear model from compressed coefficients and
279    // uncompressed predictor matrix:
280    //
281    // call modval(a0, ca, ia, nin, n, x, f);
282    //   c
283    //   c input:
284    //
285    //    a0 = intercept
286    //    ca(nx) = compressed coefficient values for a solution
287    // ia(nx) = pointers to compressed coefficients
288    // nin = number of compressed coefficients for solution
289    //    n = number of predictor vectors(observations)
290    // x(n, ni) = full(uncompressed) predictor matrix
291    //
292    // output:
293    //
294    //    f(n) = model predictions
295    [DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
296    private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
297    [DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
298    private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
299
300  }
301}
Note: See TracBrowser for help on using the repository browser.