#region License Information
/* HeuristicLab
 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Runtime.InteropServices;

namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
  public static class Glmnet {
    /// <summary>Wrapper for the elnet procedure in the glmnet library</summary>
    /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
    ///
    /// ka = algorithm flag
    ///      ka = 1 => covariance updating algorithm
    ///      ka = 2 => naive algorithm
    /// parm = penalty member index (0 <= parm <= 1)
    ///      = 0.0 => ridge
    ///      = 1.0 => lasso
    /// no = number of observations
    /// ni = number of predictor variables
    /// x(no, ni) = predictor data matrix (overwritten)
    /// y(no) = response vector (overwritten)
    /// w(no) = observation weights (overwritten)
    /// jd(jd(1)+1) = predictor variable deletion flag
    ///      jd(1) = 0 => use all variables
    ///      jd(1) != 0 => do not use variables jd(2)...jd(jd(1)+1)
    /// vp(ni) = relative penalties for each predictor variable
    ///      vp(j) = 0 => jth variable unpenalized
    /// cl(2, ni) = interval constraints on coefficient values (overwritten)
    ///      cl(1, j) = lower bound for jth coefficient value (<= 0.0)
    ///      cl(2, j) = upper bound for jth coefficient value (>= 0.0)
    /// ne = maximum number of variables allowed to enter the largest model
    ///      (stopping criterion)
    /// nx = maximum number of variables allowed to enter all models
    ///      along the path (memory allocation, nx > ne)
    /// nlam = (maximum) number of lambda values
    /// flmin = user control of lambda values (>= 0)
    ///      flmin < 1.0 => minimum lambda = flmin * (largest lambda value)
    ///      flmin >= 1.0 => use supplied lambda values (see below)
    /// ulam(nlam) = user-supplied lambda values (ignored if flmin < 1.0)
    /// thr = convergence threshold for each lambda solution.
    ///      Iterations stop when the maximum reduction in the criterion value
    ///      as a result of each parameter update over a single pass
    ///      is less than thr times the null criterion value.
    ///      (suggested value: thr = 1.0e-5)
    /// isd = predictor variable standardization flag:
    ///      isd = 0 => regression on original predictor variables
    ///      isd = 1 => regression on standardized predictor variables
    ///      Note: output solutions always reference original
    ///      variable locations and scales.
    /// intr = intercept flag
    ///      intr = 0 / 1 => don't/do include intercept in model
    /// maxit = maximum allowed number of passes over the data for all lambda
    ///      values (suggested value: maxit = 100000)
    ///
    /// output:
    ///
    /// lmu = actual number of lambda values (solutions)
    /// a0(lmu) = intercept values for each solution
    /// ca(nx, lmu) = compressed coefficient values for each solution
    /// ia(nx) = pointers to compressed coefficients
    /// nin(lmu) = number of compressed coefficients for each solution
    /// rsq(lmu) = R**2 values for each solution
    /// alm(lmu) = lambda values corresponding to each solution
    /// nlp = actual number of passes over the data for all lambda values
    /// jerr = error flag:
    ///      jerr = 0 => no error
    ///      jerr > 0 => fatal error - no output returned
    ///           jerr < 7777 => memory allocation error
    ///           jerr = 7777 => all used predictors have zero variance
    ///           jerr = 10000 => maxval(vp) <= 0.0
    ///      jerr < 0 => non-fatal error - partial output:
    ///           solutions for larger lambdas (1:(k-1)) returned.
    ///           jerr = -k => convergence for kth lambda value not reached
    ///                after maxit (see above) iterations.
    ///           jerr = -10000 - k => number of non-zero coefficients along the path
    ///                exceeds nx (see above) at the kth lambda value.
    ///
    public static void elnet(
      int ka,
      double parm,
      int no,
      int ni,
      double[,] x,
      double[] y,
      double[] w,
      int[] jd,
      double[] vp,
      double[,] cl,
      int ne,
      int nx,
      int nlam,
      double flmin,
      double[] ulam,
      double thr,
      int isd,
      int intr,
      int maxit,
      // outputs
      out int lmu,
      out double[] a0,
      out double[,] ca,
      out int[] ia,
      out int[] nin,
      out double[] rsq,
      out double[] alm,
      out int nlp,
      out int jerr
      ) {
      // initialize output values and allocate arrays big enough
      a0 = new double[nlam];
      ca = new double[nlam, nx];
      ia = new int[nx];
      nin = new int[nlam];
      rsq = new double[nlam];
      alm = new double[nlam];
      nlp = -1;
      jerr = -1;
      lmu = -1;

      // load correct version of native dll based on process (x86/x64)
      if (Environment.Is64BitProcess) {
        elnet_x64(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref nx, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
      } else {
        elnet_x86(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref nx, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
      }
      // jerr = error flag:
      //   jerr = 0 => no error
      //   jerr > 0 => fatal error - no output returned
      //     jerr < 7777 => memory allocation error
      //     jerr = 7777 => all used predictors have zero variance
      //     jerr = 10000 => maxval(vp) <= 0.0
      //   jerr < 0 => non-fatal error - partial output:
      //     solutions for larger lambdas (1:(k-1)) returned.
      //     jerr = -k => convergence for kth lambda value not reached
      //       after maxit (see above) iterations.
      //     jerr = -10000 - k => number of non-zero coefficients along the path
      //       exceeds nx (see above) at the kth lambda value.
      if (jerr != 0) {
        if (jerr > 0 && jerr < 7777) throw new InvalidOperationException("glmnet: memory allocation error");
        else if (jerr == 7777) throw new InvalidOperationException("glmnet: all used predictors have zero variance");
        else if (jerr == 10000) throw new InvalidOperationException("glmnet: maxval(vp) <= 0.0");
        else if (jerr < 0 && jerr > -10000) throw new InvalidOperationException(string.Format("glmnet: convergence for {0}th lambda value not reached after maxit iterations", -jerr));
        else if (jerr <= -10000) throw new InvalidOperationException(string.Format("glmnet: number of non-zero coefficients along the path exceeds the maximally allowed number of variables (nx) at the {0}th lambda value", -jerr - 10000));
        else throw new InvalidOperationException(string.Format("glmnet: error {0}", jerr));
      }

      // resize arrays to the capacity that is actually necessary for the results
      Array.Resize(ref a0, lmu);
      Array.Resize(ref nin, lmu);
      Array.Resize(ref rsq, lmu);
      Array.Resize(ref alm, lmu);
    }
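
    // Illustrative usage sketch (not part of the original API): fits a lasso path on a tiny,
    // made-up data set and prints the R^2 value per lambda. Argument choices follow the
    // parameter documentation above (thr = 1e-5 and maxit = 100000 are the suggested values);
    // the data, dimensions, and coefficient bounds are hypothetical, and the 2D arrays are
    // assumed to be laid out as the native Fortran routine expects them.
    private static void ElnetUsageSketch() {
      int no = 4, ni = 2;                            // observations x predictor variables
      var x = new double[,] { { 1, 2 }, { 2, 1 }, { 3, 4 }, { 4, 3 } };
      var y = new double[] { 1.0, 2.0, 3.0, 4.0 };   // response vector (overwritten by elnet)
      var w = new double[] { 1, 1, 1, 1 };           // equal observation weights
      var jd = new int[] { 0 };                      // jd(1) = 0 => use all variables
      var vp = new double[] { 1.0, 1.0 };            // equal relative penalties
      var cl = new double[2, 2];                     // cl(1,j) = lower, cl(2,j) = upper bound
      for (int j = 0; j < ni; j++) { cl[0, j] = -1e30; cl[1, j] = 1e30; } // effectively unconstrained
      int nlam = 20;
      int lmu, nlp, jerr;
      double[] a0, rsq, alm;
      double[,] ca;
      int[] ia, nin;
      elnet(1 /* ka: covariance updating */, 1.0 /* parm: lasso */, no, ni, x, y, w, jd, vp, cl,
        ni /* ne */, ni + 1 /* nx */, nlam, 1e-3 /* flmin */, new double[nlam] /* ulam, ignored */,
        1e-5 /* thr */, 1 /* isd */, 1 /* intr */, 100000 /* maxit */,
        out lmu, out a0, out ca, out ia, out nin, out rsq, out alm, out nlp, out jerr);
      for (int i = 0; i < lmu; i++)
        Console.WriteLine("lambda = {0}, R^2 = {1}", alm[i], rsq[i]);
    }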

    [DllImport("glmnet-x86.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
    private static extern void elnet_x86(
      ref int ka,
      ref double parm,
      ref int no,
      ref int ni,
      double[,] x,
      double[] y,
      double[] w,
      int[] jd,
      double[] vp,
      double[,] cl,
      ref int ne,
      ref int nx,
      ref int nlam,
      ref double flmin,
      double[] ulam,
      ref double thr,
      ref int isd,
      ref int intr,
      ref int maxit,
      // outputs:
      ref int lmu,
      [Out] double[] a0,
      [Out] double[,] ca,
      [Out] int[] ia,
      [Out] int[] nin,
      [Out] double[] rsq,
      [Out] double[] alm,
      ref int nlp,
      ref int jerr
      );
    [DllImport("glmnet-x64.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
    private static extern void elnet_x64(
      ref int ka,
      ref double parm,
      ref int no,
      ref int ni,
      double[,] x,
      double[] y,
      double[] w,
      int[] jd,
      double[] vp,
      double[,] cl,
      ref int ne,
      ref int nx,
      ref int nlam,
      ref double flmin,
      double[] ulam,
      ref double thr,
      ref int isd,
      ref int intr,
      ref int maxit,
      // outputs:
      ref int lmu,
      [Out] double[] a0,
      [Out] double[,] ca,
      [Out] int[] ia,
      [Out] int[] nin,
      [Out] double[] rsq,
      [Out] double[] alm,
      ref int nlp,
      ref int jerr
      );

    /// <summary>Wrapper for the uncomp procedure in the glmnet library
    /// (uncompresses the coefficient vector for a particular solution)</summary>
    /// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
    ///
    /// call uncomp(ni, ca, ia, nin, a)
    ///
    /// input:
    ///
    /// ni = total number of predictor variables
    /// ca(nx) = compressed coefficient values for the solution
    /// ia(nx) = pointers to compressed coefficients
    /// nin = number of compressed coefficients for the solution
    ///
    /// output:
    ///
    /// a(ni) = uncompressed coefficient vector
    ///         referencing original variables
    ///
    public static void uncomp(int numVars, double[] ca, int[] ia, int nin, out double[] a) {
      a = new double[numVars];
      // load correct version of native dll based on process (x86/x64)
      if (Environment.Is64BitProcess) {
        uncomp_x64(ref numVars, ca, ia, ref nin, a);
      } else {
        uncomp_x86(ref numVars, ca, ia, ref nin, a);
      }
    }

    [DllImport("glmnet-x86.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
    private static extern void uncomp_x86(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
    [DllImport("glmnet-x64.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
    private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
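
    // Illustrative usage sketch (not part of the original API): expands the compressed
    // coefficients of a single solution produced by elnet into a full coefficient vector over
    // all ni predictor variables. The solution index is hypothetical; ca, ia, and nin are
    // assumed to come from a preceding elnet call, with ca laid out as allocated above
    // (one row per lambda value, nx columns).
    private static double[] UncompUsageSketch(int ni, double[,] ca, int[] ia, int[] nin, int solutionIdx) {
      // copy the compressed coefficient row of the selected solution into a 1d array
      int nx = ca.GetLength(1);
      var caOfSolution = new double[nx];
      for (int j = 0; j < nx; j++) caOfSolution[j] = ca[solutionIdx, j];

      double[] coeff;
      uncomp(ni, caOfSolution, ia, nin[solutionIdx], out coeff);
      return coeff; // coeff[j] is the coefficient of the jth original predictor variable
    }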

    /// <summary>Wrapper for the modval procedure in the glmnet library:
    /// evaluates the linear model from compressed coefficients and
    /// an uncompressed predictor matrix</summary>
    ///
    /// call modval(a0, ca, ia, nin, n, x, f)
    ///
    /// input:
    ///
    /// a0 = intercept
    /// ca(nx) = compressed coefficient values for a solution
    /// ia(nx) = pointers to compressed coefficients
    /// nin = number of compressed coefficients for solution
    /// n = number of predictor vectors (observations)
    /// x(n, ni) = full (uncompressed) predictor matrix
    ///
    /// output:
    ///
    /// f(n) = model predictions
    public static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {
      fn = new double[numObs];
      // load correct version of native dll based on process (x86/x64)
      if (Environment.Is64BitProcess) {
        modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);
      } else {
        modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);
      }
    }
    [DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
    private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, double[,] x, [Out] double[] fn);
    [DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
    private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, double[,] x, [Out] double[] fn);
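
    // Illustrative usage sketch (not part of the original API): evaluates the linear model of
    // a single elnet solution on a predictor matrix and returns the predicted values. The
    // solution index is hypothetical; a0, ca, ia, and nin are assumed to come from a preceding
    // elnet call, with ca laid out as allocated above (one row per lambda value, nx columns).
    private static double[] ModvalUsageSketch(double[] a0, double[,] ca, int[] ia, int[] nin,
                                              int numObs, double[,] x, int solutionIdx) {
      // copy the compressed coefficient row of the selected solution into a 1d array
      int nx = ca.GetLength(1);
      var caOfSolution = new double[nx];
      for (int j = 0; j < nx; j++) caOfSolution[j] = ca[solutionIdx, j];

      double[] predictions;
      modval(a0[solutionIdx], caOfSolution, ia, nin[solutionIdx], numObs, x, out predictions);
      return predictions; // predictions[i] is the model output for the ith observation
    }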

  }
}