Context Navigation

source: branches/2695_dataset-ids/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/Glmnet.cs @ 16724

Visit:

Last change on this file since 16724 was 14370, checked in by mkommend, 8 years ago

#745: Addressed some easy to implement review comments:

missing license header
renaming of variables
extracted DLLImports into a separate file
corrected plugin dependencies

File size: 12.1 KB

Line
1	#region License Information
2	/* HeuristicLab
3	* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4	*
5	* This file is part of HeuristicLab.
6	*
7	* HeuristicLab is free software: you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation, either version 3 of the License, or
10	* (at your option) any later version.
11	*
12	* HeuristicLab is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19	*/
20	#endregion
21
22	using System;
23	using System.Runtime.InteropServices;
24
25	namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
26	public static class Glmnet {
27	/// <summary>Wrapper for elnet procedure in glmnet library</summary>
28	/// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
29	///
30	/// ka = algorithm flag
31	/// ka=1 => covariance updating algorithm
32	/// ka=2 => naive algorithm
33	/// parm = penalty member index(0<= parm <= 1)
34	/// = 0.0 => ridge
35	/// = 1.0 => lasso
36	/// no = number of observations
37	/// ni = number of predictor variables
38	/// y(no) = response vector(overwritten)
39	/// w(no)= observation weights(overwritten)
40	/// jd(jd(1)+1) = predictor variable deletion flag
41	/// jd(1) = 0 => use all variables
42	/// jd(1) != 0 => do not use variables jd(2)...jd(jd(1)+1)
43	/// vp(ni) = relative penalties for each predictor variable
44	/// vp(j) = 0 => jth variable unpenalized
45	/// cl(2, ni) = interval constraints on coefficient values(overwritten)
46	/// cl(1, j) = lower bound for jth coefficient value(<= 0.0)
47	/// cl(2, j) = upper bound for jth coefficient value(>= 0.0)
48	/// ne = maximum number of variables allowed to enter largest model
49	/// (stopping criterion)
50	/// nx = maximum number of variables allowed to enter all modesl
51	/// along path(memory allocation, nx > ne).
52	/// nlam = (maximum)number of lamda values
53	/// flmin = user control of lamda values(>=0)
54	/// flmin< 1.0 => minimum lamda = flmin * (largest lamda value)
55	/// flmin >= 1.0 => use supplied lamda values(see below)
56	/// ulam(nlam) = user supplied lamda values(ignored if flmin< 1.0)
57	/// thr = convergence threshold for each lamda solution.
58	/// iterations stop when the maximum reduction in the criterion value
59	/// as a result of each parameter update over a single pass
60	/// is less than thr times the null criterion value.
61	/// (suggested value, thr= 1.0e-5)
62	/// isd = predictor variable standarization flag:
63	/// isd = 0 => regression on original predictor variables
64	/// isd = 1 => regression on standardized predictor variables
65	/// Note: output solutions always reference original
66	/// variables locations and scales.
67	/// intr = intercept flag
68	/// intr = 0 / 1 => don't/do include intercept in model
69	/// maxit = maximum allowed number of passes over the data for all lambda
70	/// values (suggested values, maxit = 100000)
71	///
72	/// output:
73	///
74	/// lmu = actual number of lamda values(solutions)
75	/// a0(lmu) = intercept values for each solution
76	/// ca(nx, lmu) = compressed coefficient values for each solution
77	/// ia(nx) = pointers to compressed coefficients
78	/// nin(lmu) = number of compressed coefficients for each solution
79	/// rsq(lmu) = R**2 values for each solution
80	/// alm(lmu) = lamda values corresponding to each solution
81	/// nlp = actual number of passes over the data for all lamda values
82	/// jerr = error flag:
83	/// jerr = 0 => no error
84	/// jerr > 0 => fatal error - no output returned
85	/// jerr< 7777 => memory allocation error
86	/// jerr = 7777 => all used predictors have zero variance
87	/// jerr = 10000 => maxval(vp) <= 0.0
88	/// jerr< 0 => non fatal error - partial output:
89	/// Solutions for larger lamdas (1:(k-1)) returned.
90	/// jerr = -k => convergence for kth lamda value not reached
91	/// after maxit(see above) iterations.
92	/// jerr = -10000 - k => number of non zero coefficients along path
93	/// exceeds nx(see above) at kth lamda value.
94	///
95	public static void elnet(
96	int ka,
97	double parm,
98	int no,
99	int ni,
100	double[,] x,
101	double[] y,
102	double[] w,
103	int[] jd,
104	double[] vp,
105	double[,] cl,
106	int ne,
107	int nx,
108	int nlam,
109	double flmin,
110	double[] ulam,
111	double thr,
112	int isd,
113	int intr,
114	int maxit,
115	// outputs
116	out int lmu,
117	out double[] a0,
118	out double[,] ca,
119	out int[] ia,
120	out int[] nin,
121	out double[] rsq,
122	out double[] alm,
123	out int nlp,
124	out int jerr
125	) {
126	// initialize output values and allocate arrays big enough
127	a0 = new double[nlam];
128	ca = new double[nlam, nx];
129	ia = new int[nx];
130	nin = new int[nlam];
131	rsq = new double[nlam];
132	alm = new double[nlam];
133	nlp = -1;
134	jerr = -1;
135	lmu = -1;
136
137	// load correct version of native dll based on process (x86/x64)
138	if (Environment.Is64BitProcess) {
139	elnet_x64(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
140	} else {
141	elnet_x86(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
142	}
143	// jerr = error flag:
144	// jerr = 0 => no error
145	// jerr > 0 => fatal error -no output returned
146	// jerr < 7777 => memory allocation error
147	// jerr = 7777 => all used predictors have zero variance
148	// jerr = 10000 => maxval(vp) <= 0.0
149	// jerr < 0 => non fatal error - partial output:
150	// c Solutions for larger lamdas (1:(k - 1)) returned.
151	// jerr = -k => convergence for kth lamda value not reached
152	// after maxit(see above) iterations.
153	// jerr = -10000 - k => number of non zero coefficients along path
154	// exceeds nx(see above) at kth lamda value.
155	if (jerr != 0) {
156	if (jerr > 0 && jerr < 7777) throw new InvalidOperationException("glmnet: memory allocation error");
157	else if (jerr == 7777) throw new InvalidOperationException("glmnet: all used predictors have zero variance");
158	else if (jerr == 10000) throw new InvalidOperationException("glmnet: maxval(vp) <= 0.0");
159	else if (jerr < 0 && jerr > -1000) throw new InvalidOperationException(string.Format("glmnet: convergence for {0}th lamda value not reached after maxit iterations ", -jerr));
160	else if (jerr <= -10000) throw new InvalidOperationException(string.Format("glmnet: number of non zero coefficients along path exceeds number of maximally allowed variables (nx) at {0}th lamda value", -jerr - 10000));
161	else throw new InvalidOperationException(string.Format("glmnet: error {0}", jerr));
162	}
163
164
165	// resize arrays to the capacity that is acutally necessary for the results
166	Array.Resize(ref a0, lmu);
167	Array.Resize(ref nin, lmu);
168	Array.Resize(ref rsq, lmu);
169	Array.Resize(ref alm, lmu);
170	}
171
172	[DllImport("glmnet-x86.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
173	private static extern void elnet_x86(
174	ref int ka,
175	ref double parm,
176	ref int no,
177	ref int ni,
178	double[,] x,
179	double[] y,
180	double[] w,
181	int[] jd,
182	double[] vp,
183	double[,] cl,
184	ref int ne,
185	ref int nx,
186	ref int nlam,
187	ref double flmin,
188	double[] ulam,
189	ref double thr,
190	ref int isd,
191	ref int intr,
192	ref int maxit,
193	// outputs:
194	ref int lmu,
195	[Out] double[] a0,
196	[Out] double[,] ca,
197	[Out] int[] ia,
198	[Out] int[] nin,
199	[Out] double[] rsq,
200	[Out] double[] alm,
201	ref int nlp,
202	ref int jerr
203	);
204	[DllImport("glmnet-x64.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
205	private static extern void elnet_x64(
206	ref int ka,
207	ref double parm,
208	ref int no,
209	ref int ni,
210	double[,] x,
211	double[] y,
212	double[] w,
213	int[] jd,
214	double[] vp,
215	double[,] cl,
216	ref int ne,
217	ref int nx,
218	ref int nlam,
219	ref double flmin,
220	double[] ulam,
221	ref double thr,
222	ref int isd,
223	ref int intr,
224	ref int maxit,
225	// outputs:
226	ref int lmu,
227	[Out] double[] a0,
228	[Out] double[,] ca,
229	[Out] int[] ia,
230	[Out] int[] nin,
231	[Out] double[] rsq,
232	[Out] double[] alm,
233	ref int nlp,
234	ref int jerr
235	);
236
237
238	/// <summary>Wrapper for uncompress coefficient vector for particular solution in glmnet</summary>
239	/// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
240	///
241	/// call uncomp(ni, ca, ia, nin, a)
242	///
243	/// input:
244	///
245	/// ni = total number of predictor variables
246	/// ca(nx) = compressed coefficient values for the solution
247	/// ia(nx) = pointers to compressed coefficients
248	/// nin = number of compressed coefficients for the solution
249	///
250	/// output:
251	///
252	/// a(ni) = uncompressed coefficient vector
253	/// referencing original variables
254	///
255	public static void uncomp(int numVars, double[] ca, int[] ia, int nin, out double[] a) {
256	a = new double[numVars];
257	// load correct version of native dll based on process (x86/x64)
258	if (Environment.Is64BitProcess) {
259	uncomp_x64(ref numVars, ca, ia, ref nin, a);
260	} else {
261	uncomp_x86(ref numVars, ca, ia, ref nin, a);
262	}
263	}
264
265	[DllImport("glmnet-x86.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
266	private static extern void uncomp_x86(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
267	[DllImport("glmnet-x64.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
268	private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
269
270	public static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {
271	fn = new double[numObs];
272	if (Environment.Is64BitProcess) {
273	modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);
274	} else {
275	modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);
276	}
277	}
278	// evaluate linear model from compressed coefficients and
279	// uncompressed predictor matrix:
280	//
281	// call modval(a0, ca, ia, nin, n, x, f);
282	// c
283	// c input:
284	//
285	// a0 = intercept
286	// ca(nx) = compressed coefficient values for a solution
287	// ia(nx) = pointers to compressed coefficients
288	// nin = number of compressed coefficients for solution
289	// n = number of predictor vectors(observations)
290	// x(n, ni) = full(uncompressed) predictor matrix
291	//
292	// output:
293	//
294	// f(n) = model predictions
295	[DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
296	private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
297	[DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
298	private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
299
300	}
301	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences