Context Navigation

Glmnet.cs @ 17328

Visit:

Last change on this file since 17328 was 16057, checked in by jkarder, 6 years ago

merged [15377-15681/trunk/sources] into branch
merged [15683-16056/trunk] into branch

File size: 12.1 KB

Rev	Line
[14370]	1	#region License Information
	2	/* HeuristicLab
[16057]	3	* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[14370]	4	*
	5	* This file is part of HeuristicLab.
	6	*
	7	* HeuristicLab is free software: you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
	9	* the Free Software Foundation, either version 3 of the License, or
	10	* (at your option) any later version.
	11	*
	12	* HeuristicLab is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	* GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
	18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
	19	*/
	20	#endregion
	21
	22	using System;
	23	using System.Runtime.InteropServices;
	24
	25	namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
	26	public static class Glmnet {
	27	/// <summary>Wrapper for elnet procedure in glmnet library</summary>
	28	/// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
	29	///
	30	/// ka = algorithm flag
	31	/// ka=1 => covariance updating algorithm
	32	/// ka=2 => naive algorithm
	33	/// parm = penalty member index(0<= parm <= 1)
	34	/// = 0.0 => ridge
	35	/// = 1.0 => lasso
	36	/// no = number of observations
	37	/// ni = number of predictor variables
	38	/// y(no) = response vector(overwritten)
	39	/// w(no)= observation weights(overwritten)
	40	/// jd(jd(1)+1) = predictor variable deletion flag
	41	/// jd(1) = 0 => use all variables
	42	/// jd(1) != 0 => do not use variables jd(2)...jd(jd(1)+1)
	43	/// vp(ni) = relative penalties for each predictor variable
	44	/// vp(j) = 0 => jth variable unpenalized
	45	/// cl(2, ni) = interval constraints on coefficient values(overwritten)
	46	/// cl(1, j) = lower bound for jth coefficient value(<= 0.0)
	47	/// cl(2, j) = upper bound for jth coefficient value(>= 0.0)
	48	/// ne = maximum number of variables allowed to enter largest model
	49	/// (stopping criterion)
	50	/// nx = maximum number of variables allowed to enter all modesl
	51	/// along path(memory allocation, nx > ne).
	52	/// nlam = (maximum)number of lamda values
	53	/// flmin = user control of lamda values(>=0)
	54	/// flmin< 1.0 => minimum lamda = flmin * (largest lamda value)
	55	/// flmin >= 1.0 => use supplied lamda values(see below)
	56	/// ulam(nlam) = user supplied lamda values(ignored if flmin< 1.0)
	57	/// thr = convergence threshold for each lamda solution.
	58	/// iterations stop when the maximum reduction in the criterion value
	59	/// as a result of each parameter update over a single pass
	60	/// is less than thr times the null criterion value.
	61	/// (suggested value, thr= 1.0e-5)
	62	/// isd = predictor variable standarization flag:
	63	/// isd = 0 => regression on original predictor variables
	64	/// isd = 1 => regression on standardized predictor variables
	65	/// Note: output solutions always reference original
	66	/// variables locations and scales.
	67	/// intr = intercept flag
	68	/// intr = 0 / 1 => don't/do include intercept in model
	69	/// maxit = maximum allowed number of passes over the data for all lambda
	70	/// values (suggested values, maxit = 100000)
	71	///
	72	/// output:
	73	///
	74	/// lmu = actual number of lamda values(solutions)
	75	/// a0(lmu) = intercept values for each solution
	76	/// ca(nx, lmu) = compressed coefficient values for each solution
	77	/// ia(nx) = pointers to compressed coefficients
	78	/// nin(lmu) = number of compressed coefficients for each solution
	79	/// rsq(lmu) = R**2 values for each solution
	80	/// alm(lmu) = lamda values corresponding to each solution
	81	/// nlp = actual number of passes over the data for all lamda values
	82	/// jerr = error flag:
	83	/// jerr = 0 => no error
	84	/// jerr > 0 => fatal error - no output returned
	85	/// jerr< 7777 => memory allocation error
	86	/// jerr = 7777 => all used predictors have zero variance
	87	/// jerr = 10000 => maxval(vp) <= 0.0
	88	/// jerr< 0 => non fatal error - partial output:
	89	/// Solutions for larger lamdas (1:(k-1)) returned.
	90	/// jerr = -k => convergence for kth lamda value not reached
	91	/// after maxit(see above) iterations.
	92	/// jerr = -10000 - k => number of non zero coefficients along path
	93	/// exceeds nx(see above) at kth lamda value.
	94	///
	95	public static void elnet(
	96	int ka,
	97	double parm,
	98	int no,
	99	int ni,
	100	double[,] x,
	101	double[] y,
	102	double[] w,
	103	int[] jd,
	104	double[] vp,
	105	double[,] cl,
	106	int ne,
	107	int nx,
	108	int nlam,
	109	double flmin,
	110	double[] ulam,
	111	double thr,
	112	int isd,
	113	int intr,
	114	int maxit,
	115	// outputs
	116	out int lmu,
	117	out double[] a0,
	118	out double[,] ca,
	119	out int[] ia,
	120	out int[] nin,
	121	out double[] rsq,
	122	out double[] alm,
	123	out int nlp,
	124	out int jerr
	125	) {
	126	// initialize output values and allocate arrays big enough
	127	a0 = new double[nlam];
	128	ca = new double[nlam, nx];
	129	ia = new int[nx];
	130	nin = new int[nlam];
	131	rsq = new double[nlam];
	132	alm = new double[nlam];
	133	nlp = -1;
	134	jerr = -1;
	135	lmu = -1;
	136
	137	// load correct version of native dll based on process (x86/x64)
	138	if (Environment.Is64BitProcess) {
	139	elnet_x64(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
	140	} else {
	141	elnet_x86(ref ka, ref parm, ref no, ref ni, x, y, w, jd, vp, cl, ref ne, ref ni, ref nlam, ref flmin, ulam, ref thr, ref isd, ref intr, ref maxit, ref lmu, a0, ca, ia, nin, rsq, alm, ref nlp, ref jerr);
	142	}
	143	// jerr = error flag:
	144	// jerr = 0 => no error
	145	// jerr > 0 => fatal error -no output returned
	146	// jerr < 7777 => memory allocation error
	147	// jerr = 7777 => all used predictors have zero variance
	148	// jerr = 10000 => maxval(vp) <= 0.0
	149	// jerr < 0 => non fatal error - partial output:
	150	// c Solutions for larger lamdas (1:(k - 1)) returned.
	151	// jerr = -k => convergence for kth lamda value not reached
	152	// after maxit(see above) iterations.
	153	// jerr = -10000 - k => number of non zero coefficients along path
	154	// exceeds nx(see above) at kth lamda value.
	155	if (jerr != 0) {
	156	if (jerr > 0 && jerr < 7777) throw new InvalidOperationException("glmnet: memory allocation error");
	157	else if (jerr == 7777) throw new InvalidOperationException("glmnet: all used predictors have zero variance");
	158	else if (jerr == 10000) throw new InvalidOperationException("glmnet: maxval(vp) <= 0.0");
	159	else if (jerr < 0 && jerr > -1000) throw new InvalidOperationException(string.Format("glmnet: convergence for {0}th lamda value not reached after maxit iterations ", -jerr));
	160	else if (jerr <= -10000) throw new InvalidOperationException(string.Format("glmnet: number of non zero coefficients along path exceeds number of maximally allowed variables (nx) at {0}th lamda value", -jerr - 10000));
	161	else throw new InvalidOperationException(string.Format("glmnet: error {0}", jerr));
	162	}
	163
	164
	165	// resize arrays to the capacity that is acutally necessary for the results
	166	Array.Resize(ref a0, lmu);
	167	Array.Resize(ref nin, lmu);
	168	Array.Resize(ref rsq, lmu);
	169	Array.Resize(ref alm, lmu);
	170	}
	171
	172	[DllImport("glmnet-x86.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
	173	private static extern void elnet_x86(
	174	ref int ka,
	175	ref double parm,
	176	ref int no,
	177	ref int ni,
	178	double[,] x,
	179	double[] y,
	180	double[] w,
	181	int[] jd,
	182	double[] vp,
	183	double[,] cl,
	184	ref int ne,
	185	ref int nx,
	186	ref int nlam,
	187	ref double flmin,
	188	double[] ulam,
	189	ref double thr,
	190	ref int isd,
	191	ref int intr,
	192	ref int maxit,
	193	// outputs:
	194	ref int lmu,
	195	[Out] double[] a0,
	196	[Out] double[,] ca,
	197	[Out] int[] ia,
	198	[Out] int[] nin,
	199	[Out] double[] rsq,
	200	[Out] double[] alm,
	201	ref int nlp,
	202	ref int jerr
	203	);
	204	[DllImport("glmnet-x64.dll", EntryPoint = "elnet_", CallingConvention = CallingConvention.Cdecl)]
	205	private static extern void elnet_x64(
	206	ref int ka,
	207	ref double parm,
	208	ref int no,
	209	ref int ni,
	210	double[,] x,
	211	double[] y,
	212	double[] w,
	213	int[] jd,
	214	double[] vp,
	215	double[,] cl,
	216	ref int ne,
	217	ref int nx,
	218	ref int nlam,
	219	ref double flmin,
	220	double[] ulam,
	221	ref double thr,
	222	ref int isd,
	223	ref int intr,
	224	ref int maxit,
	225	// outputs:
	226	ref int lmu,
	227	[Out] double[] a0,
	228	[Out] double[,] ca,
	229	[Out] int[] ia,
	230	[Out] int[] nin,
	231	[Out] double[] rsq,
	232	[Out] double[] alm,
	233	ref int nlp,
	234	ref int jerr
	235	);
	236
	237
	238	/// <summary>Wrapper for uncompress coefficient vector for particular solution in glmnet</summary>
	239	/// (see: https://cran.r-project.org/web/packages/glmnet/index.html)
	240	///
	241	/// call uncomp(ni, ca, ia, nin, a)
	242	///
	243	/// input:
	244	///
	245	/// ni = total number of predictor variables
	246	/// ca(nx) = compressed coefficient values for the solution
	247	/// ia(nx) = pointers to compressed coefficients
	248	/// nin = number of compressed coefficients for the solution
	249	///
	250	/// output:
	251	///
	252	/// a(ni) = uncompressed coefficient vector
	253	/// referencing original variables
	254	///
	255	public static void uncomp(int numVars, double[] ca, int[] ia, int nin, out double[] a) {
	256	a = new double[numVars];
	257	// load correct version of native dll based on process (x86/x64)
	258	if (Environment.Is64BitProcess) {
	259	uncomp_x64(ref numVars, ca, ia, ref nin, a);
	260	} else {
	261	uncomp_x86(ref numVars, ca, ia, ref nin, a);
	262	}
	263	}
	264
	265	[DllImport("glmnet-x86.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
	266	private static extern void uncomp_x86(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
	267	[DllImport("glmnet-x64.dll", EntryPoint = "uncomp_", CallingConvention = CallingConvention.Cdecl)]
	268	private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
	269
	270	public static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {
	271	fn = new double[numObs];
	272	if (Environment.Is64BitProcess) {
	273	modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);
	274	} else {
	275	modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);
	276	}
	277	}
	278	// evaluate linear model from compressed coefficients and
	279	// uncompressed predictor matrix:
	280	//
	281	// call modval(a0, ca, ia, nin, n, x, f);
	282	// c
	283	// c input:
	284	//
	285	// a0 = intercept
	286	// ca(nx) = compressed coefficient values for a solution
	287	// ia(nx) = pointers to compressed coefficients
	288	// nin = number of compressed coefficients for solution
	289	// n = number of predictor vectors(observations)
	290	// x(n, ni) = full(uncompressed) predictor matrix
	291	//
	292	// output:
	293	//
	294	// f(n) = model predictions
	295	[DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
	296	private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
	297	[DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
	298	private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
	299
	300	}
	301	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/2839_HiveProjectManagement/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/Glmnet.cs @ 17328

Download in other formats: