/*************************************************************************
Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses

>>> END OF LICENSE >>>
*************************************************************************/
| 20 |
|
---|
| 21 | using System;
|
---|
| 22 |
|
---|
| 23 | namespace alglib
|
---|
| 24 | {
|
---|
| 25 | public class lda
|
---|
| 26 | {
|
---|
/*************************************************************************
Multiclass Fisher LDA

Finds the coefficients of the single linear combination which best
separates the training set into classes.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars].
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -4, if internal EVD subroutine hasn't converged
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters was passed (NPoints<0,
                          NVars<1, NClasses<2)
                    *  1, if task has been solved
                    *  2, if there was a multicollinearity in training set,
                          but task has been solved.
    W           -   linear combination coefficients, array[0..NVars-1]

  -- ALGLIB --
     Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
public static void fisherlda(ref double[,] xy,
    int npoints,
    int nvars,
    int nclasses,
    ref int info,
    ref double[] w)
{
    double[,] basis = new double[0,0];

    // Solve the full N-dimensional problem, then keep only the first
    // (best-separating) basis vector, i.e. column 0 of the returned basis.
    fisherldan(ref xy, npoints, nvars, nclasses, ref info, ref basis);
    if( info<=0 )
    {
        // Error code already stored in Info; W is left untouched.
        return;
    }
    w = new double[nvars];
    for(int j=0; j<nvars; j++)
    {
        w[j] = basis[j,0];
    }
}
|
---|
| 79 |
|
---|
| 80 |
|
---|
/*************************************************************************
N-dimensional multiclass Fisher LDA

Subroutine finds coefficients of linear combinations which optimally separates
training set on classes. It returns N-dimensional basis whose vector are sorted
by quality of training set separation (in descending order).

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars].
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to. Fractional
                    values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=0
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2


OUTPUT PARAMETERS:
    Info        -   return code:
                    * -4, if internal EVD subroutine hasn't converged
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters was passed (NPoints<0,
                          NVars<1, NClasses<2)
                    *  1, if task has been solved
                    *  2, if there was a multicollinearity in training set,
                          but task has been solved.
    W           -   basis, array[0..NVars-1,0..NVars-1]
                    columns of matrix stores basis vectors, sorted by
                    quality of training set separation (in descending order)

  -- ALGLIB --
     Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
public static void fisherldan(ref double[,] xy,
    int npoints,
    int nvars,
    int nclasses,
    ref int info,
    ref double[,] w)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int m = 0;
    double v = 0;
    int[] c = new int[0];
    double[] mu = new double[0];
    double[,] muc = new double[0,0];
    int[] nc = new int[0];
    double[,] sw = new double[0,0];
    double[,] st = new double[0,0];
    double[,] z = new double[0,0];
    double[,] z2 = new double[0,0];
    double[,] tm = new double[0,0];
    double[,] sbroot = new double[0,0];     // NOTE(review): declared but never used in this routine
    double[,] a = new double[0,0];
    double[,] xyproj = new double[0,0];
    double[,] wproj = new double[0,0];
    double[] tf = new double[0];
    double[] d = new double[0];
    double[] d2 = new double[0];
    double[] work = new double[0];
    int i_ = 0;


    //
    // Test data
    //
    // (non-short-circuiting '|' is used throughout — both operands are
    // cheap, so this matches the ALGLIB translation style)
    //
    if( npoints<0 | nvars<1 | nclasses<2 )
    {
        info = -1;
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        // Class label lives in the last column and is rounded to int.
        if( (int)Math.Round(xy[i,nvars])<0 | (int)Math.Round(xy[i,nvars])>=nclasses )
        {
            info = -2;
            return;
        }
    }
    info = 1;

    //
    // Special case: NPoints<=1
    // Degenerate task.
    //
    // Return the identity basis with the "multicollinearity" code 2.
    //
    if( npoints<=1 )
    {
        info = 2;
        w = new double[nvars-1+1, nvars-1+1];
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                if( i==j )
                {
                    w[i,j] = 1;
                }
                else
                {
                    w[i,j] = 0;
                }
            }
        }
        return;
    }

    //
    // Prepare temporaries
    //
    // TF holds one centered sample; Work is scratch space for the
    // blas.matrixmatrixmultiply calls below.
    //
    tf = new double[nvars-1+1];
    work = new double[Math.Max(nvars, npoints)+1];

    //
    // Convert class labels from reals to integers (just for convenience)
    //
    c = new int[npoints-1+1];
    for(i=0; i<=npoints-1; i++)
    {
        c[i] = (int)Math.Round(xy[i,nvars]);
    }

    //
    // Calculate class sizes and means
    //
    // Mu  = global mean over all points
    // MuC = per-class means, NC = per-class counts
    //
    mu = new double[nvars-1+1];
    muc = new double[nclasses-1+1, nvars-1+1];
    nc = new int[nclasses-1+1];
    for(j=0; j<=nvars-1; j++)
    {
        mu[j] = 0;
    }
    for(i=0; i<=nclasses-1; i++)
    {
        nc[i] = 0;
        for(j=0; j<=nvars-1; j++)
        {
            muc[i,j] = 0;
        }
    }
    for(i=0; i<=npoints-1; i++)
    {
        for(i_=0; i_<=nvars-1;i_++)
        {
            mu[i_] = mu[i_] + xy[i,i_];
        }
        for(i_=0; i_<=nvars-1;i_++)
        {
            muc[c[i],i_] = muc[c[i],i_] + xy[i,i_];
        }
        nc[c[i]] = nc[c[i]]+1;
    }
    for(i=0; i<=nclasses-1; i++)
    {
        // NOTE(review): assumes every class in [0..NClasses-1] actually
        // occurs in the data; NC[i]==0 makes V infinite (floating-point
        // division, no exception) — confirm callers guarantee this.
        v = (double)(1)/(double)(nc[i]);
        for(i_=0; i_<=nvars-1;i_++)
        {
            muc[i,i_] = v*muc[i,i_];
        }
    }
    v = (double)(1)/(double)(npoints);
    for(i_=0; i_<=nvars-1;i_++)
    {
        mu[i_] = v*mu[i_];
    }

    //
    // Create ST matrix
    //
    // Total scatter: ST = sum over all points of (x-Mu)*(x-Mu)'.
    //
    st = new double[nvars-1+1, nvars-1+1];
    for(i=0; i<=nvars-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            st[i,j] = 0;
        }
    }
    for(k=0; k<=npoints-1; k++)
    {
        for(i_=0; i_<=nvars-1;i_++)
        {
            tf[i_] = xy[k,i_];
        }
        for(i_=0; i_<=nvars-1;i_++)
        {
            tf[i_] = tf[i_] - mu[i_];
        }
        // Rank-one update: ST += TF*TF'
        for(i=0; i<=nvars-1; i++)
        {
            v = tf[i];
            for(i_=0; i_<=nvars-1;i_++)
            {
                st[i,i_] = st[i,i_] + v*tf[i_];
            }
        }
    }

    //
    // Create SW matrix
    //
    // Within-class scatter: SW = sum over points of (x-MuC[class])*(x-MuC[class])'.
    //
    sw = new double[nvars-1+1, nvars-1+1];
    for(i=0; i<=nvars-1; i++)
    {
        for(j=0; j<=nvars-1; j++)
        {
            sw[i,j] = 0;
        }
    }
    for(k=0; k<=npoints-1; k++)
    {
        for(i_=0; i_<=nvars-1;i_++)
        {
            tf[i_] = xy[k,i_];
        }
        for(i_=0; i_<=nvars-1;i_++)
        {
            tf[i_] = tf[i_] - muc[c[k],i_];
        }
        // Rank-one update: SW += TF*TF'
        for(i=0; i<=nvars-1; i++)
        {
            v = tf[i];
            for(i_=0; i_<=nvars-1;i_++)
            {
                sw[i,i_] = sw[i,i_] + v*tf[i_];
            }
        }
    }

    //
    // Maximize ratio J=(w'*ST*w)/(w'*SW*w).
    //
    // First, make transition from w to v such that w'*ST*w becomes v'*v:
    //     v  = root(ST)*w = R*w
    //     R  = root(D)*Z'
    //     w  = (root(ST)^-1)*v = RI*v
    //     RI = Z*inv(root(D))
    //     J  = (v'*v)/(v'*(RI'*SW*RI)*v)
    //     ST = Z*D*Z'
    //
    // so we have
    //
    //     J = (v'*v) / (v'*(inv(root(D))*Z'*SW*Z*inv(root(D)))*v)  =
    //       = (v'*v) / (v'*A*v)
    //
    if( !evd.smatrixevd(st, nvars, 1, true, ref d, ref z) )
    {
        info = -4;
        return;
    }
    w = new double[nvars-1+1, nvars-1+1];
    // Eigenvalues in D are used smallest-first: D[0] is the smallest,
    // D[NVars-1] the largest (this is what the two tests below rely on).
    // The 1000*eps factor is a relative cutoff: eigenvalues that small
    // compared to the largest are treated as zero (multicollinearity).
    if( (double)(d[nvars-1])<=(double)(0) | (double)(d[0])<=(double)(1000*AP.Math.MachineEpsilon*d[nvars-1]) )
    {

        //
        // Special case: D[NVars-1]<=0
        // Degenerate task (all variables takes the same value).
        //
        // Return the identity basis with code 2.
        //
        if( (double)(d[nvars-1])<=(double)(0) )
        {
            info = 2;
            for(i=0; i<=nvars-1; i++)
            {
                for(j=0; j<=nvars-1; j++)
                {
                    if( i==j )
                    {
                        w[i,j] = 1;
                    }
                    else
                    {
                        w[i,j] = 0;
                    }
                }
            }
            return;
        }

        //
        // Special case: degenerate ST matrix, multicollinearity found.
        // Since we know ST eigenvalues/vectors we can translate task to
        // non-degenerate form.
        //
        // Let WG is orthogonal basis of the non zero variance subspace
        // of the ST and let WZ is orthogonal basis of the zero variance
        // subspace.
        //
        // Projection on WG allows us to use LDA on reduced M-dimensional
        // subspace, N-M vectors of WZ allows us to update reduced LDA
        // factors to full N-dimensional subspace.
        //
        // M counts the near-zero eigenvalues; since D is ordered, the last
        // index below the cutoff gives M directly.
        //
        m = 0;
        for(k=0; k<=nvars-1; k++)
        {
            if( (double)(d[k])<=(double)(1000*AP.Math.MachineEpsilon*d[nvars-1]) )
            {
                m = k+1;
            }
        }
        System.Diagnostics.Debug.Assert(m!=0, "FisherLDAN: internal error #1");
        // Project the data onto the NVars-M non-degenerate eigenvectors
        // (columns M..NVars-1 of Z); the extra column carries class labels.
        xyproj = new double[npoints-1+1, nvars-m+1];
        blas.matrixmatrixmultiply(ref xy, 0, npoints-1, 0, nvars-1, false, ref z, 0, nvars-1, m, nvars-1, false, 1.0, ref xyproj, 0, npoints-1, 0, nvars-m-1, 0.0, ref work);
        for(i=0; i<=npoints-1; i++)
        {
            xyproj[i,nvars-m] = xy[i,nvars];
        }
        // Recurse on the reduced (NVars-M)-dimensional problem.
        fisherldan(ref xyproj, npoints, nvars-m, nclasses, ref info, ref wproj);
        if( info<0 )
        {
            return;
        }
        // Lift the reduced basis back to the full space, then append the
        // zero-variance eigenvectors as the trailing (worst) columns.
        blas.matrixmatrixmultiply(ref z, 0, nvars-1, m, nvars-1, false, ref wproj, 0, nvars-m-1, 0, nvars-m-1, false, 1.0, ref w, 0, nvars-1, 0, nvars-m-1, 0.0, ref work);
        for(k=nvars-m; k<=nvars-1; k++)
        {
            for(i_=0; i_<=nvars-1;i_++)
            {
                w[i_,k] = z[i_,k-(nvars-m)];
            }
        }
        info = 2;
    }
    else
    {

        //
        // General case: no multicollinearity
        //
        // Build A = inv(root(D))*Z'*SW*Z*inv(root(D)) and take its EVD;
        // transforming its eigenvectors back through Z*inv(root(D)) gives
        // the LDA basis (see the derivation comment above).
        //
        tm = new double[nvars-1+1, nvars-1+1];
        a = new double[nvars-1+1, nvars-1+1];
        blas.matrixmatrixmultiply(ref sw, 0, nvars-1, 0, nvars-1, false, ref z, 0, nvars-1, 0, nvars-1, false, 1.0, ref tm, 0, nvars-1, 0, nvars-1, 0.0, ref work);
        blas.matrixmatrixmultiply(ref z, 0, nvars-1, 0, nvars-1, true, ref tm, 0, nvars-1, 0, nvars-1, false, 1.0, ref a, 0, nvars-1, 0, nvars-1, 0.0, ref work);
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                // Scaling by 1/sqrt(D[i]*D[j]) applies inv(root(D)) on both sides.
                a[i,j] = a[i,j]/Math.Sqrt(d[i]*d[j]);
            }
        }
        if( !evd.smatrixevd(a, nvars, 1, true, ref d2, ref z2) )
        {
            info = -4;
            return;
        }
        for(k=0; k<=nvars-1; k++)
        {
            // W[:,k] = Z * inv(root(D)) * Z2[:,k]
            for(i=0; i<=nvars-1; i++)
            {
                tf[i] = z2[i,k]/Math.Sqrt(d[i]);
            }
            for(i=0; i<=nvars-1; i++)
            {
                v = 0.0;
                for(i_=0; i_<=nvars-1;i_++)
                {
                    v += z[i,i_]*tf[i_];
                }
                w[i,k] = v;
            }
        }
    }

    //
    // Post-processing:
    // * normalization
    // * converting to non-negative form, if possible
    //
    for(k=0; k<=nvars-1; k++)
    {
        // Normalize column k to unit Euclidean length.
        v = 0.0;
        for(i_=0; i_<=nvars-1;i_++)
        {
            v += w[i_,k]*w[i_,k];
        }
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=nvars-1;i_++)
        {
            w[i_,k] = v*w[i_,k];
        }
        // Flip sign so each basis vector's component sum is non-negative
        // (fixes the sign ambiguity of eigenvectors deterministically).
        v = 0;
        for(i=0; i<=nvars-1; i++)
        {
            v = v+w[i,k];
        }
        if( (double)(v)<(double)(0) )
        {
            for(i_=0; i_<=nvars-1;i_++)
            {
                w[i_,k] = -1*w[i_,k];
            }
        }
    }
}
|
---|
| 475 | }
|
---|
| 476 | }
|
---|