Context Navigation

source: trunk/sources/ALGLIB/linreg.cs @ 2515

Visit:

Last change on this file since 2515 was 2445, checked in by gkronber, 15 years ago
Fixed #787 (LinearRegressionOperator uses leastsquares function of ALGLIB instead of linearregression function)
File size: 44.5 KB

Rev	Line
[2445]	1	/*************************************************************************
	2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
	3
	4	>>> SOURCE LICENSE >>>
	5	This program is free software; you can redistribute it and/or modify
	6	it under the terms of the GNU General Public License as published by
	7	the Free Software Foundation (www.fsf.org); either version 2 of the
	8	License, or (at your option) any later version.
	9
	10	This program is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	GNU General Public License for more details.
	14
	15	A copy of the GNU General Public License is available at
	16	http://www.fsf.org/licensing/licenses
	17
	18	>>> END OF LICENSE >>>
	19	*************************************************************************/
	20
	21	using System;
	22
	23	namespace alglib
	24	{
	25	public class linreg
	26	{
	27	public struct linearmodel
	28	{
	29	public double[] w;
	30	};
	31
	32
	33	/*************************************************************************
	34	LRReport structure contains additional information about linear model:
	35	* C - covariation matrix, array[0..NVars,0..NVars].
	36	C[i,j] = Cov(A[i],A[j])
	37	* RMSError - root mean square error on a training set
	38	* AvgError - average error on a training set
	39	* AvgRelError - average relative error on a training set (excluding
	40	observations with zero function value).
	41	* CVRMSError - leave-one-out cross-validation estimate of
	42	generalization error. Calculated using fast algorithm
	43	with O(NVars*NPoints) complexity.
	44	* CVAvgError - cross-validation estimate of average error
	45	* CVAvgRelError - cross-validation estimate of average relative error
	46
	47	All other fields of the structure are intended for internal use and should
	48	not be used outside ALGLIB.
	49	*************************************************************************/
	50	public struct lrreport
	51	{
	52	public double[,] c;
	53	public double rmserror;
	54	public double avgerror;
	55	public double avgrelerror;
	56	public double cvrmserror;
	57	public double cvavgerror;
	58	public double cvavgrelerror;
	59	public int ncvdefects;
	60	public int[] cvdefects;
	61	};
	62
	63
	64
	65
	66	public const int lrvnum = 5;
	67
	68
	69	/*************************************************************************
	70	Linear regression
	71
	72	Subroutine builds model:
	73
	74	Y = A(0)X[0] + ... + A(N-1)X[N-1] + A(N)
	75
	76	and model found in ALGLIB format, covariation matrix, training set errors
	77	(rms, average, average relative) and leave-one-out cross-validation
	78	estimate of the generalization error. CV estimate calculated using fast
	79	algorithm with O(NPoints*NVars) complexity.
	80
	81	When covariation matrix is calculated standard deviations of function
	82	values are assumed to be equal to RMS error on the training set.
	83
	84	INPUT PARAMETERS:
	85	XY - training set, array [0..NPoints-1,0..NVars]:
	86	* NVars columns - independent variables
	87	* last column - dependent variable
	88	NPoints - training set size, NPoints>NVars+1
	89	NVars - number of independent variables
	90
	91	OUTPUT PARAMETERS:
	92	Info - return code:
	93	* -255, in case of unknown internal error
	94	* -4, if internal SVD subroutine haven't converged
	95	* -1, if incorrect parameters was passed (NPoints<NVars+2, NVars<1).
	96	* 1, if subroutine successfully finished
	97	LM - linear model in the ALGLIB format. Use subroutines of
	98	this unit to work with the model.
	99	AR - additional results
	100
	101
	102	-- ALGLIB --
	103	Copyright 02.08.2008 by Bochkanov Sergey
	104	*************************************************************************/
	105	public static void lrbuild(ref double[,] xy,
	106	int npoints,
	107	int nvars,
	108	ref int info,
	109	ref linearmodel lm,
	110	ref lrreport ar)
	111	{
	112	double[] s = new double[0];
	113	int i = 0;
	114	double sigma2 = 0;
	115	int i_ = 0;
	116
	117	if( npoints<=nvars+1 \| nvars<1 )
	118	{
	119	info = -1;
	120	return;
	121	}
	122	s = new double[npoints-1+1];
	123	for(i=0; i<=npoints-1; i++)
	124	{
	125	s[i] = 1;
	126	}
	127	lrbuilds(ref xy, ref s, npoints, nvars, ref info, ref lm, ref ar);
	128	if( info<0 )
	129	{
	130	return;
	131	}
	132	sigma2 = AP.Math.Sqr(ar.rmserror)*npoints/(npoints-nvars-1);
	133	for(i=0; i<=nvars; i++)
	134	{
	135	for(i_=0; i_<=nvars;i_++)
	136	{
	137	ar.c[i,i_] = sigma2*ar.c[i,i_];
	138	}
	139	}
	140	}
	141
	142
	143	/*************************************************************************
	144	Linear regression
	145
	146	Variant of LRBuild which uses vector of standatd deviations (errors in
	147	function values).
	148
	149	INPUT PARAMETERS:
	150	XY - training set, array [0..NPoints-1,0..NVars]:
	151	* NVars columns - independent variables
	152	* last column - dependent variable
	153	S - standard deviations (errors in function values)
	154	array[0..NPoints-1], S[i]>0.
	155	NPoints - training set size, NPoints>NVars+1
	156	NVars - number of independent variables
	157
	158	OUTPUT PARAMETERS:
	159	Info - return code:
	160	* -255, in case of unknown internal error
	161	* -4, if internal SVD subroutine haven't converged
	162	* -1, if incorrect parameters was passed (NPoints<NVars+2, NVars<1).
	163	* -2, if S[I]<=0
	164	* 1, if subroutine successfully finished
	165	LM - linear model in the ALGLIB format. Use subroutines of
	166	this unit to work with the model.
	167	AR - additional results
	168
	169
	170	-- ALGLIB --
	171	Copyright 02.08.2008 by Bochkanov Sergey
	172	*************************************************************************/
	173	public static void lrbuilds(ref double[,] xy,
	174	ref double[] s,
	175	int npoints,
	176	int nvars,
	177	ref int info,
	178	ref linearmodel lm,
	179	ref lrreport ar)
	180	{
	181	double[,] xyi = new double[0,0];
	182	double[] x = new double[0];
	183	double[] means = new double[0];
	184	double[] sigmas = new double[0];
	185	int i = 0;
	186	int j = 0;
	187	double v = 0;
	188	int offs = 0;
	189	double mean = 0;
	190	double variance = 0;
	191	double skewness = 0;
	192	double kurtosis = 0;
	193	int i_ = 0;
	194
	195
	196	//
	197	// Test parameters
	198	//
	199	if( npoints<=nvars+1 \| nvars<1 )
	200	{
	201	info = -1;
	202	return;
	203	}
	204
	205	//
	206	// Copy data, add one more column (constant term)
	207	//
	208	xyi = new double[npoints-1+1, nvars+1+1];
	209	for(i=0; i<=npoints-1; i++)
	210	{
	211	for(i_=0; i_<=nvars-1;i_++)
	212	{
	213	xyi[i,i_] = xy[i,i_];
	214	}
	215	xyi[i,nvars] = 1;
	216	xyi[i,nvars+1] = xy[i,nvars];
	217	}
	218
	219	//
	220	// Standartization
	221	//
	222	x = new double[npoints-1+1];
	223	means = new double[nvars-1+1];
	224	sigmas = new double[nvars-1+1];
	225	for(j=0; j<=nvars-1; j++)
	226	{
	227	for(i_=0; i_<=npoints-1;i_++)
	228	{
	229	x[i_] = xy[i_,j];
	230	}
	231	descriptivestatistics.calculatemoments(ref x, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
	232	means[j] = mean;
	233	sigmas[j] = Math.Sqrt(variance);
	234	if( sigmas[j]==0 )
	235	{
	236	sigmas[j] = 1;
	237	}
	238	for(i=0; i<=npoints-1; i++)
	239	{
	240	xyi[i,j] = (xyi[i,j]-means[j])/sigmas[j];
	241	}
	242	}
	243
	244	//
	245	// Internal processing
	246	//
	247	lrinternal(ref xyi, ref s, npoints, nvars+1, ref info, ref lm, ref ar);
	248	if( info<0 )
	249	{
	250	return;
	251	}
	252
	253	//
	254	// Un-standartization
	255	//
	256	offs = (int)Math.Round(lm.w[3]);
	257	for(j=0; j<=nvars-1; j++)
	258	{
	259
	260	//
	261	// Constant term is updated (and its covariance too,
	262	// since it gets some variance from J-th component)
	263	//
	264	lm.w[offs+nvars] = lm.w[offs+nvars]-lm.w[offs+j]*means[j]/sigmas[j];
	265	v = means[j]/sigmas[j];
	266	for(i_=0; i_<=nvars;i_++)
	267	{
	268	ar.c[nvars,i_] = ar.c[nvars,i_] - v*ar.c[j,i_];
	269	}
	270	for(i_=0; i_<=nvars;i_++)
	271	{
	272	ar.c[i_,nvars] = ar.c[i_,nvars] - v*ar.c[i_,j];
	273	}
	274
	275	//
	276	// J-th term is updated
	277	//
	278	lm.w[offs+j] = lm.w[offs+j]/sigmas[j];
	279	v = 1/sigmas[j];
	280	for(i_=0; i_<=nvars;i_++)
	281	{
	282	ar.c[j,i_] = v*ar.c[j,i_];
	283	}
	284	for(i_=0; i_<=nvars;i_++)
	285	{
	286	ar.c[i_,j] = v*ar.c[i_,j];
	287	}
	288	}
	289	}
	290
	291
	292	/*************************************************************************
	293	Like LRBuildS, but builds model
	294
	295	Y = A(0)X[0] + ... + A(N-1)X[N-1]
	296
	297	i.e. with zero constant term.
	298
	299	-- ALGLIB --
	300	Copyright 30.10.2008 by Bochkanov Sergey
	301	*************************************************************************/
	302	public static void lrbuildzs(ref double[,] xy,
	303	ref double[] s,
	304	int npoints,
	305	int nvars,
	306	ref int info,
	307	ref linearmodel lm,
	308	ref lrreport ar)
	309	{
	310	double[,] xyi = new double[0,0];
	311	double[] x = new double[0];
	312	double[] c = new double[0];
	313	int i = 0;
	314	int j = 0;
	315	double v = 0;
	316	int offs = 0;
	317	double mean = 0;
	318	double variance = 0;
	319	double skewness = 0;
	320	double kurtosis = 0;
	321	int i_ = 0;
	322
	323
	324	//
	325	// Test parameters
	326	//
	327	if( npoints<=nvars+1 \| nvars<1 )
	328	{
	329	info = -1;
	330	return;
	331	}
	332
	333	//
	334	// Copy data, add one more column (constant term)
	335	//
	336	xyi = new double[npoints-1+1, nvars+1+1];
	337	for(i=0; i<=npoints-1; i++)
	338	{
	339	for(i_=0; i_<=nvars-1;i_++)
	340	{
	341	xyi[i,i_] = xy[i,i_];
	342	}
	343	xyi[i,nvars] = 0;
	344	xyi[i,nvars+1] = xy[i,nvars];
	345	}
	346
	347	//
	348	// Standartization: unusual scaling
	349	//
	350	x = new double[npoints-1+1];
	351	c = new double[nvars-1+1];
	352	for(j=0; j<=nvars-1; j++)
	353	{
	354	for(i_=0; i_<=npoints-1;i_++)
	355	{
	356	x[i_] = xy[i_,j];
	357	}
	358	descriptivestatistics.calculatemoments(ref x, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
	359	if( Math.Abs(mean)>Math.Sqrt(variance) )
	360	{
	361
	362	//
	363	// variation is relatively small, it is better to
	364	// bring mean value to 1
	365	//
	366	c[j] = mean;
	367	}
	368	else
	369	{
	370
	371	//
	372	// variation is large, it is better to bring variance to 1
	373	//
	374	if( variance==0 )
	375	{
	376	variance = 1;
	377	}
	378	c[j] = Math.Sqrt(variance);
	379	}
	380	for(i=0; i<=npoints-1; i++)
	381	{
	382	xyi[i,j] = xyi[i,j]/c[j];
	383	}
	384	}
	385
	386	//
	387	// Internal processing
	388	//
	389	lrinternal(ref xyi, ref s, npoints, nvars+1, ref info, ref lm, ref ar);
	390	if( info<0 )
	391	{
	392	return;
	393	}
	394
	395	//
	396	// Un-standartization
	397	//
	398	offs = (int)Math.Round(lm.w[3]);
	399	for(j=0; j<=nvars-1; j++)
	400	{
	401
	402	//
	403	// J-th term is updated
	404	//
	405	lm.w[offs+j] = lm.w[offs+j]/c[j];
	406	v = 1/c[j];
	407	for(i_=0; i_<=nvars;i_++)
	408	{
	409	ar.c[j,i_] = v*ar.c[j,i_];
	410	}
	411	for(i_=0; i_<=nvars;i_++)
	412	{
	413	ar.c[i_,j] = v*ar.c[i_,j];
	414	}
	415	}
	416	}
	417
	418
	419	/*************************************************************************
	420	Like LRBuild but builds model
	421
	422	Y = A(0)X[0] + ... + A(N-1)X[N-1]
	423
	424	i.e. with zero constant term.
	425
	426	-- ALGLIB --
	427	Copyright 30.10.2008 by Bochkanov Sergey
	428	*************************************************************************/
	429	public static void lrbuildz(ref double[,] xy,
	430	int npoints,
	431	int nvars,
	432	ref int info,
	433	ref linearmodel lm,
	434	ref lrreport ar)
	435	{
	436	double[] s = new double[0];
	437	int i = 0;
	438	double sigma2 = 0;
	439	int i_ = 0;
	440
	441	if( npoints<=nvars+1 \| nvars<1 )
	442	{
	443	info = -1;
	444	return;
	445	}
	446	s = new double[npoints-1+1];
	447	for(i=0; i<=npoints-1; i++)
	448	{
	449	s[i] = 1;
	450	}
	451	lrbuildzs(ref xy, ref s, npoints, nvars, ref info, ref lm, ref ar);
	452	if( info<0 )
	453	{
	454	return;
	455	}
	456	sigma2 = AP.Math.Sqr(ar.rmserror)*npoints/(npoints-nvars-1);
	457	for(i=0; i<=nvars; i++)
	458	{
	459	for(i_=0; i_<=nvars;i_++)
	460	{
	461	ar.c[i,i_] = sigma2*ar.c[i,i_];
	462	}
	463	}
	464	}
	465
	466
	467	/*************************************************************************
	468	Unpacks coefficients of linear model.
	469
	470	INPUT PARAMETERS:
	471	LM - linear model in ALGLIB format
	472
	473	OUTPUT PARAMETERS:
	474	V - coefficients, array[0..NVars]
	475	NVars - number of independent variables (one less than number
	476	of coefficients)
	477
	478	-- ALGLIB --
	479	Copyright 30.08.2008 by Bochkanov Sergey
	480	*************************************************************************/
	481	public static void lrunpack(ref linearmodel lm,
	482	ref double[] v,
	483	ref int nvars)
	484	{
	485	int offs = 0;
	486	int i_ = 0;
	487	int i1_ = 0;
	488
	489	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
	490	nvars = (int)Math.Round(lm.w[2]);
	491	offs = (int)Math.Round(lm.w[3]);
	492	v = new double[nvars+1];
	493	i1_ = (offs) - (0);
	494	for(i_=0; i_<=nvars;i_++)
	495	{
	496	v[i_] = lm.w[i_+i1_];
	497	}
	498	}
	499
	500
	501	/*************************************************************************
	502	"Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
	503	reversed).
	504
	505	INPUT PARAMETERS:
	506	V - coefficients, array[0..NVars]
	507	NVars - number of independent variables
	508
	509	OUTPUT PAREMETERS:
	510	LM - linear model.
	511
	512	-- ALGLIB --
	513	Copyright 30.08.2008 by Bochkanov Sergey
	514	*************************************************************************/
	515	public static void lrpack(ref double[] v,
	516	int nvars,
	517	ref linearmodel lm)
	518	{
	519	int offs = 0;
	520	int i_ = 0;
	521	int i1_ = 0;
	522
	523	lm.w = new double[4+nvars+1];
	524	offs = 4;
	525	lm.w[0] = 4+nvars+1;
	526	lm.w[1] = lrvnum;
	527	lm.w[2] = nvars;
	528	lm.w[3] = offs;
	529	i1_ = (0) - (offs);
	530	for(i_=offs; i_<=offs+nvars;i_++)
	531	{
	532	lm.w[i_] = v[i_+i1_];
	533	}
	534	}
	535
	536
	537	/*************************************************************************
	538	Procesing
	539
	540	INPUT PARAMETERS:
	541	LM - linear model
	542	X - input vector, array[0..NVars-1].
	543
	544	Result:
	545	value of linear model regression estimate
	546
	547	-- ALGLIB --
	548	Copyright 03.09.2008 by Bochkanov Sergey
	549	*************************************************************************/
	550	public static double lrprocess(ref linearmodel lm,
	551	ref double[] x)
	552	{
	553	double result = 0;
	554	double v = 0;
	555	int offs = 0;
	556	int nvars = 0;
	557	int i_ = 0;
	558	int i1_ = 0;
	559
	560	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
	561	nvars = (int)Math.Round(lm.w[2]);
	562	offs = (int)Math.Round(lm.w[3]);
	563	i1_ = (offs)-(0);
	564	v = 0.0;
	565	for(i_=0; i_<=nvars-1;i_++)
	566	{
	567	v += x[i_]*lm.w[i_+i1_];
	568	}
	569	result = v+lm.w[offs+nvars];
	570	return result;
	571	}
	572
	573
	574	/*************************************************************************
	575	RMS error on the test set
	576
	577	INPUT PARAMETERS:
	578	LM - linear model
	579	XY - test set
	580	NPoints - test set size
	581
	582	RESULT:
	583	root mean square error.
	584
	585	-- ALGLIB --
	586	Copyright 30.08.2008 by Bochkanov Sergey
	587	*************************************************************************/
	588	public static double lrrmserror(ref linearmodel lm,
	589	ref double[,] xy,
	590	int npoints)
	591	{
	592	double result = 0;
	593	int i = 0;
	594	double v = 0;
	595	int offs = 0;
	596	int nvars = 0;
	597	int i_ = 0;
	598	int i1_ = 0;
	599
	600	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
	601	nvars = (int)Math.Round(lm.w[2]);
	602	offs = (int)Math.Round(lm.w[3]);
	603	result = 0;
	604	for(i=0; i<=npoints-1; i++)
	605	{
	606	i1_ = (offs)-(0);
	607	v = 0.0;
	608	for(i_=0; i_<=nvars-1;i_++)
	609	{
	610	v += xy[i,i_]*lm.w[i_+i1_];
	611	}
	612	v = v+lm.w[offs+nvars];
	613	result = result+AP.Math.Sqr(v-xy[i,nvars]);
	614	}
	615	result = Math.Sqrt(result/npoints);
	616	return result;
	617	}
	618
	619
	620	/*************************************************************************
	621	Average error on the test set
	622
	623	INPUT PARAMETERS:
	624	LM - linear model
	625	XY - test set
	626	NPoints - test set size
	627
	628	RESULT:
	629	average error.
	630
	631	-- ALGLIB --
	632	Copyright 30.08.2008 by Bochkanov Sergey
	633	*************************************************************************/
	634	public static double lravgerror(ref linearmodel lm,
	635	ref double[,] xy,
	636	int npoints)
	637	{
	638	double result = 0;
	639	int i = 0;
	640	double v = 0;
	641	int offs = 0;
	642	int nvars = 0;
	643	int i_ = 0;
	644	int i1_ = 0;
	645
	646	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
	647	nvars = (int)Math.Round(lm.w[2]);
	648	offs = (int)Math.Round(lm.w[3]);
	649	result = 0;
	650	for(i=0; i<=npoints-1; i++)
	651	{
	652	i1_ = (offs)-(0);
	653	v = 0.0;
	654	for(i_=0; i_<=nvars-1;i_++)
	655	{
	656	v += xy[i,i_]*lm.w[i_+i1_];
	657	}
	658	v = v+lm.w[offs+nvars];
	659	result = result+Math.Abs(v-xy[i,nvars]);
	660	}
	661	result = result/npoints;
	662	return result;
	663	}
	664
	665
	666	/*************************************************************************
	667	RMS error on the test set
	668
	669	INPUT PARAMETERS:
	670	LM - linear model
	671	XY - test set
	672	NPoints - test set size
	673
	674	RESULT:
	675	average relative error.
	676
	677	-- ALGLIB --
	678	Copyright 30.08.2008 by Bochkanov Sergey
	679	*************************************************************************/
	680	public static double lravgrelerror(ref linearmodel lm,
	681	ref double[,] xy,
	682	int npoints)
	683	{
	684	double result = 0;
	685	int i = 0;
	686	int k = 0;
	687	double v = 0;
	688	int offs = 0;
	689	int nvars = 0;
	690	int i_ = 0;
	691	int i1_ = 0;
	692
	693	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
	694	nvars = (int)Math.Round(lm.w[2]);
	695	offs = (int)Math.Round(lm.w[3]);
	696	result = 0;
	697	k = 0;
	698	for(i=0; i<=npoints-1; i++)
	699	{
	700	if( xy[i,nvars]!=0 )
	701	{
	702	i1_ = (offs)-(0);
	703	v = 0.0;
	704	for(i_=0; i_<=nvars-1;i_++)
	705	{
	706	v += xy[i,i_]*lm.w[i_+i1_];
	707	}
	708	v = v+lm.w[offs+nvars];
	709	result = result+Math.Abs((v-xy[i,nvars])/xy[i,nvars]);
	710	k = k+1;
	711	}
	712	}
	713	if( k!=0 )
	714	{
	715	result = result/k;
	716	}
	717	return result;
	718	}
	719
	720
	721	/*************************************************************************
	722	Copying of LinearModel strucure
	723
	724	INPUT PARAMETERS:
	725	LM1 - original
	726
	727	OUTPUT PARAMETERS:
	728	LM2 - copy
	729
	730	-- ALGLIB --
	731	Copyright 15.03.2009 by Bochkanov Sergey
	732	*************************************************************************/
	733	public static void lrcopy(ref linearmodel lm1,
	734	ref linearmodel lm2)
	735	{
	736	int k = 0;
	737	int i_ = 0;
	738
	739	k = (int)Math.Round(lm1.w[0]);
	740	lm2.w = new double[k-1+1];
	741	for(i_=0; i_<=k-1;i_++)
	742	{
	743	lm2.w[i_] = lm1.w[i_];
	744	}
	745	}
	746
	747
	748	/*************************************************************************
	749	Serialization of LinearModel strucure
	750
	751	INPUT PARAMETERS:
	752	LM - original
	753
	754	OUTPUT PARAMETERS:
	755	RA - array of real numbers which stores model,
	756	array[0..RLen-1]
	757	RLen - RA lenght
	758
	759	-- ALGLIB --
	760	Copyright 15.03.2009 by Bochkanov Sergey
	761	*************************************************************************/
	762	public static void lrserialize(ref linearmodel lm,
	763	ref double[] ra,
	764	ref int rlen)
	765	{
	766	int i_ = 0;
	767	int i1_ = 0;
	768
	769	rlen = (int)Math.Round(lm.w[0])+1;
	770	ra = new double[rlen-1+1];
	771	ra[0] = lrvnum;
	772	i1_ = (0) - (1);
	773	for(i_=1; i_<=rlen-1;i_++)
	774	{
	775	ra[i_] = lm.w[i_+i1_];
	776	}
	777	}
	778
	779
	780	/*************************************************************************
	781	Unserialization of DecisionForest strucure
	782
	783	INPUT PARAMETERS:
	784	RA - real array which stores decision forest
	785
	786	OUTPUT PARAMETERS:
	787	LM - unserialized structure
	788
	789	-- ALGLIB --
	790	Copyright 15.03.2009 by Bochkanov Sergey
	791	*************************************************************************/
	792	public static void lrunserialize(ref double[] ra,
	793	ref linearmodel lm)
	794	{
	795	int i_ = 0;
	796	int i1_ = 0;
	797
	798	System.Diagnostics.Debug.Assert((int)Math.Round(ra[0])==lrvnum, "LRUnserialize: incorrect array!");
	799	lm.w = new double[(int)Math.Round(ra[1])-1+1];
	800	i1_ = (1) - (0);
	801	for(i_=0; i_<=(int)Math.Round(ra[1])-1;i_++)
	802	{
	803	lm.w[i_] = ra[i_+i1_];
	804	}
	805	}
	806
	807
	808	/*************************************************************************
	809	Obsolete subroutine, use LRBuildS
	810
	811	-- ALGLIB --
	812	Copyright 26.04.2008 by Bochkanov Sergey
	813
	814	References:
	815	1. Numerical Recipes in C, "15.2 Fitting Data to a Straight Line"
	816	*************************************************************************/
	817	public static void lrlines(ref double[,] xy,
	818	ref double[] s,
	819	int n,
	820	ref int info,
	821	ref double a,
	822	ref double b,
	823	ref double vara,
	824	ref double varb,
	825	ref double covab,
	826	ref double corrab,
	827	ref double p)
	828	{
	829	int i = 0;
	830	double ss = 0;
	831	double sx = 0;
	832	double sxx = 0;
	833	double sy = 0;
	834	double stt = 0;
	835	double e1 = 0;
	836	double e2 = 0;
	837	double t = 0;
	838	double chi2 = 0;
	839
	840	if( n<2 )
	841	{
	842	info = -1;
	843	return;
	844	}
	845	for(i=0; i<=n-1; i++)
	846	{
	847	if( s[i]<=0 )
	848	{
	849	info = -2;
	850	return;
	851	}
	852	}
	853	info = 1;
	854
	855	//
	856	// Calculate S, SX, SY, SXX
	857	//
	858	ss = 0;
	859	sx = 0;
	860	sy = 0;
	861	sxx = 0;
	862	for(i=0; i<=n-1; i++)
	863	{
	864	t = AP.Math.Sqr(s[i]);
	865	ss = ss+1/t;
	866	sx = sx+xy[i,0]/t;
	867	sy = sy+xy[i,1]/t;
	868	sxx = sxx+AP.Math.Sqr(xy[i,0])/t;
	869	}
	870
	871	//
	872	// Test for condition number
	873	//
	874	t = Math.Sqrt(4*AP.Math.Sqr(sx)+AP.Math.Sqr(ss-sxx));
	875	e1 = 0.5*(ss+sxx+t);
	876	e2 = 0.5*(ss+sxx-t);
	877	if( Math.Min(e1, e2)<=1000AP.Math.MachineEpsilonMath.Max(e1, e2) )
	878	{
	879	info = -3;
	880	return;
	881	}
	882
	883	//
	884	// Calculate A, B
	885	//
	886	a = 0;
	887	b = 0;
	888	stt = 0;
	889	for(i=0; i<=n-1; i++)
	890	{
	891	t = (xy[i,0]-sx/ss)/s[i];
	892	b = b+t*xy[i,1]/s[i];
	893	stt = stt+AP.Math.Sqr(t);
	894	}
	895	b = b/stt;
	896	a = (sy-sx*b)/ss;
	897
	898	//
	899	// Calculate goodness-of-fit
	900	//
	901	if( n>2 )
	902	{
	903	chi2 = 0;
	904	for(i=0; i<=n-1; i++)
	905	{
	906	chi2 = chi2+AP.Math.Sqr((xy[i,1]-a-b*xy[i,0])/s[i]);
	907	}
	908	p = igammaf.incompletegammac(((double)(n-2))/(double)(2), chi2/2);
	909	}
	910	else
	911	{
	912	p = 1;
	913	}
	914
	915	//
	916	// Calculate other parameters
	917	//
	918	vara = (1+AP.Math.Sqr(sx)/(ss*stt))/ss;
	919	varb = 1/stt;
	920	covab = -(sx/(ss*stt));
	921	corrab = covab/Math.Sqrt(vara*varb);
	922	}
	923
	924
	925	/*************************************************************************
	926	Obsolete subroutine, use LRBuild
	927
	928	-- ALGLIB --
	929	Copyright 02.08.2008 by Bochkanov Sergey
	930	*************************************************************************/
	931	public static void lrline(ref double[,] xy,
	932	int n,
	933	ref int info,
	934	ref double a,
	935	ref double b)
	936	{
	937	double[] s = new double[0];
	938	int i = 0;
	939	double vara = 0;
	940	double varb = 0;
	941	double covab = 0;
	942	double corrab = 0;
	943	double p = 0;
	944
	945	if( n<2 )
	946	{
	947	info = -1;
	948	return;
	949	}
	950	s = new double[n-1+1];
	951	for(i=0; i<=n-1; i++)
	952	{
	953	s[i] = 1;
	954	}
	955	lrlines(ref xy, ref s, n, ref info, ref a, ref b, ref vara, ref varb, ref covab, ref corrab, ref p);
	956	}
	957
	958
	959	/*************************************************************************
	960	Internal linear regression subroutine
	961	*************************************************************************/
	962	private static void lrinternal(ref double[,] xy,
	963	ref double[] s,
	964	int npoints,
	965	int nvars,
	966	ref int info,
	967	ref linearmodel lm,
	968	ref lrreport ar)
	969	{
	970	double[,] a = new double[0,0];
	971	double[,] u = new double[0,0];
	972	double[,] vt = new double[0,0];
	973	double[,] vm = new double[0,0];
	974	double[,] xym = new double[0,0];
	975	double[] b = new double[0];
	976	double[] sv = new double[0];
	977	double[] t = new double[0];
	978	double[] svi = new double[0];
	979	double[] work = new double[0];
	980	int i = 0;
	981	int j = 0;
	982	int k = 0;
	983	int ncv = 0;
	984	int na = 0;
	985	int nacv = 0;
	986	double r = 0;
	987	double p = 0;
	988	double epstol = 0;
	989	lrreport ar2 = new lrreport();
	990	int offs = 0;
	991	linearmodel tlm = new linearmodel();
	992	int i_ = 0;
	993	int i1_ = 0;
	994
	995	epstol = 1000;
	996
	997	//
	998	// Check for errors in data
	999	//
	1000	if( npoints<nvars \| nvars<1 )
	1001	{
	1002	info = -1;
	1003	return;
	1004	}
	1005	for(i=0; i<=npoints-1; i++)
	1006	{
	1007	if( s[i]<=0 )
	1008	{
	1009	info = -2;
	1010	return;
	1011	}
	1012	}
	1013	info = 1;
	1014
	1015	//
	1016	// Create design matrix
	1017	//
	1018	a = new double[npoints-1+1, nvars-1+1];
	1019	b = new double[npoints-1+1];
	1020	for(i=0; i<=npoints-1; i++)
	1021	{
	1022	r = 1/s[i];
	1023	for(i_=0; i_<=nvars-1;i_++)
	1024	{
	1025	a[i,i_] = r*xy[i,i_];
	1026	}
	1027	b[i] = xy[i,nvars]/s[i];
	1028	}
	1029
	1030	//
	1031	// Allocate W:
	1032	// W[0] array size
	1033	// W[1] version number, 0
	1034	// W[2] NVars (minus 1, to be compatible with external representation)
	1035	// W[3] coefficients offset
	1036	//
	1037	lm.w = new double[4+nvars-1+1];
	1038	offs = 4;
	1039	lm.w[0] = 4+nvars;
	1040	lm.w[1] = lrvnum;
	1041	lm.w[2] = nvars-1;
	1042	lm.w[3] = offs;
	1043
	1044	//
	1045	// Solve problem using SVD:
	1046	//
	1047	// 0. check for degeneracy (different types)
	1048	// 1. A = Udiag(sv)V'
	1049	// 2. T = b'*U
	1050	// 3. w = SUM((T[i]/sv[i])*V[..,i])
	1051	// 4. cov(wi,wj) = SUM(Vji*Vjk/sv[i]^2,K=1..M)
	1052	//
	1053	// see $15.4 of "Numerical Recipes in C" for more information
	1054	//
	1055	t = new double[nvars-1+1];
	1056	svi = new double[nvars-1+1];
	1057	ar.c = new double[nvars-1+1, nvars-1+1];
	1058	vm = new double[nvars-1+1, nvars-1+1];
	1059	if( !svd.rmatrixsvd(a, npoints, nvars, 1, 1, 2, ref sv, ref u, ref vt) )
	1060	{
	1061	info = -4;
	1062	return;
	1063	}
	1064	if( sv[0]<=0 )
	1065	{
	1066
	1067	//
	1068	// Degenerate case: zero design matrix.
	1069	//
	1070	for(i=offs; i<=offs+nvars-1; i++)
	1071	{
	1072	lm.w[i] = 0;
	1073	}
	1074	ar.rmserror = lrrmserror(ref lm, ref xy, npoints);
	1075	ar.avgerror = lravgerror(ref lm, ref xy, npoints);
	1076	ar.avgrelerror = lravgrelerror(ref lm, ref xy, npoints);
	1077	ar.cvrmserror = ar.rmserror;
	1078	ar.cvavgerror = ar.avgerror;
	1079	ar.cvavgrelerror = ar.avgrelerror;
	1080	ar.ncvdefects = 0;
	1081	ar.cvdefects = new int[nvars-1+1];
	1082	ar.c = new double[nvars-1+1, nvars-1+1];
	1083	for(i=0; i<=nvars-1; i++)
	1084	{
	1085	for(j=0; j<=nvars-1; j++)
	1086	{
	1087	ar.c[i,j] = 0;
	1088	}
	1089	}
	1090	return;
	1091	}
	1092	if( sv[nvars-1]<=epstolAP.Math.MachineEpsilonsv[0] )
	1093	{
	1094
	1095	//
	1096	// Degenerate case, non-zero design matrix.
	1097	//
	1098	// We can leave it and solve task in SVD least squares fashion.
	1099	// Solution and covariance matrix will be obtained correctly,
	1100	// but CV error estimates - will not. It is better to reduce
	1101	// it to non-degenerate task and to obtain correct CV estimates.
	1102	//
	1103	for(k=nvars; k>=1; k--)
	1104	{
	1105	if( sv[k-1]>epstolAP.Math.MachineEpsilonsv[0] )
	1106	{
	1107
	1108	//
	1109	// Reduce
	1110	//
	1111	xym = new double[npoints-1+1, k+1];
	1112	for(i=0; i<=npoints-1; i++)
	1113	{
	1114	for(j=0; j<=k-1; j++)
	1115	{
	1116	r = 0.0;
	1117	for(i_=0; i_<=nvars-1;i_++)
	1118	{
	1119	r += xy[i,i_]*vt[j,i_];
	1120	}
	1121	xym[i,j] = r;
	1122	}
	1123	xym[i,k] = xy[i,nvars];
	1124	}
	1125
	1126	//
	1127	// Solve
	1128	//
	1129	lrinternal(ref xym, ref s, npoints, k, ref info, ref tlm, ref ar2);
	1130	if( info!=1 )
	1131	{
	1132	return;
	1133	}
	1134
	1135	//
	1136	// Convert back to un-reduced format
	1137	//
	1138	for(j=0; j<=nvars-1; j++)
	1139	{
	1140	lm.w[offs+j] = 0;
	1141	}
	1142	for(j=0; j<=k-1; j++)
	1143	{
	1144	r = tlm.w[offs+j];
	1145	i1_ = (0) - (offs);
	1146	for(i_=offs; i_<=offs+nvars-1;i_++)
	1147	{
	1148	lm.w[i_] = lm.w[i_] + r*vt[j,i_+i1_];
	1149	}
	1150	}
	1151	ar.rmserror = ar2.rmserror;
	1152	ar.avgerror = ar2.avgerror;
	1153	ar.avgrelerror = ar2.avgrelerror;
	1154	ar.cvrmserror = ar2.cvrmserror;
	1155	ar.cvavgerror = ar2.cvavgerror;
	1156	ar.cvavgrelerror = ar2.cvavgrelerror;
	1157	ar.ncvdefects = ar2.ncvdefects;
	1158	ar.cvdefects = new int[nvars-1+1];
	1159	for(j=0; j<=ar.ncvdefects-1; j++)
	1160	{
	1161	ar.cvdefects[j] = ar2.cvdefects[j];
	1162	}
	1163	ar.c = new double[nvars-1+1, nvars-1+1];
	1164	work = new double[nvars+1];
	1165	blas.matrixmatrixmultiply(ref ar2.c, 0, k-1, 0, k-1, false, ref vt, 0, k-1, 0, nvars-1, false, 1.0, ref vm, 0, k-1, 0, nvars-1, 0.0, ref work);
	1166	blas.matrixmatrixmultiply(ref vt, 0, k-1, 0, nvars-1, true, ref vm, 0, k-1, 0, nvars-1, false, 1.0, ref ar.c, 0, nvars-1, 0, nvars-1, 0.0, ref work);
	1167	return;
	1168	}
	1169	}
	1170	info = -255;
	1171	return;
	1172	}
	1173	for(i=0; i<=nvars-1; i++)
	1174	{
	1175	if( sv[i]>epstolAP.Math.MachineEpsilonsv[0] )
	1176	{
	1177	svi[i] = 1/sv[i];
	1178	}
	1179	else
	1180	{
	1181	svi[i] = 0;
	1182	}
	1183	}
	1184	for(i=0; i<=nvars-1; i++)
	1185	{
	1186	t[i] = 0;
	1187	}
	1188	for(i=0; i<=npoints-1; i++)
	1189	{
	1190	r = b[i];
	1191	for(i_=0; i_<=nvars-1;i_++)
	1192	{
	1193	t[i_] = t[i_] + r*u[i,i_];
	1194	}
	1195	}
	1196	for(i=0; i<=nvars-1; i++)
	1197	{
	1198	lm.w[offs+i] = 0;
	1199	}
	1200	for(i=0; i<=nvars-1; i++)
	1201	{
	1202	r = t[i]*svi[i];
	1203	i1_ = (0) - (offs);
	1204	for(i_=offs; i_<=offs+nvars-1;i_++)
	1205	{
	1206	lm.w[i_] = lm.w[i_] + r*vt[i,i_+i1_];
	1207	}
	1208	}
	1209	for(j=0; j<=nvars-1; j++)
	1210	{
	1211	r = svi[j];
	1212	for(i_=0; i_<=nvars-1;i_++)
	1213	{
	1214	vm[i_,j] = r*vt[j,i_];
	1215	}
	1216	}
	1217	for(i=0; i<=nvars-1; i++)
	1218	{
	1219	for(j=i; j<=nvars-1; j++)
	1220	{
	1221	r = 0.0;
	1222	for(i_=0; i_<=nvars-1;i_++)
	1223	{
	1224	r += vm[i,i_]*vm[j,i_];
	1225	}
	1226	ar.c[i,j] = r;
	1227	ar.c[j,i] = r;
	1228	}
	1229	}
	1230
	1231	//
	1232	// Leave-1-out cross-validation error.
	1233	//
	1234	// NOTATIONS:
	1235	// A design matrix
	1236	// A*x = b original linear least squares task
	1237	// USV' SVD of A
	1238	// ai i-th row of the A
	1239	// bi i-th element of the b
	1240	// xf solution of the original LLS task
	1241	//
	1242	// Cross-validation error of i-th element from a sample is
	1243	// calculated using following formula:
	1244	//
	1245	// ERRi = aixf - (aixf-bi(uiui'))/(1-ui*ui') (1)
	1246	//
	1247	// This formula can be derived from normal equations of the
	1248	// original task
	1249	//
	1250	// (A'A)x = A'b (2)
	1251	//
	1252	// by applying modification (zeroing out i-th row of A) to (2):
	1253	//
	1254	// (A-ai)'(A-ai) = (A-ai)'b
	1255	//
	1256	// and using Sherman-Morrison formula for updating matrix inverse
	1257	//
	1258	// NOTE 1: b is not zeroed out since it is much simpler and
	1259	// does not influence final result.
	1260	//
	1261	// NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
	1262	// Formula (1) can't be applied for such cases and they are skipped
	1263	// from CV calculation (which distorts resulting CV estimate).
	1264	// But from the properties of U we can conclude that there can
	1265	// be no more than NVars such vectors. Usually
	1266	// NVars << NPoints, so in a normal case it only slightly
	1267	// influences result.
	1268	//
	1269	ncv = 0;
	1270	na = 0;
	1271	nacv = 0;
	1272	ar.rmserror = 0;
	1273	ar.avgerror = 0;
	1274	ar.avgrelerror = 0;
	1275	ar.cvrmserror = 0;
	1276	ar.cvavgerror = 0;
	1277	ar.cvavgrelerror = 0;
	1278	ar.ncvdefects = 0;
	1279	ar.cvdefects = new int[nvars-1+1];
	1280	for(i=0; i<=npoints-1; i++)
	1281	{
	1282
	1283	//
	1284	// Error on a training set
	1285	//
	1286	i1_ = (offs)-(0);
	1287	r = 0.0;
	1288	for(i_=0; i_<=nvars-1;i_++)
	1289	{
	1290	r += xy[i,i_]*lm.w[i_+i1_];
	1291	}
	1292	ar.rmserror = ar.rmserror+AP.Math.Sqr(r-xy[i,nvars]);
	1293	ar.avgerror = ar.avgerror+Math.Abs(r-xy[i,nvars]);
	1294	if( xy[i,nvars]!=0 )
	1295	{
	1296	ar.avgrelerror = ar.avgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
	1297	na = na+1;
	1298	}
	1299
	1300	//
	1301	// Error using fast leave-one-out cross-validation
	1302	//
	1303	p = 0.0;
	1304	for(i_=0; i_<=nvars-1;i_++)
	1305	{
	1306	p += u[i,i_]*u[i,i_];
	1307	}
	1308	if( p>1-epstol*AP.Math.MachineEpsilon )
	1309	{
	1310	ar.cvdefects[ar.ncvdefects] = i;
	1311	ar.ncvdefects = ar.ncvdefects+1;
	1312	continue;
	1313	}
	1314	r = s[i](r/s[i]-b[i]p)/(1-p);
	1315	ar.cvrmserror = ar.cvrmserror+AP.Math.Sqr(r-xy[i,nvars]);
	1316	ar.cvavgerror = ar.cvavgerror+Math.Abs(r-xy[i,nvars]);
	1317	if( xy[i,nvars]!=0 )
	1318	{
	1319	ar.cvavgrelerror = ar.cvavgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
	1320	nacv = nacv+1;
	1321	}
	1322	ncv = ncv+1;
	1323	}
	1324	if( ncv==0 )
	1325	{
	1326
	1327	//
	1328	// Something strange: ALL ui are degenerate.
	1329	// Unexpected...
	1330	//
	1331	info = -255;
	1332	return;
	1333	}
	1334	ar.rmserror = Math.Sqrt(ar.rmserror/npoints);
	1335	ar.avgerror = ar.avgerror/npoints;
	1336	if( na!=0 )
	1337	{
	1338	ar.avgrelerror = ar.avgrelerror/na;
	1339	}
	1340	ar.cvrmserror = Math.Sqrt(ar.cvrmserror/ncv);
	1341	ar.cvavgerror = ar.cvavgerror/ncv;
	1342	if( nacv!=0 )
	1343	{
	1344	ar.cvavgrelerror = ar.cvavgrelerror/nacv;
	1345	}
	1346	}
	1347	}
	1348	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences