Context Navigation

source: trunk/sources/ALGLIB/linreg.cs @ 2452

Visit:

Last change on this file since 2452 was 2445, checked in by gkronber, 15 years ago
Fixed #787 (LinearRegressionOperator uses leastsquares function of ALGLIB instead of linearregression function)
File size: 44.5 KB

Line
1	/*************************************************************************
2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
3
4	>>> SOURCE LICENSE >>>
5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation (www.fsf.org); either version 2 of the
8	License, or (at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	A copy of the GNU General Public License is available at
16	http://www.fsf.org/licensing/licenses
17
18	>>> END OF LICENSE >>>
19	*************************************************************************/
20
21	using System;
22
23	namespace alglib
24	{
25	public class linreg
26	{
/// <summary>
/// Linear model in ALGLIB packed format: a flat array holding a small
/// header followed by the coefficients.
/// </summary>
public struct linearmodel
{
    // Layout used by the routines of this unit:
    //   w[0] - total array length
    //   w[1] - format version marker
    //   w[2] - number of independent variables
    //   w[3] - offset of the first coefficient
    //   w[offset..] - coefficients, constant term last
    public double[] w;
}
31
32
33	/*************************************************************************
34	LRReport structure contains additional information about linear model:
35	* C - covariation matrix, array[0..NVars,0..NVars].
36	C[i,j] = Cov(A[i],A[j])
37	* RMSError - root mean square error on a training set
38	* AvgError - average error on a training set
39	* AvgRelError - average relative error on a training set (excluding
40	observations with zero function value).
41	* CVRMSError - leave-one-out cross-validation estimate of
42	generalization error. Calculated using fast algorithm
43	with O(NVars*NPoints) complexity.
44	* CVAvgError - cross-validation estimate of average error
45	* CVAvgRelError - cross-validation estimate of average relative error
46
47	All other fields of the structure are intended for internal use and should
48	not be used outside ALGLIB.
49	*************************************************************************/
/// <summary>
/// Additional results produced together with a linear model.
/// </summary>
public struct lrreport
{
    // Covariance matrix of the coefficients.
    public double[,] c;

    // Errors measured on the training set.
    public double rmserror;      // root mean square error
    public double avgerror;      // average absolute error
    public double avgrelerror;   // average relative error (zero targets excluded)

    // Leave-one-out cross-validation estimates of the same errors.
    public double cvrmserror;
    public double cvavgerror;
    public double cvavgrelerror;

    // Internal use: number and indices of the points that were skipped
    // while computing the cross-validation estimates.
    public int ncvdefects;
    public int[] cvdefects;
}
62
63
64
65
// Format version marker of the packed linear model. It is written to w[1]
// when a model is created (and to ra[0] when a model is serialized) and is
// checked with Debug.Assert by the routines that read a packed model.
66	public const int lrvnum = 5;
67
68
69	/*************************************************************************
70	Linear regression
71
72	Subroutine builds model:
73
74	Y = A(0)X[0] + ... + A(N-1)X[N-1] + A(N)
75
76	and model found in ALGLIB format, covariation matrix, training set errors
77	(rms, average, average relative) and leave-one-out cross-validation
78	estimate of the generalization error. CV estimate calculated using fast
79	algorithm with O(NPoints*NVars) complexity.
80
81	When covariation matrix is calculated standard deviations of function
82	values are assumed to be equal to RMS error on the training set.
83
84	INPUT PARAMETERS:
85	XY - training set, array [0..NPoints-1,0..NVars]:
86	* NVars columns - independent variables
87	* last column - dependent variable
88	NPoints - training set size, NPoints>NVars+1
89	NVars - number of independent variables
90
91	OUTPUT PARAMETERS:
92	Info - return code:
93	* -255, in case of unknown internal error
94	* -4, if internal SVD subroutine hasn't converged
95	* -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
96	* 1, if subroutine successfully finished
97	LM - linear model in the ALGLIB format. Use subroutines of
98	this unit to work with the model.
99	AR - additional results
100
101
102	-- ALGLIB --
103	Copyright 02.08.2008 by Bochkanov Sergey
104	*************************************************************************/
public static void lrbuild(ref double[,] xy,
    int npoints,
    int nvars,
    ref int info,
    ref linearmodel lm,
    ref lrreport ar)
{
    // Unweighted linear regression with a constant term.
    //
    // The work is delegated to lrbuilds with unit standard deviations; the
    // covariance matrix it returns is then scaled by an estimate of the
    // noise variance derived from the training-set RMS error.
    //
    // info is set to -1 for invalid sizes; other codes come from lrbuilds.

    //
    // Test parameters (need NPoints>=NVars+2 and NVars>=1)
    //
    if( npoints<=nvars+1 || nvars<1 )
    {
        info = -1;
        return;
    }

    //
    // Unit weights for every point
    //
    double[] s = new double[npoints];
    for(int i=0; i<npoints; i++)
    {
        s[i] = 1;
    }

    lrbuilds(ref xy, ref s, npoints, nvars, ref info, ref lm, ref ar);
    if( info<0 )
    {
        return;
    }

    //
    // Scale covariance matrix: sigma^2 = RMS^2*NPoints/(NPoints-NVars-1).
    // The denominator is at least 1 because of the parameter test above.
    //
    double sigma2 = AP.Math.Sqr(ar.rmserror)*npoints/(npoints-nvars-1);
    for(int i=0; i<=nvars; i++)
    {
        for(int j=0; j<=nvars; j++)
        {
            ar.c[i,j] = sigma2*ar.c[i,j];
        }
    }
}
141
142
143	/*************************************************************************
144	Linear regression
145
146	Variant of LRBuild which uses vector of standard deviations (errors in
147	function values).
148
149	INPUT PARAMETERS:
150	XY - training set, array [0..NPoints-1,0..NVars]:
151	* NVars columns - independent variables
152	* last column - dependent variable
153	S - standard deviations (errors in function values)
154	array[0..NPoints-1], S[i]>0.
155	NPoints - training set size, NPoints>NVars+1
156	NVars - number of independent variables
157
158	OUTPUT PARAMETERS:
159	Info - return code:
160	* -255, in case of unknown internal error
161	* -4, if internal SVD subroutine hasn't converged
162	* -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
163	* -2, if S[I]<=0
164	* 1, if subroutine successfully finished
165	LM - linear model in the ALGLIB format. Use subroutines of
166	this unit to work with the model.
167	AR - additional results
168
169
170	-- ALGLIB --
171	Copyright 02.08.2008 by Bochkanov Sergey
172	*************************************************************************/
public static void lrbuilds(ref double[,] xy,
    ref double[] s,
    int npoints,
    int nvars,
    ref int info,
    ref linearmodel lm,
    ref lrreport ar)
{
    // Weighted linear regression with a constant term.
    //
    // Steps: copy the data and append a column of ones, standardize every
    // independent variable, solve with lrinternal, then map coefficients
    // and covariance matrix back to the original variable scale.
    //
    // info is set to -1 for invalid sizes; other codes come from lrinternal.
    double mean = 0;
    double variance = 0;
    double skewness = 0;
    double kurtosis = 0;
    double v = 0;

    //
    // Test parameters
    //
    if( npoints<=nvars+1 || nvars<1 )
    {
        info = -1;
        return;
    }

    //
    // Copy data, add one more column (constant term).
    // Column NVars holds the constant 1, column NVars+1 the dependent variable.
    //
    double[,] xyi = new double[npoints, nvars+2];
    for(int i=0; i<npoints; i++)
    {
        for(int j=0; j<nvars; j++)
        {
            xyi[i,j] = xy[i,j];
        }
        xyi[i,nvars] = 1;
        xyi[i,nvars+1] = xy[i,nvars];
    }

    //
    // Standardization: subtract the mean and divide by the standard
    // deviation of every independent variable.
    //
    double[] x = new double[npoints];
    double[] means = new double[nvars];
    double[] sigmas = new double[nvars];
    for(int j=0; j<nvars; j++)
    {
        for(int i=0; i<npoints; i++)
        {
            x[i] = xy[i,j];
        }
        descriptivestatistics.calculatemoments(ref x, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
        means[j] = mean;
        sigmas[j] = Math.Sqrt(variance);
        if( sigmas[j]==0 )
        {
            // constant column: avoid division by zero
            sigmas[j] = 1;
        }
        for(int i=0; i<npoints; i++)
        {
            xyi[i,j] = (xyi[i,j]-means[j])/sigmas[j];
        }
    }

    //
    // Internal processing (NVars+1 columns: variables plus constant term)
    //
    lrinternal(ref xyi, ref s, npoints, nvars+1, ref info, ref lm, ref ar);
    if( info<0 )
    {
        return;
    }

    //
    // Un-standardization
    //
    int offs = (int)Math.Round(lm.w[3]);
    for(int j=0; j<nvars; j++)
    {

        //
        // Constant term is updated (and its covariance too,
        // since it gets some variance from J-th component).
        // This must happen before the J-th coefficient is rescaled.
        //
        lm.w[offs+nvars] = lm.w[offs+nvars]-lm.w[offs+j]*means[j]/sigmas[j];
        v = means[j]/sigmas[j];
        for(int k=0; k<=nvars; k++)
        {
            ar.c[nvars,k] = ar.c[nvars,k] - v*ar.c[j,k];
        }
        for(int k=0; k<=nvars; k++)
        {
            ar.c[k,nvars] = ar.c[k,nvars] - v*ar.c[k,j];
        }

        //
        // J-th term is updated
        //
        lm.w[offs+j] = lm.w[offs+j]/sigmas[j];
        v = 1/sigmas[j];
        for(int k=0; k<=nvars; k++)
        {
            ar.c[j,k] = v*ar.c[j,k];
        }
        for(int k=0; k<=nvars; k++)
        {
            ar.c[k,j] = v*ar.c[k,j];
        }
    }
}
290
291
292	/*************************************************************************
293	Like LRBuildS, but builds model
294
295	Y = A(0)X[0] + ... + A(N-1)X[N-1]
296
297	i.e. with zero constant term.
298
299	-- ALGLIB --
300	Copyright 30.10.2008 by Bochkanov Sergey
301	*************************************************************************/
public static void lrbuildzs(ref double[,] xy,
    ref double[] s,
    int npoints,
    int nvars,
    ref int info,
    ref linearmodel lm,
    ref lrreport ar)
{
    // Weighted linear regression with the constant term forced to zero.
    //
    // The layout matches lrbuilds, but the extra "constant" column is
    // filled with zeros, and variables are only scaled (never shifted)
    // before being passed to lrinternal.
    //
    // info is set to -1 for invalid sizes; other codes come from lrinternal.
    double mean = 0;
    double variance = 0;
    double skewness = 0;
    double kurtosis = 0;
    double v = 0;

    //
    // Test parameters
    //
    if( npoints<=nvars+1 || nvars<1 )
    {
        info = -1;
        return;
    }

    //
    // Copy data, add one more column (constant term, identically zero here)
    //
    double[,] xyi = new double[npoints, nvars+2];
    for(int i=0; i<npoints; i++)
    {
        for(int j=0; j<nvars; j++)
        {
            xyi[i,j] = xy[i,j];
        }
        xyi[i,nvars] = 0;
        xyi[i,nvars+1] = xy[i,nvars];
    }

    //
    // Standardization: unusual scaling
    //
    double[] x = new double[npoints];
    double[] c = new double[nvars];
    for(int j=0; j<nvars; j++)
    {
        for(int i=0; i<npoints; i++)
        {
            x[i] = xy[i,j];
        }
        descriptivestatistics.calculatemoments(ref x, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
        if( Math.Abs(mean)>Math.Sqrt(variance) )
        {

            //
            // variation is relatively small, it is better to
            // bring mean value to 1
            //
            c[j] = mean;
        }
        else
        {

            //
            // variation is large, it is better to bring variance to 1
            //
            if( variance==0 )
            {
                variance = 1;
            }
            c[j] = Math.Sqrt(variance);
        }
        for(int i=0; i<npoints; i++)
        {
            xyi[i,j] = xyi[i,j]/c[j];
        }
    }

    //
    // Internal processing
    //
    lrinternal(ref xyi, ref s, npoints, nvars+1, ref info, ref lm, ref ar);
    if( info<0 )
    {
        return;
    }

    //
    // Un-standardization
    //
    int offs = (int)Math.Round(lm.w[3]);
    for(int j=0; j<nvars; j++)
    {

        //
        // J-th term is updated
        //
        lm.w[offs+j] = lm.w[offs+j]/c[j];
        v = 1/c[j];
        for(int k=0; k<=nvars; k++)
        {
            ar.c[j,k] = v*ar.c[j,k];
        }
        for(int k=0; k<=nvars; k++)
        {
            ar.c[k,j] = v*ar.c[k,j];
        }
    }
}
417
418
419	/*************************************************************************
420	Like LRBuild but builds model
421
422	Y = A(0)X[0] + ... + A(N-1)X[N-1]
423
424	i.e. with zero constant term.
425
426	-- ALGLIB --
427	Copyright 30.10.2008 by Bochkanov Sergey
428	*************************************************************************/
public static void lrbuildz(ref double[,] xy,
    int npoints,
    int nvars,
    ref int info,
    ref linearmodel lm,
    ref lrreport ar)
{
    // Unweighted linear regression with the constant term forced to zero.
    //
    // Delegates to lrbuildzs with unit standard deviations and then scales
    // the covariance matrix by an estimate of the noise variance derived
    // from the training-set RMS error.
    //
    // info is set to -1 for invalid sizes; other codes come from lrbuildzs.

    //
    // Test parameters (need NPoints>=NVars+2 and NVars>=1)
    //
    if( npoints<=nvars+1 || nvars<1 )
    {
        info = -1;
        return;
    }

    //
    // Unit weights for every point
    //
    double[] s = new double[npoints];
    for(int i=0; i<npoints; i++)
    {
        s[i] = 1;
    }

    lrbuildzs(ref xy, ref s, npoints, nvars, ref info, ref lm, ref ar);
    if( info<0 )
    {
        return;
    }

    //
    // Scale covariance matrix: sigma^2 = RMS^2*NPoints/(NPoints-NVars-1).
    // The denominator is at least 1 because of the parameter test above.
    //
    double sigma2 = AP.Math.Sqr(ar.rmserror)*npoints/(npoints-nvars-1);
    for(int i=0; i<=nvars; i++)
    {
        for(int j=0; j<=nvars; j++)
        {
            ar.c[i,j] = sigma2*ar.c[i,j];
        }
    }
}
465
466
467	/*************************************************************************
468	Unpacks coefficients of linear model.
469
470	INPUT PARAMETERS:
471	LM - linear model in ALGLIB format
472
473	OUTPUT PARAMETERS:
474	V - coefficients, array[0..NVars]
475	NVars - number of independent variables (one less than number
476	of coefficients)
477
478	-- ALGLIB --
479	Copyright 30.08.2008 by Bochkanov Sergey
480	*************************************************************************/
public static void lrunpack(ref linearmodel lm,
    ref double[] v,
    ref int nvars)
{
    // Reads the header of the packed model and copies the NVars+1
    // coefficients (constant term last) into a freshly allocated array.
    System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");

    nvars = (int)Math.Round(lm.w[2]);
    int first = (int)Math.Round(lm.w[3]);

    v = new double[nvars+1];
    for(int k=0; k<nvars+1; k++)
    {
        v[k] = lm.w[first+k];
    }
}
499
500
501	/*************************************************************************
502	"Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
503	reversed).
504
505	INPUT PARAMETERS:
506	V - coefficients, array[0..NVars]
507	NVars - number of independent variables
508
509	OUTPUT PARAMETERS:
510	LM - linear model.
511
512	-- ALGLIB --
513	Copyright 30.08.2008 by Bochkanov Sergey
514	*************************************************************************/
public static void lrpack(ref double[] v,
    int nvars,
    ref linearmodel lm)
{
    // Builds a packed model: a 4-element header followed by the NVars+1
    // coefficients taken from v (constant term last).
    const int headerSize = 4;

    lm.w = new double[headerSize+nvars+1];
    lm.w[0] = headerSize+nvars+1;   // total array length
    lm.w[1] = lrvnum;               // format version marker
    lm.w[2] = nvars;                // number of independent variables
    lm.w[3] = headerSize;           // offset of the first coefficient

    for(int k=0; k<=nvars; k++)
    {
        lm.w[headerSize+k] = v[k];
    }
}
535
536
537	/*************************************************************************
538	Processing
539
540	INPUT PARAMETERS:
541	LM - linear model
542	X - input vector, array[0..NVars-1].
543
544	Result:
545	value of linear model regression estimate
546
547	-- ALGLIB --
548	Copyright 03.09.2008 by Bochkanov Sergey
549	*************************************************************************/
public static double lrprocess(ref linearmodel lm,
    ref double[] x)
{
    // Evaluates the model at x: dot product of x with the first NVars
    // coefficients, plus the constant term stored right after them.
    System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");

    int nvars = (int)Math.Round(lm.w[2]);
    int offs = (int)Math.Round(lm.w[3]);

    double dot = 0.0;
    for(int k=0; k<nvars; k++)
    {
        dot += x[k]*lm.w[offs+k];
    }
    return dot+lm.w[offs+nvars];
}
572
573
574	/*************************************************************************
575	RMS error on the test set
576
577	INPUT PARAMETERS:
578	LM - linear model
579	XY - test set
580	NPoints - test set size
581
582	RESULT:
583	root mean square error.
584
585	-- ALGLIB --
586	Copyright 30.08.2008 by Bochkanov Sergey
587	*************************************************************************/
public static double lrrmserror(ref linearmodel lm,
    ref double[,] xy,
    int npoints)
{
    // Root mean square difference between the model prediction and the
    // dependent variable (column NVars of xy) over the first NPoints rows.
    System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");

    int nvars = (int)Math.Round(lm.w[2]);
    int offs = (int)Math.Round(lm.w[3]);

    double sumSquares = 0;
    for(int row=0; row<npoints; row++)
    {
        double predicted = 0.0;
        for(int col=0; col<nvars; col++)
        {
            predicted += xy[row,col]*lm.w[offs+col];
        }
        predicted = predicted+lm.w[offs+nvars];
        sumSquares = sumSquares+AP.Math.Sqr(predicted-xy[row,nvars]);
    }
    return Math.Sqrt(sumSquares/npoints);
}
618
619
620	/*************************************************************************
621	Average error on the test set
622
623	INPUT PARAMETERS:
624	LM - linear model
625	XY - test set
626	NPoints - test set size
627
628	RESULT:
629	average error.
630
631	-- ALGLIB --
632	Copyright 30.08.2008 by Bochkanov Sergey
633	*************************************************************************/
public static double lravgerror(ref linearmodel lm,
    ref double[,] xy,
    int npoints)
{
    // Mean absolute difference between the model prediction and the
    // dependent variable (column NVars of xy) over the first NPoints rows.
    System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");

    int nvars = (int)Math.Round(lm.w[2]);
    int offs = (int)Math.Round(lm.w[3]);

    double sumAbs = 0;
    for(int row=0; row<npoints; row++)
    {
        double predicted = 0.0;
        for(int col=0; col<nvars; col++)
        {
            predicted += xy[row,col]*lm.w[offs+col];
        }
        predicted = predicted+lm.w[offs+nvars];
        sumAbs = sumAbs+Math.Abs(predicted-xy[row,nvars]);
    }
    return sumAbs/npoints;
}
664
665
666	/*************************************************************************
667	Average relative error on the test set
668
669	INPUT PARAMETERS:
670	LM - linear model
671	XY - test set
672	NPoints - test set size
673
674	RESULT:
675	average relative error.
676
677	-- ALGLIB --
678	Copyright 30.08.2008 by Bochkanov Sergey
679	*************************************************************************/
public static double lravgrelerror(ref linearmodel lm,
    ref double[,] xy,
    int npoints)
{
    // Mean of |prediction-target|/|target| over the rows whose target
    // (column NVars of xy) is non-zero. Returns 0 when no row qualifies.
    System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");

    int nvars = (int)Math.Round(lm.w[2]);
    int offs = (int)Math.Round(lm.w[3]);

    double total = 0;
    int counted = 0;
    for(int row=0; row<npoints; row++)
    {
        if( xy[row,nvars]==0 )
        {
            // relative error is undefined for a zero target
            continue;
        }

        double predicted = 0.0;
        for(int col=0; col<nvars; col++)
        {
            predicted += xy[row,col]*lm.w[offs+col];
        }
        predicted = predicted+lm.w[offs+nvars];

        total = total+Math.Abs((predicted-xy[row,nvars])/xy[row,nvars]);
        counted = counted+1;
    }

    if( counted!=0 )
    {
        total = total/counted;
    }
    return total;
}
719
720
721	/*************************************************************************
722	Copying of LinearModel structure
723
724	INPUT PARAMETERS:
725	LM1 - original
726
727	OUTPUT PARAMETERS:
728	LM2 - copy
729
730	-- ALGLIB --
731	Copyright 15.03.2009 by Bochkanov Sergey
732	*************************************************************************/
public static void lrcopy(ref linearmodel lm1,
    ref linearmodel lm2)
{
    // The array length is taken from the header (w[0]) of the source
    // model, not from the actual length of its array.
    int length = (int)Math.Round(lm1.w[0]);

    lm2.w = new double[length];
    for(int k=0; k<length; k++)
    {
        lm2.w[k] = lm1.w[k];
    }
}
746
747
748	/*************************************************************************
749	Serialization of LinearModel structure
750
751	INPUT PARAMETERS:
752	LM - original
753
754	OUTPUT PARAMETERS:
755	RA - array of real numbers which stores model,
756	array[0..RLen-1]
757	RLen - RA length
758
759	-- ALGLIB --
760	Copyright 15.03.2009 by Bochkanov Sergey
761	*************************************************************************/
public static void lrserialize(ref linearmodel lm,
    ref double[] ra,
    ref int rlen)
{
    // Output layout: ra[0] is the version marker, ra[1..] is a verbatim
    // copy of the packed model, whose length is read from its header w[0].
    rlen = (int)Math.Round(lm.w[0])+1;

    ra = new double[rlen];
    ra[0] = lrvnum;
    for(int k=1; k<rlen; k++)
    {
        ra[k] = lm.w[k-1];
    }
}
778
779
780	/*************************************************************************
781	Unserialization of LinearModel structure
782
783	INPUT PARAMETERS:
784	RA - real array which stores linear model
785
786	OUTPUT PARAMETERS:
787	LM - unserialized structure
788
789	-- ALGLIB --
790	Copyright 15.03.2009 by Bochkanov Sergey
791	*************************************************************************/
public static void lrunserialize(ref double[] ra,
    ref linearmodel lm)
{
    // ra[0] must hold the version marker; ra[1] is the first element of
    // the packed model, i.e. its length, and the model itself occupies
    // ra[1..ra[1]].
    System.Diagnostics.Debug.Assert((int)Math.Round(ra[0])==lrvnum, "LRUnserialize: incorrect array!");

    lm.w = new double[(int)Math.Round(ra[1])];

    // The bound is re-read from ra[1] on every iteration, as in the
    // original loop.
    for(int k=0; k<(int)Math.Round(ra[1]); k++)
    {
        lm.w[k] = ra[k+1];
    }
}
806
807
808	/*************************************************************************
809	Obsolete subroutine, use LRBuildS
810
811	-- ALGLIB --
812	Copyright 26.04.2008 by Bochkanov Sergey
813
814	References:
815	1. Numerical Recipes in C, "15.2 Fitting Data to a Straight Line"
816	*************************************************************************/
public static void lrlines(ref double[,] xy,
    ref double[] s,
    int n,
    ref int info,
    ref double a,
    ref double b,
    ref double vara,
    ref double varb,
    ref double covab,
    ref double corrab,
    ref double p)
{
    // Weighted straight-line fit y = a + b*x.
    //
    // xy[i,0] is x, xy[i,1] is y, s[i] is the standard deviation of y[i].
    // info: 1 on success, -1 if n<2, -2 if some s[i]<=0, -3 if the
    // problem is too ill-conditioned.
    // Outputs besides a and b: vara/varb/covab/corrab describe the
    // uncertainty of the coefficients, p is a goodness-of-fit value
    // computed from chi-square (set to 1 when n==2).
    double ss = 0;
    double sx = 0;
    double sxx = 0;
    double sy = 0;
    double stt = 0;
    double t = 0;

    if( n<2 )
    {
        info = -1;
        return;
    }
    for(int i=0; i<n; i++)
    {
        if( s[i]<=0 )
        {
            info = -2;
            return;
        }
    }
    info = 1;

    //
    // Calculate S, SX, SY, SXX (sums weighted by 1/s[i]^2)
    //
    for(int i=0; i<n; i++)
    {
        t = AP.Math.Sqr(s[i]);
        ss = ss+1/t;
        sx = sx+xy[i,0]/t;
        sy = sy+xy[i,1]/t;
        sxx = sxx+AP.Math.Sqr(xy[i,0])/t;
    }

    //
    // Test for condition number: e1 and e2 are the eigenvalues of the
    // symmetric 2x2 matrix [[ss, sx], [sx, sxx]].
    //
    t = Math.Sqrt(4*AP.Math.Sqr(sx)+AP.Math.Sqr(ss-sxx));
    double e1 = 0.5*(ss+sxx+t);
    double e2 = 0.5*(ss+sxx-t);
    if( Math.Min(e1, e2)<=1000*AP.Math.MachineEpsilon*Math.Max(e1, e2) )
    {
        info = -3;
        return;
    }

    //
    // Calculate A, B
    //
    a = 0;
    b = 0;
    stt = 0;
    for(int i=0; i<n; i++)
    {
        t = (xy[i,0]-sx/ss)/s[i];
        b = b+t*xy[i,1]/s[i];
        stt = stt+AP.Math.Sqr(t);
    }
    b = b/stt;
    a = (sy-sx*b)/ss;

    //
    // Calculate goodness-of-fit
    //
    if( n>2 )
    {
        double chi2 = 0;
        for(int i=0; i<n; i++)
        {
            chi2 = chi2+AP.Math.Sqr((xy[i,1]-a-b*xy[i,0])/s[i]);
        }
        p = igammaf.incompletegammac(((double)(n-2))/(double)(2), chi2/2);
    }
    else
    {
        p = 1;
    }

    //
    // Calculate other parameters
    //
    vara = (1+AP.Math.Sqr(sx)/(ss*stt))/ss;
    varb = 1/stt;
    covab = -(sx/(ss*stt));
    corrab = covab/Math.Sqrt(vara*varb);
}
923
924
925	/*************************************************************************
926	Obsolete subroutine, use LRBuild
927
928	-- ALGLIB --
929	Copyright 02.08.2008 by Bochkanov Sergey
930	*************************************************************************/
public static void lrline(ref double[,] xy,
    int n,
    ref int info,
    ref double a,
    ref double b)
{
    // Unweighted straight-line fit: calls lrlines with unit standard
    // deviations and discards everything except a, b and info.
    if( n<2 )
    {
        info = -1;
        return;
    }

    double[] unitSigmas = new double[n];
    for(int k=0; k<n; k++)
    {
        unitSigmas[k] = 1;
    }

    // outputs of lrlines that this wrapper does not expose
    double vara = 0;
    double varb = 0;
    double covab = 0;
    double corrab = 0;
    double p = 0;
    lrlines(ref xy, ref unitSigmas, n, ref info, ref a, ref b, ref vara, ref varb, ref covab, ref corrab, ref p);
}
957
958
959	/*************************************************************************
960	Internal linear regression subroutine
961	*************************************************************************/
private static void lrinternal(ref double[,] xy,
    ref double[] s,
    int npoints,
    int nvars,
    ref int info,
    ref linearmodel lm,
    ref lrreport ar)
{
    // Core weighted least squares solver used by the public builders.
    //
    // Here NVars is the number of columns of the design matrix (the public
    // builders pass their own NVars plus one for the constant column);
    // column NVars of xy is the dependent variable.
    //
    // info: 1 on success, -1 for invalid sizes, -2 if some s[i]<=0,
    // -4 if the SVD failed, -255 for an unexpected internal condition.
    double[,] a = new double[0,0];
    double[,] u = new double[0,0];
    double[,] vt = new double[0,0];
    double[,] vm = new double[0,0];
    double[,] xym = new double[0,0];
    double[] b = new double[0];
    double[] sv = new double[0];
    double[] t = new double[0];
    double[] svi = new double[0];
    double[] work = new double[0];
    int i = 0;
    int j = 0;
    int k = 0;
    int ncv = 0;
    int na = 0;
    int nacv = 0;
    double r = 0;
    double p = 0;
    double epstol = 0;
    lrreport ar2 = new lrreport();
    int offs = 0;
    linearmodel tlm = new linearmodel();
    int i_ = 0;
    int i1_ = 0;

    // tolerance, in units of machine epsilon, for "is effectively zero" tests
    epstol = 1000;

    //
    // Check for errors in data
    //
    if( npoints<nvars || nvars<1 )
    {
        info = -1;
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( s[i]<=0 )
        {
            info = -2;
            return;
        }
    }
    info = 1;

    //
    // Create design matrix (rows and right part are divided by s[i])
    //
    a = new double[npoints-1+1, nvars-1+1];
    b = new double[npoints-1+1];
    for(i=0; i<=npoints-1; i++)
    {
        r = 1/s[i];
        for(i_=0; i_<=nvars-1;i_++)
        {
            a[i,i_] = r*xy[i,i_];
        }
        b[i] = xy[i,nvars]/s[i];
    }

    //
    // Allocate W:
    // W[0] array size
    // W[1] version number (lrvnum)
    // W[2] NVars (minus 1, to be compatible with external representation)
    // W[3] coefficients offset
    //
    lm.w = new double[4+nvars-1+1];
    offs = 4;
    lm.w[0] = 4+nvars;
    lm.w[1] = lrvnum;
    lm.w[2] = nvars-1;
    lm.w[3] = offs;

    //
    // Solve problem using SVD:
    //
    // 0. check for degeneracy (different types)
    // 1. A = U*diag(sv)*V'
    // 2. T = b'*U
    // 3. w = SUM((T[i]/sv[i])*V[..,i])
    // 4. covariance matrix is built from V and 1/sv (see code below)
    //
    // see $15.4 of "Numerical Recipes in C" for more information
    //
    t = new double[nvars-1+1];
    svi = new double[nvars-1+1];
    ar.c = new double[nvars-1+1, nvars-1+1];
    vm = new double[nvars-1+1, nvars-1+1];
    if( !svd.rmatrixsvd(a, npoints, nvars, 1, 1, 2, ref sv, ref u, ref vt) )
    {
        info = -4;
        return;
    }
    if( sv[0]<=0 )
    {

        //
        // Degenerate case: zero design matrix.
        // All coefficients are zero, CV errors equal training errors.
        //
        for(i=offs; i<=offs+nvars-1; i++)
        {
            lm.w[i] = 0;
        }
        ar.rmserror = lrrmserror(ref lm, ref xy, npoints);
        ar.avgerror = lravgerror(ref lm, ref xy, npoints);
        ar.avgrelerror = lravgrelerror(ref lm, ref xy, npoints);
        ar.cvrmserror = ar.rmserror;
        ar.cvavgerror = ar.avgerror;
        ar.cvavgrelerror = ar.avgrelerror;
        ar.ncvdefects = 0;
        ar.cvdefects = new int[nvars-1+1];
        ar.c = new double[nvars-1+1, nvars-1+1];
        for(i=0; i<=nvars-1; i++)
        {
            for(j=0; j<=nvars-1; j++)
            {
                ar.c[i,j] = 0;
            }
        }
        return;
    }
    if( sv[nvars-1]<=epstol*AP.Math.MachineEpsilon*sv[0] )
    {

        //
        // Degenerate case, non-zero design matrix.
        //
        // We can leave it and solve task in SVD least squares fashion.
        // Solution and covariance matrix will be obtained correctly,
        // but CV error estimates - will not. It is better to reduce
        // it to non-degenerate task and to obtain correct CV estimates.
        //
        for(k=nvars; k>=1; k--)
        {
            if( sv[k-1]>epstol*AP.Math.MachineEpsilon*sv[0] )
            {

                //
                // Reduce: project the variables onto the first K right
                // singular vectors, keep the dependent variable as is.
                //
                xym = new double[npoints-1+1, k+1];
                for(i=0; i<=npoints-1; i++)
                {
                    for(j=0; j<=k-1; j++)
                    {
                        r = 0.0;
                        for(i_=0; i_<=nvars-1;i_++)
                        {
                            r += xy[i,i_]*vt[j,i_];
                        }
                        xym[i,j] = r;
                    }
                    xym[i,k] = xy[i,nvars];
                }

                //
                // Solve
                //
                lrinternal(ref xym, ref s, npoints, k, ref info, ref tlm, ref ar2);
                if( info!=1 )
                {
                    return;
                }

                //
                // Convert back to un-reduced format
                //
                for(j=0; j<=nvars-1; j++)
                {
                    lm.w[offs+j] = 0;
                }
                for(j=0; j<=k-1; j++)
                {
                    r = tlm.w[offs+j];
                    i1_ = (0) - (offs);
                    for(i_=offs; i_<=offs+nvars-1;i_++)
                    {
                        lm.w[i_] = lm.w[i_] + r*vt[j,i_+i1_];
                    }
                }
                ar.rmserror = ar2.rmserror;
                ar.avgerror = ar2.avgerror;
                ar.avgrelerror = ar2.avgrelerror;
                ar.cvrmserror = ar2.cvrmserror;
                ar.cvavgerror = ar2.cvavgerror;
                ar.cvavgrelerror = ar2.cvavgrelerror;
                ar.ncvdefects = ar2.ncvdefects;
                ar.cvdefects = new int[nvars-1+1];
                for(j=0; j<=ar.ncvdefects-1; j++)
                {
                    ar.cvdefects[j] = ar2.cvdefects[j];
                }
                ar.c = new double[nvars-1+1, nvars-1+1];
                work = new double[nvars+1];
                blas.matrixmatrixmultiply(ref ar2.c, 0, k-1, 0, k-1, false, ref vt, 0, k-1, 0, nvars-1, false, 1.0, ref vm, 0, k-1, 0, nvars-1, 0.0, ref work);
                blas.matrixmatrixmultiply(ref vt, 0, k-1, 0, nvars-1, true, ref vm, 0, k-1, 0, nvars-1, false, 1.0, ref ar.c, 0, nvars-1, 0, nvars-1, 0.0, ref work);
                return;
            }
        }

        // sv[0]>0 was checked above, so K=1 should always match
        info = -255;
        return;
    }

    //
    // Non-degenerate case: invert singular values
    //
    for(i=0; i<=nvars-1; i++)
    {
        if( sv[i]>epstol*AP.Math.MachineEpsilon*sv[0] )
        {
            svi[i] = 1/sv[i];
        }
        else
        {
            svi[i] = 0;
        }
    }

    //
    // T = b'*U
    //
    for(i=0; i<=nvars-1; i++)
    {
        t[i] = 0;
    }
    for(i=0; i<=npoints-1; i++)
    {
        r = b[i];
        for(i_=0; i_<=nvars-1;i_++)
        {
            t[i_] = t[i_] + r*u[i,i_];
        }
    }

    //
    // w = SUM((T[i]/sv[i])*V[..,i])
    //
    for(i=0; i<=nvars-1; i++)
    {
        lm.w[offs+i] = 0;
    }
    for(i=0; i<=nvars-1; i++)
    {
        r = t[i]*svi[i];
        i1_ = (0) - (offs);
        for(i_=offs; i_<=offs+nvars-1;i_++)
        {
            lm.w[i_] = lm.w[i_] + r*vt[i,i_+i1_];
        }
    }

    //
    // Covariance matrix: VM = V*diag(1/sv), C = VM*VM'
    //
    for(j=0; j<=nvars-1; j++)
    {
        r = svi[j];
        for(i_=0; i_<=nvars-1;i_++)
        {
            vm[i_,j] = r*vt[j,i_];
        }
    }
    for(i=0; i<=nvars-1; i++)
    {
        for(j=i; j<=nvars-1; j++)
        {
            r = 0.0;
            for(i_=0; i_<=nvars-1;i_++)
            {
                r += vm[i,i_]*vm[j,i_];
            }
            ar.c[i,j] = r;
            ar.c[j,i] = r;
        }
    }

    //
    // Leave-1-out cross-validation error.
    //
    // NOTATIONS:
    // A        design matrix
    // A*x = b  original linear least squares task
    // U*S*V'   SVD of A
    // ai       i-th row of the A
    // bi       i-th element of the b
    // xf       solution of the original LLS task
    //
    // Cross-validation error of i-th element from a sample is
    // calculated using following formula:
    //
    //     ERRi = ai*xf - (ai*xf-bi*(ui*ui'))/(1-ui*ui')          (1)
    //
    // This formula can be derived from normal equations of the
    // original task
    //
    //     (A'*A)x = A'*b                                         (2)
    //
    // by applying modification (zeroing out i-th row of A) to (2):
    //
    //     (A-ai)'*(A-ai) = (A-ai)'*b
    //
    // and using Sherman-Morrison formula for updating matrix inverse
    //
    // NOTE 1: b is not zeroed out since it is much simpler and
    // does not influence final result.
    //
    // NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
    // Formula (1) can't be applied for such cases and they are skipped
    // from CV calculation (which distorts resulting CV estimate).
    // But from the properties of U we can conclude that there can
    // be no more than NVars such vectors. Usually
    // NVars << NPoints, so in a normal case it only slightly
    // influences result.
    //
    ncv = 0;
    na = 0;
    nacv = 0;
    ar.rmserror = 0;
    ar.avgerror = 0;
    ar.avgrelerror = 0;
    ar.cvrmserror = 0;
    ar.cvavgerror = 0;
    ar.cvavgrelerror = 0;
    ar.ncvdefects = 0;
    ar.cvdefects = new int[nvars-1+1];
    for(i=0; i<=npoints-1; i++)
    {

        //
        // Error on a training set
        //
        i1_ = (offs)-(0);
        r = 0.0;
        for(i_=0; i_<=nvars-1;i_++)
        {
            r += xy[i,i_]*lm.w[i_+i1_];
        }
        ar.rmserror = ar.rmserror+AP.Math.Sqr(r-xy[i,nvars]);
        ar.avgerror = ar.avgerror+Math.Abs(r-xy[i,nvars]);
        if( xy[i,nvars]!=0 )
        {
            ar.avgrelerror = ar.avgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
            na = na+1;
        }

        //
        // Error using fast leave-one-out cross-validation
        //
        p = 0.0;
        for(i_=0; i_<=nvars-1;i_++)
        {
            p += u[i,i_]*u[i,i_];
        }
        if( p>1-epstol*AP.Math.MachineEpsilon )
        {
            // 1-ui*ui' is (almost) zero: record the point and skip it
            ar.cvdefects[ar.ncvdefects] = i;
            ar.ncvdefects = ar.ncvdefects+1;
            continue;
        }
        r = s[i]*(r/s[i]-b[i]*p)/(1-p);
        ar.cvrmserror = ar.cvrmserror+AP.Math.Sqr(r-xy[i,nvars]);
        ar.cvavgerror = ar.cvavgerror+Math.Abs(r-xy[i,nvars]);
        if( xy[i,nvars]!=0 )
        {
            ar.cvavgrelerror = ar.cvavgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
            nacv = nacv+1;
        }
        ncv = ncv+1;
    }
    if( ncv==0 )
    {

        //
        // Something strange: ALL ui are degenerate.
        // Unexpected...
        //
        info = -255;
        return;
    }
    ar.rmserror = Math.Sqrt(ar.rmserror/npoints);
    ar.avgerror = ar.avgerror/npoints;
    if( na!=0 )
    {
        ar.avgrelerror = ar.avgrelerror/na;
    }
    ar.cvrmserror = Math.Sqrt(ar.cvrmserror/ncv);
    ar.cvavgerror = ar.cvavgerror/ncv;
    if( nacv!=0 )
    {
        ar.cvavgrelerror = ar.cvavgrelerror/nacv;
    }
}
1347	}
1348	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences