Context Navigation

source: trunk/sources/ALGLIB/linreg.cs @ 2594

Visit:

Last change on this file since 2594 was 2563, checked in by gkronber, 15 years ago
Updated ALGLIB to latest version. #751 (Plugin for for data-modeling with ANN (integrated into CEDMA))
File size: 44.2 KB

Line
1	/*************************************************************************
2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
3
4	>>> SOURCE LICENSE >>>
5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation (www.fsf.org); either version 2 of the
8	License, or (at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	A copy of the GNU General Public License is available at
16	http://www.fsf.org/licensing/licenses
17
18	>>> END OF LICENSE >>>
19	*************************************************************************/
20
21	using System;
22
23	namespace alglib
24	{
25	public class linreg
26	{
27	public struct linearmodel
28	{
29	public double[] w;
30	};
31
32
33	/*************************************************************************
34	LRReport structure contains additional information about linear model:
35	* C - covariation matrix, array[0..NVars,0..NVars].
36	C[i,j] = Cov(A[i],A[j])
37	* RMSError - root mean square error on a training set
38	* AvgError - average error on a training set
39	* AvgRelError - average relative error on a training set (excluding
40	observations with zero function value).
41	* CVRMSError - leave-one-out cross-validation estimate of
42	generalization error. Calculated using fast algorithm
43	with O(NVars*NPoints) complexity.
44	* CVAvgError - cross-validation estimate of average error
45	* CVAvgRelError - cross-validation estimate of average relative error
46
47	All other fields of the structure are intended for internal use and should
48	not be used outside ALGLIB.
49	*************************************************************************/
50	public struct lrreport
51	{
52	public double[,] c;
53	public double rmserror;
54	public double avgerror;
55	public double avgrelerror;
56	public double cvrmserror;
57	public double cvavgerror;
58	public double cvavgrelerror;
59	public int ncvdefects;
60	public int[] cvdefects;
61	};
62
63
64
65
66	public const int lrvnum = 5;
67
68
69	/*************************************************************************
70	Linear regression
71
72	Subroutine builds model:
73
74	Y = A(0)X[0] + ... + A(N-1)X[N-1] + A(N)
75
76	and model found in ALGLIB format, covariation matrix, training set errors
77	(rms, average, average relative) and leave-one-out cross-validation
78	estimate of the generalization error. CV estimate calculated using fast
79	algorithm with O(NPoints*NVars) complexity.
80
81	When covariation matrix is calculated standard deviations of function
82	values are assumed to be equal to RMS error on the training set.
83
84	INPUT PARAMETERS:
85	XY - training set, array [0..NPoints-1,0..NVars]:
86	* NVars columns - independent variables
87	* last column - dependent variable
88	NPoints - training set size, NPoints>NVars+1
89	NVars - number of independent variables
90
91	OUTPUT PARAMETERS:
92	Info - return code:
93	* -255, in case of unknown internal error
94	* -4, if internal SVD subroutine haven't converged
95	* -1, if incorrect parameters was passed (NPoints<NVars+2, NVars<1).
96	* 1, if subroutine successfully finished
97	LM - linear model in the ALGLIB format. Use subroutines of
98	this unit to work with the model.
99	AR - additional results
100
101
102	-- ALGLIB --
103	Copyright 02.08.2008 by Bochkanov Sergey
104	*************************************************************************/
105	public static void lrbuild(ref double[,] xy,
106	int npoints,
107	int nvars,
108	ref int info,
109	ref linearmodel lm,
110	ref lrreport ar)
111	{
112	double[] s = new double[0];
113	int i = 0;
114	double sigma2 = 0;
115	int i_ = 0;
116
117	if( npoints<=nvars+1 \| nvars<1 )
118	{
119	info = -1;
120	return;
121	}
122	s = new double[npoints-1+1];
123	for(i=0; i<=npoints-1; i++)
124	{
125	s[i] = 1;
126	}
127	lrbuilds(ref xy, ref s, npoints, nvars, ref info, ref lm, ref ar);
128	if( info<0 )
129	{
130	return;
131	}
132	sigma2 = AP.Math.Sqr(ar.rmserror)*npoints/(npoints-nvars-1);
133	for(i=0; i<=nvars; i++)
134	{
135	for(i_=0; i_<=nvars;i_++)
136	{
137	ar.c[i,i_] = sigma2*ar.c[i,i_];
138	}
139	}
140	}
141
142
143	/*************************************************************************
144	Linear regression
145
146	Variant of LRBuild which uses vector of standatd deviations (errors in
147	function values).
148
149	INPUT PARAMETERS:
150	XY - training set, array [0..NPoints-1,0..NVars]:
151	* NVars columns - independent variables
152	* last column - dependent variable
153	S - standard deviations (errors in function values)
154	array[0..NPoints-1], S[i]>0.
155	NPoints - training set size, NPoints>NVars+1
156	NVars - number of independent variables
157
158	OUTPUT PARAMETERS:
159	Info - return code:
160	* -255, in case of unknown internal error
161	* -4, if internal SVD subroutine haven't converged
162	* -1, if incorrect parameters was passed (NPoints<NVars+2, NVars<1).
163	* -2, if S[I]<=0
164	* 1, if subroutine successfully finished
165	LM - linear model in the ALGLIB format. Use subroutines of
166	this unit to work with the model.
167	AR - additional results
168
169
170	-- ALGLIB --
171	Copyright 02.08.2008 by Bochkanov Sergey
172	*************************************************************************/
173	public static void lrbuilds(ref double[,] xy,
174	ref double[] s,
175	int npoints,
176	int nvars,
177	ref int info,
178	ref linearmodel lm,
179	ref lrreport ar)
180	{
181	double[,] xyi = new double[0,0];
182	double[] x = new double[0];
183	double[] means = new double[0];
184	double[] sigmas = new double[0];
185	int i = 0;
186	int j = 0;
187	double v = 0;
188	int offs = 0;
189	double mean = 0;
190	double variance = 0;
191	double skewness = 0;
192	double kurtosis = 0;
193	int i_ = 0;
194
195
196	//
197	// Test parameters
198	//
199	if( npoints<=nvars+1 \| nvars<1 )
200	{
201	info = -1;
202	return;
203	}
204
205	//
206	// Copy data, add one more column (constant term)
207	//
208	xyi = new double[npoints-1+1, nvars+1+1];
209	for(i=0; i<=npoints-1; i++)
210	{
211	for(i_=0; i_<=nvars-1;i_++)
212	{
213	xyi[i,i_] = xy[i,i_];
214	}
215	xyi[i,nvars] = 1;
216	xyi[i,nvars+1] = xy[i,nvars];
217	}
218
219	//
220	// Standartization
221	//
222	x = new double[npoints-1+1];
223	means = new double[nvars-1+1];
224	sigmas = new double[nvars-1+1];
225	for(j=0; j<=nvars-1; j++)
226	{
227	for(i_=0; i_<=npoints-1;i_++)
228	{
229	x[i_] = xy[i_,j];
230	}
231	descriptivestatistics.calculatemoments(ref x, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
232	means[j] = mean;
233	sigmas[j] = Math.Sqrt(variance);
234	if( (double)(sigmas[j])==(double)(0) )
235	{
236	sigmas[j] = 1;
237	}
238	for(i=0; i<=npoints-1; i++)
239	{
240	xyi[i,j] = (xyi[i,j]-means[j])/sigmas[j];
241	}
242	}
243
244	//
245	// Internal processing
246	//
247	lrinternal(ref xyi, ref s, npoints, nvars+1, ref info, ref lm, ref ar);
248	if( info<0 )
249	{
250	return;
251	}
252
253	//
254	// Un-standartization
255	//
256	offs = (int)Math.Round(lm.w[3]);
257	for(j=0; j<=nvars-1; j++)
258	{
259
260	//
261	// Constant term is updated (and its covariance too,
262	// since it gets some variance from J-th component)
263	//
264	lm.w[offs+nvars] = lm.w[offs+nvars]-lm.w[offs+j]*means[j]/sigmas[j];
265	v = means[j]/sigmas[j];
266	for(i_=0; i_<=nvars;i_++)
267	{
268	ar.c[nvars,i_] = ar.c[nvars,i_] - v*ar.c[j,i_];
269	}
270	for(i_=0; i_<=nvars;i_++)
271	{
272	ar.c[i_,nvars] = ar.c[i_,nvars] - v*ar.c[i_,j];
273	}
274
275	//
276	// J-th term is updated
277	//
278	lm.w[offs+j] = lm.w[offs+j]/sigmas[j];
279	v = 1/sigmas[j];
280	for(i_=0; i_<=nvars;i_++)
281	{
282	ar.c[j,i_] = v*ar.c[j,i_];
283	}
284	for(i_=0; i_<=nvars;i_++)
285	{
286	ar.c[i_,j] = v*ar.c[i_,j];
287	}
288	}
289	}
290
291
292	/*************************************************************************
293	Like LRBuildS, but builds model
294
295	Y = A(0)X[0] + ... + A(N-1)X[N-1]
296
297	i.e. with zero constant term.
298
299	-- ALGLIB --
300	Copyright 30.10.2008 by Bochkanov Sergey
301	*************************************************************************/
302	public static void lrbuildzs(ref double[,] xy,
303	ref double[] s,
304	int npoints,
305	int nvars,
306	ref int info,
307	ref linearmodel lm,
308	ref lrreport ar)
309	{
310	double[,] xyi = new double[0,0];
311	double[] x = new double[0];
312	double[] c = new double[0];
313	int i = 0;
314	int j = 0;
315	double v = 0;
316	int offs = 0;
317	double mean = 0;
318	double variance = 0;
319	double skewness = 0;
320	double kurtosis = 0;
321	int i_ = 0;
322
323
324	//
325	// Test parameters
326	//
327	if( npoints<=nvars+1 \| nvars<1 )
328	{
329	info = -1;
330	return;
331	}
332
333	//
334	// Copy data, add one more column (constant term)
335	//
336	xyi = new double[npoints-1+1, nvars+1+1];
337	for(i=0; i<=npoints-1; i++)
338	{
339	for(i_=0; i_<=nvars-1;i_++)
340	{
341	xyi[i,i_] = xy[i,i_];
342	}
343	xyi[i,nvars] = 0;
344	xyi[i,nvars+1] = xy[i,nvars];
345	}
346
347	//
348	// Standartization: unusual scaling
349	//
350	x = new double[npoints-1+1];
351	c = new double[nvars-1+1];
352	for(j=0; j<=nvars-1; j++)
353	{
354	for(i_=0; i_<=npoints-1;i_++)
355	{
356	x[i_] = xy[i_,j];
357	}
358	descriptivestatistics.calculatemoments(ref x, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
359	if( (double)(Math.Abs(mean))>(double)(Math.Sqrt(variance)) )
360	{
361
362	//
363	// variation is relatively small, it is better to
364	// bring mean value to 1
365	//
366	c[j] = mean;
367	}
368	else
369	{
370
371	//
372	// variation is large, it is better to bring variance to 1
373	//
374	if( (double)(variance)==(double)(0) )
375	{
376	variance = 1;
377	}
378	c[j] = Math.Sqrt(variance);
379	}
380	for(i=0; i<=npoints-1; i++)
381	{
382	xyi[i,j] = xyi[i,j]/c[j];
383	}
384	}
385
386	//
387	// Internal processing
388	//
389	lrinternal(ref xyi, ref s, npoints, nvars+1, ref info, ref lm, ref ar);
390	if( info<0 )
391	{
392	return;
393	}
394
395	//
396	// Un-standartization
397	//
398	offs = (int)Math.Round(lm.w[3]);
399	for(j=0; j<=nvars-1; j++)
400	{
401
402	//
403	// J-th term is updated
404	//
405	lm.w[offs+j] = lm.w[offs+j]/c[j];
406	v = 1/c[j];
407	for(i_=0; i_<=nvars;i_++)
408	{
409	ar.c[j,i_] = v*ar.c[j,i_];
410	}
411	for(i_=0; i_<=nvars;i_++)
412	{
413	ar.c[i_,j] = v*ar.c[i_,j];
414	}
415	}
416	}
417
418
419	/*************************************************************************
420	Like LRBuild but builds model
421
422	Y = A(0)X[0] + ... + A(N-1)X[N-1]
423
424	i.e. with zero constant term.
425
426	-- ALGLIB --
427	Copyright 30.10.2008 by Bochkanov Sergey
428	*************************************************************************/
429	public static void lrbuildz(ref double[,] xy,
430	int npoints,
431	int nvars,
432	ref int info,
433	ref linearmodel lm,
434	ref lrreport ar)
435	{
436	double[] s = new double[0];
437	int i = 0;
438	double sigma2 = 0;
439	int i_ = 0;
440
441	if( npoints<=nvars+1 \| nvars<1 )
442	{
443	info = -1;
444	return;
445	}
446	s = new double[npoints-1+1];
447	for(i=0; i<=npoints-1; i++)
448	{
449	s[i] = 1;
450	}
451	lrbuildzs(ref xy, ref s, npoints, nvars, ref info, ref lm, ref ar);
452	if( info<0 )
453	{
454	return;
455	}
456	sigma2 = AP.Math.Sqr(ar.rmserror)*npoints/(npoints-nvars-1);
457	for(i=0; i<=nvars; i++)
458	{
459	for(i_=0; i_<=nvars;i_++)
460	{
461	ar.c[i,i_] = sigma2*ar.c[i,i_];
462	}
463	}
464	}
465
466
467	/*************************************************************************
468	Unpacks coefficients of linear model.
469
470	INPUT PARAMETERS:
471	LM - linear model in ALGLIB format
472
473	OUTPUT PARAMETERS:
474	V - coefficients, array[0..NVars]
475	NVars - number of independent variables (one less than number
476	of coefficients)
477
478	-- ALGLIB --
479	Copyright 30.08.2008 by Bochkanov Sergey
480	*************************************************************************/
481	public static void lrunpack(ref linearmodel lm,
482	ref double[] v,
483	ref int nvars)
484	{
485	int offs = 0;
486	int i_ = 0;
487	int i1_ = 0;
488
489	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
490	nvars = (int)Math.Round(lm.w[2]);
491	offs = (int)Math.Round(lm.w[3]);
492	v = new double[nvars+1];
493	i1_ = (offs) - (0);
494	for(i_=0; i_<=nvars;i_++)
495	{
496	v[i_] = lm.w[i_+i1_];
497	}
498	}
499
500
501	/*************************************************************************
502	"Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
503	reversed).
504
505	INPUT PARAMETERS:
506	V - coefficients, array[0..NVars]
507	NVars - number of independent variables
508
509	OUTPUT PAREMETERS:
510	LM - linear model.
511
512	-- ALGLIB --
513	Copyright 30.08.2008 by Bochkanov Sergey
514	*************************************************************************/
515	public static void lrpack(ref double[] v,
516	int nvars,
517	ref linearmodel lm)
518	{
519	int offs = 0;
520	int i_ = 0;
521	int i1_ = 0;
522
523	lm.w = new double[4+nvars+1];
524	offs = 4;
525	lm.w[0] = 4+nvars+1;
526	lm.w[1] = lrvnum;
527	lm.w[2] = nvars;
528	lm.w[3] = offs;
529	i1_ = (0) - (offs);
530	for(i_=offs; i_<=offs+nvars;i_++)
531	{
532	lm.w[i_] = v[i_+i1_];
533	}
534	}
535
536
537	/*************************************************************************
538	Procesing
539
540	INPUT PARAMETERS:
541	LM - linear model
542	X - input vector, array[0..NVars-1].
543
544	Result:
545	value of linear model regression estimate
546
547	-- ALGLIB --
548	Copyright 03.09.2008 by Bochkanov Sergey
549	*************************************************************************/
550	public static double lrprocess(ref linearmodel lm,
551	ref double[] x)
552	{
553	double result = 0;
554	double v = 0;
555	int offs = 0;
556	int nvars = 0;
557	int i_ = 0;
558	int i1_ = 0;
559
560	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
561	nvars = (int)Math.Round(lm.w[2]);
562	offs = (int)Math.Round(lm.w[3]);
563	i1_ = (offs)-(0);
564	v = 0.0;
565	for(i_=0; i_<=nvars-1;i_++)
566	{
567	v += x[i_]*lm.w[i_+i1_];
568	}
569	result = v+lm.w[offs+nvars];
570	return result;
571	}
572
573
574	/*************************************************************************
575	RMS error on the test set
576
577	INPUT PARAMETERS:
578	LM - linear model
579	XY - test set
580	NPoints - test set size
581
582	RESULT:
583	root mean square error.
584
585	-- ALGLIB --
586	Copyright 30.08.2008 by Bochkanov Sergey
587	*************************************************************************/
588	public static double lrrmserror(ref linearmodel lm,
589	ref double[,] xy,
590	int npoints)
591	{
592	double result = 0;
593	int i = 0;
594	double v = 0;
595	int offs = 0;
596	int nvars = 0;
597	int i_ = 0;
598	int i1_ = 0;
599
600	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
601	nvars = (int)Math.Round(lm.w[2]);
602	offs = (int)Math.Round(lm.w[3]);
603	result = 0;
604	for(i=0; i<=npoints-1; i++)
605	{
606	i1_ = (offs)-(0);
607	v = 0.0;
608	for(i_=0; i_<=nvars-1;i_++)
609	{
610	v += xy[i,i_]*lm.w[i_+i1_];
611	}
612	v = v+lm.w[offs+nvars];
613	result = result+AP.Math.Sqr(v-xy[i,nvars]);
614	}
615	result = Math.Sqrt(result/npoints);
616	return result;
617	}
618
619
620	/*************************************************************************
621	Average error on the test set
622
623	INPUT PARAMETERS:
624	LM - linear model
625	XY - test set
626	NPoints - test set size
627
628	RESULT:
629	average error.
630
631	-- ALGLIB --
632	Copyright 30.08.2008 by Bochkanov Sergey
633	*************************************************************************/
634	public static double lravgerror(ref linearmodel lm,
635	ref double[,] xy,
636	int npoints)
637	{
638	double result = 0;
639	int i = 0;
640	double v = 0;
641	int offs = 0;
642	int nvars = 0;
643	int i_ = 0;
644	int i1_ = 0;
645
646	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
647	nvars = (int)Math.Round(lm.w[2]);
648	offs = (int)Math.Round(lm.w[3]);
649	result = 0;
650	for(i=0; i<=npoints-1; i++)
651	{
652	i1_ = (offs)-(0);
653	v = 0.0;
654	for(i_=0; i_<=nvars-1;i_++)
655	{
656	v += xy[i,i_]*lm.w[i_+i1_];
657	}
658	v = v+lm.w[offs+nvars];
659	result = result+Math.Abs(v-xy[i,nvars]);
660	}
661	result = result/npoints;
662	return result;
663	}
664
665
666	/*************************************************************************
667	RMS error on the test set
668
669	INPUT PARAMETERS:
670	LM - linear model
671	XY - test set
672	NPoints - test set size
673
674	RESULT:
675	average relative error.
676
677	-- ALGLIB --
678	Copyright 30.08.2008 by Bochkanov Sergey
679	*************************************************************************/
680	public static double lravgrelerror(ref linearmodel lm,
681	ref double[,] xy,
682	int npoints)
683	{
684	double result = 0;
685	int i = 0;
686	int k = 0;
687	double v = 0;
688	int offs = 0;
689	int nvars = 0;
690	int i_ = 0;
691	int i1_ = 0;
692
693	System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!");
694	nvars = (int)Math.Round(lm.w[2]);
695	offs = (int)Math.Round(lm.w[3]);
696	result = 0;
697	k = 0;
698	for(i=0; i<=npoints-1; i++)
699	{
700	if( (double)(xy[i,nvars])!=(double)(0) )
701	{
702	i1_ = (offs)-(0);
703	v = 0.0;
704	for(i_=0; i_<=nvars-1;i_++)
705	{
706	v += xy[i,i_]*lm.w[i_+i1_];
707	}
708	v = v+lm.w[offs+nvars];
709	result = result+Math.Abs((v-xy[i,nvars])/xy[i,nvars]);
710	k = k+1;
711	}
712	}
713	if( k!=0 )
714	{
715	result = result/k;
716	}
717	return result;
718	}
719
720
721	/*************************************************************************
722	Copying of LinearModel strucure
723
724	INPUT PARAMETERS:
725	LM1 - original
726
727	OUTPUT PARAMETERS:
728	LM2 - copy
729
730	-- ALGLIB --
731	Copyright 15.03.2009 by Bochkanov Sergey
732	*************************************************************************/
733	public static void lrcopy(ref linearmodel lm1,
734	ref linearmodel lm2)
735	{
736	int k = 0;
737	int i_ = 0;
738
739	k = (int)Math.Round(lm1.w[0]);
740	lm2.w = new double[k-1+1];
741	for(i_=0; i_<=k-1;i_++)
742	{
743	lm2.w[i_] = lm1.w[i_];
744	}
745	}
746
747
748	/*************************************************************************
749	Serialization of LinearModel strucure
750
751	INPUT PARAMETERS:
752	LM - original
753
754	OUTPUT PARAMETERS:
755	RA - array of real numbers which stores model,
756	array[0..RLen-1]
757	RLen - RA lenght
758
759	-- ALGLIB --
760	Copyright 15.03.2009 by Bochkanov Sergey
761	*************************************************************************/
762	public static void lrserialize(ref linearmodel lm,
763	ref double[] ra,
764	ref int rlen)
765	{
766	int i_ = 0;
767	int i1_ = 0;
768
769	rlen = (int)Math.Round(lm.w[0])+1;
770	ra = new double[rlen-1+1];
771	ra[0] = lrvnum;
772	i1_ = (0) - (1);
773	for(i_=1; i_<=rlen-1;i_++)
774	{
775	ra[i_] = lm.w[i_+i1_];
776	}
777	}
778
779
780	/*************************************************************************
781	Unserialization of DecisionForest strucure
782
783	INPUT PARAMETERS:
784	RA - real array which stores decision forest
785
786	OUTPUT PARAMETERS:
787	LM - unserialized structure
788
789	-- ALGLIB --
790	Copyright 15.03.2009 by Bochkanov Sergey
791	*************************************************************************/
792	public static void lrunserialize(ref double[] ra,
793	ref linearmodel lm)
794	{
795	int i_ = 0;
796	int i1_ = 0;
797
798	System.Diagnostics.Debug.Assert((int)Math.Round(ra[0])==lrvnum, "LRUnserialize: incorrect array!");
799	lm.w = new double[(int)Math.Round(ra[1])-1+1];
800	i1_ = (1) - (0);
801	for(i_=0; i_<=(int)Math.Round(ra[1])-1;i_++)
802	{
803	lm.w[i_] = ra[i_+i1_];
804	}
805	}
806
807
808	public static void lrlines(ref double[,] xy,
809	ref double[] s,
810	int n,
811	ref int info,
812	ref double a,
813	ref double b,
814	ref double vara,
815	ref double varb,
816	ref double covab,
817	ref double corrab,
818	ref double p)
819	{
820	int i = 0;
821	double ss = 0;
822	double sx = 0;
823	double sxx = 0;
824	double sy = 0;
825	double stt = 0;
826	double e1 = 0;
827	double e2 = 0;
828	double t = 0;
829	double chi2 = 0;
830
831	if( n<2 )
832	{
833	info = -1;
834	return;
835	}
836	for(i=0; i<=n-1; i++)
837	{
838	if( (double)(s[i])<=(double)(0) )
839	{
840	info = -2;
841	return;
842	}
843	}
844	info = 1;
845
846	//
847	// Calculate S, SX, SY, SXX
848	//
849	ss = 0;
850	sx = 0;
851	sy = 0;
852	sxx = 0;
853	for(i=0; i<=n-1; i++)
854	{
855	t = AP.Math.Sqr(s[i]);
856	ss = ss+1/t;
857	sx = sx+xy[i,0]/t;
858	sy = sy+xy[i,1]/t;
859	sxx = sxx+AP.Math.Sqr(xy[i,0])/t;
860	}
861
862	//
863	// Test for condition number
864	//
865	t = Math.Sqrt(4*AP.Math.Sqr(sx)+AP.Math.Sqr(ss-sxx));
866	e1 = 0.5*(ss+sxx+t);
867	e2 = 0.5*(ss+sxx-t);
868	if( (double)(Math.Min(e1, e2))<=(double)(1000AP.Math.MachineEpsilonMath.Max(e1, e2)) )
869	{
870	info = -3;
871	return;
872	}
873
874	//
875	// Calculate A, B
876	//
877	a = 0;
878	b = 0;
879	stt = 0;
880	for(i=0; i<=n-1; i++)
881	{
882	t = (xy[i,0]-sx/ss)/s[i];
883	b = b+t*xy[i,1]/s[i];
884	stt = stt+AP.Math.Sqr(t);
885	}
886	b = b/stt;
887	a = (sy-sx*b)/ss;
888
889	//
890	// Calculate goodness-of-fit
891	//
892	if( n>2 )
893	{
894	chi2 = 0;
895	for(i=0; i<=n-1; i++)
896	{
897	chi2 = chi2+AP.Math.Sqr((xy[i,1]-a-b*xy[i,0])/s[i]);
898	}
899	p = igammaf.incompletegammac(((double)(n-2))/(double)(2), chi2/2);
900	}
901	else
902	{
903	p = 1;
904	}
905
906	//
907	// Calculate other parameters
908	//
909	vara = (1+AP.Math.Sqr(sx)/(ss*stt))/ss;
910	varb = 1/stt;
911	covab = -(sx/(ss*stt));
912	corrab = covab/Math.Sqrt(vara*varb);
913	}
914
915
916	public static void lrline(ref double[,] xy,
917	int n,
918	ref int info,
919	ref double a,
920	ref double b)
921	{
922	double[] s = new double[0];
923	int i = 0;
924	double vara = 0;
925	double varb = 0;
926	double covab = 0;
927	double corrab = 0;
928	double p = 0;
929
930	if( n<2 )
931	{
932	info = -1;
933	return;
934	}
935	s = new double[n-1+1];
936	for(i=0; i<=n-1; i++)
937	{
938	s[i] = 1;
939	}
940	lrlines(ref xy, ref s, n, ref info, ref a, ref b, ref vara, ref varb, ref covab, ref corrab, ref p);
941	}
942
943
944	/*************************************************************************
945	Internal linear regression subroutine
946	*************************************************************************/
947	private static void lrinternal(ref double[,] xy,
948	ref double[] s,
949	int npoints,
950	int nvars,
951	ref int info,
952	ref linearmodel lm,
953	ref lrreport ar)
954	{
955	double[,] a = new double[0,0];
956	double[,] u = new double[0,0];
957	double[,] vt = new double[0,0];
958	double[,] vm = new double[0,0];
959	double[,] xym = new double[0,0];
960	double[] b = new double[0];
961	double[] sv = new double[0];
962	double[] t = new double[0];
963	double[] svi = new double[0];
964	double[] work = new double[0];
965	int i = 0;
966	int j = 0;
967	int k = 0;
968	int ncv = 0;
969	int na = 0;
970	int nacv = 0;
971	double r = 0;
972	double p = 0;
973	double epstol = 0;
974	lrreport ar2 = new lrreport();
975	int offs = 0;
976	linearmodel tlm = new linearmodel();
977	int i_ = 0;
978	int i1_ = 0;
979
980	epstol = 1000;
981
982	//
983	// Check for errors in data
984	//
985	if( npoints<nvars \| nvars<1 )
986	{
987	info = -1;
988	return;
989	}
990	for(i=0; i<=npoints-1; i++)
991	{
992	if( (double)(s[i])<=(double)(0) )
993	{
994	info = -2;
995	return;
996	}
997	}
998	info = 1;
999
1000	//
1001	// Create design matrix
1002	//
1003	a = new double[npoints-1+1, nvars-1+1];
1004	b = new double[npoints-1+1];
1005	for(i=0; i<=npoints-1; i++)
1006	{
1007	r = 1/s[i];
1008	for(i_=0; i_<=nvars-1;i_++)
1009	{
1010	a[i,i_] = r*xy[i,i_];
1011	}
1012	b[i] = xy[i,nvars]/s[i];
1013	}
1014
1015	//
1016	// Allocate W:
1017	// W[0] array size
1018	// W[1] version number, 0
1019	// W[2] NVars (minus 1, to be compatible with external representation)
1020	// W[3] coefficients offset
1021	//
1022	lm.w = new double[4+nvars-1+1];
1023	offs = 4;
1024	lm.w[0] = 4+nvars;
1025	lm.w[1] = lrvnum;
1026	lm.w[2] = nvars-1;
1027	lm.w[3] = offs;
1028
1029	//
1030	// Solve problem using SVD:
1031	//
1032	// 0. check for degeneracy (different types)
1033	// 1. A = Udiag(sv)V'
1034	// 2. T = b'*U
1035	// 3. w = SUM((T[i]/sv[i])*V[..,i])
1036	// 4. cov(wi,wj) = SUM(Vji*Vjk/sv[i]^2,K=1..M)
1037	//
1038	// see $15.4 of "Numerical Recipes in C" for more information
1039	//
1040	t = new double[nvars-1+1];
1041	svi = new double[nvars-1+1];
1042	ar.c = new double[nvars-1+1, nvars-1+1];
1043	vm = new double[nvars-1+1, nvars-1+1];
1044	if( !svd.rmatrixsvd(a, npoints, nvars, 1, 1, 2, ref sv, ref u, ref vt) )
1045	{
1046	info = -4;
1047	return;
1048	}
1049	if( (double)(sv[0])<=(double)(0) )
1050	{
1051
1052	//
1053	// Degenerate case: zero design matrix.
1054	//
1055	for(i=offs; i<=offs+nvars-1; i++)
1056	{
1057	lm.w[i] = 0;
1058	}
1059	ar.rmserror = lrrmserror(ref lm, ref xy, npoints);
1060	ar.avgerror = lravgerror(ref lm, ref xy, npoints);
1061	ar.avgrelerror = lravgrelerror(ref lm, ref xy, npoints);
1062	ar.cvrmserror = ar.rmserror;
1063	ar.cvavgerror = ar.avgerror;
1064	ar.cvavgrelerror = ar.avgrelerror;
1065	ar.ncvdefects = 0;
1066	ar.cvdefects = new int[nvars-1+1];
1067	ar.c = new double[nvars-1+1, nvars-1+1];
1068	for(i=0; i<=nvars-1; i++)
1069	{
1070	for(j=0; j<=nvars-1; j++)
1071	{
1072	ar.c[i,j] = 0;
1073	}
1074	}
1075	return;
1076	}
1077	if( (double)(sv[nvars-1])<=(double)(epstolAP.Math.MachineEpsilonsv[0]) )
1078	{
1079
1080	//
1081	// Degenerate case, non-zero design matrix.
1082	//
1083	// We can leave it and solve task in SVD least squares fashion.
1084	// Solution and covariance matrix will be obtained correctly,
1085	// but CV error estimates - will not. It is better to reduce
1086	// it to non-degenerate task and to obtain correct CV estimates.
1087	//
1088	for(k=nvars; k>=1; k--)
1089	{
1090	if( (double)(sv[k-1])>(double)(epstolAP.Math.MachineEpsilonsv[0]) )
1091	{
1092
1093	//
1094	// Reduce
1095	//
1096	xym = new double[npoints-1+1, k+1];
1097	for(i=0; i<=npoints-1; i++)
1098	{
1099	for(j=0; j<=k-1; j++)
1100	{
1101	r = 0.0;
1102	for(i_=0; i_<=nvars-1;i_++)
1103	{
1104	r += xy[i,i_]*vt[j,i_];
1105	}
1106	xym[i,j] = r;
1107	}
1108	xym[i,k] = xy[i,nvars];
1109	}
1110
1111	//
1112	// Solve
1113	//
1114	lrinternal(ref xym, ref s, npoints, k, ref info, ref tlm, ref ar2);
1115	if( info!=1 )
1116	{
1117	return;
1118	}
1119
1120	//
1121	// Convert back to un-reduced format
1122	//
1123	for(j=0; j<=nvars-1; j++)
1124	{
1125	lm.w[offs+j] = 0;
1126	}
1127	for(j=0; j<=k-1; j++)
1128	{
1129	r = tlm.w[offs+j];
1130	i1_ = (0) - (offs);
1131	for(i_=offs; i_<=offs+nvars-1;i_++)
1132	{
1133	lm.w[i_] = lm.w[i_] + r*vt[j,i_+i1_];
1134	}
1135	}
1136	ar.rmserror = ar2.rmserror;
1137	ar.avgerror = ar2.avgerror;
1138	ar.avgrelerror = ar2.avgrelerror;
1139	ar.cvrmserror = ar2.cvrmserror;
1140	ar.cvavgerror = ar2.cvavgerror;
1141	ar.cvavgrelerror = ar2.cvavgrelerror;
1142	ar.ncvdefects = ar2.ncvdefects;
1143	ar.cvdefects = new int[nvars-1+1];
1144	for(j=0; j<=ar.ncvdefects-1; j++)
1145	{
1146	ar.cvdefects[j] = ar2.cvdefects[j];
1147	}
1148	ar.c = new double[nvars-1+1, nvars-1+1];
1149	work = new double[nvars+1];
1150	blas.matrixmatrixmultiply(ref ar2.c, 0, k-1, 0, k-1, false, ref vt, 0, k-1, 0, nvars-1, false, 1.0, ref vm, 0, k-1, 0, nvars-1, 0.0, ref work);
1151	blas.matrixmatrixmultiply(ref vt, 0, k-1, 0, nvars-1, true, ref vm, 0, k-1, 0, nvars-1, false, 1.0, ref ar.c, 0, nvars-1, 0, nvars-1, 0.0, ref work);
1152	return;
1153	}
1154	}
1155	info = -255;
1156	return;
1157	}
1158	for(i=0; i<=nvars-1; i++)
1159	{
1160	if( (double)(sv[i])>(double)(epstolAP.Math.MachineEpsilonsv[0]) )
1161	{
1162	svi[i] = 1/sv[i];
1163	}
1164	else
1165	{
1166	svi[i] = 0;
1167	}
1168	}
1169	for(i=0; i<=nvars-1; i++)
1170	{
1171	t[i] = 0;
1172	}
1173	for(i=0; i<=npoints-1; i++)
1174	{
1175	r = b[i];
1176	for(i_=0; i_<=nvars-1;i_++)
1177	{
1178	t[i_] = t[i_] + r*u[i,i_];
1179	}
1180	}
1181	for(i=0; i<=nvars-1; i++)
1182	{
1183	lm.w[offs+i] = 0;
1184	}
1185	for(i=0; i<=nvars-1; i++)
1186	{
1187	r = t[i]*svi[i];
1188	i1_ = (0) - (offs);
1189	for(i_=offs; i_<=offs+nvars-1;i_++)
1190	{
1191	lm.w[i_] = lm.w[i_] + r*vt[i,i_+i1_];
1192	}
1193	}
1194	for(j=0; j<=nvars-1; j++)
1195	{
1196	r = svi[j];
1197	for(i_=0; i_<=nvars-1;i_++)
1198	{
1199	vm[i_,j] = r*vt[j,i_];
1200	}
1201	}
1202	for(i=0; i<=nvars-1; i++)
1203	{
1204	for(j=i; j<=nvars-1; j++)
1205	{
1206	r = 0.0;
1207	for(i_=0; i_<=nvars-1;i_++)
1208	{
1209	r += vm[i,i_]*vm[j,i_];
1210	}
1211	ar.c[i,j] = r;
1212	ar.c[j,i] = r;
1213	}
1214	}
1215
1216	//
1217	// Leave-1-out cross-validation error.
1218	//
1219	// NOTATIONS:
1220	// A design matrix
1221	// A*x = b original linear least squares task
1222	// USV' SVD of A
1223	// ai i-th row of the A
1224	// bi i-th element of the b
1225	// xf solution of the original LLS task
1226	//
1227	// Cross-validation error of i-th element from a sample is
1228	// calculated using following formula:
1229	//
1230	// ERRi = aixf - (aixf-bi(uiui'))/(1-ui*ui') (1)
1231	//
1232	// This formula can be derived from normal equations of the
1233	// original task
1234	//
1235	// (A'A)x = A'b (2)
1236	//
1237	// by applying modification (zeroing out i-th row of A) to (2):
1238	//
1239	// (A-ai)'(A-ai) = (A-ai)'b
1240	//
1241	// and using Sherman-Morrison formula for updating matrix inverse
1242	//
1243	// NOTE 1: b is not zeroed out since it is much simpler and
1244	// does not influence final result.
1245	//
1246	// NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
1247	// Formula (1) can't be applied for such cases and they are skipped
1248	// from CV calculation (which distorts resulting CV estimate).
1249	// But from the properties of U we can conclude that there can
1250	// be no more than NVars such vectors. Usually
1251	// NVars << NPoints, so in a normal case it only slightly
1252	// influences result.
1253	//
1254	ncv = 0;
1255	na = 0;
1256	nacv = 0;
1257	ar.rmserror = 0;
1258	ar.avgerror = 0;
1259	ar.avgrelerror = 0;
1260	ar.cvrmserror = 0;
1261	ar.cvavgerror = 0;
1262	ar.cvavgrelerror = 0;
1263	ar.ncvdefects = 0;
1264	ar.cvdefects = new int[nvars-1+1];
1265	for(i=0; i<=npoints-1; i++)
1266	{
1267
1268	//
1269	// Error on a training set
1270	//
1271	i1_ = (offs)-(0);
1272	r = 0.0;
1273	for(i_=0; i_<=nvars-1;i_++)
1274	{
1275	r += xy[i,i_]*lm.w[i_+i1_];
1276	}
1277	ar.rmserror = ar.rmserror+AP.Math.Sqr(r-xy[i,nvars]);
1278	ar.avgerror = ar.avgerror+Math.Abs(r-xy[i,nvars]);
1279	if( (double)(xy[i,nvars])!=(double)(0) )
1280	{
1281	ar.avgrelerror = ar.avgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
1282	na = na+1;
1283	}
1284
1285	//
1286	// Error using fast leave-one-out cross-validation
1287	//
1288	p = 0.0;
1289	for(i_=0; i_<=nvars-1;i_++)
1290	{
1291	p += u[i,i_]*u[i,i_];
1292	}
1293	if( (double)(p)>(double)(1-epstol*AP.Math.MachineEpsilon) )
1294	{
1295	ar.cvdefects[ar.ncvdefects] = i;
1296	ar.ncvdefects = ar.ncvdefects+1;
1297	continue;
1298	}
1299	r = s[i](r/s[i]-b[i]p)/(1-p);
1300	ar.cvrmserror = ar.cvrmserror+AP.Math.Sqr(r-xy[i,nvars]);
1301	ar.cvavgerror = ar.cvavgerror+Math.Abs(r-xy[i,nvars]);
1302	if( (double)(xy[i,nvars])!=(double)(0) )
1303	{
1304	ar.cvavgrelerror = ar.cvavgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]);
1305	nacv = nacv+1;
1306	}
1307	ncv = ncv+1;
1308	}
1309	if( ncv==0 )
1310	{
1311
1312	//
1313	// Something strange: ALL ui are degenerate.
1314	// Unexpected...
1315	//
1316	info = -255;
1317	return;
1318	}
1319	ar.rmserror = Math.Sqrt(ar.rmserror/npoints);
1320	ar.avgerror = ar.avgerror/npoints;
1321	if( na!=0 )
1322	{
1323	ar.avgrelerror = ar.avgrelerror/na;
1324	}
1325	ar.cvrmserror = Math.Sqrt(ar.cvrmserror/ncv);
1326	ar.cvavgerror = ar.cvavgerror/ncv;
1327	if( nacv!=0 )
1328	{
1329	ar.cvavgrelerror = ar.cvavgrelerror/nacv;
1330	}
1331	}
1332	}
1333	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences