Context Navigation

source: trunk/sources/ALGLIB/mlptrain.cs @ 2636

Visit:

Last change on this file since 2636 was 2563, checked in by gkronber, 15 years ago
Updated ALGLIB to latest version. #751 (Plugin for for data-modeling with ANN (integrated into CEDMA))
File size: 45.6 KB

Line
1	/*************************************************************************
2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
3
4	>>> SOURCE LICENSE >>>
5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation (www.fsf.org); either version 2 of the
8	License, or (at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	A copy of the GNU General Public License is available at
16	http://www.fsf.org/licensing/licenses
17
18	>>> END OF LICENSE >>>
19	*************************************************************************/
20
21	using System;
22
23	namespace alglib
24	{
25	public class mlptrain
26	{
27	/*************************************************************************
28	Training report:
29	* NGrad - number of gradient calculations
30	* NHess - number of Hessian calculations
31	* NCholesky - number of Cholesky decompositions
32	*************************************************************************/
33	public struct mlpreport
34	{
35	public int ngrad;
36	public int nhess;
37	public int ncholesky;
38	};
39
40
41	/*************************************************************************
42	Cross-validation estimates of generalization error
43	*************************************************************************/
44	public struct mlpcvreport
45	{
46	public double relclserror;
47	public double avgce;
48	public double rmserror;
49	public double avgerror;
50	public double avgrelerror;
51	};
52
53
54
55
56	public const double mindecay = 0.001;
57
58
59	/*************************************************************************
60	Neural network training using modified Levenberg-Marquardt with exact
61	Hessian calculation and regularization. Subroutine trains neural network
62	with restarts from random positions. Algorithm is well suited for small
63	and medium scale problems (hundreds of weights).
64
65	INPUT PARAMETERS:
66	Network - neural network with initialized geometry
67	XY - training set
68	NPoints - training set size
69	Decay - weight decay constant, >=0.001
70	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
71	function.
72	If you don't know what Decay to choose, use 0.001.
73	Restarts - number of restarts from random position, >0.
74	If you don't know what Restarts to choose, use 2.
75
76	OUTPUT PARAMETERS:
77	Network - trained neural network.
78	Info - return code:
79	* -9, if internal matrix inverse subroutine failed
80	* -2, if there is a point with class number
81	outside of [0..NOut-1].
82	* -1, if wrong parameters specified
83	(NPoints<0, Restarts<1).
84	* 2, if task has been solved.
85	Rep - training report
86
87	-- ALGLIB --
88	Copyright 10.03.2009 by Bochkanov Sergey
89	*************************************************************************/
90	public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
91	ref double[,] xy,
92	int npoints,
93	double decay,
94	int restarts,
95	ref int info,
96	ref mlpreport rep)
97	{
98	int nin = 0;
99	int nout = 0;
100	int wcount = 0;
101	double lmftol = 0;
102	double lmsteptol = 0;
103	int i = 0;
104	int j = 0;
105	int k = 0;
106	int mx = 0;
107	double v = 0;
108	double e = 0;
109	double enew = 0;
110	double xnorm2 = 0;
111	double stepnorm = 0;
112	double[] g = new double[0];
113	double[] d = new double[0];
114	double[,] h = new double[0,0];
115	double[,] hmod = new double[0,0];
116	double[,] z = new double[0,0];
117	bool spd = new bool();
118	double nu = 0;
119	double lambda = 0;
120	double lambdaup = 0;
121	double lambdadown = 0;
122	int cvcnt = 0;
123	double cvrelcnt = 0;
124	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
125	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
126	double[] x = new double[0];
127	double[] y = new double[0];
128	double[] wbase = new double[0];
129	double wstep = 0;
130	double[] wdir = new double[0];
131	double[] wt = new double[0];
132	double[] wx = new double[0];
133	int pass = 0;
134	double[] wbest = new double[0];
135	double ebest = 0;
136	int i_ = 0;
137
138	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
139	lambdaup = 10;
140	lambdadown = 0.3;
141	lmftol = 0.001;
142	lmsteptol = 0.001;
143
144	//
145	// Test for inputs
146	//
147	if( npoints<=0 \| restarts<1 )
148	{
149	info = -1;
150	return;
151	}
152	if( mlpbase.mlpissoftmax(ref network) )
153	{
154	for(i=0; i<=npoints-1; i++)
155	{
156	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
157	{
158	info = -2;
159	return;
160	}
161	}
162	}
163	decay = Math.Max(decay, mindecay);
164	info = 2;
165
166	//
167	// Initialize data
168	//
169	rep.ngrad = 0;
170	rep.nhess = 0;
171	rep.ncholesky = 0;
172
173	//
174	// General case.
175	// Prepare task and network. Allocate space.
176	//
177	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
178	g = new double[wcount-1+1];
179	h = new double[wcount-1+1, wcount-1+1];
180	hmod = new double[wcount-1+1, wcount-1+1];
181	wbase = new double[wcount-1+1];
182	wdir = new double[wcount-1+1];
183	wbest = new double[wcount-1+1];
184	wt = new double[wcount-1+1];
185	wx = new double[wcount-1+1];
186	ebest = AP.Math.MaxRealNumber;
187
188	//
189	// Multiple passes
190	//
191	for(pass=1; pass<=restarts; pass++)
192	{
193
194	//
195	// Initialize weights
196	//
197	mlpbase.mlprandomize(ref network);
198
199	//
200	// First stage of the hybrid algorithm: LBFGS
201	//
202	for(i_=0; i_<=wcount-1;i_++)
203	{
204	wbase[i_] = network.weights[i_];
205	}
206	lbfgs.minlbfgs(wcount, Math.Min(wcount, 5), ref wbase, 0.0, 0.0, 0.0, Math.Max(25, wcount), 0, ref state);
207	while( lbfgs.minlbfgsiteration(ref state) )
208	{
209
210	//
211	// gradient
212	//
213	for(i_=0; i_<=wcount-1;i_++)
214	{
215	network.weights[i_] = state.x[i_];
216	}
217	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
218
219	//
220	// weight decay
221	//
222	v = 0.0;
223	for(i_=0; i_<=wcount-1;i_++)
224	{
225	v += network.weights[i_]*network.weights[i_];
226	}
227	state.f = state.f+0.5decayv;
228	for(i_=0; i_<=wcount-1;i_++)
229	{
230	state.g[i_] = state.g[i_] + decay*network.weights[i_];
231	}
232
233	//
234	// next iteration
235	//
236	rep.ngrad = rep.ngrad+1;
237	}
238	lbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
239	for(i_=0; i_<=wcount-1;i_++)
240	{
241	network.weights[i_] = wbase[i_];
242	}
243
244	//
245	// Second stage of the hybrid algorithm: LM
246	//
247	// Initialize H with identity matrix,
248	// G with gradient,
249	// E with regularized error.
250	//
251	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
252	v = 0.0;
253	for(i_=0; i_<=wcount-1;i_++)
254	{
255	v += network.weights[i_]*network.weights[i_];
256	}
257	e = e+0.5decayv;
258	for(i_=0; i_<=wcount-1;i_++)
259	{
260	g[i_] = g[i_] + decay*network.weights[i_];
261	}
262	for(k=0; k<=wcount-1; k++)
263	{
264	h[k,k] = h[k,k]+decay;
265	}
266	rep.nhess = rep.nhess+1;
267	lambda = 0.001;
268	nu = 2;
269	while( true )
270	{
271
272	//
273	// 1. HMod = H+lambda*I
274	// 2. Try to solve (H+LambdaI)dx = -g.
275	// Increase lambda if left part is not positive definite.
276	//
277	for(i=0; i<=wcount-1; i++)
278	{
279	for(i_=0; i_<=wcount-1;i_++)
280	{
281	hmod[i,i_] = h[i,i_];
282	}
283	hmod[i,i] = hmod[i,i]+lambda;
284	}
285	spd = cholesky.spdmatrixcholesky(ref hmod, wcount, true);
286	rep.ncholesky = rep.ncholesky+1;
287	if( !spd )
288	{
289	lambda = lambdalambdaupnu;
290	nu = nu*2;
291	continue;
292	}
293	if( !spdsolve.spdmatrixcholeskysolve(ref hmod, g, wcount, true, ref wdir) )
294	{
295	lambda = lambdalambdaupnu;
296	nu = nu*2;
297	continue;
298	}
299	for(i_=0; i_<=wcount-1;i_++)
300	{
301	wdir[i_] = -1*wdir[i_];
302	}
303
304	//
305	// Lambda found.
306	// 1. Save old w in WBase
307	// 1. Test some stopping criterions
308	// 2. If error(w+wdir)>error(w), increase lambda
309	//
310	for(i_=0; i_<=wcount-1;i_++)
311	{
312	network.weights[i_] = network.weights[i_] + wdir[i_];
313	}
314	xnorm2 = 0.0;
315	for(i_=0; i_<=wcount-1;i_++)
316	{
317	xnorm2 += network.weights[i_]*network.weights[i_];
318	}
319	stepnorm = 0.0;
320	for(i_=0; i_<=wcount-1;i_++)
321	{
322	stepnorm += wdir[i_]*wdir[i_];
323	}
324	stepnorm = Math.Sqrt(stepnorm);
325	enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5decayxnorm2;
326	if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
327	{
328	break;
329	}
330	if( (double)(enew)>(double)(e) )
331	{
332	lambda = lambdalambdaupnu;
333	nu = nu*2;
334	continue;
335	}
336
337	//
338	// Optimize using inv(cholesky(H)) as preconditioner
339	//
340	if( !trinverse.rmatrixtrinverse(ref hmod, wcount, true, false) )
341	{
342
343	//
344	// if matrix can't be inverted then exit with errors
345	// TODO: make WCount steps in direction suggested by HMod
346	//
347	info = -9;
348	return;
349	}
350	for(i_=0; i_<=wcount-1;i_++)
351	{
352	wbase[i_] = network.weights[i_];
353	}
354	for(i=0; i<=wcount-1; i++)
355	{
356	wt[i] = 0;
357	}
358	lbfgs.minlbfgs(wcount, wcount, ref wt, 0.0, 0.0, 0.0, 5, 0, ref state);
359	while( lbfgs.minlbfgsiteration(ref state) )
360	{
361
362	//
363	// gradient
364	//
365	for(i=0; i<=wcount-1; i++)
366	{
367	v = 0.0;
368	for(i_=i; i_<=wcount-1;i_++)
369	{
370	v += state.x[i_]*hmod[i,i_];
371	}
372	network.weights[i] = wbase[i]+v;
373	}
374	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
375	for(i=0; i<=wcount-1; i++)
376	{
377	state.g[i] = 0;
378	}
379	for(i=0; i<=wcount-1; i++)
380	{
381	v = g[i];
382	for(i_=i; i_<=wcount-1;i_++)
383	{
384	state.g[i_] = state.g[i_] + v*hmod[i,i_];
385	}
386	}
387
388	//
389	// weight decay
390	// grad(x'x) = A'(x0+A*t)
391	//
392	v = 0.0;
393	for(i_=0; i_<=wcount-1;i_++)
394	{
395	v += network.weights[i_]*network.weights[i_];
396	}
397	state.f = state.f+0.5decayv;
398	for(i=0; i<=wcount-1; i++)
399	{
400	v = decay*network.weights[i];
401	for(i_=i; i_<=wcount-1;i_++)
402	{
403	state.g[i_] = state.g[i_] + v*hmod[i,i_];
404	}
405	}
406
407	//
408	// next iteration
409	//
410	rep.ngrad = rep.ngrad+1;
411	}
412	lbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);
413
414	//
415	// Accept new position.
416	// Calculate Hessian
417	//
418	for(i=0; i<=wcount-1; i++)
419	{
420	v = 0.0;
421	for(i_=i; i_<=wcount-1;i_++)
422	{
423	v += wt[i_]*hmod[i,i_];
424	}
425	network.weights[i] = wbase[i]+v;
426	}
427	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
428	v = 0.0;
429	for(i_=0; i_<=wcount-1;i_++)
430	{
431	v += network.weights[i_]*network.weights[i_];
432	}
433	e = e+0.5decayv;
434	for(i_=0; i_<=wcount-1;i_++)
435	{
436	g[i_] = g[i_] + decay*network.weights[i_];
437	}
438	for(k=0; k<=wcount-1; k++)
439	{
440	h[k,k] = h[k,k]+decay;
441	}
442	rep.nhess = rep.nhess+1;
443
444	//
445	// Update lambda
446	//
447	lambda = lambda*lambdadown;
448	nu = 2;
449	}
450
451	//
452	// update WBest
453	//
454	v = 0.0;
455	for(i_=0; i_<=wcount-1;i_++)
456	{
457	v += network.weights[i_]*network.weights[i_];
458	}
459	e = 0.5decayv+mlpbase.mlperror(ref network, ref xy, npoints);
460	if( (double)(e)<(double)(ebest) )
461	{
462	ebest = e;
463	for(i_=0; i_<=wcount-1;i_++)
464	{
465	wbest[i_] = network.weights[i_];
466	}
467	}
468	}
469
470	//
471	// copy WBest to output
472	//
473	for(i_=0; i_<=wcount-1;i_++)
474	{
475	network.weights[i_] = wbest[i_];
476	}
477	}
478
479
480	/*************************************************************************
481	Neural network training using L-BFGS algorithm with regularization.
482	Subroutine trains neural network with restarts from random positions.
483	Algorithm is well suited for problems of any dimensionality (memory
484	requirements and step complexity are linear by weights number).
485
486	INPUT PARAMETERS:
487	Network - neural network with initialized geometry
488	XY - training set
489	NPoints - training set size
490	Decay - weight decay constant, >=0.001
491	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
492	function.
493	If you don't know what Decay to choose, use 0.001.
494	Restarts - number of restarts from random position, >0.
495	If you don't know what Restarts to choose, use 2.
496	WStep - stopping criterion. Algorithm stops if step size is
497	less than WStep. Recommended value - 0.01. Zero step
498	size means stopping after MaxIts iterations.
499	MaxIts - stopping criterion. Algorithm stops after MaxIts
500	iterations (NOT gradient calculations). Zero MaxIts
501	means stopping when step is sufficiently small.
502
503	OUTPUT PARAMETERS:
504	Network - trained neural network.
505	Info - return code:
506	* -8, if both WStep=0 and MaxIts=0
507	* -2, if there is a point with class number
508	outside of [0..NOut-1].
509	* -1, if wrong parameters specified
510	(NPoints<0, Restarts<1).
511	* 2, if task has been solved.
512	Rep - training report
513
514	-- ALGLIB --
515	Copyright 09.12.2007 by Bochkanov Sergey
516	*************************************************************************/
517	public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
518	ref double[,] xy,
519	int npoints,
520	double decay,
521	int restarts,
522	double wstep,
523	int maxits,
524	ref int info,
525	ref mlpreport rep)
526	{
527	int i = 0;
528	int j = 0;
529	int pass = 0;
530	int nin = 0;
531	int nout = 0;
532	int wcount = 0;
533	double[] w = new double[0];
534	double[] wbest = new double[0];
535	double e = 0;
536	double v = 0;
537	double ebest = 0;
538	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
539	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
540	int i_ = 0;
541
542
543	//
544	// Test inputs, parse flags, read network geometry
545	//
546	if( (double)(wstep)==(double)(0) & maxits==0 )
547	{
548	info = -8;
549	return;
550	}
551	if( npoints<=0 \| restarts<1 \| (double)(wstep)<(double)(0) \| maxits<0 )
552	{
553	info = -1;
554	return;
555	}
556	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
557	if( mlpbase.mlpissoftmax(ref network) )
558	{
559	for(i=0; i<=npoints-1; i++)
560	{
561	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
562	{
563	info = -2;
564	return;
565	}
566	}
567	}
568	decay = Math.Max(decay, mindecay);
569	info = 2;
570
571	//
572	// Prepare
573	//
574	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
575	w = new double[wcount-1+1];
576	wbest = new double[wcount-1+1];
577	ebest = AP.Math.MaxRealNumber;
578
579	//
580	// Multiple starts
581	//
582	rep.ncholesky = 0;
583	rep.nhess = 0;
584	rep.ngrad = 0;
585	for(pass=1; pass<=restarts; pass++)
586	{
587
588	//
589	// Process
590	//
591	mlpbase.mlprandomize(ref network);
592	for(i_=0; i_<=wcount-1;i_++)
593	{
594	w[i_] = network.weights[i_];
595	}
596	lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, maxits, 0, ref state);
597	while( lbfgs.minlbfgsiteration(ref state) )
598	{
599	for(i_=0; i_<=wcount-1;i_++)
600	{
601	network.weights[i_] = state.x[i_];
602	}
603	mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
604	v = 0.0;
605	for(i_=0; i_<=wcount-1;i_++)
606	{
607	v += network.weights[i_]*network.weights[i_];
608	}
609	state.f = state.f+0.5decayv;
610	for(i_=0; i_<=wcount-1;i_++)
611	{
612	state.g[i_] = state.g[i_] + decay*network.weights[i_];
613	}
614	rep.ngrad = rep.ngrad+1;
615	}
616	lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
617	for(i_=0; i_<=wcount-1;i_++)
618	{
619	network.weights[i_] = w[i_];
620	}
621
622	//
623	// Compare with best
624	//
625	v = 0.0;
626	for(i_=0; i_<=wcount-1;i_++)
627	{
628	v += network.weights[i_]*network.weights[i_];
629	}
630	e = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5decayv;
631	if( (double)(e)<(double)(ebest) )
632	{
633	for(i_=0; i_<=wcount-1;i_++)
634	{
635	wbest[i_] = network.weights[i_];
636	}
637	ebest = e;
638	}
639	}
640
641	//
642	// The best network
643	//
644	for(i_=0; i_<=wcount-1;i_++)
645	{
646	network.weights[i_] = wbest[i_];
647	}
648	}
649
650
651	/*************************************************************************
652	Neural network training using early stopping (base algorithm - L-BFGS with
653	regularization).
654
655	INPUT PARAMETERS:
656	Network - neural network with initialized geometry
657	TrnXY - training set
658	TrnSize - training set size
659	ValXY - validation set
660	ValSize - validation set size
661	Decay - weight decay constant, >=0.001
662	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
663	function.
664	If you don't know what Decay to choose, use 0.001.
665	Restarts - number of restarts from random position, >0.
666	If you don't know what Restarts to choose, use 2.
667
668	OUTPUT PARAMETERS:
669	Network - trained neural network.
670	Info - return code:
671	* -2, if there is a point with class number
672	outside of [0..NOut-1].
673	* -1, if wrong parameters specified
674	(NPoints<0, Restarts<1, ...).
675	* 2, task has been solved, stopping criterion met -
676	sufficiently small step size. Not expected (we
677	use EARLY stopping) but possible and not an
678	error.
679	* 6, task has been solved, stopping criterion met -
680	increasing of validation set error.
681	Rep - training report
682
683	NOTE:
684
685	Algorithm stops if validation set error increases for a long enough or
686	step size is small enought (there are task where validation set may
687	decrease for eternity). In any case solution returned corresponds to the
688	minimum of validation set error.
689
690	-- ALGLIB --
691	Copyright 10.03.2009 by Bochkanov Sergey
692	*************************************************************************/
693	public static void mlptraines(ref mlpbase.multilayerperceptron network,
694	ref double[,] trnxy,
695	int trnsize,
696	ref double[,] valxy,
697	int valsize,
698	double decay,
699	int restarts,
700	ref int info,
701	ref mlpreport rep)
702	{
703	int i = 0;
704	int j = 0;
705	int pass = 0;
706	int nin = 0;
707	int nout = 0;
708	int wcount = 0;
709	double[] w = new double[0];
710	double[] wbest = new double[0];
711	double e = 0;
712	double v = 0;
713	double ebest = 0;
714	double[] wfinal = new double[0];
715	double efinal = 0;
716	int itbest = 0;
717	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
718	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
719	double wstep = 0;
720	int i_ = 0;
721
722	wstep = 0.001;
723
724	//
725	// Test inputs, parse flags, read network geometry
726	//
727	if( trnsize<=0 \| valsize<=0 \| restarts<1 \| (double)(decay)<(double)(0) )
728	{
729	info = -1;
730	return;
731	}
732	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
733	if( mlpbase.mlpissoftmax(ref network) )
734	{
735	for(i=0; i<=trnsize-1; i++)
736	{
737	if( (int)Math.Round(trnxy[i,nin])<0 \| (int)Math.Round(trnxy[i,nin])>=nout )
738	{
739	info = -2;
740	return;
741	}
742	}
743	for(i=0; i<=valsize-1; i++)
744	{
745	if( (int)Math.Round(valxy[i,nin])<0 \| (int)Math.Round(valxy[i,nin])>=nout )
746	{
747	info = -2;
748	return;
749	}
750	}
751	}
752	info = 2;
753
754	//
755	// Prepare
756	//
757	mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
758	w = new double[wcount-1+1];
759	wbest = new double[wcount-1+1];
760	wfinal = new double[wcount-1+1];
761	efinal = AP.Math.MaxRealNumber;
762	for(i=0; i<=wcount-1; i++)
763	{
764	wfinal[i] = 0;
765	}
766
767	//
768	// Multiple starts
769	//
770	rep.ncholesky = 0;
771	rep.nhess = 0;
772	rep.ngrad = 0;
773	for(pass=1; pass<=restarts; pass++)
774	{
775
776	//
777	// Process
778	//
779	mlpbase.mlprandomize(ref network);
780	ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
781	for(i_=0; i_<=wcount-1;i_++)
782	{
783	wbest[i_] = network.weights[i_];
784	}
785	itbest = 0;
786	for(i_=0; i_<=wcount-1;i_++)
787	{
788	w[i_] = network.weights[i_];
789	}
790	lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, 0, 0, ref state);
791	while( lbfgs.minlbfgsiteration(ref state) )
792	{
793
794	//
795	// Calculate gradient
796	//
797	for(i_=0; i_<=wcount-1;i_++)
798	{
799	network.weights[i_] = state.x[i_];
800	}
801	mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
802	v = 0.0;
803	for(i_=0; i_<=wcount-1;i_++)
804	{
805	v += network.weights[i_]*network.weights[i_];
806	}
807	state.f = state.f+0.5decayv;
808	for(i_=0; i_<=wcount-1;i_++)
809	{
810	state.g[i_] = state.g[i_] + decay*network.weights[i_];
811	}
812	rep.ngrad = rep.ngrad+1;
813
814	//
815	// Validation set
816	//
817	if( state.xupdated )
818	{
819	for(i_=0; i_<=wcount-1;i_++)
820	{
821	network.weights[i_] = w[i_];
822	}
823	e = mlpbase.mlperror(ref network, ref valxy, valsize);
824	if( (double)(e)<(double)(ebest) )
825	{
826	ebest = e;
827	for(i_=0; i_<=wcount-1;i_++)
828	{
829	wbest[i_] = network.weights[i_];
830	}
831	itbest = internalrep.iterationscount;
832	}
833	if( internalrep.iterationscount>30 & (double)(internalrep.iterationscount)>(double)(1.5*itbest) )
834	{
835	info = 6;
836	break;
837	}
838	}
839	}
840	lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
841
842	//
843	// Compare with final answer
844	//
845	if( (double)(ebest)<(double)(efinal) )
846	{
847	for(i_=0; i_<=wcount-1;i_++)
848	{
849	wfinal[i_] = wbest[i_];
850	}
851	efinal = ebest;
852	}
853	}
854
855	//
856	// The best network
857	//
858	for(i_=0; i_<=wcount-1;i_++)
859	{
860	network.weights[i_] = wfinal[i_];
861	}
862	}
863
864
865	/*************************************************************************
866	Cross-validation estimate of generalization error.
867
868	Base algorithm - L-BFGS.
869
870	INPUT PARAMETERS:
871	Network - neural network with initialized geometry. Network is
872	not changed during cross-validation - it is used only
873	as a representative of its architecture.
874	XY - training set.
875	SSize - training set size
876	Decay - weight decay, same as in MLPTrainLBFGS
877	Restarts - number of restarts, >0.
878	restarts are counted for each partition separately, so
879	total number of restarts will be Restarts*FoldsCount.
880	WStep - stopping criterion, same as in MLPTrainLBFGS
881	MaxIts - stopping criterion, same as in MLPTrainLBFGS
882	FoldsCount - number of folds in k-fold cross-validation,
883	2<=FoldsCount<=SSize.
884	recommended value: 10.
885
886	OUTPUT PARAMETERS:
887	Info - return code, same as in MLPTrainLBFGS
888	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
889	CVRep - generalization error estimates
890
891	-- ALGLIB --
892	Copyright 09.12.2007 by Bochkanov Sergey
893	*************************************************************************/
894	public static void mlpkfoldcvlbfgs(ref mlpbase.multilayerperceptron network,
895	ref double[,] xy,
896	int npoints,
897	double decay,
898	int restarts,
899	double wstep,
900	int maxits,
901	int foldscount,
902	ref int info,
903	ref mlpreport rep,
904	ref mlpcvreport cvrep)
905	{
906	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, ref rep, ref cvrep);
907	}
908
909
910	/*************************************************************************
911	Cross-validation estimate of generalization error.
912
913	Base algorithm - Levenberg-Marquardt.
914
915	INPUT PARAMETERS:
916	Network - neural network with initialized geometry. Network is
917	not changed during cross-validation - it is used only
918	as a representative of its architecture.
919	XY - training set.
920	SSize - training set size
921	Decay - weight decay, same as in MLPTrainLBFGS
922	Restarts - number of restarts, >0.
923	restarts are counted for each partition separately, so
924	total number of restarts will be Restarts*FoldsCount.
925	FoldsCount - number of folds in k-fold cross-validation,
926	2<=FoldsCount<=SSize.
927	recommended value: 10.
928
929	OUTPUT PARAMETERS:
930	Info - return code, same as in MLPTrainLBFGS
931	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
932	CVRep - generalization error estimates
933
934	-- ALGLIB --
935	Copyright 09.12.2007 by Bochkanov Sergey
936	*************************************************************************/
937	public static void mlpkfoldcvlm(ref mlpbase.multilayerperceptron network,
938	ref double[,] xy,
939	int npoints,
940	double decay,
941	int restarts,
942	int foldscount,
943	ref int info,
944	ref mlpreport rep,
945	ref mlpcvreport cvrep)
946	{
947	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, ref rep, ref cvrep);
948	}
949
950
951	/*************************************************************************
952	Internal cross-validation subroutine
953	*************************************************************************/
954	private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n,
955	ref double[,] xy,
956	int npoints,
957	double decay,
958	int restarts,
959	int foldscount,
960	bool lmalgorithm,
961	double wstep,
962	int maxits,
963	ref int info,
964	ref mlpreport rep,
965	ref mlpcvreport cvrep)
966	{
967	int i = 0;
968	int fold = 0;
969	int j = 0;
970	int k = 0;
971	mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
972	int nin = 0;
973	int nout = 0;
974	int rowlen = 0;
975	int wcount = 0;
976	int nclasses = 0;
977	int tssize = 0;
978	int cvssize = 0;
979	double[,] cvset = new double[0,0];
980	double[,] testset = new double[0,0];
981	int[] folds = new int[0];
982	int relcnt = 0;
983	mlpreport internalrep = new mlpreport();
984	double[] x = new double[0];
985	double[] y = new double[0];
986	int i_ = 0;
987
988
989	//
990	// Read network geometry, test parameters
991	//
992	mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
993	if( mlpbase.mlpissoftmax(ref n) )
994	{
995	nclasses = nout;
996	rowlen = nin+1;
997	}
998	else
999	{
1000	nclasses = -nout;
1001	rowlen = nin+nout;
1002	}
1003	if( npoints<=0 \| foldscount<2 \| foldscount>npoints )
1004	{
1005	info = -1;
1006	return;
1007	}
1008	mlpbase.mlpcopy(ref n, ref network);
1009
1010	//
1011	// K-fold out cross-validation.
1012	// First, estimate generalization error
1013	//
1014	testset = new double[npoints-1+1, rowlen-1+1];
1015	cvset = new double[npoints-1+1, rowlen-1+1];
1016	x = new double[nin-1+1];
1017	y = new double[nout-1+1];
1018	mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
1019	cvrep.relclserror = 0;
1020	cvrep.avgce = 0;
1021	cvrep.rmserror = 0;
1022	cvrep.avgerror = 0;
1023	cvrep.avgrelerror = 0;
1024	rep.ngrad = 0;
1025	rep.nhess = 0;
1026	rep.ncholesky = 0;
1027	relcnt = 0;
1028	for(fold=0; fold<=foldscount-1; fold++)
1029	{
1030
1031	//
1032	// Separate set
1033	//
1034	tssize = 0;
1035	cvssize = 0;
1036	for(i=0; i<=npoints-1; i++)
1037	{
1038	if( folds[i]==fold )
1039	{
1040	for(i_=0; i_<=rowlen-1;i_++)
1041	{
1042	testset[tssize,i_] = xy[i,i_];
1043	}
1044	tssize = tssize+1;
1045	}
1046	else
1047	{
1048	for(i_=0; i_<=rowlen-1;i_++)
1049	{
1050	cvset[cvssize,i_] = xy[i,i_];
1051	}
1052	cvssize = cvssize+1;
1053	}
1054	}
1055
1056	//
1057	// Train on CV training set
1058	//
1059	if( lmalgorithm )
1060	{
1061	mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
1062	}
1063	else
1064	{
1065	mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
1066	}
1067	if( info<0 )
1068	{
1069	cvrep.relclserror = 0;
1070	cvrep.avgce = 0;
1071	cvrep.rmserror = 0;
1072	cvrep.avgerror = 0;
1073	cvrep.avgrelerror = 0;
1074	return;
1075	}
1076	rep.ngrad = rep.ngrad+internalrep.ngrad;
1077	rep.nhess = rep.nhess+internalrep.nhess;
1078	rep.ncholesky = rep.ncholesky+internalrep.ncholesky;
1079
1080	//
1081	// Estimate error using CV test set
1082	//
1083	if( mlpbase.mlpissoftmax(ref network) )
1084	{
1085
1086	//
1087	// classification-only code
1088	//
1089	cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
1090	cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
1091	}
1092	for(i=0; i<=tssize-1; i++)
1093	{
1094	for(i_=0; i_<=nin-1;i_++)
1095	{
1096	x[i_] = testset[i,i_];
1097	}
1098	mlpbase.mlpprocess(ref network, ref x, ref y);
1099	if( mlpbase.mlpissoftmax(ref network) )
1100	{
1101
1102	//
1103	// Classification-specific code
1104	//
1105	k = (int)Math.Round(testset[i,nin]);
1106	for(j=0; j<=nout-1; j++)
1107	{
1108	if( j==k )
1109	{
1110	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
1111	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
1112	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
1113	relcnt = relcnt+1;
1114	}
1115	else
1116	{
1117	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
1118	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
1119	}
1120	}
1121	}
1122	else
1123	{
1124
1125	//
1126	// Regression-specific code
1127	//
1128	for(j=0; j<=nout-1; j++)
1129	{
1130	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
1131	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
1132	if( (double)(testset[i,nin+j])!=(double)(0) )
1133	{
1134	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
1135	relcnt = relcnt+1;
1136	}
1137	}
1138	}
1139	}
1140	}
1141	if( mlpbase.mlpissoftmax(ref network) )
1142	{
1143	cvrep.relclserror = cvrep.relclserror/npoints;
1144	cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
1145	}
1146	cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
1147	cvrep.avgerror = cvrep.avgerror/(npoints*nout);
1148	cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
1149	info = 1;
1150	}
1151
1152
1153	/*************************************************************************
1154	Subroutine prepares K-fold split of the training set.
1155
1156	NOTES:
1157	"NClasses>0" means that we have classification task.
1158	"NClasses<0" means regression task with -NClasses real outputs.
1159	*************************************************************************/
1160	private static void mlpkfoldsplit(ref double[,] xy,
1161	int npoints,
1162	int nclasses,
1163	int foldscount,
1164	bool stratifiedsplits,
1165	ref int[] folds)
1166	{
1167	int i = 0;
1168	int j = 0;
1169	int k = 0;
1170
1171
1172	//
1173	// test parameters
1174	//
1175	System.Diagnostics.Debug.Assert(npoints>0, "MLPKFoldSplit: wrong NPoints!");
1176	System.Diagnostics.Debug.Assert(nclasses>1 \| nclasses<0, "MLPKFoldSplit: wrong NClasses!");
1177	System.Diagnostics.Debug.Assert(foldscount>=2 & foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!");
1178	System.Diagnostics.Debug.Assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!");
1179
1180	//
1181	// Folds
1182	//
1183	folds = new int[npoints-1+1];
1184	for(i=0; i<=npoints-1; i++)
1185	{
1186	folds[i] = i*foldscount/npoints;
1187	}
1188	for(i=0; i<=npoints-2; i++)
1189	{
1190	j = i+AP.Math.RandomInteger(npoints-i);
1191	if( j!=i )
1192	{
1193	k = folds[i];
1194	folds[i] = folds[j];
1195	folds[j] = k;
1196	}
1197	}
1198	}
1199	}
1200	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences