Context Navigation

source: branches/3.2/sources/HeuristicLab.ExtLibs/HeuristicLab.ALGLIB/2.3.0/ALGLIB-2.3.0/mlptrain.cs @ 9280

Visit:

Last change on this file since 9280 was 2806, checked in by gkronber, 15 years ago
Added plugin for new version of ALGLIB. #875 (Update ALGLIB sources)
File size: 45.6 KB

Line
1	/*************************************************************************
2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
3
4	>>> SOURCE LICENSE >>>
5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation (www.fsf.org); either version 2 of the
8	License, or (at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	A copy of the GNU General Public License is available at
16	http://www.fsf.org/licensing/licenses
17
18	>>> END OF LICENSE >>>
19	*************************************************************************/
20
21	using System;
22
23	namespace alglib
24	{
25	public class mlptrain
26	{
27	/*************************************************************************
28	Training report:
29	* NGrad - number of gradient calculations
30	* NHess - number of Hessian calculations
31	* NCholesky - number of Cholesky decompositions
32	*************************************************************************/
33	public struct mlpreport
34	{
35	public int ngrad;
36	public int nhess;
37	public int ncholesky;
38	};
39
40
41	/*************************************************************************
42	Cross-validation estimates of generalization error
43	*************************************************************************/
44	public struct mlpcvreport
45	{
46	public double relclserror;
47	public double avgce;
48	public double rmserror;
49	public double avgerror;
50	public double avgrelerror;
51	};
52
53
54
55
56	public const double mindecay = 0.001;
57
58
59	/*************************************************************************
60	Neural network training using modified Levenberg-Marquardt with exact
61	Hessian calculation and regularization. Subroutine trains neural network
62	with restarts from random positions. Algorithm is well suited for small
63	and medium scale problems (hundreds of weights).
64
65	INPUT PARAMETERS:
66	Network - neural network with initialized geometry
67	XY - training set
68	NPoints - training set size
69	Decay - weight decay constant, >=0.001
70	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
71	function.
72	If you don't know what Decay to choose, use 0.001.
73	Restarts - number of restarts from random position, >0.
74	If you don't know what Restarts to choose, use 2.
75
76	OUTPUT PARAMETERS:
77	Network - trained neural network.
78	Info - return code:
79	* -9, if internal matrix inverse subroutine failed
80	* -2, if there is a point with class number
81	outside of [0..NOut-1].
82	* -1, if wrong parameters specified
83	(NPoints<0, Restarts<1).
84	* 2, if task has been solved.
85	Rep - training report
86
87	-- ALGLIB --
88	Copyright 10.03.2009 by Bochkanov Sergey
89	*************************************************************************/
90	public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
91	ref double[,] xy,
92	int npoints,
93	double decay,
94	int restarts,
95	ref int info,
96	ref mlpreport rep)
97	{
98	int nin = 0;
99	int nout = 0;
100	int wcount = 0;
101	double lmftol = 0;
102	double lmsteptol = 0;
103	int i = 0;
104	int k = 0;
105	double v = 0;
106	double e = 0;
107	double enew = 0;
108	double xnorm2 = 0;
109	double stepnorm = 0;
110	double[] g = new double[0];
111	double[] d = new double[0];
112	double[,] h = new double[0,0];
113	double[,] hmod = new double[0,0];
114	double[,] z = new double[0,0];
115	bool spd = new bool();
116	double nu = 0;
117	double lambda = 0;
118	double lambdaup = 0;
119	double lambdadown = 0;
120	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
121	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
122	double[] x = new double[0];
123	double[] y = new double[0];
124	double[] wbase = new double[0];
125	double[] wdir = new double[0];
126	double[] wt = new double[0];
127	double[] wx = new double[0];
128	int pass = 0;
129	double[] wbest = new double[0];
130	double ebest = 0;
131	int solverinfo = 0;
132	densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
133	int i_ = 0;
134
135	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
136	lambdaup = 10;
137	lambdadown = 0.3;
138	lmftol = 0.001;
139	lmsteptol = 0.001;
140
141	//
142	// Test for inputs
143	//
144	if( npoints<=0 \| restarts<1 )
145	{
146	info = -1;
147	return;
148	}
149	if( mlpbase.mlpissoftmax(ref network) )
150	{
151	for(i=0; i<=npoints-1; i++)
152	{
153	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
154	{
155	info = -2;
156	return;
157	}
158	}
159	}
160	decay = Math.Max(decay, mindecay);
161	info = 2;
162
163	//
164	// Initialize data
165	//
166	rep.ngrad = 0;
167	rep.nhess = 0;
168	rep.ncholesky = 0;
169
170	//
171	// General case.
172	// Prepare task and network. Allocate space.
173	//
174	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
175	g = new double[wcount-1+1];
176	h = new double[wcount-1+1, wcount-1+1];
177	hmod = new double[wcount-1+1, wcount-1+1];
178	wbase = new double[wcount-1+1];
179	wdir = new double[wcount-1+1];
180	wbest = new double[wcount-1+1];
181	wt = new double[wcount-1+1];
182	wx = new double[wcount-1+1];
183	ebest = AP.Math.MaxRealNumber;
184
185	//
186	// Multiple passes
187	//
188	for(pass=1; pass<=restarts; pass++)
189	{
190
191	//
192	// Initialize weights
193	//
194	mlpbase.mlprandomize(ref network);
195
196	//
197	// First stage of the hybrid algorithm: LBFGS
198	//
199	for(i_=0; i_<=wcount-1;i_++)
200	{
201	wbase[i_] = network.weights[i_];
202	}
203	lbfgs.minlbfgs(wcount, Math.Min(wcount, 5), ref wbase, 0.0, 0.0, 0.0, Math.Max(25, wcount), 0, ref state);
204	while( lbfgs.minlbfgsiteration(ref state) )
205	{
206
207	//
208	// gradient
209	//
210	for(i_=0; i_<=wcount-1;i_++)
211	{
212	network.weights[i_] = state.x[i_];
213	}
214	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
215
216	//
217	// weight decay
218	//
219	v = 0.0;
220	for(i_=0; i_<=wcount-1;i_++)
221	{
222	v += network.weights[i_]*network.weights[i_];
223	}
224	state.f = state.f+0.5decayv;
225	for(i_=0; i_<=wcount-1;i_++)
226	{
227	state.g[i_] = state.g[i_] + decay*network.weights[i_];
228	}
229
230	//
231	// next iteration
232	//
233	rep.ngrad = rep.ngrad+1;
234	}
235	lbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
236	for(i_=0; i_<=wcount-1;i_++)
237	{
238	network.weights[i_] = wbase[i_];
239	}
240
241	//
242	// Second stage of the hybrid algorithm: LM
243	//
244	// Initialize H with identity matrix,
245	// G with gradient,
246	// E with regularized error.
247	//
248	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
249	v = 0.0;
250	for(i_=0; i_<=wcount-1;i_++)
251	{
252	v += network.weights[i_]*network.weights[i_];
253	}
254	e = e+0.5decayv;
255	for(i_=0; i_<=wcount-1;i_++)
256	{
257	g[i_] = g[i_] + decay*network.weights[i_];
258	}
259	for(k=0; k<=wcount-1; k++)
260	{
261	h[k,k] = h[k,k]+decay;
262	}
263	rep.nhess = rep.nhess+1;
264	lambda = 0.001;
265	nu = 2;
266	while( true )
267	{
268
269	//
270	// 1. HMod = H+lambda*I
271	// 2. Try to solve (H+LambdaI)dx = -g.
272	// Increase lambda if left part is not positive definite.
273	//
274	for(i=0; i<=wcount-1; i++)
275	{
276	for(i_=0; i_<=wcount-1;i_++)
277	{
278	hmod[i,i_] = h[i,i_];
279	}
280	hmod[i,i] = hmod[i,i]+lambda;
281	}
282	spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
283	rep.ncholesky = rep.ncholesky+1;
284	if( !spd )
285	{
286	lambda = lambdalambdaupnu;
287	nu = nu*2;
288	continue;
289	}
290	densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
291	if( solverinfo<0 )
292	{
293	lambda = lambdalambdaupnu;
294	nu = nu*2;
295	continue;
296	}
297	for(i_=0; i_<=wcount-1;i_++)
298	{
299	wdir[i_] = -1*wdir[i_];
300	}
301
302	//
303	// Lambda found.
304	// 1. Save old w in WBase
305	// 1. Test some stopping criterions
306	// 2. If error(w+wdir)>error(w), increase lambda
307	//
308	for(i_=0; i_<=wcount-1;i_++)
309	{
310	network.weights[i_] = network.weights[i_] + wdir[i_];
311	}
312	xnorm2 = 0.0;
313	for(i_=0; i_<=wcount-1;i_++)
314	{
315	xnorm2 += network.weights[i_]*network.weights[i_];
316	}
317	stepnorm = 0.0;
318	for(i_=0; i_<=wcount-1;i_++)
319	{
320	stepnorm += wdir[i_]*wdir[i_];
321	}
322	stepnorm = Math.Sqrt(stepnorm);
323	enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5decayxnorm2;
324	if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
325	{
326	break;
327	}
328	if( (double)(enew)>(double)(e) )
329	{
330	lambda = lambdalambdaupnu;
331	nu = nu*2;
332	continue;
333	}
334
335	//
336	// Optimize using inv(cholesky(H)) as preconditioner
337	//
338	if( !trinverse.rmatrixtrinverse(ref hmod, wcount, true, false) )
339	{
340
341	//
342	// if matrix can't be inverted then exit with errors
343	// TODO: make WCount steps in direction suggested by HMod
344	//
345	info = -9;
346	return;
347	}
348	for(i_=0; i_<=wcount-1;i_++)
349	{
350	wbase[i_] = network.weights[i_];
351	}
352	for(i=0; i<=wcount-1; i++)
353	{
354	wt[i] = 0;
355	}
356	lbfgs.minlbfgs(wcount, wcount, ref wt, 0.0, 0.0, 0.0, 5, 0, ref state);
357	while( lbfgs.minlbfgsiteration(ref state) )
358	{
359
360	//
361	// gradient
362	//
363	for(i=0; i<=wcount-1; i++)
364	{
365	v = 0.0;
366	for(i_=i; i_<=wcount-1;i_++)
367	{
368	v += state.x[i_]*hmod[i,i_];
369	}
370	network.weights[i] = wbase[i]+v;
371	}
372	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
373	for(i=0; i<=wcount-1; i++)
374	{
375	state.g[i] = 0;
376	}
377	for(i=0; i<=wcount-1; i++)
378	{
379	v = g[i];
380	for(i_=i; i_<=wcount-1;i_++)
381	{
382	state.g[i_] = state.g[i_] + v*hmod[i,i_];
383	}
384	}
385
386	//
387	// weight decay
388	// grad(x'x) = A'(x0+A*t)
389	//
390	v = 0.0;
391	for(i_=0; i_<=wcount-1;i_++)
392	{
393	v += network.weights[i_]*network.weights[i_];
394	}
395	state.f = state.f+0.5decayv;
396	for(i=0; i<=wcount-1; i++)
397	{
398	v = decay*network.weights[i];
399	for(i_=i; i_<=wcount-1;i_++)
400	{
401	state.g[i_] = state.g[i_] + v*hmod[i,i_];
402	}
403	}
404
405	//
406	// next iteration
407	//
408	rep.ngrad = rep.ngrad+1;
409	}
410	lbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);
411
412	//
413	// Accept new position.
414	// Calculate Hessian
415	//
416	for(i=0; i<=wcount-1; i++)
417	{
418	v = 0.0;
419	for(i_=i; i_<=wcount-1;i_++)
420	{
421	v += wt[i_]*hmod[i,i_];
422	}
423	network.weights[i] = wbase[i]+v;
424	}
425	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
426	v = 0.0;
427	for(i_=0; i_<=wcount-1;i_++)
428	{
429	v += network.weights[i_]*network.weights[i_];
430	}
431	e = e+0.5decayv;
432	for(i_=0; i_<=wcount-1;i_++)
433	{
434	g[i_] = g[i_] + decay*network.weights[i_];
435	}
436	for(k=0; k<=wcount-1; k++)
437	{
438	h[k,k] = h[k,k]+decay;
439	}
440	rep.nhess = rep.nhess+1;
441
442	//
443	// Update lambda
444	//
445	lambda = lambda*lambdadown;
446	nu = 2;
447	}
448
449	//
450	// update WBest
451	//
452	v = 0.0;
453	for(i_=0; i_<=wcount-1;i_++)
454	{
455	v += network.weights[i_]*network.weights[i_];
456	}
457	e = 0.5decayv+mlpbase.mlperror(ref network, ref xy, npoints);
458	if( (double)(e)<(double)(ebest) )
459	{
460	ebest = e;
461	for(i_=0; i_<=wcount-1;i_++)
462	{
463	wbest[i_] = network.weights[i_];
464	}
465	}
466	}
467
468	//
469	// copy WBest to output
470	//
471	for(i_=0; i_<=wcount-1;i_++)
472	{
473	network.weights[i_] = wbest[i_];
474	}
475	}
476
477
478	/*************************************************************************
479	Neural network training using L-BFGS algorithm with regularization.
480	Subroutine trains neural network with restarts from random positions.
481	Algorithm is well suited for problems of any dimensionality (memory
482	requirements and step complexity are linear by weights number).
483
484	INPUT PARAMETERS:
485	Network - neural network with initialized geometry
486	XY - training set
487	NPoints - training set size
488	Decay - weight decay constant, >=0.001
489	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
490	function.
491	If you don't know what Decay to choose, use 0.001.
492	Restarts - number of restarts from random position, >0.
493	If you don't know what Restarts to choose, use 2.
494	WStep - stopping criterion. Algorithm stops if step size is
495	less than WStep. Recommended value - 0.01. Zero step
496	size means stopping after MaxIts iterations.
497	MaxIts - stopping criterion. Algorithm stops after MaxIts
498	iterations (NOT gradient calculations). Zero MaxIts
499	means stopping when step is sufficiently small.
500
501	OUTPUT PARAMETERS:
502	Network - trained neural network.
503	Info - return code:
504	* -8, if both WStep=0 and MaxIts=0
505	* -2, if there is a point with class number
506	outside of [0..NOut-1].
507	* -1, if wrong parameters specified
508	(NPoints<0, Restarts<1).
509	* 2, if task has been solved.
510	Rep - training report
511
512	-- ALGLIB --
513	Copyright 09.12.2007 by Bochkanov Sergey
514	*************************************************************************/
515	public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
516	ref double[,] xy,
517	int npoints,
518	double decay,
519	int restarts,
520	double wstep,
521	int maxits,
522	ref int info,
523	ref mlpreport rep)
524	{
525	int i = 0;
526	int pass = 0;
527	int nin = 0;
528	int nout = 0;
529	int wcount = 0;
530	double[] w = new double[0];
531	double[] wbest = new double[0];
532	double e = 0;
533	double v = 0;
534	double ebest = 0;
535	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
536	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
537	int i_ = 0;
538
539
540	//
541	// Test inputs, parse flags, read network geometry
542	//
543	if( (double)(wstep)==(double)(0) & maxits==0 )
544	{
545	info = -8;
546	return;
547	}
548	if( npoints<=0 \| restarts<1 \| (double)(wstep)<(double)(0) \| maxits<0 )
549	{
550	info = -1;
551	return;
552	}
553	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
554	if( mlpbase.mlpissoftmax(ref network) )
555	{
556	for(i=0; i<=npoints-1; i++)
557	{
558	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
559	{
560	info = -2;
561	return;
562	}
563	}
564	}
565	decay = Math.Max(decay, mindecay);
566	info = 2;
567
568	//
569	// Prepare
570	//
571	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
572	w = new double[wcount-1+1];
573	wbest = new double[wcount-1+1];
574	ebest = AP.Math.MaxRealNumber;
575
576	//
577	// Multiple starts
578	//
579	rep.ncholesky = 0;
580	rep.nhess = 0;
581	rep.ngrad = 0;
582	for(pass=1; pass<=restarts; pass++)
583	{
584
585	//
586	// Process
587	//
588	mlpbase.mlprandomize(ref network);
589	for(i_=0; i_<=wcount-1;i_++)
590	{
591	w[i_] = network.weights[i_];
592	}
593	lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, maxits, 0, ref state);
594	while( lbfgs.minlbfgsiteration(ref state) )
595	{
596	for(i_=0; i_<=wcount-1;i_++)
597	{
598	network.weights[i_] = state.x[i_];
599	}
600	mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
601	v = 0.0;
602	for(i_=0; i_<=wcount-1;i_++)
603	{
604	v += network.weights[i_]*network.weights[i_];
605	}
606	state.f = state.f+0.5decayv;
607	for(i_=0; i_<=wcount-1;i_++)
608	{
609	state.g[i_] = state.g[i_] + decay*network.weights[i_];
610	}
611	rep.ngrad = rep.ngrad+1;
612	}
613	lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
614	for(i_=0; i_<=wcount-1;i_++)
615	{
616	network.weights[i_] = w[i_];
617	}
618
619	//
620	// Compare with best
621	//
622	v = 0.0;
623	for(i_=0; i_<=wcount-1;i_++)
624	{
625	v += network.weights[i_]*network.weights[i_];
626	}
627	e = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5decayv;
628	if( (double)(e)<(double)(ebest) )
629	{
630	for(i_=0; i_<=wcount-1;i_++)
631	{
632	wbest[i_] = network.weights[i_];
633	}
634	ebest = e;
635	}
636	}
637
638	//
639	// The best network
640	//
641	for(i_=0; i_<=wcount-1;i_++)
642	{
643	network.weights[i_] = wbest[i_];
644	}
645	}
646
647
648	/*************************************************************************
649	Neural network training using early stopping (base algorithm - L-BFGS with
650	regularization).
651
652	INPUT PARAMETERS:
653	Network - neural network with initialized geometry
654	TrnXY - training set
655	TrnSize - training set size
656	ValXY - validation set
657	ValSize - validation set size
658	Decay - weight decay constant, >=0.001
659	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
660	function.
661	If you don't know what Decay to choose, use 0.001.
662	Restarts - number of restarts from random position, >0.
663	If you don't know what Restarts to choose, use 2.
664
665	OUTPUT PARAMETERS:
666	Network - trained neural network.
667	Info - return code:
668	* -2, if there is a point with class number
669	outside of [0..NOut-1].
670	* -1, if wrong parameters specified
671	(NPoints<0, Restarts<1, ...).
672	* 2, task has been solved, stopping criterion met -
673	sufficiently small step size. Not expected (we
674	use EARLY stopping) but possible and not an
675	error.
676	* 6, task has been solved, stopping criterion met -
677	increasing of validation set error.
678	Rep - training report
679
680	NOTE:
681
682	Algorithm stops if validation set error increases for a long enough or
683	step size is small enought (there are task where validation set may
684	decrease for eternity). In any case solution returned corresponds to the
685	minimum of validation set error.
686
687	-- ALGLIB --
688	Copyright 10.03.2009 by Bochkanov Sergey
689	*************************************************************************/
690	public static void mlptraines(ref mlpbase.multilayerperceptron network,
691	ref double[,] trnxy,
692	int trnsize,
693	ref double[,] valxy,
694	int valsize,
695	double decay,
696	int restarts,
697	ref int info,
698	ref mlpreport rep)
699	{
700	int i = 0;
701	int pass = 0;
702	int nin = 0;
703	int nout = 0;
704	int wcount = 0;
705	double[] w = new double[0];
706	double[] wbest = new double[0];
707	double e = 0;
708	double v = 0;
709	double ebest = 0;
710	double[] wfinal = new double[0];
711	double efinal = 0;
712	int itbest = 0;
713	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
714	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
715	double wstep = 0;
716	int i_ = 0;
717
718	wstep = 0.001;
719
720	//
721	// Test inputs, parse flags, read network geometry
722	//
723	if( trnsize<=0 \| valsize<=0 \| restarts<1 \| (double)(decay)<(double)(0) )
724	{
725	info = -1;
726	return;
727	}
728	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
729	if( mlpbase.mlpissoftmax(ref network) )
730	{
731	for(i=0; i<=trnsize-1; i++)
732	{
733	if( (int)Math.Round(trnxy[i,nin])<0 \| (int)Math.Round(trnxy[i,nin])>=nout )
734	{
735	info = -2;
736	return;
737	}
738	}
739	for(i=0; i<=valsize-1; i++)
740	{
741	if( (int)Math.Round(valxy[i,nin])<0 \| (int)Math.Round(valxy[i,nin])>=nout )
742	{
743	info = -2;
744	return;
745	}
746	}
747	}
748	info = 2;
749
750	//
751	// Prepare
752	//
753	mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
754	w = new double[wcount-1+1];
755	wbest = new double[wcount-1+1];
756	wfinal = new double[wcount-1+1];
757	efinal = AP.Math.MaxRealNumber;
758	for(i=0; i<=wcount-1; i++)
759	{
760	wfinal[i] = 0;
761	}
762
763	//
764	// Multiple starts
765	//
766	rep.ncholesky = 0;
767	rep.nhess = 0;
768	rep.ngrad = 0;
769	for(pass=1; pass<=restarts; pass++)
770	{
771
772	//
773	// Process
774	//
775	mlpbase.mlprandomize(ref network);
776	ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
777	for(i_=0; i_<=wcount-1;i_++)
778	{
779	wbest[i_] = network.weights[i_];
780	}
781	itbest = 0;
782	for(i_=0; i_<=wcount-1;i_++)
783	{
784	w[i_] = network.weights[i_];
785	}
786	lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, 0, 0, ref state);
787	while( lbfgs.minlbfgsiteration(ref state) )
788	{
789
790	//
791	// Calculate gradient
792	//
793	for(i_=0; i_<=wcount-1;i_++)
794	{
795	network.weights[i_] = state.x[i_];
796	}
797	mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
798	v = 0.0;
799	for(i_=0; i_<=wcount-1;i_++)
800	{
801	v += network.weights[i_]*network.weights[i_];
802	}
803	state.f = state.f+0.5decayv;
804	for(i_=0; i_<=wcount-1;i_++)
805	{
806	state.g[i_] = state.g[i_] + decay*network.weights[i_];
807	}
808	rep.ngrad = rep.ngrad+1;
809
810	//
811	// Validation set
812	//
813	if( state.xupdated )
814	{
815	for(i_=0; i_<=wcount-1;i_++)
816	{
817	network.weights[i_] = w[i_];
818	}
819	e = mlpbase.mlperror(ref network, ref valxy, valsize);
820	if( (double)(e)<(double)(ebest) )
821	{
822	ebest = e;
823	for(i_=0; i_<=wcount-1;i_++)
824	{
825	wbest[i_] = network.weights[i_];
826	}
827	itbest = internalrep.iterationscount;
828	}
829	if( internalrep.iterationscount>30 & (double)(internalrep.iterationscount)>(double)(1.5*itbest) )
830	{
831	info = 6;
832	break;
833	}
834	}
835	}
836	lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
837
838	//
839	// Compare with final answer
840	//
841	if( (double)(ebest)<(double)(efinal) )
842	{
843	for(i_=0; i_<=wcount-1;i_++)
844	{
845	wfinal[i_] = wbest[i_];
846	}
847	efinal = ebest;
848	}
849	}
850
851	//
852	// The best network
853	//
854	for(i_=0; i_<=wcount-1;i_++)
855	{
856	network.weights[i_] = wfinal[i_];
857	}
858	}
859
860
861	/*************************************************************************
862	Cross-validation estimate of generalization error.
863
864	Base algorithm - L-BFGS.
865
866	INPUT PARAMETERS:
867	Network - neural network with initialized geometry. Network is
868	not changed during cross-validation - it is used only
869	as a representative of its architecture.
870	XY - training set.
871	SSize - training set size
872	Decay - weight decay, same as in MLPTrainLBFGS
873	Restarts - number of restarts, >0.
874	restarts are counted for each partition separately, so
875	total number of restarts will be Restarts*FoldsCount.
876	WStep - stopping criterion, same as in MLPTrainLBFGS
877	MaxIts - stopping criterion, same as in MLPTrainLBFGS
878	FoldsCount - number of folds in k-fold cross-validation,
879	2<=FoldsCount<=SSize.
880	recommended value: 10.
881
882	OUTPUT PARAMETERS:
883	Info - return code, same as in MLPTrainLBFGS
884	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
885	CVRep - generalization error estimates
886
887	-- ALGLIB --
888	Copyright 09.12.2007 by Bochkanov Sergey
889	*************************************************************************/
890	public static void mlpkfoldcvlbfgs(ref mlpbase.multilayerperceptron network,
891	ref double[,] xy,
892	int npoints,
893	double decay,
894	int restarts,
895	double wstep,
896	int maxits,
897	int foldscount,
898	ref int info,
899	ref mlpreport rep,
900	ref mlpcvreport cvrep)
901	{
902	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, ref rep, ref cvrep);
903	}
904
905
906	/*************************************************************************
907	Cross-validation estimate of generalization error.
908
909	Base algorithm - Levenberg-Marquardt.
910
911	INPUT PARAMETERS:
912	Network - neural network with initialized geometry. Network is
913	not changed during cross-validation - it is used only
914	as a representative of its architecture.
915	XY - training set.
916	SSize - training set size
917	Decay - weight decay, same as in MLPTrainLBFGS
918	Restarts - number of restarts, >0.
919	restarts are counted for each partition separately, so
920	total number of restarts will be Restarts*FoldsCount.
921	FoldsCount - number of folds in k-fold cross-validation,
922	2<=FoldsCount<=SSize.
923	recommended value: 10.
924
925	OUTPUT PARAMETERS:
926	Info - return code, same as in MLPTrainLBFGS
927	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
928	CVRep - generalization error estimates
929
930	-- ALGLIB --
931	Copyright 09.12.2007 by Bochkanov Sergey
932	*************************************************************************/
933	public static void mlpkfoldcvlm(ref mlpbase.multilayerperceptron network,
934	ref double[,] xy,
935	int npoints,
936	double decay,
937	int restarts,
938	int foldscount,
939	ref int info,
940	ref mlpreport rep,
941	ref mlpcvreport cvrep)
942	{
943	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, ref rep, ref cvrep);
944	}
945
946
947	/*************************************************************************
948	Internal cross-validation subroutine
949	*************************************************************************/
950	private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n,
951	ref double[,] xy,
952	int npoints,
953	double decay,
954	int restarts,
955	int foldscount,
956	bool lmalgorithm,
957	double wstep,
958	int maxits,
959	ref int info,
960	ref mlpreport rep,
961	ref mlpcvreport cvrep)
962	{
963	int i = 0;
964	int fold = 0;
965	int j = 0;
966	int k = 0;
967	mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
968	int nin = 0;
969	int nout = 0;
970	int rowlen = 0;
971	int wcount = 0;
972	int nclasses = 0;
973	int tssize = 0;
974	int cvssize = 0;
975	double[,] cvset = new double[0,0];
976	double[,] testset = new double[0,0];
977	int[] folds = new int[0];
978	int relcnt = 0;
979	mlpreport internalrep = new mlpreport();
980	double[] x = new double[0];
981	double[] y = new double[0];
982	int i_ = 0;
983
984
985	//
986	// Read network geometry, test parameters
987	//
988	mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
989	if( mlpbase.mlpissoftmax(ref n) )
990	{
991	nclasses = nout;
992	rowlen = nin+1;
993	}
994	else
995	{
996	nclasses = -nout;
997	rowlen = nin+nout;
998	}
999	if( npoints<=0 \| foldscount<2 \| foldscount>npoints )
1000	{
1001	info = -1;
1002	return;
1003	}
1004	mlpbase.mlpcopy(ref n, ref network);
1005
1006	//
1007	// K-fold out cross-validation.
1008	// First, estimate generalization error
1009	//
1010	testset = new double[npoints-1+1, rowlen-1+1];
1011	cvset = new double[npoints-1+1, rowlen-1+1];
1012	x = new double[nin-1+1];
1013	y = new double[nout-1+1];
1014	mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
1015	cvrep.relclserror = 0;
1016	cvrep.avgce = 0;
1017	cvrep.rmserror = 0;
1018	cvrep.avgerror = 0;
1019	cvrep.avgrelerror = 0;
1020	rep.ngrad = 0;
1021	rep.nhess = 0;
1022	rep.ncholesky = 0;
1023	relcnt = 0;
1024	for(fold=0; fold<=foldscount-1; fold++)
1025	{
1026
1027	//
1028	// Separate set
1029	//
1030	tssize = 0;
1031	cvssize = 0;
1032	for(i=0; i<=npoints-1; i++)
1033	{
1034	if( folds[i]==fold )
1035	{
1036	for(i_=0; i_<=rowlen-1;i_++)
1037	{
1038	testset[tssize,i_] = xy[i,i_];
1039	}
1040	tssize = tssize+1;
1041	}
1042	else
1043	{
1044	for(i_=0; i_<=rowlen-1;i_++)
1045	{
1046	cvset[cvssize,i_] = xy[i,i_];
1047	}
1048	cvssize = cvssize+1;
1049	}
1050	}
1051
1052	//
1053	// Train on CV training set
1054	//
1055	if( lmalgorithm )
1056	{
1057	mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
1058	}
1059	else
1060	{
1061	mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
1062	}
1063	if( info<0 )
1064	{
1065	cvrep.relclserror = 0;
1066	cvrep.avgce = 0;
1067	cvrep.rmserror = 0;
1068	cvrep.avgerror = 0;
1069	cvrep.avgrelerror = 0;
1070	return;
1071	}
1072	rep.ngrad = rep.ngrad+internalrep.ngrad;
1073	rep.nhess = rep.nhess+internalrep.nhess;
1074	rep.ncholesky = rep.ncholesky+internalrep.ncholesky;
1075
1076	//
1077	// Estimate error using CV test set
1078	//
1079	if( mlpbase.mlpissoftmax(ref network) )
1080	{
1081
1082	//
1083	// classification-only code
1084	//
1085	cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
1086	cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
1087	}
1088	for(i=0; i<=tssize-1; i++)
1089	{
1090	for(i_=0; i_<=nin-1;i_++)
1091	{
1092	x[i_] = testset[i,i_];
1093	}
1094	mlpbase.mlpprocess(ref network, ref x, ref y);
1095	if( mlpbase.mlpissoftmax(ref network) )
1096	{
1097
1098	//
1099	// Classification-specific code
1100	//
1101	k = (int)Math.Round(testset[i,nin]);
1102	for(j=0; j<=nout-1; j++)
1103	{
1104	if( j==k )
1105	{
1106	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
1107	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
1108	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
1109	relcnt = relcnt+1;
1110	}
1111	else
1112	{
1113	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
1114	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
1115	}
1116	}
1117	}
1118	else
1119	{
1120
1121	//
1122	// Regression-specific code
1123	//
1124	for(j=0; j<=nout-1; j++)
1125	{
1126	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
1127	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
1128	if( (double)(testset[i,nin+j])!=(double)(0) )
1129	{
1130	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
1131	relcnt = relcnt+1;
1132	}
1133	}
1134	}
1135	}
1136	}
1137	if( mlpbase.mlpissoftmax(ref network) )
1138	{
1139	cvrep.relclserror = cvrep.relclserror/npoints;
1140	cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
1141	}
1142	cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
1143	cvrep.avgerror = cvrep.avgerror/(npoints*nout);
1144	cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
1145	info = 1;
1146	}
1147
1148
1149	/*************************************************************************
1150	Subroutine prepares K-fold split of the training set.
1151
1152	NOTES:
1153	"NClasses>0" means that we have classification task.
1154	"NClasses<0" means regression task with -NClasses real outputs.
1155	*************************************************************************/
1156	private static void mlpkfoldsplit(ref double[,] xy,
1157	int npoints,
1158	int nclasses,
1159	int foldscount,
1160	bool stratifiedsplits,
1161	ref int[] folds)
1162	{
1163	int i = 0;
1164	int j = 0;
1165	int k = 0;
1166
1167
1168	//
1169	// test parameters
1170	//
1171	System.Diagnostics.Debug.Assert(npoints>0, "MLPKFoldSplit: wrong NPoints!");
1172	System.Diagnostics.Debug.Assert(nclasses>1 \| nclasses<0, "MLPKFoldSplit: wrong NClasses!");
1173	System.Diagnostics.Debug.Assert(foldscount>=2 & foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!");
1174	System.Diagnostics.Debug.Assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!");
1175
1176	//
1177	// Folds
1178	//
1179	folds = new int[npoints-1+1];
1180	for(i=0; i<=npoints-1; i++)
1181	{
1182	folds[i] = i*foldscount/npoints;
1183	}
1184	for(i=0; i<=npoints-2; i++)
1185	{
1186	j = i+AP.Math.RandomInteger(npoints-i);
1187	if( j!=i )
1188	{
1189	k = folds[i];
1190	folds[i] = folds[j];
1191	folds[j] = k;
1192	}
1193	}
1194	}
1195	}
1196	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences