Context Navigation

source: branches/ParameterBinding/HeuristicLab.ExtLibs/HeuristicLab.ALGLIB/2.5.0/ALGLIB-2.5.0/mlptrain.cs @ 6451

Visit:

Last change on this file since 6451 was 3839, checked in by mkommend, 15 years ago
implemented first version of LR (ticket #1012)
File size: 46.2 KB

Line
1	/*************************************************************************
2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
3
4	>>> SOURCE LICENSE >>>
5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation (www.fsf.org); either version 2 of the
8	License, or (at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	A copy of the GNU General Public License is available at
16	http://www.fsf.org/licensing/licenses
17
18	>>> END OF LICENSE >>>
19	*************************************************************************/
20
21	using System;
22
23	namespace alglib
24	{
25	public class mlptrain
26	{
27	/*************************************************************************
28	Training report:
29	* NGrad - number of gradient calculations
30	* NHess - number of Hessian calculations
31	* NCholesky - number of Cholesky decompositions
32	*************************************************************************/
33	public struct mlpreport
34	{
35	public int ngrad;
36	public int nhess;
37	public int ncholesky;
38	};
39
40
41	/*************************************************************************
42	Cross-validation estimates of generalization error
43	*************************************************************************/
44	public struct mlpcvreport
45	{
46	public double relclserror;
47	public double avgce;
48	public double rmserror;
49	public double avgerror;
50	public double avgrelerror;
51	};
52
53
54
55
56	public const double mindecay = 0.001;
57
58
59	/*************************************************************************
60	Neural network training using modified Levenberg-Marquardt with exact
61	Hessian calculation and regularization. Subroutine trains neural network
62	with restarts from random positions. Algorithm is well suited for small
63	and medium scale problems (hundreds of weights).
64
65	INPUT PARAMETERS:
66	Network - neural network with initialized geometry
67	XY - training set
68	NPoints - training set size
69	Decay - weight decay constant, >=0.001
70	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
71	function.
72	If you don't know what Decay to choose, use 0.001.
73	Restarts - number of restarts from random position, >0.
74	If you don't know what Restarts to choose, use 2.
75
76	OUTPUT PARAMETERS:
77	Network - trained neural network.
78	Info - return code:
79	* -9, if internal matrix inverse subroutine failed
80	* -2, if there is a point with class number
81	outside of [0..NOut-1].
82	* -1, if wrong parameters specified
83	(NPoints<0, Restarts<1).
84	* 2, if task has been solved.
85	Rep - training report
86
87	-- ALGLIB --
88	Copyright 10.03.2009 by Bochkanov Sergey
89	*************************************************************************/
90	public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
91	ref double[,] xy,
92	int npoints,
93	double decay,
94	int restarts,
95	ref int info,
96	ref mlpreport rep)
97	{
98	int nin = 0;
99	int nout = 0;
100	int wcount = 0;
101	double lmftol = 0;
102	double lmsteptol = 0;
103	int i = 0;
104	int k = 0;
105	double v = 0;
106	double e = 0;
107	double enew = 0;
108	double xnorm2 = 0;
109	double stepnorm = 0;
110	double[] g = new double[0];
111	double[] d = new double[0];
112	double[,] h = new double[0,0];
113	double[,] hmod = new double[0,0];
114	double[,] z = new double[0,0];
115	bool spd = new bool();
116	double nu = 0;
117	double lambda = 0;
118	double lambdaup = 0;
119	double lambdadown = 0;
120	minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
121	minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
122	double[] x = new double[0];
123	double[] y = new double[0];
124	double[] wbase = new double[0];
125	double[] wdir = new double[0];
126	double[] wt = new double[0];
127	double[] wx = new double[0];
128	int pass = 0;
129	double[] wbest = new double[0];
130	double ebest = 0;
131	int invinfo = 0;
132	matinv.matinvreport invrep = new matinv.matinvreport();
133	int solverinfo = 0;
134	densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
135	int i_ = 0;
136
137	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
138	lambdaup = 10;
139	lambdadown = 0.3;
140	lmftol = 0.001;
141	lmsteptol = 0.001;
142
143	//
144	// Test for inputs
145	//
146	if( npoints<=0 \| restarts<1 )
147	{
148	info = -1;
149	return;
150	}
151	if( mlpbase.mlpissoftmax(ref network) )
152	{
153	for(i=0; i<=npoints-1; i++)
154	{
155	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
156	{
157	info = -2;
158	return;
159	}
160	}
161	}
162	decay = Math.Max(decay, mindecay);
163	info = 2;
164
165	//
166	// Initialize data
167	//
168	rep.ngrad = 0;
169	rep.nhess = 0;
170	rep.ncholesky = 0;
171
172	//
173	// General case.
174	// Prepare task and network. Allocate space.
175	//
176	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
177	g = new double[wcount-1+1];
178	h = new double[wcount-1+1, wcount-1+1];
179	hmod = new double[wcount-1+1, wcount-1+1];
180	wbase = new double[wcount-1+1];
181	wdir = new double[wcount-1+1];
182	wbest = new double[wcount-1+1];
183	wt = new double[wcount-1+1];
184	wx = new double[wcount-1+1];
185	ebest = AP.Math.MaxRealNumber;
186
187	//
188	// Multiple passes
189	//
190	for(pass=1; pass<=restarts; pass++)
191	{
192
193	//
194	// Initialize weights
195	//
196	mlpbase.mlprandomize(ref network);
197
198	//
199	// First stage of the hybrid algorithm: LBFGS
200	//
201	for(i_=0; i_<=wcount-1;i_++)
202	{
203	wbase[i_] = network.weights[i_];
204	}
205	minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), ref wbase, ref state);
206	minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, Math.Max(25, wcount));
207	while( minlbfgs.minlbfgsiteration(ref state) )
208	{
209
210	//
211	// gradient
212	//
213	for(i_=0; i_<=wcount-1;i_++)
214	{
215	network.weights[i_] = state.x[i_];
216	}
217	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
218
219	//
220	// weight decay
221	//
222	v = 0.0;
223	for(i_=0; i_<=wcount-1;i_++)
224	{
225	v += network.weights[i_]*network.weights[i_];
226	}
227	state.f = state.f+0.5decayv;
228	for(i_=0; i_<=wcount-1;i_++)
229	{
230	state.g[i_] = state.g[i_] + decay*network.weights[i_];
231	}
232
233	//
234	// next iteration
235	//
236	rep.ngrad = rep.ngrad+1;
237	}
238	minlbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
239	for(i_=0; i_<=wcount-1;i_++)
240	{
241	network.weights[i_] = wbase[i_];
242	}
243
244	//
245	// Second stage of the hybrid algorithm: LM
246	//
247	// Initialize H with identity matrix,
248	// G with gradient,
249	// E with regularized error.
250	//
251	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
252	v = 0.0;
253	for(i_=0; i_<=wcount-1;i_++)
254	{
255	v += network.weights[i_]*network.weights[i_];
256	}
257	e = e+0.5decayv;
258	for(i_=0; i_<=wcount-1;i_++)
259	{
260	g[i_] = g[i_] + decay*network.weights[i_];
261	}
262	for(k=0; k<=wcount-1; k++)
263	{
264	h[k,k] = h[k,k]+decay;
265	}
266	rep.nhess = rep.nhess+1;
267	lambda = 0.001;
268	nu = 2;
269	while( true )
270	{
271
272	//
273	// 1. HMod = H+lambda*I
274	// 2. Try to solve (H+LambdaI)dx = -g.
275	// Increase lambda if left part is not positive definite.
276	//
277	for(i=0; i<=wcount-1; i++)
278	{
279	for(i_=0; i_<=wcount-1;i_++)
280	{
281	hmod[i,i_] = h[i,i_];
282	}
283	hmod[i,i] = hmod[i,i]+lambda;
284	}
285	spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
286	rep.ncholesky = rep.ncholesky+1;
287	if( !spd )
288	{
289	lambda = lambdalambdaupnu;
290	nu = nu*2;
291	continue;
292	}
293	densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
294	if( solverinfo<0 )
295	{
296	lambda = lambdalambdaupnu;
297	nu = nu*2;
298	continue;
299	}
300	for(i_=0; i_<=wcount-1;i_++)
301	{
302	wdir[i_] = -1*wdir[i_];
303	}
304
305	//
306	// Lambda found.
307	// 1. Save old w in WBase
308	// 1. Test some stopping criterions
309	// 2. If error(w+wdir)>error(w), increase lambda
310	//
311	for(i_=0; i_<=wcount-1;i_++)
312	{
313	network.weights[i_] = network.weights[i_] + wdir[i_];
314	}
315	xnorm2 = 0.0;
316	for(i_=0; i_<=wcount-1;i_++)
317	{
318	xnorm2 += network.weights[i_]*network.weights[i_];
319	}
320	stepnorm = 0.0;
321	for(i_=0; i_<=wcount-1;i_++)
322	{
323	stepnorm += wdir[i_]*wdir[i_];
324	}
325	stepnorm = Math.Sqrt(stepnorm);
326	enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5decayxnorm2;
327	if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
328	{
329	break;
330	}
331	if( (double)(enew)>(double)(e) )
332	{
333	lambda = lambdalambdaupnu;
334	nu = nu*2;
335	continue;
336	}
337
338	//
339	// Optimize using inv(cholesky(H)) as preconditioner
340	//
341	matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, ref invrep);
342	if( invinfo<=0 )
343	{
344
345	//
346	// if matrix can't be inverted then exit with errors
347	// TODO: make WCount steps in direction suggested by HMod
348	//
349	info = -9;
350	return;
351	}
352	for(i_=0; i_<=wcount-1;i_++)
353	{
354	wbase[i_] = network.weights[i_];
355	}
356	for(i=0; i<=wcount-1; i++)
357	{
358	wt[i] = 0;
359	}
360	minlbfgs.minlbfgscreatex(wcount, wcount, ref wt, 1, ref state);
361	minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 5);
362	while( minlbfgs.minlbfgsiteration(ref state) )
363	{
364
365	//
366	// gradient
367	//
368	for(i=0; i<=wcount-1; i++)
369	{
370	v = 0.0;
371	for(i_=i; i_<=wcount-1;i_++)
372	{
373	v += state.x[i_]*hmod[i,i_];
374	}
375	network.weights[i] = wbase[i]+v;
376	}
377	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
378	for(i=0; i<=wcount-1; i++)
379	{
380	state.g[i] = 0;
381	}
382	for(i=0; i<=wcount-1; i++)
383	{
384	v = g[i];
385	for(i_=i; i_<=wcount-1;i_++)
386	{
387	state.g[i_] = state.g[i_] + v*hmod[i,i_];
388	}
389	}
390
391	//
392	// weight decay
393	// grad(x'x) = A'(x0+A*t)
394	//
395	v = 0.0;
396	for(i_=0; i_<=wcount-1;i_++)
397	{
398	v += network.weights[i_]*network.weights[i_];
399	}
400	state.f = state.f+0.5decayv;
401	for(i=0; i<=wcount-1; i++)
402	{
403	v = decay*network.weights[i];
404	for(i_=i; i_<=wcount-1;i_++)
405	{
406	state.g[i_] = state.g[i_] + v*hmod[i,i_];
407	}
408	}
409
410	//
411	// next iteration
412	//
413	rep.ngrad = rep.ngrad+1;
414	}
415	minlbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);
416
417	//
418	// Accept new position.
419	// Calculate Hessian
420	//
421	for(i=0; i<=wcount-1; i++)
422	{
423	v = 0.0;
424	for(i_=i; i_<=wcount-1;i_++)
425	{
426	v += wt[i_]*hmod[i,i_];
427	}
428	network.weights[i] = wbase[i]+v;
429	}
430	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
431	v = 0.0;
432	for(i_=0; i_<=wcount-1;i_++)
433	{
434	v += network.weights[i_]*network.weights[i_];
435	}
436	e = e+0.5decayv;
437	for(i_=0; i_<=wcount-1;i_++)
438	{
439	g[i_] = g[i_] + decay*network.weights[i_];
440	}
441	for(k=0; k<=wcount-1; k++)
442	{
443	h[k,k] = h[k,k]+decay;
444	}
445	rep.nhess = rep.nhess+1;
446
447	//
448	// Update lambda
449	//
450	lambda = lambda*lambdadown;
451	nu = 2;
452	}
453
454	//
455	// update WBest
456	//
457	v = 0.0;
458	for(i_=0; i_<=wcount-1;i_++)
459	{
460	v += network.weights[i_]*network.weights[i_];
461	}
462	e = 0.5decayv+mlpbase.mlperror(ref network, ref xy, npoints);
463	if( (double)(e)<(double)(ebest) )
464	{
465	ebest = e;
466	for(i_=0; i_<=wcount-1;i_++)
467	{
468	wbest[i_] = network.weights[i_];
469	}
470	}
471	}
472
473	//
474	// copy WBest to output
475	//
476	for(i_=0; i_<=wcount-1;i_++)
477	{
478	network.weights[i_] = wbest[i_];
479	}
480	}
481
482
483	/*************************************************************************
484	Neural network training using L-BFGS algorithm with regularization.
485	Subroutine trains neural network with restarts from random positions.
486	Algorithm is well suited for problems of any dimensionality (memory
487	requirements and step complexity are linear by weights number).
488
489	INPUT PARAMETERS:
490	Network - neural network with initialized geometry
491	XY - training set
492	NPoints - training set size
493	Decay - weight decay constant, >=0.001
494	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
495	function.
496	If you don't know what Decay to choose, use 0.001.
497	Restarts - number of restarts from random position, >0.
498	If you don't know what Restarts to choose, use 2.
499	WStep - stopping criterion. Algorithm stops if step size is
500	less than WStep. Recommended value - 0.01. Zero step
501	size means stopping after MaxIts iterations.
502	MaxIts - stopping criterion. Algorithm stops after MaxIts
503	iterations (NOT gradient calculations). Zero MaxIts
504	means stopping when step is sufficiently small.
505
506	OUTPUT PARAMETERS:
507	Network - trained neural network.
508	Info - return code:
509	* -8, if both WStep=0 and MaxIts=0
510	* -2, if there is a point with class number
511	outside of [0..NOut-1].
512	* -1, if wrong parameters specified
513	(NPoints<0, Restarts<1).
514	* 2, if task has been solved.
515	Rep - training report
516
517	-- ALGLIB --
518	Copyright 09.12.2007 by Bochkanov Sergey
519	*************************************************************************/
520	public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
521	ref double[,] xy,
522	int npoints,
523	double decay,
524	int restarts,
525	double wstep,
526	int maxits,
527	ref int info,
528	ref mlpreport rep)
529	{
530	int i = 0;
531	int pass = 0;
532	int nin = 0;
533	int nout = 0;
534	int wcount = 0;
535	double[] w = new double[0];
536	double[] wbest = new double[0];
537	double e = 0;
538	double v = 0;
539	double ebest = 0;
540	minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
541	minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
542	int i_ = 0;
543
544
545	//
546	// Test inputs, parse flags, read network geometry
547	//
548	if( (double)(wstep)==(double)(0) & maxits==0 )
549	{
550	info = -8;
551	return;
552	}
553	if( npoints<=0 \| restarts<1 \| (double)(wstep)<(double)(0) \| maxits<0 )
554	{
555	info = -1;
556	return;
557	}
558	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
559	if( mlpbase.mlpissoftmax(ref network) )
560	{
561	for(i=0; i<=npoints-1; i++)
562	{
563	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
564	{
565	info = -2;
566	return;
567	}
568	}
569	}
570	decay = Math.Max(decay, mindecay);
571	info = 2;
572
573	//
574	// Prepare
575	//
576	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
577	w = new double[wcount-1+1];
578	wbest = new double[wcount-1+1];
579	ebest = AP.Math.MaxRealNumber;
580
581	//
582	// Multiple starts
583	//
584	rep.ncholesky = 0;
585	rep.nhess = 0;
586	rep.ngrad = 0;
587	for(pass=1; pass<=restarts; pass++)
588	{
589
590	//
591	// Process
592	//
593	mlpbase.mlprandomize(ref network);
594	for(i_=0; i_<=wcount-1;i_++)
595	{
596	w[i_] = network.weights[i_];
597	}
598	minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), ref w, ref state);
599	minlbfgs.minlbfgssetcond(ref state, 0.0, 0.0, wstep, maxits);
600	while( minlbfgs.minlbfgsiteration(ref state) )
601	{
602	for(i_=0; i_<=wcount-1;i_++)
603	{
604	network.weights[i_] = state.x[i_];
605	}
606	mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
607	v = 0.0;
608	for(i_=0; i_<=wcount-1;i_++)
609	{
610	v += network.weights[i_]*network.weights[i_];
611	}
612	state.f = state.f+0.5decayv;
613	for(i_=0; i_<=wcount-1;i_++)
614	{
615	state.g[i_] = state.g[i_] + decay*network.weights[i_];
616	}
617	rep.ngrad = rep.ngrad+1;
618	}
619	minlbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
620	for(i_=0; i_<=wcount-1;i_++)
621	{
622	network.weights[i_] = w[i_];
623	}
624
625	//
626	// Compare with best
627	//
628	v = 0.0;
629	for(i_=0; i_<=wcount-1;i_++)
630	{
631	v += network.weights[i_]*network.weights[i_];
632	}
633	e = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5decayv;
634	if( (double)(e)<(double)(ebest) )
635	{
636	for(i_=0; i_<=wcount-1;i_++)
637	{
638	wbest[i_] = network.weights[i_];
639	}
640	ebest = e;
641	}
642	}
643
644	//
645	// The best network
646	//
647	for(i_=0; i_<=wcount-1;i_++)
648	{
649	network.weights[i_] = wbest[i_];
650	}
651	}
652
653
654	/*************************************************************************
655	Neural network training using early stopping (base algorithm - L-BFGS with
656	regularization).
657
658	INPUT PARAMETERS:
659	Network - neural network with initialized geometry
660	TrnXY - training set
661	TrnSize - training set size
662	ValXY - validation set
663	ValSize - validation set size
664	Decay - weight decay constant, >=0.001
665	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
666	function.
667	If you don't know what Decay to choose, use 0.001.
668	Restarts - number of restarts from random position, >0.
669	If you don't know what Restarts to choose, use 2.
670
671	OUTPUT PARAMETERS:
672	Network - trained neural network.
673	Info - return code:
674	* -2, if there is a point with class number
675	outside of [0..NOut-1].
676	* -1, if wrong parameters specified
677	(NPoints<0, Restarts<1, ...).
678	* 2, task has been solved, stopping criterion met -
679	sufficiently small step size. Not expected (we
680	use EARLY stopping) but possible and not an
681	error.
682	* 6, task has been solved, stopping criterion met -
683	increasing of validation set error.
684	Rep - training report
685
686	NOTE:
687
688	Algorithm stops if validation set error increases for a long enough or
689	step size is small enought (there are task where validation set may
690	decrease for eternity). In any case solution returned corresponds to the
691	minimum of validation set error.
692
693	-- ALGLIB --
694	Copyright 10.03.2009 by Bochkanov Sergey
695	*************************************************************************/
696	public static void mlptraines(ref mlpbase.multilayerperceptron network,
697	ref double[,] trnxy,
698	int trnsize,
699	ref double[,] valxy,
700	int valsize,
701	double decay,
702	int restarts,
703	ref int info,
704	ref mlpreport rep)
705	{
706	int i = 0;
707	int pass = 0;
708	int nin = 0;
709	int nout = 0;
710	int wcount = 0;
711	double[] w = new double[0];
712	double[] wbest = new double[0];
713	double e = 0;
714	double v = 0;
715	double ebest = 0;
716	double[] wfinal = new double[0];
717	double efinal = 0;
718	int itbest = 0;
719	minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
720	minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
721	double wstep = 0;
722	int i_ = 0;
723
724	wstep = 0.001;
725
726	//
727	// Test inputs, parse flags, read network geometry
728	//
729	if( trnsize<=0 \| valsize<=0 \| restarts<1 \| (double)(decay)<(double)(0) )
730	{
731	info = -1;
732	return;
733	}
734	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
735	if( mlpbase.mlpissoftmax(ref network) )
736	{
737	for(i=0; i<=trnsize-1; i++)
738	{
739	if( (int)Math.Round(trnxy[i,nin])<0 \| (int)Math.Round(trnxy[i,nin])>=nout )
740	{
741	info = -2;
742	return;
743	}
744	}
745	for(i=0; i<=valsize-1; i++)
746	{
747	if( (int)Math.Round(valxy[i,nin])<0 \| (int)Math.Round(valxy[i,nin])>=nout )
748	{
749	info = -2;
750	return;
751	}
752	}
753	}
754	info = 2;
755
756	//
757	// Prepare
758	//
759	mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
760	w = new double[wcount-1+1];
761	wbest = new double[wcount-1+1];
762	wfinal = new double[wcount-1+1];
763	efinal = AP.Math.MaxRealNumber;
764	for(i=0; i<=wcount-1; i++)
765	{
766	wfinal[i] = 0;
767	}
768
769	//
770	// Multiple starts
771	//
772	rep.ncholesky = 0;
773	rep.nhess = 0;
774	rep.ngrad = 0;
775	for(pass=1; pass<=restarts; pass++)
776	{
777
778	//
779	// Process
780	//
781	mlpbase.mlprandomize(ref network);
782	ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
783	for(i_=0; i_<=wcount-1;i_++)
784	{
785	wbest[i_] = network.weights[i_];
786	}
787	itbest = 0;
788	for(i_=0; i_<=wcount-1;i_++)
789	{
790	w[i_] = network.weights[i_];
791	}
792	minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), ref w, ref state);
793	minlbfgs.minlbfgssetcond(ref state, 0.0, 0.0, wstep, 0);
794	minlbfgs.minlbfgssetxrep(ref state, true);
795	while( minlbfgs.minlbfgsiteration(ref state) )
796	{
797
798	//
799	// Calculate gradient
800	//
801	for(i_=0; i_<=wcount-1;i_++)
802	{
803	network.weights[i_] = state.x[i_];
804	}
805	mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
806	v = 0.0;
807	for(i_=0; i_<=wcount-1;i_++)
808	{
809	v += network.weights[i_]*network.weights[i_];
810	}
811	state.f = state.f+0.5decayv;
812	for(i_=0; i_<=wcount-1;i_++)
813	{
814	state.g[i_] = state.g[i_] + decay*network.weights[i_];
815	}
816	rep.ngrad = rep.ngrad+1;
817
818	//
819	// Validation set
820	//
821	if( state.xupdated )
822	{
823	for(i_=0; i_<=wcount-1;i_++)
824	{
825	network.weights[i_] = w[i_];
826	}
827	e = mlpbase.mlperror(ref network, ref valxy, valsize);
828	if( (double)(e)<(double)(ebest) )
829	{
830	ebest = e;
831	for(i_=0; i_<=wcount-1;i_++)
832	{
833	wbest[i_] = network.weights[i_];
834	}
835	itbest = internalrep.iterationscount;
836	}
837	if( internalrep.iterationscount>30 & (double)(internalrep.iterationscount)>(double)(1.5*itbest) )
838	{
839	info = 6;
840	break;
841	}
842	}
843	}
844	minlbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
845
846	//
847	// Compare with final answer
848	//
849	if( (double)(ebest)<(double)(efinal) )
850	{
851	for(i_=0; i_<=wcount-1;i_++)
852	{
853	wfinal[i_] = wbest[i_];
854	}
855	efinal = ebest;
856	}
857	}
858
859	//
860	// The best network
861	//
862	for(i_=0; i_<=wcount-1;i_++)
863	{
864	network.weights[i_] = wfinal[i_];
865	}
866	}
867
868
869	/*************************************************************************
870	Cross-validation estimate of generalization error.
871
872	Base algorithm - L-BFGS.
873
874	INPUT PARAMETERS:
875	Network - neural network with initialized geometry. Network is
876	not changed during cross-validation - it is used only
877	as a representative of its architecture.
878	XY - training set.
879	SSize - training set size
880	Decay - weight decay, same as in MLPTrainLBFGS
881	Restarts - number of restarts, >0.
882	restarts are counted for each partition separately, so
883	total number of restarts will be Restarts*FoldsCount.
884	WStep - stopping criterion, same as in MLPTrainLBFGS
885	MaxIts - stopping criterion, same as in MLPTrainLBFGS
886	FoldsCount - number of folds in k-fold cross-validation,
887	2<=FoldsCount<=SSize.
888	recommended value: 10.
889
890	OUTPUT PARAMETERS:
891	Info - return code, same as in MLPTrainLBFGS
892	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
893	CVRep - generalization error estimates
894
895	-- ALGLIB --
896	Copyright 09.12.2007 by Bochkanov Sergey
897	*************************************************************************/
898	public static void mlpkfoldcvlbfgs(ref mlpbase.multilayerperceptron network,
899	ref double[,] xy,
900	int npoints,
901	double decay,
902	int restarts,
903	double wstep,
904	int maxits,
905	int foldscount,
906	ref int info,
907	ref mlpreport rep,
908	ref mlpcvreport cvrep)
909	{
910	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, ref rep, ref cvrep);
911	}
912
913
914	/*************************************************************************
915	Cross-validation estimate of generalization error.
916
917	Base algorithm - Levenberg-Marquardt.
918
919	INPUT PARAMETERS:
920	Network - neural network with initialized geometry. Network is
921	not changed during cross-validation - it is used only
922	as a representative of its architecture.
923	XY - training set.
924	SSize - training set size
925	Decay - weight decay, same as in MLPTrainLBFGS
926	Restarts - number of restarts, >0.
927	restarts are counted for each partition separately, so
928	total number of restarts will be Restarts*FoldsCount.
929	FoldsCount - number of folds in k-fold cross-validation,
930	2<=FoldsCount<=SSize.
931	recommended value: 10.
932
933	OUTPUT PARAMETERS:
934	Info - return code, same as in MLPTrainLBFGS
935	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
936	CVRep - generalization error estimates
937
938	-- ALGLIB --
939	Copyright 09.12.2007 by Bochkanov Sergey
940	*************************************************************************/
941	public static void mlpkfoldcvlm(ref mlpbase.multilayerperceptron network,
942	ref double[,] xy,
943	int npoints,
944	double decay,
945	int restarts,
946	int foldscount,
947	ref int info,
948	ref mlpreport rep,
949	ref mlpcvreport cvrep)
950	{
951	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, ref rep, ref cvrep);
952	}
953
954
955	/*************************************************************************
956	Internal cross-validation subroutine
957	*************************************************************************/
958	private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n,
959	ref double[,] xy,
960	int npoints,
961	double decay,
962	int restarts,
963	int foldscount,
964	bool lmalgorithm,
965	double wstep,
966	int maxits,
967	ref int info,
968	ref mlpreport rep,
969	ref mlpcvreport cvrep)
970	{
971	int i = 0;
972	int fold = 0;
973	int j = 0;
974	int k = 0;
975	mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
976	int nin = 0;
977	int nout = 0;
978	int rowlen = 0;
979	int wcount = 0;
980	int nclasses = 0;
981	int tssize = 0;
982	int cvssize = 0;
983	double[,] cvset = new double[0,0];
984	double[,] testset = new double[0,0];
985	int[] folds = new int[0];
986	int relcnt = 0;
987	mlpreport internalrep = new mlpreport();
988	double[] x = new double[0];
989	double[] y = new double[0];
990	int i_ = 0;
991
992
993	//
994	// Read network geometry, test parameters
995	//
996	mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
997	if( mlpbase.mlpissoftmax(ref n) )
998	{
999	nclasses = nout;
1000	rowlen = nin+1;
1001	}
1002	else
1003	{
1004	nclasses = -nout;
1005	rowlen = nin+nout;
1006	}
1007	if( npoints<=0 \| foldscount<2 \| foldscount>npoints )
1008	{
1009	info = -1;
1010	return;
1011	}
1012	mlpbase.mlpcopy(ref n, ref network);
1013
1014	//
1015	// K-fold out cross-validation.
1016	// First, estimate generalization error
1017	//
1018	testset = new double[npoints-1+1, rowlen-1+1];
1019	cvset = new double[npoints-1+1, rowlen-1+1];
1020	x = new double[nin-1+1];
1021	y = new double[nout-1+1];
1022	mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
1023	cvrep.relclserror = 0;
1024	cvrep.avgce = 0;
1025	cvrep.rmserror = 0;
1026	cvrep.avgerror = 0;
1027	cvrep.avgrelerror = 0;
1028	rep.ngrad = 0;
1029	rep.nhess = 0;
1030	rep.ncholesky = 0;
1031	relcnt = 0;
1032	for(fold=0; fold<=foldscount-1; fold++)
1033	{
1034
1035	//
1036	// Separate set
1037	//
1038	tssize = 0;
1039	cvssize = 0;
1040	for(i=0; i<=npoints-1; i++)
1041	{
1042	if( folds[i]==fold )
1043	{
1044	for(i_=0; i_<=rowlen-1;i_++)
1045	{
1046	testset[tssize,i_] = xy[i,i_];
1047	}
1048	tssize = tssize+1;
1049	}
1050	else
1051	{
1052	for(i_=0; i_<=rowlen-1;i_++)
1053	{
1054	cvset[cvssize,i_] = xy[i,i_];
1055	}
1056	cvssize = cvssize+1;
1057	}
1058	}
1059
1060	//
1061	// Train on CV training set
1062	//
1063	if( lmalgorithm )
1064	{
1065	mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
1066	}
1067	else
1068	{
1069	mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
1070	}
1071	if( info<0 )
1072	{
1073	cvrep.relclserror = 0;
1074	cvrep.avgce = 0;
1075	cvrep.rmserror = 0;
1076	cvrep.avgerror = 0;
1077	cvrep.avgrelerror = 0;
1078	return;
1079	}
1080	rep.ngrad = rep.ngrad+internalrep.ngrad;
1081	rep.nhess = rep.nhess+internalrep.nhess;
1082	rep.ncholesky = rep.ncholesky+internalrep.ncholesky;
1083
1084	//
1085	// Estimate error using CV test set
1086	//
1087	if( mlpbase.mlpissoftmax(ref network) )
1088	{
1089
1090	//
1091	// classification-only code
1092	//
1093	cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
1094	cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
1095	}
1096	for(i=0; i<=tssize-1; i++)
1097	{
1098	for(i_=0; i_<=nin-1;i_++)
1099	{
1100	x[i_] = testset[i,i_];
1101	}
1102	mlpbase.mlpprocess(ref network, ref x, ref y);
1103	if( mlpbase.mlpissoftmax(ref network) )
1104	{
1105
1106	//
1107	// Classification-specific code
1108	//
1109	k = (int)Math.Round(testset[i,nin]);
1110	for(j=0; j<=nout-1; j++)
1111	{
1112	if( j==k )
1113	{
1114	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
1115	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
1116	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
1117	relcnt = relcnt+1;
1118	}
1119	else
1120	{
1121	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
1122	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
1123	}
1124	}
1125	}
1126	else
1127	{
1128
1129	//
1130	// Regression-specific code
1131	//
1132	for(j=0; j<=nout-1; j++)
1133	{
1134	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
1135	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
1136	if( (double)(testset[i,nin+j])!=(double)(0) )
1137	{
1138	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
1139	relcnt = relcnt+1;
1140	}
1141	}
1142	}
1143	}
1144	}
1145	if( mlpbase.mlpissoftmax(ref network) )
1146	{
1147	cvrep.relclserror = cvrep.relclserror/npoints;
1148	cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
1149	}
1150	cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
1151	cvrep.avgerror = cvrep.avgerror/(npoints*nout);
1152	cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
1153	info = 1;
1154	}
1155
1156
1157	/*************************************************************************
1158	Subroutine prepares K-fold split of the training set.
1159
1160	NOTES:
1161	"NClasses>0" means that we have classification task.
1162	"NClasses<0" means regression task with -NClasses real outputs.
1163	*************************************************************************/
1164	private static void mlpkfoldsplit(ref double[,] xy,
1165	int npoints,
1166	int nclasses,
1167	int foldscount,
1168	bool stratifiedsplits,
1169	ref int[] folds)
1170	{
1171	int i = 0;
1172	int j = 0;
1173	int k = 0;
1174
1175
1176	//
1177	// test parameters
1178	//
1179	System.Diagnostics.Debug.Assert(npoints>0, "MLPKFoldSplit: wrong NPoints!");
1180	System.Diagnostics.Debug.Assert(nclasses>1 \| nclasses<0, "MLPKFoldSplit: wrong NClasses!");
1181	System.Diagnostics.Debug.Assert(foldscount>=2 & foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!");
1182	System.Diagnostics.Debug.Assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!");
1183
1184	//
1185	// Folds
1186	//
1187	folds = new int[npoints-1+1];
1188	for(i=0; i<=npoints-1; i++)
1189	{
1190	folds[i] = i*foldscount/npoints;
1191	}
1192	for(i=0; i<=npoints-2; i++)
1193	{
1194	j = i+AP.Math.RandomInteger(npoints-i);
1195	if( j!=i )
1196	{
1197	k = folds[i];
1198	folds[i] = folds[j];
1199	folds[j] = k;
1200	}
1201	}
1202	}
1203	}
1204	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences