Context Navigation

source: trunk/sources/ALGLIB/mlptrain.cs @ 2575

Visit:

Last change on this file since 2575 was 2563, checked in by gkronber, 15 years ago
Updated ALGLIB to latest version. #751 (Plugin for for data-modeling with ANN (integrated into CEDMA))
File size: 45.6 KB

Rev	Line
[2563]	1	/*************************************************************************
	2	Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).
	3
	4	>>> SOURCE LICENSE >>>
	5	This program is free software; you can redistribute it and/or modify
	6	it under the terms of the GNU General Public License as published by
	7	the Free Software Foundation (www.fsf.org); either version 2 of the
	8	License, or (at your option) any later version.
	9
	10	This program is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	GNU General Public License for more details.
	14
	15	A copy of the GNU General Public License is available at
	16	http://www.fsf.org/licensing/licenses
	17
	18	>>> END OF LICENSE >>>
	19	*************************************************************************/
	20
	21	using System;
	22
	23	namespace alglib
	24	{
	25	public class mlptrain
	26	{
	27	/*************************************************************************
	28	Training report:
	29	* NGrad - number of gradient calculations
	30	* NHess - number of Hessian calculations
	31	* NCholesky - number of Cholesky decompositions
	32	*************************************************************************/
	33	public struct mlpreport
	34	{
	35	public int ngrad;
	36	public int nhess;
	37	public int ncholesky;
	38	};
	39
	40
	41	/*************************************************************************
	42	Cross-validation estimates of generalization error
	43	*************************************************************************/
	44	public struct mlpcvreport
	45	{
	46	public double relclserror;
	47	public double avgce;
	48	public double rmserror;
	49	public double avgerror;
	50	public double avgrelerror;
	51	};
	52
	53
	54
	55
	56	public const double mindecay = 0.001;
	57
	58
	59	/*************************************************************************
	60	Neural network training using modified Levenberg-Marquardt with exact
	61	Hessian calculation and regularization. Subroutine trains neural network
	62	with restarts from random positions. Algorithm is well suited for small
	63	and medium scale problems (hundreds of weights).
	64
	65	INPUT PARAMETERS:
	66	Network - neural network with initialized geometry
	67	XY - training set
	68	NPoints - training set size
	69	Decay - weight decay constant, >=0.001
	70	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
	71	function.
	72	If you don't know what Decay to choose, use 0.001.
	73	Restarts - number of restarts from random position, >0.
	74	If you don't know what Restarts to choose, use 2.
	75
	76	OUTPUT PARAMETERS:
	77	Network - trained neural network.
	78	Info - return code:
	79	* -9, if internal matrix inverse subroutine failed
	80	* -2, if there is a point with class number
	81	outside of [0..NOut-1].
	82	* -1, if wrong parameters specified
	83	(NPoints<0, Restarts<1).
	84	* 2, if task has been solved.
	85	Rep - training report
	86
	87	-- ALGLIB --
	88	Copyright 10.03.2009 by Bochkanov Sergey
	89	*************************************************************************/
	90	public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
	91	ref double[,] xy,
	92	int npoints,
	93	double decay,
	94	int restarts,
	95	ref int info,
	96	ref mlpreport rep)
	97	{
	98	int nin = 0;
	99	int nout = 0;
	100	int wcount = 0;
	101	double lmftol = 0;
	102	double lmsteptol = 0;
	103	int i = 0;
	104	int j = 0;
	105	int k = 0;
	106	int mx = 0;
	107	double v = 0;
	108	double e = 0;
	109	double enew = 0;
	110	double xnorm2 = 0;
	111	double stepnorm = 0;
	112	double[] g = new double[0];
	113	double[] d = new double[0];
	114	double[,] h = new double[0,0];
	115	double[,] hmod = new double[0,0];
	116	double[,] z = new double[0,0];
	117	bool spd = new bool();
	118	double nu = 0;
	119	double lambda = 0;
	120	double lambdaup = 0;
	121	double lambdadown = 0;
	122	int cvcnt = 0;
	123	double cvrelcnt = 0;
	124	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
	125	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
	126	double[] x = new double[0];
	127	double[] y = new double[0];
	128	double[] wbase = new double[0];
	129	double wstep = 0;
	130	double[] wdir = new double[0];
	131	double[] wt = new double[0];
	132	double[] wx = new double[0];
	133	int pass = 0;
	134	double[] wbest = new double[0];
	135	double ebest = 0;
	136	int i_ = 0;
	137
	138	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
	139	lambdaup = 10;
	140	lambdadown = 0.3;
	141	lmftol = 0.001;
	142	lmsteptol = 0.001;
	143
	144	//
	145	// Test for inputs
	146	//
	147	if( npoints<=0 \| restarts<1 )
	148	{
	149	info = -1;
	150	return;
	151	}
	152	if( mlpbase.mlpissoftmax(ref network) )
	153	{
	154	for(i=0; i<=npoints-1; i++)
	155	{
	156	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
	157	{
	158	info = -2;
	159	return;
	160	}
	161	}
	162	}
	163	decay = Math.Max(decay, mindecay);
	164	info = 2;
	165
	166	//
	167	// Initialize data
	168	//
	169	rep.ngrad = 0;
	170	rep.nhess = 0;
	171	rep.ncholesky = 0;
	172
	173	//
	174	// General case.
	175	// Prepare task and network. Allocate space.
	176	//
	177	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
	178	g = new double[wcount-1+1];
	179	h = new double[wcount-1+1, wcount-1+1];
	180	hmod = new double[wcount-1+1, wcount-1+1];
	181	wbase = new double[wcount-1+1];
	182	wdir = new double[wcount-1+1];
	183	wbest = new double[wcount-1+1];
	184	wt = new double[wcount-1+1];
	185	wx = new double[wcount-1+1];
	186	ebest = AP.Math.MaxRealNumber;
	187
	188	//
	189	// Multiple passes
	190	//
	191	for(pass=1; pass<=restarts; pass++)
	192	{
	193
	194	//
	195	// Initialize weights
	196	//
	197	mlpbase.mlprandomize(ref network);
	198
	199	//
	200	// First stage of the hybrid algorithm: LBFGS
	201	//
	202	for(i_=0; i_<=wcount-1;i_++)
	203	{
	204	wbase[i_] = network.weights[i_];
	205	}
	206	lbfgs.minlbfgs(wcount, Math.Min(wcount, 5), ref wbase, 0.0, 0.0, 0.0, Math.Max(25, wcount), 0, ref state);
	207	while( lbfgs.minlbfgsiteration(ref state) )
	208	{
	209
	210	//
	211	// gradient
	212	//
	213	for(i_=0; i_<=wcount-1;i_++)
	214	{
	215	network.weights[i_] = state.x[i_];
	216	}
	217	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
	218
	219	//
	220	// weight decay
	221	//
	222	v = 0.0;
	223	for(i_=0; i_<=wcount-1;i_++)
	224	{
	225	v += network.weights[i_]*network.weights[i_];
	226	}
	227	state.f = state.f+0.5decayv;
	228	for(i_=0; i_<=wcount-1;i_++)
	229	{
	230	state.g[i_] = state.g[i_] + decay*network.weights[i_];
	231	}
	232
	233	//
	234	// next iteration
	235	//
	236	rep.ngrad = rep.ngrad+1;
	237	}
	238	lbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
	239	for(i_=0; i_<=wcount-1;i_++)
	240	{
	241	network.weights[i_] = wbase[i_];
	242	}
	243
	244	//
	245	// Second stage of the hybrid algorithm: LM
	246	//
	247	// Initialize H with identity matrix,
	248	// G with gradient,
	249	// E with regularized error.
	250	//
	251	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
	252	v = 0.0;
	253	for(i_=0; i_<=wcount-1;i_++)
	254	{
	255	v += network.weights[i_]*network.weights[i_];
	256	}
	257	e = e+0.5decayv;
	258	for(i_=0; i_<=wcount-1;i_++)
	259	{
	260	g[i_] = g[i_] + decay*network.weights[i_];
	261	}
	262	for(k=0; k<=wcount-1; k++)
	263	{
	264	h[k,k] = h[k,k]+decay;
	265	}
	266	rep.nhess = rep.nhess+1;
	267	lambda = 0.001;
	268	nu = 2;
	269	while( true )
	270	{
	271
	272	//
	273	// 1. HMod = H+lambda*I
	274	// 2. Try to solve (H+LambdaI)dx = -g.
	275	// Increase lambda if left part is not positive definite.
	276	//
	277	for(i=0; i<=wcount-1; i++)
	278	{
	279	for(i_=0; i_<=wcount-1;i_++)
	280	{
	281	hmod[i,i_] = h[i,i_];
	282	}
	283	hmod[i,i] = hmod[i,i]+lambda;
	284	}
	285	spd = cholesky.spdmatrixcholesky(ref hmod, wcount, true);
	286	rep.ncholesky = rep.ncholesky+1;
	287	if( !spd )
	288	{
	289	lambda = lambdalambdaupnu;
	290	nu = nu*2;
	291	continue;
	292	}
	293	if( !spdsolve.spdmatrixcholeskysolve(ref hmod, g, wcount, true, ref wdir) )
	294	{
	295	lambda = lambdalambdaupnu;
	296	nu = nu*2;
	297	continue;
	298	}
	299	for(i_=0; i_<=wcount-1;i_++)
	300	{
	301	wdir[i_] = -1*wdir[i_];
	302	}
	303
	304	//
	305	// Lambda found.
	306	// 1. Save old w in WBase
	307	// 1. Test some stopping criterions
	308	// 2. If error(w+wdir)>error(w), increase lambda
	309	//
	310	for(i_=0; i_<=wcount-1;i_++)
	311	{
	312	network.weights[i_] = network.weights[i_] + wdir[i_];
	313	}
	314	xnorm2 = 0.0;
	315	for(i_=0; i_<=wcount-1;i_++)
	316	{
	317	xnorm2 += network.weights[i_]*network.weights[i_];
	318	}
	319	stepnorm = 0.0;
	320	for(i_=0; i_<=wcount-1;i_++)
	321	{
	322	stepnorm += wdir[i_]*wdir[i_];
	323	}
	324	stepnorm = Math.Sqrt(stepnorm);
	325	enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5decayxnorm2;
	326	if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
	327	{
	328	break;
	329	}
	330	if( (double)(enew)>(double)(e) )
	331	{
	332	lambda = lambdalambdaupnu;
	333	nu = nu*2;
	334	continue;
	335	}
	336
	337	//
	338	// Optimize using inv(cholesky(H)) as preconditioner
	339	//
	340	if( !trinverse.rmatrixtrinverse(ref hmod, wcount, true, false) )
	341	{
	342
	343	//
	344	// if matrix can't be inverted then exit with errors
	345	// TODO: make WCount steps in direction suggested by HMod
	346	//
	347	info = -9;
	348	return;
	349	}
	350	for(i_=0; i_<=wcount-1;i_++)
	351	{
	352	wbase[i_] = network.weights[i_];
	353	}
	354	for(i=0; i<=wcount-1; i++)
	355	{
	356	wt[i] = 0;
	357	}
	358	lbfgs.minlbfgs(wcount, wcount, ref wt, 0.0, 0.0, 0.0, 5, 0, ref state);
	359	while( lbfgs.minlbfgsiteration(ref state) )
	360	{
	361
	362	//
	363	// gradient
	364	//
	365	for(i=0; i<=wcount-1; i++)
	366	{
	367	v = 0.0;
	368	for(i_=i; i_<=wcount-1;i_++)
	369	{
	370	v += state.x[i_]*hmod[i,i_];
	371	}
	372	network.weights[i] = wbase[i]+v;
	373	}
	374	mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
	375	for(i=0; i<=wcount-1; i++)
	376	{
	377	state.g[i] = 0;
	378	}
	379	for(i=0; i<=wcount-1; i++)
	380	{
	381	v = g[i];
	382	for(i_=i; i_<=wcount-1;i_++)
	383	{
	384	state.g[i_] = state.g[i_] + v*hmod[i,i_];
	385	}
	386	}
	387
	388	//
	389	// weight decay
	390	// grad(x'x) = A'(x0+A*t)
	391	//
	392	v = 0.0;
	393	for(i_=0; i_<=wcount-1;i_++)
	394	{
	395	v += network.weights[i_]*network.weights[i_];
	396	}
	397	state.f = state.f+0.5decayv;
	398	for(i=0; i<=wcount-1; i++)
	399	{
	400	v = decay*network.weights[i];
	401	for(i_=i; i_<=wcount-1;i_++)
	402	{
	403	state.g[i_] = state.g[i_] + v*hmod[i,i_];
	404	}
	405	}
	406
	407	//
	408	// next iteration
	409	//
	410	rep.ngrad = rep.ngrad+1;
	411	}
	412	lbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);
	413
	414	//
	415	// Accept new position.
	416	// Calculate Hessian
	417	//
	418	for(i=0; i<=wcount-1; i++)
	419	{
	420	v = 0.0;
	421	for(i_=i; i_<=wcount-1;i_++)
	422	{
	423	v += wt[i_]*hmod[i,i_];
	424	}
	425	network.weights[i] = wbase[i]+v;
	426	}
	427	mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
	428	v = 0.0;
	429	for(i_=0; i_<=wcount-1;i_++)
	430	{
	431	v += network.weights[i_]*network.weights[i_];
	432	}
	433	e = e+0.5decayv;
	434	for(i_=0; i_<=wcount-1;i_++)
	435	{
	436	g[i_] = g[i_] + decay*network.weights[i_];
	437	}
	438	for(k=0; k<=wcount-1; k++)
	439	{
	440	h[k,k] = h[k,k]+decay;
	441	}
	442	rep.nhess = rep.nhess+1;
	443
	444	//
	445	// Update lambda
	446	//
	447	lambda = lambda*lambdadown;
	448	nu = 2;
	449	}
	450
	451	//
	452	// update WBest
	453	//
	454	v = 0.0;
	455	for(i_=0; i_<=wcount-1;i_++)
	456	{
	457	v += network.weights[i_]*network.weights[i_];
	458	}
	459	e = 0.5decayv+mlpbase.mlperror(ref network, ref xy, npoints);
	460	if( (double)(e)<(double)(ebest) )
	461	{
	462	ebest = e;
	463	for(i_=0; i_<=wcount-1;i_++)
	464	{
	465	wbest[i_] = network.weights[i_];
	466	}
	467	}
	468	}
	469
	470	//
	471	// copy WBest to output
	472	//
	473	for(i_=0; i_<=wcount-1;i_++)
	474	{
	475	network.weights[i_] = wbest[i_];
	476	}
	477	}
	478
	479
	480	/*************************************************************************
	481	Neural network training using L-BFGS algorithm with regularization.
	482	Subroutine trains neural network with restarts from random positions.
	483	Algorithm is well suited for problems of any dimensionality (memory
	484	requirements and step complexity are linear by weights number).
	485
	486	INPUT PARAMETERS:
	487	Network - neural network with initialized geometry
	488	XY - training set
	489	NPoints - training set size
	490	Decay - weight decay constant, >=0.001
	491	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
	492	function.
	493	If you don't know what Decay to choose, use 0.001.
	494	Restarts - number of restarts from random position, >0.
	495	If you don't know what Restarts to choose, use 2.
	496	WStep - stopping criterion. Algorithm stops if step size is
	497	less than WStep. Recommended value - 0.01. Zero step
	498	size means stopping after MaxIts iterations.
	499	MaxIts - stopping criterion. Algorithm stops after MaxIts
	500	iterations (NOT gradient calculations). Zero MaxIts
	501	means stopping when step is sufficiently small.
	502
	503	OUTPUT PARAMETERS:
	504	Network - trained neural network.
	505	Info - return code:
	506	* -8, if both WStep=0 and MaxIts=0
	507	* -2, if there is a point with class number
	508	outside of [0..NOut-1].
	509	* -1, if wrong parameters specified
	510	(NPoints<0, Restarts<1).
	511	* 2, if task has been solved.
	512	Rep - training report
	513
	514	-- ALGLIB --
	515	Copyright 09.12.2007 by Bochkanov Sergey
	516	*************************************************************************/
	517	public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
	518	ref double[,] xy,
	519	int npoints,
	520	double decay,
	521	int restarts,
	522	double wstep,
	523	int maxits,
	524	ref int info,
	525	ref mlpreport rep)
	526	{
	527	int i = 0;
	528	int j = 0;
	529	int pass = 0;
	530	int nin = 0;
	531	int nout = 0;
	532	int wcount = 0;
	533	double[] w = new double[0];
	534	double[] wbest = new double[0];
	535	double e = 0;
	536	double v = 0;
	537	double ebest = 0;
	538	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
	539	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
	540	int i_ = 0;
	541
	542
	543	//
	544	// Test inputs, parse flags, read network geometry
	545	//
	546	if( (double)(wstep)==(double)(0) & maxits==0 )
	547	{
	548	info = -8;
	549	return;
	550	}
	551	if( npoints<=0 \| restarts<1 \| (double)(wstep)<(double)(0) \| maxits<0 )
	552	{
	553	info = -1;
	554	return;
	555	}
	556	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
	557	if( mlpbase.mlpissoftmax(ref network) )
	558	{
	559	for(i=0; i<=npoints-1; i++)
	560	{
	561	if( (int)Math.Round(xy[i,nin])<0 \| (int)Math.Round(xy[i,nin])>=nout )
	562	{
	563	info = -2;
	564	return;
	565	}
	566	}
	567	}
	568	decay = Math.Max(decay, mindecay);
	569	info = 2;
	570
	571	//
	572	// Prepare
	573	//
	574	mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
	575	w = new double[wcount-1+1];
	576	wbest = new double[wcount-1+1];
	577	ebest = AP.Math.MaxRealNumber;
	578
	579	//
	580	// Multiple starts
	581	//
	582	rep.ncholesky = 0;
	583	rep.nhess = 0;
	584	rep.ngrad = 0;
	585	for(pass=1; pass<=restarts; pass++)
	586	{
	587
	588	//
	589	// Process
	590	//
	591	mlpbase.mlprandomize(ref network);
	592	for(i_=0; i_<=wcount-1;i_++)
	593	{
	594	w[i_] = network.weights[i_];
	595	}
	596	lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, maxits, 0, ref state);
	597	while( lbfgs.minlbfgsiteration(ref state) )
	598	{
	599	for(i_=0; i_<=wcount-1;i_++)
	600	{
	601	network.weights[i_] = state.x[i_];
	602	}
	603	mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
	604	v = 0.0;
	605	for(i_=0; i_<=wcount-1;i_++)
	606	{
	607	v += network.weights[i_]*network.weights[i_];
	608	}
	609	state.f = state.f+0.5decayv;
	610	for(i_=0; i_<=wcount-1;i_++)
	611	{
	612	state.g[i_] = state.g[i_] + decay*network.weights[i_];
	613	}
	614	rep.ngrad = rep.ngrad+1;
	615	}
	616	lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
	617	for(i_=0; i_<=wcount-1;i_++)
	618	{
	619	network.weights[i_] = w[i_];
	620	}
	621
	622	//
	623	// Compare with best
	624	//
	625	v = 0.0;
	626	for(i_=0; i_<=wcount-1;i_++)
	627	{
	628	v += network.weights[i_]*network.weights[i_];
	629	}
	630	e = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5decayv;
	631	if( (double)(e)<(double)(ebest) )
	632	{
	633	for(i_=0; i_<=wcount-1;i_++)
	634	{
	635	wbest[i_] = network.weights[i_];
	636	}
	637	ebest = e;
	638	}
	639	}
	640
	641	//
	642	// The best network
	643	//
	644	for(i_=0; i_<=wcount-1;i_++)
	645	{
	646	network.weights[i_] = wbest[i_];
	647	}
	648	}
	649
	650
	651	/*************************************************************************
	652	Neural network training using early stopping (base algorithm - L-BFGS with
	653	regularization).
	654
	655	INPUT PARAMETERS:
	656	Network - neural network with initialized geometry
	657	TrnXY - training set
	658	TrnSize - training set size
	659	ValXY - validation set
	660	ValSize - validation set size
	661	Decay - weight decay constant, >=0.001
	662	Decay term 'Decay*\|\|Weights\|\|^2' is added to error
	663	function.
	664	If you don't know what Decay to choose, use 0.001.
	665	Restarts - number of restarts from random position, >0.
	666	If you don't know what Restarts to choose, use 2.
	667
	668	OUTPUT PARAMETERS:
	669	Network - trained neural network.
	670	Info - return code:
	671	* -2, if there is a point with class number
	672	outside of [0..NOut-1].
	673	* -1, if wrong parameters specified
	674	(NPoints<0, Restarts<1, ...).
	675	* 2, task has been solved, stopping criterion met -
	676	sufficiently small step size. Not expected (we
	677	use EARLY stopping) but possible and not an
	678	error.
	679	* 6, task has been solved, stopping criterion met -
	680	increasing of validation set error.
	681	Rep - training report
	682
	683	NOTE:
	684
	685	Algorithm stops if validation set error increases for a long enough or
	686	step size is small enought (there are task where validation set may
	687	decrease for eternity). In any case solution returned corresponds to the
	688	minimum of validation set error.
	689
	690	-- ALGLIB --
	691	Copyright 10.03.2009 by Bochkanov Sergey
	692	*************************************************************************/
	693	public static void mlptraines(ref mlpbase.multilayerperceptron network,
	694	ref double[,] trnxy,
	695	int trnsize,
	696	ref double[,] valxy,
	697	int valsize,
	698	double decay,
	699	int restarts,
	700	ref int info,
	701	ref mlpreport rep)
	702	{
	703	int i = 0;
	704	int j = 0;
	705	int pass = 0;
	706	int nin = 0;
	707	int nout = 0;
	708	int wcount = 0;
	709	double[] w = new double[0];
	710	double[] wbest = new double[0];
	711	double e = 0;
	712	double v = 0;
	713	double ebest = 0;
	714	double[] wfinal = new double[0];
	715	double efinal = 0;
	716	int itbest = 0;
	717	lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
	718	lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
	719	double wstep = 0;
	720	int i_ = 0;
	721
	722	wstep = 0.001;
	723
	724	//
	725	// Test inputs, parse flags, read network geometry
	726	//
	727	if( trnsize<=0 \| valsize<=0 \| restarts<1 \| (double)(decay)<(double)(0) )
	728	{
	729	info = -1;
	730	return;
	731	}
	732	mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
	733	if( mlpbase.mlpissoftmax(ref network) )
	734	{
	735	for(i=0; i<=trnsize-1; i++)
	736	{
	737	if( (int)Math.Round(trnxy[i,nin])<0 \| (int)Math.Round(trnxy[i,nin])>=nout )
	738	{
	739	info = -2;
	740	return;
	741	}
	742	}
	743	for(i=0; i<=valsize-1; i++)
	744	{
	745	if( (int)Math.Round(valxy[i,nin])<0 \| (int)Math.Round(valxy[i,nin])>=nout )
	746	{
	747	info = -2;
	748	return;
	749	}
	750	}
	751	}
	752	info = 2;
	753
	754	//
	755	// Prepare
	756	//
	757	mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
	758	w = new double[wcount-1+1];
	759	wbest = new double[wcount-1+1];
	760	wfinal = new double[wcount-1+1];
	761	efinal = AP.Math.MaxRealNumber;
	762	for(i=0; i<=wcount-1; i++)
	763	{
	764	wfinal[i] = 0;
	765	}
	766
	767	//
	768	// Multiple starts
	769	//
	770	rep.ncholesky = 0;
	771	rep.nhess = 0;
	772	rep.ngrad = 0;
	773	for(pass=1; pass<=restarts; pass++)
	774	{
	775
	776	//
	777	// Process
	778	//
	779	mlpbase.mlprandomize(ref network);
	780	ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
	781	for(i_=0; i_<=wcount-1;i_++)
	782	{
	783	wbest[i_] = network.weights[i_];
	784	}
	785	itbest = 0;
	786	for(i_=0; i_<=wcount-1;i_++)
	787	{
	788	w[i_] = network.weights[i_];
	789	}
	790	lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, 0, 0, ref state);
	791	while( lbfgs.minlbfgsiteration(ref state) )
	792	{
	793
	794	//
	795	// Calculate gradient
	796	//
	797	for(i_=0; i_<=wcount-1;i_++)
	798	{
	799	network.weights[i_] = state.x[i_];
	800	}
	801	mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
	802	v = 0.0;
	803	for(i_=0; i_<=wcount-1;i_++)
	804	{
	805	v += network.weights[i_]*network.weights[i_];
	806	}
	807	state.f = state.f+0.5decayv;
	808	for(i_=0; i_<=wcount-1;i_++)
	809	{
	810	state.g[i_] = state.g[i_] + decay*network.weights[i_];
	811	}
	812	rep.ngrad = rep.ngrad+1;
	813
	814	//
	815	// Validation set
	816	//
	817	if( state.xupdated )
	818	{
	819	for(i_=0; i_<=wcount-1;i_++)
	820	{
	821	network.weights[i_] = w[i_];
	822	}
	823	e = mlpbase.mlperror(ref network, ref valxy, valsize);
	824	if( (double)(e)<(double)(ebest) )
	825	{
	826	ebest = e;
	827	for(i_=0; i_<=wcount-1;i_++)
	828	{
	829	wbest[i_] = network.weights[i_];
	830	}
	831	itbest = internalrep.iterationscount;
	832	}
	833	if( internalrep.iterationscount>30 & (double)(internalrep.iterationscount)>(double)(1.5*itbest) )
	834	{
	835	info = 6;
	836	break;
	837	}
	838	}
	839	}
	840	lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
	841
	842	//
	843	// Compare with final answer
	844	//
	845	if( (double)(ebest)<(double)(efinal) )
	846	{
	847	for(i_=0; i_<=wcount-1;i_++)
	848	{
	849	wfinal[i_] = wbest[i_];
	850	}
	851	efinal = ebest;
	852	}
	853	}
	854
	855	//
	856	// The best network
	857	//
	858	for(i_=0; i_<=wcount-1;i_++)
	859	{
	860	network.weights[i_] = wfinal[i_];
	861	}
	862	}
	863
	864
	865	/*************************************************************************
	866	Cross-validation estimate of generalization error.
	867
	868	Base algorithm - L-BFGS.
	869
	870	INPUT PARAMETERS:
	871	Network - neural network with initialized geometry. Network is
	872	not changed during cross-validation - it is used only
	873	as a representative of its architecture.
	874	XY - training set.
	875	SSize - training set size
	876	Decay - weight decay, same as in MLPTrainLBFGS
	877	Restarts - number of restarts, >0.
	878	restarts are counted for each partition separately, so
	879	total number of restarts will be Restarts*FoldsCount.
	880	WStep - stopping criterion, same as in MLPTrainLBFGS
	881	MaxIts - stopping criterion, same as in MLPTrainLBFGS
	882	FoldsCount - number of folds in k-fold cross-validation,
	883	2<=FoldsCount<=SSize.
	884	recommended value: 10.
	885
	886	OUTPUT PARAMETERS:
	887	Info - return code, same as in MLPTrainLBFGS
	888	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
	889	CVRep - generalization error estimates
	890
	891	-- ALGLIB --
	892	Copyright 09.12.2007 by Bochkanov Sergey
	893	*************************************************************************/
	894	public static void mlpkfoldcvlbfgs(ref mlpbase.multilayerperceptron network,
	895	ref double[,] xy,
	896	int npoints,
	897	double decay,
	898	int restarts,
	899	double wstep,
	900	int maxits,
	901	int foldscount,
	902	ref int info,
	903	ref mlpreport rep,
	904	ref mlpcvreport cvrep)
	905	{
	906	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, ref rep, ref cvrep);
	907	}
	908
	909
	910	/*************************************************************************
	911	Cross-validation estimate of generalization error.
	912
	913	Base algorithm - Levenberg-Marquardt.
	914
	915	INPUT PARAMETERS:
	916	Network - neural network with initialized geometry. Network is
	917	not changed during cross-validation - it is used only
	918	as a representative of its architecture.
	919	XY - training set.
	920	SSize - training set size
	921	Decay - weight decay, same as in MLPTrainLBFGS
	922	Restarts - number of restarts, >0.
	923	restarts are counted for each partition separately, so
	924	total number of restarts will be Restarts*FoldsCount.
	925	FoldsCount - number of folds in k-fold cross-validation,
	926	2<=FoldsCount<=SSize.
	927	recommended value: 10.
	928
	929	OUTPUT PARAMETERS:
	930	Info - return code, same as in MLPTrainLBFGS
	931	Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
	932	CVRep - generalization error estimates
	933
	934	-- ALGLIB --
	935	Copyright 09.12.2007 by Bochkanov Sergey
	936	*************************************************************************/
	937	public static void mlpkfoldcvlm(ref mlpbase.multilayerperceptron network,
	938	ref double[,] xy,
	939	int npoints,
	940	double decay,
	941	int restarts,
	942	int foldscount,
	943	ref int info,
	944	ref mlpreport rep,
	945	ref mlpcvreport cvrep)
	946	{
	947	mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, ref rep, ref cvrep);
	948	}
	949
	950
	951	/*************************************************************************
	952	Internal cross-validation subroutine
	953	*************************************************************************/
	954	private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n,
	955	ref double[,] xy,
	956	int npoints,
	957	double decay,
	958	int restarts,
	959	int foldscount,
	960	bool lmalgorithm,
	961	double wstep,
	962	int maxits,
	963	ref int info,
	964	ref mlpreport rep,
	965	ref mlpcvreport cvrep)
	966	{
	967	int i = 0;
	968	int fold = 0;
	969	int j = 0;
	970	int k = 0;
	971	mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
	972	int nin = 0;
	973	int nout = 0;
	974	int rowlen = 0;
	975	int wcount = 0;
	976	int nclasses = 0;
	977	int tssize = 0;
	978	int cvssize = 0;
	979	double[,] cvset = new double[0,0];
	980	double[,] testset = new double[0,0];
	981	int[] folds = new int[0];
	982	int relcnt = 0;
	983	mlpreport internalrep = new mlpreport();
	984	double[] x = new double[0];
	985	double[] y = new double[0];
	986	int i_ = 0;
	987
	988
	989	//
	990	// Read network geometry, test parameters
	991	//
	992	mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
	993	if( mlpbase.mlpissoftmax(ref n) )
	994	{
	995	nclasses = nout;
	996	rowlen = nin+1;
	997	}
	998	else
	999	{
	1000	nclasses = -nout;
	1001	rowlen = nin+nout;
	1002	}
	1003	if( npoints<=0 \| foldscount<2 \| foldscount>npoints )
	1004	{
	1005	info = -1;
	1006	return;
	1007	}
	1008	mlpbase.mlpcopy(ref n, ref network);
	1009
	1010	//
	1011	// K-fold out cross-validation.
	1012	// First, estimate generalization error
	1013	//
	1014	testset = new double[npoints-1+1, rowlen-1+1];
	1015	cvset = new double[npoints-1+1, rowlen-1+1];
	1016	x = new double[nin-1+1];
	1017	y = new double[nout-1+1];
	1018	mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
	1019	cvrep.relclserror = 0;
	1020	cvrep.avgce = 0;
	1021	cvrep.rmserror = 0;
	1022	cvrep.avgerror = 0;
	1023	cvrep.avgrelerror = 0;
	1024	rep.ngrad = 0;
	1025	rep.nhess = 0;
	1026	rep.ncholesky = 0;
	1027	relcnt = 0;
	1028	for(fold=0; fold<=foldscount-1; fold++)
	1029	{
	1030
	1031	//
	1032	// Separate set
	1033	//
	1034	tssize = 0;
	1035	cvssize = 0;
	1036	for(i=0; i<=npoints-1; i++)
	1037	{
	1038	if( folds[i]==fold )
	1039	{
	1040	for(i_=0; i_<=rowlen-1;i_++)
	1041	{
	1042	testset[tssize,i_] = xy[i,i_];
	1043	}
	1044	tssize = tssize+1;
	1045	}
	1046	else
	1047	{
	1048	for(i_=0; i_<=rowlen-1;i_++)
	1049	{
	1050	cvset[cvssize,i_] = xy[i,i_];
	1051	}
	1052	cvssize = cvssize+1;
	1053	}
	1054	}
	1055
	1056	//
	1057	// Train on CV training set
	1058	//
	1059	if( lmalgorithm )
	1060	{
	1061	mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
	1062	}
	1063	else
	1064	{
	1065	mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
	1066	}
	1067	if( info<0 )
	1068	{
	1069	cvrep.relclserror = 0;
	1070	cvrep.avgce = 0;
	1071	cvrep.rmserror = 0;
	1072	cvrep.avgerror = 0;
	1073	cvrep.avgrelerror = 0;
	1074	return;
	1075	}
	1076	rep.ngrad = rep.ngrad+internalrep.ngrad;
	1077	rep.nhess = rep.nhess+internalrep.nhess;
	1078	rep.ncholesky = rep.ncholesky+internalrep.ncholesky;
	1079
	1080	//
	1081	// Estimate error using CV test set
	1082	//
	1083	if( mlpbase.mlpissoftmax(ref network) )
	1084	{
	1085
	1086	//
	1087	// classification-only code
	1088	//
	1089	cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
	1090	cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
	1091	}
	1092	for(i=0; i<=tssize-1; i++)
	1093	{
	1094	for(i_=0; i_<=nin-1;i_++)
	1095	{
	1096	x[i_] = testset[i,i_];
	1097	}
	1098	mlpbase.mlpprocess(ref network, ref x, ref y);
	1099	if( mlpbase.mlpissoftmax(ref network) )
	1100	{
	1101
	1102	//
	1103	// Classification-specific code
	1104	//
	1105	k = (int)Math.Round(testset[i,nin]);
	1106	for(j=0; j<=nout-1; j++)
	1107	{
	1108	if( j==k )
	1109	{
	1110	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
	1111	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
	1112	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
	1113	relcnt = relcnt+1;
	1114	}
	1115	else
	1116	{
	1117	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
	1118	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
	1119	}
	1120	}
	1121	}
	1122	else
	1123	{
	1124
	1125	//
	1126	// Regression-specific code
	1127	//
	1128	for(j=0; j<=nout-1; j++)
	1129	{
	1130	cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
	1131	cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
	1132	if( (double)(testset[i,nin+j])!=(double)(0) )
	1133	{
	1134	cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
	1135	relcnt = relcnt+1;
	1136	}
	1137	}
	1138	}
	1139	}
	1140	}
	1141	if( mlpbase.mlpissoftmax(ref network) )
	1142	{
	1143	cvrep.relclserror = cvrep.relclserror/npoints;
	1144	cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
	1145	}
	1146	cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
	1147	cvrep.avgerror = cvrep.avgerror/(npoints*nout);
	1148	cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
	1149	info = 1;
	1150	}
	1151
	1152
	1153	/*************************************************************************
	1154	Subroutine prepares K-fold split of the training set.
	1155
	1156	NOTES:
	1157	"NClasses>0" means that we have classification task.
	1158	"NClasses<0" means regression task with -NClasses real outputs.
	1159	*************************************************************************/
	1160	private static void mlpkfoldsplit(ref double[,] xy,
	1161	int npoints,
	1162	int nclasses,
	1163	int foldscount,
	1164	bool stratifiedsplits,
	1165	ref int[] folds)
	1166	{
	1167	int i = 0;
	1168	int j = 0;
	1169	int k = 0;
	1170
	1171
	1172	//
	1173	// test parameters
	1174	//
	1175	System.Diagnostics.Debug.Assert(npoints>0, "MLPKFoldSplit: wrong NPoints!");
	1176	System.Diagnostics.Debug.Assert(nclasses>1 \| nclasses<0, "MLPKFoldSplit: wrong NClasses!");
	1177	System.Diagnostics.Debug.Assert(foldscount>=2 & foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!");
	1178	System.Diagnostics.Debug.Assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!");
	1179
	1180	//
	1181	// Folds
	1182	//
	1183	folds = new int[npoints-1+1];
	1184	for(i=0; i<=npoints-1; i++)
	1185	{
	1186	folds[i] = i*foldscount/npoints;
	1187	}
	1188	for(i=0; i<=npoints-2; i++)
	1189	{
	1190	j = i+AP.Math.RandomInteger(npoints-i);
	1191	if( j!=i )
	1192	{
	1193	k = folds[i];
	1194	folds[i] = folds[j];
	1195	folds[j] = k;
	1196	}
	1197	}
	1198	}
	1199	}
	1200	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences