/*************************************************************************
Copyright (c) 2007-2008, Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses

>>> END OF LICENSE >>>
*************************************************************************/

using System;

namespace alglib
{
    public class mlptrain
    {
        /*************************************************************************
        Training report:
        * NGrad     - number of gradient calculations
        * NHess     - number of Hessian calculations
        * NCholesky - number of Cholesky decompositions
        *************************************************************************/
        public struct mlpreport
        {
            public int ngrad;
            public int nhess;
            public int ncholesky;
        };


        /*************************************************************************
        Cross-validation estimates of generalization error
        *************************************************************************/
        public struct mlpcvreport
        {
            public double relclserror;
            public double avgce;
            public double rmserror;
            public double avgerror;
            public double avgrelerror;
        };


        public const double mindecay = 0.001;


        /*************************************************************************
        Neural network training using modified Levenberg-Marquardt with exact
        Hessian calculation and regularization. Subroutine trains neural network
        with restarts from random positions. Algorithm is well suited for small
        and medium scale problems (hundreds of weights).

        INPUT PARAMETERS:
            Network     -   neural network with initialized geometry
            XY          -   training set
            NPoints     -   training set size
            Decay       -   weight decay constant, >=0.001
                            Decay term 'Decay*||Weights||^2' is added to error
                            function.
                            If you don't know what Decay to choose, use 0.001.
            Restarts    -   number of restarts from random position, >0.
                            If you don't know what Restarts to choose, use 2.

        OUTPUT PARAMETERS:
            Network     -   trained neural network.
            Info        -   return code:
                            * -9, if internal matrix inverse subroutine failed
                            * -2, if there is a point with class number
                                  outside of [0..NOut-1].
                            * -1, if wrong parameters specified
                                  (NPoints<0, Restarts<1).
                            *  2, if task has been solved.
            Rep         -   training report

          -- ALGLIB --
             Copyright 10.03.2009 by Bochkanov Sergey
        *************************************************************************/
        public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
            ref double[,] xy,
            int npoints,
            double decay,
            int restarts,
            ref int info,
            ref mlpreport rep)
        {
            int nin = 0;
            int nout = 0;
            int wcount = 0;
            double lmftol = 0;
            double lmsteptol = 0;
            int i = 0;
            int j = 0;
            int k = 0;
            int mx = 0;
            double v = 0;
            double e = 0;
            double enew = 0;
            double xnorm2 = 0;
            double stepnorm = 0;
            double[] g = new double[0];
            double[] d = new double[0];
            double[,] h = new double[0,0];
            double[,] hmod = new double[0,0];
            double[,] z = new double[0,0];
            bool spd = new bool();
            double nu = 0;
            double lambda = 0;
            double lambdaup = 0;
            double lambdadown = 0;
            int cvcnt = 0;
            double cvrelcnt = 0;
            lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
            lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
            double[] x = new double[0];
            double[] y = new double[0];
            double[] wbase = new double[0];
            double wstep = 0;
            double[] wdir = new double[0];
            double[] wt = new double[0];
            double[] wx = new double[0];
            int pass = 0;
            double[] wbest = new double[0];
            double ebest = 0;
            int i_ = 0;

            mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
            lambdaup = 10;
            lambdadown = 0.3;
            lmftol = 0.001;
            lmsteptol = 0.001;

            //
            // Test for inputs
            //
            if( npoints<=0 | restarts<1 )
            {
                info = -1;
                return;
            }
            if( mlpbase.mlpissoftmax(ref network) )
            {
                for(i=0; i<=npoints-1; i++)
                {
                    if( (int)Math.Round(xy[i,nin])<0 | (int)Math.Round(xy[i,nin])>=nout )
                    {
                        info = -2;
                        return;
                    }
                }
            }
            decay = Math.Max(decay, mindecay);
            info = 2;

            //
            // Initialize data
            //
            rep.ngrad = 0;
            rep.nhess = 0;
            rep.ncholesky = 0;

            //
            // General case.
            // Prepare task and network. Allocate space.
            //
            mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
            g = new double[wcount-1+1];
            h = new double[wcount-1+1, wcount-1+1];
            hmod = new double[wcount-1+1, wcount-1+1];
            wbase = new double[wcount-1+1];
            wdir = new double[wcount-1+1];
            wbest = new double[wcount-1+1];
            wt = new double[wcount-1+1];
            wx = new double[wcount-1+1];
            ebest = AP.Math.MaxRealNumber;

            //
            // Multiple passes
            //
            for(pass=1; pass<=restarts; pass++)
            {

                //
                // Initialize weights
                //
                mlpbase.mlprandomize(ref network);

                //
                // First stage of the hybrid algorithm: LBFGS
                //
                for(i_=0; i_<=wcount-1; i_++)
                {
                    wbase[i_] = network.weights[i_];
                }
                lbfgs.minlbfgs(wcount, Math.Min(wcount, 5), ref wbase, 0.0, 0.0, 0.0, Math.Max(25, wcount), 0, ref state);
                while( lbfgs.minlbfgsiteration(ref state) )
                {

                    //
                    // gradient
                    //
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        network.weights[i_] = state.x[i_];
                    }
                    mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

                    //
                    // weight decay
                    //
                    v = 0.0;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    state.f = state.f+0.5*decay*v;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + decay*network.weights[i_];
                    }

                    //
                    // next iteration
                    //
                    rep.ngrad = rep.ngrad+1;
                }
                lbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
                for(i_=0; i_<=wcount-1; i_++)
                {
                    network.weights[i_] = wbase[i_];
                }

                //
                // Second stage of the hybrid algorithm: LM
                //
                // Initialize H with identity matrix,
                // G with gradient,
                // E with regularized error.
                //
                mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
                v = 0.0;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = e+0.5*decay*v;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    g[i_] = g[i_] + decay*network.weights[i_];
                }
                for(k=0; k<=wcount-1; k++)
                {
                    h[k,k] = h[k,k]+decay;
                }
                rep.nhess = rep.nhess+1;
                lambda = 0.001;
                nu = 2;
                while( true )
                {

                    //
                    // 1. HMod = H+lambda*I
                    // 2. Try to solve (H+Lambda*I)*dx = -g.
                    //    Increase lambda if left part is not positive definite.
                    //
                    for(i=0; i<=wcount-1; i++)
                    {
                        for(i_=0; i_<=wcount-1; i_++)
                        {
                            hmod[i,i_] = h[i,i_];
                        }
                        hmod[i,i] = hmod[i,i]+lambda;
                    }
                    spd = cholesky.spdmatrixcholesky(ref hmod, wcount, true);
                    rep.ncholesky = rep.ncholesky+1;
                    if( !spd )
                    {
                        lambda = lambda*lambdaup*nu;
                        nu = nu*2;
                        continue;
                    }
                    if( !spdsolve.spdmatrixcholeskysolve(ref hmod, g, wcount, true, ref wdir) )
                    {
                        lambda = lambda*lambdaup*nu;
                        nu = nu*2;
                        continue;
                    }
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wdir[i_] = -1*wdir[i_];
                    }

                    //
                    // Lambda found.
                    // 1. Save old w in WBase
                    // 2. Test some stopping criteria
                    // 3. If error(w+wdir)>error(w), increase lambda
                    //
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        network.weights[i_] = network.weights[i_] + wdir[i_];
                    }
                    xnorm2 = 0.0;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        xnorm2 += network.weights[i_]*network.weights[i_];
                    }
                    stepnorm = 0.0;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        stepnorm += wdir[i_]*wdir[i_];
                    }
                    stepnorm = Math.Sqrt(stepnorm);
                    enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5*decay*xnorm2;
                    if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
                    {
                        break;
                    }
                    if( (double)(enew)>(double)(e) )
                    {
                        lambda = lambda*lambdaup*nu;
                        nu = nu*2;
                        continue;
                    }

                    //
                    // Optimize using inv(cholesky(H)) as preconditioner
                    //
                    if( !trinverse.rmatrixtrinverse(ref hmod, wcount, true, false) )
                    {

                        //
                        // if matrix can't be inverted then exit with errors
                        // TODO: make WCount steps in direction suggested by HMod
                        //
                        info = -9;
                        return;
                    }
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wbase[i_] = network.weights[i_];
                    }
                    for(i=0; i<=wcount-1; i++)
                    {
                        wt[i] = 0;
                    }
                    lbfgs.minlbfgs(wcount, wcount, ref wt, 0.0, 0.0, 0.0, 5, 0, ref state);
                    while( lbfgs.minlbfgsiteration(ref state) )
                    {

                        //
                        // gradient
                        //
                        for(i=0; i<=wcount-1; i++)
                        {
                            v = 0.0;
                            for(i_=i; i_<=wcount-1; i_++)
                            {
                                v += state.x[i_]*hmod[i,i_];
                            }
                            network.weights[i] = wbase[i]+v;
                        }
                        mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                        for(i=0; i<=wcount-1; i++)
                        {
                            state.g[i] = 0;
                        }
                        for(i=0; i<=wcount-1; i++)
                        {
                            v = g[i];
                            for(i_=i; i_<=wcount-1; i_++)
                            {
                                state.g[i_] = state.g[i_] + v*hmod[i,i_];
                            }
                        }

                        //
                        // weight decay
                        // grad(x'*x) = A'*(x0+A*t)
                        //
                        v = 0.0;
                        for(i_=0; i_<=wcount-1; i_++)
                        {
                            v += network.weights[i_]*network.weights[i_];
                        }
                        state.f = state.f+0.5*decay*v;
                        for(i=0; i<=wcount-1; i++)
                        {
                            v = decay*network.weights[i];
                            for(i_=i; i_<=wcount-1; i_++)
                            {
                                state.g[i_] = state.g[i_] + v*hmod[i,i_];
                            }
                        }

                        //
                        // next iteration
                        //
                        rep.ngrad = rep.ngrad+1;
                    }
                    lbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

                    //
                    // Accept new position.
                    // Calculate Hessian
                    //
                    for(i=0; i<=wcount-1; i++)
                    {
                        v = 0.0;
                        for(i_=i; i_<=wcount-1; i_++)
                        {
                            v += wt[i_]*hmod[i,i_];
                        }
                        network.weights[i] = wbase[i]+v;
                    }
                    mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
                    v = 0.0;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    e = e+0.5*decay*v;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        g[i_] = g[i_] + decay*network.weights[i_];
                    }
                    for(k=0; k<=wcount-1; k++)
                    {
                        h[k,k] = h[k,k]+decay;
                    }
                    rep.nhess = rep.nhess+1;

                    //
                    // Update lambda
                    //
                    lambda = lambda*lambdadown;
                    nu = 2;
                }

                //
                // update WBest
                //
                v = 0.0;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = 0.5*decay*v+mlpbase.mlperror(ref network, ref xy, npoints);
                if( (double)(e)<(double)(ebest) )
                {
                    ebest = e;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                }
            }

            //
            // copy WBest to output
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = wbest[i_];
            }
        }
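

        /*************************************************************************
        Editor's note: illustrative usage sketch for MLPTrainLM, not part of the
        original ALGLIB sources. It assumes that mlpbase.mlpcreate1 (network
        with one hidden layer) behaves as in the accompanying mlpbase unit; the
        tiny data set and geometry below are placeholders.
        *************************************************************************/
        private static void mlptrainlmexample()
        {
            int i = 0;
            int info = 0;
            mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
            mlpreport rep = new mlpreport();
            double[,] xy = new double[4, 2];

            //
            // Toy regression set: rows are [input, target] with target = 2*input.
            //
            for(i=0; i<=3; i++)
            {
                xy[i,0] = i;
                xy[i,1] = 2*i;
            }

            //
            // 1 input, 3 hidden neurons, 1 output (assumed mlpbase constructor),
            // then LM training with the recommended Decay=0.001 and Restarts=2.
            //
            mlpbase.mlpcreate1(1, 3, 1, ref net);
            mlptrainlm(ref net, ref xy, 4, 0.001, 2, ref info, ref rep);

            //
            // Info==2 means the task was solved; Rep counts the gradient,
            // Hessian and Cholesky evaluations spent on training.
            //
            System.Diagnostics.Debug.Assert(info==2, "MLPTrainLM example: unexpected Info");
        }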


        /*************************************************************************
        Neural network training using L-BFGS algorithm with regularization.
        Subroutine trains neural network with restarts from random positions.
        Algorithm is well suited for problems of any dimensionality (memory
        requirements and step complexity are linear in the number of weights).

        INPUT PARAMETERS:
            Network     -   neural network with initialized geometry
            XY          -   training set
            NPoints     -   training set size
            Decay       -   weight decay constant, >=0.001
                            Decay term 'Decay*||Weights||^2' is added to error
                            function.
                            If you don't know what Decay to choose, use 0.001.
            Restarts    -   number of restarts from random position, >0.
                            If you don't know what Restarts to choose, use 2.
            WStep       -   stopping criterion. Algorithm stops if step size is
                            less than WStep. Recommended value - 0.01. Zero step
                            size means stopping after MaxIts iterations.
            MaxIts      -   stopping criterion. Algorithm stops after MaxIts
                            iterations (NOT gradient calculations). Zero MaxIts
                            means stopping when step is sufficiently small.

        OUTPUT PARAMETERS:
            Network     -   trained neural network.
            Info        -   return code:
                            * -8, if both WStep=0 and MaxIts=0
                            * -2, if there is a point with class number
                                  outside of [0..NOut-1].
                            * -1, if wrong parameters specified
                                  (NPoints<0, Restarts<1).
                            *  2, if task has been solved.
            Rep         -   training report

          -- ALGLIB --
             Copyright 09.12.2007 by Bochkanov Sergey
        *************************************************************************/
        public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
            ref double[,] xy,
            int npoints,
            double decay,
            int restarts,
            double wstep,
            int maxits,
            ref int info,
            ref mlpreport rep)
        {
            int i = 0;
            int j = 0;
            int pass = 0;
            int nin = 0;
            int nout = 0;
            int wcount = 0;
            double[] w = new double[0];
            double[] wbest = new double[0];
            double e = 0;
            double v = 0;
            double ebest = 0;
            lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
            lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
            int i_ = 0;

            //
            // Test inputs, parse flags, read network geometry
            //
            if( (double)(wstep)==(double)(0) & maxits==0 )
            {
                info = -8;
                return;
            }
            if( npoints<=0 | restarts<1 | (double)(wstep)<(double)(0) | maxits<0 )
            {
                info = -1;
                return;
            }
            mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
            if( mlpbase.mlpissoftmax(ref network) )
            {
                for(i=0; i<=npoints-1; i++)
                {
                    if( (int)Math.Round(xy[i,nin])<0 | (int)Math.Round(xy[i,nin])>=nout )
                    {
                        info = -2;
                        return;
                    }
                }
            }
            decay = Math.Max(decay, mindecay);
            info = 2;

            //
            // Prepare
            //
            mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
            w = new double[wcount-1+1];
            wbest = new double[wcount-1+1];
            ebest = AP.Math.MaxRealNumber;

            //
            // Multiple starts
            //
            rep.ncholesky = 0;
            rep.nhess = 0;
            rep.ngrad = 0;
            for(pass=1; pass<=restarts; pass++)
            {

                //
                // Process
                //
                mlpbase.mlprandomize(ref network);
                for(i_=0; i_<=wcount-1; i_++)
                {
                    w[i_] = network.weights[i_];
                }
                lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, maxits, 0, ref state);
                while( lbfgs.minlbfgsiteration(ref state) )
                {
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        network.weights[i_] = state.x[i_];
                    }
                    mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
                    v = 0.0;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    state.f = state.f+0.5*decay*v;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + decay*network.weights[i_];
                    }
                    rep.ngrad = rep.ngrad+1;
                }
                lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
                for(i_=0; i_<=wcount-1; i_++)
                {
                    network.weights[i_] = w[i_];
                }

                //
                // Compare with best
                //
                v = 0.0;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5*decay*v;
                if( (double)(e)<(double)(ebest) )
                {
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                    ebest = e;
                }
            }

            //
            // The best network
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = wbest[i_];
            }
        }
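

        /*************************************************************************
        Editor's note: illustrative usage sketch for MLPTrainLBFGS, not part of
        the original ALGLIB sources. It assumes that mlpbase.mlpcreatec1 builds
        a classifier network with one hidden layer, as in the accompanying
        mlpbase unit; the data set below is a placeholder.
        *************************************************************************/
        private static void mlptrainlbfgsexample()
        {
            int info = 0;
            mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
            mlpreport rep = new mlpreport();
            double[,] xy = new double[4, 2];

            //
            // Toy classification set: rows are [input, class index 0..NOut-1];
            // negative inputs belong to class 0, positive inputs to class 1.
            //
            xy[0,0] = -2; xy[0,1] = 0;
            xy[1,0] = -1; xy[1,1] = 0;
            xy[2,0] = 1;  xy[2,1] = 1;
            xy[3,0] = 2;  xy[3,1] = 1;

            //
            // Classifier with 1 input, 3 hidden neurons, 2 output classes.
            // WStep=0.01 is the recommended step-size criterion; MaxIts=0
            // means "stop on sufficiently small step only".
            //
            mlpbase.mlpcreatec1(1, 3, 2, ref net);
            mlptrainlbfgs(ref net, ref xy, 4, 0.001, 2, 0.01, 0, ref info, ref rep);
            System.Diagnostics.Debug.Assert(info==2, "MLPTrainLBFGS example: unexpected Info");
        }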


        /*************************************************************************
        Neural network training using early stopping (base algorithm - L-BFGS with
        regularization).

        INPUT PARAMETERS:
            Network     -   neural network with initialized geometry
            TrnXY       -   training set
            TrnSize     -   training set size
            ValXY       -   validation set
            ValSize     -   validation set size
            Decay       -   weight decay constant, >=0.001
                            Decay term 'Decay*||Weights||^2' is added to error
                            function.
                            If you don't know what Decay to choose, use 0.001.
            Restarts    -   number of restarts from random position, >0.
                            If you don't know what Restarts to choose, use 2.

        OUTPUT PARAMETERS:
            Network     -   trained neural network.
            Info        -   return code:
                            * -2, if there is a point with class number
                                  outside of [0..NOut-1].
                            * -1, if wrong parameters specified
                                  (NPoints<0, Restarts<1, ...).
                            *  2, task has been solved, stopping criterion met -
                                  sufficiently small step size. Not expected (we
                                  use EARLY stopping) but possible and not an
                                  error.
                            *  6, task has been solved, stopping criterion met -
                                  increasing of validation set error.
            Rep         -   training report

        NOTE:

        Algorithm stops if validation set error increases for long enough or if
        step size is small enough (there are tasks where the validation set error
        may decrease for eternity). In any case, the solution returned corresponds
        to the minimum of the validation set error.

          -- ALGLIB --
             Copyright 10.03.2009 by Bochkanov Sergey
        *************************************************************************/
        public static void mlptraines(ref mlpbase.multilayerperceptron network,
            ref double[,] trnxy,
            int trnsize,
            ref double[,] valxy,
            int valsize,
            double decay,
            int restarts,
            ref int info,
            ref mlpreport rep)
        {
            int i = 0;
            int j = 0;
            int pass = 0;
            int nin = 0;
            int nout = 0;
            int wcount = 0;
            double[] w = new double[0];
            double[] wbest = new double[0];
            double e = 0;
            double v = 0;
            double ebest = 0;
            double[] wfinal = new double[0];
            double efinal = 0;
            int itbest = 0;
            lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
            lbfgs.lbfgsstate state = new lbfgs.lbfgsstate();
            double wstep = 0;
            int i_ = 0;

            wstep = 0.001;

            //
            // Test inputs, parse flags, read network geometry
            //
            if( trnsize<=0 | valsize<=0 | restarts<1 | (double)(decay)<(double)(0) )
            {
                info = -1;
                return;
            }
            mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
            if( mlpbase.mlpissoftmax(ref network) )
            {
                for(i=0; i<=trnsize-1; i++)
                {
                    if( (int)Math.Round(trnxy[i,nin])<0 | (int)Math.Round(trnxy[i,nin])>=nout )
                    {
                        info = -2;
                        return;
                    }
                }
                for(i=0; i<=valsize-1; i++)
                {
                    if( (int)Math.Round(valxy[i,nin])<0 | (int)Math.Round(valxy[i,nin])>=nout )
                    {
                        info = -2;
                        return;
                    }
                }
            }
            info = 2;

            //
            // Prepare
            //
            mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
            w = new double[wcount-1+1];
            wbest = new double[wcount-1+1];
            wfinal = new double[wcount-1+1];
            efinal = AP.Math.MaxRealNumber;
            for(i=0; i<=wcount-1; i++)
            {
                wfinal[i] = 0;
            }

            //
            // Multiple starts
            //
            rep.ncholesky = 0;
            rep.nhess = 0;
            rep.ngrad = 0;
            for(pass=1; pass<=restarts; pass++)
            {

                //
                // Process
                //
                mlpbase.mlprandomize(ref network);
                ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
                for(i_=0; i_<=wcount-1; i_++)
                {
                    wbest[i_] = network.weights[i_];
                }
                itbest = 0;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    w[i_] = network.weights[i_];
                }
                lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, 0, 0, ref state);
                while( lbfgs.minlbfgsiteration(ref state) )
                {

                    //
                    // Calculate gradient
                    //
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        network.weights[i_] = state.x[i_];
                    }
                    mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
                    v = 0.0;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    state.f = state.f+0.5*decay*v;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + decay*network.weights[i_];
                    }
                    rep.ngrad = rep.ngrad+1;

                    //
                    // Validation set
                    //
                    if( state.xupdated )
                    {
                        for(i_=0; i_<=wcount-1; i_++)
                        {
                            network.weights[i_] = w[i_];
                        }
                        e = mlpbase.mlperror(ref network, ref valxy, valsize);
                        if( (double)(e)<(double)(ebest) )
                        {
                            ebest = e;
                            for(i_=0; i_<=wcount-1; i_++)
                            {
                                wbest[i_] = network.weights[i_];
                            }
                            itbest = internalrep.iterationscount;
                        }
                        if( internalrep.iterationscount>30 & (double)(internalrep.iterationscount)>(double)(1.5*itbest) )
                        {
                            info = 6;
                            break;
                        }
                    }
                }
                lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);

                //
                // Compare with final answer
                //
                if( (double)(ebest)<(double)(efinal) )
                {
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wfinal[i_] = wbest[i_];
                    }
                    efinal = ebest;
                }
            }

            //
            // The best network
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = wfinal[i_];
            }
        }
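

        /*************************************************************************
        Editor's note: illustrative usage sketch for MLPTrainES, not part of the
        original ALGLIB sources. It assumes mlpbase.mlpcreate1 from the mlpbase
        unit; the training/validation split below is a placeholder.
        *************************************************************************/
        private static void mlptrainesexample()
        {
            int i = 0;
            int info = 0;
            mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
            mlpreport rep = new mlpreport();
            double[,] trnxy = new double[6, 2];
            double[,] valxy = new double[3, 2];

            //
            // Same dependence (target = 2*input) split into a training set and
            // a held-out validation set used for early stopping.
            //
            for(i=0; i<=5; i++)
            {
                trnxy[i,0] = i;
                trnxy[i,1] = 2*i;
            }
            for(i=0; i<=2; i++)
            {
                valxy[i,0] = i+0.5;
                valxy[i,1] = 2*(i+0.5);
            }

            mlpbase.mlpcreate1(1, 3, 1, ref net);
            mlptraines(ref net, ref trnxy, 6, ref valxy, 3, 0.001, 2, ref info, ref rep);

            //
            // Info==6 (validation error started to grow) and Info==2 (small
            // step) are both successful outcomes; the returned weights always
            // correspond to the minimum of the validation set error.
            //
            System.Diagnostics.Debug.Assert(info==2 | info==6, "MLPTrainES example: unexpected Info");
        }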


        /*************************************************************************
        Cross-validation estimate of generalization error.

        Base algorithm - L-BFGS.

        INPUT PARAMETERS:
            Network     -   neural network with initialized geometry. Network is
                            not changed during cross-validation - it is used only
                            as a representative of its architecture.
            XY          -   training set.
            NPoints     -   training set size
            Decay       -   weight decay, same as in MLPTrainLBFGS
            Restarts    -   number of restarts, >0.
                            restarts are counted for each partition separately, so
                            total number of restarts will be Restarts*FoldsCount.
            WStep       -   stopping criterion, same as in MLPTrainLBFGS
            MaxIts      -   stopping criterion, same as in MLPTrainLBFGS
            FoldsCount  -   number of folds in k-fold cross-validation,
                            2<=FoldsCount<=NPoints.
                            recommended value: 10.

        OUTPUT PARAMETERS:
            Info        -   return code, same as in MLPTrainLBFGS
            Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS
            CVRep       -   generalization error estimates

          -- ALGLIB --
             Copyright 09.12.2007 by Bochkanov Sergey
        *************************************************************************/
        public static void mlpkfoldcvlbfgs(ref mlpbase.multilayerperceptron network,
            ref double[,] xy,
            int npoints,
            double decay,
            int restarts,
            double wstep,
            int maxits,
            int foldscount,
            ref int info,
            ref mlpreport rep,
            ref mlpcvreport cvrep)
        {
            mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, ref rep, ref cvrep);
        }


        /*************************************************************************
        Cross-validation estimate of generalization error.

        Base algorithm - Levenberg-Marquardt.

        INPUT PARAMETERS:
            Network     -   neural network with initialized geometry. Network is
                            not changed during cross-validation - it is used only
                            as a representative of its architecture.
            XY          -   training set.
            NPoints     -   training set size
            Decay       -   weight decay, same as in MLPTrainLBFGS
            Restarts    -   number of restarts, >0.
                            restarts are counted for each partition separately, so
                            total number of restarts will be Restarts*FoldsCount.
            FoldsCount  -   number of folds in k-fold cross-validation,
                            2<=FoldsCount<=NPoints.
                            recommended value: 10.

        OUTPUT PARAMETERS:
            Info        -   return code, same as in MLPTrainLBFGS
            Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS
            CVRep       -   generalization error estimates

          -- ALGLIB --
             Copyright 09.12.2007 by Bochkanov Sergey
        *************************************************************************/
        public static void mlpkfoldcvlm(ref mlpbase.multilayerperceptron network,
            ref double[,] xy,
            int npoints,
            double decay,
            int restarts,
            int foldscount,
            ref int info,
            ref mlpreport rep,
            ref mlpcvreport cvrep)
        {
            mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, ref rep, ref cvrep);
        }
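

        /*************************************************************************
        Editor's note: illustrative usage sketch for MLPKFoldCVLM, not part of
        the original ALGLIB sources. It assumes mlpbase.mlpcreate1 from the
        mlpbase unit; the data set and fold count are placeholders.
        *************************************************************************/
        private static void mlpkfoldcvlmexample()
        {
            int i = 0;
            int info = 0;
            mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
            mlpreport rep = new mlpreport();
            mlpcvreport cvrep = new mlpcvreport();
            double[,] xy = new double[10, 2];

            //
            // Toy regression set: 10 samples of target = 2*input.
            //
            for(i=0; i<=9; i++)
            {
                xy[i,0] = i;
                xy[i,1] = 2*i;
            }

            //
            // 5-fold cross-validation with LM as the base trainer. The network
            // passed in is used only as an architecture template; CVRep then
            // holds the generalization error estimates (RMS/average/relative).
            //
            mlpbase.mlpcreate1(1, 3, 1, ref net);
            mlpkfoldcvlm(ref net, ref xy, 10, 0.001, 2, 5, ref info, ref rep, ref cvrep);
        }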


        /*************************************************************************
        Internal cross-validation subroutine
        *************************************************************************/
        private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n,
            ref double[,] xy,
            int npoints,
            double decay,
            int restarts,
            int foldscount,
            bool lmalgorithm,
            double wstep,
            int maxits,
            ref int info,
            ref mlpreport rep,
            ref mlpcvreport cvrep)
        {
            int i = 0;
            int fold = 0;
            int j = 0;
            int k = 0;
            mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
            int nin = 0;
            int nout = 0;
            int rowlen = 0;
            int wcount = 0;
            int nclasses = 0;
            int tssize = 0;
            int cvssize = 0;
            double[,] cvset = new double[0,0];
            double[,] testset = new double[0,0];
            int[] folds = new int[0];
            int relcnt = 0;
            mlpreport internalrep = new mlpreport();
            double[] x = new double[0];
            double[] y = new double[0];
            int i_ = 0;

            //
            // Read network geometry, test parameters
            //
            mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
            if( mlpbase.mlpissoftmax(ref n) )
            {
                nclasses = nout;
                rowlen = nin+1;
            }
            else
            {
                nclasses = -nout;
                rowlen = nin+nout;
            }
            if( npoints<=0 | foldscount<2 | foldscount>npoints )
            {
                info = -1;
                return;
            }
            mlpbase.mlpcopy(ref n, ref network);

            //
            // K-fold cross-validation.
            // First, estimate generalization error
            //
            testset = new double[npoints-1+1, rowlen-1+1];
            cvset = new double[npoints-1+1, rowlen-1+1];
            x = new double[nin-1+1];
            y = new double[nout-1+1];
            mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
            cvrep.relclserror = 0;
            cvrep.avgce = 0;
            cvrep.rmserror = 0;
            cvrep.avgerror = 0;
            cvrep.avgrelerror = 0;
            rep.ngrad = 0;
            rep.nhess = 0;
            rep.ncholesky = 0;
            relcnt = 0;
            for(fold=0; fold<=foldscount-1; fold++)
            {

                //
                // Separate set
                //
                tssize = 0;
                cvssize = 0;
                for(i=0; i<=npoints-1; i++)
                {
                    if( folds[i]==fold )
                    {
                        for(i_=0; i_<=rowlen-1; i_++)
                        {
                            testset[tssize,i_] = xy[i,i_];
                        }
                        tssize = tssize+1;
                    }
                    else
                    {
                        for(i_=0; i_<=rowlen-1; i_++)
                        {
                            cvset[cvssize,i_] = xy[i,i_];
                        }
                        cvssize = cvssize+1;
                    }
                }

                //
                // Train on CV training set
                //
                if( lmalgorithm )
                {
                    mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
                }
                else
                {
                    mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
                }
                if( info<0 )
                {
                    cvrep.relclserror = 0;
                    cvrep.avgce = 0;
                    cvrep.rmserror = 0;
                    cvrep.avgerror = 0;
                    cvrep.avgrelerror = 0;
                    return;
                }
                rep.ngrad = rep.ngrad+internalrep.ngrad;
                rep.nhess = rep.nhess+internalrep.nhess;
                rep.ncholesky = rep.ncholesky+internalrep.ncholesky;

                //
                // Estimate error using CV test set
                //
                if( mlpbase.mlpissoftmax(ref network) )
                {

                    //
                    // classification-only code
                    //
                    cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
                    cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
                }
                for(i=0; i<=tssize-1; i++)
                {
                    for(i_=0; i_<=nin-1; i_++)
                    {
                        x[i_] = testset[i,i_];
                    }
                    mlpbase.mlpprocess(ref network, ref x, ref y);
                    if( mlpbase.mlpissoftmax(ref network) )
                    {

                        //
                        // Classification-specific code
                        //
                        k = (int)Math.Round(testset[i,nin]);
                        for(j=0; j<=nout-1; j++)
                        {
                            if( j==k )
                            {
                                cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
                                cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
                                cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
                                relcnt = relcnt+1;
                            }
                            else
                            {
                                cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
                                cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
                            }
                        }
                    }
                    else
                    {

                        //
                        // Regression-specific code
                        //
                        for(j=0; j<=nout-1; j++)
                        {
                            cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
                            cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
                            if( (double)(testset[i,nin+j])!=(double)(0) )
                            {
                                cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
                                relcnt = relcnt+1;
                            }
                        }
                    }
                }
            }
            if( mlpbase.mlpissoftmax(ref network) )
            {
                cvrep.relclserror = cvrep.relclserror/npoints;
                cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
            }
            cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
            cvrep.avgerror = cvrep.avgerror/(npoints*nout);
            cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
            info = 1;
        }


        /*************************************************************************
        Subroutine prepares K-fold split of the training set.

        NOTES:
            "NClasses>0" means that we have a classification task.
            "NClasses<0" means regression task with -NClasses real outputs.
        *************************************************************************/
        private static void mlpkfoldsplit(ref double[,] xy,
            int npoints,
            int nclasses,
            int foldscount,
            bool stratifiedsplits,
            ref int[] folds)
        {
            int i = 0;
            int j = 0;
            int k = 0;

            //
            // test parameters
            //
            System.Diagnostics.Debug.Assert(npoints>0, "MLPKFoldSplit: wrong NPoints!");
            System.Diagnostics.Debug.Assert(nclasses>1 | nclasses<0, "MLPKFoldSplit: wrong NClasses!");
            System.Diagnostics.Debug.Assert(foldscount>=2 & foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!");
            System.Diagnostics.Debug.Assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!");

            //
            // Folds
            //
            folds = new int[npoints-1+1];
            for(i=0; i<=npoints-1; i++)
            {
                folds[i] = i*foldscount/npoints;
            }
            for(i=0; i<=npoints-2; i++)
            {
                j = i+AP.Math.RandomInteger(npoints-i);
                if( j!=i )
                {
                    k = folds[i];
                    folds[i] = folds[j];
                    folds[j] = k;
                }
            }
        }
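

        /*************************************************************************
        Editor's note: illustrative sketch of MLPKFoldSplit behavior, not part
        of the original ALGLIB sources. NClasses=-1 denotes a regression task
        with one real output; the sizes below are placeholders.
        *************************************************************************/
        private static void mlpkfoldsplitexample()
        {
            double[,] xy = new double[10, 2];
            int[] folds = new int[0];

            //
            // Before shuffling, point I is assigned fold I*FoldsCount/NPoints,
            // so with NPoints=10 and FoldsCount=5 each fold index 0..4 appears
            // exactly twice; the random swaps only permute that assignment.
            //
            mlpkfoldsplit(ref xy, 10, -1, 5, false, ref folds);
        }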
    }
}