/************************************************************************* Copyright (c) Sergey Bochkanov (ALGLIB project). >>> SOURCE LICENSE >>> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation (www.fsf.org); either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. A copy of the GNU General Public License is available at http://www.fsf.org/licensing/licenses >>> END OF LICENSE >>> *************************************************************************/
#pragma warning disable 162
#pragma warning disable 219
using System;
public partial class alglib {
/************************************************************************* Optimal binary classification

Algorithm finds the optimal (i.e. with minimal cross-entropy) binary partition. Internal subroutine.

INPUT PARAMETERS: A - array[0..N-1], variable C - array[0..N-1], class numbers (0 or 1). N - array size

OUTPUT PARAMETERS: Info - completion code: * -3, all values of A[] are the same (partition is impossible) * -2, one of C[] is incorrect (<0, >1) * -1, incorrect parameters were passed (N<=0). * 1, OK Threshold- partition boundary. Left part contains values which are strictly less than Threshold. Right part contains values which are greater than or equal to Threshold. PAL, PBL- probabilities P(0|v<Threshold) and P(1|v<Threshold) PAR, PBR- probabilities P(0|v>=Threshold) and P(1|v>=Threshold) CVE - cross-validation estimate of cross-entropy

-- ALGLIB -- Copyright 22.05.2008 by Bochkanov Sergey *************************************************************************/
public static void dsoptimalsplit2(double[] a, int[] c, int n, out int info, out double threshold, out double pal, out double pbl, out double par, out double pbr, out double cve) { info = 0; threshold = 0; pal = 0; pbl = 0; par = 0; pbr = 0; cve = 0; bdss.dsoptimalsplit2(a, c, n, ref info, ref threshold, ref pal, ref pbl, ref par, ref pbr, ref cve); return; }
/************************************************************************* Optimal partition, internal subroutine. Fast version.

Accepts: A array[0..N-1] array of attributes C array[0..N-1] array of class labels TiesBuf array[0..N] temporaries (ties) CntBuf array[0..2*NC-1] temporaries (counts) Alpha centering factor (0<=alpha<=1, recommended value - 0.05) BufR array[0..N-1] temporaries BufI array[0..N-1] temporaries

Output: Info error code (">0"=OK, "<0"=bad) Threshold partition boundary RMS training set RMS error CVRMS leave-one-out RMS error

Note: content of all arrays is changed by subroutine; it doesn't allocate temporaries.

-- ALGLIB -- Copyright 11.12.2008 by Bochkanov Sergey *************************************************************************/
public static void dsoptimalsplit2fast(ref double[] a, ref int[] c, ref int[] tiesbuf, ref int[] cntbuf, ref double[] bufr, ref int[] bufi, int n, int nc, double alpha, out int info, out double threshold, out double rms, out double cvrms) { info = 0; threshold = 0; rms = 0; cvrms = 0; bdss.dsoptimalsplit2fast(ref a, ref c, ref tiesbuf, ref cntbuf, ref bufr, ref bufi, n, nc, alpha, ref info, ref threshold, ref rms, ref cvrms); return; } }
public partial class alglib {
/************************************************************************* This structure is a clustering engine.
You should not try to access its fields directly. Use ALGLIB functions in order to work with this object.

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public class clusterizerstate { // // Public declarations // public clusterizerstate() { _innerobj = new clustering.clusterizerstate(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private clustering.clusterizerstate _innerobj; public clustering.clusterizerstate innerobj { get { return _innerobj; } } public clusterizerstate(clustering.clusterizerstate obj) { _innerobj = obj; } }
/************************************************************************* This structure is used to store results of the agglomerative hierarchical clustering (AHC).

Following information is returned:
* NPoints contains number of points in the original dataset
* Z contains information about merges performed (see below). Z contains indexes from the original (unsorted) dataset and it can be used when you need to know what points were merged. However, it is not convenient when you want to build a dendrogram (see below).
* if you want to build a dendrogram, you can use Z, but it is not a good option, because Z contains indexes from the unsorted dataset. A dendrogram built from such a dataset is likely to have intersections. So, you have to reorder your points before building the dendrogram. The permutation which reorders points is returned in P. Another representation of merges, which is more convenient for dendrogram construction, is returned in PM.
* more information on format of Z, P and PM can be found below and in the examples from ALGLIB Reference Manual.

FORMAL DESCRIPTION OF FIELDS: NPoints number of points Z array[NPoints-1,2], contains indexes of clusters linked in pairs to form clustering tree. I-th row corresponds to I-th merge: * Z[I,0] - index of the first cluster to merge * Z[I,1] - index of the second cluster to merge * Z[I,0]<Z[I,1] P array[NPoints], permutation which reorders points for dendrogram construction PZ same as Z, but with point indexes permuted according to P PM array[NPoints-1,6], another representation of merges, better suited for dendrogram construction

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public class ahcreport { // // Public declarations // public int npoints { get { return _innerobj.npoints; } set { _innerobj.npoints = value; } } public int[] p { get { return _innerobj.p; } set { _innerobj.p = value; } } public int[,] z { get { return _innerobj.z; } set { _innerobj.z = value; } } public int[,] pz { get { return _innerobj.pz; } set { _innerobj.pz = value; } } public int[,] pm { get { return _innerobj.pm; } set { _innerobj.pm = value; } } public ahcreport() { _innerobj = new clustering.ahcreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private clustering.ahcreport _innerobj; public clustering.ahcreport innerobj { get { return _innerobj; } } public ahcreport(clustering.ahcreport obj) { _innerobj = obj; } }
/************************************************************************* This structure is used to store results of the k-means++ clustering algorithm.

FORMAL DESCRIPTION OF FIELDS: NPoints number of points, >=0 NFeatures number of variables, >=1 TerminationType completion code: * -5 if distance type is anything different from Euclidean metric * -3 for degenerate dataset: a) less than K distinct points, b) K=0 for non-empty dataset.
* +1 for successful completion K number of clusters C array[K,NFeatures], rows of the array store centers CIdx array[NPoints], which contains cluster indexes

-- ALGLIB -- Copyright 27.11.2012 by Bochkanov Sergey *************************************************************************/
public class kmeansreport { // // Public declarations // public int npoints { get { return _innerobj.npoints; } set { _innerobj.npoints = value; } } public int nfeatures { get { return _innerobj.nfeatures; } set { _innerobj.nfeatures = value; } } public int terminationtype { get { return _innerobj.terminationtype; } set { _innerobj.terminationtype = value; } } public int k { get { return _innerobj.k; } set { _innerobj.k = value; } } public double[,] c { get { return _innerobj.c; } set { _innerobj.c = value; } } public int[] cidx { get { return _innerobj.cidx; } set { _innerobj.cidx = value; } } public kmeansreport() { _innerobj = new clustering.kmeansreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private clustering.kmeansreport _innerobj; public clustering.kmeansreport innerobj { get { return _innerobj; } } public kmeansreport(clustering.kmeansreport obj) { _innerobj = obj; } }
/************************************************************************* This function initializes clusterizer object. Newly initialized object is empty, i.e. it does not contain dataset. You should use it as follows: 1. creation 2. dataset is added with ClusterizerSetPoints() 3. additional parameters are set 4. clustering is performed with one of the clustering functions

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizercreate(out clusterizerstate s) { s = new clusterizerstate(); clustering.clusterizercreate(s.innerobj); return; }
/************************************************************************* This function adds a dataset to the clusterizer structure. This function overrides all previous calls of ClusterizerSetPoints() or ClusterizerSetDistances().

INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() XY - array[NPoints,NFeatures], dataset NPoints - number of points, >=0 NFeatures- number of features, >=1 DistType- distance function: * 0 Chebyshev distance (L-inf norm) * 1 city block distance (L1 norm) * 2 Euclidean distance (L2 norm) * 10 Pearson correlation: dist(a,b) = 1-corr(a,b) * 11 Absolute Pearson correlation: dist(a,b) = 1-|corr(a,b)| * 12 Uncentered Pearson correlation (cosine of the angle): dist(a,b) = a'*b/(|a|*|b|) * 13 Absolute uncentered Pearson correlation dist(a,b) = |a'*b|/(|a|*|b|) * 20 Spearman rank correlation: dist(a,b) = 1-rankcorr(a,b) * 21 Absolute Spearman rank correlation dist(a,b) = 1-|rankcorr(a,b)|

NOTE 1: different distance functions have different performance penalties: * Euclidean or Pearson correlation distances are the fastest ones * Spearman correlation distance function is a bit slower * city block and Chebyshev distances are an order of magnitude slower The reason behind the difference in performance is that correlation-based distance functions are computed using optimized linear algebra kernels, while Chebyshev and city block distance functions are computed using simple nested loops with two branches at each iteration.
NOTE 2: different clustering algorithms have different limitations: * agglomerative hierarchical clustering algorithms may be used with any kind of distance metric * k-means++ clustering algorithm may be used only with Euclidean distance function Thus, the list of clustering algorithms you may use depends on the distance function you specify when you set your dataset.

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizersetpoints(clusterizerstate s, double[,] xy, int npoints, int nfeatures, int disttype) { clustering.clusterizersetpoints(s.innerobj, xy, npoints, nfeatures, disttype); return; }
public static void clusterizersetpoints(clusterizerstate s, double[,] xy, int disttype) { int npoints; int nfeatures; npoints = ap.rows(xy); nfeatures = ap.cols(xy); clustering.clusterizersetpoints(s.innerobj, xy, npoints, nfeatures, disttype); return; }
/************************************************************************* This function adds a dataset given by its distance matrix to the clusterizer structure. Note that the dataset itself is not given explicitly - only the distance matrix is supplied. This function overrides all previous calls of ClusterizerSetPoints() or ClusterizerSetDistances().

INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() D - array[NPoints,NPoints], distance matrix given by its upper or lower triangle (main diagonal is ignored because its entries are expected to be zero). NPoints - number of points IsUpper - whether upper or lower triangle of D is given.

NOTE 1: different clustering algorithms have different limitations: * agglomerative hierarchical clustering algorithms may be used with any kind of distance metric, including one which is given by distance matrix * k-means++ clustering algorithm may be used only with Euclidean distance function and explicitly given points - it cannot be used with a dataset given by distance matrix Thus, if you call this function, you will be unable to use k-means clustering algorithm to process your problem.
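EXAMPLE (illustrative sketch, not part of the original comment; the 3x3 distance matrix below is made up for demonstration):

    clusterizerstate s;
    ahcreport rep;
    double[,] d = new double[,]{{0,1,3},{1,0,2},{3,2,0}};
    clusterizercreate(out s);
    clusterizersetdistances(s, d, true);
    clusterizerrunahc(s, out rep);

Because the dataset is given only by its distance matrix, AHC (ClusterizerRunAHC, described below) is the only clustering algorithm which may be applied to it.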
-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizersetdistances(clusterizerstate s, double[,] d, int npoints, bool isupper) { clustering.clusterizersetdistances(s.innerobj, d, npoints, isupper); return; }
public static void clusterizersetdistances(clusterizerstate s, double[,] d, bool isupper) { int npoints; if( (ap.rows(d)!=ap.cols(d))) throw new alglibexception("Error while calling 'clusterizersetdistances': looks like one of arguments has wrong size"); npoints = ap.rows(d); clustering.clusterizersetdistances(s.innerobj, d, npoints, isupper); return; }
/************************************************************************* This function sets the agglomerative hierarchical clustering algorithm

INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() Algo - algorithm type: * 0 complete linkage (default algorithm) * 1 single linkage * 2 unweighted average linkage * 3 weighted average linkage

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizersetahcalgo(clusterizerstate s, int algo) { clustering.clusterizersetahcalgo(s.innerobj, algo); return; }
/************************************************************************* This function sets k-means++ properties: the number of restarts and the maximum number of iterations per run.

INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() Restarts- restarts count, >=1. k-means++ algorithm performs several restarts and chooses the best set of centers (the one with minimum squared distance). MaxIts - maximum number of k-means iterations performed during one run. >=0, zero value means that algorithm performs unlimited number of iterations.

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizersetkmeanslimits(clusterizerstate s, int restarts, int maxits) { clustering.clusterizersetkmeanslimits(s.innerobj, restarts, maxits); return; }
/************************************************************************* This function performs agglomerative hierarchical clustering

INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate()

OUTPUT PARAMETERS: Rep - clustering results; see description of AHCReport structure for more information.

NOTE 1: hierarchical clustering algorithms require large amounts of memory. In particular, this implementation needs sizeof(double)*NPoints^2 bytes, which are used to store the distance matrix. In case we work with a user-supplied matrix, this amount is multiplied by 2 (we have to store the original matrix and to work with its copy). For example, a problem with 10000 points would require 800MB of RAM, even when working in a 1-dimensional space.

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizerrunahc(clusterizerstate s, out ahcreport rep) { rep = new ahcreport(); clustering.clusterizerrunahc(s.innerobj, rep.innerobj); return; }
/************************************************************************* This function performs clustering by k-means++ algorithm. You may change algorithm properties like number of restarts or iterations limit by calling the ClusterizerSetKMeansLimits() function.
INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() K - number of clusters, K>=0. K can be zero only when algorithm is called for empty dataset, in this case completion code is set to success (+1). If K=0 and dataset size is non-zero, we cannot meaningfully assign points to some center (there are no centers because K=0) and return -3 as completion code (failure).

OUTPUT PARAMETERS: Rep - clustering results; see description of KMeansReport structure for more information.

NOTE 1: k-means clustering can be performed only for datasets with Euclidean distance function. Algorithm will return negative completion code in Rep.TerminationType in case dataset was added to clusterizer with DistType other than Euclidean (or dataset was specified by distance matrix instead of explicitly given points).

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizerrunkmeans(clusterizerstate s, int k, out kmeansreport rep) { rep = new kmeansreport(); clustering.clusterizerrunkmeans(s.innerobj, k, rep.innerobj); return; }
/************************************************************************* This function returns distance matrix for dataset

INPUT PARAMETERS: XY - array[NPoints,NFeatures], dataset NPoints - number of points, >=0 NFeatures- number of features, >=1 DistType- distance function: * 0 Chebyshev distance (L-inf norm) * 1 city block distance (L1 norm) * 2 Euclidean distance (L2 norm) * 10 Pearson correlation: dist(a,b) = 1-corr(a,b) * 11 Absolute Pearson correlation: dist(a,b) = 1-|corr(a,b)| * 12 Uncentered Pearson correlation (cosine of the angle): dist(a,b) = a'*b/(|a|*|b|) * 13 Absolute uncentered Pearson correlation dist(a,b) = |a'*b|/(|a|*|b|) * 20 Spearman rank correlation: dist(a,b) = 1-rankcorr(a,b) * 21 Absolute Spearman rank correlation dist(a,b) = 1-|rankcorr(a,b)|

OUTPUT PARAMETERS: D - array[NPoints,NPoints], distance matrix (full matrix is returned, with lower and upper triangles)

NOTES: different distance functions have different performance penalties: * Euclidean or Pearson correlation distances are the fastest ones * Spearman correlation distance function is a bit slower * city block and Chebyshev distances are an order of magnitude slower The reason behind the difference in performance is that correlation-based distance functions are computed using optimized linear algebra kernels, while Chebyshev and city block distance functions are computed using simple nested loops with two branches at each iteration.

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizergetdistances(double[,] xy, int npoints, int nfeatures, int disttype, out double[,] d) { d = new double[0,0]; clustering.clusterizergetdistances(xy, npoints, nfeatures, disttype, ref d); return; }
/************************************************************************* This function takes as input clustering report Rep, desired cluster count K, and builds top K clusters from the hierarchical clustering tree. It returns assignment of points to clusters (array of cluster indexes).

INPUT PARAMETERS: Rep - report from ClusterizerRunAHC() performed on XY K - desired number of clusters, 1<=K<=NPoints. K can be zero only when NPoints=0.

OUTPUT PARAMETERS: CIdx - array[NPoints], I-th element contains cluster index (from 0 to K-1) for I-th point of the dataset. CZ - array[K].
This array allows to convert cluster indexes returned by this function to indexes used by Rep.Z. J-th cluster returned by this function corresponds to CZ[J]-th cluster stored in Rep.Z/PZ/PM. It is guaranteed that CZ[I]<CZ[I+1].

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizergetkclusters(ahcreport rep, int k, out int[] cidx, out int[] cz) { cidx = new int[0]; cz = new int[0]; clustering.clusterizergetkclusters(rep.innerobj, k, ref cidx, ref cz); return; }
/************************************************************************* This function accepts AHC report Rep, desired minimum intercluster distance R, and returns top clusters from the hierarchical clustering tree which are separated by distance R or HIGHER. It returns assignment of points to clusters (array of cluster indexes).

INPUT PARAMETERS: Rep - report from ClusterizerRunAHC() performed on XY R - desired minimum intercluster distance, R>=0

OUTPUT PARAMETERS: K - number of clusters, 1<=K<=NPoints CIdx - array[NPoints], I-th element contains cluster index (from 0 to K-1) for I-th point of the dataset. CZ - array[K]. This array allows to convert cluster indexes returned by this function to indexes used by Rep.Z. J-th cluster returned by this function corresponds to CZ[J]-th cluster stored in Rep.Z/PZ/PM. It is guaranteed that CZ[I]<CZ[I+1].

-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/
public static void clusterizerseparatedbydist(ahcreport rep, double r, out int k, out int[] cidx, out int[] cz) { k = 0; cidx = new int[0]; cz = new int[0]; clustering.clusterizerseparatedbydist(rep.innerobj, r, ref k, ref cidx, ref cz); return; } }
public partial class alglib {
/************************************************************************* *************************************************************************/
public class decisionforest { // // Public declarations // public decisionforest() { _innerobj = new dforest.decisionforest(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private dforest.decisionforest _innerobj; public dforest.decisionforest innerobj { get { return _innerobj; } } public decisionforest(dforest.decisionforest obj) { _innerobj = obj; } }
/************************************************************************* *************************************************************************/
public class dfreport { // // Public declarations // public double relclserror { get { return _innerobj.relclserror; } set { _innerobj.relclserror = value; } } public double avgce { get { return _innerobj.avgce; } set { _innerobj.avgce = value; } } public double rmserror { get { return _innerobj.rmserror; } set { _innerobj.rmserror = value; } } public double avgerror { get { return _innerobj.avgerror; } set { _innerobj.avgerror = value; } } public double avgrelerror { get { return _innerobj.avgrelerror; } set { _innerobj.avgrelerror = value; } } public double oobrelclserror { get { return _innerobj.oobrelclserror; } set { _innerobj.oobrelclserror = value; } } public double oobavgce { get { return _innerobj.oobavgce; } set { _innerobj.oobavgce = value; } } public double oobrmserror { get { return _innerobj.oobrmserror; } set { _innerobj.oobrmserror = value; } } public double oobavgerror { get { return _innerobj.oobavgerror; } set { _innerobj.oobavgerror = value; } } public double oobavgrelerror { get { return _innerobj.oobavgrelerror; } set { _innerobj.oobavgrelerror = value; } } public dfreport() { _innerobj = new dforest.dfreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private dforest.dfreport _innerobj; public dforest.dfreport innerobj { get { return _innerobj; } } public dfreport(dforest.dfreport obj) { _innerobj = obj; } }
/************************************************************************* This subroutine builds random decision forest.

INPUT PARAMETERS: XY - training set NPoints - training set size, NPoints>=1 NVars - number of independent variables, NVars>=1 NClasses - task type: * NClasses=1 - regression task with one dependent variable * NClasses>1 - classification task with NClasses classes. NTrees - number of trees in a forest, NTrees>=1. recommended values: 50-100. R - percent of a training set used to build individual trees. 0<R<=1, recommended value - 0.66.

OUTPUT PARAMETERS: Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<1, NVars<1, NClasses<1, NTrees<1, R<=0 or R>1). * 1, if task has been solved DF - model built Rep - training report, contains error on a training set and out-of-bag estimates of generalization error.

-- ALGLIB -- Copyright 19.02.2009 by Bochkanov Sergey *************************************************************************/
public static void dfbuildrandomdecisionforest(double[,] xy, int npoints, int nvars, int nclasses, int ntrees, double r, out int info, out decisionforest df, out dfreport rep) { info = 0; df = new decisionforest(); rep = new dfreport(); dforest.dfbuildrandomdecisionforest(xy, npoints, nvars, nclasses, ntrees, r, ref info, df.innerobj, rep.innerobj); return; }
/************************************************************************* This subroutine builds random decision forest. This function gives ability to tune number of variables used when choosing best split.

INPUT PARAMETERS: XY - training set NPoints - training set size, NPoints>=1 NVars - number of independent variables, NVars>=1 NClasses - task type: * NClasses=1 - regression task with one dependent variable * NClasses>1 - classification task with NClasses classes. NTrees - number of trees in a forest, NTrees>=1. recommended values: 50-100. NRndVars - number of variables used when choosing best split R - percent of a training set used to build individual trees. 0<R<=1, recommended value - 0.66.

OUTPUT PARAMETERS: Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<1, NVars<1, NClasses<1, NTrees<1, R<=0 or R>1). * 1, if task has been solved DF - model built Rep - training report, contains error on a training set and out-of-bag estimates of generalization error.

-- ALGLIB -- Copyright 19.02.2009 by Bochkanov Sergey *************************************************************************/
public static void dfbuildrandomdecisionforestx1(double[,] xy, int npoints, int nvars, int nclasses, int ntrees, int nrndvars, double r, out int info, out decisionforest df, out dfreport rep) { info = 0; df = new decisionforest(); rep = new dfreport(); dforest.dfbuildrandomdecisionforestx1(xy, npoints, nvars, nclasses, ntrees, nrndvars, r, ref info, df.innerobj, rep.innerobj); return; }
/************************************************************************* Processing

INPUT PARAMETERS: DF - decision forest model X - input vector, array[0..NVars-1].

OUTPUT PARAMETERS: Y - result. Regression estimate when solving regression task, vector of posterior probabilities for classification task.

See also DFProcessI.
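EXAMPLE (illustrative sketch; assumes DF was trained by DFBuildRandomDecisionForest on a problem with 4 inputs and 2 classes, and the query values below are made up):

    double[] x = new double[]{0.1, 0.5, 0.5, 0.9};
    double[] y = new double[0];
    dfprocess(df, x, ref y);

After the call, y[0] and y[1] hold the posterior probabilities of classes 0 and 1.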
-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/
public static void dfprocess(decisionforest df, double[] x, ref double[] y) { dforest.dfprocess(df.innerobj, x, ref y); return; }
/************************************************************************* 'interactive' variant of DFProcess for languages like Python which support constructs like "Y = DFProcessI(DF,X)" and interactive mode of the interpreter This function allocates new array on each call, so it is significantly slower than its 'non-interactive' counterpart, but it is more convenient when you call it from command line.

-- ALGLIB -- Copyright 28.02.2010 by Bochkanov Sergey *************************************************************************/
public static void dfprocessi(decisionforest df, double[] x, out double[] y) { y = new double[0]; dforest.dfprocessi(df.innerobj, x, ref y); return; }
/************************************************************************* Relative classification error on the test set

INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size

RESULT: percent of incorrectly classified cases. Zero if model solves regression task.

-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/
public static double dfrelclserror(decisionforest df, double[,] xy, int npoints) { double result = dforest.dfrelclserror(df.innerobj, xy, npoints); return result; }
/************************************************************************* Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size

RESULT: CrossEntropy/(NPoints*LN(2)). Zero if model solves regression task.

-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/
public static double dfavgce(decisionforest df, double[,] xy, int npoints) { double result = dforest.dfavgce(df.innerobj, xy, npoints); return result; }
/************************************************************************* RMS error on the test set

INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size

RESULT: root mean square error. Its meaning for regression task is obvious. As for classification task, RMS error means error when estimating posterior probabilities.

-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/
public static double dfrmserror(decisionforest df, double[,] xy, int npoints) { double result = dforest.dfrmserror(df.innerobj, xy, npoints); return result; }
/************************************************************************* Average error on the test set

INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size

RESULT: Its meaning for regression task is obvious. As for classification task, it means average error when estimating posterior probabilities.
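EXAMPLE (illustrative sketch; xytest and ntest denote a hypothetical test set stored in the same format as the training set):

    double relcls = dfrelclserror(df, xytest, ntest);
    double rms = dfrmserror(df, xytest, ntest);
    double avg = dfavgerror(df, xytest, ntest);

All error functions of this unit share this calling convention.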
-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/
public static double dfavgerror(decisionforest df, double[,] xy, int npoints) { double result = dforest.dfavgerror(df.innerobj, xy, npoints); return result; }
/************************************************************************* Average relative error on the test set

INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size

RESULT: Its meaning for regression task is obvious. As for classification task, it means average relative error when estimating posterior probability of belonging to the correct class.

-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/
public static double dfavgrelerror(decisionforest df, double[,] xy, int npoints) { double result = dforest.dfavgrelerror(df.innerobj, xy, npoints); return result; } }
public partial class alglib {
/************************************************************************* *************************************************************************/
public class linearmodel { // // Public declarations // public linearmodel() { _innerobj = new linreg.linearmodel(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private linreg.linearmodel _innerobj; public linreg.linearmodel innerobj { get { return _innerobj; } } public linearmodel(linreg.linearmodel obj) { _innerobj = obj; } }
/************************************************************************* LRReport structure contains additional information about linear model: * C - covariance matrix, array[0..NVars,0..NVars]. C[i,j] = Cov(A[i],A[j]) * RMSError - root mean square error on a training set * AvgError - average error on a training set * AvgRelError - average relative error on a training set (excluding observations with zero function value). * CVRMSError - leave-one-out cross-validation estimate of generalization error. Calculated using fast algorithm with O(NVars*NPoints) complexity. * CVAvgError - cross-validation estimate of average error * CVAvgRelError - cross-validation estimate of average relative error

All other fields of the structure are intended for internal use and should not be used outside ALGLIB.
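EXAMPLE (illustrative sketch; xy, npoints and nvars are hypothetical, LRBuild is described below):

    linearmodel lm;
    lrreport ar;
    int info;
    lrbuild(xy, npoints, nvars, out info, out lm, out ar);
    if( info>0 )
        System.Console.WriteLine("LOO CV RMS error: {0}", ar.cvrmserror);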
*************************************************************************/
public class lrreport { // // Public declarations // public double[,] c { get { return _innerobj.c; } set { _innerobj.c = value; } } public double rmserror { get { return _innerobj.rmserror; } set { _innerobj.rmserror = value; } } public double avgerror { get { return _innerobj.avgerror; } set { _innerobj.avgerror = value; } } public double avgrelerror { get { return _innerobj.avgrelerror; } set { _innerobj.avgrelerror = value; } } public double cvrmserror { get { return _innerobj.cvrmserror; } set { _innerobj.cvrmserror = value; } } public double cvavgerror { get { return _innerobj.cvavgerror; } set { _innerobj.cvavgerror = value; } } public double cvavgrelerror { get { return _innerobj.cvavgrelerror; } set { _innerobj.cvavgrelerror = value; } } public int ncvdefects { get { return _innerobj.ncvdefects; } set { _innerobj.ncvdefects = value; } } public int[] cvdefects { get { return _innerobj.cvdefects; } set { _innerobj.cvdefects = value; } } public lrreport() { _innerobj = new linreg.lrreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private linreg.lrreport _innerobj; public linreg.lrreport innerobj { get { return _innerobj; } } public lrreport(linreg.lrreport obj) { _innerobj = obj; } }
/************************************************************************* Linear regression

Subroutine builds model: Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N) and returns the model in ALGLIB format, together with the covariance matrix, training set errors (rms, average, average relative) and a leave-one-out cross-validation estimate of the generalization error. CV estimate is calculated using a fast algorithm with O(NVars*NPoints) complexity. When the covariance matrix is calculated, standard deviations of function values are assumed to be equal to the RMS error on the training set.

INPUT PARAMETERS: XY - training set, array [0..NPoints-1,0..NVars]: * NVars columns - independent variables * last column - dependent variable NPoints - training set size, NPoints>NVars+1 NVars - number of independent variables

OUTPUT PARAMETERS: Info - return code: * -255, in case of unknown internal error * -4, if internal SVD subroutine hasn't converged * -1, if incorrect parameters were passed (NPoints<NVars+2 or NVars<1). * 1, if subroutine successfully finished LM - linear model in ALGLIB format. Use subroutines of this unit to work with the model. AR - additional results

-- ALGLIB -- Copyright 02.08.2008 by Bochkanov Sergey *************************************************************************/
public static void lrbuild(double[,] xy, int npoints, int nvars, out int info, out linearmodel lm, out lrreport ar) { info = 0; lm = new linearmodel(); ar = new lrreport(); linreg.lrbuild(xy, npoints, nvars, ref info, lm.innerobj, ar.innerobj); return; }
/************************************************************************* Linear regression

Variant of LRBuild which uses vector of standard deviations (errors in function values).

INPUT PARAMETERS: XY - training set, array [0..NPoints-1,0..NVars]: * NVars columns - independent variables * last column - dependent variable S - standard deviations (errors in function values), array[0..NPoints-1], S[I]>0. NPoints - training set size, NPoints>NVars+1 NVars - number of independent variables

OUTPUT PARAMETERS: Info - return code: * -255, in case of unknown internal error * -4, if internal SVD subroutine hasn't converged * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1 or some S[I]<=0). * 1, if subroutine successfully finished LM - linear model in ALGLIB format. Use subroutines of this unit to work with the model. AR - additional results

-- ALGLIB -- Copyright 02.08.2008 by Bochkanov Sergey *************************************************************************/
public static void lrbuilds(double[,] xy, double[] s, int npoints, int nvars, out int info, out linearmodel lm, out lrreport ar) { info = 0; lm = new linearmodel(); ar = new lrreport(); linreg.lrbuilds(xy, s, npoints, nvars, ref info, lm.innerobj, ar.innerobj); return; } }
public partial class alglib {
/************************************************************************* Filters: simple moving averages (unsymmetric).

This filter replaces array by results of SMA(K) filter. SMA(K) is defined as filter which averages at most K previous points (previous - not points AROUND the current one) of time series.

INPUT PARAMETERS: X - array[N], array to process. It can be larger than N, in this case only first N points are processed. N - points count, N>=0 K - K>=1 (K can be larger than N, such cases will be correctly handled). Window width. K=1 corresponds to identity transformation (nothing changes).

OUTPUT PARAMETERS: X - array, whose first N elements were processed with SMA(K)

NOTE 1: this function uses efficient in-place algorithm which does not allocate temporary arrays.

NOTE 2: this algorithm makes only one pass through array and uses running sum to speed-up calculation of the averages. Additional measures are taken to ensure that running sum on a long sequence of zero elements will be correctly reset to zero even in the presence of round-off error.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT average points after the current one. Only X[i], X[i-1], ... are used when calculating new value of X[i]. We should also note that this algorithm uses BOTH previous points and current one, i.e. new value of X[i] depends on BOTH previous point and X[i] itself.
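EXAMPLE (illustrative sketch; the 5-point series is made up):

    double[] x = new double[]{5, 4, 3, 2, 1};
    filtersma(ref x, 2);

With K=2 each X[i] is replaced by the mean of X[i-1] and X[i] (X[0] stays unchanged), so x becomes {5, 4.5, 3.5, 2.5, 1.5}.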
-- ALGLIB -- Copyright 25.10.2011 by Bochkanov Sergey *************************************************************************/
public static void filtersma(ref double[] x, int n, int k) { filters.filtersma(ref x, n, k); return; }
public static void filtersma(ref double[] x, int k) { int n; n = ap.len(x); filters.filtersma(ref x, n, k); return; }
/************************************************************************* Filters: exponential moving averages.

This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is defined as filter which replaces X[] by S[]: S[0] = X[0] S[t] = alpha*X[t] + (1-alpha)*S[t-1]

INPUT PARAMETERS: X - array[N], array to process. It can be larger than N, in this case only first N points are processed. N - points count, N>=0 alpha - 0<alpha<=1, smoothing parameter.

OUTPUT PARAMETERS: X - array, whose first N elements were processed with EMA(alpha)

NOTE 1: this function uses efficient in-place algorithm which does not allocate temporary arrays.

NOTE 2: this algorithm uses BOTH previous points and current one, i.e. new value of X[i] depends on BOTH previous point and X[i] itself.

-- ALGLIB -- Copyright 25.10.2011 by Bochkanov Sergey *************************************************************************/
public static void filterema(ref double[] x, int n, double alpha) { filters.filterema(ref x, n, alpha); return; }
public static void filterema(ref double[] x, double alpha) { int n; n = ap.len(x); filters.filterema(ref x, n, alpha); return; }
/************************************************************************* Filters: linear regression moving averages.

This filter replaces array by results of LRMA(K) filter. LRMA(K) is defined as filter which, for each data point, builds linear regression model using K previous points (the point itself is included in these K points) and calculates value of this linear model at the point in question.

INPUT PARAMETERS: X - array[N], array to process. It can be larger than N, in this case only first N points are processed. N - points count, N>=0 K - K>=1 (K can be larger than N, such cases will be correctly handled). Window width. K=1 corresponds to identity transformation (nothing changes).

OUTPUT PARAMETERS: X - array, whose first N elements were processed with LRMA(K)

NOTE 1: this function uses efficient in-place algorithm which does not allocate temporary arrays.

NOTE 2: this algorithm makes only one pass through array and uses running sum to speed-up calculation of the averages. Additional measures are taken to ensure that running sum on a long sequence of zero elements will be correctly reset to zero even in the presence of round-off error.

NOTE 3: this is unsymmetric version of the algorithm, which does NOT average points after the current one. Only X[i], X[i-1], ... are used when calculating new value of X[i]. We should also note that this algorithm uses BOTH previous points and current one, i.e. new value of X[i] depends on BOTH previous point and X[i] itself.

-- ALGLIB -- Copyright 25.10.2011 by Bochkanov Sergey *************************************************************************/
public static void filterlrma(ref double[] x, int n, int k) { filters.filterlrma(ref x, n, k); return; }
public static void filterlrma(ref double[] x, int k) { int n; n = ap.len(x); filters.filterlrma(ref x, n, k); return; } }
public partial class alglib {
/************************************************************************* Multiclass Fisher LDA

Subroutine finds coefficients of linear combination which optimally separates the training set into classes.

INPUT PARAMETERS: XY - training set, array[0..NPoints-1,0..NVars]. First NVars columns store values of independent variables, next column stores number of class (from 0 to NClasses-1) which dataset element belongs to. Fractional values are rounded to nearest integer. NPoints - training set size, NPoints>=0 NVars - number of independent variables, NVars>=1 NClasses - number of classes, NClasses>=2

OUTPUT PARAMETERS: Info - return code: * -4, if internal EVD subroutine hasn't converged * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, NVars<1, NClasses<2) * 1, if task has been solved * 2, if there was a multicollinearity in training set, but task has been solved.
W - linear combination coefficients, array[0..NVars-1]

-- ALGLIB -- Copyright 31.05.2008 by Bochkanov Sergey *************************************************************************/
public static void fisherlda(double[,] xy, int npoints, int nvars, int nclasses, out int info, out double[] w) { info = 0; w = new double[0]; lda.fisherlda(xy, npoints, nvars, nclasses, ref info, ref w); return; }
/************************************************************************* N-dimensional multiclass Fisher LDA

Subroutine finds coefficients of linear combinations which optimally separate the training set into classes. It returns N-dimensional basis whose vectors are sorted by quality of training set separation (in descending order).

INPUT PARAMETERS: XY - training set, array[0..NPoints-1,0..NVars]. First NVars columns store values of independent variables, next column stores number of class (from 0 to NClasses-1) which dataset element belongs to. Fractional values are rounded to nearest integer. NPoints - training set size, NPoints>=0 NVars - number of independent variables, NVars>=1 NClasses - number of classes, NClasses>=2

OUTPUT PARAMETERS: Info - return code: * -4, if internal EVD subroutine hasn't converged * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, NVars<1, NClasses<2) * 1, if task has been solved * 2, if there was a multicollinearity in training set, but task has been solved. W - basis, array[0..NVars-1,0..NVars-1] columns of the matrix store basis vectors, sorted by quality of training set separation (in descending order)

-- ALGLIB -- Copyright 31.05.2008 by Bochkanov Sergey *************************************************************************/
public static void fisherldan(double[,] xy, int npoints, int nvars, int nclasses, out int info, out double[,] w) { info = 0; w = new double[0,0]; lda.fisherldan(xy, npoints, nvars, nclasses, ref info, ref w); return; } }
public partial class alglib {
/************************************************************************* *************************************************************************/
public class multilayerperceptron { // // Public declarations // public multilayerperceptron() { _innerobj = new mlpbase.multilayerperceptron(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private mlpbase.multilayerperceptron _innerobj; public mlpbase.multilayerperceptron innerobj { get { return _innerobj; } } public multilayerperceptron(mlpbase.multilayerperceptron obj) { _innerobj = obj; } }
/************************************************************************* Model's errors: * RelCLSError - fraction of misclassified cases. * AvgCE - average cross-entropy * RMSError - root-mean-square error * AvgError - average error * AvgRelError - average relative error

NOTE 1: RelCLSError/AvgCE are zero on regression problems.
NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain errors in prediction of posterior probabilities *************************************************************************/
public class modelerrors { // // Public declarations // public double relclserror { get { return _innerobj.relclserror; } set { _innerobj.relclserror = value; } } public double avgce { get { return _innerobj.avgce; } set { _innerobj.avgce = value; } } public double rmserror { get { return _innerobj.rmserror; } set { _innerobj.rmserror = value; } } public double avgerror { get { return _innerobj.avgerror; } set { _innerobj.avgerror = value; } } public double avgrelerror { get { return _innerobj.avgrelerror; } set { _innerobj.avgrelerror = value; } } public modelerrors() { _innerobj = new mlpbase.modelerrors(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private mlpbase.modelerrors _innerobj; public mlpbase.modelerrors innerobj { get { return _innerobj; } } public modelerrors(mlpbase.modelerrors obj) { _innerobj = obj; } }
/************************************************************************* This function serializes data structure to string.

Important properties of s_out: * it contains alphanumeric characters, dots, underscores, minus signs * these symbols are grouped into words, which are separated by spaces and Windows-style (CR+LF) newlines * although serializer uses spaces and CR+LF as separators, you can replace any separator character by arbitrary combination of spaces, tabs, Windows or Unix newlines. It allows flexible reformatting of the string in case you want to include it into a text or XML file. But you should not insert separators into the middle of the "words" nor should you change the case of letters. * s_out can be freely moved between 32-bit and 64-bit systems, little and big endian machines, and so on. You can serialize structure on 32-bit machine and unserialize it on 64-bit one (or vice versa), or serialize it on SPARC and unserialize on x86. You can also serialize it in C# version of ALGLIB and unserialize in C++ one, and vice versa. *************************************************************************/
public static void mlpserialize(multilayerperceptron obj, out string s_out) { alglib.serializer s = new alglib.serializer(); s.alloc_start(); mlpbase.mlpalloc(s, obj.innerobj); s.sstart_str(); mlpbase.mlpserialize(s, obj.innerobj); s.stop(); s_out = s.get_string(); }
/************************************************************************* This function unserializes data structure from string. *************************************************************************/
public static void mlpunserialize(string s_in, out multilayerperceptron obj) { alglib.serializer s = new alglib.serializer(); obj = new multilayerperceptron(); s.ustart_str(s_in); mlpbase.mlpunserialize(s, obj.innerobj); s.stop(); }
/************************************************************************* Creates neural network with NIn inputs, NOut outputs, without hidden layers, with linear output layer. Network weights are filled with small random values.
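EXAMPLE (illustrative sketch; builds a 2-input, 1-output linear network and processes one made-up point with MLPProcess, which is described further below):

    multilayerperceptron net;
    double[] x = new double[]{1.0, 2.0};
    double[] y = new double[0];
    mlpcreate0(2, 1, out net);
    mlpprocess(net, x, ref y);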
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpcreate0(int nin, int nout, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreate0(nin, nout, network.innerobj); return; }
/************************************************************************* Same as MLPCreate0, but with one hidden layer (NHid neurons) with non-linear activation function. Output layer is linear.

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpcreate1(int nin, int nhid, int nout, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreate1(nin, nhid, nout, network.innerobj); return; }
/************************************************************************* Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons) with non-linear activation function. Output layer is linear.

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpcreate2(int nin, int nhid1, int nhid2, int nout, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, network.innerobj); return; }
/************************************************************************* Creates neural network with NIn inputs, NOut outputs, without hidden layers, with non-linear output layer. Network weights are filled with small random values. Activation function of the output layer takes values: (B, +INF), if D>=0 or (-INF, B), if D<0.

-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlpcreateb0(int nin, int nout, double b, double d, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreateb0(nin, nout, b, d, network.innerobj); return; }
/************************************************************************* Same as MLPCreateB0 but with non-linear hidden layer.

-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlpcreateb1(int nin, int nhid, int nout, double b, double d, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreateb1(nin, nhid, nout, b, d, network.innerobj); return; }
/************************************************************************* Same as MLPCreateB0 but with two non-linear hidden layers.

-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlpcreateb2(int nin, int nhid1, int nhid2, int nout, double b, double d, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, b, d, network.innerobj); return; }
/************************************************************************* Creates neural network with NIn inputs, NOut outputs, without hidden layers, with non-linear output layer. Network weights are filled with small random values. Activation function of the output layer takes values [A,B].
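EXAMPLE (illustrative sketch; the layer sizes and output range are made up):

    multilayerperceptron net;
    mlpcreater0(3, 2, -1.0, +1.0, out net);

Each of the two outputs of this network is bounded to [-1,+1].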
-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlpcreater0(int nin, int nout, double a, double b, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreater0(nin, nout, a, b, network.innerobj); return; }
/************************************************************************* Same as MLPCreateR0, but with non-linear hidden layer.

-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlpcreater1(int nin, int nhid, int nout, double a, double b, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreater1(nin, nhid, nout, a, b, network.innerobj); return; }
/************************************************************************* Same as MLPCreateR0, but with two non-linear hidden layers.

-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlpcreater2(int nin, int nhid1, int nhid2, int nout, double a, double b, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a, b, network.innerobj); return; }
/************************************************************************* Creates classifier network with NIn inputs and NOut possible classes. Network contains no hidden layers and linear output layer with SOFTMAX-normalization (so outputs sum up to 1.0 and converge to posterior probabilities).

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpcreatec0(int nin, int nout, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreatec0(nin, nout, network.innerobj); return; }
/************************************************************************* Same as MLPCreateC0, but with one non-linear hidden layer.

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpcreatec1(int nin, int nhid, int nout, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreatec1(nin, nhid, nout, network.innerobj); return; }
/************************************************************************* Same as MLPCreateC0, but with two non-linear hidden layers.
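EXAMPLE (illustrative sketch; a 4-input, 3-class classifier with one hidden layer of 10 neurons, all sizes made up; MLPCreateC1 is defined above):

    multilayerperceptron net;
    mlpcreatec1(4, 10, 3, out net);

Outputs of such a network are non-negative and sum up to 1.0, so they can be used as posterior probabilities.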
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpcreatec2(int nin, int nhid1, int nhid2, int nout, out multilayerperceptron network) { network = new multilayerperceptron(); mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, network.innerobj); return; }
/************************************************************************* Randomization of neural network weights

-- ALGLIB -- Copyright 06.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlprandomize(multilayerperceptron network) { mlpbase.mlprandomize(network.innerobj); return; }
/************************************************************************* Randomization of neural network weights and standardizer

-- ALGLIB -- Copyright 10.03.2008 by Bochkanov Sergey *************************************************************************/
public static void mlprandomizefull(multilayerperceptron network) { mlpbase.mlprandomizefull(network.innerobj); return; }
/************************************************************************* Returns information about initialized network: number of inputs, outputs, weights.

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpproperties(multilayerperceptron network, out int nin, out int nout, out int wcount) { nin = 0; nout = 0; wcount = 0; mlpbase.mlpproperties(network.innerobj, ref nin, ref nout, ref wcount); return; }
/************************************************************************* Returns number of inputs.

-- ALGLIB -- Copyright 19.10.2011 by Bochkanov Sergey *************************************************************************/
public static int mlpgetinputscount(multilayerperceptron network) { int result = mlpbase.mlpgetinputscount(network.innerobj); return result; }
/************************************************************************* Returns number of outputs.

-- ALGLIB -- Copyright 19.10.2011 by Bochkanov Sergey *************************************************************************/
public static int mlpgetoutputscount(multilayerperceptron network) { int result = mlpbase.mlpgetoutputscount(network.innerobj); return result; }
/************************************************************************* Returns number of weights.

-- ALGLIB -- Copyright 19.10.2011 by Bochkanov Sergey *************************************************************************/
public static int mlpgetweightscount(multilayerperceptron network) { int result = mlpbase.mlpgetweightscount(network.innerobj); return result; }
/************************************************************************* Tells whether network is SOFTMAX-normalized (i.e. classifier) or not.

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static bool mlpissoftmax(multilayerperceptron network) { bool result = mlpbase.mlpissoftmax(network.innerobj); return result; }
/************************************************************************* This function returns total number of layers (including input, hidden and output layers).
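EXAMPLE (illustrative sketch; net denotes a previously created network):

    int nin, nout, wcount;
    mlpproperties(net, out nin, out nout, out wcount);
    int layers = mlpgetlayerscount(net);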
-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static int mlpgetlayerscount(multilayerperceptron network) { int result = mlpbase.mlpgetlayerscount(network.innerobj); return result; }
/************************************************************************* This function returns size of K-th layer. K=0 corresponds to input layer, K=CNT-1 corresponds to output layer. Size of the output layer is always equal to the number of outputs, although when we have softmax-normalized network, last neuron doesn't have any connections - it is just zero.

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static int mlpgetlayersize(multilayerperceptron network, int k) { int result = mlpbase.mlpgetlayersize(network.innerobj, k); return result; }
/************************************************************************* This function returns offset/scaling coefficients for I-th input of the network.

INPUT PARAMETERS: Network - network I - input index

OUTPUT PARAMETERS: Mean - mean term Sigma - sigma term, guaranteed to be nonzero. I-th input is passed through linear transformation IN[i] = (IN[i]-Mean)/Sigma before feeding to the network

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpgetinputscaling(multilayerperceptron network, int i, out double mean, out double sigma) { mean = 0; sigma = 0; mlpbase.mlpgetinputscaling(network.innerobj, i, ref mean, ref sigma); return; }
/************************************************************************* This function returns offset/scaling coefficients for I-th output of the network.

INPUT PARAMETERS: Network - network I - output index

OUTPUT PARAMETERS: Mean - mean term Sigma - sigma term, guaranteed to be nonzero. I-th output is passed through linear transformation OUT[i] = OUT[i]*Sigma+Mean before returning it to user. In case we have SOFTMAX-normalized network, we return (Mean,Sigma)=(0.0,1.0).

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpgetoutputscaling(multilayerperceptron network, int i, out double mean, out double sigma) { mean = 0; sigma = 0; mlpbase.mlpgetoutputscaling(network.innerobj, i, ref mean, ref sigma); return; }
/************************************************************************* This function returns information about Ith neuron of Kth layer

INPUT PARAMETERS: Network - network K - layer index I - neuron index (within layer)

OUTPUT PARAMETERS: FKind - activation function type (used by MLPActivationFunction()) this value is zero for input or linear neurons Threshold - also called offset, bias zero for input neurons

NOTE: this function throws exception if layer or neuron with given index does not exist.

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpgetneuroninfo(multilayerperceptron network, int k, int i, out int fkind, out double threshold) { fkind = 0; threshold = 0; mlpbase.mlpgetneuroninfo(network.innerobj, k, i, ref fkind, ref threshold); return; }
/************************************************************************* This function returns information about connection from I0-th neuron of K0-th layer to I1-th neuron of K1-th layer.
INPUT PARAMETERS: Network - network K0 - layer index I0 - neuron index (within layer) K1 - layer index I1 - neuron index (within layer)

RESULT: connection weight (zero for non-existent connections)

This function: 1. throws exception if layer or neuron with given index does not exist. 2. returns zero if neurons exist, but there is no connection between them

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static double mlpgetweight(multilayerperceptron network, int k0, int i0, int k1, int i1) { double result = mlpbase.mlpgetweight(network.innerobj, k0, i0, k1, i1); return result; }
/************************************************************************* This function sets offset/scaling coefficients for I-th input of the network.

INPUT PARAMETERS: Network - network I - input index Mean - mean term Sigma - sigma term (if zero, will be replaced by 1.0)

NOTE: I-th input is passed through linear transformation IN[i] = (IN[i]-Mean)/Sigma before feeding to the network. This function sets Mean and Sigma.

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpsetinputscaling(multilayerperceptron network, int i, double mean, double sigma) { mlpbase.mlpsetinputscaling(network.innerobj, i, mean, sigma); return; }
/************************************************************************* This function sets offset/scaling coefficients for I-th output of the network.

INPUT PARAMETERS: Network - network I - output index Mean - mean term Sigma - sigma term (if zero, will be replaced by 1.0)

NOTE: I-th output is passed through linear transformation OUT[i] = OUT[i]*Sigma+Mean before returning it to user. This function sets Sigma/Mean. In case we have SOFTMAX-normalized network, you cannot set (Sigma,Mean) to anything other than (0.0,1.0) - this function will throw exception.

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpsetoutputscaling(multilayerperceptron network, int i, double mean, double sigma) { mlpbase.mlpsetoutputscaling(network.innerobj, i, mean, sigma); return; }
/************************************************************************* This function modifies information about Ith neuron of Kth layer

INPUT PARAMETERS: Network - network K - layer index I - neuron index (within layer) FKind - activation function type (used by MLPActivationFunction()) this value must be zero for input neurons (you cannot set activation function for input neurons) Threshold - also called offset, bias this value must be zero for input neurons (you cannot set threshold for input neurons)

NOTES: 1. this function throws exception if layer or neuron with given index does not exist. 2. this function also throws exception when you try to set non-linear activation function for input neurons (any kind of network) or for output neurons of classifier network. 3. this function throws exception when you try to set non-zero threshold for input neurons (any kind of network).
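EXAMPLE (illustrative sketch; assumes net has at least one hidden layer; FKind=0 denotes a linear neuron, see MLPActivationFunction below):

    mlpsetneuroninfo(net, 1, 0, 0, 0.0);

This makes the 0-th neuron of the first hidden layer linear with zero threshold.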
-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpsetneuroninfo(multilayerperceptron network, int k, int i, int fkind, double threshold) { mlpbase.mlpsetneuroninfo(network.innerobj, k, i, fkind, threshold); return; }
/************************************************************************* This function modifies information about connection from I0-th neuron of K0-th layer to I1-th neuron of K1-th layer.

INPUT PARAMETERS: Network - network K0 - layer index I0 - neuron index (within layer) K1 - layer index I1 - neuron index (within layer) W - connection weight (must be zero for non-existent connections)

This function: 1. throws exception if layer or neuron with given index does not exist. 2. throws exception if you try to set non-zero weight for non-existent connection

-- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/
public static void mlpsetweight(multilayerperceptron network, int k0, int i0, int k1, int i1, double w) { mlpbase.mlpsetweight(network.innerobj, k0, i0, k1, i1, w); return; }
/************************************************************************* Neural network activation function

INPUT PARAMETERS: NET - neuron input K - function index (zero for linear function)

OUTPUT PARAMETERS: F - function DF - its derivative D2F - its second derivative

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpactivationfunction(double net, int k, out double f, out double df, out double d2f) { f = 0; df = 0; d2f = 0; mlpbase.mlpactivationfunction(net, k, ref f, ref df, ref d2f); return; }
/************************************************************************* Processing

INPUT PARAMETERS: Network - neural network X - input vector, array[0..NIn-1].

OUTPUT PARAMETERS: Y - result. Regression estimate when solving regression task, vector of posterior probabilities for classification task.

See also MLPProcessI

-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/
public static void mlpprocess(multilayerperceptron network, double[] x, ref double[] y) { mlpbase.mlpprocess(network.innerobj, x, ref y); return; }
/************************************************************************* 'interactive' variant of MLPProcess for languages like Python which support constructs like "Y = MLPProcess(NN,X)" and interactive mode of the interpreter This function allocates new array on each call, so it is significantly slower than its 'non-interactive' counterpart, but it is more convenient when you call it from command line.

-- ALGLIB -- Copyright 21.09.2010 by Bochkanov Sergey *************************************************************************/
public static void mlpprocessi(multilayerperceptron network, double[] x, out double[] y) { y = new double[0]; mlpbase.mlpprocessi(network.innerobj, x, ref y); return; }
/************************************************************************* Error of the neural network on dataset.

INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; SSize - points count.

RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static double mlperror(multilayerperceptron network, double[,] xy, int ssize) { double result = mlpbase.mlperror(network.innerobj, xy, ssize); return result; } /************************************************************************* Error of the neural network on dataset given by sparse matrix. INPUT PARAMETERS: Network - neural network XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0 RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static double mlperrorsparse(multilayerperceptron network, sparsematrix xy, int npoints) { double result = mlpbase.mlperrorsparse(network.innerobj, xy.innerobj, npoints); return result; } /************************************************************************* Natural error function for neural network, internal subroutine. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static double mlperrorn(multilayerperceptron network, double[,] xy, int ssize) { double result = mlpbase.mlperrorn(network.innerobj, xy, ssize); return result; } /************************************************************************* Classification error -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static int mlpclserror(multilayerperceptron network, double[,] xy, int ssize) { int result = mlpbase.mlpclserror(network.innerobj, xy, ssize); return result; } /************************************************************************* Relative classification error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Percent of incorrectly classified cases. Works both for classifier networks and general purpose networks used as classifiers.
DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 25.12.2008 by Bochkanov Sergey *************************************************************************/ public static double mlprelclserror(multilayerperceptron network, double[,] xy, int npoints) { double result = mlpbase.mlprelclserror(network.innerobj, xy, npoints); return result; } /************************************************************************* Relative classification error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Percent of incorrectly classified cases. Works both for classifier networks and general purpose networks used as classifiers. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlprelclserrorsparse(multilayerperceptron network, sparsematrix xy, int npoints) { double result = mlpbase.mlprelclserrorsparse(network.innerobj, xy.innerobj, npoints); return result; } /************************************************************************* Average cross-entropy (in bits per element) on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: CrossEntropy/(NPoints*LN(2)). Zero if network solves regression task. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
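EXAMPLE: a minimal sketch of the classification format (values below are hypothetical; 'net' is assumed to be a SOFTMAX-normalized network with NIn=2, NClasses=3):

    double[,] xy = new double[,]{
        {0.1, 0.9, 2},   // two inputs, class 2
        {0.7, 0.3, 0}    // two inputs, class 0
    };
    double ce = mlpavgce(net, xy, 2);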
-- ALGLIB -- Copyright 08.01.2009 by Bochkanov Sergey *************************************************************************/ public static double mlpavgce(multilayerperceptron network, double[,] xy, int npoints) { double result = mlpbase.mlpavgce(network.innerobj, xy, npoints); return result; } /************************************************************************* Average cross-entropy (in bits per element) on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: CrossEntropy/(NPoints*LN(2)). Zero if network solves regression task. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlpavgcesparse(multilayerperceptron network, sparsematrix xy, int npoints) { double result = mlpbase.mlpavgcesparse(network.innerobj, xy.innerobj, npoints); return result; } /************************************************************************* RMS error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Root mean square error. Its meaning for regression task is obvious. As for classification task, RMS error means error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static double mlprmserror(multilayerperceptron network, double[,] xy, int npoints) { double result = mlpbase.mlprmserror(network.innerobj, xy, npoints); return result; } /************************************************************************* RMS error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format.
This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Root mean square error. Its meaning for regression task is obvious. As for classification task, RMS error means error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlprmserrorsparse(multilayerperceptron network, sparsematrix xy, int npoints) { double result = mlpbase.mlprmserrorsparse(network.innerobj, xy.innerobj, npoints); return result; } /************************************************************************* Average error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Its meaning for regression task is obvious. As for classification task, it means average error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 11.03.2008 by Bochkanov Sergey *************************************************************************/ public static double mlpavgerror(multilayerperceptron network, double[,] xy, int npoints) { double result = mlpbase.mlpavgerror(network.innerobj, xy, npoints); return result; } /************************************************************************* Average error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Its meaning for regression task is obvious. As for classification task, it means average error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlpavgerrorsparse(multilayerperceptron network, sparsematrix xy, int npoints) { double result = mlpbase.mlpavgerrorsparse(network.innerobj, xy.innerobj, npoints); return result; } /************************************************************************* Average relative error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Its meaning for regression task is obvious. As for classification task, it means average relative error when estimating posterior probability of belonging to the correct class. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 11.03.2008 by Bochkanov Sergey *************************************************************************/ public static double mlpavgrelerror(multilayerperceptron network, double[,] xy, int npoints) { double result = mlpbase.mlpavgrelerror(network.innerobj, xy, npoints); return result; } /************************************************************************* Average relative error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Its meaning for regression task is obvious. As for classification task, it means average relative error when estimating posterior probability of belonging to the correct class. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
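EXAMPLE: a minimal sketch (assumes the sparse matrix helpers from the ALGLIB sparse subpackage; sizes and names below are hypothetical):

    sparsematrix xy;
    sparsecreate(npoints, nin+1, out xy);   // classification layout: NIn inputs + class column
    // fill entries with sparseset(xy, i, j, v), then switch to CRS storage:
    sparseconverttocrs(xy);
    double err = mlpavgrelerrorsparse(net, xy, npoints);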
-- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlpavgrelerrorsparse(multilayerperceptron network, sparsematrix xy, int npoints) { double result = mlpbase.mlpavgrelerrorsparse(network.innerobj, xy.innerobj, npoints); return result; } /************************************************************************* Gradient calculation INPUT PARAMETERS: Network - network initialized with one of the network creation funcs X - input vector, length of array must be at least NIn DesiredY- desired outputs, length of array must be at least NOut Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgrad(multilayerperceptron network, double[] x, double[] desiredy, out double e, ref double[] grad) { e = 0; mlpbase.mlpgrad(network.innerobj, x, desiredy, ref e, ref grad); return; } /************************************************************************* Gradient calculation (natural error function is used) INPUT PARAMETERS: Network - network initialized with one of the network creation funcs X - input vector, length of array must be at least NIn DesiredY- desired outputs, length of array must be at least NOut Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, sum-of-squares for regression networks, cross-entropy for classification networks. Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgradn(multilayerperceptron network, double[] x, double[] desiredy, out double e, ref double[] grad) { e = 0; mlpbase.mlpgradn(network.innerobj, x, desiredy, ref e, ref grad); return; } /************************************************************************* Batch gradient calculation for a set of inputs/outputs INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SSize - number of elements in XY Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. 
OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatch(multilayerperceptron network, double[,] xy, int ssize, out double e, ref double[] grad) { e = 0; mlpbase.mlpgradbatch(network.innerobj, xy, ssize, ref e, ref grad); return; } /************************************************************************* Batch gradient calculation for a set of inputs/outputs given by sparse matrices INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SSize - number of elements in XY Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatchsparse(multilayerperceptron network, sparsematrix xy, int ssize, out double e, ref double[] grad) { e = 0; mlpbase.mlpgradbatchsparse(network.innerobj, xy.innerobj, ssize, ref e, ref grad); return; } /************************************************************************* Batch gradient calculation for a subset of dataset INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Idx - subset of SubsetSize elements, array[SubsetSize]: * Idx[I] stores row index in the original dataset which is given by XY. Gradient is calculated with respect to rows whose indexes are stored in Idx[]. * Idx[] must store correct indexes; this function throws an exception in case incorrect index (less than 0 or larger than rows(XY)) is given * Idx[] may store indexes in any order and even with repetitions. SubsetSize- number of elements in Idx[] array. Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] NOTE: when SubsetSize<0, the entire dataset is used, as if MLPGradBatch had been called. -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatchsubset(multilayerperceptron network, double[,] xy, int setsize, int[] idx, int subsetsize, out double e, ref double[] grad) { e = 0; mlpbase.mlpgradbatchsubset(network.innerobj, xy, setsize, idx, subsetsize, ref e, ref grad); return; } /************************************************************************* Batch gradient calculation for a subset of a dataset given by sparse matrix.
INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Idx - subset of SubsetSize elements, array[SubsetSize]: * Idx[I] stores row index in the original dataset which is given by XY. Gradient is calculated with respect to rows whose indexes are stored in Idx[]. * Idx[] must store correct indexes; this function throws an exception in case incorrect index (less than 0 or larger than rows(XY)) is given * Idx[] may store indexes in any order and even with repetitions. SubsetSize- number of elements in Idx[] array. Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] NOTE: when SubsetSize<0, the entire dataset is used, as if MLPGradBatchSparse had been called. -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatchsparsesubset(multilayerperceptron network, sparsematrix xy, int setsize, int[] idx, int subsetsize, out double e, ref double[] grad) { e = 0; mlpbase.mlpgradbatchsparsesubset(network.innerobj, xy.innerobj, setsize, idx, subsetsize, ref e, ref grad); return; } /************************************************************************* Batch gradient calculation for a set of inputs/outputs (natural error function is used) INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SSize - number of elements in XY Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, sum-of-squares for regression networks, cross-entropy for classification networks. Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgradnbatch(multilayerperceptron network, double[,] xy, int ssize, out double e, ref double[] grad) { e = 0; mlpbase.mlpgradnbatch(network.innerobj, xy, ssize, ref e, ref grad); return; } /************************************************************************* Batch Hessian calculation (natural error function) using R-algorithm. Internal subroutine. -- ALGLIB -- Copyright 26.01.2008 by Bochkanov Sergey. Hessian calculation based on R-algorithm described in "Fast Exact Multiplication by the Hessian", B. A. Pearlmutter, Neural Computation, 1994. *************************************************************************/ public static void mlphessiannbatch(multilayerperceptron network, double[,] xy, int ssize, out double e, ref double[] grad, ref double[,] h) { e = 0; mlpbase.mlphessiannbatch(network.innerobj, xy, ssize, ref e, ref grad, ref h); return; } /************************************************************************* Batch Hessian calculation using R-algorithm. Internal subroutine. -- ALGLIB -- Copyright 26.01.2008 by Bochkanov Sergey.
Hessian calculation based on R-algorithm described in "Fast Exact Multiplication by the Hessian", B. A. Pearlmutter, Neural Computation, 1994. *************************************************************************/ public static void mlphessianbatch(multilayerperceptron network, double[,] xy, int ssize, out double e, ref double[] grad, ref double[,] h) { e = 0; mlpbase.mlphessianbatch(network.innerobj, xy, ssize, ref e, ref grad, ref h); return; } /************************************************************************* Calculation of all types of errors. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. OUTPUT PARAMETERS: Rep - it contains all types of errors. NOTE: when SubsetSize<0, the entire dataset is used. -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpallerrorssubset(multilayerperceptron network, double[,] xy, int setsize, int[] subset, int subsetsize, out modelerrors rep) { rep = new modelerrors(); mlpbase.mlpallerrorssubset(network.innerobj, xy, setsize, subset, subsetsize, rep.innerobj); return; } /************************************************************************* Calculation of all types of errors. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset given by sparse matrix; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. OUTPUT PARAMETERS: Rep - it contains all types of errors. NOTE: when SubsetSize<0, the entire dataset is used. -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpallerrorssparsesubset(multilayerperceptron network, sparsematrix xy, int setsize, int[] subset, int subsetsize, out modelerrors rep) { rep = new modelerrors(); mlpbase.mlpallerrorssparsesubset(network.innerobj, xy.innerobj, setsize, subset, subsetsize, rep.innerobj); return; } /************************************************************************* Error of the neural network on dataset. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; SetSize - real size of XY, SetSize>=0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static double mlperrorsubset(multilayerperceptron network, double[,] xy, int setsize, int[] subset, int subsetsize) { double result = mlpbase.mlperrorsubset(network.innerobj, xy, setsize, subset, subsetsize); return result; } /************************************************************************* Error of the neural network on dataset. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. SetSize - real size of XY, SetSize>=0; it is used when SubsetSize<0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
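EXAMPLE: a minimal sketch (index values below are hypothetical; XY is assumed to be a CRS sparse matrix with 10 rows):

    int[] subset = new int[]{0, 2, 2, 5};   // indexes may repeat and be unordered
    double e = mlperrorsparsesubset(net, xy, 10, subset, 4);
    // passing SubsetSize<0 processes the entire dataset instead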
-- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static double mlperrorsparsesubset(multilayerperceptron network, sparsematrix xy, int setsize, int[] subset, int subsetsize) { double result = mlpbase.mlperrorsparsesubset(network.innerobj, xy.innerobj, setsize, subset, subsetsize); return result; } } public partial class alglib { /************************************************************************* *************************************************************************/ public class logitmodel { // // Public declarations // public logitmodel() { _innerobj = new logit.logitmodel(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private logit.logitmodel _innerobj; public logit.logitmodel innerobj { get { return _innerobj; } } public logitmodel(logit.logitmodel obj) { _innerobj = obj; } } /************************************************************************* MNLReport structure contains information about training process: * NGrad - number of gradient calculations * NHess - number of Hessian calculations *************************************************************************/ public class mnlreport { // // Public declarations // public int ngrad { get { return _innerobj.ngrad; } set { _innerobj.ngrad = value; } } public int nhess { get { return _innerobj.nhess; } set { _innerobj.nhess = value; } } public mnlreport() { _innerobj = new logit.mnlreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private logit.mnlreport _innerobj; public logit.mnlreport innerobj { get { return _innerobj; } } public mnlreport(logit.mnlreport obj) { _innerobj = obj; } } /************************************************************************* This subroutine trains logit model. INPUT PARAMETERS: XY - training set, array[0..NPoints-1,0..NVars] First NVars columns store values of independent variables, next column stores number of class (from 0 to NClasses-1) which dataset element belongs to. Fractional values are rounded to nearest integer. NPoints - training set size, NPoints>=1 NVars - number of independent variables, NVars>=1 NClasses - number of classes, NClasses>=2 OUTPUT PARAMETERS: Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints=1 OUTPUT PARAMETERS: State - structure stores algorithm state -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdcreate(int n, out mcpdstate s) { s = new mcpdstate(); mcpd.mcpdcreate(n, s.innerobj); return; } /************************************************************************* DESCRIPTION: This function is a specialized version of MCPDCreate() function, and we recommend you to read comments for this function for general information about MCPD solver. This function creates MCPD (Markov Chains for Population Data) solver for "Entry-state" model, i.e.
model where transition from X[i] to X[i+1] is modelled as X[i+1] = P*X[i] where X[i] and X[i+1] are N-dimensional state vectors P is a N*N transition matrix and one selected component of X[] is called "entry" state and is treated in a special way: system state always transits from "entry" state to some other state system state can not transit from any state into "entry" state Such conditions basically mean that row of P which corresponds to "entry" state is zero. Such models arise when: * there is some population of individuals * individuals can have different states * individuals can transit from one state to another * population size is NOT constant - at every moment of time there is some (unpredictable) amount of "new" individuals, which can transit into one of the states at the next turn, but still no one leaves population * you want to model transitions of individuals from one state into another * but you do NOT want to predict amount of "new" individuals because it does not depend on individuals already present (hence system can not transit INTO entry state - it can only transit FROM it). This model is discussed in more detail in the ALGLIB User Guide (see http://www.alglib.net/dataanalysis/ for more data). INPUT PARAMETERS: N - problem dimension, N>=2 EntryState- index of entry state, in 0..N-1 OUTPUT PARAMETERS: State - structure stores algorithm state -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdcreateentry(int n, int entrystate, out mcpdstate s) { s = new mcpdstate(); mcpd.mcpdcreateentry(n, entrystate, s.innerobj); return; } /************************************************************************* DESCRIPTION: This function is a specialized version of MCPDCreate() function, and we recommend you to read comments for this function for general information about MCPD solver. This function creates MCPD (Markov Chains for Population Data) solver for "Exit-state" model, i.e. model where transition from X[i] to X[i+1] is modelled as X[i+1] = P*X[i] where X[i] and X[i+1] are N-dimensional state vectors P is a N*N transition matrix and one selected component of X[] is called "exit" state and is treated in a special way: system state can transit from any state into "exit" state system state can not transit from "exit" state into any other state transition operator discards "exit" state (makes it zero at each turn) Such conditions basically mean that column of P which corresponds to "exit" state is zero. Multiplication by such P may decrease sum of vector components. Such models arise when: * there is some population of individuals * individuals can have different states * individuals can transit from one state to another * population size is NOT constant - individuals can move into "exit" state and leave population at the next turn, but there are no new individuals * amount of individuals which leave population can be predicted * you want to model transitions of individuals from one state into another (including transitions into the "exit" state) This model is discussed in more detail in the ALGLIB User Guide (see http://www.alglib.net/dataanalysis/ for more data).
INPUT PARAMETERS: N - problem dimension, N>=2 ExitState- index of exit state, in 0..N-1 OUTPUT PARAMETERS: State - structure stores algorithm state -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdcreateexit(int n, int exitstate, out mcpdstate s) { s = new mcpdstate(); mcpd.mcpdcreateexit(n, exitstate, s.innerobj); return; } /************************************************************************* DESCRIPTION: This function is a specialized version of MCPDCreate() function, and we recommend you to read comments for this function for general information about MCPD solver. This function creates MCPD (Markov Chains for Population Data) solver for "Entry-Exit-states" model, i.e. model where transition from X[i] to X[i+1] is modelled as X[i+1] = P*X[i] where X[i] and X[i+1] are N-dimensional state vectors P is a N*N transition matrix one selected component of X[] is called "entry" state and is treated in a special way: system state always transits from "entry" state to some other state system state can not transit from any state into "entry" state and another component of X[] is called "exit" state and is treated in a special way too: system state can transit from any state into "exit" state system state can not transit from "exit" state into any other state transition operator discards "exit" state (makes it zero at each turn) Such conditions basically mean that: row of P which corresponds to "entry" state is zero column of P which corresponds to "exit" state is zero Multiplication by such P may decrease sum of vector components. Such models arise when: * there is some population of individuals * individuals can have different states * individuals can transit from one state to another * population size is NOT constant * at every moment of time there is some (unpredictable) amount of "new" individuals, which can transit into one of the states at the next turn * some individuals can move (predictably) into "exit" state and leave population at the next turn * you want to model transitions of individuals from one state into another, including transitions from the "entry" state and into the "exit" state. * but you do NOT want to predict amount of "new" individuals because it does not depend on individuals already present (hence system can not transit INTO entry state - it can only transit FROM it). This model is discussed in more detail in the ALGLIB User Guide (see http://www.alglib.net/dataanalysis/ for more data). INPUT PARAMETERS: N - problem dimension, N>=2 EntryState- index of entry state, in 0..N-1 ExitState- index of exit state, in 0..N-1 OUTPUT PARAMETERS: State - structure stores algorithm state -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdcreateentryexit(int n, int entrystate, int exitstate, out mcpdstate s) { s = new mcpdstate(); mcpd.mcpdcreateentryexit(n, entrystate, exitstate, s.innerobj); return; } /************************************************************************* This function is used to add a track - sequence of system states at different moments of its evolution. You may add one or several tracks to the MCPD solver. In case you have several tracks, they won't overwrite each other.
For example, if you pass two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to t=A+3, t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks - i.e. it won't try to model transition from t=A+3 to t=B+1. INPUT PARAMETERS: S - solver XY - track, array[K,N]: * I-th row is a state at t=I * elements of XY must be non-negative (exception will be thrown on negative elements) K - number of points in a track * if given, only leading K rows of XY are used * if not given, automatically determined from size of XY NOTES: 1. Track may contain either proportional or population data: * with proportional data all rows of XY must sum to 1.0, i.e. we have proportions instead of absolute population values * with population data rows of XY contain population counts and generally do not sum to 1.0 (although they still must be non-negative) -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdaddtrack(mcpdstate s, double[,] xy, int k) { mcpd.mcpdaddtrack(s.innerobj, xy, k); return; } public static void mcpdaddtrack(mcpdstate s, double[,] xy) { int k; k = ap.rows(xy); mcpd.mcpdaddtrack(s.innerobj, xy, k); return; } /************************************************************************* This function is used to add equality constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to place equality constraints on arbitrary subset of elements of P. Set of constraints is specified by EC, which may contain either NAN's or finite numbers from [0,1]. NAN denotes absence of constraint, finite number denotes equality constraint on specific element of P. You can also use MCPDAddEC() function which allows you to ADD equality constraint for one element of P without changing constraints for other elements. These functions (MCPDSetEC and MCPDAddEC) interact as follows: * there is internal matrix of equality constraints which is stored in the MCPD solver * MCPDSetEC() replaces this matrix by another one (SET) * MCPDAddEC() modifies one element of this matrix and leaves other ones unchanged (ADD) * thus MCPDAddEC() call preserves all modifications done by previous calls, while MCPDSetEC() completely discards all changes done to the equality constraints. INPUT PARAMETERS: S - solver EC - equality constraints, array[N,N]. Elements of EC can be either NAN's or finite numbers from [0,1]. NAN denotes absence of constraints, while finite value denotes equality constraint on the corresponding element of P. NOTES: 1. infinite values of EC will lead to exception being thrown. Values less than 0.0 or greater than 1.0 will lead to error code being returned after call to MCPDSolve().
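EXAMPLE: a minimal sketch for N=2 (the constrained value below is hypothetical):

    double[,] ec = new double[,]{
        {0.5,        Double.NaN},
        {Double.NaN, Double.NaN}
    };
    mcpdsetec(s, ec);   // P[0,0] is constrained to 0.5, all other elements stay free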
-- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetec(mcpdstate s, double[,] ec) { mcpd.mcpdsetec(s.innerobj, ec); return; } /************************************************************************* This function is used to add equality constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to ADD equality constraint for one element of P without changing constraints for other elements. You can also use MCPDSetEC() function which allows you to specify arbitrary set of equality constraints in one call. These functions (MCPDSetEC and MCPDAddEC) interact as follows: * there is internal matrix of equality constraints which is stored in the MCPD solver * MCPDSetEC() replaces this matrix by another one (SET) * MCPDAddEC() modifies one element of this matrix and leaves other ones unchanged (ADD) * thus MCPDAddEC() call preserves all modifications done by previous calls, while MCPDSetEC() completely discards all changes done to the equality constraints. INPUT PARAMETERS: S - solver I - row index of element being constrained J - column index of element being constrained C - value (constraint for P[I,J]). Can be either NAN (no constraint) or finite value from [0,1]. NOTES: 1. infinite values of C will lead to exception being thrown. Values less than 0.0 or greater than 1.0 will lead to error code being returned after call to MCPDSolve(). -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdaddec(mcpdstate s, int i, int j, double c) { mcpd.mcpdaddec(s.innerobj, i, j, c); return; } /************************************************************************* This function is used to add bound constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to place bound constraints on arbitrary subset of elements of P. 
Set of constraints is specified by BndL/BndU matrices, which may contain arbitrary combination of finite numbers or infinities (like -INF<x<=0.5 or 0.25<=x<+INF). You can also use MCPDAddBC() function which allows you to ADD bound constraint for one element of P without changing constraints for other elements. These functions (MCPDSetBC and MCPDAddBC) interact as follows: * there is internal matrix of bound constraints which is stored in the MCPD solver * MCPDSetBC() replaces this matrix by another one (SET) * MCPDAddBC() modifies one element of this matrix and leaves other ones unchanged (ADD) * thus MCPDAddBC() call preserves all modifications done by previous calls, while MCPDSetBC() completely discards all changes done to the bound constraints. INPUT PARAMETERS: S - solver BndL - lower bounds, array[N,N]. Elements of BndL can be finite numbers or -INF. BndU - upper bounds, array[N,N]. Elements of BndU can be finite numbers or +INF. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetbc(mcpdstate s, double[,] bndl, double[,] bndu) { mcpd.mcpdsetbc(s.innerobj, bndl, bndu); return; } /************************************************************************* This function is used to add bound constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to ADD bound constraint for one element of P without changing constraints for other elements. You can also use MCPDSetBC() function which allows you to place bound constraints on arbitrary subset of elements of P. Set of constraints is specified by BndL/BndU matrices, which may contain arbitrary combination of finite numbers or infinities (like -INF<x<=0.5 or 0.25<=x<+INF). INPUT PARAMETERS: S - solver I - row index of element being constrained J - column index of element being constrained BndL - lower bound, can be -INF BndU - upper bound, can be +INF -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdaddbc(mcpdstate s, int i, int j, double bndl, double bndu) { mcpd.mcpdaddbc(s.innerobj, i, j, bndl, bndu); return; } /************************************************************************* This function is used to set linear equality/inequality constraints on the elements of the transition matrix P. Coefficients of constraints are specified by matrix C (one of the parameters): one row of C corresponds to one constraint, its first N*N columns store coefficients at the elements of P (row by row), and the last column stores the right part. Constraint kind is specified by CT array: * "=" (CT[i]=0) * "<=" (CT[i]<0) * ">=" (CT[i]>0). Your constraint may involve only some subset of P (less than N*N elements). For example it can be something like P[0,0] + P[0,1] = 0.5 In this case you still should pass matrix with N*N+1 columns, but all its elements (except for C[0,0], C[0,1] and C[0,N*N]) will be zero. INPUT PARAMETERS: S - solver C - array[K,N*N+1] - coefficients of constraints (see above for complete description) CT - array[K] - constraint types (see above for complete description) K - number of equality/inequality constraints, K>=0: * if given, only leading K elements of C/CT are used * if not given, automatically determined from sizes of C/CT -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetlc(mcpdstate s, double[,] c, int[] ct, int k) { mcpd.mcpdsetlc(s.innerobj, c, ct, k); return; } public static void mcpdsetlc(mcpdstate s, double[,] c, int[] ct) { int k; if( (ap.rows(c)!=ap.len(ct))) throw new alglibexception("Error while calling 'mcpdsetlc': looks like one of arguments has wrong size"); k = ap.rows(c); mcpd.mcpdsetlc(s.innerobj, c, ct, k); return; } /************************************************************************* This function allows you to tune amount of Tikhonov regularization being applied to your problem. By default, regularizing term is equal to r*||P-prior_P||^2, where r is a small non-zero value, P is transition matrix, prior_P is identity matrix, ||X||^2 is a sum of squared elements of X. This function allows you to change coefficient r. You can also change prior values with MCPDSetPrior() function. INPUT PARAMETERS: S - solver V - regularization coefficient, finite non-negative value. It is not recommended to specify zero value unless you are pretty sure that you want it. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsettikhonovregularizer(mcpdstate s, double v) { mcpd.mcpdsettikhonovregularizer(s.innerobj, v); return; } /************************************************************************* This function allows you to set prior values used for regularization of your problem. By default, regularizing term is equal to r*||P-prior_P||^2, where r is a small non-zero value, P is transition matrix, prior_P is identity matrix, ||X||^2 is a sum of squared elements of X. This function allows you to change prior values prior_P. You can also change r with MCPDSetTikhonovRegularizer() function. INPUT PARAMETERS: S - solver PP - array[N,N], matrix of prior values: 1. elements must be real numbers from [0,1] 2.
columns must sum to 1.0. First property is checked (exception is thrown otherwise), while second one is not checked/enforced. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetprior(mcpdstate s, double[,] pp) { mcpd.mcpdsetprior(s.innerobj, pp); return; } /************************************************************************* This function is used to change prediction weights. MCPD solver scales prediction errors as follows: Error(P) = ||W*(y-P*x)||^2 where x is a system state at time t, y is a system state at time t+1, P is a transition matrix, W is a diagonal scaling matrix. By default, weights are chosen in order to minimize relative prediction error instead of absolute one. For example, if one component of state is about 0.5 in magnitude and another one is about 0.05, then algorithm will make corresponding weights equal to 2.0 and 20.0. INPUT PARAMETERS: S - solver PW - array[N], weights: * must be non-negative values (exception will be thrown otherwise) * zero values will be replaced by automatically chosen values -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetpredictionweights(mcpdstate s, double[] pw) { mcpd.mcpdsetpredictionweights(s.innerobj, pw); return; } /************************************************************************* This function is used to start solution of the MCPD problem. After return from this function, you can use MCPDResults() to get solution and completion code. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsolve(mcpdstate s) { mcpd.mcpdsolve(s.innerobj); return; } /************************************************************************* MCPD results INPUT PARAMETERS: State - algorithm state OUTPUT PARAMETERS: P - array[N,N], transition matrix Rep - optimization report. You should check Rep.TerminationType in order to distinguish successful termination from unsuccessful one. In short, positive values denote success, negative ones are failures. More information about fields of this structure can be found in the comments on MCPDReport datatype. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdresults(mcpdstate s, out double[,] p, out mcpdreport rep) { p = new double[0,0]; rep = new mcpdreport(); mcpd.mcpdresults(s.innerobj, ref p, rep.innerobj); return; } } public partial class alglib { /************************************************************************* Neural networks ensemble *************************************************************************/ public class mlpensemble { // // Public declarations // public mlpensemble() { _innerobj = new mlpe.mlpensemble(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private mlpe.mlpensemble _innerobj; public mlpe.mlpensemble innerobj { get { return _innerobj; } } public mlpensemble(mlpe.mlpensemble obj) { _innerobj = obj; } } /************************************************************************* This function serializes data structure to string.
Important properties of s_out: * it contains alphanumeric characters, dots, underscores, minus signs * these symbols are grouped into words, which are separated by spaces and Windows-style (CR+LF) newlines * although serializer uses spaces and CR+LF as separators, you can replace any separator character by arbitrary combination of spaces, tabs, Windows or Unix newlines. It allows flexible reformatting of the string in case you want to include it into text or XML file. But you should not insert separators into the middle of the "words" nor should you change the case of letters. * s_out can be freely moved between 32-bit and 64-bit systems, little and big endian machines, and so on. You can serialize structure on 32-bit machine and unserialize it on 64-bit one (or vice versa), or serialize it on SPARC and unserialize on x86. You can also serialize it in C# version of ALGLIB and unserialize in C++ one, and vice versa. *************************************************************************/ public static void mlpeserialize(mlpensemble obj, out string s_out) { alglib.serializer s = new alglib.serializer(); s.alloc_start(); mlpe.mlpealloc(s, obj.innerobj); s.sstart_str(); mlpe.mlpeserialize(s, obj.innerobj); s.stop(); s_out = s.get_string(); } /************************************************************************* This function unserializes data structure from string. *************************************************************************/ public static void mlpeunserialize(string s_in, out mlpensemble obj) { alglib.serializer s = new alglib.serializer(); obj = new mlpensemble(); s.ustart_str(s_in); mlpe.mlpeunserialize(s, obj.innerobj); s.stop(); } /************************************************************************* Like MLPCreate0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreate0(int nin, int nout, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreate0(nin, nout, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreate1, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreate1(int nin, int nhid, int nout, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreate1(nin, nhid, nout, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreate2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreate2(int nin, int nhid1, int nhid2, int nout, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreate2(nin, nhid1, nhid2, nout, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateB0, but for ensembles.
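EXAMPLE: a minimal sketch (the geometry and the bound parameters B, D below are hypothetical; see MLPCreateB0 for their meaning):

    mlpensemble ens;
    mlpecreateb0(2, 1, 1.0, 1.0, 10, out ens);   // ensemble of 10 bounded-output networks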
-- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreateb0(int nin, int nout, double b, double d, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreateb0(nin, nout, b, d, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateB1, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreateb1(int nin, int nhid, int nout, double b, double d, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreateb1(nin, nhid, nout, b, d, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateB2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreateb2(int nin, int nhid1, int nhid2, int nout, double b, double d, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreateb2(nin, nhid1, nhid2, nout, b, d, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateR0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreater0(int nin, int nout, double a, double b, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreater0(nin, nout, a, b, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateR1, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreater1(int nin, int nhid, int nout, double a, double b, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreater1(nin, nhid, nout, a, b, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateR2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreater2(int nin, int nhid1, int nhid2, int nout, double a, double b, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreater2(nin, nhid1, nhid2, nout, a, b, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateC0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatec0(int nin, int nout, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreatec0(nin, nout, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateC1, but for ensembles. 
-- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatec1(int nin, int nhid, int nout, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreatec1(nin, nhid, nout, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Like MLPCreateC2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatec2(int nin, int nhid1, int nhid2, int nout, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreatec2(nin, nhid1, nhid2, nout, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Creates ensemble from network. Only network geometry is copied. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatefromnetwork(multilayerperceptron network, int ensemblesize, out mlpensemble ensemble) { ensemble = new mlpensemble(); mlpe.mlpecreatefromnetwork(network.innerobj, ensemblesize, ensemble.innerobj); return; } /************************************************************************* Randomization of MLP ensemble -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlperandomize(mlpensemble ensemble) { mlpe.mlperandomize(ensemble.innerobj); return; } /************************************************************************* Return ensemble properties (number of inputs and outputs). -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpeproperties(mlpensemble ensemble, out int nin, out int nout) { nin = 0; nout = 0; mlpe.mlpeproperties(ensemble.innerobj, ref nin, ref nout); return; } /************************************************************************* Return normalization type (whether ensemble is SOFTMAX-normalized or not). -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static bool mlpeissoftmax(mlpensemble ensemble) { bool result = mlpe.mlpeissoftmax(ensemble.innerobj); return result; } /************************************************************************* Processing INPUT PARAMETERS: Ensemble- neural networks ensemble X - input vector, array[0..NIn-1]. Y - (possibly) preallocated buffer; if size of Y is less than NOut, it will be reallocated. If it is large enough, it is NOT reallocated, so we can save some time on reallocation. OUTPUT PARAMETERS: Y - result. Regression estimate when solving regression task, vector of posterior probabilities for classification task.
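NOTE: a minimal calling sketch (assumes an already trained ensemble; the
      input values are purely illustrative):
      > double[] x = new double[]{0.1, 0.2};
      > double[] y = new double[0];
      > mlpeprocess(ensemble, x, ref y);
      on return Y has length NOut and stores the ensemble output: posterior
      probabilities for classification tasks, estimates for regression.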
-- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpeprocess(mlpensemble ensemble, double[] x, ref double[] y) { mlpe.mlpeprocess(ensemble.innerobj, x, ref y); return; } /************************************************************************* 'interactive' variant of MLPEProcess for languages like Python which support constructs like "Y = MLPEProcess(LM,X)" and interactive mode of the interpreter. This function allocates a new array on each call, so it is significantly slower than its 'non-interactive' counterpart, but it is more convenient when you call it from the command line. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpeprocessi(mlpensemble ensemble, double[] x, out double[] y) { y = new double[0]; mlpe.mlpeprocessi(ensemble.innerobj, x, ref y); return; } /************************************************************************* Relative classification error on the test set INPUT PARAMETERS: Ensemble- ensemble XY - test set NPoints - test set size RESULT: percent of incorrectly classified cases. Works both for classifier networks and for regression networks which are used as classifiers. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static double mlperelclserror(mlpensemble ensemble, double[,] xy, int npoints) { double result = mlpe.mlperelclserror(ensemble.innerobj, xy, npoints); return result; } /************************************************************************* Average cross-entropy (in bits per element) on the test set INPUT PARAMETERS: Ensemble- ensemble XY - test set NPoints - test set size RESULT: CrossEntropy/(NPoints*LN(2)). Zero if ensemble solves regression task. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static double mlpeavgce(mlpensemble ensemble, double[,] xy, int npoints) { double result = mlpe.mlpeavgce(ensemble.innerobj, xy, npoints); return result; } /************************************************************************* RMS error on the test set INPUT PARAMETERS: Ensemble- ensemble XY - test set NPoints - test set size RESULT: root mean square error. For regression tasks this is the RMS deviation of predictions from targets; for classification tasks it is the error in the estimated posterior probabilities. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static double mlpermserror(mlpensemble ensemble, double[,] xy, int npoints) { double result = mlpe.mlpermserror(ensemble.innerobj, xy, npoints); return result; } /************************************************************************* Average error on the test set INPUT PARAMETERS: Ensemble- ensemble XY - test set NPoints - test set size RESULT: average error (mean absolute deviation of predictions from targets). For classification tasks it is the average error in the estimated posterior probabilities.
-- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static double mlpeavgerror(mlpensemble ensemble, double[,] xy, int npoints) { double result = mlpe.mlpeavgerror(ensemble.innerobj, xy, npoints); return result; } /************************************************************************* Average relative error on the test set INPUT PARAMETERS: Ensemble- ensemble XY - test set NPoints - test set size RESULT: average relative error (mean relative deviation of predictions from nonzero targets). For classification tasks it is the average relative error in the estimated posterior probabilities. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static double mlpeavgrelerror(mlpensemble ensemble, double[,] xy, int npoints) { double result = mlpe.mlpeavgrelerror(ensemble.innerobj, xy, npoints); return result; } } public partial class alglib { /************************************************************************* Training report: * RelCLSError - fraction of misclassified cases. * AvgCE - average cross-entropy * RMSError - root-mean-square error * AvgError - average error * AvgRelError - average relative error * NGrad - number of gradient calculations * NHess - number of Hessian calculations * NCholesky - number of Cholesky decompositions NOTE 1: RelCLSError/AvgCE are zero on regression problems. NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain errors in prediction of posterior probabilities *************************************************************************/ public class mlpreport { // // Public declarations // public double relclserror { get { return _innerobj.relclserror; } set { _innerobj.relclserror = value; } } public double avgce { get { return _innerobj.avgce; } set { _innerobj.avgce = value; } } public double rmserror { get { return _innerobj.rmserror; } set { _innerobj.rmserror = value; } } public double avgerror { get { return _innerobj.avgerror; } set { _innerobj.avgerror = value; } } public double avgrelerror { get { return _innerobj.avgrelerror; } set { _innerobj.avgrelerror = value; } } public int ngrad { get { return _innerobj.ngrad; } set { _innerobj.ngrad = value; } } public int nhess { get { return _innerobj.nhess; } set { _innerobj.nhess = value; } } public int ncholesky { get { return _innerobj.ncholesky; } set { _innerobj.ncholesky = value; } } public mlpreport() { _innerobj = new mlptrain.mlpreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private mlptrain.mlpreport _innerobj; public mlptrain.mlpreport innerobj { get { return _innerobj; } } public mlpreport(mlptrain.mlpreport obj) { _innerobj = obj; } } /************************************************************************* Cross-validation estimates of generalization error *************************************************************************/ public class mlpcvreport { // // Public declarations // public double relclserror { get { return _innerobj.relclserror; } set { _innerobj.relclserror = value; } } public double avgce { get { return _innerobj.avgce; } set { _innerobj.avgce = value; } } public double rmserror { get { return _innerobj.rmserror; } set { _innerobj.rmserror = value; } } public double avgerror { get { return _innerobj.avgerror; } set { _innerobj.avgerror = value; } } public double avgrelerror { get { return _innerobj.avgrelerror; } set { _innerobj.avgrelerror
= value; } } public mlpcvreport() { _innerobj = new mlptrain.mlpcvreport(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private mlptrain.mlpcvreport _innerobj; public mlptrain.mlpcvreport innerobj { get { return _innerobj; } } public mlpcvreport(mlptrain.mlpcvreport obj) { _innerobj = obj; } } /************************************************************************* Trainer object for neural network. You should not try to access fields of this object directly - use ALGLIB functions to work with this object. *************************************************************************/ public class mlptrainer { // // Public declarations // public mlptrainer() { _innerobj = new mlptrain.mlptrainer(); } // // Although some of declarations below are public, you should not use them // They are intended for internal use only // private mlptrain.mlptrainer _innerobj; public mlptrain.mlptrainer innerobj { get { return _innerobj; } } public mlptrainer(mlptrain.mlptrainer obj) { _innerobj = obj; } } /************************************************************************* Neural network training using modified Levenberg-Marquardt with exact Hessian calculation and regularization. Subroutine trains neural network with restarts from random positions. Algorithm is well suited for small and medium scale problems (hundreds of weights). INPUT PARAMETERS: Network - neural network with initialized geometry XY - training set NPoints - training set size Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts from random position, >0. If you don't know what Restarts to choose, use 2. OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -9, if internal matrix inverse subroutine failed * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlptrainlm(multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, out int info, out mlpreport rep) { info = 0; rep = new mlpreport(); mlptrain.mlptrainlm(network.innerobj, xy, npoints, decay, restarts, ref info, rep.innerobj); return; } /************************************************************************* Neural network training using L-BFGS algorithm with regularization. Subroutine trains neural network with restarts from random positions. Algorithm is well suited for problems of any dimensionality (memory requirements and step complexity are linear by weights number). INPUT PARAMETERS: Network - neural network with initialized geometry XY - training set NPoints - training set size Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts from random position, >0. If you don't know what Restarts to choose, use 2. WStep - stopping criterion. Algorithm stops if step size is less than WStep. Recommended value - 0.01. Zero step size means stopping after MaxIts iterations. MaxIts - stopping criterion. Algorithm stops after MaxIts iterations (NOT gradient calculations). Zero MaxIts means stopping when step is sufficiently small. 
OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -8, if both WStep=0 and MaxIts=0 * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlptrainlbfgs(multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, out int info, out mlpreport rep) { info = 0; rep = new mlpreport(); mlptrain.mlptrainlbfgs(network.innerobj, xy, npoints, decay, restarts, wstep, maxits, ref info, rep.innerobj); return; } /************************************************************************* Neural network training using early stopping (base algorithm - L-BFGS with regularization). INPUT PARAMETERS: Network - neural network with initialized geometry TrnXY - training set TrnSize - training set size, TrnSize>0 ValXY - validation set ValSize - validation set size, ValSize>0 Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts, either: * strictly positive number - algorithm makes the specified number of restarts from random positions. * -1, in which case algorithm makes exactly one run from the initial state of the network (no randomization). If you don't know what Restarts to choose, choose one of the following: * -1 (deterministic start) * +1 (one random restart) * +5 (moderate amount of random restarts) OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1, ...). * 2, task has been solved, stopping criterion met - sufficiently small step size. Not expected (we use EARLY stopping) but possible and not an error. * 6, task has been solved, stopping criterion met - increase of validation set error. Rep - training report NOTE: Algorithm stops if validation set error increases for long enough or if step size is small enough (there are tasks where validation set error may decrease for eternity). In any case, the solution returned corresponds to the minimum of validation set error. -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlptraines(multilayerperceptron network, double[,] trnxy, int trnsize, double[,] valxy, int valsize, double decay, int restarts, out int info, out mlpreport rep) { info = 0; rep = new mlpreport(); mlptrain.mlptraines(network.innerobj, trnxy, trnsize, valxy, valsize, decay, restarts, ref info, rep.innerobj); return; } /************************************************************************* Cross-validation estimate of generalization error. Base algorithm - L-BFGS. INPUT PARAMETERS: Network - neural network with initialized geometry. Network is not changed during cross-validation - it is used only as a representative of its architecture. XY - training set. SSize - training set size Decay - weight decay, same as in MLPTrainLBFGS Restarts - number of restarts, >0. Restarts are counted for each partition separately, so total number of restarts will be Restarts*FoldsCount.
WStep - stopping criterion, same as in MLPTrainLBFGS MaxIts - stopping criterion, same as in MLPTrainLBFGS FoldsCount - number of folds in k-fold cross-validation, 2<=FoldsCount<=SSize. recommended value: 10. OUTPUT PARAMETERS: Info - return code, same as in MLPTrainLBFGS Rep - report, same as in MLPTrainLM/MLPTrainLBFGS CVRep - generalization error estimates -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcvlbfgs(multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, int foldscount, out int info, out mlpreport rep, out mlpcvreport cvrep) { info = 0; rep = new mlpreport(); cvrep = new mlpcvreport(); mlptrain.mlpkfoldcvlbfgs(network.innerobj, xy, npoints, decay, restarts, wstep, maxits, foldscount, ref info, rep.innerobj, cvrep.innerobj); return; } /************************************************************************* Cross-validation estimate of generalization error. Base algorithm - Levenberg-Marquardt. INPUT PARAMETERS: Network - neural network with initialized geometry. Network is not changed during cross-validation - it is used only as a representative of its architecture. XY - training set. SSize - training set size Decay - weight decay, same as in MLPTrainLBFGS Restarts - number of restarts, >0. restarts are counted for each partition separately, so total number of restarts will be Restarts*FoldsCount. FoldsCount - number of folds in k-fold cross-validation, 2<=FoldsCount<=SSize. recommended value: 10. OUTPUT PARAMETERS: Info - return code, same as in MLPTrainLBFGS Rep - report, same as in MLPTrainLM/MLPTrainLBFGS CVRep - generalization error estimates -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcvlm(multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, int foldscount, out int info, out mlpreport rep, out mlpcvreport cvrep) { info = 0; rep = new mlpreport(); cvrep = new mlpcvreport(); mlptrain.mlpkfoldcvlm(network.innerobj, xy, npoints, decay, restarts, foldscount, ref info, rep.innerobj, cvrep.innerobj); return; } /************************************************************************* This function estimates generalization error using cross-validation on the current dataset with current training settings. INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. Network is not changed during cross- validation and is not trained - it is used only as representative of its architecture. I.e., we estimate generalization properties of ARCHITECTURE, not some specific network. NRestarts - number of restarts, >=0: * NRestarts>0 means that for each cross-validation round specified number of random restarts is performed, with best network being chosen after training. * NRestarts=0 is same as NRestarts=1 FoldsCount - number of folds in k-fold cross-validation: * 2<=FoldsCount<=size of dataset * recommended value: 10. * values larger than dataset size will be silently truncated down to dataset size OUTPUT PARAMETERS: Rep - structure which contains cross-validation estimates: * Rep.RelCLSError - fraction of misclassified cases. 
* Rep.AvgCE - average cross-entropy * Rep.RMSError - root-mean-square error * Rep.AvgError - average error * Rep.AvgRelError - average relative error NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), or subset with only one point was given, zeros are returned as estimates. NOTE: this method performs FoldsCount cross-validation rounds, each one with NRestarts random starts. Thus, FoldsCount*NRestarts networks are trained in total. NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems. NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError contain errors in prediction of posterior probabilities. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcv(mlptrainer s, multilayerperceptron network, int nrestarts, int foldscount, out mlpreport rep) { rep = new mlpreport(); mlptrain.mlpkfoldcv(s.innerobj, network.innerobj, nrestarts, foldscount, rep.innerobj); return; } public static void smp_mlpkfoldcv(mlptrainer s, multilayerperceptron network, int nrestarts, int foldscount, out mlpreport rep) { rep = new mlpreport(); mlptrain._pexec_mlpkfoldcv(s.innerobj, network.innerobj, nrestarts, foldscount, rep.innerobj); return; } /************************************************************************* Creation of the network trainer object for regression networks INPUT PARAMETERS: NIn - number of inputs, NIn>=1 NOut - number of outputs, NOut>=1 OUTPUT PARAMETERS: S - neural network trainer object. This structure can be used to train any regression network with NIn inputs and NOut outputs. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatetrainer(int nin, int nout, out mlptrainer s) { s = new mlptrainer(); mlptrain.mlpcreatetrainer(nin, nout, s.innerobj); return; } /************************************************************************* Creation of the network trainer object for classification networks INPUT PARAMETERS: NIn - number of inputs, NIn>=1 NClasses - number of classes, NClasses>=2 OUTPUT PARAMETERS: S - neural network trainer object. This structure can be used to train any classification network with NIn inputs and NClasses classes. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatetrainercls(int nin, int nclasses, out mlptrainer s) { s = new mlptrainer(); mlptrain.mlpcreatetrainercls(nin, nclasses, s.innerobj); return; } /************************************************************************* This function sets "current dataset" of the trainer object to one passed by user. INPUT PARAMETERS: S - trainer object XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. NPoints - points count, >=0. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetdataset(mlptrainer s, double[,] xy, int npoints) { mlptrain.mlpsetdataset(s.innerobj, xy, npoints); return; } /************************************************************************* This function sets "current dataset" of the trainer object to one passed by user (sparse matrix is used to store dataset). INPUT PARAMETERS: S - trainer object XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Any sparse storage format can be used: Hash-table, CRS... NPoints - points count, >=0 DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetsparsedataset(mlptrainer s, sparsematrix xy, int npoints) { mlptrain.mlpsetsparsedataset(s.innerobj, xy.innerobj, npoints); return; } /************************************************************************* This function sets the weight decay coefficient which is used for training. INPUT PARAMETERS: S - trainer object Decay - weight decay coefficient, >=0. Weight decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 1.0E-3. Weight decay can be set to zero, in which case the network is trained without weight decay. NOTE: by default network uses some small nonzero value for weight decay. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetdecay(mlptrainer s, double decay) { mlptrain.mlpsetdecay(s.innerobj, decay); return; } /************************************************************************* This function sets stopping criteria for the optimizer. INPUT PARAMETERS: S - trainer object WStep - stopping criterion. Algorithm stops if step size is less than WStep. Recommended value - 0.01. Zero step size means stopping after MaxIts iterations. WStep>=0. MaxIts - stopping criterion. Algorithm stops after MaxIts iterations (NOT gradient calculations). Zero MaxIts means stopping when step is sufficiently small. MaxIts>=0. NOTE: by default, WStep=0.005 and MaxIts=0 are used.
These values are also used when MLPSetCond() is called with WStep=0 and MaxIts=0. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetcond(mlptrainer s, double wstep, int maxits) { mlptrain.mlpsetcond(s.innerobj, wstep, maxits); return; } /************************************************************************* This function trains neural network passed to this function, using current dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset()) and current training settings. Training from NRestarts random starting positions is performed, best network is chosen. Training is performed using current training algorithm. INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. NRestarts - number of restarts, >=0: * NRestarts>0 means that specified number of random restarts are performed, best network is chosen after training * NRestarts=0 means that current state of the network is used for training. OUTPUT PARAMETERS: Network - trained network NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), network is filled by zero values. Same behavior for functions MLPStartTraining and MLPContinueTraining. NOTE: this method uses sum-of-squares error function for training. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlptrainnetwork(mlptrainer s, multilayerperceptron network, int nrestarts, out mlpreport rep) { rep = new mlpreport(); mlptrain.mlptrainnetwork(s.innerobj, network.innerobj, nrestarts, rep.innerobj); return; } /************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend using it unless you are sure that you need the ability to monitor training progress. This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. After a call to this function, the trainer object remembers the network and is ready to train it. However, no training is performed until the first call to MLPContinueTraining() function. Subsequent calls to MLPContinueTraining() will advance training progress one iteration further. EXAMPLE: > > ...initialize network and trainer object.... > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > ...visualize training progress... > INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. RandomStart - randomize network before training or not: * True means that network is randomized and its initial state (one which was passed to the trainer object) is lost. * False means that training is started from the current state of the network OUTPUT PARAMETERS: Network - neural network which is ready for training (weights are initialized, preprocessor is initialized using current training set) NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e.
no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpstarttraining(mlptrainer s, multilayerperceptron network, bool randomstart) { mlptrain.mlpstarttraining(s.innerobj, network.innerobj, randomstart); return; } /************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend using it unless you are sure that you need the ability to monitor training progress. This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. This function performs one more iteration of the training and returns either True (training continues) or False (training stopped). In case True was returned, Network weights are updated according to the current state of the optimization progress. In case False was returned, no additional update is performed (the previous update of the network weights moved us to the final point, and no additional updates are needed). EXAMPLE: > > [initialize network and trainer object] > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > [visualize training progress] > INPUT PARAMETERS: S - trainer object Network - neural network structure, which is used to store current state of the training process. OUTPUT PARAMETERS: Network - weights of the neural network are rewritten by the current approximation. NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. NOTE: It is expected that Network is the same one which was passed to MLPStartTraining() function. However, THIS function checks only the following: * that number of network inputs is consistent with trainer object settings * that number of network outputs/classes is consistent with trainer object settings * that number of network weights is the same as number of weights in the network passed to MLPStartTraining() function Exception is thrown when these conditions are violated. It is also expected that you do not change the state of the network on your own - the only party which has the right to change the network during training is the trainer object. Any attempt to interfere with the trainer may lead to unpredictable results. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static bool mlpcontinuetraining(mlptrainer s, multilayerperceptron network) { bool result = mlptrain.mlpcontinuetraining(s.innerobj, network.innerobj); return result; } /************************************************************************* Training neural networks ensemble using bootstrap aggregating (bagging). Modified Levenberg-Marquardt algorithm is used as base training method.
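NOTE: (general background on bagging, not a statement of implementation
details beyond what is documented here) each ensemble member is trained
on its own bootstrap sample drawn with replacement from XY; cases which
do not enter a member's sample are used to form the out-of-bag estimate
returned in OOBErrors.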
INPUT PARAMETERS: Ensemble - model with initialized geometry XY - training set NPoints - training set size Decay - weight decay coefficient, >=0.001 Restarts - restarts, >0. OUTPUT PARAMETERS: Ensemble - trained model Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report. OOBErrors - out-of-bag generalization error estimate -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpebagginglm(mlpensemble ensemble, double[,] xy, int npoints, double decay, int restarts, out int info, out mlpreport rep, out mlpcvreport ooberrors) { info = 0; rep = new mlpreport(); ooberrors = new mlpcvreport(); mlptrain.mlpebagginglm(ensemble.innerobj, xy, npoints, decay, restarts, ref info, rep.innerobj, ooberrors.innerobj); return; } /************************************************************************* Training neural networks ensemble using bootstrap aggregating (bagging). L-BFGS algorithm is used as base training method. INPUT PARAMETERS: Ensemble - model with initialized geometry XY - training set NPoints - training set size Decay - weight decay coefficient, >=0.001 Restarts - restarts, >0. WStep - stopping criterion, same as in MLPTrainLBFGS MaxIts - stopping criterion, same as in MLPTrainLBFGS OUTPUT PARAMETERS: Ensemble - trained model Info - return code: * -8, if both WStep=0 and MaxIts=0 * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report. OOBErrors - out-of-bag generalization error estimate -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpebagginglbfgs(mlpensemble ensemble, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, out int info, out mlpreport rep, out mlpcvreport ooberrors) { info = 0; rep = new mlpreport(); ooberrors = new mlpcvreport(); mlptrain.mlpebagginglbfgs(ensemble.innerobj, xy, npoints, decay, restarts, wstep, maxits, ref info, rep.innerobj, ooberrors.innerobj); return; } /************************************************************************* Training neural networks ensemble using early stopping. INPUT PARAMETERS: Ensemble - model with initialized geometry XY - training set NPoints - training set size Decay - weight decay coefficient, >=0.001 Restarts - restarts, >0. OUTPUT PARAMETERS: Ensemble - trained model Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, Restarts<1). * 6, if task has been solved. Rep - training report.
OOBErrors - out-of-bag generalization error estimate -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpetraines(mlpensemble ensemble, double[,] xy, int npoints, double decay, int restarts, out int info, out mlpreport rep) { info = 0; rep = new mlpreport(); mlptrain.mlpetraines(ensemble.innerobj, xy, npoints, decay, restarts, ref info, rep.innerobj); return; } /************************************************************************* This function trains neural network ensemble passed to this function using current dataset and early stopping training algorithm. Each early stopping round performs NRestarts random restarts (thus, EnsembleSize*NRestarts training rounds are performed in total). INPUT PARAMETERS: S - trainer object; Ensemble - neural network ensemble. It must have same number of inputs and outputs/classes as was specified during creation of the trainer object. NRestarts - number of restarts, >=0: * NRestarts>0 means that specified number of random restarts are performed during each ES round; * NRestarts=0 is silently replaced by 1. OUTPUT PARAMETERS: Ensemble - trained ensemble; Rep - it contains all types of errors. NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), or single-point dataset was passed, ensemble is filled by zero values. NOTE: this method uses sum-of-squares error function for training. -- ALGLIB -- Copyright 22.08.2012 by Bochkanov Sergey *************************************************************************/ public static void mlptrainensemblees(mlptrainer s, mlpensemble ensemble, int nrestarts, out mlpreport rep) { rep = new mlpreport(); mlptrain.mlptrainensemblees(s.innerobj, ensemble.innerobj, nrestarts, rep.innerobj); return; } } public partial class alglib { /************************************************************************* Principal components analysis Subroutine builds orthogonal basis where first axis corresponds to direction with maximum variance, second axis maximizes variance in subspace orthogonal to first axis and so on. It should be noted that, unlike LDA, PCA does not use class labels. INPUT PARAMETERS: X - dataset, array[0..NPoints-1,0..NVars-1]. matrix contains ONLY INDEPENDENT VARIABLES. NPoints - dataset size, NPoints>=0 NVars - number of independent variables, NVars>=1 OUTPUT PARAMETERS: Info - return code: * -4, if SVD subroutine hasn't converged * -1, if wrong parameters have been passed (NPoints<0, NVars<1) * 1, if task is solved S2 - array[0..NVars-1]. variance values corresponding to basis vectors. V - array[0..NVars-1,0..NVars-1] matrix, whose columns store basis vectors.
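NOTE: a minimal calling sketch (the 100x3 dataset size is purely
      illustrative):
      > int info;
      > double[] s2;
      > double[,] v;
      > pcabuildbasis(x, 100, 3, out info, out s2, out v);
      on success (Info=1) projecting the mean-centered rows of X onto the
      columns of V yields coordinates in the new basis, and S2[I] is the
      variance along the I-th basis vector.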
-- ALGLIB -- Copyright 25.08.2008 by Bochkanov Sergey *************************************************************************/ public static void pcabuildbasis(double[,] x, int npoints, int nvars, out int info, out double[] s2, out double[,] v) { info = 0; s2 = new double[0]; v = new double[0,0]; pca.pcabuildbasis(x, npoints, nvars, ref info, ref s2, ref v); return; } } public partial class alglib { public class bdss { public class cvreport : apobject { public double relclserror; public double avgce; public double rmserror; public double avgerror; public double avgrelerror; public cvreport() { init(); } public override void init() { } public override alglib.apobject make_copy() { cvreport _result = new cvreport(); _result.relclserror = relclserror; _result.avgce = avgce; _result.rmserror = rmserror; _result.avgerror = avgerror; _result.avgrelerror = avgrelerror; return _result; } }; /************************************************************************* This set of routines (DSErrAllocate, DSErrAccumulate, DSErrFinish) calculates different error functions (classification error, cross-entropy, rms, avg, avg.rel errors). 1. DSErrAllocate prepares buffer. 2. DSErrAccumulate accumulates individual errors: * Y contains predicted output (posterior probabilities for classification) * DesiredY contains desired output (class number for classification) 3. DSErrFinish outputs results: * Buf[0] contains relative classification error (zero for regression tasks) * Buf[1] contains avg. cross-entropy (zero for regression tasks) * Buf[2] contains rms error (regression, classification) * Buf[3] contains average error (regression, classification) * Buf[4] contains average relative error (regression, classification) NOTES(1): "NClasses>0" means that we have a classification task. "NClasses<0" means a regression task with -NClasses real outputs. NOTES(2): rms, avg, avg.rel errors for classification tasks are interpreted as errors in posterior probabilities with respect to probabilities given by training/test set. -- ALGLIB -- Copyright 11.01.2009 by Bochkanov Sergey *************************************************************************/ public static void dserrallocate(int nclasses, ref double[] buf) { buf = new double[0]; buf = new double[7+1]; buf[0] = 0; buf[1] = 0; buf[2] = 0; buf[3] = 0; buf[4] = 0; buf[5] = nclasses; buf[6] = 0; buf[7] = 0; } /************************************************************************* See DSErrAllocate for comments on this routine.
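A typical use of the DSErrAllocate/DSErrAccumulate/DSErrFinish triple is
sketched below (a classification task is assumed, i.e. NClasses>0; Y[]
stores posterior probabilities, DesiredY[0] stores the class number):
      > double[] buf = new double[0];
      > dserrallocate(nclasses, ref buf);
      > for each (y, desiredy) in the test set:
      >     dserraccumulate(ref buf, y, desiredy);
      > dserrfinish(ref buf);
      after which Buf[0]..Buf[4] contain the errors listed in the
      DSErrAllocate comments.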
-- ALGLIB -- Copyright 11.01.2009 by Bochkanov Sergey *************************************************************************/ public static void dserraccumulate(ref double[] buf, double[] y, double[] desiredy) { int nclasses = 0; int nout = 0; int offs = 0; int mmax = 0; int rmax = 0; int j = 0; double v = 0; double ev = 0; offs = 5; nclasses = (int)Math.Round(buf[offs]); if( nclasses>0 ) { // // Classification // rmax = (int)Math.Round(desiredy[0]); mmax = 0; for(j=1; j<=nclasses-1; j++) { if( (double)(y[j])>(double)(y[mmax]) ) { mmax = j; } } if( mmax!=rmax ) { buf[0] = buf[0]+1; } if( (double)(y[rmax])>(double)(0) ) { buf[1] = buf[1]-Math.Log(y[rmax]); } else { buf[1] = buf[1]+Math.Log(math.maxrealnumber); } for(j=0; j<=nclasses-1; j++) { v = y[j]; if( j==rmax ) { ev = 1; } else { ev = 0; } buf[2] = buf[2]+math.sqr(v-ev); buf[3] = buf[3]+Math.Abs(v-ev); if( (double)(ev)!=(double)(0) ) { buf[4] = buf[4]+Math.Abs((v-ev)/ev); buf[offs+2] = buf[offs+2]+1; } } buf[offs+1] = buf[offs+1]+1; } else { // // Regression // nout = -nclasses; rmax = 0; for(j=1; j<=nout-1; j++) { if( (double)(desiredy[j])>(double)(desiredy[rmax]) ) { rmax = j; } } mmax = 0; for(j=1; j<=nout-1; j++) { if( (double)(y[j])>(double)(y[mmax]) ) { mmax = j; } } if( mmax!=rmax ) { buf[0] = buf[0]+1; } for(j=0; j<=nout-1; j++) { v = y[j]; ev = desiredy[j]; buf[2] = buf[2]+math.sqr(v-ev); buf[3] = buf[3]+Math.Abs(v-ev); if( (double)(ev)!=(double)(0) ) { buf[4] = buf[4]+Math.Abs((v-ev)/ev); buf[offs+2] = buf[offs+2]+1; } } buf[offs+1] = buf[offs+1]+1; } } /************************************************************************* See DSErrAllocate for comments on this routine. -- ALGLIB -- Copyright 11.01.2009 by Bochkanov Sergey *************************************************************************/ public static void dserrfinish(ref double[] buf) { int nout = 0; int offs = 0; offs = 5; nout = Math.Abs((int)Math.Round(buf[offs])); if( (double)(buf[offs+1])!=(double)(0) ) { buf[0] = buf[0]/buf[offs+1]; buf[1] = buf[1]/buf[offs+1]; buf[2] = Math.Sqrt(buf[2]/(nout*buf[offs+1])); buf[3] = buf[3]/(nout*buf[offs+1]); } if( (double)(buf[offs+2])!=(double)(0) ) { buf[4] = buf[4]/buf[offs+2]; } } /************************************************************************* -- ALGLIB -- Copyright 19.05.2008 by Bochkanov Sergey *************************************************************************/ public static void dsnormalize(ref double[,] xy, int npoints, int nvars, ref int info, ref double[] means, ref double[] sigmas) { int i = 0; int j = 0; double[] tmp = new double[0]; double mean = 0; double variance = 0; double skewness = 0; double kurtosis = 0; int i_ = 0; info = 0; means = new double[0]; sigmas = new double[0]; // // Test parameters // if( npoints<=0 || nvars<1 ) { info = -1; return; } info = 1; // // Standartization // means = new double[nvars-1+1]; sigmas = new double[nvars-1+1]; tmp = new double[npoints-1+1]; for(j=0; j<=nvars-1; j++) { for(i_=0; i_<=npoints-1;i_++) { tmp[i_] = xy[i_,j]; } basestat.samplemoments(tmp, npoints, ref mean, ref variance, ref skewness, ref kurtosis); means[j] = mean; sigmas[j] = Math.Sqrt(variance); if( (double)(sigmas[j])==(double)(0) ) { sigmas[j] = 1; } for(i=0; i<=npoints-1; i++) { xy[i,j] = (xy[i,j]-means[j])/sigmas[j]; } } } /************************************************************************* -- ALGLIB -- Copyright 19.05.2008 by Bochkanov Sergey *************************************************************************/ public static void dsnormalizec(double[,] xy, int npoints, 
int nvars, ref int info, ref double[] means, ref double[] sigmas) { int j = 0; double[] tmp = new double[0]; double mean = 0; double variance = 0; double skewness = 0; double kurtosis = 0; int i_ = 0; info = 0; means = new double[0]; sigmas = new double[0]; // // Test parameters // if( npoints<=0 || nvars<1 ) { info = -1; return; } info = 1; // // Standartization // means = new double[nvars-1+1]; sigmas = new double[nvars-1+1]; tmp = new double[npoints-1+1]; for(j=0; j<=nvars-1; j++) { for(i_=0; i_<=npoints-1;i_++) { tmp[i_] = xy[i_,j]; } basestat.samplemoments(tmp, npoints, ref mean, ref variance, ref skewness, ref kurtosis); means[j] = mean; sigmas[j] = Math.Sqrt(variance); if( (double)(sigmas[j])==(double)(0) ) { sigmas[j] = 1; } } } /************************************************************************* -- ALGLIB -- Copyright 19.05.2008 by Bochkanov Sergey *************************************************************************/ public static double dsgetmeanmindistance(double[,] xy, int npoints, int nvars) { double result = 0; int i = 0; int j = 0; double[] tmp = new double[0]; double[] tmp2 = new double[0]; double v = 0; int i_ = 0; // // Test parameters // if( npoints<=0 || nvars<1 ) { result = 0; return result; } // // Process // tmp = new double[npoints-1+1]; for(i=0; i<=npoints-1; i++) { tmp[i] = math.maxrealnumber; } tmp2 = new double[nvars-1+1]; for(i=0; i<=npoints-1; i++) { for(j=i+1; j<=npoints-1; j++) { for(i_=0; i_<=nvars-1;i_++) { tmp2[i_] = xy[i,i_]; } for(i_=0; i_<=nvars-1;i_++) { tmp2[i_] = tmp2[i_] - xy[j,i_]; } v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += tmp2[i_]*tmp2[i_]; } v = Math.Sqrt(v); tmp[i] = Math.Min(tmp[i], v); tmp[j] = Math.Min(tmp[j], v); } } result = 0; for(i=0; i<=npoints-1; i++) { result = result+tmp[i]/npoints; } return result; } /************************************************************************* -- ALGLIB -- Copyright 19.05.2008 by Bochkanov Sergey *************************************************************************/ public static void dstie(ref double[] a, int n, ref int[] ties, ref int tiecount, ref int[] p1, ref int[] p2) { int i = 0; int k = 0; int[] tmp = new int[0]; ties = new int[0]; tiecount = 0; p1 = new int[0]; p2 = new int[0]; // // Special case // if( n<=0 ) { tiecount = 0; return; } // // Sort A // tsort.tagsort(ref a, n, ref p1, ref p2); // // Process ties // tiecount = 1; for(i=1; i<=n-1; i++) { if( (double)(a[i])!=(double)(a[i-1]) ) { tiecount = tiecount+1; } } ties = new int[tiecount+1]; ties[0] = 0; k = 1; for(i=1; i<=n-1; i++) { if( (double)(a[i])!=(double)(a[i-1]) ) { ties[k] = i; k = k+1; } } ties[tiecount] = n; } /************************************************************************* -- ALGLIB -- Copyright 11.12.2008 by Bochkanov Sergey *************************************************************************/ public static void dstiefasti(ref double[] a, ref int[] b, int n, ref int[] ties, ref int tiecount, ref double[] bufr, ref int[] bufi) { int i = 0; int k = 0; int[] tmp = new int[0]; tiecount = 0; // // Special case // if( n<=0 ) { tiecount = 0; return; } // // Sort A // tsort.tagsortfasti(ref a, ref b, ref bufr, ref bufi, n); // // Process ties // ties[0] = 0; k = 1; for(i=1; i<=n-1; i++) { if( (double)(a[i])!=(double)(a[i-1]) ) { ties[k] = i; k = k+1; } } ties[k] = n; tiecount = k; } /************************************************************************* Optimal binary classification Algorithms finds optimal (=with minimal cross-entropy) binary partition. Internal subroutine. 
INPUT PARAMETERS: A - array[0..N-1], variable C - array[0..N-1], class numbers (0 or 1). N - array size OUTPUT PARAMETERS: Info - completion code: * -3, all values of A[] are same (partition is impossible) * -2, one of C[] is incorrect (<0, >1) * -1, incorrect parameters were passed (N<=0). * 1, OK Threshold- partition boundary. Left part contains values which are strictly less than Threshold. Right part contains values which are greater than or equal to Threshold. PAL, PBL- probabilities P(0|v<Threshold) and P(1|v<Threshold) PAR, PBR- probabilities P(0|v>=Threshold) and P(1|v>=Threshold) CVE - cross-validation estimate of cross-entropy -- ALGLIB -- Copyright 22.05.2008 by Bochkanov Sergey *************************************************************************/ public static void dsoptimalsplit2(double[] a, int[] c, int n, ref int info, ref double threshold, ref double pal, ref double pbl, ref double par, ref double pbr, ref double cve) { int i = 0; int t = 0; double s = 0; int[] ties = new int[0]; int tiecount = 0; int[] p1 = new int[0]; int[] p2 = new int[0]; int k = 0; int koptimal = 0; double pak = 0; double pbk = 0; double cvoptimal = 0; double cv = 0; a = (double[])a.Clone(); c = (int[])c.Clone(); info = 0; threshold = 0; pal = 0; pbl = 0; par = 0; pbr = 0; cve = 0; // // Test for errors in inputs // if( n<=0 ) { info = -1; return; } for(i=0; i<=n-1; i++) { if( c[i]!=0 && c[i]!=1 ) { info = -2; return; } } info = 1; // // Tie // dstie(ref a, n, ref ties, ref tiecount, ref p1, ref p2); for(i=0; i<=n-1; i++) { if( p2[i]!=i ) { t = c[i]; c[i] = c[p2[i]]; c[p2[i]] = t; } } // // Special case: number of ties is 1. // // NOTE: we assume that P[i,j] equals to 0 or 1, // intermediate values are not allowed. // if( tiecount==1 ) { info = -3; return; } // // General case, number of ties > 1 // // NOTE: we assume that P[i,j] equals to 0 or 1, // intermediate values are not allowed. // pal = 0; pbl = 0; par = 0; pbr = 0; for(i=0; i<=n-1; i++) { if( c[i]==0 ) { par = par+1; } if( c[i]==1 ) { pbr = pbr+1; } } koptimal = -1; cvoptimal = math.maxrealnumber; for(k=0; k<=tiecount-2; k++) { // // first, obtain information about K-th tie which is // moved from R-part to L-part // pak = 0; pbk = 0; for(i=ties[k]; i<=ties[k+1]-1; i++) { if( c[i]==0 ) { pak = pak+1; } if( c[i]==1 ) { pbk = pbk+1; } } // // Calculate cross-validation CE // cv = 0; cv = cv-xlny(pal+pak, (pal+pak)/(pal+pak+pbl+pbk+1)); cv = cv-xlny(pbl+pbk, (pbl+pbk)/(pal+pak+1+pbl+pbk)); cv = cv-xlny(par-pak, (par-pak)/(par-pak+pbr-pbk+1)); cv = cv-xlny(pbr-pbk, (pbr-pbk)/(par-pak+1+pbr-pbk)); // // Compare with best // if( (double)(cv)<(double)(cvoptimal) ) { cvoptimal = cv; koptimal = k; } // // update // pal = pal+pak; pbl = pbl+pbk; par = par-pak; pbr = pbr-pbk; } cve = cvoptimal; threshold = 0.5*(a[ties[koptimal]]+a[ties[koptimal+1]]); pal = 0; pbl = 0; par = 0; pbr = 0; for(i=0; i<=n-1; i++) { if( (double)(a[i])<(double)(threshold) ) { if( c[i]==0 ) { pal = pal+1; } else { pbl = pbl+1; } } else { if( c[i]==0 ) { par = par+1; } else { pbr = pbr+1; } } } s = pal+pbl; pal = pal/s; pbl = pbl/s; s = par+pbr; par = par/s; pbr = pbr/s; } /************************************************************************* Optimal partition, internal subroutine. Fast version.
Accepts: A array[0..N-1] array of attributes array[0..N-1] C array[0..N-1] array of class labels TiesBuf array[0..N] temporaries (ties) CntBuf array[0..2*NC-1] temporaries (counts) Alpha centering factor (0<=alpha<=1, recommended value - 0.05) BufR array[0..N-1] temporaries BufI array[0..N-1] temporaries Output: Info error code (">0"=OK, "<0"=bad) RMS training set RMS error CVRMS leave-one-out RMS error Note: content of all arrays is changed by subroutine; it doesn't allocate temporaries. -- ALGLIB -- Copyright 11.12.2008 by Bochkanov Sergey *************************************************************************/ public static void dsoptimalsplit2fast(ref double[] a, ref int[] c, ref int[] tiesbuf, ref int[] cntbuf, ref double[] bufr, ref int[] bufi, int n, int nc, double alpha, ref int info, ref double threshold, ref double rms, ref double cvrms) { int i = 0; int k = 0; int cl = 0; int tiecount = 0; double cbest = 0; double cc = 0; int koptimal = 0; int sl = 0; int sr = 0; double v = 0; double w = 0; double x = 0; info = 0; threshold = 0; rms = 0; cvrms = 0; // // Test for errors in inputs // if( n<=0 || nc<2 ) { info = -1; return; } for(i=0; i<=n-1; i++) { if( c[i]<0 || c[i]>=nc ) { info = -2; return; } } info = 1; // // Tie // dstiefasti(ref a, ref c, n, ref tiesbuf, ref tiecount, ref bufr, ref bufi); // // Special case: number of ties is 1. // if( tiecount==1 ) { info = -3; return; } // // General case, number of ties > 1 // for(i=0; i<=2*nc-1; i++) { cntbuf[i] = 0; } for(i=0; i<=n-1; i++) { cntbuf[nc+c[i]] = cntbuf[nc+c[i]]+1; } koptimal = -1; threshold = a[n-1]; cbest = math.maxrealnumber; sl = 0; sr = n; for(k=0; k<=tiecount-2; k++) { // // first, move Kth tie from right to left // for(i=tiesbuf[k]; i<=tiesbuf[k+1]-1; i++) { cl = c[i]; cntbuf[cl] = cntbuf[cl]+1; cntbuf[nc+cl] = cntbuf[nc+cl]-1; } sl = sl+(tiesbuf[k+1]-tiesbuf[k]); sr = sr-(tiesbuf[k+1]-tiesbuf[k]); // // Calculate RMS error // v = 0; for(i=0; i<=nc-1; i++) { w = cntbuf[i]; v = v+w*math.sqr(w/sl-1); v = v+(sl-w)*math.sqr(w/sl); w = cntbuf[nc+i]; v = v+w*math.sqr(w/sr-1); v = v+(sr-w)*math.sqr(w/sr); } v = Math.Sqrt(v/(nc*n)); // // Compare with best // x = (double)(2*sl)/(double)(sl+sr)-1; cc = v*(1-alpha+alpha*math.sqr(x)); if( (double)(cc)<(double)(cbest) ) { // // store split // rms = v; koptimal = k; cbest = cc; // // calculate CVRMS error // cvrms = 0; for(i=0; i<=nc-1; i++) { if( sl>1 ) { w = cntbuf[i]; cvrms = cvrms+w*math.sqr((w-1)/(sl-1)-1); cvrms = cvrms+(sl-w)*math.sqr(w/(sl-1)); } else { w = cntbuf[i]; cvrms = cvrms+w*math.sqr((double)1/(double)nc-1); cvrms = cvrms+(sl-w)*math.sqr((double)1/(double)nc); } if( sr>1 ) { w = cntbuf[nc+i]; cvrms = cvrms+w*math.sqr((w-1)/(sr-1)-1); cvrms = cvrms+(sr-w)*math.sqr(w/(sr-1)); } else { w = cntbuf[nc+i]; cvrms = cvrms+w*math.sqr((double)1/(double)nc-1); cvrms = cvrms+(sr-w)*math.sqr((double)1/(double)nc); } } cvrms = Math.Sqrt(cvrms/(nc*n)); } } // // Calculate threshold. // Code is a bit complicated because there can be such // numbers that 0.5(A+B) equals to A or B (if A-B=epsilon) // threshold = 0.5*(a[tiesbuf[koptimal]]+a[tiesbuf[koptimal+1]]); if( (double)(threshold)<=(double)(a[tiesbuf[koptimal]]) ) { threshold = a[tiesbuf[koptimal+1]]; } } /************************************************************************* Automatic non-optimal discretization, internal subroutine. 
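The routine first prepares a "weak" two-interval split at the median,
then uses a greedy O(KMax*N) pass to build K-interval splits for each
K=2..KMax, keeping the variant with the smallest cross-validation
cross-entropy (CVE). Greediness makes the split sub-optimal but fast;
DSOptimalSplitK below performs the same task with dynamic programming.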
-- ALGLIB -- Copyright 22.05.2008 by Bochkanov Sergey *************************************************************************/ public static void dssplitk(double[] a, int[] c, int n, int nc, int kmax, ref int info, ref double[] thresholds, ref int ni, ref double cve) { int i = 0; int j = 0; int j1 = 0; int k = 0; int[] ties = new int[0]; int tiecount = 0; int[] p1 = new int[0]; int[] p2 = new int[0]; int[] cnt = new int[0]; double v2 = 0; int bestk = 0; double bestcve = 0; int[] bestsizes = new int[0]; double curcve = 0; int[] cursizes = new int[0]; a = (double[])a.Clone(); c = (int[])c.Clone(); info = 0; thresholds = new double[0]; ni = 0; cve = 0; // // Test for errors in inputs // if( (n<=0 || nc<2) || kmax<2 ) { info = -1; return; } for(i=0; i<=n-1; i++) { if( c[i]<0 || c[i]>=nc ) { info = -2; return; } } info = 1; // // Tie // dstie(ref a, n, ref ties, ref tiecount, ref p1, ref p2); for(i=0; i<=n-1; i++) { if( p2[i]!=i ) { k = c[i]; c[i] = c[p2[i]]; c[p2[i]] = k; } } // // Special cases // if( tiecount==1 ) { info = -3; return; } // // General case: // 0. allocate arrays // kmax = Math.Min(kmax, tiecount); bestsizes = new int[kmax-1+1]; cursizes = new int[kmax-1+1]; cnt = new int[nc-1+1]; // // General case: // 1. prepare "weak" solution (two subintervals, divided at median) // v2 = math.maxrealnumber; j = -1; for(i=1; i<=tiecount-1; i++) { if( (double)(Math.Abs(ties[i]-0.5*(n-1)))<(double)(v2) ) { v2 = Math.Abs(ties[i]-0.5*(n-1)); j = i; } } alglib.ap.assert(j>0, "DSSplitK: internal error #1!"); bestk = 2; bestsizes[0] = ties[j]; bestsizes[1] = n-ties[j]; bestcve = 0; for(i=0; i<=nc-1; i++) { cnt[i] = 0; } for(i=0; i<=j-1; i++) { tieaddc(c, ties, i, nc, ref cnt); } bestcve = bestcve+getcv(cnt, nc); for(i=0; i<=nc-1; i++) { cnt[i] = 0; } for(i=j; i<=tiecount-1; i++) { tieaddc(c, ties, i, nc, ref cnt); } bestcve = bestcve+getcv(cnt, nc); // // General case: // 2. Use greedy algorithm to find sub-optimal split in O(KMax*N) time // for(k=2; k<=kmax; k++) { // // Prepare greedy K-interval split // for(i=0; i<=k-1; i++) { cursizes[i] = 0; } i = 0; j = 0; while( j<=tiecount-1 && i<=k-1 ) { // // Rule: I-th bin is empty, fill it // if( cursizes[i]==0 ) { cursizes[i] = ties[j+1]-ties[j]; j = j+1; continue; } // // Rule: (K-1-I) bins left, (K-1-I) ties left (1 tie per bin); next bin // if( tiecount-j==k-1-i ) { i = i+1; continue; } // // Rule: last bin, always place in current // if( i==k-1 ) { cursizes[i] = cursizes[i]+ties[j+1]-ties[j]; j = j+1; continue; } // // Place J-th tie in I-th bin, or leave for I+1-th bin.
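//
// (illustrative note) The rule below targets an ideal bin size of N/K.
// Example, with hypothetical numbers N=10, K=2 (target 5): if
// CurSizes[I]=3 and the J-th tie has size 1, then |3+1-5|=1 < |3-5|=2,
// so the tie is appended to bin I; if the tie had size 5 instead,
// |3+5-5|=3 is not less than 2 and we would advance to bin I+1.
//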
// if( (double)(Math.Abs(cursizes[i]+ties[j+1]-ties[j]-(double)n/(double)k))<(double)(Math.Abs(cursizes[i]-(double)n/(double)k)) ) { cursizes[i] = cursizes[i]+ties[j+1]-ties[j]; j = j+1; } else { i = i+1; } } alglib.ap.assert(cursizes[k-1]!=0 && j==tiecount, "DSSplitK: internal error #1"); // // Calculate CVE // curcve = 0; j = 0; for(i=0; i<=k-1; i++) { for(j1=0; j1<=nc-1; j1++) { cnt[j1] = 0; } for(j1=j; j1<=j+cursizes[i]-1; j1++) { cnt[c[j1]] = cnt[c[j1]]+1; } curcve = curcve+getcv(cnt, nc); j = j+cursizes[i]; } // // Choose best variant // if( (double)(curcve)<(double)(bestcve) ) { for(i=0; i<=k-1; i++) { bestsizes[i] = cursizes[i]; } bestcve = curcve; bestk = k; } } // // Transform from sizes to thresholds // cve = bestcve; ni = bestk; thresholds = new double[ni-2+1]; j = bestsizes[0]; for(i=1; i<=bestk-1; i++) { thresholds[i-1] = 0.5*(a[j-1]+a[j]); j = j+bestsizes[i]; } } /************************************************************************* Automatic optimal discretization, internal subroutine. -- ALGLIB -- Copyright 22.05.2008 by Bochkanov Sergey *************************************************************************/ public static void dsoptimalsplitk(double[] a, int[] c, int n, int nc, int kmax, ref int info, ref double[] thresholds, ref int ni, ref double cve) { int i = 0; int j = 0; int s = 0; int jl = 0; int jr = 0; double v2 = 0; int[] ties = new int[0]; int tiecount = 0; int[] p1 = new int[0]; int[] p2 = new int[0]; double cvtemp = 0; int[] cnt = new int[0]; int[] cnt2 = new int[0]; double[,] cv = new double[0,0]; int[,] splits = new int[0,0]; int k = 0; int koptimal = 0; double cvoptimal = 0; a = (double[])a.Clone(); c = (int[])c.Clone(); info = 0; thresholds = new double[0]; ni = 0; cve = 0; // // Test for errors in inputs // if( (n<=0 || nc<2) || kmax<2 ) { info = -1; return; } for(i=0; i<=n-1; i++) { if( c[i]<0 || c[i]>=nc ) { info = -2; return; } } info = 1; // // Tie // dstie(ref a, n, ref ties, ref tiecount, ref p1, ref p2); for(i=0; i<=n-1; i++) { if( p2[i]!=i ) { k = c[i]; c[i] = c[p2[i]]; c[p2[i]] = k; } } // // Special cases // if( tiecount==1 ) { info = -3; return; } // // General case // Use dynamic programming to find best split in O(KMax*NC*TieCount^2) time // kmax = Math.Min(kmax, tiecount); cv = new double[kmax-1+1, tiecount-1+1]; splits = new int[kmax-1+1, tiecount-1+1]; cnt = new int[nc-1+1]; cnt2 = new int[nc-1+1]; for(j=0; j<=nc-1; j++) { cnt[j] = 0; } for(j=0; j<=tiecount-1; j++) { tieaddc(c, ties, j, nc, ref cnt); splits[0,j] = 0; cv[0,j] = getcv(cnt, nc); } for(k=1; k<=kmax-1; k++) { for(j=0; j<=nc-1; j++) { cnt[j] = 0; } // // Subtask size J in [K..TieCount-1]: // optimal K-splitting on ties from 0-th to J-th. 
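//
// (illustrative note) The loop below evaluates the dynamic programming
// recurrence over tie boundaries
//   CV[K,J] = min over split point S of ( CV[K-1,S-1] + CE(ties S..J) )
// where CE() is the cross-entropy of one interval as computed by
// GetCV(), and CV[0,J] = CE(ties 0..J) was filled above. Cnt2 starts
// as a copy of Cnt, and TieSubC() strips one leading tie per iteration
// of the S-loop, so CE(ties S..J) is maintained incrementally.
//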
// for(j=k; j<=tiecount-1; j++) { // // Update Cnt - let it contain classes of ties from K-th to J-th // tieaddc(c, ties, j, nc, ref cnt); // // Search for optimal split point S in [K..J] // for(i=0; i<=nc-1; i++) { cnt2[i] = cnt[i]; } cv[k,j] = cv[k-1,j-1]+getcv(cnt2, nc); splits[k,j] = j; for(s=k+1; s<=j; s++) { // // Update Cnt2 - let it contain classes of ties from S-th to J-th // tiesubc(c, ties, s-1, nc, ref cnt2); // // Calculate CVE // cvtemp = cv[k-1,s-1]+getcv(cnt2, nc); if( (double)(cvtemp)<(double)(cv[k,j]) ) { cv[k,j] = cvtemp; splits[k,j] = s; } } } } // // Choose best partition, output result // koptimal = -1; cvoptimal = math.maxrealnumber; for(k=0; k<=kmax-1; k++) { if( (double)(cv[k,tiecount-1])<(double)(cvoptimal) ) { cvoptimal = cv[k,tiecount-1]; koptimal = k; } } alglib.ap.assert(koptimal>=0, "DSOptimalSplitK: internal error #1!"); if( koptimal==0 ) { // // Special case: best partition is one big interval. // Even 2-partition is not better. // This is possible when dealing with "weak" predictor variables. // // Make binary split as close to the median as possible. // v2 = math.maxrealnumber; j = -1; for(i=1; i<=tiecount-1; i++) { if( (double)(Math.Abs(ties[i]-0.5*(n-1)))<(double)(v2) ) { v2 = Math.Abs(ties[i]-0.5*(n-1)); j = i; } } alglib.ap.assert(j>0, "DSOptimalSplitK: internal error #2!"); thresholds = new double[0+1]; thresholds[0] = 0.5*(a[ties[j-1]]+a[ties[j]]); ni = 2; cve = 0; for(i=0; i<=nc-1; i++) { cnt[i] = 0; } for(i=0; i<=j-1; i++) { tieaddc(c, ties, i, nc, ref cnt); } cve = cve+getcv(cnt, nc); for(i=0; i<=nc-1; i++) { cnt[i] = 0; } for(i=j; i<=tiecount-1; i++) { tieaddc(c, ties, i, nc, ref cnt); } cve = cve+getcv(cnt, nc); } else { // // General case: 2 or more intervals // thresholds = new double[koptimal-1+1]; ni = koptimal+1; cve = cv[koptimal,tiecount-1]; jl = splits[koptimal,tiecount-1]; jr = tiecount-1; for(k=koptimal; k>=1; k--) { thresholds[k-1] = 0.5*(a[ties[jl-1]]+a[ties[jl]]); jr = jl-1; jl = splits[k-1,jl-1]; } } } /************************************************************************* Internal function *************************************************************************/ private static double xlny(double x, double y) { double result = 0; if( (double)(x)==(double)(0) ) { result = 0; } else { result = x*Math.Log(y); } return result; } /************************************************************************* Internal function, returns number of samples of class I in Cnt[I] *************************************************************************/ private static double getcv(int[] cnt, int nc) { double result = 0; int i = 0; double s = 0; s = 0; for(i=0; i<=nc-1; i++) { s = s+cnt[i]; } result = 0; for(i=0; i<=nc-1; i++) { result = result-xlny(cnt[i], cnt[i]/(s+nc-1)); } return result; } /************************************************************************* Internal function, adds number of samples of class I in tie NTie to Cnt[I] *************************************************************************/ private static void tieaddc(int[] c, int[] ties, int ntie, int nc, ref int[] cnt) { int i = 0; for(i=ties[ntie]; i<=ties[ntie+1]-1; i++) { cnt[c[i]] = cnt[c[i]]+1; } } /************************************************************************* Internal function, subtracts number of samples of class I in tie NTie to Cnt[I] *************************************************************************/ private static void tiesubc(int[] c, int[] ties, int ntie, int nc, ref int[] cnt) { int i = 0; for(i=ties[ntie]; i<=ties[ntie+1]-1; i++) { 
cnt[c[i]] = cnt[c[i]]-1; } } } public class clustering { /************************************************************************* This structure is a clusterization engine. You should not try to access its fields directly. Use ALGLIB functions in order to work with this object. -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public class clusterizerstate : apobject { public int npoints; public int nfeatures; public int disttype; public double[,] xy; public double[,] d; public int ahcalgo; public int kmeansrestarts; public int kmeansmaxits; public clusterizerstate() { init(); } public override void init() { xy = new double[0,0]; d = new double[0,0]; } public override alglib.apobject make_copy() { clusterizerstate _result = new clusterizerstate(); _result.npoints = npoints; _result.nfeatures = nfeatures; _result.disttype = disttype; _result.xy = (double[,])xy.Clone(); _result.d = (double[,])d.Clone(); _result.ahcalgo = ahcalgo; _result.kmeansrestarts = kmeansrestarts; _result.kmeansmaxits = kmeansmaxits; return _result; } }; /************************************************************************* This structure is used to store results of the agglomerative hierarchical clustering (AHC). Following information is returned: * NPoints contains number of points in the original dataset * Z contains information about merges performed (see below). Z contains indexes from the original (unsorted) dataset and it can be used when you need to know what points were merged. However, it is not convenient when you want to build a dendrograd (see below). * if you want to build dendrogram, you can use Z, but it is not good option, because Z contains indexes from unsorted dataset. Dendrogram built from such dataset is likely to have intersections. So, you have to reorder you points before building dendrogram. Permutation which reorders point is returned in P. Another representation of merges, which is more convenient for dendorgram construction, is returned in PM. * more information on format of Z, P and PM can be found below and in the examples from ALGLIB Reference Manual. FORMAL DESCRIPTION OF FIELDS: NPoints number of points Z array[NPoints-1,2], contains indexes of clusters linked in pairs to form clustering tree. I-th row corresponds to I-th merge: * Z[I,0] - index of the first cluster to merge * Z[I,1] - index of the second cluster to merge * Z[I,0]=0 NFeatures number of variables, >=1 TerminationType completion code: * -5 if distance type is anything different from Euclidean metric * -3 for degenerate dataset: a) less than K distinct points, b) K=0 for non-empty dataset. 
* +1 for successful completion K number of clusters C array[K,NFeatures], rows of the array store centers CIdx array[NPoints], which contains cluster indexes -- ALGLIB -- Copyright 27.11.2012 by Bochkanov Sergey *************************************************************************/ public class kmeansreport : apobject { public int npoints; public int nfeatures; public int terminationtype; public int k; public double[,] c; public int[] cidx; public kmeansreport() { init(); } public override void init() { c = new double[0,0]; cidx = new int[0]; } public override alglib.apobject make_copy() { kmeansreport _result = new kmeansreport(); _result.npoints = npoints; _result.nfeatures = nfeatures; _result.terminationtype = terminationtype; _result.k = k; _result.c = (double[,])c.Clone(); _result.cidx = (int[])cidx.Clone(); return _result; } }; /************************************************************************* This function initializes clusterizer object. Newly initialized object is empty, i.e. it does not contain dataset. You should use it as follows: 1. creation 2. dataset is added with ClusterizerSetPoints() 3. additional parameters are set 3. clusterization is performed with one of the clustering functions -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizercreate(clusterizerstate s) { s.npoints = 0; s.nfeatures = 0; s.disttype = 2; s.ahcalgo = 0; s.kmeansrestarts = 1; s.kmeansmaxits = 0; } /************************************************************************* This function adds dataset to the clusterizer structure. This function overrides all previous calls of ClusterizerSetPoints() or ClusterizerSetDistances(). INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() XY - array[NPoints,NFeatures], dataset NPoints - number of points, >=0 NFeatures- number of features, >=1 DistType- distance function: * 0 Chebyshev distance (L-inf norm) * 1 city block distance (L1 norm) * 2 Euclidean distance (L2 norm) * 10 Pearson correlation: dist(a,b) = 1-corr(a,b) * 11 Absolute Pearson correlation: dist(a,b) = 1-|corr(a,b)| * 12 Uncentered Pearson correlation (cosine of the angle): dist(a,b) = a'*b/(|a|*|b|) * 13 Absolute uncentered Pearson correlation dist(a,b) = |a'*b|/(|a|*|b|) * 20 Spearman rank correlation: dist(a,b) = 1-rankcorr(a,b) * 21 Absolute Spearman rank correlation dist(a,b) = 1-|rankcorr(a,b)| NOTE 1: different distance functions have different performance penalty: * Euclidean or Pearson correlation distances are the fastest ones * Spearman correlation distance function is a bit slower * city block and Chebyshev distances are order of magnitude slower The reason behing difference in performance is that correlation-based distance functions are computed using optimized linear algebra kernels, while Chebyshev and city block distance functions are computed using simple nested loops with two branches at each iteration. NOTE 2: different clustering algorithms have different limitations: * agglomerative hierarchical clustering algorithms may be used with any kind of distance metric * k-means++ clustering algorithm may be used only with Euclidean distance function Thus, list of specific clustering algorithms you may use depends on distance function you specify when you set your dataset. 
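A minimal usage sketch (all literal values below are hypothetical):

    double[,] xy = new double[,]{{0,0},{0,1},{5,5},{5,6}};
    clusterizerstate s = new clusterizerstate();
    clusterizercreate(s);
    clusterizersetpoints(s, xy, 4, 2, 2); // DistType=2: Euclidean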
-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizersetpoints(clusterizerstate s, double[,] xy, int npoints, int nfeatures, int disttype) { int i = 0; int i_ = 0; alglib.ap.assert((((((((disttype==0 || disttype==1) || disttype==2) || disttype==10) || disttype==11) || disttype==12) || disttype==13) || disttype==20) || disttype==21, "ClusterizerSetPoints: incorrect DistType"); alglib.ap.assert(npoints>=0, "ClusterizerSetPoints: NPoints<0"); alglib.ap.assert(nfeatures>=1, "ClusterizerSetPoints: NFeatures<1"); alglib.ap.assert(alglib.ap.rows(xy)>=npoints, "ClusterizerSetPoints: Rows(XY)<NPoints"); alglib.ap.assert(alglib.ap.cols(xy)>=nfeatures, "ClusterizerSetPoints: Cols(XY)<NFeatures"); alglib.ap.assert(apserv.apservisfinitematrix(xy, npoints, nfeatures), "ClusterizerSetPoints: XY contains NAN/INF"); s.npoints = npoints; s.nfeatures = nfeatures; s.disttype = disttype; apserv.rmatrixsetlengthatleast(ref s.xy, npoints, nfeatures); for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nfeatures-1;i_++) { s.xy[i,i_] = xy[i,i_]; } } } /************************************************************************* This function adds dataset given by distance matrix to the clusterizer structure. It is important that dataset is not given explicitly - only distance matrix and number of points is given. This function overrides all previous calls of ClusterizerSetPoints() or ClusterizerSetDistances(). INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() D - array[NPoints,NPoints], distance matrix given by its upper or lower triangle (main diagonal is ignored because its entries are expected to be zero). NPoints - number of points IsUpper - whether upper or lower triangle of D is given. NOTE 1: different clustering algorithms have different limitations: * agglomerative hierarchical clustering algorithms may be used with any kind of distance metric, including one which is given by distance matrix * k-means++ clustering algorithm may be used only with Euclidean distance function and explicitly given points Thus, if you call this function, you will be unable to use k-means clustering algorithm to process your problem. -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizersetdistances(clusterizerstate s, double[,] d, int npoints, bool isupper) { int i = 0; int j = 0; int j0 = 0; int j1 = 0; alglib.ap.assert(npoints>=0, "ClusterizerSetDistances: NPoints<0"); alglib.ap.assert(alglib.ap.rows(d)>=npoints, "ClusterizerSetDistances: Rows(D)<NPoints"); alglib.ap.assert(alglib.ap.cols(d)>=npoints, "ClusterizerSetDistances: Cols(D)<NPoints"); s.npoints = npoints; s.nfeatures = 0; s.disttype = -1; apserv.rmatrixsetlengthatleast(ref s.d, npoints, npoints); for(i=0; i<=npoints-1; i++) { if( isupper ) { j0 = i+1; j1 = npoints-1; } else { j0 = 0; j1 = i-1; } for(j=j0; j<=j1; j++) { alglib.ap.assert(math.isfinite(d[i,j]) && (double)(d[i,j])>=(double)(0), "ClusterizerSetDistances: D contains infinite, NAN or negative elements"); s.d[i,j] = d[i,j]; s.d[j,i] = d[i,j]; } s.d[i,i] = 0; } } /************************************************************************* This function sets agglomerative hierarchical clustering algorithm INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() Algo - algorithm type: * 0 complete linkage (default algorithm) * 1 single linkage * 2 unweighted average linkage * 3 weighted average linkage -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizersetahcalgo(clusterizerstate s, int algo) { alglib.ap.assert(((algo==0 || algo==1) || algo==2) || algo==3, "ClusterizerSetAHCAlgo: incorrect algorithm type"); s.ahcalgo = algo; } /************************************************************************* This function sets k-means++ properties: number of restarts and maximum number of iterations per one run. INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() Restarts- restarts count, >=1. k-means++ algorithm performs several restarts and chooses best set of centers (one with minimum squared distance). MaxIts - maximum number of k-means iterations performed during one run. >=0, zero value means that algorithm performs unlimited number of iterations. -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizersetkmeanslimits(clusterizerstate s, int restarts, int maxits) { alglib.ap.assert(restarts>=1, "ClusterizerSetKMeansLimits: Restarts<=0"); alglib.ap.assert(maxits>=0, "ClusterizerSetKMeansLimits: MaxIts<0"); s.kmeansrestarts = restarts; s.kmeansmaxits = maxits; } /************************************************************************* This function performs agglomerative hierarchical clustering INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() OUTPUT PARAMETERS: Rep - clustering results; see description of AHCReport structure for more information. NOTE 1: hierarchical clustering algorithms require large amounts of memory. In particular, this implementation needs sizeof(double)*NPoints^2 bytes, which are used to store distance matrix. In case we work with user-supplied matrix, this amount is multiplied by 2 (we have to store original matrix and to work with its copy). For example, problem with 10000 points would require 800M of RAM, even when working in a 1-dimensional space.
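A minimal usage sketch (hypothetical values; assumes a dataset was
already attached with ClusterizerSetPoints or ClusterizerSetDistances):

    ahcreport rep = new ahcreport();
    clusterizersetahcalgo(s, 1); // single linkage
    clusterizerrunahc(s, rep);   // merges are in rep.z, rep.mergedist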
-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizerrunahc(clusterizerstate s, ahcreport rep) { int npoints = 0; int nfeatures = 0; double[,] d = new double[0,0]; npoints = s.npoints; nfeatures = s.nfeatures; // // Fill Rep.NPoints, quick exit when NPoints<=1 // rep.npoints = npoints; if( npoints==0 ) { rep.p = new int[0]; rep.z = new int[0, 0]; rep.pz = new int[0, 0]; rep.pm = new int[0, 0]; rep.mergedist = new double[0]; return; } if( npoints==1 ) { rep.p = new int[1]; rep.z = new int[0, 0]; rep.pz = new int[0, 0]; rep.pm = new int[0, 0]; rep.mergedist = new double[0]; rep.p[0] = 0; return; } // // More than one point // if( s.disttype==-1 ) { // // Run clusterizer with user-supplied distance matrix // clusterizerrunahcinternal(s, ref s.d, rep); return; } else { // // Build distance matrix D. // clusterizergetdistances(s.xy, npoints, nfeatures, s.disttype, ref d); // // Run clusterizer // clusterizerrunahcinternal(s, ref d, rep); return; } } /************************************************************************* This function performs clustering by k-means++ algorithm. You may change algorithm properties like number of restarts or iterations limit by calling ClusterizerSetKMeansLimits() functions. INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() K - number of clusters, K>=0. K can be zero only when algorithm is called for empty dataset, in this case completion code is set to success (+1). If K=0 and dataset size is non-zero, we can not meaningfully assign points to some center (there are no centers because K=0) and return -3 as completion code (failure). OUTPUT PARAMETERS: Rep - clustering results; see description of KMeansReport structure for more information. NOTE 1: k-means clustering can be performed only for datasets with Euclidean distance function. Algorithm will return negative completion code in Rep.TerminationType in case dataset was added to clusterizer with DistType other than Euclidean (or dataset was specified by distance matrix instead of explicitly given points). 
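A minimal usage sketch (hypothetical values; S must contain explicitly
given points with Euclidean DistType=2):

    kmeansreport rep = new kmeansreport();
    clusterizersetkmeanslimits(s, 5, 0); // 5 restarts, no iteration limit
    clusterizerrunkmeans(s, 2, rep);     // K=2; on success rep.terminationtype=1
                                         // and rep.cidx[i] is the cluster of point i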
-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizerrunkmeans(clusterizerstate s, int k, kmeansreport rep) { double[,] dummy = new double[0,0]; alglib.ap.assert(k>=0, "ClusterizerRunKMeans: K<0"); // // Incorrect distance type // if( s.disttype!=2 ) { rep.npoints = s.npoints; rep.terminationtype = -5; rep.k = k; return; } // // K>NPoints or (K=0 and NPoints>0) // if( k>s.npoints || (k==0 && s.npoints>0) ) { rep.npoints = s.npoints; rep.terminationtype = -3; rep.k = k; return; } // // No points // if( s.npoints==0 ) { rep.npoints = 0; rep.terminationtype = 1; rep.k = k; return; } // // Normal case: // 1<=K<=NPoints, Euclidean distance // rep.npoints = s.npoints; rep.nfeatures = s.nfeatures; rep.k = k; rep.npoints = s.npoints; rep.nfeatures = s.nfeatures; kmeansgenerateinternal(s.xy, s.npoints, s.nfeatures, k, s.kmeansmaxits, s.kmeansrestarts, ref rep.terminationtype, ref dummy, false, ref rep.c, true, ref rep.cidx); } /************************************************************************* This function returns distance matrix for dataset INPUT PARAMETERS: XY - array[NPoints,NFeatures], dataset NPoints - number of points, >=0 NFeatures- number of features, >=1 DistType- distance function: * 0 Chebyshev distance (L-inf norm) * 1 city block distance (L1 norm) * 2 Euclidean distance (L2 norm) * 10 Pearson correlation: dist(a,b) = 1-corr(a,b) * 11 Absolute Pearson correlation: dist(a,b) = 1-|corr(a,b)| * 12 Uncentered Pearson correlation (cosine of the angle): dist(a,b) = a'*b/(|a|*|b|) * 13 Absolute uncentered Pearson correlation dist(a,b) = |a'*b|/(|a|*|b|) * 20 Spearman rank correlation: dist(a,b) = 1-rankcorr(a,b) * 21 Absolute Spearman rank correlation dist(a,b) = 1-|rankcorr(a,b)| OUTPUT PARAMETERS: D - array[NPoints,NPoints], distance matrix (full matrix is returned, with lower and upper triangles) NOTES: different distance functions have different performance penalty: * Euclidean or Pearson correlation distances are the fastest ones * Spearman correlation distance function is a bit slower * city block and Chebyshev distances are order of magnitude slower The reason behing difference in performance is that correlation-based distance functions are computed using optimized linear algebra kernels, while Chebyshev and city block distance functions are computed using simple nested loops with two branches at each iteration. 
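A minimal usage sketch (hypothetical values):

    double[,] d = new double[0,0];
    clusterizergetdistances(xy, npoints, nfeatures, 2, ref d); // Euclidean
    // on exit D is symmetric, with d[i,j]==d[j,i] and d[i,i]==0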
-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizergetdistances(double[,] xy, int npoints, int nfeatures, int disttype, ref double[,] d) { int i = 0; int j = 0; int k = 0; double v = 0; double vv = 0; double[,] tmpxy = new double[0,0]; double[] tmpx = new double[0]; double[] tmpy = new double[0]; apserv.apbuffers buf = new apserv.apbuffers(); int i_ = 0; d = new double[0,0]; alglib.ap.assert(nfeatures>=1, "ClusterizerGetDistances: NFeatures<1"); alglib.ap.assert(npoints>=0, "ClusterizerGetDistances: NPoints<1"); alglib.ap.assert((((((((disttype==0 || disttype==1) || disttype==2) || disttype==10) || disttype==11) || disttype==12) || disttype==13) || disttype==20) || disttype==21, "ClusterizerGetDistances: incorrect DistType"); alglib.ap.assert(alglib.ap.rows(xy)>=npoints, "ClusterizerGetDistances: Rows(XY)=nfeatures, "ClusterizerGetDistances: Cols(XY)(double)(v) ) { v = vv; } } d[i,j] = v; d[j,i] = v; } } return; } if( disttype==1 ) { // // City block distance // d = new double[npoints, npoints]; for(i=0; i<=npoints-1; i++) { d[i,i] = 0.0; for(j=i+1; j<=npoints-1; j++) { v = 0.0; for(k=0; k<=nfeatures-1; k++) { vv = xy[i,k]-xy[j,k]; if( (double)(vv)<(double)(0) ) { vv = -vv; } v = v+vv; } v = v/nfeatures; d[i,j] = v; d[j,i] = v; } } return; } if( disttype==2 ) { // // Euclidean distance // d = new double[npoints, npoints]; tmpxy = new double[npoints, nfeatures]; tmpx = new double[nfeatures]; for(j=0; j<=nfeatures-1; j++) { tmpx[j] = 0.0; } v = (double)1/(double)npoints; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nfeatures-1;i_++) { tmpx[i_] = tmpx[i_] + v*xy[i,i_]; } } for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nfeatures-1;i_++) { tmpxy[i,i_] = xy[i,i_]; } for(i_=0; i_<=nfeatures-1;i_++) { tmpxy[i,i_] = tmpxy[i,i_] - tmpx[i_]; } } ablas.rmatrixsyrk(npoints, nfeatures, 1.0, tmpxy, 0, 0, 0, 0.0, ref d, 0, 0, true); for(i=0; i<=npoints-1; i++) { for(j=i+1; j<=npoints-1; j++) { v = Math.Sqrt(Math.Max(d[i,i]+d[j,j]-2*d[i,j], 0.0)); d[i,j] = v; d[j,i] = v; } } for(i=0; i<=npoints-1; i++) { d[i,i] = 0.0; } return; } if( disttype==10 || disttype==11 ) { // // Absolute/nonabsolute Pearson correlation distance // d = new double[npoints, npoints]; tmpxy = new double[nfeatures, npoints]; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nfeatures-1;i_++) { tmpxy[i_,i] = xy[i,i_]; } } basestat.pearsoncorrm(tmpxy, nfeatures, npoints, ref d); for(i=0; i<=npoints-1; i++) { for(j=i+1; j<=npoints-1; j++) { if( disttype==10 ) { v = 1-d[i,j]; } else { v = 1-Math.Abs(d[i,j]); } v = Math.Max(v, 0.0); d[i,j] = v; d[j,i] = v; } } for(i=0; i<=npoints-1; i++) { d[i,i] = 0.0; } return; } if( disttype==12 || disttype==13 ) { // // Absolute/nonabsolute uncentered Pearson correlation distance // d = new double[npoints, npoints]; ablas.rmatrixsyrk(npoints, nfeatures, 1.0, xy, 0, 0, 0, 0.0, ref d, 0, 0, true); for(i=0; i<=npoints-1; i++) { for(j=i+1; j<=npoints-1; j++) { v = d[i,j]/Math.Sqrt(d[i,i]*d[j,j]); if( disttype==13 ) { v = Math.Abs(v); } v = Math.Min(v, 1.0); d[i,j] = 1-v; d[j,i] = 1-v; } } for(i=0; i<=npoints-1; i++) { d[i,i] = 0.0; } return; } if( disttype==20 || disttype==21 ) { // // Spearman rank correlation // d = new double[npoints, npoints]; tmpx = new double[nfeatures]; tmpy = new double[nfeatures]; tmpxy = new double[nfeatures, npoints]; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nfeatures-1;i_++) { tmpx[i_] = xy[i,i_]; } basicstatops.rankx(ref tmpx, nfeatures, buf); for(i_=0; i_<=nfeatures-1;i_++) { 
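// store ranked I-th row of XY as I-th column of TmpXY, so that
// PearsonCorrM() applied to TmpXY below yields Spearman correlations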
tmpxy[i_,i] = tmpx[i_]; } } basestat.pearsoncorrm(tmpxy, nfeatures, npoints, ref d); for(i=0; i<=npoints-1; i++) { for(j=i+1; j<=npoints-1; j++) { if( disttype==20 ) { v = 1-d[i,j]; } else { v = 1-Math.Abs(d[i,j]); } v = Math.Max(v, 0.0); d[i,j] = v; d[j,i] = v; } } for(i=0; i<=npoints-1; i++) { d[i,i] = 0.0; } return; } alglib.ap.assert(false); } /************************************************************************* This function takes as input clusterization report Rep, desired clusters count K, and builds top K clusters from hierarchical clusterization tree. It returns assignment of points to clusters (array of cluster indexes). INPUT PARAMETERS: Rep - report from ClusterizerRunAHC() performed on XY K - desired number of clusters, 1<=K<=NPoints. K can be zero only when NPoints=0. OUTPUT PARAMETERS: CIdx - array[NPoints], I-th element contains cluster index (from 0 to K-1) for I-th point of the dataset. CZ - array[K]. This array allows to convert cluster indexes returned by this function to indexes used by Rep.Z. J-th cluster returned by this function corresponds to CZ[J]-th cluster stored in Rep.Z/PZ/PM. It is guaranteed that CZ[I]=0, "ClusterizerGetKClusters: internal error in Rep integrity"); alglib.ap.assert(k>=0, "ClusterizerGetKClusters: K<=0"); alglib.ap.assert(k<=npoints, "ClusterizerGetKClusters: K>NPoints"); alglib.ap.assert(k>0 || npoints==0, "ClusterizerGetKClusters: K<=0"); alglib.ap.assert(npoints==rep.npoints, "ClusterizerGetKClusters: NPoints<>Rep.NPoints"); // // Quick exit // if( npoints==0 ) { return; } if( npoints==1 ) { cz = new int[1]; cidx = new int[1]; cz[0] = 0; cidx[0] = 0; return; } // // Replay merges, from top to bottom, // keep track of clusters being present at the moment // presentclusters = new bool[2*npoints-1]; tmpidx = new int[npoints]; for(i=0; i<=2*npoints-3; i++) { presentclusters[i] = false; } presentclusters[2*npoints-2] = true; for(i=0; i<=npoints-1; i++) { tmpidx[i] = 2*npoints-2; } for(mergeidx=npoints-2; mergeidx>=npoints-k; mergeidx--) { // // Update information about clusters being present at the moment // presentclusters[npoints+mergeidx] = false; presentclusters[rep.z[mergeidx,0]] = true; presentclusters[rep.z[mergeidx,1]] = true; // // Update TmpIdx according to the current state of the dataset // // NOTE: TmpIdx contains cluster indexes from [0..2*NPoints-2]; // we will convert them to [0..K-1] later. // i0 = rep.pm[mergeidx,0]; i1 = rep.pm[mergeidx,1]; t = rep.z[mergeidx,0]; for(i=i0; i<=i1; i++) { tmpidx[i] = t; } i0 = rep.pm[mergeidx,2]; i1 = rep.pm[mergeidx,3]; t = rep.z[mergeidx,1]; for(i=i0; i<=i1; i++) { tmpidx[i] = t; } } // // Fill CZ - array which allows us to convert cluster indexes // from one system to another. // cz = new int[k]; clusterindexes = new int[2*npoints-1]; t = 0; for(i=0; i<=2*npoints-2; i++) { if( presentclusters[i] ) { cz[t] = i; clusterindexes[i] = t; t = t+1; } } alglib.ap.assert(t==k, "ClusterizerGetKClusters: internal error"); // // Convert indexes stored in CIdx // cidx = new int[npoints]; for(i=0; i<=npoints-1; i++) { cidx[i] = clusterindexes[tmpidx[rep.p[i]]]; } } /************************************************************************* This function accepts AHC report Rep, desired minimum intercluster distance and returns top clusters from hierarchical clusterization tree which are separated by distance R or HIGHER. It returns assignment of points to clusters (array of cluster indexes). 
There is one more function with similar name - ClusterizerSeparatedByCorr, which returns clusters with intercluster correlation equal to R or LOWER (note: higher for distance, lower for correlation). INPUT PARAMETERS: Rep - report from ClusterizerRunAHC() performed on XY R - desired minimum intercluster distance, R>=0 OUTPUT PARAMETERS: K - number of clusters, 1<=K<=NPoints CIdx - array[NPoints], I-th element contains cluster index (from 0 to K-1) for I-th point of the dataset. CZ - array[K]. This array allows to convert cluster indexes returned by this function to indexes used by Rep.Z. J-th cluster returned by this function corresponds to CZ[J]-th cluster stored in Rep.Z/PZ/PM. It is guaranteed that CZ[I]<CZ[I+1]. -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizerseparatedbydist(ahcreport rep, double r, ref int k, ref int[] cidx, ref int[] cz) { k = 0; cidx = new int[0]; cz = new int[0]; alglib.ap.assert(math.isfinite(r) && (double)(r)>=(double)(0), "ClusterizerSeparatedByDist: R is infinite or less than 0"); k = 1; while( k<rep.npoints && (double)(rep.mergedist[rep.npoints-1-k])>=(double)(r) ) { k = k+1; } clusterizergetkclusters(rep, k, ref cidx, ref cz); } /************************************************************************* This function accepts AHC report Rep, desired maximum intercluster correlation and returns top clusters from hierarchical clusterization tree which are separated by correlation R or LOWER. It returns assignment of points to clusters (array of cluster indexes). There is one more function with similar name - ClusterizerSeparatedByDist, which returns clusters with intercluster distance equal to R or HIGHER (note: higher for distance, lower for correlation). INPUT PARAMETERS: Rep - report from ClusterizerRunAHC() performed on XY R - desired maximum intercluster correlation, -1<=R<=+1 OUTPUT PARAMETERS: K - number of clusters, 1<=K<=NPoints CIdx - array[NPoints], I-th element contains cluster index (from 0 to K-1) for I-th point of the dataset. CZ - array[K]. This array allows to convert cluster indexes returned by this function to indexes used by Rep.Z. J-th cluster returned by this function corresponds to CZ[J]-th cluster stored in Rep.Z/PZ/PM. It is guaranteed that CZ[I]<CZ[I+1]. -- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ public static void clusterizerseparatedbycorr(ahcreport rep, double r, ref int k, ref int[] cidx, ref int[] cz) { k = 0; cidx = new int[0]; cz = new int[0]; alglib.ap.assert((math.isfinite(r) && (double)(r)>=(double)(-1)) && (double)(r)<=(double)(1), "ClusterizerSeparatedByCorr: R is infinite or outside of [-1,+1]"); k = 1; while( k<rep.npoints && (double)(rep.mergedist[rep.npoints-1-k])>=(double)(1-r) ) { k = k+1; } clusterizergetkclusters(rep, k, ref cidx, ref cz); } /************************************************************************* K-means++ clusterization INPUT PARAMETERS: XY - dataset, array [0..NPoints-1,0..NVars-1].
NPoints - dataset size, NPoints>=K NVars - number of variables, NVars>=1 K - desired number of clusters, K>=1 Restarts - number of restarts, Restarts>=1 OUTPUT PARAMETERS: Info - return code: * -3, if task is degenerate (number of distinct points is less than K) * -1, if incorrect NPoints/NFeatures/K/Restarts was passed * 1, if subroutine finished successfully CCol - array[0..NVars-1,0..K-1].matrix whose columns store cluster's centers NeedCCol - True in case caller requires to store result in CCol CRow - array[0..K-1,0..NVars-1], same as CCol, but centers are stored in rows NeedCRow - True in case caller requires to store result in CCol XYC - array[NPoints], which contains cluster indexes -- ALGLIB -- Copyright 21.03.2009 by Bochkanov Sergey *************************************************************************/ public static void kmeansgenerateinternal(double[,] xy, int npoints, int nvars, int k, int maxits, int restarts, ref int info, ref double[,] ccol, bool needccol, ref double[,] crow, bool needcrow, ref int[] xyc) { int i = 0; int j = 0; double[,] ct = new double[0,0]; double[,] ctbest = new double[0,0]; int[] xycbest = new int[0]; double e = 0; double eprev = 0; double ebest = 0; double[] x = new double[0]; double[] tmp = new double[0]; double[] d2 = new double[0]; double[] p = new double[0]; int[] csizes = new int[0]; bool[] cbusy = new bool[0]; double v = 0; int cclosest = 0; double dclosest = 0; double[] work = new double[0]; bool waschanges = new bool(); bool zerosizeclusters = new bool(); int pass = 0; int itcnt = 0; int i_ = 0; info = 0; ccol = new double[0,0]; crow = new double[0,0]; xyc = new int[0]; // // Test parameters // if( ((npoints=(double)(eprev) ) { break; } // // Update EPrev // eprev = e; } // // 3. Calculate E, compare with best centers found so far // if( (double)(e)<(double)(ebest) ) { // // store partition. 
// ebest = e; blas.copymatrix(ct, 0, k-1, 0, nvars-1, ref ctbest, 0, k-1, 0, nvars-1); for(i=0; i<=npoints-1; i++) { xycbest[i] = xyc[i]; } } } // // Copy and transpose // if( needccol ) { ccol = new double[nvars, k]; blas.copyandtranspose(ctbest, 0, k-1, 0, nvars-1, ref ccol, 0, nvars-1, 0, k-1); } if( needcrow ) { crow = new double[k, nvars]; ablas.rmatrixcopy(k, nvars, ctbest, 0, 0, ref crow, 0, 0); } for(i=0; i<=npoints-1; i++) { xyc[i] = xycbest[i]; } } /************************************************************************* Select center for a new cluster using k-means++ rule *************************************************************************/ private static bool selectcenterpp(double[,] xy, int npoints, int nvars, ref double[,] centers, ref bool[] busycenters, int ccnt, ref double[] d2, ref double[] p, ref double[] tmp) { bool result = new bool(); int i = 0; int j = 0; int cc = 0; double v = 0; double s = 0; int i_ = 0; result = true; for(cc=0; cc<=ccnt-1; cc++) { if( !busycenters[cc] ) { // // fill D2 // for(i=0; i<=npoints-1; i++) { d2[i] = math.maxrealnumber; for(j=0; j<=ccnt-1; j++) { if( busycenters[j] ) { for(i_=0; i_<=nvars-1;i_++) { tmp[i_] = xy[i,i_]; } for(i_=0; i_<=nvars-1;i_++) { tmp[i_] = tmp[i_] - centers[j,i_]; } v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += tmp[i_]*tmp[i_]; } if( (double)(v)<(double)(d2[i]) ) { d2[i] = v; } } } } // // calculate P (non-cumulative) // s = 0; for(i=0; i<=npoints-1; i++) { s = s+d2[i]; } if( (double)(s)==(double)(0) ) { result = false; return result; } s = 1/s; for(i_=0; i_<=npoints-1;i_++) { p[i_] = s*d2[i_]; } // // choose one of points with probability P // random number within (0,1) is generated and // inverse empirical CDF is used to randomly choose a point. // s = 0; v = math.randomreal(); for(i=0; i<=npoints-1; i++) { s = s+p[i]; if( (double)(v)<=(double)(s) || i==npoints-1 ) { for(i_=0; i_<=nvars-1;i_++) { centers[cc,i_] = xy[i,i_]; } busycenters[cc] = true; break; } } } } return result; } /************************************************************************* This function performs agglomerative hierarchical clustering using precomputed distance matrix. Internal function, should not be called directly. INPUT PARAMETERS: S - clusterizer state, initialized by ClusterizerCreate() D - distance matrix, array[S.NFeatures,S.NFeatures] Contents of the matrix is destroyed during algorithm operation. OUTPUT PARAMETERS: Rep - clustering results; see description of AHCReport structure for more information. 
-- ALGLIB -- Copyright 10.07.2012 by Bochkanov Sergey *************************************************************************/ private static void clusterizerrunahcinternal(clusterizerstate s, ref double[,] d, ahcreport rep) { int i = 0; int j = 0; int k = 0; double v = 0; int mergeidx = 0; int c0 = 0; int c1 = 0; int s0 = 0; int s1 = 0; int ar = 0; int br = 0; int npoints = 0; int[] cidx = new int[0]; int[] csizes = new int[0]; int[] nnidx = new int[0]; int[,] cinfo = new int[0,0]; npoints = s.npoints; // // Fill Rep.NPoints, quick exit when NPoints<=1 // rep.npoints = npoints; if( npoints==0 ) { rep.p = new int[0]; rep.z = new int[0, 0]; rep.pz = new int[0, 0]; rep.pm = new int[0, 0]; rep.mergedist = new double[0]; return; } if( npoints==1 ) { rep.p = new int[1]; rep.z = new int[0, 0]; rep.pz = new int[0, 0]; rep.pm = new int[0, 0]; rep.mergedist = new double[0]; rep.p[0] = 0; return; } rep.z = new int[npoints-1, 2]; rep.mergedist = new double[npoints-1]; // // Build list of nearest neighbors // nnidx = new int[npoints]; for(i=0; i<=npoints-1; i++) { // // Calculate index of the nearest neighbor // k = -1; v = math.maxrealnumber; for(j=0; j<=npoints-1; j++) { if( j!=i && (double)(d[i,j])<(double)(v) ) { k = j; v = d[i,j]; } } alglib.ap.assert((double)(v)<(double)(math.maxrealnumber), "ClusterizerRunAHC: internal error"); nnidx[i] = k; } // // Distance matrix is built, perform merges. // // NOTE 1: CIdx is array[NPoints] which maps rows/columns of the // distance matrix D to indexes of clusters. Values of CIdx // from [0,NPoints) denote single-point clusters, and values // from [NPoints,2*NPoints-1) denote ones obtained by merging // smaller clusters. Negative calues correspond to absent clusters. // // Initially it contains [0...NPoints-1], after each merge // one element of CIdx (one with index C0) is replaced by // NPoints+MergeIdx, and another one with index C1 is // rewritten by -1. // // NOTE 2: CSizes is array[NPoints] which stores sizes of clusters. // // cidx = new int[npoints]; csizes = new int[npoints]; for(i=0; i<=npoints-1; i++) { cidx[i] = i; csizes[i] = 1; } for(mergeidx=0; mergeidx<=npoints-2; mergeidx++) { // // Select pair of clusters (C0,C1) with CIdx[C0]=0 ) { if( (double)(d[i,nnidx[i]])<(double)(v) ) { c0 = i; c1 = nnidx[i]; v = d[i,nnidx[i]]; } } } alglib.ap.assert((double)(v)<(double)(math.maxrealnumber), "ClusterizerRunAHC: internal error"); if( cidx[c0]>cidx[c1] ) { i = c1; c1 = c0; c0 = i; } // // Fill one row of Rep.Z and one element of Rep.MergeDist // rep.z[mergeidx,0] = cidx[c0]; rep.z[mergeidx,1] = cidx[c1]; rep.mergedist[mergeidx] = v; // // Update distance matrix: // * row/column C0 are updated by distances to the new cluster // * row/column C1 are considered empty (we can fill them by zeros, // but do not want to spend time - we just ignore them) // // NOTE: it is important to update distance matrix BEFORE CIdx/CSizes // are updated. 
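//
// (illustrative note) The updates below are the standard linkage rules
// for the distance between cluster I and the merged cluster C0+C1:
//   * complete linkage (AHCAlgo=0):   d = max(d(I,C0), d(I,C1))
//   * single linkage (AHCAlgo=1):     d = min(d(I,C0), d(I,C1))
//   * unweighted average (AHCAlgo=2): d = (|C0|*d(I,C0)+|C1|*d(I,C1))/(|C0|+|C1|)
//   * weighted average (AHCAlgo=3):   d = (d(I,C0)+d(I,C1))/2
//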
// alglib.ap.assert(((s.ahcalgo==0 || s.ahcalgo==1) || s.ahcalgo==2) || s.ahcalgo==3, "ClusterizerRunAHC: internal error"); for(i=0; i<=npoints-1; i++) { if( i!=c0 && i!=c1 ) { if( s.ahcalgo==0 ) { d[i,c0] = Math.Max(d[i,c0], d[i,c1]); } if( s.ahcalgo==1 ) { d[i,c0] = Math.Min(d[i,c0], d[i,c1]); } if( s.ahcalgo==2 ) { d[i,c0] = (csizes[c0]*d[i,c0]+csizes[c1]*d[i,c1])/(csizes[c0]+csizes[c1]); } if( s.ahcalgo==3 ) { d[i,c0] = (d[i,c0]+d[i,c1])/2; } d[c0,i] = d[i,c0]; } } // // Update CIdx and CSizes // cidx[c0] = npoints+mergeidx; cidx[c1] = -1; csizes[c0] = csizes[c0]+csizes[c1]; csizes[c1] = 0; // // Update nearest neighbors array: // * update nearest neighbors of everything except for C0/C1 // * update neighbors of C0/C1 // for(i=0; i<=npoints-1; i++) { if( (cidx[i]>=0 && i!=c0) && (nnidx[i]==c0 || nnidx[i]==c1) ) { // // I-th cluster which is distinct from C0/C1 has former C0/C1 cluster as its nearest // neighbor. We handle this issue depending on specific AHC algorithm being used. // if( s.ahcalgo==1 ) { // // Single linkage. Merging of two clusters together // does NOT change distances between new cluster and // other clusters. // // The only thing we have to do is to update nearest neighbor index // nnidx[i] = c0; } else { // // Something other than single linkage. We have to re-examine // all the row to find nearest neighbor. // k = -1; v = math.maxrealnumber; for(j=0; j<=npoints-1; j++) { if( (cidx[j]>=0 && j!=i) && (double)(d[i,j])<(double)(v) ) { k = j; v = d[i,j]; } } alglib.ap.assert((double)(v)<(double)(math.maxrealnumber) || mergeidx==npoints-2, "ClusterizerRunAHC: internal error"); nnidx[i] = k; } } } k = -1; v = math.maxrealnumber; for(j=0; j<=npoints-1; j++) { if( (cidx[j]>=0 && j!=c0) && (double)(d[c0,j])<(double)(v) ) { k = j; v = d[c0,j]; } } alglib.ap.assert((double)(v)<(double)(math.maxrealnumber) || mergeidx==npoints-2, "ClusterizerRunAHC: internal error"); nnidx[c0] = k; } // // Calculate Rep.P and Rep.PM. // // In order to do that, we fill CInfo matrix - (2*NPoints-1)*3 matrix, // with I-th row containing: // * CInfo[I,0] - size of I-th cluster // * CInfo[I,1] - beginning of I-th cluster // * CInfo[I,2] - end of I-th cluster // * CInfo[I,3] - height of I-th cluster // // We perform it as follows: // * first NPoints clusters have unit size (CInfo[I,0]=1) and zero // height (CInfo[I,3]=0) // * we replay NPoints-1 merges from first to last and fill sizes of // corresponding clusters (new size is a sum of sizes of clusters // being merged) and height (new height is max(heights)+1). // * now we ready to determine locations of clusters. Last cluster // spans entire dataset, we know it. We replay merges from last to // first, during each merge we already know location of the merge // result, and we can position first cluster to the left part of // the result, and second cluster to the right part. // rep.p = new int[npoints]; rep.pm = new int[npoints-1, 6]; cinfo = new int[2*npoints-1, 4]; for(i=0; i<=npoints-1; i++) { cinfo[i,0] = 1; cinfo[i,3] = 0; } for(i=0; i<=npoints-2; i++) { cinfo[npoints+i,0] = cinfo[rep.z[i,0],0]+cinfo[rep.z[i,1],0]; cinfo[npoints+i,3] = Math.Max(cinfo[rep.z[i,0],3], cinfo[rep.z[i,1],3])+1; } cinfo[2*npoints-2,1] = 0; cinfo[2*npoints-2,2] = npoints-1; for(i=npoints-2; i>=0; i--) { // // We merge C0 which spans [A0,B0] and C1 (spans [A1,B1]), // with unknown A0, B0, A1, B1. However, we know that result // is CR, which spans [AR,BR] with known AR/BR, and we know // sizes of C0, C1, CR (denotes as S0, S1, SR). 
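//
// (illustrative note) Example with hypothetical numbers: if the merge
// result spans [AR,BR]=[3,7] and S0=2, S1=3, then C0 is placed at
// [3,4] (i.e. AR..AR+S0-1) and C1 at [5,7] (i.e. BR-S1+1..BR).
//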
c0 = rep.z[i,0]; c1 = rep.z[i,1]; s0 = cinfo[c0,0]; s1 = cinfo[c1,0]; ar = cinfo[npoints+i,1]; br = cinfo[npoints+i,2]; cinfo[c0,1] = ar; cinfo[c0,2] = ar+s0-1; cinfo[c1,1] = br-(s1-1); cinfo[c1,2] = br; rep.pm[i,0] = cinfo[c0,1]; rep.pm[i,1] = cinfo[c0,2]; rep.pm[i,2] = cinfo[c1,1]; rep.pm[i,3] = cinfo[c1,2]; rep.pm[i,4] = cinfo[c0,3]; rep.pm[i,5] = cinfo[c1,3]; } for(i=0; i<=npoints-1; i++) { alglib.ap.assert(cinfo[i,1]==cinfo[i,2]); rep.p[i] = cinfo[i,1]; } // // Calculate Rep.PZ // rep.pz = new int[npoints-1, 2]; for(i=0; i<=npoints-2; i++) { rep.pz[i,0] = rep.z[i,0]; rep.pz[i,1] = rep.z[i,1]; if( rep.pz[i,0]<npoints ) { rep.pz[i,0] = rep.p[rep.pz[i,0]]; } if( rep.pz[i,1]<npoints ) { rep.pz[i,1] = rep.p[rep.pz[i,1]]; } } } } public class dforest { public class decisionforest : apobject { public int nvars; public int nclasses; public int ntrees; public int bufsize; public double[] trees; public decisionforest() { init(); } public override void init() { trees = new double[0]; } public override alglib.apobject make_copy() { decisionforest _result = new decisionforest(); _result.nvars = nvars; _result.nclasses = nclasses; _result.ntrees = ntrees; _result.bufsize = bufsize; _result.trees = (double[])trees.Clone(); return _result; } }; public class dfreport : apobject { public double relclserror; public double avgce; public double rmserror; public double avgerror; public double avgrelerror; public double oobrelclserror; public double oobavgce; public double oobrmserror; public double oobavgerror; public double oobavgrelerror; public dfreport() { init(); } public override void init() { } public override alglib.apobject make_copy() { dfreport _result = new dfreport(); _result.relclserror = relclserror; _result.avgce = avgce; _result.rmserror = rmserror; _result.avgerror = avgerror; _result.avgrelerror = avgrelerror; _result.oobrelclserror = oobrelclserror; _result.oobavgce = oobavgce; _result.oobrmserror = oobrmserror; _result.oobavgerror = oobavgerror; _result.oobavgrelerror = oobavgrelerror; return _result; } }; public class dfinternalbuffers : apobject { public double[] treebuf; public int[] idxbuf; public double[] tmpbufr; public double[] tmpbufr2; public int[] tmpbufi; public int[] classibuf; public double[] sortrbuf; public double[] sortrbuf2; public int[] sortibuf; public int[] varpool; public bool[] evsbin; public double[] evssplits; public dfinternalbuffers() { init(); } public override void init() { treebuf = new double[0]; idxbuf = new int[0]; tmpbufr = new double[0]; tmpbufr2 = new double[0]; tmpbufi = new int[0]; classibuf = new int[0]; sortrbuf = new double[0]; sortrbuf2 = new double[0]; sortibuf = new int[0]; varpool = new int[0]; evsbin = new bool[0]; evssplits = new double[0]; } public override alglib.apobject make_copy() { dfinternalbuffers _result = new dfinternalbuffers(); _result.treebuf = (double[])treebuf.Clone(); _result.idxbuf = (int[])idxbuf.Clone(); _result.tmpbufr = (double[])tmpbufr.Clone(); _result.tmpbufr2 = (double[])tmpbufr2.Clone(); _result.tmpbufi = (int[])tmpbufi.Clone(); _result.classibuf = (int[])classibuf.Clone(); _result.sortrbuf = (double[])sortrbuf.Clone(); _result.sortrbuf2 = (double[])sortrbuf2.Clone(); _result.sortibuf = (int[])sortibuf.Clone(); _result.varpool = (int[])varpool.Clone(); _result.evsbin = (bool[])evsbin.Clone(); _result.evssplits = (double[])evssplits.Clone(); return _result; } }; public const int innernodewidth = 3; public const int leafnodewidth = 2; public const int dfusestrongsplits = 1; public const int dfuseevs = 2; public const int dffirstversion = 0; /************************************************************************* This subroutine builds random decision forest. INPUT PARAMETERS: XY - training set NPoints - training set size, NPoints>=1 NVars - number of independent variables, NVars>=1 NClasses - task type: * NClasses=1 - regression task with one dependent variable * NClasses>1 - classification task with NClasses classes. NTrees - number of trees in a forest, NTrees>=1. recommended values: 50-100. R - percent of a training set used to build individual trees. 0<R<=1, recommended value: 0.66. OUTPUT PARAMETERS: Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<1, NVars<1, NClasses<1, NTrees<1, R<=0 or R>1). * 1, if task has been solved DF - model built Rep - training report, contains error on a training set and out-of-bag estimates of generalization error. -- ALGLIB -- Copyright 19.02.2009 by Bochkanov Sergey *************************************************************************/ public static void dfbuildrandomdecisionforest(double[,] xy, int npoints, int nvars, int nclasses, int ntrees, double r, ref int info, decisionforest df, dfreport rep) { int samplesize = 0; info = 0; if( (double)(r)<=(double)(0) || (double)(r)>(double)(1) ) { info = -1; return; } samplesize = Math.Max((int)Math.Round(r*npoints), 1); dfbuildinternal(xy, npoints, nvars, nclasses, ntrees, samplesize, Math.Max(nvars/2, 1), dfusestrongsplits+dfuseevs, ref info, df, rep); } /************************************************************************* This subroutine builds random decision forest. This function gives ability to tune number of variables used when choosing best split. INPUT PARAMETERS: XY - training set NPoints - training set size, NPoints>=1 NVars - number of independent variables, NVars>=1 NClasses - task type: * NClasses=1 - regression task with one dependent variable * NClasses>1 - classification task with NClasses classes. NTrees - number of trees in a forest, NTrees>=1. recommended values: 50-100. NRndVars - number of variables used when choosing best split R - percent of a training set used to build individual trees. 0<R<=1, recommended value: 0.66. OUTPUT PARAMETERS: Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<1, NVars<1, NClasses<1, NTrees<1, NRndVars<1 or NRndVars>NVars, R<=0 or R>1). * 1, if task has been solved DF - model built Rep - training report, contains error on a training set and out-of-bag estimates of generalization error.
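A minimal usage sketch (hypothetical values; XY holds 100 points with 3
inputs and a class label in {0,1} in the last column):

    decisionforest df = new decisionforest();
    dfreport rep = new dfreport();
    int info = 0;
    dfbuildrandomdecisionforestx1(xy, 100, 3, 2, 50, 2, 0.66, ref info, df, rep);
    // info=1 on success; rep.oobrelclserror is the out-of-bag
    // misclassification rate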
-- ALGLIB -- Copyright 19.02.2009 by Bochkanov Sergey *************************************************************************/ public static void dfbuildrandomdecisionforestx1(double[,] xy, int npoints, int nvars, int nclasses, int ntrees, int nrndvars, double r, ref int info, decisionforest df, dfreport rep) { int samplesize = 0; info = 0; if( (double)(r)<=(double)(0) || (double)(r)>(double)(1) ) { info = -1; return; } if( nrndvars<=0 || nrndvars>nvars ) { info = -1; return; } samplesize = Math.Max((int)Math.Round(r*npoints), 1); dfbuildinternal(xy, npoints, nvars, nclasses, ntrees, samplesize, nrndvars, dfusestrongsplits+dfuseevs, ref info, df, rep); } public static void dfbuildinternal(double[,] xy, int npoints, int nvars, int nclasses, int ntrees, int samplesize, int nfeatures, int flags, ref int info, decisionforest df, dfreport rep) { int i = 0; int j = 0; int k = 0; int tmpi = 0; int lasttreeoffs = 0; int offs = 0; int ooboffs = 0; int treesize = 0; int nvarsinpool = 0; bool useevs = new bool(); dfinternalbuffers bufs = new dfinternalbuffers(); int[] permbuf = new int[0]; double[] oobbuf = new double[0]; int[] oobcntbuf = new int[0]; double[,] xys = new double[0,0]; double[] x = new double[0]; double[] y = new double[0]; int oobcnt = 0; int oobrelcnt = 0; double v = 0; double vmin = 0; double vmax = 0; bool bflag = new bool(); int i_ = 0; int i1_ = 0; info = 0; // // Test for inputs // if( (((((npoints<1 || samplesize<1) || samplesize>npoints) || nvars<1) || nclasses<1) || ntrees<1) || nfeatures<1 ) { info = -1; return; } if( nclasses>1 ) { for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nvars])<0 || (int)Math.Round(xy[i,nvars])>=nclasses ) { info = -2; return; } } } info = 1; // // Flags // useevs = flags/dfuseevs%2!=0; // // Allocate data, prepare header // treesize = 1+innernodewidth*(samplesize-1)+leafnodewidth*samplesize; permbuf = new int[npoints-1+1]; bufs.treebuf = new double[treesize-1+1]; bufs.idxbuf = new int[npoints-1+1]; bufs.tmpbufr = new double[npoints-1+1]; bufs.tmpbufr2 = new double[npoints-1+1]; bufs.tmpbufi = new int[npoints-1+1]; bufs.sortrbuf = new double[npoints]; bufs.sortrbuf2 = new double[npoints]; bufs.sortibuf = new int[npoints]; bufs.varpool = new int[nvars-1+1]; bufs.evsbin = new bool[nvars-1+1]; bufs.evssplits = new double[nvars-1+1]; bufs.classibuf = new int[2*nclasses-1+1]; oobbuf = new double[nclasses*npoints-1+1]; oobcntbuf = new int[npoints-1+1]; df.trees = new double[ntrees*treesize-1+1]; xys = new double[samplesize-1+1, nvars+1]; x = new double[nvars-1+1]; y = new double[nclasses-1+1]; for(i=0; i<=npoints-1; i++) { permbuf[i] = i; } for(i=0; i<=npoints*nclasses-1; i++) { oobbuf[i] = 0; } for(i=0; i<=npoints-1; i++) { oobcntbuf[i] = 0; } // // Prepare variable pool and EVS (extended variable selection/splitting) buffers // (whether EVS is turned on or not): // 1. detect binary variables and pre-calculate splits for them // 2. 
detect variables with non-distinct values and exclude them from pool // for(i=0; i<=nvars-1; i++) { bufs.varpool[i] = i; } nvarsinpool = nvars; if( useevs ) { for(j=0; j<=nvars-1; j++) { vmin = xy[0,j]; vmax = vmin; for(i=0; i<=npoints-1; i++) { v = xy[i,j]; vmin = Math.Min(vmin, v); vmax = Math.Max(vmax, v); } if( (double)(vmin)==(double)(vmax) ) { // // exclude variable from pool // bufs.varpool[j] = bufs.varpool[nvarsinpool-1]; bufs.varpool[nvarsinpool-1] = -1; nvarsinpool = nvarsinpool-1; continue; } bflag = false; for(i=0; i<=npoints-1; i++) { v = xy[i,j]; if( (double)(v)!=(double)(vmin) && (double)(v)!=(double)(vmax) ) { bflag = true; break; } } if( bflag ) { // // non-binary variable // bufs.evsbin[j] = false; } else { // // Prepare // bufs.evsbin[j] = true; bufs.evssplits[j] = 0.5*(vmin+vmax); if( (double)(bufs.evssplits[j])<=(double)(vmin) ) { bufs.evssplits[j] = vmax; } } } } // // RANDOM FOREST FORMAT // W[0] - size of array // W[1] - version number // W[2] - NVars // W[3] - NClasses (1 for regression) // W[4] - NTrees // W[5] - trees offset // // // TREE FORMAT // W[Offs] - size of sub-array // node info: // W[K+0] - variable number (-1 for leaf mode) // W[K+1] - threshold (class/value for leaf node) // W[K+2] - ">=" branch index (absent for leaf node) // // df.nvars = nvars; df.nclasses = nclasses; df.ntrees = ntrees; // // Build forest // offs = 0; for(i=0; i<=ntrees-1; i++) { // // Prepare sample // for(k=0; k<=samplesize-1; k++) { j = k+math.randominteger(npoints-k); tmpi = permbuf[k]; permbuf[k] = permbuf[j]; permbuf[j] = tmpi; j = permbuf[k]; for(i_=0; i_<=nvars;i_++) { xys[k,i_] = xy[j,i_]; } } // // build tree, copy // dfbuildtree(xys, samplesize, nvars, nclasses, nfeatures, nvarsinpool, flags, bufs); j = (int)Math.Round(bufs.treebuf[0]); i1_ = (0) - (offs); for(i_=offs; i_<=offs+j-1;i_++) { df.trees[i_] = bufs.treebuf[i_+i1_]; } lasttreeoffs = offs; offs = offs+j; // // OOB estimates // for(k=samplesize; k<=npoints-1; k++) { for(j=0; j<=nclasses-1; j++) { y[j] = 0; } j = permbuf[k]; for(i_=0; i_<=nvars-1;i_++) { x[i_] = xy[j,i_]; } dfprocessinternal(df, lasttreeoffs, x, ref y); i1_ = (0) - (j*nclasses); for(i_=j*nclasses; i_<=(j+1)*nclasses-1;i_++) { oobbuf[i_] = oobbuf[i_] + y[i_+i1_]; } oobcntbuf[j] = oobcntbuf[j]+1; } } df.bufsize = offs; // // Normalize OOB results // for(i=0; i<=npoints-1; i++) { if( oobcntbuf[i]!=0 ) { v = (double)1/(double)oobcntbuf[i]; for(i_=i*nclasses; i_<=i*nclasses+nclasses-1;i_++) { oobbuf[i_] = v*oobbuf[i_]; } } } // // Calculate training set estimates // rep.relclserror = dfrelclserror(df, xy, npoints); rep.avgce = dfavgce(df, xy, npoints); rep.rmserror = dfrmserror(df, xy, npoints); rep.avgerror = dfavgerror(df, xy, npoints); rep.avgrelerror = dfavgrelerror(df, xy, npoints); // // Calculate OOB estimates. 
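//
// (illustrative note) Each tree was fitted on SampleSize randomly chosen
// points; the remaining points are "out-of-bag" (OOB) for that tree.
// OobBuf accumulated, for every point, the averaged prediction of the
// trees which did NOT see it, so the estimates computed below
// approximate generalization error without a separate test set.
//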
// rep.oobrelclserror = 0; rep.oobavgce = 0; rep.oobrmserror = 0; rep.oobavgerror = 0; rep.oobavgrelerror = 0; oobcnt = 0; oobrelcnt = 0; for(i=0; i<=npoints-1; i++) { if( oobcntbuf[i]!=0 ) { ooboffs = i*nclasses; if( nclasses>1 ) { // // classification-specific code // k = (int)Math.Round(xy[i,nvars]); tmpi = 0; for(j=1; j<=nclasses-1; j++) { if( (double)(oobbuf[ooboffs+j])>(double)(oobbuf[ooboffs+tmpi]) ) { tmpi = j; } } if( tmpi!=k ) { rep.oobrelclserror = rep.oobrelclserror+1; } if( (double)(oobbuf[ooboffs+k])!=(double)(0) ) { rep.oobavgce = rep.oobavgce-Math.Log(oobbuf[ooboffs+k]); } else { rep.oobavgce = rep.oobavgce-Math.Log(math.minrealnumber); } for(j=0; j<=nclasses-1; j++) { if( j==k ) { rep.oobrmserror = rep.oobrmserror+math.sqr(oobbuf[ooboffs+j]-1); rep.oobavgerror = rep.oobavgerror+Math.Abs(oobbuf[ooboffs+j]-1); rep.oobavgrelerror = rep.oobavgrelerror+Math.Abs(oobbuf[ooboffs+j]-1); oobrelcnt = oobrelcnt+1; } else { rep.oobrmserror = rep.oobrmserror+math.sqr(oobbuf[ooboffs+j]); rep.oobavgerror = rep.oobavgerror+Math.Abs(oobbuf[ooboffs+j]); } } } else { // // regression-specific code // rep.oobrmserror = rep.oobrmserror+math.sqr(oobbuf[ooboffs]-xy[i,nvars]); rep.oobavgerror = rep.oobavgerror+Math.Abs(oobbuf[ooboffs]-xy[i,nvars]); if( (double)(xy[i,nvars])!=(double)(0) ) { rep.oobavgrelerror = rep.oobavgrelerror+Math.Abs((oobbuf[ooboffs]-xy[i,nvars])/xy[i,nvars]); oobrelcnt = oobrelcnt+1; } } // // update OOB estimates count. // oobcnt = oobcnt+1; } } if( oobcnt>0 ) { rep.oobrelclserror = rep.oobrelclserror/oobcnt; rep.oobavgce = rep.oobavgce/oobcnt; rep.oobrmserror = Math.Sqrt(rep.oobrmserror/(oobcnt*nclasses)); rep.oobavgerror = rep.oobavgerror/(oobcnt*nclasses); if( oobrelcnt>0 ) { rep.oobavgrelerror = rep.oobavgrelerror/oobrelcnt; } } } /************************************************************************* Procesing INPUT PARAMETERS: DF - decision forest model X - input vector, array[0..NVars-1]. OUTPUT PARAMETERS: Y - result. Regression estimate when solving regression task, vector of posterior probabilities for classification task. See also DFProcessI. -- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/ public static void dfprocess(decisionforest df, double[] x, ref double[] y) { int offs = 0; int i = 0; double v = 0; int i_ = 0; // // Proceed // if( alglib.ap.len(y)1 ) { // // classification-specific code // k = (int)Math.Round(xy[i,df.nvars]); tmpi = 0; for(j=1; j<=df.nclasses-1; j++) { if( (double)(y[j])>(double)(y[tmpi]) ) { tmpi = j; } } if( (double)(y[k])!=(double)(0) ) { result = result-Math.Log(y[k]); } else { result = result-Math.Log(math.minrealnumber); } } } result = result/npoints; return result; } /************************************************************************* RMS error on the test set INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size RESULT: root mean square error. Its meaning for regression task is obvious. As for classification task, RMS error means error when estimating posterior probabilities. 
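(illustrative note) For classification the value computed below is
RMS = sqrt( sum over points I and classes J of (Y[I,J]-T[I,J])^2 / (NPoints*NClasses) ),
where Y[I,J] is the predicted posterior probability and T[I,J] is 1 for
the true class of point I and 0 otherwise.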
-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/ public static double dfrmserror(decisionforest df, double[,] xy, int npoints) { double result = 0; double[] x = new double[0]; double[] y = new double[0]; int i = 0; int j = 0; int k = 0; int tmpi = 0; int i_ = 0; x = new double[df.nvars-1+1]; y = new double[df.nclasses-1+1]; result = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=df.nvars-1;i_++) { x[i_] = xy[i,i_]; } dfprocess(df, x, ref y); if( df.nclasses>1 ) { // // classification-specific code // k = (int)Math.Round(xy[i,df.nvars]); tmpi = 0; for(j=1; j<=df.nclasses-1; j++) { if( (double)(y[j])>(double)(y[tmpi]) ) { tmpi = j; } } for(j=0; j<=df.nclasses-1; j++) { if( j==k ) { result = result+math.sqr(y[j]-1); } else { result = result+math.sqr(y[j]); } } } else { // // regression-specific code // result = result+math.sqr(y[0]-xy[i,df.nvars]); } } result = Math.Sqrt(result/(npoints*df.nclasses)); return result; } /************************************************************************* Average error on the test set INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size RESULT: Its meaning for regression task is obvious. As for classification task, it means average error when estimating posterior probabilities. -- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/ public static double dfavgerror(decisionforest df, double[,] xy, int npoints) { double result = 0; double[] x = new double[0]; double[] y = new double[0]; int i = 0; int j = 0; int k = 0; int i_ = 0; x = new double[df.nvars-1+1]; y = new double[df.nclasses-1+1]; result = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=df.nvars-1;i_++) { x[i_] = xy[i,i_]; } dfprocess(df, x, ref y); if( df.nclasses>1 ) { // // classification-specific code // k = (int)Math.Round(xy[i,df.nvars]); for(j=0; j<=df.nclasses-1; j++) { if( j==k ) { result = result+Math.Abs(y[j]-1); } else { result = result+Math.Abs(y[j]); } } } else { // // regression-specific code // result = result+Math.Abs(y[0]-xy[i,df.nvars]); } } result = result/(npoints*df.nclasses); return result; } /************************************************************************* Average relative error on the test set INPUT PARAMETERS: DF - decision forest model XY - test set NPoints - test set size RESULT: Its meaning for regression task is obvious. As for classification task, it means average relative error when estimating posterior probability of belonging to the correct class. 
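(illustrative note) Only cases with a well-defined denominator are
averaged: for classification each point contributes |Y[K]-1| for its
true class K; for regression, points with zero target are skipped, as
can be seen from the RelCnt counter in the code below.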
-- ALGLIB -- Copyright 16.02.2009 by Bochkanov Sergey *************************************************************************/ public static double dfavgrelerror(decisionforest df, double[,] xy, int npoints) { double result = 0; double[] x = new double[0]; double[] y = new double[0]; int relcnt = 0; int i = 0; int j = 0; int k = 0; int i_ = 0; x = new double[df.nvars-1+1]; y = new double[df.nclasses-1+1]; result = 0; relcnt = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=df.nvars-1;i_++) { x[i_] = xy[i,i_]; } dfprocess(df, x, ref y); if( df.nclasses>1 ) { // // classification-specific code // k = (int)Math.Round(xy[i,df.nvars]); for(j=0; j<=df.nclasses-1; j++) { if( j==k ) { result = result+Math.Abs(y[j]-1); relcnt = relcnt+1; } } } else { // // regression-specific code // if( (double)(xy[i,df.nvars])!=(double)(0) ) { result = result+Math.Abs((y[0]-xy[i,df.nvars])/xy[i,df.nvars]); relcnt = relcnt+1; } } } if( relcnt>0 ) { result = result/relcnt; } return result; } /************************************************************************* Copying of DecisionForest structure INPUT PARAMETERS: DF1 - original OUTPUT PARAMETERS: DF2 - copy -- ALGLIB -- Copyright 13.02.2009 by Bochkanov Sergey *************************************************************************/ public static void dfcopy(decisionforest df1, decisionforest df2) { int i_ = 0; df2.nvars = df1.nvars; df2.nclasses = df1.nclasses; df2.ntrees = df1.ntrees; df2.bufsize = df1.bufsize; df2.trees = new double[df1.bufsize-1+1]; for(i_=0; i_<=df1.bufsize-1;i_++) { df2.trees[i_] = df1.trees[i_]; } } /************************************************************************* Serializer: allocation -- ALGLIB -- Copyright 14.03.2011 by Bochkanov Sergey *************************************************************************/ public static void dfalloc(alglib.serializer s, decisionforest forest) { s.alloc_entry(); s.alloc_entry(); s.alloc_entry(); s.alloc_entry(); s.alloc_entry(); s.alloc_entry(); apserv.allocrealarray(s, forest.trees, forest.bufsize); } /************************************************************************* Serializer: serialization -- ALGLIB -- Copyright 14.03.2011 by Bochkanov Sergey *************************************************************************/ public static void dfserialize(alglib.serializer s, decisionforest forest) { s.serialize_int(scodes.getrdfserializationcode()); s.serialize_int(dffirstversion); s.serialize_int(forest.nvars); s.serialize_int(forest.nclasses); s.serialize_int(forest.ntrees); s.serialize_int(forest.bufsize); apserv.serializerealarray(s, forest.trees, forest.bufsize); } /************************************************************************* Serializer: unserialization -- ALGLIB -- Copyright 14.03.2011 by Bochkanov Sergey *************************************************************************/ public static void dfunserialize(alglib.serializer s, decisionforest forest) { int i0 = 0; int i1 = 0; // // check correctness of header // i0 = s.unserialize_int(); alglib.ap.assert(i0==scodes.getrdfserializationcode(), "DFUnserialize: stream header corrupted"); i1 = s.unserialize_int(); alglib.ap.assert(i1==dffirstversion, "DFUnserialize: stream header corrupted"); // // Unserialize data // forest.nvars = s.unserialize_int(); forest.nclasses = s.unserialize_int(); forest.ntrees = s.unserialize_int(); forest.bufsize = s.unserialize_int(); apserv.unserializerealarray(s, ref forest.trees); } /************************************************************************* Classification error
*************************************************************************/ private static int dfclserror(decisionforest df, double[,] xy, int npoints) { int result = 0; double[] x = new double[0]; double[] y = new double[0]; int i = 0; int j = 0; int k = 0; int tmpi = 0; int i_ = 0; if( df.nclasses<=1 ) { result = 0; return result; } x = new double[df.nvars-1+1]; y = new double[df.nclasses-1+1]; result = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=df.nvars-1;i_++) { x[i_] = xy[i,i_]; } dfprocess(df, x, ref y); k = (int)Math.Round(xy[i,df.nvars]); tmpi = 0; for(j=1; j<=df.nclasses-1; j++) { if( (double)(y[j])>(double)(y[tmpi]) ) { tmpi = j; } } if( tmpi!=k ) { result = result+1; } } return result; } /************************************************************************* Internal subroutine for processing one decision tree starting at Offs *************************************************************************/ private static void dfprocessinternal(decisionforest df, int offs, double[] x, ref double[] y) { int k = 0; int idx = 0; // // Set pointer to the root // k = offs+1; // // Navigate through the tree // while( true ) { if( (double)(df.trees[k])==(double)(-1) ) { if( df.nclasses==1 ) { y[0] = y[0]+df.trees[k+1]; } else { idx = (int)Math.Round(df.trees[k+1]); y[idx] = y[idx]+1; } break; } if( (double)(x[(int)Math.Round(df.trees[k])])<(double)(df.trees[k+1]) ) { k = k+innernodewidth; } else { k = offs+(int)Math.Round(df.trees[k+2]); } } } /************************************************************************* Builds one decision tree. Just a wrapper for the DFBuildTreeRec. *************************************************************************/ private static void dfbuildtree(double[,] xy, int npoints, int nvars, int nclasses, int nfeatures, int nvarsinpool, int flags, dfinternalbuffers bufs) { int numprocessed = 0; int i = 0; alglib.ap.assert(npoints>0); // // Prepare IdxBuf. It stores indices of the training set elements. // When training set is being split, contents of IdxBuf are // correspondingly reordered, so we know which elements belong // to which branch of the decision tree.
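// // Note on storage format (it can be read off DFProcessInternal above): the // assembled tree is written to Bufs.TreeBuf, where element 0 holds the total // number of elements used by the tree; an inner node occupies InnerNodeWidth // elements (variable index, threshold, offset of the right child), and a // leaf occupies LeafNodeWidth elements (a -1 marker followed by the // predicted class/value).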
// for(i=0; i<=npoints-1; i++) { bufs.idxbuf[i] = i; } // // Recursive procedure // numprocessed = 1; dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, 0, npoints-1, bufs); bufs.treebuf[0] = numprocessed; } /************************************************************************* Builds one decision tree (internal recursive subroutine) Parameters: TreeBuf - large enough array, at least TreeSize IdxBuf - at least NPoints elements TmpBufR - at least NPoints TmpBufR2 - at least NPoints TmpBufI - at least NPoints TmpBufI2 - at least NPoints+1 *************************************************************************/ private static void dfbuildtreerec(double[,] xy, int npoints, int nvars, int nclasses, int nfeatures, int nvarsinpool, int flags, ref int numprocessed, int idx1, int idx2, dfinternalbuffers bufs) { int i = 0; int j = 0; int k = 0; bool bflag = new bool(); int i1 = 0; int i2 = 0; int info = 0; double sl = 0; double sr = 0; double w = 0; int idxbest = 0; double ebest = 0; double tbest = 0; int varcur = 0; double s = 0; double v = 0; double v1 = 0; double v2 = 0; double threshold = 0; int oldnp = 0; double currms = 0; bool useevs = new bool(); // // these initializers are not really necessary, // but without them compiler complains about uninitialized locals // tbest = 0; // // Prepare // alglib.ap.assert(npoints>0); alglib.ap.assert(idx2>=idx1); useevs = flags/dfuseevs%2!=0; // // Leaf node // if( idx2==idx1 ) { bufs.treebuf[numprocessed] = -1; bufs.treebuf[numprocessed+1] = xy[bufs.idxbuf[idx1],nvars]; numprocessed = numprocessed+leafnodewidth; return; } // // Non-leaf node. // Select random variable, prepare split: // 1. prepare default solution - no splitting, class at random // 2. investigate possible splits, compare with default/best // idxbest = -1; if( nclasses>1 ) { // // default solution for classification // for(i=0; i<=nclasses-1; i++) { bufs.classibuf[i] = 0; } s = idx2-idx1+1; for(i=idx1; i<=idx2; i++) { j = (int)Math.Round(xy[bufs.idxbuf[i],nvars]); bufs.classibuf[j] = bufs.classibuf[j]+1; } ebest = 0; for(i=0; i<=nclasses-1; i++) { ebest = ebest+bufs.classibuf[i]*math.sqr(1-bufs.classibuf[i]/s)+(s-bufs.classibuf[i])*math.sqr(bufs.classibuf[i]/s); } ebest = Math.Sqrt(ebest/(nclasses*(idx2-idx1+1))); } else { // // default solution for regression // v = 0; for(i=idx1; i<=idx2; i++) { v = v+xy[bufs.idxbuf[i],nvars]; } v = v/(idx2-idx1+1); ebest = 0; for(i=idx1; i<=idx2; i++) { ebest = ebest+math.sqr(xy[bufs.idxbuf[i],nvars]-v); } ebest = Math.Sqrt(ebest/(idx2-idx1+1)); } i = 0; while( i<=Math.Min(nfeatures, nvarsinpool)-1 ) { // // select variables from pool // j = i+math.randominteger(nvarsinpool-i); k = bufs.varpool[i]; bufs.varpool[i] = bufs.varpool[j]; bufs.varpool[j] = k; varcur = bufs.varpool[i]; // // load variable values to working array // // apply EVS preprocessing: if all variable values are same, // variable is excluded from pool. // // This is necessary for binary pre-splits (see later) to work. // for(j=idx1; j<=idx2; j++) { bufs.tmpbufr[j-idx1] = xy[bufs.idxbuf[j],varcur]; } if( useevs ) { bflag = false; v = bufs.tmpbufr[0]; for(j=0; j<=idx2-idx1; j++) { if( (double)(bufs.tmpbufr[j])!=(double)(v) ) { bflag = true; break; } } if( !bflag ) { // // exclude variable from pool, // go to the next iteration. // I is not increased. 
// k = bufs.varpool[i]; bufs.varpool[i] = bufs.varpool[nvarsinpool-1]; bufs.varpool[nvarsinpool-1] = k; nvarsinpool = nvarsinpool-1; continue; } } // // load labels to working array // if( nclasses>1 ) { for(j=idx1; j<=idx2; j++) { bufs.tmpbufi[j-idx1] = (int)Math.Round(xy[bufs.idxbuf[j],nvars]); } } else { for(j=idx1; j<=idx2; j++) { bufs.tmpbufr2[j-idx1] = xy[bufs.idxbuf[j],nvars]; } } // // calculate split // if( useevs && bufs.evsbin[varcur] ) { // // Pre-calculated splits for binary variables. // Threshold is already known, just calculate RMS error // threshold = bufs.evssplits[varcur]; if( nclasses>1 ) { // // classification-specific code // for(j=0; j<=2*nclasses-1; j++) { bufs.classibuf[j] = 0; } sl = 0; sr = 0; for(j=0; j<=idx2-idx1; j++) { k = bufs.tmpbufi[j]; if( (double)(bufs.tmpbufr[j])<(double)(threshold) ) { bufs.classibuf[k] = bufs.classibuf[k]+1; sl = sl+1; } else { bufs.classibuf[k+nclasses] = bufs.classibuf[k+nclasses]+1; sr = sr+1; } } alglib.ap.assert((double)(sl)!=(double)(0) && (double)(sr)!=(double)(0), "DFBuildTreeRec: something strange!"); currms = 0; for(j=0; j<=nclasses-1; j++) { w = bufs.classibuf[j]; currms = currms+w*math.sqr(w/sl-1); currms = currms+(sl-w)*math.sqr(w/sl); w = bufs.classibuf[nclasses+j]; currms = currms+w*math.sqr(w/sr-1); currms = currms+(sr-w)*math.sqr(w/sr); } currms = Math.Sqrt(currms/(nclasses*(idx2-idx1+1))); } else { // // regression-specific code // sl = 0; sr = 0; v1 = 0; v2 = 0; for(j=0; j<=idx2-idx1; j++) { if( (double)(bufs.tmpbufr[j])<(double)(threshold) ) { v1 = v1+bufs.tmpbufr2[j]; sl = sl+1; } else { v2 = v2+bufs.tmpbufr2[j]; sr = sr+1; } } alglib.ap.assert((double)(sl)!=(double)(0) && (double)(sr)!=(double)(0), "DFBuildTreeRec: something strange!"); v1 = v1/sl; v2 = v2/sr; currms = 0; for(j=0; j<=idx2-idx1; j++) { if( (double)(bufs.tmpbufr[j])<(double)(threshold) ) { currms = currms+math.sqr(v1-bufs.tmpbufr2[j]); } else { currms = currms+math.sqr(v2-bufs.tmpbufr2[j]); } } currms = Math.Sqrt(currms/(idx2-idx1+1)); } info = 1; } else { // // Generic splits // if( nclasses>1 ) { dfsplitc(ref bufs.tmpbufr, ref bufs.tmpbufi, ref bufs.classibuf, idx2-idx1+1, nclasses, dfusestrongsplits, ref info, ref threshold, ref currms, ref bufs.sortrbuf, ref bufs.sortibuf); } else { dfsplitr(ref bufs.tmpbufr, ref bufs.tmpbufr2, idx2-idx1+1, dfusestrongsplits, ref info, ref threshold, ref currms, ref bufs.sortrbuf, ref bufs.sortrbuf2); } } if( info>0 ) { if( (double)(currms)<=(double)(ebest) ) { ebest = currms; idxbest = varcur; tbest = threshold; } } // // Next iteration // i = i+1; } // // to split or not to split // if( idxbest<0 ) { // // All values are same, cannot split. // bufs.treebuf[numprocessed] = -1; if( nclasses>1 ) { // // Select random class label (randomness allows us to // approximate distribution of the classes) // bufs.treebuf[numprocessed+1] = (int)Math.Round(xy[bufs.idxbuf[idx1+math.randominteger(idx2-idx1+1)],nvars]); } else { // // Select average (for regression task). 
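// (the resulting leaf stores the mean of the training targets which // reached this node)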
// v = 0; for(i=idx1; i<=idx2; i++) { v = v+xy[bufs.idxbuf[i],nvars]/(idx2-idx1+1); } bufs.treebuf[numprocessed+1] = v; } numprocessed = numprocessed+leafnodewidth; } else { // // we can split // bufs.treebuf[numprocessed] = idxbest; bufs.treebuf[numprocessed+1] = tbest; i1 = idx1; i2 = idx2; while( i1<=i2 ) { // // Reorder indices so that left partition is in [Idx1..I1-1], // and right partition is in [I2+1..Idx2] // if( (double)(xy[bufs.idxbuf[i1],idxbest])<(double)(tbest) ) { i1 = i1+1; continue; } if( (double)(xy[bufs.idxbuf[i2],idxbest])>=(double)(tbest) ) { i2 = i2-1; continue; } j = bufs.idxbuf[i1]; bufs.idxbuf[i1] = bufs.idxbuf[i2]; bufs.idxbuf[i2] = j; i1 = i1+1; i2 = i2-1; } oldnp = numprocessed; numprocessed = numprocessed+innernodewidth; dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, idx1, i1-1, bufs); bufs.treebuf[oldnp+2] = numprocessed; dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, i2+1, idx2, bufs); } } /************************************************************************* Makes split on attribute *************************************************************************/ private static void dfsplitc(ref double[] x, ref int[] c, ref int[] cntbuf, int n, int nc, int flags, ref int info, ref double threshold, ref double e, ref double[] sortrbuf, ref int[] sortibuf) { int i = 0; int neq = 0; int nless = 0; int ngreater = 0; int q = 0; int qmin = 0; int qmax = 0; int qcnt = 0; double cursplit = 0; int nleft = 0; double v = 0; double cure = 0; double w = 0; double sl = 0; double sr = 0; info = 0; threshold = 0; e = 0; tsort.tagsortfasti(ref x, ref c, ref sortrbuf, ref sortibuf, n); e = math.maxrealnumber; threshold = 0.5*(x[0]+x[n-1]); info = -3; if( flags/dfusestrongsplits%2==0 ) { // // weak splits, split at half // qcnt = 2; qmin = 1; qmax = 1; } else { // // strong splits: choose best quartile // qcnt = 4; qmin = 1; qmax = 3; } for(q=qmin; q<=qmax; q++) { cursplit = x[n*q/qcnt]; neq = 0; nless = 0; ngreater = 0; for(i=0; i<=n-1; i++) { if( (double)(x[i])<(double)(cursplit) ) { nless = nless+1; } if( (double)(x[i])==(double)(cursplit) ) { neq = neq+1; } if( (double)(x[i])>(double)(cursplit) ) { ngreater = ngreater+1; } } alglib.ap.assert(neq!=0, "DFSplitC: NEq=0, something strange!!!"); if( nless!=0 || ngreater!=0 ) { // // set threshold between two partitions, with // some tweaking to avoid problems with floating point // arithmetics. // // The problem is that when you calculate C = 0.5*(A+B) there // can be no C which lies strictly between A and B (for example, // there is no floating point number which is // greater than 1 and less than 1+eps). In such situations // we choose right side as threshold (remember that // points which lie on the threshold fall to the right side). // if( nless<ngreater ) { cursplit = 0.5*(x[nless+neq-1]+x[nless+neq]); nleft = nless+neq; if( (double)(cursplit)==(double)(x[nless+neq-1]) ) { cursplit = x[nless+neq]; } } else { cursplit = 0.5*(x[nless-1]+x[nless]); nleft = nless; if( (double)(cursplit)==(double)(x[nless-1]) ) { cursplit = x[nless]; } } info = 1; cure = 0; for(i=0; i<=2*nc-1; i++) { cntbuf[i] = 0; } for(i=0; i<=nleft-1; i++) { cntbuf[c[i]] = cntbuf[c[i]]+1; } for(i=nleft; i<=n-1; i++) { cntbuf[nc+c[i]] = cntbuf[nc+c[i]]+1; } sl = nleft; sr = n-nleft; v = 0; for(i=0; i<=nc-1; i++) { w = cntbuf[i]; v = v+w*math.sqr(w/sl-1); v = v+(sl-w)*math.sqr(w/sl); w = cntbuf[nc+i]; v = v+w*math.sqr(w/sr-1); v = v+(sr-w)*math.sqr(w/sr); } cure = Math.Sqrt(v/(nc*n)); if( (double)(cure)<(double)(e) ) { threshold = cursplit; e = cure; } } } } /************************************************************************* Makes split on attribute *************************************************************************/ private static void dfsplitr(ref double[] x, ref double[] y, int n, int flags, ref int info, ref double threshold, ref double e, ref double[] sortrbuf, ref double[] sortrbuf2) { int i = 0; int neq = 0; int nless = 0; int ngreater = 0; int q = 0; int qmin = 0; int qmax = 0; int qcnt = 0; double cursplit = 0; int nleft = 0; double v = 0; double cure = 0; info = 0; threshold = 0; e = 0; tsort.tagsortfastr(ref x, ref y, ref sortrbuf, ref sortrbuf2, n); e = math.maxrealnumber; threshold = 0.5*(x[0]+x[n-1]); info = -3; if( flags/dfusestrongsplits%2==0 ) { // // weak splits, split at half // qcnt = 2; qmin = 1; qmax = 1; } else { // // strong splits: choose best quartile // qcnt = 4; qmin = 1; qmax = 3; } for(q=qmin; q<=qmax; q++) { cursplit = x[n*q/qcnt]; neq = 0; nless = 0; ngreater = 0; for(i=0; i<=n-1; i++) { if( (double)(x[i])<(double)(cursplit) ) { nless = nless+1; } if( (double)(x[i])==(double)(cursplit) ) { neq = neq+1; } if( (double)(x[i])>(double)(cursplit) ) { ngreater = ngreater+1; } } alglib.ap.assert(neq!=0, "DFSplitR: NEq=0, something strange!!!"); if( nless!=0 || ngreater!=0 ) { // // set threshold between two partitions, with // some tweaking to avoid problems with floating point // arithmetics. // // The problem is that when you calculate C = 0.5*(A+B) there // can be no C which lies strictly between A and B (for example, // there is no floating point number which is // greater than 1 and less than 1+eps). In such situations // we choose right side as threshold (remember that // points which lie on the threshold fall to the right side).
// if( nless<ngreater ) { cursplit = 0.5*(x[nless+neq-1]+x[nless+neq]); nleft = nless+neq; if( (double)(cursplit)==(double)(x[nless+neq-1]) ) { cursplit = x[nless+neq]; } } else { cursplit = 0.5*(x[nless-1]+x[nless]); nleft = nless; if( (double)(cursplit)==(double)(x[nless-1]) ) { cursplit = x[nless]; } } info = 1; cure = 0; v = 0; for(i=0; i<=nleft-1; i++) { v = v+y[i]; } v = v/nleft; for(i=0; i<=nleft-1; i++) { cure = cure+math.sqr(y[i]-v); } v = 0; for(i=nleft; i<=n-1; i++) { v = v+y[i]; } v = v/(n-nleft); for(i=nleft; i<=n-1; i++) { cure = cure+math.sqr(y[i]-v); } cure = Math.Sqrt(cure/n); if( (double)(cure)<(double)(e) ) { threshold = cursplit; e = cure; } } } } } public class linreg { public class linearmodel : apobject { public double[] w; public linearmodel() { init(); } public override void init() { w = new double[0]; } public override alglib.apobject make_copy() { linearmodel _result = new linearmodel(); _result.w = (double[])w.Clone(); return _result; } }; /************************************************************************* LRReport structure contains additional information about linear model: * C - covariation matrix, array[0..NVars,0..NVars], C[i,j] = Cov(A[i],A[j]) * RMSError - root mean square error on a training set * AvgError - average error on a training set * AvgRelError - average relative error on a training set * CVRMSError - leave-one-out cross-validation estimate of generalization error * CVAvgError - cross-validation estimate of average error * CVAvgRelError - cross-validation estimate of average relative error * NCVDefects - number of defects in leave-one-out cross-validation * CVDefects - array of defect indexes *************************************************************************/ public class lrreport : apobject { public double[,] c; public double rmserror; public double avgerror; public double avgrelerror; public double cvrmserror; public double cvavgerror; public double cvavgrelerror; public int ncvdefects; public int[] cvdefects; public lrreport() { init(); } public override void init() { c = new double[0,0]; cvdefects = new int[0]; } public override alglib.apobject make_copy() { lrreport _result = new lrreport(); _result.c = (double[,])c.Clone(); _result.rmserror = rmserror; _result.avgerror = avgerror; _result.avgrelerror = avgrelerror; _result.cvrmserror = cvrmserror; _result.cvavgerror = cvavgerror; _result.cvavgrelerror = cvavgrelerror; _result.ncvdefects = ncvdefects; _result.cvdefects = (int[])cvdefects.Clone(); return _result; } }; public const int lrvnum = 5; /************************************************************************* Linear regression Subroutine builds model: Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N) and model found in ALGLIB format, covariation matrix, training set errors (rms, average, average relative) and leave-one-out cross-validation estimate of the generalization error. CV estimate calculated using fast algorithm with O(NPoints*NVars) complexity. When covariation matrix is calculated standard deviations of function values are assumed to be equal to RMS error on the training set. INPUT PARAMETERS: XY - training set, array [0..NPoints-1,0..NVars]: * NVars columns - independent variables * last column - dependent variable NPoints - training set size, NPoints>NVars+1 NVars - number of independent variables OUTPUT PARAMETERS: Info - return code: * -255, in case of unknown internal error * -4, if internal SVD subroutine haven't converged * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1). * 1, if subroutine successfully finished LM - linear model in the ALGLIB format. Use subroutines of this unit to work with the model. AR - additional results -- ALGLIB -- Copyright 02.08.2008 by Bochkanov Sergey *************************************************************************/ public static void lrbuild(double[,] xy, int npoints, int nvars, ref int info, linearmodel lm, lrreport ar) { double[] s = new double[0]; int i = 0; double sigma2 = 0; int i_ = 0; info = 0; if( npoints<=nvars+1 || nvars<1 ) { info = -1; return; } s = new double[npoints-1+1]; for(i=0; i<=npoints-1; i++) { s[i] = 1; } lrbuilds(xy, s, npoints, nvars, ref info, lm, ar); if( info<0 ) { return; } sigma2 = math.sqr(ar.rmserror)*npoints/(npoints-nvars-1); for(i=0; i<=nvars; i++) { for(i_=0; i_<=nvars;i_++) { ar.c[i,i_] = sigma2*ar.c[i,i_]; } } } /************************************************************************* Linear regression Variant of LRBuild which uses vector of standard deviations (errors in function values). INPUT PARAMETERS: XY - training set, array [0..NPoints-1,0..NVars]: * NVars columns - independent variables * last column - dependent variable S - standard deviations (errors in function values), array[0..NPoints-1], S[I]>0. NPoints - training set size, NPoints>NVars+1 NVars - number of independent variables OUTPUT PARAMETERS: Info - return code: * -255, in case of unknown internal error * -4, if internal SVD subroutine haven't converged * -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1). * -2, if S[I]<=0 * 1, if subroutine successfully finished LM - linear model in the ALGLIB format. Use subroutines of this unit to work with the model. AR - additional results -- ALGLIB -- Copyright 02.08.2008 by Bochkanov Sergey *************************************************************************/ public static void lrbuilds(double[,] xy, double[] s, int npoints, int nvars, ref int info, linearmodel lm, lrreport ar) { double[,] xyi = new double[0,0]; double[] x = new double[0]; double[] means = new double[0]; double[] sigmas = new double[0]; int i = 0; int j = 0; double v = 0; int offs = 0; double mean = 0; double variance = 0; double skewness = 0; double kurtosis = 0; int i_ = 0; info = 0; // // Test parameters // if( npoints<=nvars+1 || nvars<1 ) { info = -1; return; } // // Copy data, add one more column (constant term) // xyi = new double[npoints-1+1, nvars+1+1]; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nvars-1;i_++) { xyi[i,i_] = xy[i,i_]; } xyi[i,nvars] = 1; xyi[i,nvars+1] = xy[i,nvars]; } // // Standartization // x = new double[npoints-1+1]; means = new double[nvars-1+1]; sigmas = new double[nvars-1+1]; for(j=0; j<=nvars-1; j++) { for(i_=0; i_<=npoints-1;i_++) { x[i_] = xy[i_,j]; } basestat.samplemoments(x, npoints, ref mean, ref variance, ref skewness, ref kurtosis); means[j] = mean; sigmas[j] = Math.Sqrt(variance); if( (double)(sigmas[j])==(double)(0) ) { sigmas[j] = 1; } for(i=0; i<=npoints-1; i++) { xyi[i,j] = (xyi[i,j]-means[j])/sigmas[j]; } } // // Internal processing // lrinternal(xyi, s, npoints, nvars+1, ref info, lm, ar); if( info<0 ) { return; } // // Un-standartization // offs = (int)Math.Round(lm.w[3]); for(j=0; j<=nvars-1; j++) { // // Constant term is updated (and its covariance too, // since it gets some variance from J-th component) // lm.w[offs+nvars] = lm.w[offs+nvars]-lm.w[offs+j]*means[j]/sigmas[j]; v = means[j]/sigmas[j]; for(i_=0; i_<=nvars;i_++) { ar.c[nvars,i_] = ar.c[nvars,i_] - v*ar.c[j,i_]; } for(i_=0; i_<=nvars;i_++) { ar.c[i_,nvars] = ar.c[i_,nvars] - v*ar.c[i_,j]; } // // J-th term is updated // lm.w[offs+j] = lm.w[offs+j]/sigmas[j]; v = 1/sigmas[j]; for(i_=0; i_<=nvars;i_++) { ar.c[j,i_] = v*ar.c[j,i_]; } for(i_=0; i_<=nvars;i_++) { ar.c[i_,j] = v*ar.c[i_,j]; } } } /************************************************************************* Like LRBuildS, but builds model Y = A(0)*X[0] + ... + A(N-1)*X[N-1] i.e. with zero constant term. -- ALGLIB -- Copyright 30.10.2008 by Bochkanov Sergey *************************************************************************/ public static void lrbuildzs(double[,] xy, double[] s, int npoints, int nvars, ref int info, linearmodel lm, lrreport ar) { double[,] xyi = new double[0,0]; double[] x = new double[0]; double[] c = new double[0]; int i = 0; int j = 0; double v = 0; int offs = 0; double mean = 0; double variance = 0; double skewness = 0; double kurtosis = 0; int i_ = 0; info = 0; // // Test parameters // if( npoints<=nvars+1 || nvars<1 ) { info = -1; return; } // // Copy data, add one more column (constant term) // xyi = new double[npoints-1+1, nvars+1+1]; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nvars-1;i_++) { xyi[i,i_] = xy[i,i_]; } xyi[i,nvars] = 0; xyi[i,nvars+1] = xy[i,nvars]; } // // Standartization: unusual scaling // x = new double[npoints-1+1]; c = new double[nvars-1+1]; for(j=0; j<=nvars-1; j++) { for(i_=0; i_<=npoints-1;i_++) { x[i_] = xy[i_,j]; } basestat.samplemoments(x, npoints, ref mean, ref variance, ref skewness, ref kurtosis); if( (double)(Math.Abs(mean))>(double)(Math.Sqrt(variance)) ) { // // variation is relatively small, it is better to // bring mean value to 1 // c[j] = mean; } else { // // variation is large, it is better to bring variance to 1 // if( (double)(variance)==(double)(0) ) { variance = 1; } c[j] = Math.Sqrt(variance); } for(i=0; i<=npoints-1; i++) { xyi[i,j] = xyi[i,j]/c[j]; } } // // Internal processing // lrinternal(xyi, s, npoints, nvars+1, ref info, lm, ar); if( info<0 ) { return; } // // Un-standartization // offs = (int)Math.Round(lm.w[3]); for(j=0; j<=nvars-1; j++) { // // J-th term is updated // lm.w[offs+j] = lm.w[offs+j]/c[j]; v = 1/c[j]; for(i_=0; i_<=nvars;i_++) { ar.c[j,i_] = v*ar.c[j,i_]; } for(i_=0; i_<=nvars;i_++) { ar.c[i_,j] = v*ar.c[i_,j]; } } } /************************************************************************* Like LRBuild but builds model Y = A(0)*X[0] + ... + A(N-1)*X[N-1] i.e. with zero constant term. -- ALGLIB -- Copyright 30.10.2008 by Bochkanov Sergey *************************************************************************/ public static void lrbuildz(double[,] xy, int npoints, int nvars, ref int info, linearmodel lm, lrreport ar) { double[] s = new double[0]; int i = 0; double sigma2 = 0; int i_ = 0; info = 0; if( npoints<=nvars+1 || nvars<1 ) { info = -1; return; } s = new double[npoints-1+1]; for(i=0; i<=npoints-1; i++) { s[i] = 1; } lrbuildzs(xy, s, npoints, nvars, ref info, lm, ar); if( info<0 ) { return; } sigma2 = math.sqr(ar.rmserror)*npoints/(npoints-nvars-1); for(i=0; i<=nvars; i++) { for(i_=0; i_<=nvars;i_++) { ar.c[i,i_] = sigma2*ar.c[i,i_]; } } } /************************************************************************* Unpacks coefficients of linear model. INPUT PARAMETERS: LM - linear model in ALGLIB format OUTPUT PARAMETERS: V - coefficients, array[0..NVars] constant term (intercept) is stored in the V[NVars]. NVars - number of independent variables (one less than number of coefficients) -- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static void lrunpack(linearmodel lm, ref double[] v, ref int nvars) { int offs = 0; int i_ = 0; int i1_ = 0; v = new double[0]; nvars = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!"); nvars = (int)Math.Round(lm.w[2]); offs = (int)Math.Round(lm.w[3]); v = new double[nvars+1]; i1_ = (offs) - (0); for(i_=0; i_<=nvars;i_++) { v[i_] = lm.w[i_+i1_]; } } /************************************************************************* "Packs" coefficients and creates linear model in ALGLIB format (LRUnpack reversed). INPUT PARAMETERS: V - coefficients, array[0..NVars] NVars - number of independent variables OUTPUT PARAMETERS: LM - linear model.
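EXAMPLE (minimal sketch, using only functions of this unit): the model
y = 2*x0 + 3 can be assembled by hand and evaluated with LRProcess:

    double[] v = new double[]{2.0, 3.0};          // A(0)=2, constant term A(1)=3
    linearmodel lm = new linearmodel();
    lrpack(v, 1, lm);
    double y = lrprocess(lm, new double[]{5.0});  // = 2*5+3 = 13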
-- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static void lrpack(double[] v, int nvars, linearmodel lm) { int offs = 0; int i_ = 0; int i1_ = 0; lm.w = new double[4+nvars+1]; offs = 4; lm.w[0] = 4+nvars+1; lm.w[1] = lrvnum; lm.w[2] = nvars; lm.w[3] = offs; i1_ = (0) - (offs); for(i_=offs; i_<=offs+nvars;i_++) { lm.w[i_] = v[i_+i1_]; } } /************************************************************************* Processing INPUT PARAMETERS: LM - linear model X - input vector, array[0..NVars-1]. Result: value of linear model regression estimate -- ALGLIB -- Copyright 03.09.2008 by Bochkanov Sergey *************************************************************************/ public static double lrprocess(linearmodel lm, double[] x) { double result = 0; double v = 0; int offs = 0; int nvars = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!"); nvars = (int)Math.Round(lm.w[2]); offs = (int)Math.Round(lm.w[3]); i1_ = (offs)-(0); v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += x[i_]*lm.w[i_+i1_]; } result = v+lm.w[offs+nvars]; return result; } /************************************************************************* RMS error on the test set INPUT PARAMETERS: LM - linear model XY - test set NPoints - test set size RESULT: root mean square error. -- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static double lrrmserror(linearmodel lm, double[,] xy, int npoints) { double result = 0; int i = 0; double v = 0; int offs = 0; int nvars = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!"); nvars = (int)Math.Round(lm.w[2]); offs = (int)Math.Round(lm.w[3]); result = 0; for(i=0; i<=npoints-1; i++) { i1_ = (offs)-(0); v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += xy[i,i_]*lm.w[i_+i1_]; } v = v+lm.w[offs+nvars]; result = result+math.sqr(v-xy[i,nvars]); } result = Math.Sqrt(result/npoints); return result; } /************************************************************************* Average error on the test set INPUT PARAMETERS: LM - linear model XY - test set NPoints - test set size RESULT: average error. -- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static double lravgerror(linearmodel lm, double[,] xy, int npoints) { double result = 0; int i = 0; double v = 0; int offs = 0; int nvars = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!"); nvars = (int)Math.Round(lm.w[2]); offs = (int)Math.Round(lm.w[3]); result = 0; for(i=0; i<=npoints-1; i++) { i1_ = (offs)-(0); v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += xy[i,i_]*lm.w[i_+i1_]; } v = v+lm.w[offs+nvars]; result = result+Math.Abs(v-xy[i,nvars]); } result = result/npoints; return result; } /************************************************************************* Average relative error on the test set INPUT PARAMETERS: LM - linear model XY - test set NPoints - test set size RESULT: average relative error.
-- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static double lravgrelerror(linearmodel lm, double[,] xy, int npoints) { double result = 0; int i = 0; int k = 0; double v = 0; int offs = 0; int nvars = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==lrvnum, "LINREG: Incorrect LINREG version!"); nvars = (int)Math.Round(lm.w[2]); offs = (int)Math.Round(lm.w[3]); result = 0; k = 0; for(i=0; i<=npoints-1; i++) { if( (double)(xy[i,nvars])!=(double)(0) ) { i1_ = (offs)-(0); v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += xy[i,i_]*lm.w[i_+i1_]; } v = v+lm.w[offs+nvars]; result = result+Math.Abs((v-xy[i,nvars])/xy[i,nvars]); k = k+1; } } if( k!=0 ) { result = result/k; } return result; } /************************************************************************* Copying of LinearModel structure INPUT PARAMETERS: LM1 - original OUTPUT PARAMETERS: LM2 - copy -- ALGLIB -- Copyright 15.03.2009 by Bochkanov Sergey *************************************************************************/ public static void lrcopy(linearmodel lm1, linearmodel lm2) { int k = 0; int i_ = 0; k = (int)Math.Round(lm1.w[0]); lm2.w = new double[k-1+1]; for(i_=0; i_<=k-1;i_++) { lm2.w[i_] = lm1.w[i_]; } } public static void lrlines(double[,] xy, double[] s, int n, ref int info, ref double a, ref double b, ref double vara, ref double varb, ref double covab, ref double corrab, ref double p) { int i = 0; double ss = 0; double sx = 0; double sxx = 0; double sy = 0; double stt = 0; double e1 = 0; double e2 = 0; double t = 0; double chi2 = 0; info = 0; a = 0; b = 0; vara = 0; varb = 0; covab = 0; corrab = 0; p = 0; if( n<2 ) { info = -1; return; } for(i=0; i<=n-1; i++) { if( (double)(s[i])<=(double)(0) ) { info = -2; return; } } info = 1; // // Calculate S, SX, SY, SXX // ss = 0; sx = 0; sy = 0; sxx = 0; for(i=0; i<=n-1; i++) { t = math.sqr(s[i]); ss = ss+1/t; sx = sx+xy[i,0]/t; sy = sy+xy[i,1]/t; sxx = sxx+math.sqr(xy[i,0])/t; } // // Test for condition number // t = Math.Sqrt(4*math.sqr(sx)+math.sqr(ss-sxx)); e1 = 0.5*(ss+sxx+t); e2 = 0.5*(ss+sxx-t); if( (double)(Math.Min(e1, e2))<=(double)(1000*math.machineepsilon*Math.Max(e1, e2)) ) { info = -3; return; } // // Calculate A, B // a = 0; b = 0; stt = 0; for(i=0; i<=n-1; i++) { t = (xy[i,0]-sx/ss)/s[i]; b = b+t*xy[i,1]/s[i]; stt = stt+math.sqr(t); } b = b/stt; a = (sy-sx*b)/ss; // // Calculate goodness-of-fit // if( n>2 ) { chi2 = 0; for(i=0; i<=n-1; i++) { chi2 = chi2+math.sqr((xy[i,1]-a-b*xy[i,0])/s[i]); } p = igammaf.incompletegammac((double)(n-2)/(double)2, chi2/2); } else { p = 1; } // // Calculate other parameters // vara = (1+math.sqr(sx)/(ss*stt))/ss; varb = 1/stt; covab = -(sx/(ss*stt)); corrab = covab/Math.Sqrt(vara*varb); } public static void lrline(double[,] xy, int n, ref int info, ref double a, ref double b) { double[] s = new double[0]; int i = 0; double vara = 0; double varb = 0; double covab = 0; double corrab = 0; double p = 0; info = 0; a = 0; b = 0; if( n<2 ) { info = -1; return; } s = new double[n-1+1]; for(i=0; i<=n-1; i++) { s[i] = 1; } lrlines(xy, s, n, ref info, ref a, ref b, ref vara, ref varb, ref covab, ref corrab, ref p); } /************************************************************************* Internal linear regression subroutine *************************************************************************/ private static void lrinternal(double[,] xy, double[] s, int npoints, int nvars, ref int info, linearmodel lm, lrreport ar) { double[,] a = new
double[0,0]; double[,] u = new double[0,0]; double[,] vt = new double[0,0]; double[,] vm = new double[0,0]; double[,] xym = new double[0,0]; double[] b = new double[0]; double[] sv = new double[0]; double[] t = new double[0]; double[] svi = new double[0]; double[] work = new double[0]; int i = 0; int j = 0; int k = 0; int ncv = 0; int na = 0; int nacv = 0; double r = 0; double p = 0; double epstol = 0; lrreport ar2 = new lrreport(); int offs = 0; linearmodel tlm = new linearmodel(); int i_ = 0; int i1_ = 0; info = 0; epstol = 1000; // // Check for errors in data // if( npoints<nvars || nvars<1 ) { info = -1; return; } for(i=0; i<=npoints-1; i++) { if( (double)(s[i])<=(double)(0) ) { info = -2; return; } } info = 1; // // Create design matrix // a = new double[npoints-1+1, nvars-1+1]; b = new double[npoints-1+1]; for(i=0; i<=npoints-1; i++) { r = 1/s[i]; for(i_=0; i_<=nvars-1;i_++) { a[i,i_] = r*xy[i,i_]; } b[i] = xy[i,nvars]/s[i]; } // // Allocate W: // W[0] array size // W[1] version number // W[2] NVars (minus 1, to be compatible with external representation) // W[3] coefficients offset // lm.w = new double[4+nvars-1+1]; offs = 4; lm.w[0] = 4+nvars; lm.w[1] = lrvnum; lm.w[2] = nvars-1; lm.w[3] = offs; // // Allocate temporaries (also used by the degenerate branches below) // t = new double[nvars-1+1]; svi = new double[nvars-1+1]; ar.c = new double[nvars-1+1, nvars-1+1]; vm = new double[nvars-1+1, nvars-1+1]; // // Solve task using SVD // if( !svd.rmatrixsvd(a, npoints, nvars, 1, 1, 2, ref sv, ref u, ref vt) ) { info = -4; return; } if( (double)(sv[0])<=(double)(0) ) { // // Degenerate case: zero design matrix. // for(i=offs; i<=offs+nvars-1; i++) { lm.w[i] = 0; } ar.rmserror = lrrmserror(lm, xy, npoints); ar.avgerror = lravgerror(lm, xy, npoints); ar.avgrelerror = lravgrelerror(lm, xy, npoints); ar.cvrmserror = ar.rmserror; ar.cvavgerror = ar.avgerror; ar.cvavgrelerror = ar.avgrelerror; ar.ncvdefects = 0; ar.cvdefects = new int[nvars-1+1]; for(i=0; i<=nvars-1; i++) { for(j=0; j<=nvars-1; j++) { ar.c[i,j] = 0; } } return; } if( (double)(sv[nvars-1])<=(double)(epstol*math.machineepsilon*sv[0]) ) { // // Degenerate case, non-zero design matrix: reduce the task // to a non-degenerate one and solve it recursively. // for(k=nvars; k>=1; k--) { if( (double)(sv[k-1])>(double)(epstol*math.machineepsilon*sv[0]) ) { // // Reduce // xym = new double[npoints-1+1, k+1]; for(i=0; i<=npoints-1; i++) { for(j=0; j<=k-1; j++) { r = 0.0; for(i_=0; i_<=nvars-1;i_++) { r += xy[i,i_]*vt[j,i_]; } xym[i,j] = r; } xym[i,k] = xy[i,nvars]; } // // Solve // lrinternal(xym, s, npoints, k, ref info, tlm, ar2); if( info!=1 ) { return; } // // Convert back to un-reduced format // for(j=0; j<=nvars-1; j++) { lm.w[offs+j] = 0; } for(j=0; j<=k-1; j++) { r = tlm.w[offs+j]; i1_ = (0) - (offs); for(i_=offs; i_<=offs+nvars-1;i_++) { lm.w[i_] = lm.w[i_] + r*vt[j,i_+i1_]; } } ar.rmserror = ar2.rmserror; ar.avgerror = ar2.avgerror; ar.avgrelerror = ar2.avgrelerror; ar.cvrmserror = ar2.cvrmserror; ar.cvavgerror = ar2.cvavgerror; ar.cvavgrelerror = ar2.cvavgrelerror; ar.ncvdefects = ar2.ncvdefects; ar.cvdefects = new int[nvars-1+1]; for(j=0; j<=ar.ncvdefects-1; j++) { ar.cvdefects[j] = ar2.cvdefects[j]; } ar.c = new double[nvars-1+1, nvars-1+1]; work = new double[nvars+1]; blas.matrixmatrixmultiply(ar2.c, 0, k-1, 0, k-1, false, vt, 0, k-1, 0, nvars-1, false, 1.0, ref vm, 0, k-1, 0, nvars-1, 0.0, ref work); blas.matrixmatrixmultiply(vt, 0, k-1, 0, nvars-1, true, vm, 0, k-1, 0, nvars-1, false, 1.0, ref ar.c, 0, nvars-1, 0, nvars-1, 0.0, ref work); return; } } info = -255; return; } for(i=0; i<=nvars-1; i++) { if( (double)(sv[i])>(double)(epstol*math.machineepsilon*sv[0]) ) { svi[i] = 1/sv[i]; } else { svi[i] = 0; } } for(i=0; i<=nvars-1; i++) { t[i] = 0; } for(i=0; i<=npoints-1; i++) { r = b[i]; for(i_=0; i_<=nvars-1;i_++) { t[i_] = t[i_] + r*u[i,i_]; } } for(i=0; i<=nvars-1; i++) { lm.w[offs+i] = 0; } for(i=0; i<=nvars-1; i++) { r = t[i]*svi[i]; i1_ = (0) - (offs); for(i_=offs; i_<=offs+nvars-1;i_++) { lm.w[i_] = lm.w[i_] + r*vt[i,i_+i1_]; } } for(j=0; j<=nvars-1; j++) { r = svi[j]; for(i_=0; i_<=nvars-1;i_++) { vm[i_,j] = r*vt[j,i_]; } } for(i=0; i<=nvars-1; i++) { for(j=i; j<=nvars-1; j++) { r = 0.0; for(i_=0; i_<=nvars-1;i_++) { r += vm[i,i_]*vm[j,i_]; } ar.c[i,j] = r; ar.c[j,i] = r; } } // // Leave-1-out cross-validation error. // // NOTATIONS: // A design matrix // A*x = b original linear least squares task // U*S*V' SVD of A // ai i-th row of the A // bi i-th element of the b // xf solution of the original LLS task // // Cross-validation error of i-th element from a sample is // calculated using following formula: // // ERRi = ai*xf - (ai*xf-bi*(ui*ui'))/(1-ui*ui') (1) // // This formula can be derived from normal equations of the // original task // // (A'*A)x = A'*b (2) // // by applying modification (zeroing out i-th row of A) to (2): // // (A-ai)'*(A-ai) = (A-ai)'*b // // and using Sherman-Morrison formula for updating matrix inverse // // NOTE 1: b is not zeroed out since it is much simpler and // does not influence final result. // // NOTE 2: some design matrices A have such ui that 1-ui*ui'=0.
// Formula (1) can't be applied for such cases and they are skipped // from CV calculation (which distorts resulting CV estimate). // But from the properties of U we can conclude that there can // be no more than NVars such vectors. Usually // NVars << NPoints, so in a normal case it only slightly // influences result. // ncv = 0; na = 0; nacv = 0; ar.rmserror = 0; ar.avgerror = 0; ar.avgrelerror = 0; ar.cvrmserror = 0; ar.cvavgerror = 0; ar.cvavgrelerror = 0; ar.ncvdefects = 0; ar.cvdefects = new int[nvars-1+1]; for(i=0; i<=npoints-1; i++) { // // Error on a training set // i1_ = (offs)-(0); r = 0.0; for(i_=0; i_<=nvars-1;i_++) { r += xy[i,i_]*lm.w[i_+i1_]; } ar.rmserror = ar.rmserror+math.sqr(r-xy[i,nvars]); ar.avgerror = ar.avgerror+Math.Abs(r-xy[i,nvars]); if( (double)(xy[i,nvars])!=(double)(0) ) { ar.avgrelerror = ar.avgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]); na = na+1; } // // Error using fast leave-one-out cross-validation // p = 0.0; for(i_=0; i_<=nvars-1;i_++) { p += u[i,i_]*u[i,i_]; } if( (double)(p)>(double)(1-epstol*math.machineepsilon) ) { ar.cvdefects[ar.ncvdefects] = i; ar.ncvdefects = ar.ncvdefects+1; continue; } r = s[i]*(r/s[i]-b[i]*p)/(1-p); ar.cvrmserror = ar.cvrmserror+math.sqr(r-xy[i,nvars]); ar.cvavgerror = ar.cvavgerror+Math.Abs(r-xy[i,nvars]); if( (double)(xy[i,nvars])!=(double)(0) ) { ar.cvavgrelerror = ar.cvavgrelerror+Math.Abs((r-xy[i,nvars])/xy[i,nvars]); nacv = nacv+1; } ncv = ncv+1; } if( ncv==0 ) { // // Something strange: ALL ui are degenerate. // Unexpected... // info = -255; return; } ar.rmserror = Math.Sqrt(ar.rmserror/npoints); ar.avgerror = ar.avgerror/npoints; if( na!=0 ) { ar.avgrelerror = ar.avgrelerror/na; } ar.cvrmserror = Math.Sqrt(ar.cvrmserror/ncv); ar.cvavgerror = ar.cvavgerror/ncv; if( nacv!=0 ) { ar.cvavgrelerror = ar.cvavgrelerror/nacv; } } } public class filters { /************************************************************************* Filters: simple moving averages (unsymmetric). This filter replaces array by results of SMA(K) filter. SMA(K) is defined as filter which averages at most K previous points (previous - not points AROUND central point) - or less, in case of the first K-1 points. INPUT PARAMETERS: X - array[N], array to process. It can be larger than N, in this case only first N points are processed. N - points count, N>=0 K - K>=1 (K can be larger than N, such cases will be correctly handled). Window width. K=1 corresponds to identity transformation (nothing changes). OUTPUT PARAMETERS: X - array, whose first N elements were processed with SMA(K) NOTE 1: this function uses efficient in-place algorithm which does not allocate temporary arrays. NOTE 2: this algorithm makes only one pass through array and uses running sum to speed-up calculation of the averages. Additional measures are taken to ensure that running sum on a long sequence of zero elements will be correctly reset to zero even in the presence of round-off error. NOTE 3: this is unsymmetric version of the algorithm, which does NOT average points after the current one. Only X[i], X[i-1], ... are used when calculating new value of X[i]. We should also note that this algorithm uses BOTH previous points and current one, i.e. new value of X[i] depends on BOTH previous point and X[i] itself.
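EXAMPLE (minimal sketch): with X = {1, 2, 3, 4}, N=4, K=2 each element is
replaced by the average of itself and at most one preceding element, so the
array becomes {1, 1.5, 2.5, 3.5}:

    double[] x = new double[]{1, 2, 3, 4};
    filtersma(ref x, 4, 2);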
-- ALGLIB -- Copyright 25.10.2011 by Bochkanov Sergey *************************************************************************/ public static void filtersma(ref double[] x, int n, int k) { int i = 0; double runningsum = 0; double termsinsum = 0; int zeroprefix = 0; double v = 0; alglib.ap.assert(n>=0, "FilterSMA: N<0"); alglib.ap.assert(alglib.ap.len(x)>=n, "FilterSMA: Length(X)<N"); alglib.ap.assert(k>=1, "FilterSMA: K<1"); // // Quick exit, if necessary // if( n<=1 || k==1 ) { return; } // // Prepare variables (see below for explanation) // runningsum = 0.0; termsinsum = 0; for(i=Math.Max(n-k, 0); i<=n-1; i++) { runningsum = runningsum+x[i]; termsinsum = termsinsum+1; } i = Math.Max(n-k, 0); zeroprefix = 0; while( i<=n-1 && (double)(x[i])==(double)(0) ) { zeroprefix = zeroprefix+1; i = i+1; } // // General case: we assume that N>1 and K>1 // // Make one pass through all elements. At the beginning of // the iteration we have: // * I element being processed // * RunningSum current value of the running sum // (including I-th element) // * TermsInSum number of terms in sum, 0<=TermsInSum<=K // * ZeroPrefix length of the sequence of zero elements // which starts at X[I-K+1] and continues towards X[I]. // Equal to zero in case X[I-K+1] is non-zero. // This value is used to make RunningSum exactly zero // when it follows from the problem properties. // for(i=n-1; i>=0; i--) { // // Store new value of X[i], save old value in V // v = x[i]; x[i] = runningsum/termsinsum; // // Update RunningSum and TermsInSum // if( i-k>=0 ) { runningsum = runningsum-v+x[i-k]; } else { runningsum = runningsum-v; termsinsum = termsinsum-1; } // // Update ZeroPrefix. // In case we have ZeroPrefix=TermsInSum, // RunningSum is reset to zero. // if( i-k>=0 ) { if( (double)(x[i-k])!=(double)(0) ) { zeroprefix = 0; } else { zeroprefix = Math.Min(zeroprefix+1, k); } } else { zeroprefix = Math.Min(zeroprefix, i+1); } if( (double)(zeroprefix)==(double)(termsinsum) ) { runningsum = 0; } } } /************************************************************************* Filters: exponential moving averages. This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is defined as filter which replaces X[] by S[]: S[0] = X[0] S[t] = alpha*X[t] + (1-alpha)*S[t-1] INPUT PARAMETERS: X - array[N], array to process. It can be larger than N, in this case only first N points are processed. N - points count, N>=0 alpha - 0<alpha<=1, smoothing parameter. OUTPUT PARAMETERS: X - array, whose first N elements were processed with EMA(alpha) NOTE 1: this function uses efficient in-place algorithm which does not allocate temporary arrays. NOTE 2: this algorithm uses BOTH previous points and current one, i.e. new value of X[i] depends on BOTH previous point and X[i] itself. NOTE 3: technical analysts quite often work with EMA coefficient expressed in DAYS instead of fractions. If you want to calculate EMA(N), where N is a number of days, you can use alpha=2/(N+1). -- ALGLIB -- Copyright 25.10.2011 by Bochkanov Sergey *************************************************************************/ public static void filterema(ref double[] x, int n, double alpha) { int i = 0; alglib.ap.assert(n>=0, "FilterEMA: N<0"); alglib.ap.assert(alglib.ap.len(x)>=n, "FilterEMA: Length(X)<N"); alglib.ap.assert((double)(alpha)>(double)(0), "FilterEMA: Alpha<=0"); alglib.ap.assert((double)(alpha)<=(double)(1), "FilterEMA: Alpha>1"); // // Quick exit, if necessary // if( n<=1 || (double)(alpha)==(double)(1) ) { return; } // // Process // for(i=1; i<=n-1; i++) { x[i] = alpha*x[i]+(1-alpha)*x[i-1]; } } /************************************************************************* Filters: linear regression moving averages. This filter replaces array by results of LRMA(K) filter. LRMA(K) is defined as filter which, for each data point, builds linear regression model using K previous points (point itself is included in these K points) and calculates value of this linear model at the point in question. INPUT PARAMETERS: X - array[N], array to process. It can be larger than N, in this case only first N points are processed. N - points count, N>=0 K - K>=1 (K can be larger than N, such cases will be correctly handled). Window width. K=1 corresponds to identity transformation (nothing changes).
OUTPUT PARAMETERS: X - array, whose first N elements were processed with LRMA(K) NOTE 1: this function allocates small temporary arrays (K elements) which are used to build the regression model. NOTE 2: each output point is calculated independently: a linear model is fitted to the corresponding window with LRLineS, so complexity is O(N*K). NOTE 3: this is unsymmetric version of the algorithm, which does NOT average points after the current one. Only X[i], X[i-1], ... are used when calculating new value of X[i]. We should also note that this algorithm uses BOTH previous points and current one, i.e. new value of X[i] depends on BOTH previous point and X[i] itself. -- ALGLIB -- Copyright 25.10.2011 by Bochkanov Sergey *************************************************************************/ public static void filterlrma(ref double[] x, int n, int k) { int i = 0; int m = 0; double[,] xy = new double[0,0]; double[] s = new double[0]; int info = 0; double a = 0; double b = 0; double vara = 0; double varb = 0; double covab = 0; double corrab = 0; double p = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(n>=0, "FilterLRMA: N<0"); alglib.ap.assert(alglib.ap.len(x)>=n, "FilterLRMA: Length(X)<N"); alglib.ap.assert(k>=1, "FilterLRMA: K<1"); // // Quick exit, if necessary: // * either N is equal to 1 (nothing to average) // * or K is 1 (only point itself is used) or 2 (model is too simple, // we will always get identity transformation) // if( n<=1 || k<=2 ) { return; } // // General case: K>2, N>1. // We do not process points with I<2 because first two points (I=0 and I=1) will be // left unmodified by LRMA filter in any case. // xy = new double[k, 2]; s = new double[k]; for(i=0; i<=k-1; i++) { xy[i,0] = i; s[i] = 1.0; } for(i=n-1; i>=2; i--) { m = Math.Min(i+1, k); i1_ = (i-m+1) - (0); for(i_=0; i_<=m-1;i_++) { xy[i_,1] = x[i_+i1_]; } linreg.lrlines(xy, s, m, ref info, ref a, ref b, ref vara, ref varb, ref covab, ref corrab, ref p); alglib.ap.assert(info==1, "FilterLRMA: internal error"); x[i] = a+b*(m-1); } } } public class lda { /************************************************************************* Multiclass Fisher LDA Subroutine finds coefficients of linear combination which optimally separates the training set into classes. INPUT PARAMETERS: XY - training set, array[0..NPoints-1,0..NVars]. First NVars columns store values of independent variables, next column stores number of class (from 0 to NClasses-1) which dataset element belongs to. Fractional values are rounded to nearest integer. NPoints - training set size, NPoints>=0 NVars - number of independent variables, NVars>=1 NClasses - number of classes, NClasses>=2 OUTPUT PARAMETERS: Info - return code: * -4, if internal EVD subroutine hasn't converged * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, NVars<1, NClasses<2) * 1, if task has been solved * 2, if there was a multicollinearity in training set, but task has been solved.
W - linear combination coefficients, array[0..NVars-1] -- ALGLIB -- Copyright 31.05.2008 by Bochkanov Sergey *************************************************************************/ public static void fisherlda(double[,] xy, int npoints, int nvars, int nclasses, ref int info, ref double[] w) { double[,] w2 = new double[0,0]; int i_ = 0; info = 0; w = new double[0]; fisherldan(xy, npoints, nvars, nclasses, ref info, ref w2); if( info>0 ) { w = new double[nvars-1+1]; for(i_=0; i_<=nvars-1;i_++) { w[i_] = w2[i_,0]; } } } /************************************************************************* N-dimensional multiclass Fisher LDA Subroutine finds coefficients of linear combinations which optimally separate the training set into classes. It returns N-dimensional basis whose vectors are sorted by quality of training set separation (in descending order). INPUT PARAMETERS: XY - training set, array[0..NPoints-1,0..NVars]. First NVars columns store values of independent variables, next column stores number of class (from 0 to NClasses-1) which dataset element belongs to. Fractional values are rounded to nearest integer. NPoints - training set size, NPoints>=0 NVars - number of independent variables, NVars>=1 NClasses - number of classes, NClasses>=2 OUTPUT PARAMETERS: Info - return code: * -4, if internal EVD subroutine hasn't converged * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, NVars<1, NClasses<2) * 1, if task has been solved * 2, if there was a multicollinearity in training set, but task has been solved. W - basis, array[0..NVars-1,0..NVars-1] columns of the matrix store basis vectors, sorted by quality of training set separation (in descending order) -- ALGLIB -- Copyright 31.05.2008 by Bochkanov Sergey *************************************************************************/ public static void fisherldan(double[,] xy, int npoints, int nvars, int nclasses, ref int info, ref double[,] w) { int i = 0; int j = 0; int k = 0; int m = 0; double v = 0; int[] c = new int[0]; double[] mu = new double[0]; double[,] muc = new double[0,0]; int[] nc = new int[0]; double[,] sw = new double[0,0]; double[,] st = new double[0,0]; double[,] z = new double[0,0]; double[,] z2 = new double[0,0]; double[,] tm = new double[0,0]; double[,] sbroot = new double[0,0]; double[,] a = new double[0,0]; double[,] xyproj = new double[0,0]; double[,] wproj = new double[0,0]; double[] tf = new double[0]; double[] d = new double[0]; double[] d2 = new double[0]; double[] work = new double[0]; int i_ = 0; info = 0; w = new double[0,0]; // // Test data // if( (npoints<0 || nvars<1) || nclasses<2 ) { info = -1; return; } for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nvars])<0 || (int)Math.Round(xy[i,nvars])>=nclasses ) { info = -2; return; } } info = 1; // // Special case: NPoints<=1 // Degenerate task.
// if( npoints<=1 ) { info = 2; w = new double[nvars-1+1, nvars-1+1]; for(i=0; i<=nvars-1; i++) { for(j=0; j<=nvars-1; j++) { if( i==j ) { w[i,j] = 1; } else { w[i,j] = 0; } } } return; } // // Prepare temporaries // tf = new double[nvars-1+1]; work = new double[Math.Max(nvars, npoints)+1]; // // Convert class labels from reals to integers (just for convenience) // c = new int[npoints-1+1]; for(i=0; i<=npoints-1; i++) { c[i] = (int)Math.Round(xy[i,nvars]); } // // Calculate class sizes and means // mu = new double[nvars-1+1]; muc = new double[nclasses-1+1, nvars-1+1]; nc = new int[nclasses-1+1]; for(j=0; j<=nvars-1; j++) { mu[j] = 0; } for(i=0; i<=nclasses-1; i++) { nc[i] = 0; for(j=0; j<=nvars-1; j++) { muc[i,j] = 0; } } for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nvars-1;i_++) { mu[i_] = mu[i_] + xy[i,i_]; } for(i_=0; i_<=nvars-1;i_++) { muc[c[i],i_] = muc[c[i],i_] + xy[i,i_]; } nc[c[i]] = nc[c[i]]+1; } for(i=0; i<=nclasses-1; i++) { v = (double)1/(double)nc[i]; for(i_=0; i_<=nvars-1;i_++) { muc[i,i_] = v*muc[i,i_]; } } v = (double)1/(double)npoints; for(i_=0; i_<=nvars-1;i_++) { mu[i_] = v*mu[i_]; } // // Create ST matrix // st = new double[nvars-1+1, nvars-1+1]; for(i=0; i<=nvars-1; i++) { for(j=0; j<=nvars-1; j++) { st[i,j] = 0; } } for(k=0; k<=npoints-1; k++) { for(i_=0; i_<=nvars-1;i_++) { tf[i_] = xy[k,i_]; } for(i_=0; i_<=nvars-1;i_++) { tf[i_] = tf[i_] - mu[i_]; } for(i=0; i<=nvars-1; i++) { v = tf[i]; for(i_=0; i_<=nvars-1;i_++) { st[i,i_] = st[i,i_] + v*tf[i_]; } } } // // Create SW matrix // sw = new double[nvars-1+1, nvars-1+1]; for(i=0; i<=nvars-1; i++) { for(j=0; j<=nvars-1; j++) { sw[i,j] = 0; } } for(k=0; k<=npoints-1; k++) { for(i_=0; i_<=nvars-1;i_++) { tf[i_] = xy[k,i_]; } for(i_=0; i_<=nvars-1;i_++) { tf[i_] = tf[i_] - muc[c[k],i_]; } for(i=0; i<=nvars-1; i++) { v = tf[i]; for(i_=0; i_<=nvars-1;i_++) { sw[i,i_] = sw[i,i_] + v*tf[i_]; } } } // // Maximize ratio J=(w'*ST*w)/(w'*SW*w). // // First, make transition from w to v such that w'*ST*w becomes v'*v: // v = root(ST)*w = R*w // R = root(D)*Z' // w = (root(ST)^-1)*v = RI*v // RI = Z*inv(root(D)) // J = (v'*v)/(v'*(RI'*SW*RI)*v) // ST = Z*D*Z' // // so we have // // J = (v'*v) / (v'*(inv(root(D))*Z'*SW*Z*inv(root(D)))*v) = // = (v'*v) / (v'*A*v) // if( !evd.smatrixevd(st, nvars, 1, true, ref d, ref z) ) { info = -4; return; } w = new double[nvars-1+1, nvars-1+1]; if( (double)(d[nvars-1])<=(double)(0) || (double)(d[0])<=(double)(1000*math.machineepsilon*d[nvars-1]) ) { // // Special case: D[NVars-1]<=0 // Degenerate task (all variables take the same value). // if( (double)(d[nvars-1])<=(double)(0) ) { info = 2; for(i=0; i<=nvars-1; i++) { for(j=0; j<=nvars-1; j++) { if( i==j ) { w[i,j] = 1; } else { w[i,j] = 0; } } } return; } // // Special case: degenerate ST matrix, multicollinearity found. // Since we know ST eigenvalues/vectors we can translate task to // non-degenerate form. // // Let WG be an orthogonal basis of the non-zero variance subspace // of ST, and let WZ be an orthogonal basis of the zero variance // subspace. // // Projection on WG allows us to use LDA on reduced M-dimensional // subspace, N-M vectors of WZ allow us to update reduced LDA // factors to full N-dimensional subspace.
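// // (In the code below M is the dimension of the zero variance subspace; // columns M..NVars-1 of Z span the non-zero variance subspace. LDA is // re-run on the projected dataset XYProj, the reduced basis WProj is // lifted back through these columns of Z, and the M zero variance // directions are appended unchanged as the last columns of W.)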
// m = 0; for(k=0; k<=nvars-1; k++) { if( (double)(d[k])<=(double)(1000*math.machineepsilon*d[nvars-1]) ) { m = k+1; } } alglib.ap.assert(m!=0, "FisherLDAN: internal error #1"); xyproj = new double[npoints-1+1, nvars-m+1]; blas.matrixmatrixmultiply(xy, 0, npoints-1, 0, nvars-1, false, z, 0, nvars-1, m, nvars-1, false, 1.0, ref xyproj, 0, npoints-1, 0, nvars-m-1, 0.0, ref work); for(i=0; i<=npoints-1; i++) { xyproj[i,nvars-m] = xy[i,nvars]; } fisherldan(xyproj, npoints, nvars-m, nclasses, ref info, ref wproj); if( info<0 ) { return; } blas.matrixmatrixmultiply(z, 0, nvars-1, m, nvars-1, false, wproj, 0, nvars-m-1, 0, nvars-m-1, false, 1.0, ref w, 0, nvars-1, 0, nvars-m-1, 0.0, ref work); for(k=nvars-m; k<=nvars-1; k++) { for(i_=0; i_<=nvars-1;i_++) { w[i_,k] = z[i_,k-(nvars-m)]; } } info = 2; } else { // // General case: no multicollinearity // tm = new double[nvars-1+1, nvars-1+1]; a = new double[nvars-1+1, nvars-1+1]; blas.matrixmatrixmultiply(sw, 0, nvars-1, 0, nvars-1, false, z, 0, nvars-1, 0, nvars-1, false, 1.0, ref tm, 0, nvars-1, 0, nvars-1, 0.0, ref work); blas.matrixmatrixmultiply(z, 0, nvars-1, 0, nvars-1, true, tm, 0, nvars-1, 0, nvars-1, false, 1.0, ref a, 0, nvars-1, 0, nvars-1, 0.0, ref work); for(i=0; i<=nvars-1; i++) { for(j=0; j<=nvars-1; j++) { a[i,j] = a[i,j]/Math.Sqrt(d[i]*d[j]); } } if( !evd.smatrixevd(a, nvars, 1, true, ref d2, ref z2) ) { info = -4; return; } for(k=0; k<=nvars-1; k++) { for(i=0; i<=nvars-1; i++) { tf[i] = z2[i,k]/Math.Sqrt(d[i]); } for(i=0; i<=nvars-1; i++) { v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += z[i,i_]*tf[i_]; } w[i,k] = v; } } } // // Post-processing: // * normalization // * converting to non-negative form, if possible // for(k=0; k<=nvars-1; k++) { v = 0.0; for(i_=0; i_<=nvars-1;i_++) { v += w[i_,k]*w[i_,k]; } v = 1/Math.Sqrt(v); for(i_=0; i_<=nvars-1;i_++) { w[i_,k] = v*w[i_,k]; } v = 0; for(i=0; i<=nvars-1; i++) { v = v+w[i,k]; } if( (double)(v)<(double)(0) ) { for(i_=0; i_<=nvars-1;i_++) { w[i_,k] = -1*w[i_,k]; } } } } } public class mlpbase { public class multilayerperceptron : apobject { public int hlnetworktype; public int hlnormtype; public int[] hllayersizes; public int[] hlconnections; public int[] hlneurons; public int[] structinfo; public double[] weights; public double[] columnmeans; public double[] columnsigmas; public double[] neurons; public double[] dfdnet; public double[] derror; public double[] x; public double[] y; public double[,] xy; public double[] xyrow; public double[,] chunks; public double[] nwbuf; public int[] integerbuf; public multilayerperceptron() { init(); } public override void init() { hllayersizes = new int[0]; hlconnections = new int[0]; hlneurons = new int[0]; structinfo = new int[0]; weights = new double[0]; columnmeans = new double[0]; columnsigmas = new double[0]; neurons = new double[0]; dfdnet = new double[0]; derror = new double[0]; x = new double[0]; y = new double[0]; xy = new double[0,0]; xyrow = new double[0]; chunks = new double[0,0]; nwbuf = new double[0]; integerbuf = new int[0]; } public override alglib.apobject make_copy() { multilayerperceptron _result = new multilayerperceptron(); _result.hlnetworktype = hlnetworktype; _result.hlnormtype = hlnormtype; _result.hllayersizes = (int[])hllayersizes.Clone(); _result.hlconnections = (int[])hlconnections.Clone(); _result.hlneurons = (int[])hlneurons.Clone(); _result.structinfo = (int[])structinfo.Clone(); _result.weights = (double[])weights.Clone(); _result.columnmeans = (double[])columnmeans.Clone(); _result.columnsigmas = 
(double[])columnsigmas.Clone(); _result.neurons = (double[])neurons.Clone(); _result.dfdnet = (double[])dfdnet.Clone(); _result.derror = (double[])derror.Clone(); _result.x = (double[])x.Clone(); _result.y = (double[])y.Clone(); _result.xy = (double[,])xy.Clone(); _result.xyrow = (double[])xyrow.Clone(); _result.chunks = (double[,])chunks.Clone(); _result.nwbuf = (double[])nwbuf.Clone(); _result.integerbuf = (int[])integerbuf.Clone(); return _result; } }; /************************************************************************* Model's errors: * RelCLSError - fraction of misclassified cases. * AvgCE - average cross-entropy * RMSError - root-mean-square error * AvgError - average error * AvgRelError - average relative error NOTE 1: RelCLSError/AvgCE are zero on regression problems. NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain errors in prediction of posterior probabilities *************************************************************************/ public class modelerrors : apobject { public double relclserror; public double avgce; public double rmserror; public double avgerror; public double avgrelerror; public modelerrors() { init(); } public override void init() { } public override alglib.apobject make_copy() { modelerrors _result = new modelerrors(); _result.relclserror = relclserror; _result.avgce = avgce; _result.rmserror = rmserror; _result.avgerror = avgerror; _result.avgrelerror = avgrelerror; return _result; } }; public const int mlpvnum = 7; public const int mlpfirstversion = 0; public const int nfieldwidth = 4; public const int hlconnfieldwidth = 5; public const int hlnfieldwidth = 4; public const int chunksize = 32; /************************************************************************* Creates neural network with NIn inputs, NOut outputs, without hidden layers, with linear output layer. Network weights are filled with small random values. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcreate0(int nin, int nout, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; layerscount = 1+3; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(-5, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, 0, 0, nout, false, true); } /************************************************************************* Same as MLPCreate0, but with one hidden layer (NHid neurons) with non-linear activation function. Output layer is linear.
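EXAMPLE (minimal sketch): a 2-input regression network with one hidden layer
of 5 neurons and a single linear output:

    multilayerperceptron net = new multilayerperceptron();
    mlpcreate1(2, 5, 1, net);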
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcreate1(int nin, int nhid, int nout, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; layerscount = 1+3+3; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(-5, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, nhid, 0, nout, false, true); } /************************************************************************* Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons) with non-linear activation function. Output layer is linear. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcreate2(int nin, int nhid1, int nhid2, int nout, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; layerscount = 1+3+3+3; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid2, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(-5, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, nhid1, nhid2, nout, false, true); } /************************************************************************* Creates neural network with NIn inputs, NOut outputs, without hidden layers with non-linear output layer. Network weights are filled with small random values. Activation function of the output layer takes values: (B, +INF), if D>=0 or (-INF, B), if D<0.
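For example, MLPCreateB0(3, 1, 0.0, 1.0, net) constrains the single output to
the range (0, +INF): B=0 and D>=0 select the lower-bounded branch. Internally
the bound is applied through ColumnMeans/ColumnSigmas of the output columns,
as the code below shows.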
-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpcreateb0(int nin, int nout, double b, double d, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; int i = 0; layerscount = 1+3; if( (double)(d)>=(double)(0) ) { d = 1; } else { d = -1; } // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(3, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, 0, 0, nout, false, false); // // Turn on outputs shift/scaling. // for(i=nin; i<=nin+nout-1; i++) { network.columnmeans[i] = b; network.columnsigmas[i] = d; } } /************************************************************************* Same as MLPCreateB0 but with non-linear hidden layer. -- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpcreateb1(int nin, int nhid, int nout, double b, double d, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; int i = 0; layerscount = 1+3+3; if( (double)(d)>=(double)(0) ) { d = 1; } else { d = -1; } // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(3, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, nhid, 0, nout, false, false); // // Turn on outputs shift/scaling. // for(i=nin; i<=nin+nout-1; i++) { network.columnmeans[i] = b; network.columnsigmas[i] = d; } } /************************************************************************* Same as MLPCreateB0 but with two non-linear hidden layers.
-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpcreateb2(int nin, int nhid1, int nhid2, int nout, double b, double d, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; int i = 0; layerscount = 1+3+3+3; if( (double)(d)>=(double)(0) ) { d = 1; } else { d = -1; } // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid2, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(3, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, nhid1, nhid2, nout, false, false); // // Turn on outputs shift/scaling. // for(i=nin; i<=nin+nout-1; i++) { network.columnmeans[i] = b; network.columnsigmas[i] = d; } } /************************************************************************* Creates neural network with NIn inputs, NOut outputs, without hidden layers with non-linear output layer. Network weights are filled with small random values. Activation function of the output layer takes values [A,B]. -- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpcreater0(int nin, int nout, double a, double b, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; int i = 0; layerscount = 1+3; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, 0, 0, nout, false, false); // // Turn on outputs shift/scaling. // for(i=nin; i<=nin+nout-1; i++) { network.columnmeans[i] = 0.5*(a+b); network.columnsigmas[i] = 0.5*(a-b); } } /************************************************************************* Same as MLPCreateR0, but with non-linear hidden layer.
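A usage sketch for the R-family creators (illustrative; same assumptions as the MLPCreateB0 example above). Note that the code above stores 0.5*(A+B) and 0.5*(A-B) in ColumnMeans/ColumnSigmas, so the order of A and B matters:

    multilayerperceptron net = new multilayerperceptron();
    mlpcreater0(3, 1, -1.0, 1.0, net);    // output scaling tuned for the range [-1,+1]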
-- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpcreater1(int nin, int nhid, int nout, double a, double b, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; int i = 0; layerscount = 1+3+3; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, nhid, 0, nout, false, false); // // Turn on outputs shift/scaling. // for(i=nin; i<=nin+nout-1; i++) { network.columnmeans[i] = 0.5*(a+b); network.columnsigmas[i] = 0.5*(a-b); } } /************************************************************************* Same as MLPCreateR0, but with two non-linear hidden layers. -- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpcreater2(int nin, int nhid1, int nhid2, int nout, double a, double b, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; int i = 0; layerscount = 1+3+3+3; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid2, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, false, network); fillhighlevelinformation(network, nin, nhid1, nhid2, nout, false, false); // // Turn on outputs shift/scaling. // for(i=nin; i<=nin+nout-1; i++) { network.columnmeans[i] = 0.5*(a+b); network.columnsigmas[i] = 0.5*(a-b); } } /************************************************************************* Creates classifier network with NIn inputs and NOut possible classes. Network contains no hidden layers and linear output layer with SOFTMAX- normalization (so outputs sum up to 1.0 and converge to posterior probabilities).
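A usage sketch (illustrative; 4 inputs, 3 classes; same assumptions as the earlier creation examples):

    multilayerperceptron net = new multilayerperceptron();
    mlpcreatec0(4, 3, net);
    double[] x = new double[]{0.1, 0.2, 0.3, 0.4};
    double[] y = new double[0];
    mlpprocess(net, x, ref y);   // y[0..2] are nonnegative and sum to 1.0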
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatec0(int nin, int nout, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; alglib.ap.assert(nout>=2, "MLPCreateC0: NOut<2!"); layerscount = 1+2+1; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout-1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addzerolayer(ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, true, network); fillhighlevelinformation(network, nin, 0, 0, nout, true, true); } /************************************************************************* Same as MLPCreateC0, but with one non-linear hidden layer. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatec1(int nin, int nhid, int nout, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; alglib.ap.assert(nout>=2, "MLPCreateC1: NOut<2!"); layerscount = 1+3+2+1; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout-1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addzerolayer(ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, true, network); fillhighlevelinformation(network, nin, nhid, 0, nout, true, true); } /************************************************************************* Same as MLPCreateC0, but with two non-linear hidden layers. 
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatec2(int nin, int nhid1, int nhid2, int nout, multilayerperceptron network) { int[] lsizes = new int[0]; int[] ltypes = new int[0]; int[] lconnfirst = new int[0]; int[] lconnlast = new int[0]; int layerscount = 0; int lastproc = 0; alglib.ap.assert(nout>=2, "MLPCreateC2: NOut<2!"); layerscount = 1+3+3+2+1; // // Allocate arrays // lsizes = new int[layerscount-1+1]; ltypes = new int[layerscount-1+1]; lconnfirst = new int[layerscount-1+1]; lconnlast = new int[layerscount-1+1]; // // Layers // addinputlayer(nin, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nhid2, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addactivationlayer(1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addbiasedsummatorlayer(nout-1, ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); addzerolayer(ref lsizes, ref ltypes, ref lconnfirst, ref lconnlast, ref lastproc); // // Create // mlpcreate(nin, nout, lsizes, ltypes, lconnfirst, lconnlast, layerscount, true, network); fillhighlevelinformation(network, nin, nhid1, nhid2, nout, true, true); } /************************************************************************* Copying of neural network INPUT PARAMETERS: Network1 - original OUTPUT PARAMETERS: Network2 - copy -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpcopy(multilayerperceptron network1, multilayerperceptron network2) { network2.hlnetworktype = network1.hlnetworktype; network2.hlnormtype = network1.hlnormtype; apserv.copyintegerarray(network1.hllayersizes, ref network2.hllayersizes); apserv.copyintegerarray(network1.hlconnections, ref network2.hlconnections); apserv.copyintegerarray(network1.hlneurons, ref network2.hlneurons); apserv.copyintegerarray(network1.structinfo, ref network2.structinfo); apserv.copyrealarray(network1.weights, ref network2.weights); apserv.copyrealarray(network1.columnmeans, ref network2.columnmeans); apserv.copyrealarray(network1.columnsigmas, ref network2.columnsigmas); apserv.copyrealarray(network1.neurons, ref network2.neurons); apserv.copyrealarray(network1.dfdnet, ref network2.dfdnet); apserv.copyrealarray(network1.derror, ref network2.derror); apserv.copyrealarray(network1.x, ref network2.x); apserv.copyrealarray(network1.y, ref network2.y); apserv.copyrealmatrix(network1.chunks, ref network2.chunks); apserv.copyrealarray(network1.nwbuf, ref network2.nwbuf); apserv.copyintegerarray(network1.integerbuf, ref network2.integerbuf); } /************************************************************************* Serialization of MultiLayerPerceptron structure INPUT PARAMETERS: Network - original OUTPUT PARAMETERS: RA - array of real numbers which stores network, array[0..RLen-1] RLen - RA length -- ALGLIB -- Copyright 29.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpserializeold(multilayerperceptron network, ref double[] ra, ref int rlen) { int i = 0; int ssize = 0; int ntotal = 0; int nin = 0; int nout = 0; int wcount = 0; int sigmalen = 0; int offs = 0; int i_
= 0; int i1_ = 0; ra = new double[0]; rlen = 0; // // Unload info // ssize = network.structinfo[0]; nin = network.structinfo[1]; nout = network.structinfo[2]; ntotal = network.structinfo[3]; wcount = network.structinfo[4]; if( mlpissoftmax(network) ) { sigmalen = nin; } else { sigmalen = nin+nout; } // // RA format: // LEN DESCR. // 1 RLen // 1 version (MLPVNum) // 1 StructInfo size // SSize StructInfo // WCount Weights // SigmaLen ColumnMeans // SigmaLen ColumnSigmas // rlen = 3+ssize+wcount+2*sigmalen; ra = new double[rlen-1+1]; ra[0] = rlen; ra[1] = mlpvnum; ra[2] = ssize; offs = 3; for(i=0; i<=ssize-1; i++) { ra[offs+i] = network.structinfo[i]; } offs = offs+ssize; i1_ = (0) - (offs); for(i_=offs; i_<=offs+wcount-1;i_++) { ra[i_] = network.weights[i_+i1_]; } offs = offs+wcount; i1_ = (0) - (offs); for(i_=offs; i_<=offs+sigmalen-1;i_++) { ra[i_] = network.columnmeans[i_+i1_]; } offs = offs+sigmalen; i1_ = (0) - (offs); for(i_=offs; i_<=offs+sigmalen-1;i_++) { ra[i_] = network.columnsigmas[i_+i1_]; } offs = offs+sigmalen; } /************************************************************************* Unserialization of MultiLayerPerceptron structure INPUT PARAMETERS: RA - real array which stores network OUTPUT PARAMETERS: Network - restored network -- ALGLIB -- Copyright 29.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpunserializeold(double[] ra, multilayerperceptron network) { int i = 0; int ssize = 0; int ntotal = 0; int nin = 0; int nout = 0; int wcount = 0; int sigmalen = 0; int offs = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert((int)Math.Round(ra[1])==mlpvnum, "MLPUnserialize: incorrect array!"); // // Unload StructInfo from IA // offs = 3; ssize = (int)Math.Round(ra[2]); network.structinfo = new int[ssize-1+1]; for(i=0; i<=ssize-1; i++) { network.structinfo[i] = (int)Math.Round(ra[offs+i]); } offs = offs+ssize; // // Unload info from StructInfo // ssize = network.structinfo[0]; nin = network.structinfo[1]; nout = network.structinfo[2]; ntotal = network.structinfo[3]; wcount = network.structinfo[4]; if( network.structinfo[6]==0 ) { sigmalen = nin+nout; } else { sigmalen = nin; } // // Allocate space for other fields // network.weights = new double[wcount-1+1]; network.columnmeans = new double[sigmalen-1+1]; network.columnsigmas = new double[sigmalen-1+1]; network.neurons = new double[ntotal-1+1]; network.chunks = new double[3*ntotal+1, chunksize-1+1]; network.nwbuf = new double[Math.Max(wcount, 2*nout)-1+1]; network.dfdnet = new double[ntotal-1+1]; network.x = new double[nin-1+1]; network.y = new double[nout-1+1]; network.derror = new double[ntotal-1+1]; // // Copy parameters from RA // i1_ = (offs) - (0); for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = ra[i_+i1_]; } offs = offs+wcount; i1_ = (offs) - (0); for(i_=0; i_<=sigmalen-1;i_++) { network.columnmeans[i_] = ra[i_+i1_]; } offs = offs+sigmalen; i1_ = (offs) - (0); for(i_=0; i_<=sigmalen-1;i_++) { network.columnsigmas[i_] = ra[i_+i1_]; } offs = offs+sigmalen; } /************************************************************************* Randomization of neural network weights -- ALGLIB -- Copyright 06.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlprandomize(multilayerperceptron network) { int i = 0; int nin = 0; int nout = 0; int wcount = 0; mlpproperties(network, ref nin, ref nout, ref wcount); for(i=0; i<=wcount-1; i++) { network.weights[i] = math.randomreal()-0.5; } }
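/*************************************************************************
Usage sketch for MLPRandomize (illustrative; the names below are ours).
The function above draws every weight uniformly from [-0.5,0.5), so
calling it again gives a fresh starting point for another training
attempt:

    multilayerperceptron net = new multilayerperceptron();
    mlpcreate1(2, 5, 1, net);   // 2 inputs, 5 hidden neurons, 1 output
    mlprandomize(net);          // re-seed weights before restarting training
*************************************************************************/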
/************************************************************************* Randomization of neural network weights and standardizer -- ALGLIB -- Copyright 10.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlprandomizefull(multilayerperceptron network) { int i = 0; int nin = 0; int nout = 0; int wcount = 0; int ntotal = 0; int istart = 0; int offs = 0; int ntype = 0; mlpproperties(network, ref nin, ref nout, ref wcount); ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Process network // for(i=0; i<=wcount-1; i++) { network.weights[i] = math.randomreal()-0.5; } for(i=0; i<=nin-1; i++) { network.columnmeans[i] = 2*math.randomreal()-1; network.columnsigmas[i] = 1.5*math.randomreal()+0.5; } if( !mlpissoftmax(network) ) { for(i=0; i<=nout-1; i++) { offs = istart+(ntotal-nout+i)*nfieldwidth; ntype = network.structinfo[offs+0]; if( ntype==0 ) { // // Shifts are changed only for linear outputs neurons // network.columnmeans[nin+i] = 2*math.randomreal()-1; } if( ntype==0 || ntype==3 ) { // // Scales are changed only for linear or bounded outputs neurons. // Note that scale randomization preserves sign. // network.columnsigmas[nin+i] = Math.Sign(network.columnsigmas[nin+i])*(1.5*math.randomreal()+0.5); } } } } /************************************************************************* Internal subroutine. -- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ public static void mlpinitpreprocessor(multilayerperceptron network, double[,] xy, int ssize) { int i = 0; int j = 0; int jmax = 0; int nin = 0; int nout = 0; int wcount = 0; int ntotal = 0; int istart = 0; int offs = 0; int ntype = 0; double[] means = new double[0]; double[] sigmas = new double[0]; double s = 0; mlpproperties(network, ref nin, ref nout, ref wcount); ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Means/Sigmas // if( mlpissoftmax(network) ) { jmax = nin-1; } else { jmax = nin+nout-1; } means = new double[jmax+1]; sigmas = new double[jmax+1]; for(i=0; i<=jmax; i++) { means[i] = 0; sigmas[i] = 0; } for(i=0; i<=ssize-1; i++) { for(j=0; j<=jmax; j++) { means[j] = means[j]+xy[i,j]; } } for(i=0; i<=jmax; i++) { means[i] = means[i]/ssize; } for(i=0; i<=ssize-1; i++) { for(j=0; j<=jmax; j++) { sigmas[j] = sigmas[j]+math.sqr(xy[i,j]-means[j]); } } for(i=0; i<=jmax; i++) { sigmas[i] = Math.Sqrt(sigmas[i]/ssize); } // // Inputs // for(i=0; i<=nin-1; i++) { network.columnmeans[i] = means[i]; network.columnsigmas[i] = sigmas[i]; if( (double)(network.columnsigmas[i])==(double)(0) ) { network.columnsigmas[i] = 1; } } // // Outputs // if( !mlpissoftmax(network) ) { for(i=0; i<=nout-1; i++) { offs = istart+(ntotal-nout+i)*nfieldwidth; ntype = network.structinfo[offs+0]; // // Linear outputs // if( ntype==0 ) { network.columnmeans[nin+i] = means[nin+i]; network.columnsigmas[nin+i] = sigmas[nin+i]; if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } // // Bounded outputs (half-interval) // if( ntype==3 ) { s = means[nin+i]-network.columnmeans[nin+i]; if( (double)(s)==(double)(0) ) { s = Math.Sign(network.columnsigmas[nin+i]); } if( (double)(s)==(double)(0) ) { s = 1.0; } network.columnsigmas[nin+i] = Math.Sign(network.columnsigmas[nin+i])*Math.Abs(s); if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } } } }
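/*************************************************************************
Usage sketch for MLPInitPreprocessor (illustrative dataset: 1 input, 1
output, 3 samples; MLPCreate0, referenced in the creator docs above, is
assumed to take (NIn, NOut, Network)). After the call, ColumnMeans and
ColumnSigmas hold the per-column mean and standard deviation computed by
the function above, so inputs are standardized before entering the
network:

    double[,] xy = new double[,]{{1.0,2.0},{2.0,4.0},{3.0,6.0}};
    multilayerperceptron net = new multilayerperceptron();
    mlpcreate0(1, 1, net);
    mlpinitpreprocessor(net, xy, 3);   // SSize=3 rows are used for the statistics
*************************************************************************/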
/************************************************************************* Internal subroutine. Initialization for preprocessor based on a sample. INPUT Network - initialized neural network; XY - sample, given by sparse matrix; SSize - sample size. OUTPUT Network - neural network with initialized preprocessor. -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpinitpreprocessorsparse(multilayerperceptron network, sparse.sparsematrix xy, int ssize) { int jmax = 0; int nin = 0; int nout = 0; int wcount = 0; int ntotal = 0; int istart = 0; int offs = 0; int ntype = 0; double[] means = new double[0]; double[] sigmas = new double[0]; double s = 0; int i = 0; int j = 0; mlpproperties(network, ref nin, ref nout, ref wcount); ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Means/Sigmas // if( mlpissoftmax(network) ) { jmax = nin-1; } else { jmax = nin+nout-1; } means = new double[jmax+1]; sigmas = new double[jmax+1]; for(i=0; i<=jmax; i++) { means[i] = 0; sigmas[i] = 0; } for(i=0; i<=ssize-1; i++) { sparse.sparsegetrow(xy, i, ref network.xyrow); for(j=0; j<=jmax; j++) { means[j] = means[j]+network.xyrow[j]; } } for(i=0; i<=jmax; i++) { means[i] = means[i]/ssize; } for(i=0; i<=ssize-1; i++) { sparse.sparsegetrow(xy, i, ref network.xyrow); for(j=0; j<=jmax; j++) { sigmas[j] = sigmas[j]+math.sqr(network.xyrow[j]-means[j]); } } for(i=0; i<=jmax; i++) { sigmas[i] = Math.Sqrt(sigmas[i]/ssize); } // // Inputs // for(i=0; i<=nin-1; i++) { network.columnmeans[i] = means[i]; network.columnsigmas[i] = sigmas[i]; if( (double)(network.columnsigmas[i])==(double)(0) ) { network.columnsigmas[i] = 1; } } // // Outputs // if( !mlpissoftmax(network) ) { for(i=0; i<=nout-1; i++) { offs = istart+(ntotal-nout+i)*nfieldwidth; ntype = network.structinfo[offs+0]; // // Linear outputs // if( ntype==0 ) { network.columnmeans[nin+i] = means[nin+i]; network.columnsigmas[nin+i] = sigmas[nin+i]; if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } // // Bounded outputs (half-interval) // if( ntype==3 ) { s = means[nin+i]-network.columnmeans[nin+i]; if( (double)(s)==(double)(0) ) { s = Math.Sign(network.columnsigmas[nin+i]); } if( (double)(s)==(double)(0) ) { s = 1.0; } network.columnsigmas[nin+i] = Math.Sign(network.columnsigmas[nin+i])*Math.Abs(s); if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } } } } /************************************************************************* Internal subroutine. Initialization for preprocessor based on a subsample. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Idx - subset of SubsetSize elements, array[SubsetSize]: * Idx[I] stores row index in the original dataset which is given by XY. Gradient is calculated with respect to rows whose indexes are stored in Idx[]. * Idx[] must store correct indexes; this function throws an exception in case incorrect index (less than 0 or larger than rows(XY)) is given * Idx[] may store indexes in any order and even with repetitions. SubsetSize- number of elements in Idx[] array. OUTPUT: Network - neural network with initialized preprocessor. NOTE: when SubsetSize<0, the full dataset is used via a call to the MLPInitPreprocessor function.
-- ALGLIB -- Copyright 23.08.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpinitpreprocessorsubset(multilayerperceptron network, double[,] xy, int setsize, int[] idx, int subsetsize) { int jmax = 0; int nin = 0; int nout = 0; int wcount = 0; int ntotal = 0; int istart = 0; int offs = 0; int ntype = 0; double[] means = new double[0]; double[] sigmas = new double[0]; double s = 0; int npoints = 0; int i = 0; int j = 0; alglib.ap.assert(setsize>=0, "MLPInitPreprocessorSubset: SetSize<0"); if( subsetsize<0 ) { mlpinitpreprocessor(network, xy, setsize); return; } alglib.ap.assert(subsetsize<=alglib.ap.len(idx), "MLPInitPreprocessorSubset: SubsetSize>Length(Idx)"); npoints = setsize; for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(idx[i]>=0, "MLPInitPreprocessorSubset: incorrect index of XY row(Idx[I]<0)"); alglib.ap.assert(idx[i]<=npoints-1, "MLPInitPreprocessorSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)"); } mlpproperties(network, ref nin, ref nout, ref wcount); ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Means/Sigmas // if( mlpissoftmax(network) ) { jmax = nin-1; } else { jmax = nin+nout-1; } means = new double[jmax+1]; sigmas = new double[jmax+1]; for(i=0; i<=jmax; i++) { means[i] = 0; sigmas[i] = 0; } for(i=0; i<=subsetsize-1; i++) { for(j=0; j<=jmax; j++) { means[j] = means[j]+xy[idx[i],j]; } } for(i=0; i<=jmax; i++) { means[i] = means[i]/subsetsize; } for(i=0; i<=subsetsize-1; i++) { for(j=0; j<=jmax; j++) { sigmas[j] = sigmas[j]+math.sqr(xy[idx[i],j]-means[j]); } } for(i=0; i<=jmax; i++) { sigmas[i] = Math.Sqrt(sigmas[i]/subsetsize); } // // Inputs // for(i=0; i<=nin-1; i++) { network.columnmeans[i] = means[i]; network.columnsigmas[i] = sigmas[i]; if( (double)(network.columnsigmas[i])==(double)(0) ) { network.columnsigmas[i] = 1; } } // // Outputs // if( !mlpissoftmax(network) ) { for(i=0; i<=nout-1; i++) { offs = istart+(ntotal-nout+i)*nfieldwidth; ntype = network.structinfo[offs+0]; // // Linear outputs // if( ntype==0 ) { network.columnmeans[nin+i] = means[nin+i]; network.columnsigmas[nin+i] = sigmas[nin+i]; if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } // // Bounded outputs (half-interval) // if( ntype==3 ) { s = means[nin+i]-network.columnmeans[nin+i]; if( (double)(s)==(double)(0) ) { s = Math.Sign(network.columnsigmas[nin+i]); } if( (double)(s)==(double)(0) ) { s = 1.0; } network.columnsigmas[nin+i] = Math.Sign(network.columnsigmas[nin+i])*Math.Abs(s); if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } } } } /************************************************************************* Internal subroutine. Initialization for preprocessor based on a subsample. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset, given by sparse matrix; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Idx - subset of SubsetSize elements, array[SubsetSize]: * Idx[I] stores row index in the original dataset which is given by XY. Gradient is calculated with respect to rows whose indexes are stored in Idx[]. * Idx[] must store correct indexes; this function throws an exception in case incorrect index (less than 0 or larger than rows(XY)) is given * Idx[] may store indexes in any order and even with repetitions. SubsetSize- number of elements in Idx[] array. 
OUTPUT: Network - neural network with initialized preprocessor. NOTE: when SubsetSize<0, the full dataset is used via a call to the MLPInitPreprocessorSparse function. -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpinitpreprocessorsparsesubset(multilayerperceptron network, sparse.sparsematrix xy, int setsize, int[] idx, int subsetsize) { int jmax = 0; int nin = 0; int nout = 0; int wcount = 0; int ntotal = 0; int istart = 0; int offs = 0; int ntype = 0; double[] means = new double[0]; double[] sigmas = new double[0]; double s = 0; int npoints = 0; int i = 0; int j = 0; alglib.ap.assert(setsize>=0, "MLPInitPreprocessorSparseSubset: SetSize<0"); if( subsetsize<0 ) { mlpinitpreprocessorsparse(network, xy, setsize); return; } alglib.ap.assert(subsetsize<=alglib.ap.len(idx), "MLPInitPreprocessorSparseSubset: SubsetSize>Length(Idx)"); npoints = setsize; for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(idx[i]>=0, "MLPInitPreprocessorSparseSubset: incorrect index of XY row(Idx[I]<0)"); alglib.ap.assert(idx[i]<=npoints-1, "MLPInitPreprocessorSparseSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)"); } mlpproperties(network, ref nin, ref nout, ref wcount); ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Means/Sigmas // if( mlpissoftmax(network) ) { jmax = nin-1; } else { jmax = nin+nout-1; } means = new double[jmax+1]; sigmas = new double[jmax+1]; for(i=0; i<=jmax; i++) { means[i] = 0; sigmas[i] = 0; } for(i=0; i<=subsetsize-1; i++) { sparse.sparsegetrow(xy, idx[i], ref network.xyrow); for(j=0; j<=jmax; j++) { means[j] = means[j]+network.xyrow[j]; } } for(i=0; i<=jmax; i++) { means[i] = means[i]/subsetsize; } for(i=0; i<=subsetsize-1; i++) { sparse.sparsegetrow(xy, idx[i], ref network.xyrow); for(j=0; j<=jmax; j++) { sigmas[j] = sigmas[j]+math.sqr(network.xyrow[j]-means[j]); } } for(i=0; i<=jmax; i++) { sigmas[i] = Math.Sqrt(sigmas[i]/subsetsize); } // // Inputs // for(i=0; i<=nin-1; i++) { network.columnmeans[i] = means[i]; network.columnsigmas[i] = sigmas[i]; if( (double)(network.columnsigmas[i])==(double)(0) ) { network.columnsigmas[i] = 1; } } // // Outputs // if( !mlpissoftmax(network) ) { for(i=0; i<=nout-1; i++) { offs = istart+(ntotal-nout+i)*nfieldwidth; ntype = network.structinfo[offs+0]; // // Linear outputs // if( ntype==0 ) { network.columnmeans[nin+i] = means[nin+i]; network.columnsigmas[nin+i] = sigmas[nin+i]; if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } // // Bounded outputs (half-interval) // if( ntype==3 ) { s = means[nin+i]-network.columnmeans[nin+i]; if( (double)(s)==(double)(0) ) { s = Math.Sign(network.columnsigmas[nin+i]); } if( (double)(s)==(double)(0) ) { s = 1.0; } network.columnsigmas[nin+i] = Math.Sign(network.columnsigmas[nin+i])*Math.Abs(s); if( (double)(network.columnsigmas[nin+i])==(double)(0) ) { network.columnsigmas[nin+i] = 1; } } } } } /************************************************************************* Returns information about initialized network: number of inputs, outputs, weights.
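A minimal sketch (illustrative; net is a previously created network):

    int nin = 0;
    int nout = 0;
    int wcount = 0;
    mlpproperties(net, ref nin, ref nout, ref wcount);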
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpproperties(multilayerperceptron network, ref int nin, ref int nout, ref int wcount) { nin = 0; nout = 0; wcount = 0; nin = network.structinfo[1]; nout = network.structinfo[2]; wcount = network.structinfo[4]; } /************************************************************************* Returns number of inputs. -- ALGLIB -- Copyright 19.10.2011 by Bochkanov Sergey *************************************************************************/ public static int mlpgetinputscount(multilayerperceptron network) { int result = 0; result = network.structinfo[1]; return result; } /************************************************************************* Returns number of outputs. -- ALGLIB -- Copyright 19.10.2011 by Bochkanov Sergey *************************************************************************/ public static int mlpgetoutputscount(multilayerperceptron network) { int result = 0; result = network.structinfo[2]; return result; } /************************************************************************* Returns number of weights. -- ALGLIB -- Copyright 19.10.2011 by Bochkanov Sergey *************************************************************************/ public static int mlpgetweightscount(multilayerperceptron network) { int result = 0; result = network.structinfo[4]; return result; } /************************************************************************* Tells whether network is SOFTMAX-normalized (i.e. classifier) or not. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static bool mlpissoftmax(multilayerperceptron network) { bool result = new bool(); result = network.structinfo[6]==1; return result; } /************************************************************************* This function returns total number of layers (including input, hidden and output layers). -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static int mlpgetlayerscount(multilayerperceptron network) { int result = 0; result = alglib.ap.len(network.hllayersizes); return result; } /************************************************************************* This function returns size of K-th layer. K=0 corresponds to input layer, K=CNT-1 corresponds to output layer. Size of the output layer is always equal to the number of outputs, although when we have softmax-normalized network, last neuron doesn't have any connections - it is just zero. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static int mlpgetlayersize(multilayerperceptron network, int k) { int result = 0; alglib.ap.assert(k>=0 && k<alglib.ap.len(network.hllayersizes), "MLPGetLayerSize: incorrect layer index"); result = network.hllayersizes[k]; return result; } /************************************************************************* This function returns offset/scaling coefficients for I-th input of the network. INPUT PARAMETERS: Network - network I - input index OUTPUT PARAMETERS: Mean - mean term Sigma - sigma term, guaranteed to be nonzero. I-th input is passed through linear transformation IN[i] = (IN[i]-Mean)/Sigma before feeding to the network. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpgetinputscaling(multilayerperceptron network, int i, ref double mean, ref double sigma) { mean = 0; sigma = 0; alglib.ap.assert(i>=0 && i<network.hllayersizes[0], "MLPGetInputScaling: incorrect (nonexistent) I"); mean = network.columnmeans[i]; sigma = network.columnsigmas[i]; if( (double)(sigma)==(double)(0) ) { sigma = 1; } } /************************************************************************* This function returns offset/scaling coefficients for I-th output of the network. INPUT PARAMETERS: Network - network I - output index OUTPUT PARAMETERS: Mean - mean term Sigma - sigma term, guaranteed to be nonzero. I-th output is passed through linear transformation OUT[i] = OUT[i]*Sigma+Mean before returning it to user; for SOFTMAX-normalized networks (Mean,Sigma) is always (0.0,1.0). -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpgetoutputscaling(multilayerperceptron network, int i, ref double mean, ref double sigma) { mean = 0; sigma = 0; alglib.ap.assert(i>=0 && i<network.hllayersizes[alglib.ap.len(network.hllayersizes)-1], "MLPGetOutputScaling: incorrect (nonexistent) I"); if( network.structinfo[6]==1 ) { mean = 0; sigma = 1; } else { mean = network.columnmeans[network.hllayersizes[0]+i]; sigma = network.columnsigmas[network.hllayersizes[0]+i]; } } /************************************************************************* This function returns information about I-th neuron of K-th layer. INPUT PARAMETERS: Network - network K - layer index I - neuron index (within layer) OUTPUT PARAMETERS: FKind - activation function type (used by MLPActivationFunction()), zero for input and linear neurons Threshold - also called offset, bias; zero for input neurons NOTE: this function throws an exception if layer or neuron with given index does not exist. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpgetneuroninfo(multilayerperceptron network, int k, int i, ref int fkind, ref double threshold) { int ncnt = 0; int istart = 0; int highlevelidx = 0; int activationoffset = 0; fkind = 0; threshold = 0; ncnt = alglib.ap.len(network.hlneurons)/hlnfieldwidth; istart = network.structinfo[5]; // // search // network.integerbuf[0] = k; network.integerbuf[1] = i; highlevelidx = apserv.recsearch(ref network.hlneurons, hlnfieldwidth, 2, 0, ncnt, network.integerbuf); alglib.ap.assert(highlevelidx>=0, "MLPGetNeuronInfo: incorrect (nonexistent) layer or neuron index"); // // 1.
find offset of the activation function record in the StructInfo array // if( network.hlneurons[highlevelidx*hlnfieldwidth+2]>=0 ) { activationoffset = istart+network.hlneurons[highlevelidx*hlnfieldwidth+2]*nfieldwidth; fkind = network.structinfo[activationoffset+0]; } else { fkind = 0; } if( network.hlneurons[highlevelidx*hlnfieldwidth+3]>=0 ) { threshold = network.weights[network.hlneurons[highlevelidx*hlnfieldwidth+3]]; } else { threshold = 0; } } /************************************************************************* This function returns information about connection from I0-th neuron of K0-th layer to I1-th neuron of K1-th layer. INPUT PARAMETERS: Network - network K0 - layer index I0 - neuron index (within layer) K1 - layer index I1 - neuron index (within layer) RESULT: connection weight (zero for non-existent connections) This function: 1. throws an exception if layer or neuron with given index does not exist; 2. returns zero if the neurons exist, but there is no connection between them. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static double mlpgetweight(multilayerperceptron network, int k0, int i0, int k1, int i1) { double result = 0; int ccnt = 0; int highlevelidx = 0; ccnt = alglib.ap.len(network.hlconnections)/hlconnfieldwidth; // // check params // alglib.ap.assert(k0>=0 && k0<alglib.ap.len(network.hllayersizes), "MLPGetWeight: incorrect (nonexistent) K0"); alglib.ap.assert(i0>=0 && i0<network.hllayersizes[k0], "MLPGetWeight: incorrect (nonexistent) I0"); alglib.ap.assert(k1>=0 && k1<alglib.ap.len(network.hllayersizes), "MLPGetWeight: incorrect (nonexistent) K1"); alglib.ap.assert(i1>=0 && i1<network.hllayersizes[k1], "MLPGetWeight: incorrect (nonexistent) I1"); // // search // network.integerbuf[0] = k0; network.integerbuf[1] = i0; network.integerbuf[2] = k1; network.integerbuf[3] = i1; highlevelidx = apserv.recsearch(ref network.hlconnections, hlconnfieldwidth, 4, 0, ccnt, network.integerbuf); if( highlevelidx>=0 ) { result = network.weights[network.hlconnections[highlevelidx*hlconnfieldwidth+4]]; } else { result = 0; } return result; } /************************************************************************* This function sets offset/scaling coefficients for I-th input of the network. INPUT PARAMETERS: Network - network I - input index Mean - mean term Sigma - sigma term (if zero, will be replaced by 1.0) NOTE: I-th input is passed through linear transformation IN[i] = (IN[i]-Mean)/Sigma before feeding to the network. This function sets Mean and Sigma. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpsetinputscaling(multilayerperceptron network, int i, double mean, double sigma) { alglib.ap.assert(i>=0 && i<network.hllayersizes[0], "MLPSetInputScaling: incorrect (nonexistent) I"); alglib.ap.assert(math.isfinite(mean), "MLPSetInputScaling: infinite or NAN Mean"); alglib.ap.assert(math.isfinite(sigma), "MLPSetInputScaling: infinite or NAN Sigma"); if( (double)(sigma)==(double)(0) ) { sigma = 1; } network.columnmeans[i] = mean; network.columnsigmas[i] = sigma; } /************************************************************************* This function sets offset/scaling coefficients for I-th output of the network. INPUT PARAMETERS: Network - network I - output index Mean - mean term Sigma - sigma term (if zero, will be replaced by 1.0) NOTE: I-th output is passed through linear transformation OUT[i] = OUT[i]*Sigma+Mean before returning it to user. This function sets Mean and Sigma; for SOFTMAX-normalized networks only (Mean,Sigma)=(0.0,1.0) is allowed, any other value causes an exception. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpsetoutputscaling(multilayerperceptron network, int i, double mean, double sigma) { alglib.ap.assert(i>=0 && i<network.hllayersizes[alglib.ap.len(network.hllayersizes)-1], "MLPSetOutputScaling: incorrect (nonexistent) I"); alglib.ap.assert(math.isfinite(mean), "MLPSetOutputScaling: infinite or NAN Mean"); alglib.ap.assert(math.isfinite(sigma), "MLPSetOutputScaling: infinite or NAN Sigma"); if( network.structinfo[6]==1 ) { alglib.ap.assert((double)(mean)==(double)(0), "MLPSetOutputScaling: you can not set non-zero Mean term for classifier network"); alglib.ap.assert((double)(sigma)==(double)(1), "MLPSetOutputScaling: you can not set non-unit Sigma term for classifier network"); } else { if( (double)(sigma)==(double)(0) ) { sigma = 1; } network.columnmeans[network.hllayersizes[0]+i] = mean; network.columnsigmas[network.hllayersizes[0]+i] = sigma; } } /************************************************************************* This function modifies information about I-th neuron of K-th layer. INPUT PARAMETERS: Network - network K - layer index I - neuron index (within layer) FKind - activation function type (used by MLPActivationFunction()) Threshold - also called offset, bias NOTE: this function throws an exception if layer or neuron with given index does not exist. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpsetneuroninfo(multilayerperceptron network, int k, int i, int fkind, double threshold) { int ncnt = 0; int istart = 0; int highlevelidx = 0; int activationoffset = 0; alglib.ap.assert(math.isfinite(threshold), "MLPSetNeuronInfo: infinite or NAN Threshold"); ncnt = alglib.ap.len(network.hlneurons)/hlnfieldwidth; istart = network.structinfo[5]; // // search // network.integerbuf[0] = k; network.integerbuf[1] = i; highlevelidx = apserv.recsearch(ref network.hlneurons, hlnfieldwidth, 2, 0, ncnt, network.integerbuf); alglib.ap.assert(highlevelidx>=0, "MLPSetNeuronInfo: incorrect (nonexistent) layer or neuron index"); // // activation function // if( network.hlneurons[highlevelidx*hlnfieldwidth+2]>=0 ) { activationoffset = istart+network.hlneurons[highlevelidx*hlnfieldwidth+2]*nfieldwidth; network.structinfo[activationoffset+0] = fkind; } else { alglib.ap.assert(fkind==0, "MLPSetNeuronInfo: you try to set activation function for neuron which can not have one"); } // // Threshold // if( network.hlneurons[highlevelidx*hlnfieldwidth+3]>=0 ) { network.weights[network.hlneurons[highlevelidx*hlnfieldwidth+3]] = threshold; } else { alglib.ap.assert((double)(threshold)==(double)(0), "MLPSetNeuronInfo: you try to set non-zero threshold for neuron which can not have one"); } } /************************************************************************* This function modifies information about connection from I0-th neuron of K0-th layer to I1-th neuron of K1-th layer. INPUT PARAMETERS: Network - network K0 - layer index I0 - neuron index (within layer) K1 - layer index I1 - neuron index (within layer) W - connection weight (must be zero for non-existent connections) This function: 1. throws an exception if layer or neuron with given index does not exist; 2.
throws an exception if you try to set non-zero weight for non-existent connection. -- ALGLIB -- Copyright 25.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpsetweight(multilayerperceptron network, int k0, int i0, int k1, int i1, double w) { int ccnt = 0; int highlevelidx = 0; ccnt = alglib.ap.len(network.hlconnections)/hlconnfieldwidth; // // check params // alglib.ap.assert(k0>=0 && k0<alglib.ap.len(network.hllayersizes), "MLPSetWeight: incorrect (nonexistent) K0"); alglib.ap.assert(i0>=0 && i0<network.hllayersizes[k0], "MLPSetWeight: incorrect (nonexistent) I0"); alglib.ap.assert(k1>=0 && k1<alglib.ap.len(network.hllayersizes), "MLPSetWeight: incorrect (nonexistent) K1"); alglib.ap.assert(i1>=0 && i1<network.hllayersizes[k1], "MLPSetWeight: incorrect (nonexistent) I1"); alglib.ap.assert(math.isfinite(w), "MLPSetWeight: infinite or NAN weight"); // // search // network.integerbuf[0] = k0; network.integerbuf[1] = i0; network.integerbuf[2] = k1; network.integerbuf[3] = i1; highlevelidx = apserv.recsearch(ref network.hlconnections, hlconnfieldwidth, 4, 0, ccnt, network.integerbuf); if( highlevelidx>=0 ) { network.weights[network.hlconnections[highlevelidx*hlconnfieldwidth+4]] = w; } else { alglib.ap.assert((double)(w)==(double)(0), "MLPSetWeight: you try to set non-zero weight for non-existent connection"); } } /************************************************************************* Neural network activation function INPUT PARAMETERS: NET - neuron input K - function index (zero for linear function) OUTPUT PARAMETERS: F - function DF - its derivative D2F - its second derivative -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpactivationfunction(double net, int k, ref double f, ref double df, ref double d2f) { double net2 = 0; double arg = 0; double root = 0; double r = 0; f = 0; df = 0; d2f = 0; if( k==0 || k==-5 ) { f = net; df = 1; d2f = 0; return; } if( k==1 ) { // // TanH activation function // if( (double)(Math.Abs(net))<(double)(100) ) { f = Math.Tanh(net); } else { f = Math.Sign(net); } df = 1-math.sqr(f); d2f = -(2*f*df); return; } if( k==3 ) { // // EX activation function // if( (double)(net)>=(double)(0) ) { net2 = net*net; arg = net2+1; root = Math.Sqrt(arg); f = net+root; r = net/root; df = 1+r; d2f = (root-net*r)/arg; } else { f = Math.Exp(net); df = f; d2f = f; } return; } if( k==2 ) { f = Math.Exp(-math.sqr(net)); df = -(2*net*f); d2f = -(2*(f+df*net)); return; } f = 0; df = 0; d2f = 0; } /************************************************************************* Processing INPUT PARAMETERS: Network - neural network X - input vector, array[0..NIn-1]. OUTPUT PARAMETERS: Y - result. Regression estimate when solving regression task, vector of posterior probabilities for classification task. See also MLPProcessI -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpprocess(multilayerperceptron network, double[] x, ref double[] y) { if( alglib.ap.len(y)<network.structinfo[2] ) { y = new double[network.structinfo[2]]; } mlpinternalprocessvector(network.structinfo, network.weights, network.columnmeans, network.columnsigmas, ref network.neurons, ref network.dfdnet, x, ref y); } /************************************************************************* 'interactive' variant of MLPProcess for languages like Python which support constructs like "Y = MLPProcess(NN,X)" and interactive mode of the interpreter. This function allocates new array on each call, so it is significantly slower than its 'non-interactive' counterpart, but it is more convenient when you call it from command line. -- ALGLIB -- Copyright 21.09.2010 by Bochkanov Sergey *************************************************************************/ public static void mlpprocessi(multilayerperceptron network, double[] x, ref double[] y) { y = new double[0]; mlpprocess(network, x, ref y); } /************************************************************************* Error of the neural network on dense dataset. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static double mlperror(multilayerperceptron network, double[,] xy, int npoints) { double result = 0; int i = 0; int k = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int i_ = 0; int i1_ = 0; mlpproperties(network, ref nin, ref nout, ref wcount); result = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nin-1;i_++) { network.x[i_] = xy[i,i_]; } mlpprocess(network, network.x, ref network.y); if( mlpissoftmax(network) ) { // // class labels // k = (int)Math.Round(xy[i,nin]); if( k>=0 && k<nout ) { network.y[k] = network.y[k]-1; } } else { // // real outputs // i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { network.y[i_] = network.y[i_] - xy[i,i_+i1_]; } } e = 0.0; for(i_=0; i_<=nout-1;i_++) { e += network.y[i_]*network.y[i_]; } result = result+e/2; } return result; } /************************************************************************* Error of the neural network on dataset given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
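For example (illustrative), a classifier with NIn=2 and NClasses=3 stores a two-point dataset as the 2x3 matrix {{0.1,0.7,2},{0.4,0.3,0}}, whose last column holds the class numbers 2 and 0; a regression network with NIn=2, NOut=1 would store real-valued targets in that column instead.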
-- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static double mlperrorsparse(multilayerperceptron network, sparse.sparsematrix xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; bool iscls = new bool(); int i = 0; int j = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(npoints>=0, "MLPErrorSparse: NPoints<0."); if( npoints==0 ) { result = 0; return result; } alglib.ap.assert(sparse.sparseiscrs(xy), "MLPErrorSparse: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); if( !iscls ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPErrorSparse: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPErrorSparse: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPErrorSparse: sparse matrix XY contains incorrect class number (class number is <0 or >=NClasses)."); } } } result = 0; for(i=0; i<=npoints-1; i++) { sparse.sparsegetrow(xy, i, ref network.xyrow); for(i_=0; i_<=nin-1;i_++) { network.x[i_] = network.xyrow[i_]; } mlpprocess(network, network.x, ref network.y); if( iscls ) { // // Class labels outputs // j = (int)Math.Round(network.xyrow[nin]); alglib.ap.assert(j>=0 && j<nout, "MLPErrorSparse: invalid dataset (J<0 or J>=NOut when J is class number)"); network.y[j] = network.y[j]-1; } else { // // Real outputs // i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { network.y[i_] = network.y[i_] - network.xyrow[i_+i1_]; } } e = 0.0; for(i_=0; i_<=nout-1;i_++) { e += network.y[i_]*network.y[i_]; } result = result+e/2; } return result; } /************************************************************************* Natural error function for neural network, internal subroutine. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static double mlperrorn(multilayerperceptron network, double[,] xy, int ssize) { double result = 0; int i = 0; int k = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int i_ = 0; int i1_ = 0; mlpproperties(network, ref nin, ref nout, ref wcount); result = 0; for(i=0; i<=ssize-1; i++) { // // Process vector // for(i_=0; i_<=nin-1;i_++) { network.x[i_] = xy[i,i_]; } mlpprocess(network, network.x, ref network.y); // // Update error function // if( network.structinfo[6]==0 ) { // // Least squares error function // i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { network.y[i_] = network.y[i_] - xy[i,i_+i1_]; } e = 0.0; for(i_=0; i_<=nout-1;i_++) { e += network.y[i_]*network.y[i_]; } result = result+e/2; } else { // // Cross-entropy error function // k = (int)Math.Round(xy[i,nin]); if( k>=0 && k<nout ) { if( (double)(network.y[k])>(double)(0) ) { result = result-Math.Log(network.y[k]); } else { result = result-Math.Log(math.minrealnumber); } } } } return result; } /************************************************************************* Classification error of the neural network on a dense dataset: number of incorrectly classified cases. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static int mlpclserror(multilayerperceptron network, double[,] xy, int ssize) { int result = 0; int i = 0; int j = 0; int nin = 0; int nout = 0; int wcount = 0; double[] workx = new double[0]; double[] worky = new double[0]; int nn = 0; int ns = 0; int nmax = 0; int i_ = 0; mlpproperties(network, ref nin, ref nout, ref wcount); workx = new double[nin-1+1]; worky = new double[nout-1+1]; result = 0; for(i=0; i<=ssize-1; i++) { // // Process // for(i_=0; i_<=nin-1;i_++) { workx[i_] = xy[i,i_]; } mlpprocess(network, workx, ref worky); // // Network version of the answer // nmax = 0; for(j=0; j<=nout-1; j++) { if( (double)(worky[j])>(double)(worky[nmax]) ) { nmax = j; } } nn = nmax; // // Right answer // if( mlpissoftmax(network) ) { ns = (int)Math.Round(xy[i,nin]); } else { nmax = 0; for(j=0; j<=nout-1; j++) { if( (double)(xy[i,nin+j])>(double)(xy[i,nin+nmax]) ) { nmax = j; } } ns = nmax; } // // compare // if( nn!=ns ) { result = result+1; } } return result; } /************************************************************************* Relative classification error on the test set.
INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Fraction of incorrectly classified cases (error rate in [0,1]). Works both for classifier networks and general purpose networks used as classifiers. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 25.12.2008 by Bochkanov Sergey *************************************************************************/ public static double mlprelclserror(multilayerperceptron network, double[,] xy, int npoints) { double result = 0; result = (double)mlpclserror(network, xy, npoints)/(double)npoints; return result; } /************************************************************************* Relative classification error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Fraction of incorrectly classified cases (error rate in [0,1]). Works both for classifier networks and general purpose networks used as classifiers. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
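A minimal sketch for the dense variant (illustrative; net, xytest and npoints are ours):

    double err = mlprelclserror(net, xytest, npoints);   // error rate in [0,1]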
-- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlprelclserrorsparse(multilayerperceptron network, sparse.sparsematrix xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; bool iscls = new bool(); double[] workx = new double[0]; double[] worky = new double[0]; int nn = 0; int ns = 0; int nmax = 0; int i = 0; int j = 0; int i_ = 0; alglib.ap.assert(npoints>=0, "MLPRelClsErrorSparse: NPoints<0."); if( npoints==0 ) { result = 0; return result; } alglib.ap.assert(sparse.sparseiscrs(xy), "MLPRelClsErrorSparse: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); if( !iscls ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPRelClsErrorSparse: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPRelClsErrorSparse: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPRelClsErrorSparse: sparse matrix XY contains incorrect class number (class number is <0 or >=NClasses)."); } } } workx = new double[nin]; worky = new double[nout]; result = 0; for(i=0; i<=npoints-1; i++) { // // Process // sparse.sparsegetrow(xy, i, ref network.xyrow); for(i_=0; i_<=nin-1;i_++) { workx[i_] = network.xyrow[i_]; } mlpprocess(network, workx, ref worky); // // Network version of the answer // nmax = 0; for(j=0; j<=nout-1; j++) { if( (double)(worky[j])>(double)(worky[nmax]) ) { nmax = j; } } nn = nmax; // // Right answer // if( iscls ) { ns = (int)Math.Round(network.xyrow[nin]); } else { nmax = 0; for(j=0; j<=nout-1; j++) { if( (double)(network.xyrow[nin+j])>(double)(network.xyrow[nin+nmax]) ) { nmax = j; } } ns = nmax; } // // compare // if( nn!=ns ) { result = result+1; } } result = result/npoints; return result; } /************************************************************************* Average cross-entropy (in bits per element) on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: CrossEntropy/(NPoints*LN(2)). Zero if network solves regression task. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
-- ALGLIB -- Copyright 08.01.2009 by Bochkanov Sergey *************************************************************************/ public static double mlpavgce(multilayerperceptron network, double[,] xy, int npoints) { double result = 0; if( mlpissoftmax(network) ) { result = mlperrorn(network, xy, npoints)/(npoints*Math.Log(2)); } else { result = 0; } return result; } /************************************************************************* Average cross-entropy (in bits per element) on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: CrossEntropy/(NPoints*LN(2)). Zero if network solves regression task. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 9.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlpavgcesparse(multilayerperceptron network, sparse.sparsematrix xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; int i = 0; int j = 0; int i_ = 0; alglib.ap.assert(npoints>=0, "MLPAvgCESparse: NPoints<0."); if( npoints==0 ) { result = 0; return result; } alglib.ap.assert(sparse.sparseiscrs(xy), "MLPAvgCESparse: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); if( !mlpissoftmax(network) ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPAvgCESparse: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPAvgCESparse: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPAvgCESparse: sparse matrix XY contains incorrect class number (class number is <0 or >=NClasses)."); } } } result = 0; if( mlpissoftmax(network) ) { for(i=0; i<=npoints-1; i++) { // // Process vector // sparse.sparsegetrow(xy, i, ref network.xyrow); for(i_=0; i_<=nin-1;i_++) { network.x[i_] = network.xyrow[i_]; } mlpprocess(network, network.x, ref network.y); // // Update cross-entropy error function // j = (int)Math.Round(network.xyrow[nin]); if( j>=0 && j<nout ) { if( (double)(network.y[j])>(double)(0) ) { result = result-Math.Log(network.y[j]); } else { result = result-Math.Log(math.minrealnumber); } } } result = result/(npoints*Math.Log(2)); } return result; } /************************************************************************* RMS error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Root mean square error. Its meaning for regression task is obvious. As for classification task, RMS error means error when estimating posterior probabilities. -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static double mlprmserror(multilayerperceptron network, double[,] xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; mlpproperties(network, ref nin, ref nout, ref wcount); result = Math.Sqrt(2*mlperror(network, xy, npoints)/(npoints*nout)); return result; } /************************************************************************* RMS error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Root mean square error. Its meaning for regression task is obvious. As for classification task, RMS error means error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlprmserrorsparse(multilayerperceptron network, sparse.sparsematrix xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; int i = 0; int j = 0; alglib.ap.assert(npoints>=0, "MLPRMSErrorSparse: NPoints<0."); if( npoints==0 ) { result = 0; return result; } alglib.ap.assert(sparse.sparseiscrs(xy), "MLPRMSErrorSparse: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); if( !mlpissoftmax(network) ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPRMSErrorSparse: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPRMSErrorSparse: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPRMSErrorSparse: sparse matrix XY contains incorrect class number (class number is <0 or >=NClasses)."); } } } result = Math.Sqrt(2*mlperrorsparse(network, xy, npoints)/(npoints*nout)); return result; } /************************************************************************* Average error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Its meaning for regression task is obvious. As for classification task, it means average error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
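A comparison sketch (illustrative; net, xytest and npoints are ours): the average error treats output deviations linearly, while the RMS variant above squares them first, so outliers weigh more in RMS:

    double avg = mlpavgerror(net, xytest, npoints);
    double rms = mlprmserror(net, xytest, npoints);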
-- ALGLIB -- Copyright 11.03.2008 by Bochkanov Sergey *************************************************************************/ public static double mlpavgerror(multilayerperceptron network, double[,] xy, int npoints) { double result = 0; int i = 0; int j = 0; int k = 0; int nin = 0; int nout = 0; int wcount = 0; int i_ = 0; mlpproperties(network, ref nin, ref nout, ref wcount); result = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nin-1;i_++) { network.x[i_] = xy[i,i_]; } mlpprocess(network, network.x, ref network.y); if( mlpissoftmax(network) ) { // // class labels // k = (int)Math.Round(xy[i,nin]); for(j=0; j<=nout-1; j++) { if( j==k ) { result = result+Math.Abs(1-network.y[j]); } else { result = result+Math.Abs(network.y[j]); } } } else { // // real outputs // for(j=0; j<=nout-1; j++) { result = result+Math.Abs(xy[i,nin+j]-network.y[j]); } } } result = result/(npoints*nout); return result; } /************************************************************************* Average error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Its meaning for regression task is obvious. As for classification task, it means average error when estimating posterior probabilities. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs, the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes, the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
-- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlpavgerrorsparse(multilayerperceptron network, sparse.sparsematrix xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; bool iscls = new bool(); int i = 0; int j = 0; int k = 0; alglib.ap.assert(npoints>=0, "MLPAvgErrorSparse: NPoints<0."); if( npoints==0 ) { result = 0; return result; } alglib.ap.assert(sparse.sparseiscrs(xy), "MLPAvgErrorSparse: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); if( !iscls ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPAvgErrorSparse: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPAvgErrorSparse: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPAvgErrorSparse: sparse matrix XY contains incorrect class number (negative or >=NClasses)."); } } } result = 0; for(i=0; i<=npoints-1; i++) { sparse.sparsegetrow(xy, i, ref network.x); mlpprocess(network, network.x, ref network.y); if( iscls ) { // // class labels // k = (int)Math.Round(network.x[nin]); for(j=0; j<=nout-1; j++) { if( j==k ) { result = result+Math.Abs(1-network.y[j]); } else { result = result+Math.Abs(network.y[j]); } } } else { // // real outputs // for(j=0; j<=nout-1; j++) { result = result+Math.Abs(network.x[nin+j]-network.y[j]); } } } result = result/(npoints*nout); return result; } /************************************************************************* Average relative error on the test set. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; NPoints - points count. RESULT: Its meaning for regression tasks is obvious. For classification tasks it is the average relative error made when estimating the posterior probability of belonging to the correct class. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
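NOTE (clarification of the formula): for regression tasks the returned value is SUM(|y[i,j]-t[i,j]|/|t[i,j]|)/K, where both the sum and the counter K run only over targets t[i,j] which are nonzero; samples with zero targets do not contribute. For classification tasks it is the average of |1-y[class(i)]| over all points, i.e. the average absolute error of the posterior probability assigned to the correct class.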
-- ALGLIB -- Copyright 11.03.2008 by Bochkanov Sergey *************************************************************************/ public static double mlpavgrelerror(multilayerperceptron network, double[,] xy, int npoints) { double result = 0; int i = 0; int j = 0; int k = 0; int lk = 0; int nin = 0; int nout = 0; int wcount = 0; int i_ = 0; mlpproperties(network, ref nin, ref nout, ref wcount); result = 0; k = 0; for(i=0; i<=npoints-1; i++) { for(i_=0; i_<=nin-1;i_++) { network.x[i_] = xy[i,i_]; } mlpprocess(network, network.x, ref network.y); if( mlpissoftmax(network) ) { // // class labels // lk = (int)Math.Round(xy[i,nin]); for(j=0; j<=nout-1; j++) { if( j==lk ) { result = result+Math.Abs(1-network.y[j]); k = k+1; } } } else { // // real outputs // for(j=0; j<=nout-1; j++) { if( (double)(xy[i,nin+j])!=(double)(0) ) { result = result+Math.Abs(xy[i,nin+j]-network.y[j])/Math.Abs(xy[i,nin+j]); k = k+1; } } } } if( k!=0 ) { result = result/k; } return result; } /************************************************************************* Average relative error on the test set given by sparse matrix. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. NPoints - points count, >=0. RESULT: Its meaning for regression task is obvious. As for classification task, it means average relative error when estimating posterior probability of belonging to the correct class. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses clases following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). 
-- ALGLIB -- Copyright 09.08.2012 by Bochkanov Sergey *************************************************************************/ public static double mlpavgrelerrorsparse(multilayerperceptron network, sparse.sparsematrix xy, int npoints) { double result = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; bool iscls = new bool(); int i = 0; int j = 0; int k = 0; int lk = 0; alglib.ap.assert(npoints>=0, "MLPAvgRelErrorSparse: NPoints<0."); if( npoints==0 ) { result = 0; return result; } alglib.ap.assert(sparse.sparseiscrs(xy), "MLPAvgRelErrorSparse: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); if( !iscls ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPAvgRelErrorSparse: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPAvgRelErrorSparse: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPAvgRelErrorSparse: sparse matrix XY contains incorrect class number (negative or >=NClasses)."); } } } result = 0; k = 0; for(i=0; i<=npoints-1; i++) { sparse.sparsegetrow(xy, i, ref network.x); mlpprocess(network, network.x, ref network.y); if( iscls ) { // // class labels // lk = (int)Math.Round(network.x[nin]); for(j=0; j<=nout-1; j++) { if( j==lk ) { result = result+Math.Abs(1-network.y[j]); k = k+1; } } } else { // // real outputs // for(j=0; j<=nout-1; j++) { if( (double)(network.x[nin+j])!=(double)(0) ) { result = result+Math.Abs(network.x[nin+j]-network.y[j])/Math.Abs(network.x[nin+j]); k = k+1; } } } } if( k!=0 ) { result = result/k; } return result; } /************************************************************************* Gradient calculation INPUT PARAMETERS: Network - network initialized with one of the network creation funcs X - input vector, length of array must be at least NIn DesiredY- desired outputs, length of array must be at least NOut Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead.
OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgrad(multilayerperceptron network, double[] x, double[] desiredy, ref double e, ref double[] grad) { int i = 0; int nout = 0; int ntotal = 0; e = 0; // // Alloc // apserv.rvectorsetlengthatleast(ref grad, network.structinfo[4]); // // Prepare dError/dOut, internal structures // mlpprocess(network, x, ref network.y); nout = network.structinfo[2]; ntotal = network.structinfo[3]; e = 0; for(i=0; i<=ntotal-1; i++) { network.derror[i] = 0; } for(i=0; i<=nout-1; i++) { network.derror[ntotal-nout+i] = network.y[i]-desiredy[i]; e = e+math.sqr(network.y[i]-desiredy[i])/2; } // // gradient // mlpinternalcalculategradient(network, network.neurons, network.weights, ref network.derror, ref grad, false); } /************************************************************************* Gradient calculation (natural error function is used) INPUT PARAMETERS: Network - network initialized with one of the network creation funcs X - input vector, length of array must be at least NIn DesiredY- desired outputs, length of array must be at least NOut Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, sum-of-squares for regression networks, cross-entropy for classification networks. Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgradn(multilayerperceptron network, double[] x, double[] desiredy, ref double e, ref double[] grad) { double s = 0; int i = 0; int nout = 0; int ntotal = 0; e = 0; // // Alloc // apserv.rvectorsetlengthatleast(ref grad, network.structinfo[4]); // // Prepare dError/dOut, internal structures // mlpprocess(network, x, ref network.y); nout = network.structinfo[2]; ntotal = network.structinfo[3]; for(i=0; i<=ntotal-1; i++) { network.derror[i] = 0; } e = 0; if( network.structinfo[6]==0 ) { // // Regression network, least squares // for(i=0; i<=nout-1; i++) { network.derror[ntotal-nout+i] = network.y[i]-desiredy[i]; e = e+math.sqr(network.y[i]-desiredy[i])/2; } } else { // // Classification network, cross-entropy // s = 0; for(i=0; i<=nout-1; i++) { s = s+desiredy[i]; } for(i=0; i<=nout-1; i++) { network.derror[ntotal-nout+i] = s*network.y[i]-desiredy[i]; e = e+safecrossentropy(desiredy[i], network.y[i]); } } // // gradient // mlpinternalcalculategradient(network, network.neurons, network.weights, ref network.derror, ref grad, true); } /************************************************************************* Batch gradient calculation for a set of inputs/outputs INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SSize - number of elements in XY Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. 
OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatch(multilayerperceptron network, double[,] xy, int ssize, ref double e, ref double[] grad) { int nin = 0; int nout = 0; int wcount = 0; int i = 0; e = 0; // // Alloc // mlpproperties(network, ref nin, ref nout, ref wcount); apserv.rvectorsetlengthatleast(ref grad, wcount); for(i=0; i<=wcount-1; i++) { grad[i] = 0; } e = 0; i = 0; while( i<=ssize-1 ) { mlpchunkedgradient(network, xy, i, Math.Min(ssize, i+chunksize)-i, ref e, ref grad, false); i = i+chunksize; } } /************************************************************************* Batch gradient calculation for a set of inputs/outputs given by sparse matrices INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SSize - number of elements in XY Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatchsparse(multilayerperceptron network, sparse.sparsematrix xy, int ssize, ref double e, ref double[] grad) { int nin = 0; int nout = 0; int wcount = 0; int csize = 0; int rowsize = 0; int i = 0; int j = 0; int i_ = 0; e = 0; // // Alloc // mlpproperties(network, ref nin, ref nout, ref wcount); apserv.rvectorsetlengthatleast(ref grad, wcount); if( mlpissoftmax(network) ) { rowsize = nin+1; apserv.rmatrixsetlengthatleast(ref network.xy, chunksize, rowsize); } else { rowsize = nin+nout; apserv.rmatrixsetlengthatleast(ref network.xy, chunksize, rowsize); } for(i=0; i<=wcount-1; i++) { grad[i] = 0; } e = 0; i = 0; while( i<=ssize-1 ) { csize = Math.Min(ssize, i+chunksize)-i; for(j=0; j<=csize-1; j++) { sparse.sparsegetrow(xy, i+j, ref network.xyrow); for(i_=0; i_<=rowsize-1;i_++) { network.xy[j,i_] = network.xyrow[i_]; } } mlpchunkedgradient(network, network.xy, 0, csize, ref e, ref grad, false); i = i+chunksize; } } /************************************************************************* Batch gradient calculation for a subset of dataset INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Idx - subset of SubsetSize elements, array[SubsetSize]: * Idx[I] stores row index in the original dataset which is given by XY. Gradient is calculated with respect to rows whose indexes are stored in Idx[]. * Idx[] must store correct indexes; this function throws an exception in case incorrect index (less than 0 or larger than rows(XY)) is given * Idx[] may store indexes in any order and even with repetitions. SubsetSize- number of elements in Idx[] array. Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. 
It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] NOTE: when SubsetSize<0 is used full dataset by call MLPGradBatch function. -- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatchsubset(multilayerperceptron network, double[,] xy, int setsize, int[] idx, int subsetsize, ref double e, ref double[] grad) { int nin = 0; int nout = 0; int wcount = 0; int csize = 0; int npoints = 0; int rowsize = 0; int i = 0; int j = 0; int i_ = 0; e = 0; alglib.ap.assert(setsize>=0, "MLPGradBatchSubset: SetSize<0"); if( subsetsize<0 ) { mlpgradbatch(network, xy, setsize, ref e, ref grad); return; } alglib.ap.assert(subsetsize<=alglib.ap.len(idx), "MLPGradBatchSubset: SubsetSize>Length(Idx)"); npoints = setsize; for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(idx[i]>=0, "MLPGradBatchSubset: incorrect index of XY row(Idx[I]<0)"); alglib.ap.assert(idx[i]<=npoints-1, "MLPGradBatchSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)"); } // // Alloc // mlpproperties(network, ref nin, ref nout, ref wcount); apserv.rvectorsetlengthatleast(ref grad, wcount); if( mlpissoftmax(network) ) { rowsize = nin+1; apserv.rmatrixsetlengthatleast(ref network.xy, chunksize, rowsize); } else { rowsize = nin+nout; apserv.rmatrixsetlengthatleast(ref network.xy, chunksize, rowsize); } for(i=0; i<=wcount-1; i++) { grad[i] = 0; } e = 0; i = 0; while( i<=subsetsize-1 ) { csize = Math.Min(subsetsize, i+chunksize)-i; for(j=0; j<=csize-1; j++) { for(i_=0; i_<=rowsize-1;i_++) { network.xy[j,i_] = xy[idx[i+j],i_]; } } mlpchunkedgradient(network, network.xy, 0, csize, ref e, ref grad, false); i = i+chunksize; } } /************************************************************************* Batch gradient calculation for a set of inputs/outputs for a subset of dataset given by boolean mask. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Idx - subset of SubsetSize elements, array[SubsetSize]: * Idx[I] stores row index in the original dataset which is given by XY. Gradient is calculated with respect to rows whose indexes are stored in Idx[]. * Idx[] must store correct indexes; this function throws an exception in case incorrect index (less than 0 or larger than rows(XY)) is given * Idx[] may store indexes in any order and even with repetitions. SubsetSize- number of elements in Idx[] array. Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, SUM(sqr(y[i]-desiredy[i])/2,i) Grad - gradient of E with respect to weights of network, array[WCount] NOTE: when SubsetSize<0 is used full dataset by call MLPGradBatchSparse function. 
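EXAMPLE (illustrative): with SubsetSize=3 and Idx={0,0,5} the gradient is accumulated over row 0 twice and row 5 once, i.e. repeated indexes increase the weight of the corresponding sample in both E and Grad.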
-- ALGLIB -- Copyright 26.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpgradbatchsparsesubset(multilayerperceptron network, sparse.sparsematrix xy, int setsize, int[] idx, int subsetsize, ref double e, ref double[] grad) { int nin = 0; int nout = 0; int wcount = 0; int csize = 0; int npoints = 0; int rowsize = 0; int i = 0; int j = 0; int i_ = 0; e = 0; alglib.ap.assert(setsize>=0, "MLPGradBatchSparseSubset: SetSize<0"); if( subsetsize<0 ) { mlpgradbatchsparse(network, xy, setsize, ref e, ref grad); return; } alglib.ap.assert(subsetsize<=alglib.ap.len(idx), "MLPGradBatchSparseSubset: SubsetSize>Length(Idx)"); npoints = setsize; for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(idx[i]>=0, "MLPGradBatchSparseSubset: incorrect index of XY row(Idx[I]<0)"); alglib.ap.assert(idx[i]<=npoints-1, "MLPGradBatchSparseSubset: incorrect index of XY row(Idx[I]>Rows(XY)-1)"); } // // Alloc // mlpproperties(network, ref nin, ref nout, ref wcount); apserv.rvectorsetlengthatleast(ref grad, wcount); if( mlpissoftmax(network) ) { rowsize = nin+1; apserv.rmatrixsetlengthatleast(ref network.xy, chunksize, rowsize); } else { rowsize = nin+nout; apserv.rmatrixsetlengthatleast(ref network.xy, chunksize, rowsize); } for(i=0; i<=wcount-1; i++) { grad[i] = 0; } e = 0; i = 0; while( i<=subsetsize-1 ) { csize = Math.Min(subsetsize, i+chunksize)-i; for(j=0; j<=csize-1; j++) { sparse.sparsegetrow(xy, idx[i+j], ref network.xyrow); for(i_=0; i_<=rowsize-1;i_++) { network.xy[j,i_] = network.xyrow[i_]; } } mlpchunkedgradient(network, network.xy, 0, csize, ref e, ref grad, false); i = i+chunksize; } } /************************************************************************* Batch gradient calculation for a set of inputs/outputs (natural error function is used) INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - set of inputs/outputs; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SSize - number of elements in XY Grad - possibly preallocated array. If size of array is smaller than WCount, it will be reallocated. It is recommended to reuse previously allocated array to reduce allocation overhead. OUTPUT PARAMETERS: E - error function, sum-of-squares for regression networks, cross-entropy for classification networks. Grad - gradient of E with respect to weights of network, array[WCount] -- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpgradnbatch(multilayerperceptron network, double[,] xy, int ssize, ref double e, ref double[] grad) { int i = 0; int nin = 0; int nout = 0; int wcount = 0; e = 0; // // Alloc // mlpproperties(network, ref nin, ref nout, ref wcount); apserv.rvectorsetlengthatleast(ref grad, wcount); for(i=0; i<=wcount-1; i++) { grad[i] = 0; } e = 0; i = 0; while( i<=ssize-1 ) { mlpchunkedgradient(network, xy, i, Math.Min(ssize, i+chunksize)-i, ref e, ref grad, true); i = i+chunksize; } } /************************************************************************* Batch Hessian calculation (natural error function) using R-algorithm. Internal subroutine. -- ALGLIB -- Copyright 26.01.2008 by Bochkanov Sergey. Hessian calculation based on R-algorithm described in "Fast Exact Multiplication by the Hessian", B. A. Pearlmutter, Neural Computation, 1994. 
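NOTE (brief summary of the method): Pearlmutter's R-operator is the directional derivative R_v{f(W)} = d/dr f(W+r*v) at r=0, and it yields the exact Hessian-vector product H*v = R_v{grad E(W)} for the cost of one extra forward/backward pass. The code below runs the forward and backward R-passes for all WCount coordinate directions simultaneously (the RX/RY/RDX/RDY matrices keep one column per weight), which assembles the full Hessian.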
*************************************************************************/ public static void mlphessiannbatch(multilayerperceptron network, double[,] xy, int ssize, ref double e, ref double[] grad, ref double[,] h) { e = 0; mlphessianbatchinternal(network, xy, ssize, true, ref e, ref grad, ref h); } /************************************************************************* Batch Hessian calculation using R-algorithm. Internal subroutine. -- ALGLIB -- Copyright 26.01.2008 by Bochkanov Sergey. Hessian calculation based on R-algorithm described in "Fast Exact Multiplication by the Hessian", B. A. Pearlmutter, Neural Computation, 1994. *************************************************************************/ public static void mlphessianbatch(multilayerperceptron network, double[,] xy, int ssize, ref double e, ref double[] grad, ref double[,] h) { e = 0; mlphessianbatchinternal(network, xy, ssize, false, ref e, ref grad, ref h); } /************************************************************************* Internal subroutine, shouldn't be called by user. *************************************************************************/ public static void mlpinternalprocessvector(int[] structinfo, double[] weights, double[] columnmeans, double[] columnsigmas, ref double[] neurons, ref double[] dfdnet, double[] x, ref double[] y) { int i = 0; int n1 = 0; int n2 = 0; int w1 = 0; int w2 = 0; int ntotal = 0; int nin = 0; int nout = 0; int istart = 0; int offs = 0; double net = 0; double f = 0; double df = 0; double d2f = 0; double mx = 0; bool perr = new bool(); int i_ = 0; int i1_ = 0; // // Read network geometry // nin = structinfo[1]; nout = structinfo[2]; ntotal = structinfo[3]; istart = structinfo[5]; // // Inputs standartisation and putting in the network // for(i=0; i<=nin-1; i++) { if( (double)(columnsigmas[i])!=(double)(0) ) { neurons[i] = (x[i]-columnmeans[i])/columnsigmas[i]; } else { neurons[i] = x[i]-columnmeans[i]; } } // // Process network // for(i=0; i<=ntotal-1; i++) { offs = istart+i*nfieldwidth; if( structinfo[offs+0]>0 || structinfo[offs+0]==-5 ) { // // Activation function // mlpactivationfunction(neurons[structinfo[offs+2]], structinfo[offs+0], ref f, ref df, ref d2f); neurons[i] = f; dfdnet[i] = df; continue; } if( structinfo[offs+0]==0 ) { // // Adaptive summator // n1 = structinfo[offs+2]; n2 = n1+structinfo[offs+1]-1; w1 = structinfo[offs+3]; w2 = w1+structinfo[offs+1]-1; i1_ = (n1)-(w1); net = 0.0; for(i_=w1; i_<=w2;i_++) { net += weights[i_]*neurons[i_+i1_]; } neurons[i] = net; dfdnet[i] = 1.0; continue; } if( structinfo[offs+0]<0 ) { perr = true; if( structinfo[offs+0]==-2 ) { // // input neuron, left unchanged // perr = false; } if( structinfo[offs+0]==-3 ) { // // "-1" neuron // neurons[i] = -1; perr = false; } if( structinfo[offs+0]==-4 ) { // // "0" neuron // neurons[i] = 0; perr = false; } alglib.ap.assert(!perr, "MLPInternalProcessVector: internal error - unknown neuron type!"); continue; } } // // Extract result // i1_ = (ntotal-nout) - (0); for(i_=0; i_<=nout-1;i_++) { y[i_] = neurons[i_+i1_]; } // // Softmax post-processing or standardisation if needed // alglib.ap.assert(structinfo[6]==0 || structinfo[6]==1, "MLPInternalProcessVector: unknown normalization type!"); if( structinfo[6]==1 ) { // // Softmax // mx = y[0]; for(i=1; i<=nout-1; i++) { mx = Math.Max(mx, y[i]); } net = 0; for(i=0; i<=nout-1; i++) { y[i] = Math.Exp(y[i]-mx); net = net+y[i]; } for(i=0; i<=nout-1; i++) { y[i] = y[i]/net; } } else { // // Standardisation // for(i=0; i<=nout-1; i++) { y[i] = 
y[i]*columnsigmas[nin+i]+columnmeans[nin+i]; } } } /************************************************************************* Serializer: allocation -- ALGLIB -- Copyright 14.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpalloc(alglib.serializer s, multilayerperceptron network) { int i = 0; int j = 0; int k = 0; int fkind = 0; double threshold = 0; double v0 = 0; double v1 = 0; int nin = 0; int nout = 0; nin = network.hllayersizes[0]; nout = network.hllayersizes[alglib.ap.len(network.hllayersizes)-1]; s.alloc_entry(); s.alloc_entry(); s.alloc_entry(); apserv.allocintegerarray(s, network.hllayersizes, -1); for(i=1; i<=alglib.ap.len(network.hllayersizes)-1; i++) { for(j=0; j<=network.hllayersizes[i]-1; j++) { mlpgetneuroninfo(network, i, j, ref fkind, ref threshold); s.alloc_entry(); s.alloc_entry(); for(k=0; k<=network.hllayersizes[i-1]-1; k++) { s.alloc_entry(); } } } for(j=0; j<=nin-1; j++) { mlpgetinputscaling(network, j, ref v0, ref v1); s.alloc_entry(); s.alloc_entry(); } for(j=0; j<=nout-1; j++) { mlpgetoutputscaling(network, j, ref v0, ref v1); s.alloc_entry(); s.alloc_entry(); } } /************************************************************************* Serializer: serialization -- ALGLIB -- Copyright 14.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpserialize(alglib.serializer s, multilayerperceptron network) { int i = 0; int j = 0; int k = 0; int fkind = 0; double threshold = 0; double v0 = 0; double v1 = 0; int nin = 0; int nout = 0; nin = network.hllayersizes[0]; nout = network.hllayersizes[alglib.ap.len(network.hllayersizes)-1]; s.serialize_int(scodes.getmlpserializationcode()); s.serialize_int(mlpfirstversion); s.serialize_bool(mlpissoftmax(network)); apserv.serializeintegerarray(s, network.hllayersizes, -1); for(i=1; i<=alglib.ap.len(network.hllayersizes)-1; i++) { for(j=0; j<=network.hllayersizes[i]-1; j++) { mlpgetneuroninfo(network, i, j, ref fkind, ref threshold); s.serialize_int(fkind); s.serialize_double(threshold); for(k=0; k<=network.hllayersizes[i-1]-1; k++) { s.serialize_double(mlpgetweight(network, i-1, k, i, j)); } } } for(j=0; j<=nin-1; j++) { mlpgetinputscaling(network, j, ref v0, ref v1); s.serialize_double(v0); s.serialize_double(v1); } for(j=0; j<=nout-1; j++) { mlpgetoutputscaling(network, j, ref v0, ref v1); s.serialize_double(v0); s.serialize_double(v1); } } /************************************************************************* Serializer: unserialization -- ALGLIB -- Copyright 14.03.2011 by Bochkanov Sergey *************************************************************************/ public static void mlpunserialize(alglib.serializer s, multilayerperceptron network) { int i0 = 0; int i1 = 0; int i = 0; int j = 0; int k = 0; int fkind = 0; double threshold = 0; double v0 = 0; double v1 = 0; int nin = 0; int nout = 0; bool issoftmax = new bool(); int[] layersizes = new int[0]; // // check correctness of header // i0 = s.unserialize_int(); alglib.ap.assert(i0==scodes.getmlpserializationcode(), "MLPUnserialize: stream header corrupted"); i1 = s.unserialize_int(); alglib.ap.assert(i1==mlpfirstversion, "MLPUnserialize: stream header corrupted"); // // Create network // issoftmax = s.unserialize_bool(); apserv.unserializeintegerarray(s, ref layersizes); alglib.ap.assert((alglib.ap.len(layersizes)==2 || alglib.ap.len(layersizes)==3) || alglib.ap.len(layersizes)==4, "MLPUnserialize: too many hidden 
layers!"); nin = layersizes[0]; nout = layersizes[alglib.ap.len(layersizes)-1]; if( alglib.ap.len(layersizes)==2 ) { if( issoftmax ) { mlpcreatec0(layersizes[0], layersizes[1], network); } else { mlpcreate0(layersizes[0], layersizes[1], network); } } if( alglib.ap.len(layersizes)==3 ) { if( issoftmax ) { mlpcreatec1(layersizes[0], layersizes[1], layersizes[2], network); } else { mlpcreate1(layersizes[0], layersizes[1], layersizes[2], network); } } if( alglib.ap.len(layersizes)==4 ) { if( issoftmax ) { mlpcreatec2(layersizes[0], layersizes[1], layersizes[2], layersizes[3], network); } else { mlpcreate2(layersizes[0], layersizes[1], layersizes[2], layersizes[3], network); } } // // Load neurons and weights // for(i=1; i<=alglib.ap.len(layersizes)-1; i++) { for(j=0; j<=layersizes[i]-1; j++) { fkind = s.unserialize_int(); threshold = s.unserialize_double(); mlpsetneuroninfo(network, i, j, fkind, threshold); for(k=0; k<=layersizes[i-1]-1; k++) { v0 = s.unserialize_double(); mlpsetweight(network, i-1, k, i, j, v0); } } } // // Load standartizator // for(j=0; j<=nin-1; j++) { v0 = s.unserialize_double(); v1 = s.unserialize_double(); mlpsetinputscaling(network, j, v0, v1); } for(j=0; j<=nout-1; j++) { v0 = s.unserialize_double(); v1 = s.unserialize_double(); mlpsetoutputscaling(network, j, v0, v1); } } /************************************************************************* Calculation of all types of errors. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. OUTPUT PARAMETERS: Rep - it contains all type of errors. NOTE: when SubsetSize<0 is used full dataset by call MLPGradBatch function. -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpallerrorssubset(multilayerperceptron network, double[,] xy, int setsize, int[] subset, int subsetsize, modelerrors rep) { double[] buf = new double[0]; double[] dy = new double[0]; int rowsize = 0; int nin = 0; int nout = 0; int wcount = 0; bool iscls = new bool(); int i = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(setsize>=0, "MLPAllErrorsSubset: SetSize<0"); mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); // // Estimate error using subset of training set. 
// apserv.rvectorsetlengthatleast(ref network.x, nin); if( iscls ) { rowsize = nin+1; apserv.rvectorsetlengthatleast(ref network.y, 1); dy = new double[1]; bdss.dserrallocate(nout, ref buf); } else { rowsize = nin+nout; apserv.rvectorsetlengthatleast(ref network.y, nout); dy = new double[nout]; bdss.dserrallocate(-nout, ref buf); } if( subsetsize<0 ) { for(i=0; i<=setsize-1; i++) { for(i_=0; i_<=nin-1;i_++) { network.x[i_] = xy[i,i_]; } mlpprocess(network, network.x, ref network.y); if( iscls ) { dy[0] = xy[i,nin]; } else { i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { dy[i_] = xy[i,i_+i1_]; } } bdss.dserraccumulate(ref buf, network.y, dy); } } else { for(i=0; i<=subsetsize-1; i++) { for(i_=0; i_<=nin-1;i_++) { network.x[i_] = xy[subset[i],i_]; } mlpprocess(network, network.x, ref network.y); if( iscls ) { dy[0] = xy[subset[i],nin]; } else { i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { dy[i_] = xy[subset[i],i_+i1_]; } } bdss.dserraccumulate(ref buf, network.y, dy); } } bdss.dserrfinish(ref buf); rep.relclserror = buf[0]; rep.avgce = buf[1]; rep.rmserror = buf[2]; rep.avgerror = buf[3]; rep.avgrelerror = buf[4]; } /************************************************************************* Calculation of all types of errors. INPUT PARAMETERS: Network - network initialized with one of the network creation funcs XY - original dataset given by sparse matrix; one sample = one row; first NIn columns contain inputs, next NOut columns - desired outputs. SetSize - real size of XY, SetSize>=0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. OUTPUT PARAMETERS: Rep - it contains all type of errors. NOTE: when SubsetSize<0 is used full dataset by call MLPGradBatch function. -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpallerrorssparsesubset(multilayerperceptron network, sparse.sparsematrix xy, int setsize, int[] subset, int subsetsize, modelerrors rep) { double[] buf = new double[0]; double[] dy = new double[0]; int rowsize = 0; int nin = 0; int nout = 0; int wcount = 0; bool iscls = new bool(); int i = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(setsize>=0, "MLPAllErrorsSparseSubset: SetSize<0"); mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); // // Estimate error using subset of training set. 
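// NOTE: judging by the two branches below, DSErrAllocate() distinguishes
// tasks by the sign of its first argument: +NClasses requests a
// classification accumulator, -NOut a regression one.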
// apserv.rvectorsetlengthatleast(ref network.x, nin); if( iscls ) { rowsize = nin+1; apserv.rvectorsetlengthatleast(ref network.y, 1); dy = new double[1]; bdss.dserrallocate(nout, ref buf); } else { rowsize = nin+nout; apserv.rvectorsetlengthatleast(ref network.y, nout); dy = new double[nout]; bdss.dserrallocate(-nout, ref buf); } if( subsetsize<0 ) { for(i=0; i<=setsize-1; i++) { sparse.sparsegetrow(xy, i, ref network.xyrow); for(i_=0; i_<=nin-1;i_++) { network.x[i_] = network.xyrow[i_]; } mlpprocess(network, network.x, ref network.y); if( iscls ) { dy[0] = network.xyrow[nin]; } else { i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { dy[i_] = network.xyrow[i_+i1_]; } } bdss.dserraccumulate(ref buf, network.y, dy); } } else { for(i=0; i<=subsetsize-1; i++) { sparse.sparsegetrow(xy, subset[i], ref network.xyrow); for(i_=0; i_<=nin-1;i_++) { network.x[i_] = network.xyrow[i_]; } mlpprocess(network, network.x, ref network.y); if( iscls ) { dy[0] = network.xyrow[nin]; } else { i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { dy[i_] = network.xyrow[i_+i1_]; } } bdss.dserraccumulate(ref buf, network.y, dy); } } bdss.dserrfinish(ref buf); rep.relclserror = buf[0]; rep.avgce = buf[1]; rep.rmserror = buf[2]; rep.avgerror = buf[3]; rep.avgrelerror = buf[4]; } /************************************************************************* Error of the neural network on dataset. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format; SetSize - real size of XY, SetSize>=0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses clases following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static double mlperrorsubset(multilayerperceptron network, double[,] xy, int setsize, int[] subset, int subsetsize) { double result = 0; int rowsize = 0; int nin = 0; int nout = 0; int wcount = 0; int i = 0; int i_ = 0; alglib.ap.assert(setsize>=0, "MLPErrorSubset: SetSize<0"); mlpproperties(network, ref nin, ref nout, ref wcount); // // Estimate error using subset of training set. // if( mlpissoftmax(network) ) { rowsize = nin+1; } else { rowsize = nin+nout; } apserv.rmatrixsetlengthatleast(ref network.xy, 1, rowsize); if( subsetsize<0 ) { result = mlperror(network, xy, setsize); } else { result = 0; for(i=0; i<=subsetsize-1; i++) { for(i_=0; i_<=rowsize-1;i_++) { network.xy[0,i_] = xy[subset[i],i_]; } result = result+mlperror(network, network.xy, 1); } } return result; } /************************************************************************* Error of the neural network on dataset. INPUT PARAMETERS: Network - neural network; XY - training set, see below for information on the training set format. 
This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Sparse matrix must use CRS format for storage. SetSize - real size of XY, SetSize>=0; it is used when SubsetSize<0; Subset - subset of SubsetSize elements, array[SubsetSize]; SubsetSize- number of elements in Subset[] array. RESULT: sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2) DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs the following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes the following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1). -- ALGLIB -- Copyright 04.09.2012 by Bochkanov Sergey *************************************************************************/ public static double mlperrorsparsesubset(multilayerperceptron network, sparse.sparsematrix xy, int setsize, int[] subset, int subsetsize) { double result = 0; int rowsize = 0; int nin = 0; int nout = 0; int wcount = 0; double e = 0; int t0 = 0; int t1 = 0; bool iscls = new bool(); int i = 0; int j = 0; int i_ = 0; alglib.ap.assert(setsize>=0, "MLPErrorSparseSubset: SetSize<0."); alglib.ap.assert(sparse.sparseiscrs(xy), "MLPErrorSparseSubset: sparse matrix XY is not in CRS format."); // // Check dataset correctness // t0 = 0; t1 = 0; mlpproperties(network, ref nin, ref nout, ref wcount); iscls = mlpissoftmax(network); if( !iscls ) { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { alglib.ap.assert(math.isfinite(e), "MLPErrorSparseSubset: sparse matrix XY contains Infinite or NaN."); } } else { while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref e) ) { if( j!=nin ) { alglib.ap.assert(math.isfinite(e), "MLPErrorSparseSubset: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(e) && (int)Math.Round(e)>=0) && (int)Math.Round(e)<nout, "MLPErrorSparseSubset: sparse matrix XY contains incorrect class number (negative or >=NClasses)."); } } } // // Estimate error using subset of training set.
// if( iscls ) { rowsize = nin+1; } else { rowsize = nin+nout; } apserv.rmatrixsetlengthatleast(ref network.xy, 1, rowsize); if( subsetsize<0 ) { result = mlperrorsparse(network, xy, setsize); } else { result = 0; for(i=0; i<=subsetsize-1; i++) { sparse.sparsegetrow(xy, subset[i], ref network.xyrow); for(i_=0; i_<=rowsize-1;i_++) { network.xy[0,i_] = network.xyrow[i_]; } result = result+mlperror(network, network.xy, 1); } } return result; } /************************************************************************* Internal subroutine: adding new input layer to network *************************************************************************/ private static void addinputlayer(int ncount, ref int[] lsizes, ref int[] ltypes, ref int[] lconnfirst, ref int[] lconnlast, ref int lastproc) { lsizes[0] = ncount; ltypes[0] = -2; lconnfirst[0] = 0; lconnlast[0] = 0; lastproc = 0; } /************************************************************************* Internal subroutine: adding new summator layer to network *************************************************************************/ private static void addbiasedsummatorlayer(int ncount, ref int[] lsizes, ref int[] ltypes, ref int[] lconnfirst, ref int[] lconnlast, ref int lastproc) { lsizes[lastproc+1] = 1; ltypes[lastproc+1] = -3; lconnfirst[lastproc+1] = 0; lconnlast[lastproc+1] = 0; lsizes[lastproc+2] = ncount; ltypes[lastproc+2] = 0; lconnfirst[lastproc+2] = lastproc; lconnlast[lastproc+2] = lastproc+1; lastproc = lastproc+2; } /************************************************************************* Internal subroutine: adding new summator layer to network *************************************************************************/ private static void addactivationlayer(int functype, ref int[] lsizes, ref int[] ltypes, ref int[] lconnfirst, ref int[] lconnlast, ref int lastproc) { alglib.ap.assert(functype>0 || functype==-5, "AddActivationLayer: incorrect function type"); lsizes[lastproc+1] = lsizes[lastproc]; ltypes[lastproc+1] = functype; lconnfirst[lastproc+1] = lastproc; lconnlast[lastproc+1] = lastproc; lastproc = lastproc+1; } /************************************************************************* Internal subroutine: adding new zero layer to network *************************************************************************/ private static void addzerolayer(ref int[] lsizes, ref int[] ltypes, ref int[] lconnfirst, ref int[] lconnlast, ref int lastproc) { lsizes[lastproc+1] = 1; ltypes[lastproc+1] = -4; lconnfirst[lastproc+1] = 0; lconnlast[lastproc+1] = 0; lastproc = lastproc+1; } /************************************************************************* This routine adds input layer to the high-level description of the network. It modifies Network.HLConnections and Network.HLNeurons and assumes that these arrays have enough place to store data. It accepts following parameters: Network - network ConnIdx - index of the first free entry in the HLConnections NeuroIdx - index of the first free entry in the HLNeurons StructInfoIdx- index of the first entry in the low level description of the current layer (in the StructInfo array) NIn - number of inputs It modified Network and indices. 
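NOTE (layout inferred from the implementations below): every HLNeurons record occupies HLNFieldWidth consecutive integers: [offs+0] - layer index (0 for the input layer), [offs+1] - neuron index within its layer, [offs+2] - offset of the neuron entry in StructInfo (-1 if not applicable), [offs+3] - offset of the neuron's bias weight in Weights (-1 for weightless neurons).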
*************************************************************************/ private static void hladdinputlayer(multilayerperceptron network, ref int connidx, ref int neuroidx, ref int structinfoidx, int nin) { int i = 0; int offs = 0; offs = hlnfieldwidth*neuroidx; for(i=0; i<=nin-1; i++) { network.hlneurons[offs+0] = 0; network.hlneurons[offs+1] = i; network.hlneurons[offs+2] = -1; network.hlneurons[offs+3] = -1; offs = offs+hlnfieldwidth; } neuroidx = neuroidx+nin; structinfoidx = structinfoidx+nin; } /************************************************************************* This routine adds output layer to the high-level description of the network. It modifies Network.HLConnections and Network.HLNeurons and assumes that these arrays have enough place to store data. It accepts following parameters: Network - network ConnIdx - index of the first free entry in the HLConnections NeuroIdx - index of the first free entry in the HLNeurons StructInfoIdx- index of the first entry in the low level description of the current layer (in the StructInfo array) WeightsIdx - index of the first entry in the Weights array which corresponds to the current layer K - current layer index NPrev - number of neurons in the previous layer NOut - number of outputs IsCls - is it classifier network? IsLinear - is it network with linear output? It modified Network and ConnIdx/NeuroIdx/StructInfoIdx/WeightsIdx. *************************************************************************/ private static void hladdoutputlayer(multilayerperceptron network, ref int connidx, ref int neuroidx, ref int structinfoidx, ref int weightsidx, int k, int nprev, int nout, bool iscls, bool islinearout) { int i = 0; int j = 0; int neurooffs = 0; int connoffs = 0; alglib.ap.assert((iscls && islinearout) || !iscls, "HLAddOutputLayer: internal error"); neurooffs = hlnfieldwidth*neuroidx; connoffs = hlconnfieldwidth*connidx; if( !iscls ) { // // Regression network // for(i=0; i<=nout-1; i++) { network.hlneurons[neurooffs+0] = k; network.hlneurons[neurooffs+1] = i; network.hlneurons[neurooffs+2] = structinfoidx+1+nout+i; network.hlneurons[neurooffs+3] = weightsidx+nprev+(nprev+1)*i; neurooffs = neurooffs+hlnfieldwidth; } for(i=0; i<=nprev-1; i++) { for(j=0; j<=nout-1; j++) { network.hlconnections[connoffs+0] = k-1; network.hlconnections[connoffs+1] = i; network.hlconnections[connoffs+2] = k; network.hlconnections[connoffs+3] = j; network.hlconnections[connoffs+4] = weightsidx+i+j*(nprev+1); connoffs = connoffs+hlconnfieldwidth; } } connidx = connidx+nprev*nout; neuroidx = neuroidx+nout; structinfoidx = structinfoidx+2*nout+1; weightsidx = weightsidx+nout*(nprev+1); } else { // // Classification network // for(i=0; i<=nout-2; i++) { network.hlneurons[neurooffs+0] = k; network.hlneurons[neurooffs+1] = i; network.hlneurons[neurooffs+2] = -1; network.hlneurons[neurooffs+3] = weightsidx+nprev+(nprev+1)*i; neurooffs = neurooffs+hlnfieldwidth; } network.hlneurons[neurooffs+0] = k; network.hlneurons[neurooffs+1] = i; network.hlneurons[neurooffs+2] = -1; network.hlneurons[neurooffs+3] = -1; for(i=0; i<=nprev-1; i++) { for(j=0; j<=nout-2; j++) { network.hlconnections[connoffs+0] = k-1; network.hlconnections[connoffs+1] = i; network.hlconnections[connoffs+2] = k; network.hlconnections[connoffs+3] = j; network.hlconnections[connoffs+4] = weightsidx+i+j*(nprev+1); connoffs = connoffs+hlconnfieldwidth; } } connidx = connidx+nprev*(nout-1); neuroidx = neuroidx+nout; structinfoidx = structinfoidx+nout+2; weightsidx = weightsidx+(nout-1)*(nprev+1); } } 
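/*************************************************************************
NOTE on the classifier branch above: only NOut-1 output neurons own
weights - the last output is the weightless "0" neuron required by the
softmax normalization - which is why ConnIdx advances by NPrev*(NOut-1)
and WeightsIdx by (NOut-1)*(NPrev+1), while NeuroIdx still advances by
the full NOut.
*************************************************************************/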
/************************************************************************* This routine adds hidden layer to the high-level description of the network. It modifies Network.HLConnections and Network.HLNeurons and assumes that these arrays have enough place to store data. It accepts following parameters: Network - network ConnIdx - index of the first free entry in the HLConnections NeuroIdx - index of the first free entry in the HLNeurons StructInfoIdx- index of the first entry in the low level description of the current layer (in the StructInfo array) WeightsIdx - index of the first entry in the Weights array which corresponds to the current layer K - current layer index NPrev - number of neurons in the previous layer NCur - number of neurons in the current layer It modified Network and ConnIdx/NeuroIdx/StructInfoIdx/WeightsIdx. *************************************************************************/ private static void hladdhiddenlayer(multilayerperceptron network, ref int connidx, ref int neuroidx, ref int structinfoidx, ref int weightsidx, int k, int nprev, int ncur) { int i = 0; int j = 0; int neurooffs = 0; int connoffs = 0; neurooffs = hlnfieldwidth*neuroidx; connoffs = hlconnfieldwidth*connidx; for(i=0; i<=ncur-1; i++) { network.hlneurons[neurooffs+0] = k; network.hlneurons[neurooffs+1] = i; network.hlneurons[neurooffs+2] = structinfoidx+1+ncur+i; network.hlneurons[neurooffs+3] = weightsidx+nprev+(nprev+1)*i; neurooffs = neurooffs+hlnfieldwidth; } for(i=0; i<=nprev-1; i++) { for(j=0; j<=ncur-1; j++) { network.hlconnections[connoffs+0] = k-1; network.hlconnections[connoffs+1] = i; network.hlconnections[connoffs+2] = k; network.hlconnections[connoffs+3] = j; network.hlconnections[connoffs+4] = weightsidx+i+j*(nprev+1); connoffs = connoffs+hlconnfieldwidth; } } connidx = connidx+nprev*ncur; neuroidx = neuroidx+ncur; structinfoidx = structinfoidx+2*ncur+1; weightsidx = weightsidx+ncur*(nprev+1); } /************************************************************************* This function fills high level information about network created using internal MLPCreate() function. This function does NOT examine StructInfo for low level information, it just expects that network has following structure: input neuron \ ... | input layer input neuron / "-1" neuron \ biased summator | ... | biased summator | hidden layer(s), if there are exists any activation function | ... | activation function / "-1" neuron \ biased summator | output layer: ... | biased summator | * we have NOut summators/activators for regression networks activation function | * we have only NOut-1 summators and no activators for classifiers ... 
| * we have "0" neuron only when we have classifier activation function | "0" neuron / -- ALGLIB -- Copyright 30.03.2008 by Bochkanov Sergey *************************************************************************/ private static void fillhighlevelinformation(multilayerperceptron network, int nin, int nhid1, int nhid2, int nout, bool iscls, bool islinearout) { int idxweights = 0; int idxstruct = 0; int idxneuro = 0; int idxconn = 0; alglib.ap.assert((iscls && islinearout) || !iscls, "FillHighLevelInformation: internal error"); // // Preparations common to all types of networks // idxweights = 0; idxneuro = 0; idxstruct = 0; idxconn = 0; network.hlnetworktype = 0; // // network without hidden layers // if( nhid1==0 ) { network.hllayersizes = new int[2]; network.hllayersizes[0] = nin; network.hllayersizes[1] = nout; if( !iscls ) { network.hlconnections = new int[hlconnfieldwidth*nin*nout]; network.hlneurons = new int[hlnfieldwidth*(nin+nout)]; network.hlnormtype = 0; } else { network.hlconnections = new int[hlconnfieldwidth*nin*(nout-1)]; network.hlneurons = new int[hlnfieldwidth*(nin+nout)]; network.hlnormtype = 1; } hladdinputlayer(network, ref idxconn, ref idxneuro, ref idxstruct, nin); hladdoutputlayer(network, ref idxconn, ref idxneuro, ref idxstruct, ref idxweights, 1, nin, nout, iscls, islinearout); return; } // // network with one hidden layers // if( nhid2==0 ) { network.hllayersizes = new int[3]; network.hllayersizes[0] = nin; network.hllayersizes[1] = nhid1; network.hllayersizes[2] = nout; if( !iscls ) { network.hlconnections = new int[hlconnfieldwidth*(nin*nhid1+nhid1*nout)]; network.hlneurons = new int[hlnfieldwidth*(nin+nhid1+nout)]; network.hlnormtype = 0; } else { network.hlconnections = new int[hlconnfieldwidth*(nin*nhid1+nhid1*(nout-1))]; network.hlneurons = new int[hlnfieldwidth*(nin+nhid1+nout)]; network.hlnormtype = 1; } hladdinputlayer(network, ref idxconn, ref idxneuro, ref idxstruct, nin); hladdhiddenlayer(network, ref idxconn, ref idxneuro, ref idxstruct, ref idxweights, 1, nin, nhid1); hladdoutputlayer(network, ref idxconn, ref idxneuro, ref idxstruct, ref idxweights, 2, nhid1, nout, iscls, islinearout); return; } // // Two hidden layers // network.hllayersizes = new int[4]; network.hllayersizes[0] = nin; network.hllayersizes[1] = nhid1; network.hllayersizes[2] = nhid2; network.hllayersizes[3] = nout; if( !iscls ) { network.hlconnections = new int[hlconnfieldwidth*(nin*nhid1+nhid1*nhid2+nhid2*nout)]; network.hlneurons = new int[hlnfieldwidth*(nin+nhid1+nhid2+nout)]; network.hlnormtype = 0; } else { network.hlconnections = new int[hlconnfieldwidth*(nin*nhid1+nhid1*nhid2+nhid2*(nout-1))]; network.hlneurons = new int[hlnfieldwidth*(nin+nhid1+nhid2+nout)]; network.hlnormtype = 1; } hladdinputlayer(network, ref idxconn, ref idxneuro, ref idxstruct, nin); hladdhiddenlayer(network, ref idxconn, ref idxneuro, ref idxstruct, ref idxweights, 1, nin, nhid1); hladdhiddenlayer(network, ref idxconn, ref idxneuro, ref idxstruct, ref idxweights, 2, nhid1, nhid2); hladdoutputlayer(network, ref idxconn, ref idxneuro, ref idxstruct, ref idxweights, 3, nhid2, nout, iscls, islinearout); } /************************************************************************* Internal subroutine. 
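NOTE (header layout, inferred from the body): StructInfo starts with seven integers - [0] total size of StructInfo, [1] NIn, [2] NOut, [3] NTotal (total neuron count), [4] WCount (weight count), [5] offset of the first per-neuron record (always 7), [6] normalization type (1 for softmax classifiers, 0 otherwise) - followed by NTotal records of NFieldWidth integers each.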
-- ALGLIB -- Copyright 04.11.2007 by Bochkanov Sergey *************************************************************************/ private static void mlpcreate(int nin, int nout, int[] lsizes, int[] ltypes, int[] lconnfirst, int[] lconnlast, int layerscount, bool isclsnet, multilayerperceptron network) { int i = 0; int j = 0; int ssize = 0; int ntotal = 0; int wcount = 0; int offs = 0; int nprocessed = 0; int wallocated = 0; int[] localtemp = new int[0]; int[] lnfirst = new int[0]; int[] lnsyn = new int[0]; // // Check // alglib.ap.assert(layerscount>0, "MLPCreate: wrong parameters!"); alglib.ap.assert(ltypes[0]==-2, "MLPCreate: wrong LTypes[0] (must be -2)!"); for(i=0; i<=layerscount-1; i++) { alglib.ap.assert(lsizes[i]>0, "MLPCreate: wrong LSizes!"); alglib.ap.assert(lconnfirst[i]>=0 && (lconnfirst[i]<i || i==0), "MLPCreate: wrong LConnFirst!"); alglib.ap.assert(lconnlast[i]>=lconnfirst[i] && (lconnlast[i]<i || i==0), "MLPCreate: wrong LConnLast!"); } // // Build network geometry // lnfirst = new int[layerscount-1+1]; lnsyn = new int[layerscount-1+1]; ntotal = 0; wcount = 0; for(i=0; i<=layerscount-1; i++) { // // Analyze connections: count synapses per neuron of the layer // lnsyn[i] = -1; if( ltypes[i]>=0 || ltypes[i]==-5 ) { lnsyn[i] = 0; for(j=lconnfirst[i]; j<=lconnlast[i]; j++) { lnsyn[i] = lnsyn[i]+lsizes[j]; } } else { if( (ltypes[i]==-2 || ltypes[i]==-3) || ltypes[i]==-4 ) { lnsyn[i] = 0; } } alglib.ap.assert(lnsyn[i]>=0, "MLPCreate: internal error #0!"); // // Other info // lnfirst[i] = ntotal; ntotal = ntotal+lsizes[i]; if( ltypes[i]==0 ) { wcount = wcount+lnsyn[i]*lsizes[i]; } } ssize = 7+ntotal*nfieldwidth; // // Allocate // network.structinfo = new int[ssize-1+1]; network.weights = new double[wcount-1+1]; if( isclsnet ) { network.columnmeans = new double[nin-1+1]; network.columnsigmas = new double[nin-1+1]; } else { network.columnmeans = new double[nin+nout-1+1]; network.columnsigmas = new double[nin+nout-1+1]; } network.neurons = new double[ntotal-1+1]; network.chunks = new double[3*ntotal+1, chunksize-1+1]; network.nwbuf = new double[Math.Max(wcount, 2*nout)-1+1]; network.integerbuf = new int[3+1]; network.dfdnet = new double[ntotal-1+1]; network.x = new double[nin-1+1]; network.y = new double[nout-1+1]; network.derror = new double[ntotal-1+1]; // // Fill structure: global info // network.structinfo[0] = ssize; network.structinfo[1] = nin; network.structinfo[2] = nout; network.structinfo[3] = ntotal; network.structinfo[4] = wcount; network.structinfo[5] = 7; if( isclsnet ) { network.structinfo[6] = 1; } else { network.structinfo[6] = 0; } // // Fill structure: neuron connections // nprocessed = 0; wallocated = 0; for(i=0; i<=layerscount-1; i++) { for(j=0; j<=lsizes[i]-1; j++) { offs = network.structinfo[5]+nprocessed*nfieldwidth; network.structinfo[offs+0] = ltypes[i]; if( ltypes[i]==0 ) { // // Adaptive summator: // * connections with weights to previous neurons // network.structinfo[offs+1] = lnsyn[i]; network.structinfo[offs+2] = lnfirst[lconnfirst[i]]; network.structinfo[offs+3] = wallocated; wallocated = wallocated+lnsyn[i]; nprocessed = nprocessed+1; } if( ltypes[i]>0 || ltypes[i]==-5 ) { // // Activation layer: // * each neuron connected to one (only one) of previous neurons.
// * no weights // network.structinfo[offs+1] = 1; network.structinfo[offs+2] = lnfirst[lconnfirst[i]]+j; network.structinfo[offs+3] = -1; nprocessed = nprocessed+1; } if( (ltypes[i]==-2 || ltypes[i]==-3) || ltypes[i]==-4 ) { nprocessed = nprocessed+1; } } } alglib.ap.assert(wallocated==wcount, "MLPCreate: internal error #1!"); alglib.ap.assert(nprocessed==ntotal, "MLPCreate: internal error #2!"); // // Fill weights by small random values // Initialize means and sigmas // for(i=0; i<=wcount-1; i++) { network.weights[i] = math.randomreal()-0.5; } for(i=0; i<=nin-1; i++) { network.columnmeans[i] = 0; network.columnsigmas[i] = 1; } if( !isclsnet ) { for(i=0; i<=nout-1; i++) { network.columnmeans[nin+i] = 0; network.columnsigmas[nin+i] = 1; } } } /************************************************************************* Internal subroutine for Hessian calculation. WARNING!!! Unspeakable math far beyong human capabilities :) *************************************************************************/ private static void mlphessianbatchinternal(multilayerperceptron network, double[,] xy, int ssize, bool naturalerr, ref double e, ref double[] grad, ref double[,] h) { int nin = 0; int nout = 0; int wcount = 0; int ntotal = 0; int istart = 0; int i = 0; int j = 0; int k = 0; int kl = 0; int offs = 0; int n1 = 0; int n2 = 0; int w1 = 0; int w2 = 0; double s = 0; double t = 0; double v = 0; double et = 0; bool bflag = new bool(); double f = 0; double df = 0; double d2f = 0; double deidyj = 0; double mx = 0; double q = 0; double z = 0; double s2 = 0; double expi = 0; double expj = 0; double[] x = new double[0]; double[] desiredy = new double[0]; double[] gt = new double[0]; double[] zeros = new double[0]; double[,] rx = new double[0,0]; double[,] ry = new double[0,0]; double[,] rdx = new double[0,0]; double[,] rdy = new double[0,0]; int i_ = 0; int i1_ = 0; e = 0; mlpproperties(network, ref nin, ref nout, ref wcount); ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Prepare // x = new double[nin-1+1]; desiredy = new double[nout-1+1]; zeros = new double[wcount-1+1]; gt = new double[wcount-1+1]; rx = new double[ntotal+nout-1+1, wcount-1+1]; ry = new double[ntotal+nout-1+1, wcount-1+1]; rdx = new double[ntotal+nout-1+1, wcount-1+1]; rdy = new double[ntotal+nout-1+1, wcount-1+1]; e = 0; for(i=0; i<=wcount-1; i++) { zeros[i] = 0; } for(i_=0; i_<=wcount-1;i_++) { grad[i_] = zeros[i_]; } for(i=0; i<=wcount-1; i++) { for(i_=0; i_<=wcount-1;i_++) { h[i,i_] = zeros[i_]; } } // // Process // for(k=0; k<=ssize-1; k++) { // // Process vector with MLPGradN. // Now Neurons, DFDNET and DError contains results of the last run. // for(i_=0; i_<=nin-1;i_++) { x[i_] = xy[k,i_]; } if( mlpissoftmax(network) ) { // // class labels outputs // kl = (int)Math.Round(xy[k,nin]); for(i=0; i<=nout-1; i++) { if( i==kl ) { desiredy[i] = 1; } else { desiredy[i] = 0; } } } else { // // real outputs // i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { desiredy[i_] = xy[k,i_+i1_]; } } if( naturalerr ) { mlpgradn(network, x, desiredy, ref et, ref gt); } else { mlpgrad(network, x, desiredy, ref et, ref gt); } // // grad, error // e = e+et; for(i_=0; i_<=wcount-1;i_++) { grad[i_] = grad[i_] + gt[i_]; } // // Hessian. 
// Forward pass of the R-algorithm // for(i=0; i<=ntotal-1; i++) { offs = istart+i*nfieldwidth; for(i_=0; i_<=wcount-1;i_++) { rx[i,i_] = zeros[i_]; } for(i_=0; i_<=wcount-1;i_++) { ry[i,i_] = zeros[i_]; } if( network.structinfo[offs+0]>0 || network.structinfo[offs+0]==-5 ) { // // Activation function // n1 = network.structinfo[offs+2]; for(i_=0; i_<=wcount-1;i_++) { rx[i,i_] = ry[n1,i_]; } v = network.dfdnet[i]; for(i_=0; i_<=wcount-1;i_++) { ry[i,i_] = v*rx[i,i_]; } continue; } if( network.structinfo[offs+0]==0 ) { // // Adaptive summator // n1 = network.structinfo[offs+2]; n2 = n1+network.structinfo[offs+1]-1; w1 = network.structinfo[offs+3]; w2 = w1+network.structinfo[offs+1]-1; for(j=n1; j<=n2; j++) { v = network.weights[w1+j-n1]; for(i_=0; i_<=wcount-1;i_++) { rx[i,i_] = rx[i,i_] + v*ry[j,i_]; } rx[i,w1+j-n1] = rx[i,w1+j-n1]+network.neurons[j]; } for(i_=0; i_<=wcount-1;i_++) { ry[i,i_] = rx[i,i_]; } continue; } if( network.structinfo[offs+0]<0 ) { bflag = true; if( network.structinfo[offs+0]==-2 ) { // // input neuron, left unchanged // bflag = false; } if( network.structinfo[offs+0]==-3 ) { // // "-1" neuron, left unchanged // bflag = false; } if( network.structinfo[offs+0]==-4 ) { // // "0" neuron, left unchanged // bflag = false; } alglib.ap.assert(!bflag, "MLPHessianNBatch: internal error - unknown neuron type!"); continue; } } // // Hessian. Backward pass of the R-algorithm. // // Stage 1. Initialize RDY // for(i=0; i<=ntotal+nout-1; i++) { for(i_=0; i_<=wcount-1;i_++) { rdy[i,i_] = zeros[i_]; } } if( network.structinfo[6]==0 ) { // // Standardisation. // // In context of the Hessian calculation standardisation // is considered as additional layer with weightless // activation function: // // F(NET) := Sigma*NET // // So we add one more layer to forward pass, and // make forward/backward pass through this layer. // for(i=0; i<=nout-1; i++) { n1 = ntotal-nout+i; n2 = ntotal+i; // // Forward pass from N1 to N2 // for(i_=0; i_<=wcount-1;i_++) { rx[n2,i_] = ry[n1,i_]; } v = network.columnsigmas[nin+i]; for(i_=0; i_<=wcount-1;i_++) { ry[n2,i_] = v*rx[n2,i_]; } // // Initialization of RDY // for(i_=0; i_<=wcount-1;i_++) { rdy[n2,i_] = ry[n2,i_]; } // // Backward pass from N2 to N1: // 1. Calculate R(dE/dX). // 2. No R(dE/dWij) is needed since weight of activation neuron // is fixed to 1. So we can update R(dE/dY) for // the connected neuron (note that Vij=0, Wij=1) // df = network.columnsigmas[nin+i]; for(i_=0; i_<=wcount-1;i_++) { rdx[n2,i_] = df*rdy[n2,i_]; } for(i_=0; i_<=wcount-1;i_++) { rdy[n1,i_] = rdy[n1,i_] + rdx[n2,i_]; } } } else { // // Softmax. // // Initialize RDY using generalized expression for ei'(yi) // (see expression (9) from p. 5 of "Fast Exact Multiplication by the Hessian"). // // When we are working with softmax network, generalized // expression for ei'(yi) is used because softmax // normalization leads to ei, which depends on all y's // if( naturalerr ) { // // softmax + cross-entropy. 
// We have:
//
//     S = sum(exp(yk)),
//     ei = sum(trn)*exp(yi)/S-trn_i
//
//     j=i:   d(ei)/d(yj) = T*exp(yi)*(S-exp(yi))/S^2
//     j<>i:  d(ei)/d(yj) = -T*exp(yi)*exp(yj)/S^2
//
t = 0;
for(i=0; i<=nout-1; i++)
{
    t = t+desiredy[i];
}
mx = network.neurons[ntotal-nout];
for(i=0; i<=nout-1; i++)
{
    mx = Math.Max(mx, network.neurons[ntotal-nout+i]);
}
s = 0;
for(i=0; i<=nout-1; i++)
{
    network.nwbuf[i] = Math.Exp(network.neurons[ntotal-nout+i]-mx);
    s = s+network.nwbuf[i];
}
for(i=0; i<=nout-1; i++)
{
    for(j=0; j<=nout-1; j++)
    {
        if( j==i )
        {
            deidyj = t*network.nwbuf[i]*(s-network.nwbuf[i])/math.sqr(s);
            for(i_=0; i_<=wcount-1;i_++) { rdy[ntotal-nout+i,i_] = rdy[ntotal-nout+i,i_] + deidyj*ry[ntotal-nout+i,i_]; }
        }
        else
        {
            deidyj = -(t*network.nwbuf[i]*network.nwbuf[j]/math.sqr(s));
            for(i_=0; i_<=wcount-1;i_++) { rdy[ntotal-nout+i,i_] = rdy[ntotal-nout+i,i_] + deidyj*ry[ntotal-nout+j,i_]; }
        }
    }
}
}
else
{
//
// For a softmax + squared error we have expression
// far beyond human imagination, so we don't even try
// to comment on it. Just enjoy the code...
//
// P.S. That's why "natural error" is called "natural" -
// compact beautiful expressions, fast code....
//
mx = network.neurons[ntotal-nout];
for(i=0; i<=nout-1; i++)
{
    mx = Math.Max(mx, network.neurons[ntotal-nout+i]);
}
s = 0;
s2 = 0;
for(i=0; i<=nout-1; i++)
{
    network.nwbuf[i] = Math.Exp(network.neurons[ntotal-nout+i]-mx);
    s = s+network.nwbuf[i];
    s2 = s2+math.sqr(network.nwbuf[i]);
}
q = 0;
for(i=0; i<=nout-1; i++)
{
    q = q+(network.y[i]-desiredy[i])*network.nwbuf[i];
}
for(i=0; i<=nout-1; i++)
{
    z = -q+(network.y[i]-desiredy[i])*s;
    expi = network.nwbuf[i];
    for(j=0; j<=nout-1; j++)
    {
        expj = network.nwbuf[j];
        if( j==i )
        {
            deidyj = expi/math.sqr(s)*((z+expi)*(s-2*expi)/s+expi*s2/math.sqr(s));
        }
        else
        {
            deidyj = expi*expj/math.sqr(s)*(s2/math.sqr(s)-2*z/s-(expi+expj)/s+(network.y[i]-desiredy[i])-(network.y[j]-desiredy[j]));
        }
        for(i_=0; i_<=wcount-1;i_++) { rdy[ntotal-nout+i,i_] = rdy[ntotal-nout+i,i_] + deidyj*ry[ntotal-nout+j,i_]; }
    }
}
}
}

//
// Hessian. Backward pass of the R-algorithm
//
// Stage 2. Process.
//
for(i=ntotal-1; i>=0; i--)
{
    //
    // Possible variants:
    // 1. Activation function
    // 2. Adaptive summator
    // 3. Special neuron
    //
    offs = istart+i*nfieldwidth;
    if( network.structinfo[offs+0]>0 || network.structinfo[offs+0]==-5 )
    {
        n1 = network.structinfo[offs+2];

        //
        // First, calculate R(dE/dX).
        //
        mlpactivationfunction(network.neurons[n1], network.structinfo[offs+0], ref f, ref df, ref d2f);
        v = d2f*network.derror[i];
        for(i_=0; i_<=wcount-1;i_++) { rdx[i,i_] = df*rdy[i,i_]; }
        for(i_=0; i_<=wcount-1;i_++) { rdx[i,i_] = rdx[i,i_] + v*rx[i,i_]; }

        //
        // No R(dE/dWij) is needed since weight of activation neuron
        // is fixed to 1.
        //
        // So we can update R(dE/dY) for the connected neuron.
        // (note that Vij=0, Wij=1)
        //
        for(i_=0; i_<=wcount-1;i_++) { rdy[n1,i_] = rdy[n1,i_] + rdx[i,i_]; }
        continue;
    }
    if( network.structinfo[offs+0]==0 )
    {
        //
        // Adaptive summator
        //
        n1 = network.structinfo[offs+2];
        n2 = n1+network.structinfo[offs+1]-1;
        w1 = network.structinfo[offs+3];
        w2 = w1+network.structinfo[offs+1]-1;

        //
        // First, calculate R(dE/dX).
        //
        for(i_=0; i_<=wcount-1;i_++) { rdx[i,i_] = rdy[i,i_]; }

        //
        // Then, calculate R(dE/dWij)
        //
        for(j=w1; j<=w2; j++)
        {
            v = network.neurons[n1+j-w1];
            for(i_=0; i_<=wcount-1;i_++) { h[j,i_] = h[j,i_] + v*rdx[i,i_]; }
            v = network.derror[i];
            for(i_=0; i_<=wcount-1;i_++) { h[j,i_] = h[j,i_] + v*ry[n1+j-w1,i_]; }
        }

        //
        // And finally, update R(dE/dY) for connected neurons.
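        // For each connected neuron J the update reads
        //     R(dE/dY(j)) += W(i,j)*R(dE/dX(i)) + dE/dX(i)*R(W(i,j)),
        // where the R-vector of a weight has exactly one nonzero (unit)
        // component - the one which corresponds to W(i,j) itself. This is
        // why a single scalar DError[I] is added to one entry of RDY below,
        // while the rest of the update is a vector operation.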
// for(j=w1; j<=w2; j++) { v = network.weights[j]; for(i_=0; i_<=wcount-1;i_++) { rdy[n1+j-w1,i_] = rdy[n1+j-w1,i_] + v*rdx[i,i_]; } rdy[n1+j-w1,j] = rdy[n1+j-w1,j]+network.derror[i]; } continue; } if( network.structinfo[offs+0]<0 ) { bflag = false; if( (network.structinfo[offs+0]==-2 || network.structinfo[offs+0]==-3) || network.structinfo[offs+0]==-4 ) { // // Special neuron type, no back-propagation required // bflag = true; } alglib.ap.assert(bflag, "MLPHessianNBatch: unknown neuron type!"); continue; } } } } /************************************************************************* Internal subroutine Network must be processed by MLPProcess on X *************************************************************************/ private static void mlpinternalcalculategradient(multilayerperceptron network, double[] neurons, double[] weights, ref double[] derror, ref double[] grad, bool naturalerrorfunc) { int i = 0; int n1 = 0; int n2 = 0; int w1 = 0; int w2 = 0; int ntotal = 0; int istart = 0; int nin = 0; int nout = 0; int offs = 0; double dedf = 0; double dfdnet = 0; double v = 0; double fown = 0; double deown = 0; double net = 0; double mx = 0; bool bflag = new bool(); int i_ = 0; int i1_ = 0; // // Read network geometry // nin = network.structinfo[1]; nout = network.structinfo[2]; ntotal = network.structinfo[3]; istart = network.structinfo[5]; // // Pre-processing of dError/dOut: // from dError/dOut(normalized) to dError/dOut(non-normalized) // alglib.ap.assert(network.structinfo[6]==0 || network.structinfo[6]==1, "MLPInternalCalculateGradient: unknown normalization type!"); if( network.structinfo[6]==1 ) { // // Softmax // if( !naturalerrorfunc ) { mx = network.neurons[ntotal-nout]; for(i=0; i<=nout-1; i++) { mx = Math.Max(mx, network.neurons[ntotal-nout+i]); } net = 0; for(i=0; i<=nout-1; i++) { network.nwbuf[i] = Math.Exp(network.neurons[ntotal-nout+i]-mx); net = net+network.nwbuf[i]; } i1_ = (0)-(ntotal-nout); v = 0.0; for(i_=ntotal-nout; i_<=ntotal-1;i_++) { v += network.derror[i_]*network.nwbuf[i_+i1_]; } for(i=0; i<=nout-1; i++) { fown = network.nwbuf[i]; deown = network.derror[ntotal-nout+i]; network.nwbuf[nout+i] = (-v+deown*fown+deown*(net-fown))*fown/math.sqr(net); } for(i=0; i<=nout-1; i++) { network.derror[ntotal-nout+i] = network.nwbuf[nout+i]; } } } else { // // Un-standardisation // for(i=0; i<=nout-1; i++) { network.derror[ntotal-nout+i] = network.derror[ntotal-nout+i]*network.columnsigmas[nin+i]; } } // // Backpropagation // for(i=ntotal-1; i>=0; i--) { // // Extract info // offs = istart+i*nfieldwidth; if( network.structinfo[offs+0]>0 || network.structinfo[offs+0]==-5 ) { // // Activation function // dedf = network.derror[i]; dfdnet = network.dfdnet[i]; derror[network.structinfo[offs+2]] = derror[network.structinfo[offs+2]]+dedf*dfdnet; continue; } if( network.structinfo[offs+0]==0 ) { // // Adaptive summator // n1 = network.structinfo[offs+2]; n2 = n1+network.structinfo[offs+1]-1; w1 = network.structinfo[offs+3]; w2 = w1+network.structinfo[offs+1]-1; dedf = network.derror[i]; dfdnet = 1.0; v = dedf*dfdnet; i1_ = (n1) - (w1); for(i_=w1; i_<=w2;i_++) { grad[i_] = v*neurons[i_+i1_]; } i1_ = (w1) - (n1); for(i_=n1; i_<=n2;i_++) { derror[i_] = derror[i_] + v*weights[i_+i1_]; } continue; } if( network.structinfo[offs+0]<0 ) { bflag = false; if( (network.structinfo[offs+0]==-2 || network.structinfo[offs+0]==-3) || network.structinfo[offs+0]==-4 ) { // // Special neuron type, no back-propagation required // bflag = true; } alglib.ap.assert(bflag, "MLPInternalCalculateGradient: 
unknown neuron type!"); continue; } } } /************************************************************************* Internal subroutine, chunked gradient *************************************************************************/ private static void mlpchunkedgradient(multilayerperceptron network, double[,] xy, int cstart, int csize, ref double e, ref double[] grad, bool naturalerrorfunc) { int i = 0; int j = 0; int k = 0; int kl = 0; int n1 = 0; int n2 = 0; int w1 = 0; int w2 = 0; int c1 = 0; int c2 = 0; int ntotal = 0; int nin = 0; int nout = 0; int offs = 0; double f = 0; double df = 0; double d2f = 0; double v = 0; double s = 0; double fown = 0; double deown = 0; double net = 0; double lnnet = 0; double mx = 0; bool bflag = new bool(); int istart = 0; int ineurons = 0; int idfdnet = 0; int iderror = 0; int izeros = 0; int i_ = 0; int i1_ = 0; // // Read network geometry, prepare data // nin = network.structinfo[1]; nout = network.structinfo[2]; ntotal = network.structinfo[3]; istart = network.structinfo[5]; c1 = cstart; c2 = cstart+csize-1; ineurons = 0; idfdnet = ntotal; iderror = 2*ntotal; izeros = 3*ntotal; for(j=0; j<=csize-1; j++) { network.chunks[izeros,j] = 0; } // // Forward pass: // 1. Load inputs from XY to Chunks[0:NIn-1,0:CSize-1] // 2. Forward pass // for(i=0; i<=nin-1; i++) { for(j=0; j<=csize-1; j++) { if( (double)(network.columnsigmas[i])!=(double)(0) ) { network.chunks[i,j] = (xy[c1+j,i]-network.columnmeans[i])/network.columnsigmas[i]; } else { network.chunks[i,j] = xy[c1+j,i]-network.columnmeans[i]; } } } for(i=0; i<=ntotal-1; i++) { offs = istart+i*nfieldwidth; if( network.structinfo[offs+0]>0 || network.structinfo[offs+0]==-5 ) { // // Activation function: // * calculate F vector, F(i) = F(NET(i)) // n1 = network.structinfo[offs+2]; for(i_=0; i_<=csize-1;i_++) { network.chunks[i,i_] = network.chunks[n1,i_]; } for(j=0; j<=csize-1; j++) { mlpactivationfunction(network.chunks[i,j], network.structinfo[offs+0], ref f, ref df, ref d2f); network.chunks[i,j] = f; network.chunks[idfdnet+i,j] = df; } continue; } if( network.structinfo[offs+0]==0 ) { // // Adaptive summator: // * calculate NET vector, NET(i) = SUM(W(j,i)*Neurons(j),j=N1..N2) // n1 = network.structinfo[offs+2]; n2 = n1+network.structinfo[offs+1]-1; w1 = network.structinfo[offs+3]; w2 = w1+network.structinfo[offs+1]-1; for(i_=0; i_<=csize-1;i_++) { network.chunks[i,i_] = network.chunks[izeros,i_]; } for(j=n1; j<=n2; j++) { v = network.weights[w1+j-n1]; for(i_=0; i_<=csize-1;i_++) { network.chunks[i,i_] = network.chunks[i,i_] + v*network.chunks[j,i_]; } } continue; } if( network.structinfo[offs+0]<0 ) { bflag = false; if( network.structinfo[offs+0]==-2 ) { // // input neuron, left unchanged // bflag = true; } if( network.structinfo[offs+0]==-3 ) { // // "-1" neuron // for(k=0; k<=csize-1; k++) { network.chunks[i,k] = -1; } bflag = true; } if( network.structinfo[offs+0]==-4 ) { // // "0" neuron // for(k=0; k<=csize-1; k++) { network.chunks[i,k] = 0; } bflag = true; } alglib.ap.assert(bflag, "MLPChunkedGradient: internal error - unknown neuron type!"); continue; } } // // Post-processing, error, dError/dOut // for(i=0; i<=ntotal-1; i++) { for(i_=0; i_<=csize-1;i_++) { network.chunks[iderror+i,i_] = network.chunks[izeros,i_]; } } alglib.ap.assert(network.structinfo[6]==0 || network.structinfo[6]==1, "MLPChunkedGradient: unknown normalization type!"); if( network.structinfo[6]==1 ) { // // Softmax output, classification network. // // For each K = 0..CSize-1 do: // 1. place exp(outputs[k]) to NWBuf[0:NOut-1] // 2. 
place sum(exp(..)) to NET // 3. calculate dError/dOut and place it to the second block of Chunks // for(k=0; k<=csize-1; k++) { // // Normalize // mx = network.chunks[ntotal-nout,k]; for(i=1; i<=nout-1; i++) { mx = Math.Max(mx, network.chunks[ntotal-nout+i,k]); } net = 0; for(i=0; i<=nout-1; i++) { network.nwbuf[i] = Math.Exp(network.chunks[ntotal-nout+i,k]-mx); net = net+network.nwbuf[i]; } // // Calculate error function and dError/dOut // if( naturalerrorfunc ) { // // Natural error func. // // s = 1; lnnet = Math.Log(net); kl = (int)Math.Round(xy[cstart+k,nin]); for(i=0; i<=nout-1; i++) { if( i==kl ) { v = 1; } else { v = 0; } network.chunks[iderror+ntotal-nout+i,k] = s*network.nwbuf[i]/net-v; e = e+safecrossentropy(v, network.nwbuf[i]/net); } } else { // // Least squares error func // Error, dError/dOut(normalized) // kl = (int)Math.Round(xy[cstart+k,nin]); for(i=0; i<=nout-1; i++) { if( i==kl ) { v = network.nwbuf[i]/net-1; } else { v = network.nwbuf[i]/net; } network.nwbuf[nout+i] = v; e = e+math.sqr(v)/2; } // // From dError/dOut(normalized) to dError/dOut(non-normalized) // i1_ = (0)-(nout); v = 0.0; for(i_=nout; i_<=2*nout-1;i_++) { v += network.nwbuf[i_]*network.nwbuf[i_+i1_]; } for(i=0; i<=nout-1; i++) { fown = network.nwbuf[i]; deown = network.nwbuf[nout+i]; network.chunks[iderror+ntotal-nout+i,k] = (-v+deown*fown+deown*(net-fown))*fown/math.sqr(net); } } } } else { // // Normal output, regression network // // For each K = 0..CSize-1 do: // 1. calculate dError/dOut and place it to the second block of Chunks // for(i=0; i<=nout-1; i++) { for(j=0; j<=csize-1; j++) { v = network.chunks[ntotal-nout+i,j]*network.columnsigmas[nin+i]+network.columnmeans[nin+i]-xy[cstart+j,nin+i]; network.chunks[iderror+ntotal-nout+i,j] = v*network.columnsigmas[nin+i]; e = e+math.sqr(v)/2; } } } // // Backpropagation // for(i=ntotal-1; i>=0; i--) { // // Extract info // offs = istart+i*nfieldwidth; if( network.structinfo[offs+0]>0 || network.structinfo[offs+0]==-5 ) { // // Activation function // n1 = network.structinfo[offs+2]; for(k=0; k<=csize-1; k++) { network.chunks[iderror+i,k] = network.chunks[iderror+i,k]*network.chunks[idfdnet+i,k]; } for(i_=0; i_<=csize-1;i_++) { network.chunks[iderror+n1,i_] = network.chunks[iderror+n1,i_] + network.chunks[iderror+i,i_]; } continue; } if( network.structinfo[offs+0]==0 ) { // // "Normal" activation function // n1 = network.structinfo[offs+2]; n2 = n1+network.structinfo[offs+1]-1; w1 = network.structinfo[offs+3]; w2 = w1+network.structinfo[offs+1]-1; for(j=w1; j<=w2; j++) { v = 0.0; for(i_=0; i_<=csize-1;i_++) { v += network.chunks[n1+j-w1,i_]*network.chunks[iderror+i,i_]; } grad[j] = grad[j]+v; } for(j=n1; j<=n2; j++) { v = network.weights[w1+j-n1]; for(i_=0; i_<=csize-1;i_++) { network.chunks[iderror+j,i_] = network.chunks[iderror+j,i_] + v*network.chunks[iderror+i,i_]; } } continue; } if( network.structinfo[offs+0]<0 ) { bflag = false; if( (network.structinfo[offs+0]==-2 || network.structinfo[offs+0]==-3) || network.structinfo[offs+0]==-4 ) { // // Special neuron type, no back-propagation required // bflag = true; } alglib.ap.assert(bflag, "MLPInternalCalculateGradient: unknown neuron type!"); continue; } } } /************************************************************************* Returns T*Ln(T/Z), guarded against overflow/underflow. Internal subroutine. 
*************************************************************************/ private static double safecrossentropy(double t, double z) { double result = 0; double r = 0; if( (double)(t)==(double)(0) ) { result = 0; } else { if( (double)(Math.Abs(z))>(double)(1) ) { // // Shouldn't be the case with softmax, // but we just want to be sure. // if( (double)(t/z)==(double)(0) ) { r = math.minrealnumber; } else { r = t/z; } } else { // // Normal case // if( (double)(z)==(double)(0) || (double)(Math.Abs(t))>=(double)(math.maxrealnumber*Math.Abs(z)) ) { r = math.maxrealnumber; } else { r = t/z; } } result = t*Math.Log(r); } return result; } } public class logit { public class logitmodel : apobject { public double[] w; public logitmodel() { init(); } public override void init() { w = new double[0]; } public override alglib.apobject make_copy() { logitmodel _result = new logitmodel(); _result.w = (double[])w.Clone(); return _result; } }; public class logitmcstate : apobject { public bool brackt; public bool stage1; public int infoc; public double dg; public double dgm; public double dginit; public double dgtest; public double dgx; public double dgxm; public double dgy; public double dgym; public double finit; public double ftest1; public double fm; public double fx; public double fxm; public double fy; public double fym; public double stx; public double sty; public double stmin; public double stmax; public double width; public double width1; public double xtrapf; public logitmcstate() { init(); } public override void init() { } public override alglib.apobject make_copy() { logitmcstate _result = new logitmcstate(); _result.brackt = brackt; _result.stage1 = stage1; _result.infoc = infoc; _result.dg = dg; _result.dgm = dgm; _result.dginit = dginit; _result.dgtest = dgtest; _result.dgx = dgx; _result.dgxm = dgxm; _result.dgy = dgy; _result.dgym = dgym; _result.finit = finit; _result.ftest1 = ftest1; _result.fm = fm; _result.fx = fx; _result.fxm = fxm; _result.fy = fy; _result.fym = fym; _result.stx = stx; _result.sty = sty; _result.stmin = stmin; _result.stmax = stmax; _result.width = width; _result.width1 = width1; _result.xtrapf = xtrapf; return _result; } }; /************************************************************************* MNLReport structure contains information about training process: * NGrad - number of gradient calculations * NHess - number of Hessian calculations *************************************************************************/ public class mnlreport : apobject { public int ngrad; public int nhess; public mnlreport() { init(); } public override void init() { } public override alglib.apobject make_copy() { mnlreport _result = new mnlreport(); _result.ngrad = ngrad; _result.nhess = nhess; return _result; } }; public const double xtol = 100*math.machineepsilon; public const double ftol = 0.0001; public const double gtol = 0.3; public const int maxfev = 20; public const double stpmin = 1.0E-2; public const double stpmax = 1.0E5; public const int logitvnum = 6; /************************************************************************* This subroutine trains logit model. INPUT PARAMETERS: XY - training set, array[0..NPoints-1,0..NVars] First NVars columns store values of independent variables, next column stores number of class (from 0 to NClasses-1) which dataset element belongs to. Fractional values are rounded to nearest integer. 
    NPoints -   training set size, NPoints>=1
    NVars   -   number of independent variables, NVars>=1
    NClasses -  number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info    -   return code:
                * -2, if there is a point with class number
                      outside of [0..NClasses-1].
                * -1, if incorrect parameters were passed
                      (NPoints<NVars+2, NVars<1, NClasses<2).
                *  1, if task has been solved
    LM      -   model built
    Rep     -   training report

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnltrainh(double[,] xy, int npoints, int nvars, int nclasses, ref int info, logitmodel lm, mnlreport rep)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int ssize = 0;
    bool allsame = new bool();
    int offs = 0;
    int wdim = 0;
    int expoffs = 0;
    double decay = 0;
    double v = 0;
    double s = 0;
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double e = 0;
    double[] g = new double[0];
    double[,] h = new double[0,0];
    bool spd = new bool();
    double[] wbase = new double[0];
    double wstep = 0;
    double[] wdir = new double[0];
    double[] work = new double[0];
    int mcstage = 0;
    logitmcstate mcstate = new logitmcstate();
    int mcinfo = 0;
    int mcnfev = 0;
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;
    int i1_ = 0;

    info = 0;
    decay = 0.001;

    //
    // Test for inputs
    //
    if( npoints<nvars+2 || nvars<1 || nclasses<2 )
    {
        info = -1;
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])<0 || (int)Math.Round(xy[i,nvars])>=nclasses )
        {
            info = -2;
            return;
        }
    }
    info = 1;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;

    //
    // Allocate array
    //
    wdim = (nvars+1)*(nclasses-1);
    offs = 5;
    expoffs = offs+wdim;
    ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
    lm.w = new double[ssize-1+1];
    lm.w[0] = ssize;
    lm.w[1] = logitvnum;
    lm.w[2] = nvars;
    lm.w[3] = nclasses;
    lm.w[4] = offs;

    //
    // Degenerate case: all outputs are equal
    //
    allsame = true;
    for(i=1; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])!=(int)Math.Round(xy[i-1,nvars]) )
        {
            allsame = false;
        }
    }
    if( allsame )
    {
        for(i=0; i<=(nvars+1)*(nclasses-1)-1; i++)
        {
            lm.w[offs+i] = 0;
        }
        v = -(2*Math.Log(math.minrealnumber));
        k = (int)Math.Round(xy[0,nvars]);
        if( k==nclasses-1 )
        {
            for(i=0; i<=nclasses-2; i++)
            {
                lm.w[offs+i*(nvars+1)+nvars] = -v;
            }
        }
        else
        {
            for(i=0; i<=nclasses-2; i++)
            {
                if( i==k )
                {
                    lm.w[offs+i*(nvars+1)+nvars] = v;
                }
                else
                {
                    lm.w[offs+i*(nvars+1)+nvars] = 0;
                }
            }
        }
        return;
    }

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpcreatec0(nvars, nclasses, network);
    mlpbase.mlpinitpreprocessor(network, xy, npoints);
    mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);
    for(i=0; i<=wcount-1; i++)
    {
        network.weights[i] = (2*math.randomreal()-1)/nvars;
    }
    g = new double[wcount-1+1];
    h = new double[wcount-1+1, wcount-1+1];
    wbase = new double[wcount-1+1];
    wdir = new double[wcount-1+1];
    work = new double[wcount-1+1];

    //
    // First stage: optimize in gradient direction.
    //
    for(k=0; k<=wcount/3+10; k++)
    {
        //
        // Calculate gradient in starting point
        //
        mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; }
        rep.ngrad = rep.ngrad+1;

        //
        // Setup optimization scheme
        //
        for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = -g[i_]; }
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++) { v += wdir[i_]*wdir[i_]; }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = v*wdir[i_]; }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        }
    }

    //
    // Second stage: use Hessian when we are close to the minimum
    //
    while( true )
    {
        //
        // Calculate and update E/G/H
        //
        mlpbase.mlphessiannbatch(network, xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; }
        for(k=0; k<=wcount-1; k++)
        {
            h[k,k] = h[k,k]+decay;
        }
        rep.nhess = rep.nhess+1;

        //
        // Select step direction
        // NOTE: it is important to use lower-triangle Cholesky
        // factorization since it is much faster than higher-triangle version.
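        //
        // In effect, one damped Newton step is performed here: we attempt
        // to solve
        //     (H+Decay*I)*WDir = -G
        // (Decay was already added to the diagonal of H above), and fall
        // back to the steepest descent direction -G whenever the
        // factorization reports that the matrix is not positive definite.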
        //
        spd = trfac.spdmatrixcholesky(ref h, wcount, false);
        densesolver.spdmatrixcholeskysolve(h, wcount, false, g, ref solverinfo, solverrep, ref wdir);
        spd = solverinfo>0;
        if( spd )
        {
            //
            // H is positive definite.
            // Step in Newton direction.
            //
            for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = -1*wdir[i_]; }
            spd = true;
        }
        else
        {
            //
            // H is indefinite.
            // Step in gradient direction.
            //
            for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = -g[i_]; }
            spd = false;
        }

        //
        // Optimize in WDir direction
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++) { v += wdir[i_]*wdir[i_]; }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = v*wdir[i_]; }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        }
        if( spd && ((mcinfo==2 || mcinfo==4) || mcinfo==6) )
        {
            break;
        }
    }

    //
    // Convert from NN format to MNL format
    //
    i1_ = (0) - (offs);
    for(i_=offs; i_<=offs+wcount-1;i_++) { lm.w[i_] = network.weights[i_+i1_]; }
    for(k=0; k<=nvars-1; k++)
    {
        for(i=0; i<=nclasses-2; i++)
        {
            s = network.columnsigmas[k];
            if( (double)(s)==(double)(0) )
            {
                s = 1;
            }
            j = offs+(nvars+1)*i;
            v = lm.w[j+k];
            lm.w[j+k] = v/s;
            lm.w[j+nvars] = lm.w[j+nvars]+v*network.columnmeans[k]/s;
        }
    }
    for(k=0; k<=nclasses-2; k++)
    {
        lm.w[offs+(nvars+1)*k+nvars] = -lm.w[offs+(nvars+1)*k+nvars];
    }
}

/*************************************************************************
Processing

INPUT PARAMETERS:
    LM      -   logit model, passed by non-constant reference
                (some fields of structure are used as temporaries
                when calculating model output).
    X       -   input vector, array[0..NVars-1].
    Y       -   (possibly) preallocated buffer; if size of Y is less than
                NClasses, it will be reallocated. If it is large enough,
                it is NOT reallocated, so we can save some time on
                reallocation.

OUTPUT PARAMETERS:
    Y       -   result, array[0..NClasses-1]
                Vector of posterior probabilities for classification task.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnlprocess(logitmodel lm, double[] x, ref double[] y)
{
    int nvars = 0;
    int nclasses = 0;
    int offs = 0;
    int i = 0;
    int i1 = 0;
    double s = 0;

    alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLProcess: unexpected model version");
    nvars = (int)Math.Round(lm.w[2]);
    nclasses = (int)Math.Round(lm.w[3]);
    offs = (int)Math.Round(lm.w[4]);
    mnliexp(ref lm.w, x);
    s = 0;
    i1 = offs+(nvars+1)*(nclasses-1);
    for(i=i1; i<=i1+nclasses-1; i++)
    {
        s = s+lm.w[i];
    }
    if( alglib.ap.len(y)<nclasses )
    {
        y = new double[nclasses];
    }
    for(i=0; i<=nclasses-1; i++)
    {
        y[i] = lm.w[i1+i]/s;
    }
}

/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*Log(2)).

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
public static double mnlavgce(logitmodel lm, double[,] xy, int npoints)
{
    double result = 0;
    int nvars = 0;
    int nclasses = 0;
    int i = 0;
    double[] workx = new double[0];
    double[] worky = new double[0];
    int i_ = 0;

    alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLAvgCE: unexpected model version");
    nvars = (int)Math.Round(lm.w[2]);
    nclasses = (int)Math.Round(lm.w[3]);
    workx = new double[nvars-1+1];
    worky = new double[nclasses-1+1];
    result = 0;
    for(i=0; i<=npoints-1; i++)
    {
        alglib.ap.assert((int)Math.Round(xy[i,nvars])>=0 && (int)Math.Round(xy[i,nvars])<nclasses, "MNLAvgCE: incorrect class number!");

        //
        // Process
        //
        for(i_=0; i_<=nvars-1;i_++) { workx[i_] = xy[i,i_]; }
        mnlprocess(lm, workx, ref worky);
        if( (double)(worky[(int)Math.Round(xy[i,nvars])])>(double)(0) )
        {
            result = result-Math.Log(worky[(int)Math.Round(xy[i,nvars])]);
        }
        else
        {
            result = result-Math.Log(math.minrealnumber);
        }
    }
    result = result/(npoints*Math.Log(2));
    return result;
}

/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.
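    (returned as a fraction in [0,1] rather than as a percentage: e.g.,
    25 misclassified points out of NPoints=1000 give 0.025)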
-- ALGLIB -- Copyright 10.09.2008 by Bochkanov Sergey *************************************************************************/ public static double mnlrelclserror(logitmodel lm, double[,] xy, int npoints) { double result = 0; result = (double)mnlclserror(lm, xy, npoints)/(double)npoints; return result; } /************************************************************************* RMS error on the test set INPUT PARAMETERS: LM - logit model XY - test set NPoints - test set size RESULT: root mean square error (error when estimating posterior probabilities). -- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static double mnlrmserror(logitmodel lm, double[,] xy, int npoints) { double result = 0; double relcls = 0; double avgce = 0; double rms = 0; double avg = 0; double avgrel = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==logitvnum, "MNLRMSError: Incorrect MNL version!"); mnlallerrors(lm, xy, npoints, ref relcls, ref avgce, ref rms, ref avg, ref avgrel); result = rms; return result; } /************************************************************************* Average error on the test set INPUT PARAMETERS: LM - logit model XY - test set NPoints - test set size RESULT: average error (error when estimating posterior probabilities). -- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static double mnlavgerror(logitmodel lm, double[,] xy, int npoints) { double result = 0; double relcls = 0; double avgce = 0; double rms = 0; double avg = 0; double avgrel = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==logitvnum, "MNLRMSError: Incorrect MNL version!"); mnlallerrors(lm, xy, npoints, ref relcls, ref avgce, ref rms, ref avg, ref avgrel); result = avg; return result; } /************************************************************************* Average relative error on the test set INPUT PARAMETERS: LM - logit model XY - test set NPoints - test set size RESULT: average relative error (error when estimating posterior probabilities). 
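NOTE: like MNLRMSError() and MNLAvgError() above, this function is a thin
wrapper around the internal MNLAllErrors() routine; when several error
metrics are needed for the same dataset, it is cheaper to compute them in
a single pass.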
-- ALGLIB -- Copyright 30.08.2008 by Bochkanov Sergey *************************************************************************/ public static double mnlavgrelerror(logitmodel lm, double[,] xy, int ssize) { double result = 0; double relcls = 0; double avgce = 0; double rms = 0; double avg = 0; double avgrel = 0; alglib.ap.assert((int)Math.Round(lm.w[1])==logitvnum, "MNLRMSError: Incorrect MNL version!"); mnlallerrors(lm, xy, ssize, ref relcls, ref avgce, ref rms, ref avg, ref avgrel); result = avgrel; return result; } /************************************************************************* Classification error on test set = MNLRelClsError*NPoints -- ALGLIB -- Copyright 10.09.2008 by Bochkanov Sergey *************************************************************************/ public static int mnlclserror(logitmodel lm, double[,] xy, int npoints) { int result = 0; int nvars = 0; int nclasses = 0; int i = 0; int j = 0; double[] workx = new double[0]; double[] worky = new double[0]; int nmax = 0; int i_ = 0; alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLClsError: unexpected model version"); nvars = (int)Math.Round(lm.w[2]); nclasses = (int)Math.Round(lm.w[3]); workx = new double[nvars-1+1]; worky = new double[nclasses-1+1]; result = 0; for(i=0; i<=npoints-1; i++) { // // Process // for(i_=0; i_<=nvars-1;i_++) { workx[i_] = xy[i,i_]; } mnlprocess(lm, workx, ref worky); // // Logit version of the answer // nmax = 0; for(j=0; j<=nclasses-1; j++) { if( (double)(worky[j])>(double)(worky[nmax]) ) { nmax = j; } } // // compare // if( nmax!=(int)Math.Round(xy[i,nvars]) ) { result = result+1; } } return result; } /************************************************************************* Internal subroutine. Places exponents of the anti-overflow shifted internal linear outputs into the service part of the W array. 
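In other words, for I=0..NClasses-2 the linear outputs

    NET(I) = W(I,0)*X[0] + ... + W(I,NVars-1)*X[NVars-1] + B(I)

are evaluated, NET(NClasses-1) is fixed at zero, and the shifted values
Exp(NET(I)-Max(NET(J))) are stored into W starting at offset
Offs+(NVars+1)*(NClasses-1).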
*************************************************************************/
private static void mnliexp(ref double[] w, double[] x)
{
    int nvars = 0;
    int nclasses = 0;
    int offs = 0;
    int i = 0;
    int i1 = 0;
    double v = 0;
    double mx = 0;
    int i_ = 0;
    int i1_ = 0;

    alglib.ap.assert((double)(w[1])==(double)(logitvnum), "LOGIT: unexpected model version");
    nvars = (int)Math.Round(w[2]);
    nclasses = (int)Math.Round(w[3]);
    offs = (int)Math.Round(w[4]);
    i1 = offs+(nvars+1)*(nclasses-1);
    for(i=0; i<=nclasses-2; i++)
    {
        i1_ = (0)-(offs+i*(nvars+1));
        v = 0.0;
        for(i_=offs+i*(nvars+1); i_<=offs+i*(nvars+1)+nvars-1;i_++) { v += w[i_]*x[i_+i1_]; }
        w[i1+i] = v+w[offs+i*(nvars+1)+nvars];
    }
    w[i1+nclasses-1] = 0;
    mx = 0;
    for(i=i1; i<=i1+nclasses-1; i++)
    {
        mx = Math.Max(mx, w[i]);
    }
    for(i=i1; i<=i1+nclasses-1; i++)
    {
        w[i] = Math.Exp(w[i]-mx);
    }
}

/*************************************************************************
Calculation of all types of errors

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
private static void mnlallerrors(logitmodel lm, double[,] xy, int npoints, ref double relcls, ref double avgce, ref double rms, ref double avg, ref double avgrel)
{
    int nvars = 0;
    int nclasses = 0;
    int i = 0;
    double[] buf = new double[0];
    double[] workx = new double[0];
    double[] y = new double[0];
    double[] dy = new double[0];
    int i_ = 0;

    relcls = 0;
    avgce = 0;
    rms = 0;
    avg = 0;
    avgrel = 0;
    alglib.ap.assert((int)Math.Round(lm.w[1])==logitvnum, "MNL unit: Incorrect MNL version!");
    nvars = (int)Math.Round(lm.w[2]);
    nclasses = (int)Math.Round(lm.w[3]);
    workx = new double[nvars-1+1];
    y = new double[nclasses-1+1];
    dy = new double[0+1];
    bdss.dserrallocate(nclasses, ref buf);
    for(i=0; i<=npoints-1; i++)
    {
        for(i_=0; i_<=nvars-1;i_++) { workx[i_] = xy[i,i_]; }
        mnlprocess(lm, workx, ref y);
        dy[0] = xy[i,nvars];
        bdss.dserraccumulate(ref buf, y, dy);
    }
    bdss.dserrfinish(ref buf);
    relcls = buf[0];
    avgce = buf[1];
    rms = buf[2];
    avg = buf[3];
    avgrel = buf[4];
}

/*************************************************************************
THE PURPOSE OF MCSRCH IS TO FIND A STEP WHICH SATISFIES A SUFFICIENT
DECREASE CONDITION AND A CURVATURE CONDITION.

AT EACH STAGE THE SUBROUTINE UPDATES AN INTERVAL OF UNCERTAINTY WITH
ENDPOINTS STX AND STY. THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION

    F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).

IF A STEP IS OBTAINED FOR WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
FUNCTION VALUE AND NONNEGATIVE DERIVATIVE, THEN THE INTERVAL OF
UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).

THE ALGORITHM IS DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
DECREASE CONDITION

    F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),

AND THE CURVATURE CONDITION

    ABS(GRADF(X+STP*S)'S) .LE. GTOL*ABS(GRADF(X)'S).

IF FTOL IS LESS THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
BELOW, THEN THERE IS ALWAYS A STEP WHICH SATISFIES BOTH CONDITIONS. IF
NO STEP CAN BE FOUND WHICH SATISFIES BOTH CONDITIONS, THEN THE ALGORITHM
USUALLY STOPS WHEN ROUNDING ERRORS PREVENT FURTHER PROGRESS. IN THIS
CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.

PARAMETERS DESCRIPTION

N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.

X IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.

F IS A VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
IT CONTAINS THE VALUE OF F AT X + STP*S.
G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X. ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S. S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION. STP IS A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE. FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE SATISFIED. XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL. STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER AND UPPER BOUNDS FOR THE STEP. MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION. INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS: INFO = 0 IMPROPER INPUT PARAMETERS. INFO = 1 THE SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION HOLD. INFO = 2 RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL. INFO = 3 NUMBER OF CALLS TO FCN HAS REACHED MAXFEV. INFO = 4 THE STEP IS AT THE LOWER BOUND STPMIN. INFO = 5 THE STEP IS AT THE UPPER BOUND STPMAX. INFO = 6 ROUNDING ERRORS PREVENT FURTHER PROGRESS. THERE MAY NOT BE A STEP WHICH SATISFIES THE SUFFICIENT DECREASE AND CURVATURE CONDITIONS. TOLERANCES MAY BE TOO SMALL. NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN. WA IS A WORK ARRAY OF LENGTH N. ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983 JORGE J. MORE', DAVID J. THUENTE *************************************************************************/ private static void mnlmcsrch(int n, ref double[] x, ref double f, ref double[] g, double[] s, ref double stp, ref int info, ref int nfev, ref double[] wa, logitmcstate state, ref int stage) { double v = 0; double p5 = 0; double p66 = 0; double zero = 0; int i_ = 0; // // init // p5 = 0.5; p66 = 0.66; state.xtrapf = 4.0; zero = 0; // // Main cycle // while( true ) { if( stage==0 ) { // // NEXT // stage = 2; continue; } if( stage==2 ) { state.infoc = 1; info = 0; // // CHECK THE INPUT PARAMETERS FOR ERRORS. // if( ((((((n<=0 || (double)(stp)<=(double)(0)) || (double)(ftol)<(double)(0)) || (double)(gtol)<(double)(zero)) || (double)(xtol)<(double)(zero)) || (double)(stpmin)<(double)(zero)) || (double)(stpmax)<(double)(stpmin)) || maxfev<=0 ) { stage = 0; return; } // // COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION // AND CHECK THAT S IS A DESCENT DIRECTION. // v = 0.0; for(i_=0; i_<=n-1;i_++) { v += g[i_]*s[i_]; } state.dginit = v; if( (double)(state.dginit)>=(double)(0) ) { stage = 0; return; } // // INITIALIZE LOCAL VARIABLES. // state.brackt = false; state.stage1 = true; nfev = 0; state.finit = f; state.dgtest = ftol*state.dginit; state.width = stpmax-stpmin; state.width1 = state.width/p5; for(i_=0; i_<=n-1;i_++) { wa[i_] = x[i_]; } // // THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP, // FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP. // THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP, // FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF // THE INTERVAL OF UNCERTAINTY. // THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP, // FUNCTION, AND DERIVATIVE AT THE CURRENT STEP. 
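            //
            // NOTE (COMMUNICATION PROTOCOL): THIS ROUTINE IS DRIVEN BY THE
            // STAGE VARIABLE. IT IS STARTED WITH STAGE=0; EVERY TIME IT
            // RETURNS WITH STAGE=4 THE CALLER MUST EVALUATE F AND G AT THE
            // UPDATED POINT X AND CALL IT AGAIN. STAGE=0 ON RETURN MEANS
            // THAT THE SEARCH HAS FINISHED WITH CODE INFO.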
// state.stx = 0; state.fx = state.finit; state.dgx = state.dginit; state.sty = 0; state.fy = state.finit; state.dgy = state.dginit; // // NEXT // stage = 3; continue; } if( stage==3 ) { // // START OF ITERATION. // // SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND // TO THE PRESENT INTERVAL OF UNCERTAINTY. // if( state.brackt ) { if( (double)(state.stx)<(double)(state.sty) ) { state.stmin = state.stx; state.stmax = state.sty; } else { state.stmin = state.sty; state.stmax = state.stx; } } else { state.stmin = state.stx; state.stmax = stp+state.xtrapf*(stp-state.stx); } // // FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN. // if( (double)(stp)>(double)(stpmax) ) { stp = stpmax; } if( (double)(stp)<(double)(stpmin) ) { stp = stpmin; } // // IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET // STP BE THE LOWEST POINT OBTAINED SO FAR. // if( (((state.brackt && ((double)(stp)<=(double)(state.stmin) || (double)(stp)>=(double)(state.stmax))) || nfev>=maxfev-1) || state.infoc==0) || (state.brackt && (double)(state.stmax-state.stmin)<=(double)(xtol*state.stmax)) ) { stp = state.stx; } // // EVALUATE THE FUNCTION AND GRADIENT AT STP // AND COMPUTE THE DIRECTIONAL DERIVATIVE. // for(i_=0; i_<=n-1;i_++) { x[i_] = wa[i_]; } for(i_=0; i_<=n-1;i_++) { x[i_] = x[i_] + stp*s[i_]; } // // NEXT // stage = 4; return; } if( stage==4 ) { info = 0; nfev = nfev+1; v = 0.0; for(i_=0; i_<=n-1;i_++) { v += g[i_]*s[i_]; } state.dg = v; state.ftest1 = state.finit+stp*state.dgtest; // // TEST FOR CONVERGENCE. // if( (state.brackt && ((double)(stp)<=(double)(state.stmin) || (double)(stp)>=(double)(state.stmax))) || state.infoc==0 ) { info = 6; } if( ((double)(stp)==(double)(stpmax) && (double)(f)<=(double)(state.ftest1)) && (double)(state.dg)<=(double)(state.dgtest) ) { info = 5; } if( (double)(stp)==(double)(stpmin) && ((double)(f)>(double)(state.ftest1) || (double)(state.dg)>=(double)(state.dgtest)) ) { info = 4; } if( nfev>=maxfev ) { info = 3; } if( state.brackt && (double)(state.stmax-state.stmin)<=(double)(xtol*state.stmax) ) { info = 2; } if( (double)(f)<=(double)(state.ftest1) && (double)(Math.Abs(state.dg))<=(double)(-(gtol*state.dginit)) ) { info = 1; } // // CHECK FOR TERMINATION. // if( info!=0 ) { stage = 0; return; } // // IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED // FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE. // if( (state.stage1 && (double)(f)<=(double)(state.ftest1)) && (double)(state.dg)>=(double)(Math.Min(ftol, gtol)*state.dginit) ) { state.stage1 = false; } // // A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF // WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED // FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE // DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN // OBTAINED BUT THE DECREASE IS NOT SUFFICIENT. // if( (state.stage1 && (double)(f)<=(double)(state.fx)) && (double)(f)>(double)(state.ftest1) ) { // // DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES. // state.fm = f-stp*state.dgtest; state.fxm = state.fx-state.stx*state.dgtest; state.fym = state.fy-state.sty*state.dgtest; state.dgm = state.dg-state.dgtest; state.dgxm = state.dgx-state.dgtest; state.dgym = state.dgy-state.dgtest; // // CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY // AND TO COMPUTE THE NEW STEP. // mnlmcstep(ref state.stx, ref state.fxm, ref state.dgxm, ref state.sty, ref state.fym, ref state.dgym, ref stp, state.fm, state.dgm, ref state.brackt, state.stmin, state.stmax, ref state.infoc); // // RESET THE FUNCTION AND GRADIENT VALUES FOR F. 
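                // (THE MODIFIED VALUES FXM/FYM/DGXM/DGYM REFER TO THE
                // AUXILIARY FUNCTION PSI(STP)=F(STP)-STP*DGTEST DEFINED
                // ABOVE; HERE THEY ARE MAPPED BACK TO VALUES OF F ITSELF.)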
// state.fx = state.fxm+state.stx*state.dgtest; state.fy = state.fym+state.sty*state.dgtest; state.dgx = state.dgxm+state.dgtest; state.dgy = state.dgym+state.dgtest; } else { // // CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY // AND TO COMPUTE THE NEW STEP. // mnlmcstep(ref state.stx, ref state.fx, ref state.dgx, ref state.sty, ref state.fy, ref state.dgy, ref stp, f, state.dg, ref state.brackt, state.stmin, state.stmax, ref state.infoc); } // // FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE // INTERVAL OF UNCERTAINTY. // if( state.brackt ) { if( (double)(Math.Abs(state.sty-state.stx))>=(double)(p66*state.width1) ) { stp = state.stx+p5*(state.sty-state.stx); } state.width1 = state.width; state.width = Math.Abs(state.sty-state.stx); } // // NEXT. // stage = 3; continue; } } } private static void mnlmcstep(ref double stx, ref double fx, ref double dx, ref double sty, ref double fy, ref double dy, ref double stp, double fp, double dp, ref bool brackt, double stmin, double stmax, ref int info) { bool bound = new bool(); double gamma = 0; double p = 0; double q = 0; double r = 0; double s = 0; double sgnd = 0; double stpc = 0; double stpf = 0; double stpq = 0; double theta = 0; info = 0; // // CHECK THE INPUT PARAMETERS FOR ERRORS. // if( ((brackt && ((double)(stp)<=(double)(Math.Min(stx, sty)) || (double)(stp)>=(double)(Math.Max(stx, sty)))) || (double)(dx*(stp-stx))>=(double)(0)) || (double)(stmax)<(double)(stmin) ) { return; } // // DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN. // sgnd = dp*(dx/Math.Abs(dx)); // // FIRST CASE. A HIGHER FUNCTION VALUE. // THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER // TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN, // ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN. // if( (double)(fp)>(double)(fx) ) { info = 1; bound = true; theta = 3*(fx-fp)/(stp-stx)+dx+dp; s = Math.Max(Math.Abs(theta), Math.Max(Math.Abs(dx), Math.Abs(dp))); gamma = s*Math.Sqrt(math.sqr(theta/s)-dx/s*(dp/s)); if( (double)(stp)<(double)(stx) ) { gamma = -gamma; } p = gamma-dx+theta; q = gamma-dx+gamma+dp; r = p/q; stpc = stx+r*(stp-stx); stpq = stx+dx/((fx-fp)/(stp-stx)+dx)/2*(stp-stx); if( (double)(Math.Abs(stpc-stx))<(double)(Math.Abs(stpq-stx)) ) { stpf = stpc; } else { stpf = stpc+(stpq-stpc)/2; } brackt = true; } else { if( (double)(sgnd)<(double)(0) ) { // // SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF // OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC // STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP, // THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN. // info = 2; bound = false; theta = 3*(fx-fp)/(stp-stx)+dx+dp; s = Math.Max(Math.Abs(theta), Math.Max(Math.Abs(dx), Math.Abs(dp))); gamma = s*Math.Sqrt(math.sqr(theta/s)-dx/s*(dp/s)); if( (double)(stp)>(double)(stx) ) { gamma = -gamma; } p = gamma-dp+theta; q = gamma-dp+gamma+dx; r = p/q; stpc = stp+r*(stx-stp); stpq = stp+dp/(dp-dx)*(stx-stp); if( (double)(Math.Abs(stpc-stp))>(double)(Math.Abs(stpq-stp)) ) { stpf = stpc; } else { stpf = stpq; } brackt = true; } else { if( (double)(Math.Abs(dp))<(double)(Math.Abs(dx)) ) { // // THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE // SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES. // THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY // IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC // IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE // EITHER STPMIN OR STPMAX. 
THE QUADRATIC (SECANT) STEP IS ALSO // COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE THE STEP // CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN. // info = 3; bound = true; theta = 3*(fx-fp)/(stp-stx)+dx+dp; s = Math.Max(Math.Abs(theta), Math.Max(Math.Abs(dx), Math.Abs(dp))); // // THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND // TO INFINITY IN THE DIRECTION OF THE STEP. // gamma = s*Math.Sqrt(Math.Max(0, math.sqr(theta/s)-dx/s*(dp/s))); if( (double)(stp)>(double)(stx) ) { gamma = -gamma; } p = gamma-dp+theta; q = gamma+(dx-dp)+gamma; r = p/q; if( (double)(r)<(double)(0) && (double)(gamma)!=(double)(0) ) { stpc = stp+r*(stx-stp); } else { if( (double)(stp)>(double)(stx) ) { stpc = stmax; } else { stpc = stmin; } } stpq = stp+dp/(dp-dx)*(stx-stp); if( brackt ) { if( (double)(Math.Abs(stp-stpc))<(double)(Math.Abs(stp-stpq)) ) { stpf = stpc; } else { stpf = stpq; } } else { if( (double)(Math.Abs(stp-stpc))>(double)(Math.Abs(stp-stpq)) ) { stpf = stpc; } else { stpf = stpq; } } } else { // // FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE // SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES // NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP // IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN. // info = 4; bound = false; if( brackt ) { theta = 3*(fp-fy)/(sty-stp)+dy+dp; s = Math.Max(Math.Abs(theta), Math.Max(Math.Abs(dy), Math.Abs(dp))); gamma = s*Math.Sqrt(math.sqr(theta/s)-dy/s*(dp/s)); if( (double)(stp)>(double)(sty) ) { gamma = -gamma; } p = gamma-dp+theta; q = gamma-dp+gamma+dy; r = p/q; stpc = stp+r*(sty-stp); stpf = stpc; } else { if( (double)(stp)>(double)(stx) ) { stpf = stmax; } else { stpf = stmin; } } } } } // // UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT // DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE. // if( (double)(fp)>(double)(fx) ) { sty = stp; fy = fp; dy = dp; } else { if( (double)(sgnd)<(double)(0.0) ) { sty = stx; fy = fx; dy = dx; } stx = stp; fx = fp; dx = dp; } // // COMPUTE THE NEW STEP AND SAFEGUARD IT. // stpf = Math.Min(stmax, stpf); stpf = Math.Max(stmin, stpf); stp = stpf; if( brackt && bound ) { if( (double)(sty)>(double)(stx) ) { stp = Math.Min(stx+0.66*(sty-stx), stp); } else { stp = Math.Max(stx+0.66*(sty-stx), stp); } } } } public class mcpd { /************************************************************************* This structure is a MCPD (Markov Chains for Population Data) solver. You should use ALGLIB functions in order to work with this object. 
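Internally it stores the tracks added so far, the bound, equality and
linear constraints imposed on the transition matrix, prediction weights
and prior values, and the state of the MinBLEIC optimizer which is used
to solve the resulting constrained optimization problem.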
-- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public class mcpdstate : apobject { public int n; public int[] states; public int npairs; public double[,] data; public double[,] ec; public double[,] bndl; public double[,] bndu; public double[,] c; public int[] ct; public int ccnt; public double[] pw; public double[,] priorp; public double regterm; public minbleic.minbleicstate bs; public int repinneriterationscount; public int repouteriterationscount; public int repnfev; public int repterminationtype; public minbleic.minbleicreport br; public double[] tmpp; public double[] effectivew; public double[] effectivebndl; public double[] effectivebndu; public double[,] effectivec; public int[] effectivect; public double[] h; public double[,] p; public mcpdstate() { init(); } public override void init() { states = new int[0]; data = new double[0,0]; ec = new double[0,0]; bndl = new double[0,0]; bndu = new double[0,0]; c = new double[0,0]; ct = new int[0]; pw = new double[0]; priorp = new double[0,0]; bs = new minbleic.minbleicstate(); br = new minbleic.minbleicreport(); tmpp = new double[0]; effectivew = new double[0]; effectivebndl = new double[0]; effectivebndu = new double[0]; effectivec = new double[0,0]; effectivect = new int[0]; h = new double[0]; p = new double[0,0]; } public override alglib.apobject make_copy() { mcpdstate _result = new mcpdstate(); _result.n = n; _result.states = (int[])states.Clone(); _result.npairs = npairs; _result.data = (double[,])data.Clone(); _result.ec = (double[,])ec.Clone(); _result.bndl = (double[,])bndl.Clone(); _result.bndu = (double[,])bndu.Clone(); _result.c = (double[,])c.Clone(); _result.ct = (int[])ct.Clone(); _result.ccnt = ccnt; _result.pw = (double[])pw.Clone(); _result.priorp = (double[,])priorp.Clone(); _result.regterm = regterm; _result.bs = (minbleic.minbleicstate)bs.make_copy(); _result.repinneriterationscount = repinneriterationscount; _result.repouteriterationscount = repouteriterationscount; _result.repnfev = repnfev; _result.repterminationtype = repterminationtype; _result.br = (minbleic.minbleicreport)br.make_copy(); _result.tmpp = (double[])tmpp.Clone(); _result.effectivew = (double[])effectivew.Clone(); _result.effectivebndl = (double[])effectivebndl.Clone(); _result.effectivebndu = (double[])effectivebndu.Clone(); _result.effectivec = (double[,])effectivec.Clone(); _result.effectivect = (int[])effectivect.Clone(); _result.h = (double[])h.Clone(); _result.p = (double[,])p.Clone(); return _result; } }; /************************************************************************* This structure is a MCPD training report: InnerIterationsCount - number of inner iterations of the underlying optimization algorithm OuterIterationsCount - number of outer iterations of the underlying optimization algorithm NFEV - number of merit function evaluations TerminationType - termination type (same as for MinBLEIC optimizer, positive values denote success, negative ones - failure) -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public class mcpdreport : apobject { public int inneriterationscount; public int outeriterationscount; public int nfev; public int terminationtype; public mcpdreport() { init(); } public override void init() { } public override alglib.apobject make_copy() { mcpdreport _result = new mcpdreport(); _result.inneriterationscount = inneriterationscount; _result.outeriterationscount = 
outeriterationscount;
        _result.nfev = nfev;
        _result.terminationtype = terminationtype;
        return _result;
    }
};

public const double xtol = 1.0E-8;

/*************************************************************************
DESCRIPTION:

This function creates MCPD (Markov Chains for Population Data) solver.

This solver can be used to find transition matrix P for N-dimensional
prediction problem where transition from X[i] to X[i+1] is modelled as

    X[i+1] = P*X[i]

where X[i] and X[i+1] are N-dimensional population vectors (components
of each X are non-negative), and P is a N*N transition matrix (elements
of P are non-negative, each column sums to 1.0).

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is constant, i.e. there are no new individuals and no
  one leaves the population
* you want to model transitions of individuals from one state into another

USAGE:

Here we give a very brief outline of the MCPD. We strongly recommend
that you read the examples in the ALGLIB Reference Manual and the ALGLIB
User Guide on data analysis which is available at
http://www.alglib.net/dataanalysis/

1. User initializes algorithm state with MCPDCreate() call

2. User adds one or more tracks - sequences of states which describe
   evolution of a system being modelled from different starting conditions

3. User may add optional boundary, equality and/or linear constraints on
   the coefficients of P by calling one of the following functions:
   * MCPDSetEC() to set equality constraints
   * MCPDSetBC() to set bound constraints
   * MCPDSetLC() to set linear constraints

4. Optionally, user may set custom weights for prediction errors (by
   default, algorithm assigns non-equal, automatically chosen weights
   for errors in the prediction of different components of X). It can be
   done with a call of MCPDSetPredictionWeights() function.

5. User calls MCPDSolve() function which solves the problem

6. User calls MCPDResults() to get solution

INPUT PARAMETERS:
    N       -   problem dimension, N>=1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
public static void mcpdcreate(int n, mcpdstate s)
{
    alglib.ap.assert(n>=1, "MCPDCreate: N<1");
    mcpdinit(n, -1, -1, s);
}

/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend that you read the comments on MCPDCreate() for general
information about the MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as

    X[i+1] = P*X[i]

where

    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix

and one selected component of X[] is called "entry" state and is treated
in a special way:
    system state always transits from "entry" state to some other state
    system state cannot transit from any state into "entry" state

Such conditions basically mean that row of P which corresponds to
"entry" state is zero.
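For example, for N=3 and entry state 0, P has the structure

        ( 0 0 0 )
    P = ( * * * )
        ( * * * )

where asterisks denote elements which are fitted by the solver.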
Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - at every moment of time there is
  some (unpredictable) amount of "new" individuals, which can transit
  into one of the states at the next turn, but still no one leaves the
  population
* you want to model transitions of individuals from one state into another
* but you do NOT want to predict the amount of "new" individuals because
  it does not depend on individuals already present (hence system cannot
  transit INTO entry state - it can only transit FROM it).

This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).

INPUT PARAMETERS:
    N       -   problem dimension, N>=2
    EntryState- index of entry state, in 0..N-1

OUTPUT PARAMETERS:
    State   -   structure stores algorithm state

  -- ALGLIB --
     Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
public static void mcpdcreateentry(int n, int entrystate, mcpdstate s)
{
    alglib.ap.assert(n>=2, "MCPDCreateEntry: N<2");
    alglib.ap.assert(entrystate>=0, "MCPDCreateEntry: EntryState<0");
    alglib.ap.assert(entrystate<n, "MCPDCreateEntry: EntryState>=N");
    mcpdinit(n, entrystate, -1, s);
}

/*************************************************************************
DESCRIPTION:

This function is a specialized version of MCPDCreate() function, and we
recommend that you read the comments on MCPDCreate() for general
information about the MCPD solver.

This function creates MCPD (Markov Chains for Population Data) solver
for "Exit-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as

    X[i+1] = P*X[i]

where

    X[i] and X[i+1] are N-dimensional state vectors
    P is a N*N transition matrix

and one selected component of X[] is called "exit" state and is treated
in a special way:
    system state can transit from any state into "exit" state
    system state cannot transit from "exit" state into any other state
    transition operator discards "exit" state (makes it zero at each turn)

Such conditions basically mean that column of P which corresponds to
"exit" state is zero. Multiplication by such P may decrease sum of
vector components.

Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - individuals can move into "exit"
  state and leave the population at the next turn, but there are no new
  individuals
* amount of individuals which leave the population can be predicted
* you want to model transitions of individuals from one state into
  another (including transitions into the "exit" state)

This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).
INPUT PARAMETERS: N - problem dimension, N>=2 ExitState- index of exit state, in 0..N-1 OUTPUT PARAMETERS: State - structure stores algorithm state -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdcreateexit(int n, int exitstate, mcpdstate s) { alglib.ap.assert(n>=2, "MCPDCreateExit: N<2"); alglib.ap.assert(exitstate>=0, "MCPDCreateExit: ExitState<0"); alglib.ap.assert(exitstate<n, "MCPDCreateExit: ExitState>=N"); mcpdinit(n, -1, exitstate, s); } /************************************************************************* DESCRIPTION: This function is a specialized version of MCPDCreate() function, and we recommend you to read comments for this function for general information about MCPD solver. This function creates MCPD (Markov Chains for Population Data) solver for "Entry-Exit-states" model, i.e. model where transition from X[i] to X[i+1] is modelled as X[i+1] = P*X[i] where X[i] and X[i+1] are N-dimensional state vectors P is a N*N transition matrix one selected component of X[] is called "entry" state and is treated in a special way: system state always transits from "entry" state to some other state system state cannot transit from any state into "entry" state and another component of X[] is called "exit" state and is treated in a special way too: system state can transit from any state into "exit" state system state cannot transit from "exit" state into any other state transition operator discards "exit" state (makes it zero at each turn) Such conditions basically mean that: row of P which corresponds to "entry" state is zero column of P which corresponds to "exit" state is zero Multiplication by such P may decrease sum of vector components. Such models arise when: * there is some population of individuals * individuals can have different states * individuals can transit from one state to another * population size is NOT constant * at every moment of time there is some (unpredictable) amount of "new" individuals, which can transit into one of the states at the next turn * some individuals can move (predictably) into "exit" state and leave the population at the next turn * you want to model transitions of individuals from one state into another, including transitions from the "entry" state and into the "exit" state * but you do NOT want to predict amount of "new" individuals because it does not depend on individuals already present (hence system cannot transit INTO entry state - it can only transit FROM it). This model is discussed in more detail in the ALGLIB User Guide (see http://www.alglib.net/dataanalysis/ for details).
INPUT PARAMETERS: N - problem dimension, N>=2 EntryState- index of entry state, in 0..N-1 ExitState- index of exit state, in 0..N-1 OUTPUT PARAMETERS: State - structure stores algorithm state -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdcreateentryexit(int n, int entrystate, int exitstate, mcpdstate s) { alglib.ap.assert(n>=2, "MCPDCreateEntryExit: N<2"); alglib.ap.assert(entrystate>=0, "MCPDCreateEntryExit: EntryState<0"); alglib.ap.assert(entrystate<n, "MCPDCreateEntryExit: EntryState>=N"); alglib.ap.assert(exitstate>=0, "MCPDCreateEntryExit: ExitState<0"); alglib.ap.assert(exitstate<n, "MCPDCreateEntryExit: ExitState>=N"); alglib.ap.assert(entrystate!=exitstate, "MCPDCreateEntryExit: EntryState=ExitState"); mcpdinit(n, entrystate, exitstate, s); } /************************************************************************* This function is used to add a track - sequence of system states at the different moments of its evolution. You may add one or several tracks to the MCPD solver. In case you have several tracks, they won't overwrite each other. For example, if you pass two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to t=A+3, t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks - i.e. it won't try to model transition from t=A+3 to t=B+1. INPUT PARAMETERS: S - solver XY - track, array[K,N]: * I-th row is a state at t=I * elements of XY must be non-negative (exception will be thrown on negative elements) K - number of points in a track * if given, only leading K rows of XY are used * if not given, automatically determined from size of XY NOTES: 1. Track may contain either proportional or population data: * with proportional data all rows of XY must sum to 1.0, i.e. we have proportions instead of absolute population values * with population data rows of XY contain population counts and generally do not sum to 1.0 (although they still must be non-negative) -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdaddtrack(mcpdstate s, double[,] xy, int k) { int i = 0; int j = 0; int n = 0; double s0 = 0; double s1 = 0; n = s.n; alglib.ap.assert(k>=0, "MCPDAddTrack: K<0"); alglib.ap.assert(alglib.ap.cols(xy)>=n, "MCPDAddTrack: Cols(XY)<N"); alglib.ap.assert(alglib.ap.rows(xy)>=k, "MCPDAddTrack: Rows(XY)<K"); for(i=0; i<=k-1; i++) { for(j=0; j<=n-1; j++) { alglib.ap.assert(math.isfinite(xy[i,j]), "MCPDAddTrack: XY contains infinite or NAN elements"); alglib.ap.assert((double)(xy[i,j])>=(double)(0), "MCPDAddTrack: XY contains negative elements"); } } if( k<2 ) { return; } if( alglib.ap.rows(s.data)<s.npairs+k-1 ) { apserv.rmatrixresize(ref s.data, Math.Max(2*alglib.ap.rows(s.data), s.npairs+k-1), 2*n); } for(i=0; i<=k-2; i++) { s0 = 0; s1 = 0; for(j=0; j<=n-1; j++) { if( s.states[j]>=0 ) { s0 = s0+xy[i,j]; } if( s.states[j]<=0 ) { s1 = s1+xy[i+1,j]; } } if( (double)(s0)>(double)(0) && (double)(s1)>(double)(0) ) { for(j=0; j<=n-1; j++) { if( s.states[j]>=0 ) { s.data[s.npairs,j] = xy[i,j]/s0; } else { s.data[s.npairs,j] = 0.0; } if( s.states[j]<=0 ) { s.data[s.npairs,n+j] = xy[i+1,j]/s1; } else { s.data[s.npairs,n+j] = 0.0; } } s.npairs = s.npairs+1; } } } /************************************************************************* This function is used to add equality constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints.
For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to place equality constraints on arbitrary subset of elements of P. Set of constraints is specified by EC, which may contain either NAN's or finite numbers from [0,1]. NAN denotes absence of constraint, finite number denotes equality constraint on specific element of P. You can also use MCPDAddEC() function which allows you to ADD equality constraint for one element of P without changing constraints for other elements. These functions (MCPDSetEC and MCPDAddEC) interact as follows: * there is internal matrix of equality constraints which is stored in the MCPD solver * MCPDSetEC() replaces this matrix by another one (SET) * MCPDAddEC() modifies one element of this matrix and leaves other ones unchanged (ADD) * thus MCPDAddEC() call preserves all modifications done by previous calls, while MCPDSetEC() completely discards all changes done to the equality constraints. INPUT PARAMETERS: S - solver EC - equality constraints, array[N,N]. Elements of EC can be either NAN's or finite numbers from [0,1]. NAN denotes absence of constraints, while finite value denotes equality constraint on the corresponding element of P. NOTES: 1. infinite values of EC will lead to exception being thrown. Values less than 0.0 or greater than 1.0 will lead to error code being returned after call to MCPDSolve(). -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetec(mcpdstate s, double[,] ec) { int i = 0; int j = 0; int n = 0; n = s.n; alglib.ap.assert(alglib.ap.cols(ec)>=n, "MCPDSetEC: Cols(EC)<N"); alglib.ap.assert(alglib.ap.rows(ec)>=n, "MCPDSetEC: Rows(EC)<N"); for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { alglib.ap.assert(math.isfinite(ec[i,j]) || Double.IsNaN(ec[i,j]), "MCPDSetEC: EC contains infinite elements"); s.ec[i,j] = ec[i,j]; } } } /************************************************************************* This function is used to add equality constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to ADD equality constraint for one element of P without changing constraints for other elements. You can also use MCPDSetEC() function which allows you to specify arbitrary set of equality constraints in one call. These functions (MCPDSetEC and MCPDAddEC) interact as follows: * there is internal matrix of equality constraints which is stored in the MCPD solver * MCPDSetEC() replaces this matrix by another one (SET) * MCPDAddEC() modifies one element of this matrix and leaves other ones unchanged (ADD) * thus MCPDAddEC() call preserves all modifications done by previous calls, while MCPDSetEC() completely discards all changes done to the equality constraints. INPUT PARAMETERS: S - solver I - row index of element being constrained J - column index of element being constrained C - value (constraint for P[I,J]). Can be either NAN (no constraint) or finite value from [0,1]. NOTES: 1. infinite values of C will lead to exception being thrown. Values less than 0.0 or greater than 1.0 will lead to error code being returned after call to MCPDSolve().
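EXAMPLE: a minimal sketch of how the two calls combine (constraint values are invented for illustration; for a 2x2 problem column sums must still reach 1.0): double[,] ec = new double[2,2]{{0.9, Double.NaN},{Double.NaN, Double.NaN}}; mcpdsetec(s, ec); // SET: pin P[0,0]=0.9, leave everything else free mcpdaddec(s, 1, 0, 0.1); // ADD: additionally pin P[1,0]=0.1, fully determining column 0 Both calls leave column 1 unconstrained (beyond the basic constraints); a later MCPDSetEC() call would discard the P[1,0] constraint as well.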
-- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdaddec(mcpdstate s, int i, int j, double c) { alglib.ap.assert(i>=0, "MCPDAddEC: I<0"); alglib.ap.assert(i<s.n, "MCPDAddEC: I>=N"); alglib.ap.assert(j>=0, "MCPDAddEC: J<0"); alglib.ap.assert(j<s.n, "MCPDAddEC: J>=N"); alglib.ap.assert(Double.IsNaN(c) || math.isfinite(c), "MCPDAddEC: C is not finite number or NAN"); s.ec[i,j] = c; } /************************************************************************* This function is used to add bound constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to place bound constraints on arbitrary subset of elements of P. Set of constraints is specified by BndL/BndU matrices, which may contain arbitrary combination of finite numbers or infinities (like -INF<x<=0.5 or 0.1<=x<+INF). You can also use MCPDAddBC() function which allows you to ADD bound constraint for one element of P without changing constraints for other elements. These functions (MCPDSetBC and MCPDAddBC) interact as follows: * there is internal matrix of bound constraints which is stored in the MCPD solver * MCPDSetBC() replaces this matrix by another one (SET) * MCPDAddBC() modifies one element of this matrix and leaves other ones unchanged (ADD) * thus MCPDAddBC() call preserves all modifications done by previous calls, while MCPDSetBC() completely discards all changes done to the bound constraints. INPUT PARAMETERS: S - solver BndL - lower bounds constraints, array[N,N]. Elements of BndL can be finite numbers or -INF. BndU - upper bounds constraints, array[N,N]. Elements of BndU can be finite numbers or +INF. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetbc(mcpdstate s, double[,] bndl, double[,] bndu) { int i = 0; int j = 0; int n = 0; n = s.n; alglib.ap.assert(alglib.ap.cols(bndl)>=n, "MCPDSetBC: Cols(BndL)<N"); alglib.ap.assert(alglib.ap.rows(bndl)>=n, "MCPDSetBC: Rows(BndL)<N"); alglib.ap.assert(alglib.ap.cols(bndu)>=n, "MCPDSetBC: Cols(BndU)<N"); alglib.ap.assert(alglib.ap.rows(bndu)>=n, "MCPDSetBC: Rows(BndU)<N"); for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { alglib.ap.assert(math.isfinite(bndl[i,j]) || Double.IsNegativeInfinity(bndl[i,j]), "MCPDSetBC: BndL contains NAN or +INF"); alglib.ap.assert(math.isfinite(bndu[i,j]) || Double.IsPositiveInfinity(bndu[i,j]), "MCPDSetBC: BndU contains NAN or -INF"); s.bndl[i,j] = bndl[i,j]; s.bndu[i,j] = bndu[i,j]; } } } /************************************************************************* This function is used to add bound constraints on the elements of the transition matrix P. MCPD solver has four types of constraints which can be placed on P: * user-specified equality constraints (optional) * user-specified bound constraints (optional) * user-specified general linear constraints (optional) * basic constraints (always present): * non-negativity: P[i,j]>=0 * consistency: every column of P sums to 1.0 Final constraints which are passed to the underlying optimizer are calculated as intersection of all present constraints. For example, you may specify boundary constraint on P[0,0] and equality one: 0.1<=P[0,0]<=0.9 P[0,0]=0.5 Such combination of constraints will be silently reduced to their intersection, which is P[0,0]=0.5. This function can be used to ADD bound constraint for one element of P without changing constraints for other elements. You can also use MCPDSetBC() function which allows you to place bound constraints on arbitrary subset of elements of P. Set of constraints is specified by BndL/BndU matrices, which may contain arbitrary combination of finite numbers or infinities (like -INF<x<=0.5 or 0.1<=x<+INF). INPUT PARAMETERS: S - solver I - row index of element being constrained J - column index of element being constrained BndL - lower bound, finite number or -INF BndU - upper bound, finite number or +INF -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdaddbc(mcpdstate s, int i, int j, double bndl, double bndu) { alglib.ap.assert(i>=0, "MCPDAddBC: I<0"); alglib.ap.assert(i<s.n, "MCPDAddBC: I>=N"); alglib.ap.assert(j>=0, "MCPDAddBC: J<0"); alglib.ap.assert(j<s.n, "MCPDAddBC: J>=N"); alglib.ap.assert(math.isfinite(bndl) || Double.IsNegativeInfinity(bndl), "MCPDAddBC: BndL is NAN or +INF"); alglib.ap.assert(math.isfinite(bndu) || Double.IsPositiveInfinity(bndu), "MCPDAddBC: BndU is NAN or -INF"); s.bndl[i,j] = bndl; s.bndu[i,j] = bndu; } /************************************************************************* This function is used to set linear equality/inequality constraints on the elements of the transition matrix P. This function can be used to set one or several general linear constraints on the elements of P. Two types of constraints are supported: * equality constraints * inequality constraints (both less-or-equal and greater-or-equal) Coefficients of constraints are specified by matrix C (one of the parameters). One row of C corresponds to one constraint. Because transition matrix P has N*N elements, we need N*N columns to store all coefficients (they are stored row by row), and one more column to store right part - hence C has N*N+1 columns. Constraint kind is stored in the CT array.
Thus, I-th linear constraint is P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] + + P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... + + P[N-1,N-1]*C[I,N*N-1] ?=? C[I,N*N] where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0). Your constraint may involve only some subset of P (less than N*N elements). For example it can be something like P[0,0] + P[0,1] = 0.5 In this case you still should pass matrix with N*N+1 columns, but all its elements (except for C[0,0], C[0,1] and C[0,N*N]) will be zero. INPUT PARAMETERS: S - solver C - array[K,N*N+1] - coefficients of constraints (see above for complete description) CT - array[K] - constraint types (see above for complete description) K - number of equality/inequality constraints, K>=0: * if given, only leading K elements of C/CT are used * if not given, automatically determined from sizes of C/CT -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetlc(mcpdstate s, double[,] c, int[] ct, int k) { int i = 0; int j = 0; int n = 0; n = s.n; alglib.ap.assert(alglib.ap.cols(c)>=n*n+1, "MCPDSetLC: Cols(C)<N*N+1"); alglib.ap.assert(alglib.ap.rows(c)>=k, "MCPDSetLC: Rows(C)<K"); alglib.ap.assert(alglib.ap.len(ct)>=k, "MCPDSetLC: Len(CT)<K"); alglib.ap.assert(k>=0, "MCPDSetLC: K<0"); apserv.rmatrixsetlengthatleast(ref s.c, k, n*n+1); apserv.ivectorsetlengthatleast(ref s.ct, k); for(i=0; i<=k-1; i++) { for(j=0; j<=n*n; j++) { s.c[i,j] = c[i,j]; } s.ct[i] = ct[i]; } s.ccnt = k; } /************************************************************************* This function allows you to tune amount of Tikhonov regularization being applied to your problem. By default, regularizing term is equal to r*||P-prior_P||^2, where r is a small non-zero value, P is transition matrix, prior_P is identity matrix, ||X||^2 is a sum of squared elements of X. This function allows you to change the coefficient r. You can also change prior values with MCPDSetPrior() function. INPUT PARAMETERS: S - solver V - regularization coefficient, finite non-negative value. It is not recommended to specify zero value unless you are pretty sure that you want it. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsettikhonovregularizer(mcpdstate s, double v) { alglib.ap.assert(math.isfinite(v), "MCPDSetTikhonovRegularizer: V is infinite or NAN"); alglib.ap.assert((double)(v)>=(double)(0.0), "MCPDSetTikhonovRegularizer: V is less than zero"); s.regterm = v; } /************************************************************************* This function allows you to set prior values used for regularization of your problem. By default, regularizing term is equal to r*||P-prior_P||^2, where r is a small non-zero value, P is transition matrix, prior_P is identity matrix, ||X||^2 is a sum of squared elements of X. This function allows you to change prior values prior_P. You can also change r with MCPDSetTikhonovRegularizer() function. INPUT PARAMETERS: S - solver PP - array[N,N], matrix of prior values: 1. elements must be real numbers from [0,1] 2. columns must sum to 1.0. First property is checked (exception is thrown otherwise), while second one is not checked/enforced. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetprior(mcpdstate s, double[,] pp) { int i = 0; int j = 0; int n = 0; pp = (double[,])pp.Clone(); n = s.n; alglib.ap.assert(alglib.ap.cols(pp)>=n, "MCPDSetPrior: Cols(PP)<N"); alglib.ap.assert(alglib.ap.rows(pp)>=n, "MCPDSetPrior: Rows(PP)<N"); for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { alglib.ap.assert(math.isfinite(pp[i,j]), "MCPDSetPrior: PP contains infinite elements"); alglib.ap.assert((double)(pp[i,j])>=(double)(0.0) && (double)(pp[i,j])<=(double)(1.0), "MCPDSetPrior: PP[i,j] is less than 0.0 or greater than 1.0"); s.priorp[i,j] = pp[i,j]; } } } /************************************************************************* This function is used to change prediction weights MCPD solver scales prediction errors as follows Error(P) = ||W*(y-P*x)||^2 where x is a system state at time t y is a system state at time t+1 P is a transition matrix W is a diagonal scaling matrix By default, weights are chosen in order to minimize relative prediction error instead of absolute one. For example, if one component of state is about 0.5 in magnitude and another one is about 0.05, then algorithm will make corresponding weights equal to 2.0 and 20.0.
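EXAMPLE: a minimal sketch for a 3-dimensional problem (weight values invented for illustration): double[] pw = new double[]{ 0.0, 1.0, 1.0 }; mcpdsetpredictionweights(s, pw); // component 0 keeps the automatically chosen weight (zero means "choose automatically"), while components 1 and 2 get a fixed weight of 1.0, i.e. their errors are measured on an absolute rather than relative scale.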
INPUT PARAMETERS: S - solver PW - array[N], weights: * must be non-negative values (exception will be thrown otherwise) * zero values will be replaced by automatically chosen values -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsetpredictionweights(mcpdstate s, double[] pw) { int i = 0; int n = 0; n = s.n; alglib.ap.assert(alglib.ap.len(pw)>=n, "MCPDSetPredictionWeights: Length(PW)<N"); for(i=0; i<=n-1; i++) { alglib.ap.assert((double)(pw[i])>=(double)(0), "MCPDSetPredictionWeights: PW contains negative elements"); s.pw[i] = pw[i]; } } /************************************************************************* This function is used to start solution of the MCPD problem. After return from this function, you can use MCPDResults() to get solution and completion code. -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdsolve(mcpdstate s) { int n = 0; int npairs = 0; int ccnt = 0; int i = 0; int j = 0; int k = 0; int k2 = 0; double v = 0; double vv = 0; int i_ = 0; int i1_ = 0; n = s.n; npairs = s.npairs; // // init fields of S // s.repterminationtype = 0; s.repinneriterationscount = 0; s.repouteriterationscount = 0; s.repnfev = 0; for(k=0; k<=n-1; k++) { for(k2=0; k2<=n-1; k2++) { s.p[k,k2] = Double.NaN; } } // // Generate "effective" weights for prediction and calculate preconditioner // for(i=0; i<=n-1; i++) { if( (double)(s.pw[i])==(double)(0) ) { v = 0; k = 0; for(j=0; j<=npairs-1; j++) { if( (double)(s.data[j,n+i])!=(double)(0) ) { v = v+s.data[j,n+i]; k = k+1; } } if( k!=0 ) { s.effectivew[i] = k/v; } else { s.effectivew[i] = 1.0; } } else { s.effectivew[i] = s.pw[i]; } } for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { s.h[i*n+j] = 2*s.regterm; } } for(k=0; k<=npairs-1; k++) { for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { s.h[i*n+j] = s.h[i*n+j]+2*math.sqr(s.effectivew[i])*math.sqr(s.data[k,j]); } } } for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { if( (double)(s.h[i*n+j])==(double)(0) ) { s.h[i*n+j] = 1; } } } // // Generate "effective" BndL/BndU // for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { // // Set default boundary constraints. // Lower bound is always zero, upper bound is calculated // with respect to entry/exit states. // s.effectivebndl[i*n+j] = 0.0; if( s.states[i]>0 || s.states[j]<0 ) { s.effectivebndu[i*n+j] = 0.0; } else { s.effectivebndu[i*n+j] = 1.0; } // // Calculate intersection of the default and user-specified bound constraints. // This code checks consistency of such combination. // if( math.isfinite(s.bndl[i,j]) && (double)(s.bndl[i,j])>(double)(s.effectivebndl[i*n+j]) ) { s.effectivebndl[i*n+j] = s.bndl[i,j]; } if( math.isfinite(s.bndu[i,j]) && (double)(s.bndu[i,j])<(double)(s.effectivebndu[i*n+j]) ) { s.effectivebndu[i*n+j] = s.bndu[i,j]; } if( (double)(s.effectivebndl[i*n+j])>(double)(s.effectivebndu[i*n+j]) ) { s.repterminationtype = -3; return; } // // Calculate intersection of the effective bound constraints // and user-specified equality constraints. // This code checks consistency of such combination.
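// For example (numbers invented for illustration): the defaults give the interval [0,1]; user bounds BndL[i,j]=0.1, BndU[i,j]=0.9 tighten it to [0.1,0.9]; an equality constraint EC[i,j]=0.5 then collapses it to the single point [0.5,0.5], while EC[i,j]=0.95 would fall outside [0.1,0.9] and make the solver return TerminationType=-3 in the check below.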
// if( math.isfinite(s.ec[i,j]) ) { if( (double)(s.ec[i,j])<(double)(s.effectivebndl[i*n+j]) || (double)(s.ec[i,j])>(double)(s.effectivebndu[i*n+j]) ) { s.repterminationtype = -3; return; } s.effectivebndl[i*n+j] = s.ec[i,j]; s.effectivebndu[i*n+j] = s.ec[i,j]; } } } // // Generate linear constraints: // * "default" sums-to-one constraints (not generated for "exit" states) // apserv.rmatrixsetlengthatleast(ref s.effectivec, s.ccnt+n, n*n+1); apserv.ivectorsetlengthatleast(ref s.effectivect, s.ccnt+n); ccnt = s.ccnt; for(i=0; i<=s.ccnt-1; i++) { for(j=0; j<=n*n; j++) { s.effectivec[i,j] = s.c[i,j]; } s.effectivect[i] = s.ct[i]; } for(i=0; i<=n-1; i++) { if( s.states[i]>=0 ) { for(k=0; k<=n*n-1; k++) { s.effectivec[ccnt,k] = 0; } for(k=0; k<=n-1; k++) { s.effectivec[ccnt,k*n+i] = 1; } s.effectivec[ccnt,n*n] = 1.0; s.effectivect[ccnt] = 0; ccnt = ccnt+1; } } // // create optimizer // for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { s.tmpp[i*n+j] = (double)1/(double)n; } } minbleic.minbleicrestartfrom(s.bs, s.tmpp); minbleic.minbleicsetbc(s.bs, s.effectivebndl, s.effectivebndu); minbleic.minbleicsetlc(s.bs, s.effectivec, s.effectivect, ccnt); minbleic.minbleicsetcond(s.bs, 0.0, 0.0, xtol, 0); minbleic.minbleicsetprecdiag(s.bs, s.h); // // solve problem // while( minbleic.minbleiciteration(s.bs) ) { alglib.ap.assert(s.bs.needfg, "MCPDSolve: internal error"); if( s.bs.needfg ) { // // Calculate regularization term // s.bs.f = 0.0; vv = s.regterm; for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { s.bs.f = s.bs.f+vv*math.sqr(s.bs.x[i*n+j]-s.priorp[i,j]); s.bs.g[i*n+j] = 2*vv*(s.bs.x[i*n+j]-s.priorp[i,j]); } } // // calculate prediction error/gradient for K-th pair // for(k=0; k<=npairs-1; k++) { for(i=0; i<=n-1; i++) { i1_ = (0)-(i*n); v = 0.0; for(i_=i*n; i_<=i*n+n-1;i_++) { v += s.bs.x[i_]*s.data[k,i_+i1_]; } vv = s.effectivew[i]; s.bs.f = s.bs.f+math.sqr(vv*(v-s.data[k,n+i])); for(j=0; j<=n-1; j++) { s.bs.g[i*n+j] = s.bs.g[i*n+j]+2*vv*vv*(v-s.data[k,n+i])*s.data[k,j]; } } } // // continue // continue; } } minbleic.minbleicresultsbuf(s.bs, ref s.tmpp, s.br); for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { s.p[i,j] = s.tmpp[i*n+j]; } } s.repterminationtype = s.br.terminationtype; s.repinneriterationscount = s.br.inneriterationscount; s.repouteriterationscount = s.br.outeriterationscount; s.repnfev = s.br.nfev; } /************************************************************************* MCPD results INPUT PARAMETERS: State - algorithm state OUTPUT PARAMETERS: P - array[N,N], transition matrix Rep - optimization report. You should check Rep.TerminationType in order to distinguish successful termination from unsuccessful one. In short, positive values denote success, negative ones are failures. More information about fields of this structure can be found in the comments on MCPDReport datatype.
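EXAMPLE: a minimal end-to-end sketch (the track below is invented, and it assumes mcpdstate/mcpdreport are default-constructed like the other ALGLIB objects in this file): mcpdstate s = new mcpdstate(); mcpdreport rep = new mcpdreport(); double[,] p = new double[0,0]; double[,] track = new double[,]{{0.8,0.2},{0.6,0.4},{0.5,0.5}}; // proportional data, each row sums to 1.0 mcpdcreate(2, s); mcpdaddtrack(s, track, 3); mcpdsolve(s); mcpdresults(s, ref p, rep); // on success (rep.terminationtype>0) p holds the fitted 2x2 transition matrix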
-- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ public static void mcpdresults(mcpdstate s, ref double[,] p, mcpdreport rep) { int i = 0; int j = 0; p = new double[0,0]; p = new double[s.n, s.n]; for(i=0; i<=s.n-1; i++) { for(j=0; j<=s.n-1; j++) { p[i,j] = s.p[i,j]; } } rep.terminationtype = s.repterminationtype; rep.inneriterationscount = s.repinneriterationscount; rep.outeriterationscount = s.repouteriterationscount; rep.nfev = s.repnfev; } /************************************************************************* Internal initialization function -- ALGLIB -- Copyright 23.05.2010 by Bochkanov Sergey *************************************************************************/ private static void mcpdinit(int n, int entrystate, int exitstate, mcpdstate s) { int i = 0; int j = 0; alglib.ap.assert(n>=1, "MCPDCreate: N<1"); s.n = n; s.states = new int[n]; for(i=0; i<=n-1; i++) { s.states[i] = 0; } if( entrystate>=0 ) { s.states[entrystate] = 1; } if( exitstate>=0 ) { s.states[exitstate] = -1; } s.npairs = 0; s.regterm = 1.0E-8; s.ccnt = 0; s.p = new double[n, n]; s.ec = new double[n, n]; s.bndl = new double[n, n]; s.bndu = new double[n, n]; s.pw = new double[n]; s.priorp = new double[n, n]; s.tmpp = new double[n*n]; s.effectivew = new double[n]; s.effectivebndl = new double[n*n]; s.effectivebndu = new double[n*n]; s.h = new double[n*n]; for(i=0; i<=n-1; i++) { for(j=0; j<=n-1; j++) { s.p[i,j] = 0.0; s.priorp[i,j] = 0.0; s.bndl[i,j] = Double.NegativeInfinity; s.bndu[i,j] = Double.PositiveInfinity; s.ec[i,j] = Double.NaN; } s.pw[i] = 0.0; s.priorp[i,i] = 1.0; } s.data = new double[1, 2*n]; for(i=0; i<=2*n-1; i++) { s.data[0,i] = 0.0; } for(i=0; i<=n*n-1; i++) { s.tmpp[i] = 0.0; } minbleic.minbleiccreate(n*n, s.tmpp, s.bs); } } public class mlpe { /************************************************************************* Neural networks ensemble *************************************************************************/ public class mlpensemble : apobject { public int ensemblesize; public double[] weights; public double[] columnmeans; public double[] columnsigmas; public mlpbase.multilayerperceptron network; public double[] y; public mlpensemble() { init(); } public override void init() { weights = new double[0]; columnmeans = new double[0]; columnsigmas = new double[0]; network = new mlpbase.multilayerperceptron(); y = new double[0]; } public override alglib.apobject make_copy() { mlpensemble _result = new mlpensemble(); _result.ensemblesize = ensemblesize; _result.weights = (double[])weights.Clone(); _result.columnmeans = (double[])columnmeans.Clone(); _result.columnsigmas = (double[])columnsigmas.Clone(); _result.network = (mlpbase.multilayerperceptron)network.make_copy(); _result.y = (double[])y.Clone(); return _result; } }; public const int mlpefirstversion = 1; /************************************************************************* Like MLPCreate0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreate0(int nin, int nout, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreate0(nin, nout, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreate1, but for ensembles. 
-- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreate1(int nin, int nhid, int nout, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreate1(nin, nhid, nout, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreate2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreate2(int nin, int nhid1, int nhid2, int nout, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateB0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreateb0(int nin, int nout, double b, double d, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreateb0(nin, nout, b, d, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateB1, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreateb1(int nin, int nhid, int nout, double b, double d, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreateb1(nin, nhid, nout, b, d, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateB2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreateb2(int nin, int nhid1, int nhid2, int nout, double b, double d, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, b, d, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateR0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreater0(int nin, int nout, double a, double b, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreater0(nin, nout, a, b, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateR1, but for ensembles. 
-- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreater1(int nin, int nhid, int nout, double a, double b, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreater1(nin, nhid, nout, a, b, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateR2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreater2(int nin, int nhid1, int nhid2, int nout, double a, double b, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a, b, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateC0, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatec0(int nin, int nout, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreatec0(nin, nout, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateC1, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatec1(int nin, int nhid, int nout, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreatec1(nin, nhid, nout, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Like MLPCreateC2, but for ensembles. -- ALGLIB -- Copyright 18.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatec2(int nin, int nhid1, int nhid2, int nout, int ensemblesize, mlpensemble ensemble) { mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, net); mlpecreatefromnetwork(net, ensemblesize, ensemble); } /************************************************************************* Creates ensemble from network. Only network geometry is copied. 
-- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatefromnetwork(mlpbase.multilayerperceptron network, int ensemblesize, mlpensemble ensemble) { int i = 0; int ccount = 0; int wcount = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(ensemblesize>0, "MLPECreate: incorrect ensemble size!"); // // Copy network // mlpbase.mlpcopy(network, ensemble.network); // // network properties // if( mlpbase.mlpissoftmax(network) ) { ccount = mlpbase.mlpgetinputscount(ensemble.network); } else { ccount = mlpbase.mlpgetinputscount(ensemble.network)+mlpbase.mlpgetoutputscount(ensemble.network); } wcount = mlpbase.mlpgetweightscount(ensemble.network); ensemble.ensemblesize = ensemblesize; // // weights, means, sigmas // ensemble.weights = new double[ensemblesize*wcount]; ensemble.columnmeans = new double[ensemblesize*ccount]; ensemble.columnsigmas = new double[ensemblesize*ccount]; for(i=0; i<=ensemblesize*wcount-1; i++) { ensemble.weights[i] = math.randomreal()-0.5; } for(i=0; i<=ensemblesize-1; i++) { i1_ = (0) - (i*ccount); for(i_=i*ccount; i_<=(i+1)*ccount-1;i_++) { ensemble.columnmeans[i_] = network.columnmeans[i_+i1_]; } i1_ = (0) - (i*ccount); for(i_=i*ccount; i_<=(i+1)*ccount-1;i_++) { ensemble.columnsigmas[i_] = network.columnsigmas[i_+i1_]; } } // // temporaries, internal buffers // ensemble.y = new double[mlpbase.mlpgetoutputscount(ensemble.network)]; } /************************************************************************* Copying of MLPEnsemble structure INPUT PARAMETERS: Ensemble1 - original OUTPUT PARAMETERS: Ensemble2 - copy -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecopy(mlpensemble ensemble1, mlpensemble ensemble2) { int ccount = 0; int wcount = 0; int i_ = 0; // // Unload info // if( mlpbase.mlpissoftmax(ensemble1.network) ) { ccount = mlpbase.mlpgetinputscount(ensemble1.network); } else { ccount = mlpbase.mlpgetinputscount(ensemble1.network)+mlpbase.mlpgetoutputscount(ensemble1.network); } wcount = mlpbase.mlpgetweightscount(ensemble1.network); // // Allocate space // ensemble2.weights = new double[ensemble1.ensemblesize*wcount]; ensemble2.columnmeans = new double[ensemble1.ensemblesize*ccount]; ensemble2.columnsigmas = new double[ensemble1.ensemblesize*ccount]; ensemble2.y = new double[mlpbase.mlpgetoutputscount(ensemble1.network)]; // // Copy // ensemble2.ensemblesize = ensemble1.ensemblesize; for(i_=0; i_<=ensemble1.ensemblesize*wcount-1;i_++) { ensemble2.weights[i_] = ensemble1.weights[i_]; } for(i_=0; i_<=ensemble1.ensemblesize*ccount-1;i_++) { ensemble2.columnmeans[i_] = ensemble1.columnmeans[i_]; } for(i_=0; i_<=ensemble1.ensemblesize*ccount-1;i_++) { ensemble2.columnsigmas[i_] = ensemble1.columnsigmas[i_]; } mlpbase.mlpcopy(ensemble1.network, ensemble2.network); } /************************************************************************* Randomization of MLP ensemble -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlperandomize(mlpensemble ensemble) { int i = 0; int wcount = 0; wcount = mlpbase.mlpgetweightscount(ensemble.network); for(i=0; i<=ensemble.ensemblesize*wcount-1; i++) { ensemble.weights[i] = math.randomreal()-0.5; } } /************************************************************************* Return ensemble properties (number of inputs and outputs).
-- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpeproperties(mlpensemble ensemble, ref int nin, ref int nout) { nin = 0; nout = 0; nin = mlpbase.mlpgetinputscount(ensemble.network); nout = mlpbase.mlpgetoutputscount(ensemble.network); } /************************************************************************* Return normalization type (whether ensemble is SOFTMAX-normalized or not). -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static bool mlpeissoftmax(mlpensemble ensemble) { bool result = new bool(); result = mlpbase.mlpissoftmax(ensemble.network); return result; } /************************************************************************* Processing INPUT PARAMETERS: Ensemble- neural networks ensemble X - input vector, array[0..NIn-1]. Y - (possibly) preallocated buffer; if size of Y is less than NOut, it will be reallocated. If it is large enough, it is NOT reallocated, so we can save some time on reallocation. OUTPUT PARAMETERS: Y - result. Regression estimate when solving regression task, vector of posterior probabilities for classification task. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpeprocess(mlpensemble ensemble, double[] x, ref double[] y) { int i = 0; int es = 0; int wc = 0; int cc = 0; double v = 0; int nout = 0; int i_ = 0; int i1_ = 0; if( alglib.ap.len(y)<mlpbase.mlpgetoutputscount(ensemble.network) ) { y = new double[mlpbase.mlpgetoutputscount(ensemble.network)]; } es = ensemble.ensemblesize; wc = mlpbase.mlpgetweightscount(ensemble.network); if( mlpbase.mlpissoftmax(ensemble.network) ) { cc = mlpbase.mlpgetinputscount(ensemble.network); } else { cc = mlpbase.mlpgetinputscount(ensemble.network)+mlpbase.mlpgetoutputscount(ensemble.network); } v = (double)1/(double)es; nout = mlpbase.mlpgetoutputscount(ensemble.network); for(i=0; i<=nout-1; i++) { y[i] = 0; } for(i=0; i<=es-1; i++) { i1_ = (i*wc) - (0); for(i_=0; i_<=wc-1;i_++) { ensemble.network.weights[i_] = ensemble.weights[i_+i1_]; } i1_ = (i*cc) - (0); for(i_=0; i_<=cc-1;i_++) { ensemble.network.columnmeans[i_] = ensemble.columnmeans[i_+i1_]; } i1_ = (i*cc) - (0); for(i_=0; i_<=cc-1;i_++) { ensemble.network.columnsigmas[i_] = ensemble.columnsigmas[i_+i1_]; } mlpbase.mlpprocess(ensemble.network, x, ref ensemble.y); for(i_=0; i_<=nout-1;i_++) { y[i_] = y[i_] + v*ensemble.y[i_]; } } } } public class mlptrain { /************************************************************************* Training report: * RelCLSError - fraction of misclassified cases * AvgCE - average cross-entropy * RMSError - root-mean-square error * AvgError - average error * AvgRelError - average relative error * NGrad - number of gradient calculations * NHess - number of Hessian calculations * NCholesky - number of Cholesky decompositions *************************************************************************/ public class mlpreport : apobject { public double relclserror; public double avgce; public double rmserror; public double avgerror; public double avgrelerror; public int ngrad; public int nhess; public int ncholesky; public mlpreport() { init(); } public override void init() { } public override alglib.apobject make_copy() { mlpreport _result = new mlpreport(); _result.relclserror = relclserror; _result.avgce = avgce; _result.rmserror = rmserror; _result.avgerror = avgerror; _result.avgrelerror = avgrelerror; _result.ngrad = ngrad; _result.nhess = nhess; _result.ncholesky = ncholesky; return _result; } }; /************************************************************************* Cross-validation estimates of generalization error *************************************************************************/ public class mlpcvreport : apobject { public double relclserror; public double avgce; public double rmserror; public double avgerror; public double avgrelerror; public mlpcvreport() { init(); } public override void init() { } public override alglib.apobject make_copy() { mlpcvreport _result = new mlpcvreport(); _result.relclserror = relclserror; _result.avgce = avgce; _result.rmserror = rmserror; _result.avgerror = avgerror; _result.avgrelerror = avgrelerror; return _result; } }; public const double mindecay = 0.001; /************************************************************************* Neural network training using modified Levenberg-Marquardt with exact Hessian calculation and regularization. Subroutine trains neural network with restarts from random positions. Algorithm is well suited for small and medium scale problems (hundreds of weights). INPUT PARAMETERS: Network - neural network with initialized geometry XY - training set NPoints - training set size Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts from random position, >0. If you don't know what Restarts to choose, use 2. OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -9, if internal matrix inverse subroutine failed * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1). * 2, if task has been solved.
Rep - training report -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlptrainlm(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, ref int info, mlpreport rep) { int nin = 0; int nout = 0; int wcount = 0; double lmftol = 0; double lmsteptol = 0; int i = 0; int k = 0; double v = 0; double e = 0; double enew = 0; double xnorm2 = 0; double stepnorm = 0; double[] g = new double[0]; double[] d = new double[0]; double[,] h = new double[0,0]; double[,] hmod = new double[0,0]; double[,] z = new double[0,0]; bool spd = new bool(); double nu = 0; double lambdav = 0; double lambdaup = 0; double lambdadown = 0; minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport(); minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate(); double[] x = new double[0]; double[] y = new double[0]; double[] wbase = new double[0]; double[] wdir = new double[0]; double[] wt = new double[0]; double[] wx = new double[0]; int pass = 0; double[] wbest = new double[0]; double ebest = 0; int invinfo = 0; matinv.matinvreport invrep = new matinv.matinvreport(); int solverinfo = 0; densesolver.densesolverreport solverrep = new densesolver.densesolverreport(); int i_ = 0; info = 0; mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); lambdaup = 10; lambdadown = 0.3; lmftol = 0.001; lmsteptol = 0.001; // // Test for inputs // if( npoints<=0 || restarts<1 ) { info = -1; return; } if( mlpbase.mlpissoftmax(network) ) { for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout ) { info = -2; return; } } } decay = Math.Max(decay, mindecay); info = 2; // // Initialize data // rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; // // General case. // Prepare task and network. Allocate space. // mlpbase.mlpinitpreprocessor(network, xy, npoints); g = new double[wcount-1+1]; h = new double[wcount-1+1, wcount-1+1]; hmod = new double[wcount-1+1, wcount-1+1]; wbase = new double[wcount-1+1]; wdir = new double[wcount-1+1]; wbest = new double[wcount-1+1]; wt = new double[wcount-1+1]; wx = new double[wcount-1+1]; ebest = math.maxrealnumber; // // Multiple passes // for(pass=1; pass<=restarts; pass++) { // // Initialize weights // mlpbase.mlprandomize(network); // // First stage of the hybrid algorithm: LBFGS // for(i_=0; i_<=wcount-1;i_++) { wbase[i_] = network.weights[i_]; } minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), wbase, state); minlbfgs.minlbfgssetcond(state, 0, 0, 0, Math.Max(25, wcount)); while( minlbfgs.minlbfgsiteration(state) ) { // // gradient // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref state.g); // // weight decay // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*network.weights[i_]; } // // next iteration // rep.ngrad = rep.ngrad+1; } minlbfgs.minlbfgsresults(state, ref wbase, internalrep); for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wbase[i_]; } // // Second stage of the hybrid algorithm: LM // // Initialize H with identity matrix, // G with gradient, // E with regularized error. 
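// The loop below implements the usual LM damping strategy: solve // (H+Lambda*I)*dx = -G; whenever the Cholesky factorization fails or the // step increases the error, Lambda is inflated (Lambda*LambdaUp*Nu, with // Nu doubling each time), and after every accepted step it is deflated by // LambdaDown, moving the method between gradient-descent-like steps // (large Lambda) and Newton-like steps (small Lambda).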
// mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = e+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; } for(k=0; k<=wcount-1; k++) { h[k,k] = h[k,k]+decay; } rep.nhess = rep.nhess+1; lambdav = 0.001; nu = 2; while( true ) { // // 1. HMod = H+lambda*I // 2. Try to solve (H+Lambda*I)*dx = -g. // Increase lambda if left part is not positive definite. // for(i=0; i<=wcount-1; i++) { for(i_=0; i_<=wcount-1;i_++) { hmod[i,i_] = h[i,i_]; } hmod[i,i] = hmod[i,i]+lambdav; } spd = trfac.spdmatrixcholesky(ref hmod, wcount, true); rep.ncholesky = rep.ncholesky+1; if( !spd ) { lambdav = lambdav*lambdaup*nu; nu = nu*2; continue; } densesolver.spdmatrixcholeskysolve(hmod, wcount, true, g, ref solverinfo, solverrep, ref wdir); if( solverinfo<0 ) { lambdav = lambdav*lambdaup*nu; nu = nu*2; continue; } for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = -1*wdir[i_]; } // // Lambda found. // 1. Save old w in WBase // 2. Test some stopping criteria // 3. If error(w+wdir)>error(w), increase lambda // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = network.weights[i_] + wdir[i_]; } xnorm2 = 0.0; for(i_=0; i_<=wcount-1;i_++) { xnorm2 += network.weights[i_]*network.weights[i_]; } stepnorm = 0.0; for(i_=0; i_<=wcount-1;i_++) { stepnorm += wdir[i_]*wdir[i_]; } stepnorm = Math.Sqrt(stepnorm); enew = mlpbase.mlperror(network, xy, npoints)+0.5*decay*xnorm2; if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) ) { break; } if( (double)(enew)>(double)(e) ) { lambdav = lambdav*lambdaup*nu; nu = nu*2; continue; } // // Optimize using inv(cholesky(H)) as preconditioner // matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, invrep); if( invinfo<=0 ) { // // if matrix can't be inverted then exit with errors // TODO: make WCount steps in direction suggested by HMod // info = -9; return; } for(i_=0; i_<=wcount-1;i_++) { wbase[i_] = network.weights[i_]; } for(i=0; i<=wcount-1; i++) { wt[i] = 0; } minlbfgs.minlbfgscreatex(wcount, wcount, wt, 1, 0.0, state); minlbfgs.minlbfgssetcond(state, 0, 0, 0, 5); while( minlbfgs.minlbfgsiteration(state) ) { // // gradient // for(i=0; i<=wcount-1; i++) { v = 0.0; for(i_=i; i_<=wcount-1;i_++) { v += state.x[i_]*hmod[i,i_]; } network.weights[i] = wbase[i]+v; } mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref g); for(i=0; i<=wcount-1; i++) { state.g[i] = 0; } for(i=0; i<=wcount-1; i++) { v = g[i]; for(i_=i; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + v*hmod[i,i_]; } } // // weight decay // grad(x'*x) = A'*(x0+A*t) // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i=0; i<=wcount-1; i++) { v = decay*network.weights[i]; for(i_=i; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + v*hmod[i,i_]; } } // // next iteration // rep.ngrad = rep.ngrad+1; } minlbfgs.minlbfgsresults(state, ref wt, internalrep); // // Accept new position.
// Calculate Hessian // for(i=0; i<=wcount-1; i++) { v = 0.0; for(i_=i; i_<=wcount-1;i_++) { v += wt[i_]*hmod[i,i_]; } network.weights[i] = wbase[i]+v; } mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = e+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; } for(k=0; k<=wcount-1; k++) { h[k,k] = h[k,k]+decay; } rep.nhess = rep.nhess+1; // // Update lambda // lambdav = lambdav*lambdadown; nu = 2; } // // update WBest // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = 0.5*decay*v+mlpbase.mlperror(network, xy, npoints); if( (double)(e)<(double)(ebest) ) { ebest = e; for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } } } // // copy WBest to output // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wbest[i_]; } } /************************************************************************* Neural network training using L-BFGS algorithm with regularization. Subroutine trains neural network with restarts from random positions. Algorithm is well suited for problems of any dimensionality (memory requirements and step complexity are linear in the number of weights). INPUT PARAMETERS: Network - neural network with initialized geometry XY - training set NPoints - training set size Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts from random position, >0. If you don't know what Restarts to choose, use 2. WStep - stopping criterion. Algorithm stops if step size is less than WStep. Recommended value - 0.01. Zero step size means stopping after MaxIts iterations. MaxIts - stopping criterion. Algorithm stops after MaxIts iterations (NOT gradient calculations). Zero MaxIts means stopping when step is sufficiently small. OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -8, if both WStep=0 and MaxIts=0 * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1). * 2, if task has been solved.
Rep - training report -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlptrainlbfgs(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, ref int info, mlpreport rep) { int i = 0; int pass = 0; int nin = 0; int nout = 0; int wcount = 0; double[] w = new double[0]; double[] wbest = new double[0]; double e = 0; double v = 0; double ebest = 0; minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport(); minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate(); int i_ = 0; info = 0; // // Test inputs, parse flags, read network geometry // if( (double)(wstep)==(double)(0) && maxits==0 ) { info = -8; return; } if( ((npoints<=0 || restarts<1) || (double)(wstep)<(double)(0)) || maxits<0 ) { info = -1; return; } mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); if( mlpbase.mlpissoftmax(network) ) { for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout ) { info = -2; return; } } } decay = Math.Max(decay, mindecay); info = 2; // // Prepare // mlpbase.mlpinitpreprocessor(network, xy, npoints); w = new double[wcount-1+1]; wbest = new double[wcount-1+1]; ebest = math.maxrealnumber; // // Multiple starts // rep.ncholesky = 0; rep.nhess = 0; rep.ngrad = 0; for(pass=1; pass<=restarts; pass++) { // // Process // mlpbase.mlprandomize(network); for(i_=0; i_<=wcount-1;i_++) { w[i_] = network.weights[i_]; } minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), w, state); minlbfgs.minlbfgssetcond(state, 0.0, 0.0, wstep, maxits); while( minlbfgs.minlbfgsiteration(state) ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } mlpbase.mlpgradnbatch(network, xy, npoints, ref state.f, ref state.g); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*network.weights[i_]; } rep.ngrad = rep.ngrad+1; } minlbfgs.minlbfgsresults(state, ref w, internalrep); for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = w[i_]; } // // Compare with best // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = mlpbase.mlperrorn(network, xy, npoints)+0.5*decay*v; if( (double)(e)<(double)(ebest) ) { for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } ebest = e; } } // // The best network // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wbest[i_]; } } /************************************************************************* Neural network training using early stopping (base algorithm - L-BFGS with regularization). INPUT PARAMETERS: Network - neural network with initialized geometry TrnXY - training set TrnSize - training set size, TrnSize>0 ValXY - validation set ValSize - validation set size, ValSize>0 Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts, either: * strictly positive number - algorithm makes specified number of restarts from random position. * -1, in which case algorithm makes exactly one run from the initial state of the network (no randomization).
If you don't know what Restarts to choose, choose one of the following: * -1 (deterministic start) * +1 (one random restart) * +5 (moderate amount of random restarts) OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1, ...). * 2, task has been solved, stopping criterion met - sufficiently small step size. Not expected (we use EARLY stopping) but possible and not an error. * 6, task has been solved, stopping criterion met - increasing of validation set error. Rep - training report NOTE: Algorithm stops if validation set error increases for long enough, or if step size is small enough (there are tasks where validation set error may decrease for eternity). In any case solution returned corresponds to the minimum of validation set error. -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlptraines(mlpbase.multilayerperceptron network, double[,] trnxy, int trnsize, double[,] valxy, int valsize, double decay, int restarts, ref int info, mlpreport rep) { int i = 0; int pass = 0; int nin = 0; int nout = 0; int wcount = 0; double[] w = new double[0]; double[] wbest = new double[0]; double e = 0; double v = 0; double ebest = 0; double[] wfinal = new double[0]; double efinal = 0; int itcnt = 0; int itbest = 0; minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport(); minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate(); double wstep = 0; bool needrandomization = new bool(); int i_ = 0; info = 0; wstep = 0.001; // // Test inputs, parse flags, read network geometry // if( ((trnsize<=0 || valsize<=0) || (restarts<1 && restarts!=-1)) || (double)(decay)<(double)(0) ) { info = -1; return; } if( restarts==-1 ) { needrandomization = false; restarts = 1; } else { needrandomization = true; } mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); if( mlpbase.mlpissoftmax(network) ) { for(i=0; i<=trnsize-1; i++) { if( (int)Math.Round(trnxy[i,nin])<0 || (int)Math.Round(trnxy[i,nin])>=nout ) { info = -2; return; } } for(i=0; i<=valsize-1; i++) { if( (int)Math.Round(valxy[i,nin])<0 || (int)Math.Round(valxy[i,nin])>=nout ) { info = -2; return; } } } info = 2; // // Prepare // mlpbase.mlpinitpreprocessor(network, trnxy, trnsize); w = new double[wcount-1+1]; wbest = new double[wcount-1+1]; wfinal = new double[wcount-1+1]; efinal = math.maxrealnumber; for(i=0; i<=wcount-1; i++) { wfinal[i] = 0; } // // Multiple starts // rep.ncholesky = 0; rep.nhess = 0; rep.ngrad = 0; for(pass=1; pass<=restarts; pass++) { // // Process // if( needrandomization ) { mlpbase.mlprandomize(network); } ebest = mlpbase.mlperror(network, valxy, valsize); for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } itbest = 0; itcnt = 0; for(i_=0; i_<=wcount-1;i_++) { w[i_] = network.weights[i_]; } minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), w, state); minlbfgs.minlbfgssetcond(state, 0.0, 0.0, wstep, 0); minlbfgs.minlbfgssetxrep(state, true); while( minlbfgs.minlbfgsiteration(state) ) { // // Calculate gradient // if( state.needfg ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } mlpbase.mlpgradnbatch(network, trnxy, trnsize, ref state.f, ref state.g); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] +
decay*network.weights[i_]; } rep.ngrad = rep.ngrad+1; } // // Validation set // if( state.xupdated ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } e = mlpbase.mlperror(network, valxy, valsize); if( (double)(e)<(double)(ebest) ) { ebest = e; for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } itbest = itcnt; } if( itcnt>30 && (double)(itcnt)>(double)(1.5*itbest) ) { info = 6; break; } itcnt = itcnt+1; } } minlbfgs.minlbfgsresults(state, ref w, internalrep); // // Compare with final answer // if( (double)(ebest)<(double)(efinal) ) { for(i_=0; i_<=wcount-1;i_++) { wfinal[i_] = wbest[i_]; } efinal = ebest; } } // // The best network // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wfinal[i_]; } } /************************************************************************* Cross-validation estimate of generalization error. Base algorithm - L-BFGS. INPUT PARAMETERS: Network - neural network with initialized geometry. Network is not changed during cross-validation - it is used only as a representative of its architecture. XY - training set. NPoints - training set size Decay - weight decay, same as in MLPTrainLBFGS Restarts - number of restarts, >0. Restarts are counted for each partition separately, so total number of restarts will be Restarts*FoldsCount. WStep - stopping criterion, same as in MLPTrainLBFGS MaxIts - stopping criterion, same as in MLPTrainLBFGS FoldsCount - number of folds in k-fold cross-validation, 2<=FoldsCount<=NPoints. Recommended value: 10. OUTPUT PARAMETERS: Info - return code, same as in MLPTrainLBFGS Rep - report, same as in MLPTrainLM/MLPTrainLBFGS CVRep - generalization error estimates -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcvlbfgs(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, int foldscount, ref int info, mlpreport rep, mlpcvreport cvrep) { info = 0; mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, rep, cvrep); } /************************************************************************* Cross-validation estimate of generalization error. Base algorithm - Levenberg-Marquardt. INPUT PARAMETERS: Network - neural network with initialized geometry. Network is not changed during cross-validation - it is used only as a representative of its architecture. XY - training set. NPoints - training set size Decay - weight decay, same as in MLPTrainLBFGS Restarts - number of restarts, >0. Restarts are counted for each partition separately, so total number of restarts will be Restarts*FoldsCount. FoldsCount - number of folds in k-fold cross-validation, 2<=FoldsCount<=NPoints. Recommended value: 10.
OUTPUT PARAMETERS: Info - return code, same as in MLPTrainLBFGS Rep - report, same as in MLPTrainLM/MLPTrainLBFGS CVRep - generalization error estimates -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcvlm(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, int foldscount, ref int info, mlpreport rep, mlpcvreport cvrep) { info = 0; mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, rep, cvrep); } /************************************************************************* This function estimates generalization error using cross-validation on the current dataset with current training settings. INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. Network is not changed during cross- validation and is not trained - it is used only as representative of its architecture. I.e., we estimate generalization properties of ARCHITECTURE, not some specific network. NRestarts - number of restarts, >=0: * NRestarts>0 means that for each cross-validation round specified number of random restarts is performed, with best network being chosen after training. * NRestarts=0 is same as NRestarts=1 FoldsCount - number of folds in k-fold cross-validation: * 2<=FoldsCount<=size of dataset * recommended value: 10. * values larger than dataset size will be silently truncated down to dataset size OUTPUT PARAMETERS: Rep - structure which contains cross-validation estimates: * Rep.RelCLSError - fraction of misclassified cases. * Rep.AvgCE - average cross-entropy * Rep.RMSError - root-mean-square error * Rep.AvgError - average error * Rep.AvgRelError - average relative error NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), or subset with only one point was given, zeros are returned as estimates. NOTE: this method performs FoldsCount cross-validation rounds, each one with NRestarts random starts. Thus, FoldsCount*NRestarts networks are trained in total. NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems. NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError contain errors in prediction of posterior probabilities.
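EXAMPLE: a minimal sketch (xy and npoints stand for a user-supplied regression dataset and are not defined here; trainer/network objects are assumed default-constructible like the other ALGLIB objects in this file): mlptrainer trn = new mlptrainer(); mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron(); mlpreport rep = new mlpreport(); mlpcreatetrainer(2, 1, trn); // 2 inputs, 1 output (regression) mlpsetdataset(trn, xy, npoints); // xy is array[NPoints,2+1] mlpbase.mlpcreate1(2, 5, 1, net); // architecture being evaluated mlpkfoldcv(trn, net, 5, 10, rep); // 5 restarts per round, 10 folds After the call rep.rmserror and related fields hold the cross-validation estimates.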
-- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcv(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, int foldscount, mlpreport rep) { alglib.smp.shared_pool pooldatacv = new alglib.smp.shared_pool(); mlpparallelizationcv datacv = new mlpparallelizationcv(); mlpparallelizationcv sdatacv = null; double[,] cvy = new double[0,0]; int[] folds = new int[0]; double[] buf = new double[0]; double[] dy = new double[0]; int nin = 0; int nout = 0; int wcount = 0; int rowsize = 0; int ntype = 0; int ttype = 0; int i = 0; int j = 0; int k = 0; int i_ = 0; int i1_ = 0; if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPKFoldCV: type of input network is not similar to network type in trainer object"); alglib.ap.assert(s.npoints>=0, "MLPKFoldCV: possible trainer S is not initialized(S.NPoints<0)"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPKFoldCV: number of inputs in trainer is not equal to number of inputs in network"); alglib.ap.assert(s.nout==nout, "MLPKFoldCV: number of outputs in trainer is not equal to number of outputs in network"); alglib.ap.assert(nrestarts>=0, "MLPKFoldCV: NRestarts<0"); alglib.ap.assert(foldscount>=2, "MLPKFoldCV: FoldsCount<2"); if( foldscount>s.npoints ) { foldscount = s.npoints; } rep.relclserror = 0; rep.avgce = 0; rep.rmserror = 0; rep.avgerror = 0; rep.avgrelerror = 0; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; if( s.npoints==0 || s.npoints==1 ) { return; } // // Read network geometry, test parameters // if( s.rcpar ) { rowsize = nin+nout; dy = new double[nout]; bdss.dserrallocate(-nout, ref buf); } else { rowsize = nin+1; dy = new double[1]; bdss.dserrallocate(nout, ref buf); } // // Folds // folds = new int[s.npoints]; for(i=0; i<=s.npoints-1; i++) { folds[i] = i*foldscount/s.npoints; } for(i=0; i<=s.npoints-2; i++) { j = i+math.randominteger(s.npoints-i); if( j!=i ) { k = folds[i]; folds[i] = folds[j]; folds[j] = k; } } cvy = new double[s.npoints, nout]; // // Initialize SEED-value for shared pool // datacv.ngrad = 0; mlpbase.mlpcopy(network, datacv.network); mlpbase.mlpcopy(network, datacv.tnetwork); datacv.subset = new int[s.npoints]; datacv.xyrow = new double[rowsize]; datacv.bufwbest = new double[wcount]; datacv.bufwfinal = new double[wcount]; datacv.y = new double[nout]; // // Initialize LBFGS optimizer // minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, s.lbfgsfactor), network.weights, datacv.state); minlbfgs.minlbfgssetcond(datacv.state, 0.0, 0.0, s.wstep, s.maxits); minlbfgs.minlbfgssetxrep(datacv.state, true); // // Create shared pool // alglib.smp.ae_shared_pool_set_seed(pooldatacv, datacv); // // Parallelization // mthreadcv(s, rowsize, nrestarts, folds, 0, foldscount, cvy, pooldatacv); // // Calculate value for NGrad // alglib.smp.ae_shared_pool_first_recycled(pooldatacv, ref sdatacv); while( sdatacv!=null ) { rep.ngrad = rep.ngrad+sdatacv.ngrad; alglib.smp.ae_shared_pool_next_recycled(pooldatacv, ref sdatacv); } // // Combine results and calculate cross-validation error // for(i=0; i<=s.npoints-1; i++) { if( s.datatype==0 ) { for(i_=0; i_<=rowsize-1;i_++) { datacv.xyrow[i_] = s.densexy[i,i_]; } } if( s.datatype==1 ) { sparse.sparsegetrow(s.sparsexy, i, ref datacv.xyrow); } for(i_=0; i_<=nout-1;i_++) { datacv.y[i_] = cvy[i,i_]; } if( s.rcpar ) { i1_ = (nin) - (0);
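// copy expected outputs: DY[0..NOut-1] <- XYRow[NIn..NIn+NOut-1] (I1_ is the copy offset used by ALGLIB's vector-copy idiom below)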
for(i_=0; i_<=nout-1;i_++) { dy[i_] = datacv.xyrow[i_+i1_]; } } else { dy[0] = datacv.xyrow[nin]; } bdss.dserraccumulate(ref buf, datacv.y, dy); } bdss.dserrfinish(ref buf); rep.relclserror = buf[0]; rep.avgce = buf[1]; rep.rmserror = buf[2]; rep.avgerror = buf[3]; rep.avgrelerror = buf[4]; } /************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static void _pexec_mlpkfoldcv(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, int foldscount, mlpreport rep) { mlpkfoldcv(s,network,nrestarts,foldscount,rep); } /************************************************************************* Creation of the network trainer object for regression networks INPUT PARAMETERS: NIn - number of inputs, NIn>=1 NOut - number of outputs, NOut>=1 OUTPUT PARAMETERS: S - neural network trainer object. This structure can be used to train any regression network with NIn inputs and NOut outputs. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatetrainer(int nin, int nout, mlptrainer s) { alglib.ap.assert(nin>=1, "MLPCreateTrainer: NIn<1."); alglib.ap.assert(nout>=1, "MLPCreateTrainer: NOut<1."); s.nin = nin; s.nout = nout; s.rcpar = true; s.lbfgsfactor = 10; s.decay = 1.0E-6; mlpsetcond(s, 0, 0); s.datatype = 0; s.npoints = 0; } /************************************************************************* Creation of the network trainer object for classification networks INPUT PARAMETERS: NIn - number of inputs, NIn>=1 NClasses - number of classes, NClasses>=2 OUTPUT PARAMETERS: S - neural network trainer object. This structure can be used to train any classification network with NIn inputs and NOut outputs. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpcreatetrainercls(int nin, int nclasses, mlptrainer s) { alglib.ap.assert(nin>=1, "MLPCreateTrainerCls: NIn<1."); alglib.ap.assert(nclasses>=2, "MLPCreateTrainerCls: NClasses<2."); s.nin = nin; s.nout = nclasses; s.rcpar = false; s.lbfgsfactor = 10; s.decay = 1.0E-6; mlpsetcond(s, 0, 0); s.datatype = 0; s.npoints = 0; } /************************************************************************* This function sets "current dataset" of the trainer object to one passed by user. INPUT PARAMETERS: S - trainer object XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. NPoints - points count, >=0. DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
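EXAMPLE (illustrative sketch only, not from the ALGLIB Reference Manual; a hypothetical regression trainer with NIn=2, NOut=1, so each row is [x0, x1, y]):
>
> XY = [[0.1, 0.2, 0.5],
>       [0.3, 0.4, 0.9]]
> MLPSetDataset(Trainer, XY, 2)
>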
-- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetdataset(mlptrainer s, double[,] xy, int npoints) { int ndim = 0; int i = 0; int j = 0; alglib.ap.assert(s.nin>=1, "MLPSetDataset: possible parameter S is not initialized or spoiled(S.NIn<=0)."); alglib.ap.assert(npoints>=0, "MLPSetDataset: NPoint<0"); alglib.ap.assert(npoints<=alglib.ap.rows(xy), "MLPSetDataset: invalid size of matrix XY(NPoint more than rows of matrix XY)"); s.datatype = 0; s.npoints = npoints; if( npoints==0 ) { return; } if( s.rcpar ) { alglib.ap.assert(s.nout>=1, "MLPSetDataset: possible parameter S is not initialized or is spoiled(NOut<1 for regression)."); ndim = s.nin+s.nout; alglib.ap.assert(ndim<=alglib.ap.cols(xy), "MLPSetDataset: invalid size of matrix XY(too few columns in matrix XY)."); alglib.ap.assert(apserv.apservisfinitematrix(xy, npoints, ndim), "MLPSetDataset: parameter XY contains Infinite or NaN."); } else { alglib.ap.assert(s.nout>=2, "MLPSetDataset: possible parameter S is not initialized or is spoiled(NClasses<2 for classifier)."); ndim = s.nin+1; alglib.ap.assert(ndim<=alglib.ap.cols(xy), "MLPSetDataset: invalid size of matrix XY(too few columns in matrix XY)."); alglib.ap.assert(apserv.apservisfinitematrix(xy, npoints, ndim), "MLPSetDataset: parameter XY contains Infinite or NaN."); for(i=0; i<=npoints-1; i++) { alglib.ap.assert((int)Math.Round(xy[i,s.nin])>=0 && (int)Math.Round(xy[i,s.nin])<s.nout, "MLPSetDataset: invalid parameter XY(in classifier used nonexistent class number: either XY[.,NIn]<0 or XY[.,NIn]>=NClasses)."); } } apserv.rmatrixsetlengthatleast(ref s.densexy, npoints, ndim); for(i=0; i<=npoints-1; i++) { for(j=0; j<=ndim-1; j++) { s.densexy[i,j] = xy[i,j]; } } } /************************************************************************* This function sets "current dataset" of the trainer object to one passed by user (sparse matrix is used to store dataset). INPUT PARAMETERS: S - trainer object XY - training set, see below for information on the training set format. This function checks correctness of the dataset (no NANs/INFs, class numbers are correct) and throws exception when incorrect dataset is passed. Any sparse storage format can be used: Hash-table, CRS... NPoints - points count, >=0 DATASET FORMAT: This function uses two different dataset formats - one for regression networks, another one for classification networks. For regression networks with NIn inputs and NOut outputs following dataset format is used: * dataset is given by NPoints*(NIn+NOut) matrix * each row corresponds to one example * first NIn columns are inputs, next NOut columns are outputs For classification networks with NIn inputs and NClasses classes following dataset format is used: * dataset is given by NPoints*(NIn+1) matrix * each row corresponds to one example * first NIn columns are inputs, last column stores class number (from 0 to NClasses-1).
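EXAMPLE (illustrative sketch only; SparseCreate()/SparseSet() are the generic ALGLIB sparse-matrix constructors and are an assumption here, shown for a hypothetical classifier with NIn=2, where the last column stores the class number):
>
> SparseCreate(2, 3, 0, XY)
> SparseSet(XY, 0, 0, 0.1)  ...first input of point 0...
> SparseSet(XY, 0, 2, 1)    ...class number of point 0...
> MLPSetSparseDataset(Trainer, XY, 2)
>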
-- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetsparsedataset(mlptrainer s, sparse.sparsematrix xy, int npoints) { double v = 0; int t0 = 0; int t1 = 0; int i = 0; int j = 0; alglib.ap.assert(s.nin>0, "MLPSetSparseDataset: possible parameter S is not initialized or spoiled(S.NIn<=0)."); alglib.ap.assert(npoints>=0, "MLPSetSparseDataset: NPoint<0"); alglib.ap.assert(npoints<=sparse.sparsegetnrows(xy), "MLPSetSparseDataset: invalid size of sparse matrix XY(NPoint more than rows of matrix XY)"); s.datatype = 1; s.npoints = npoints; if( npoints==0 ) { return; } t0 = 0; t1 = 0; if( s.rcpar ) { alglib.ap.assert(s.nout>=1, "MLPSetSparseDataset: possible parameter S is not initialized or is spoiled(NOut<1 for regression)."); alglib.ap.assert(s.nin+s.nout<=sparse.sparsegetncols(xy), "MLPSetSparseDataset: invalid size of sparse matrix XY(too few columns in sparse matrix XY)."); while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref v) ) { if( i<npoints && j<s.nin+s.nout ) { alglib.ap.assert(math.isfinite(v), "MLPSetSparseDataset: sparse matrix XY contains Infinite or NaN."); } } } else { alglib.ap.assert(s.nout>=2, "MLPSetSparseDataset: possible parameter S is not initialized or is spoiled(NClasses<2 for classifier)."); alglib.ap.assert(s.nin+1<=sparse.sparsegetncols(xy), "MLPSetSparseDataset: invalid size of sparse matrix XY(too few columns in sparse matrix XY)."); while( sparse.sparseenumerate(xy, ref t0, ref t1, ref i, ref j, ref v) ) { if( i<npoints && j<=s.nin ) { if( j!=s.nin ) { alglib.ap.assert(math.isfinite(v), "MLPSetSparseDataset: sparse matrix XY contains Infinite or NaN."); } else { alglib.ap.assert((math.isfinite(v) && (int)Math.Round(v)>=0) && (int)Math.Round(v)<s.nout, "MLPSetSparseDataset: invalid parameter XY(in classifier used nonexistent class number: either XY[.,NIn]<0 or XY[.,NIn]>=NClasses)."); } } } } sparse.sparsecopytocrs(xy, s.sparsexy); } /************************************************************************* This function sets weight decay coefficient which is used for training. INPUT PARAMETERS: S - trainer object Decay - weight decay coefficient, >=0. Weight decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 1.0E-3. Weight decay can be set to zero, in this case network is trained without weight decay. NOTE: by default network uses some small nonzero value for weight decay. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetdecay(mlptrainer s, double decay) { alglib.ap.assert(math.isfinite(decay), "MLPSetDecay: parameter Decay contains Infinite or NaN."); alglib.ap.assert((double)(decay)>=(double)(0), "MLPSetDecay: Decay<0."); s.decay = decay; } /************************************************************************* This function sets stopping criteria for the optimizer. INPUT PARAMETERS: S - trainer object WStep - stopping criterion. Algorithm stops if step size is less than WStep. Recommended value - 0.01. Zero step size means stopping after MaxIts iterations. WStep>=0. MaxIts - stopping criterion. Algorithm stops after MaxIts iterations (NOT gradient calculations). Zero MaxIts means stopping when step is sufficiently small. MaxIts>=0. NOTE: by default, WStep=0.005 and MaxIts=0 are used. These values are also used when MLPSetCond() is called with WStep=0 and MaxIts=0.
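EXAMPLE (illustrative sketch only; stops when either the step length drops below 0.01 or 1000 iterations have been performed, whichever happens first):
>
> MLPSetCond(Trainer, 0.01, 1000)
>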
-- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpsetcond(mlptrainer s, double wstep, int maxits) { alglib.ap.assert(math.isfinite(wstep), "MLPSetCond: parameter WStep contains Infinite or NaN."); alglib.ap.assert((double)(wstep)>=(double)(0), "MLPSetCond: WStep<0."); alglib.ap.assert(maxits>=0, "MLPSetCond: MaxIts<0."); if( (double)(wstep)!=(double)(0) || maxits!=0 ) { s.wstep = wstep; s.maxits = maxits; } else { s.wstep = 0.005; s.maxits = 0; } } /************************************************************************* This function trains neural network passed to this function, using current dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset()) and current training settings. Training from NRestarts random starting positions is performed, best network is chosen. Training is performed using current training algorithm. INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. NRestarts - number of restarts, >=0: * NRestarts>0 means that specified number of random restarts are performed, best network is chosen after training * NRestarts=0 means that current state of the network is used for training. OUTPUT PARAMETERS: Network - trained network NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), network is filled by zero values. Same behavior for functions MLPStartTraining and MLPContinueTraining. NOTE: this method uses sum-of-squares error function for training. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlptrainnetwork(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, mlpreport rep) { int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; alglib.ap.assert(s.npoints>=0, "MLPTrainNetwork: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetwork: type of input network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPTrainNetwork: number of inputs in trainer is not equal to number of inputs in network"); alglib.ap.assert(s.nout==nout, "MLPTrainNetwork: number of outputs in trainer is not equal to number of outputs in network"); alglib.ap.assert(nrestarts>=0, "MLPTrainNetwork: NRestarts<0."); apserv.rvectorsetlengthatleast(ref s.wbest, wcount); apserv.rvectorsetlengthatleast(ref s.wfinal, wcount); // // Create LBFGS optimizer // minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, s.lbfgsfactor), network.weights, s.tstate); minlbfgs.minlbfgssetcond(s.tstate, 0.0, 0.0, s.wstep, s.maxits); minlbfgs.minlbfgssetxrep(s.tstate, true); // // Create duplicate of the network // mlpbase.mlpcopy(network, s.tnetwork); // // Train // mlptrainnetworkx(s, network, s.tnetwork, s.tstate, nrestarts, s.subset, -1, s.subset, 0, s.wbest, s.wfinal, rep); } /************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend you to use it unless you are pretty sure that you need ability to monitor training progress. 
This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. After call to this function trainer object remembers network and is ready to train it. However, no training is performed until first call to MLPContinueTraining() function. Subsequent calls to MLPContinueTraining() will advance training progress one iteration further. EXAMPLE: > > ...initialize network and trainer object.... > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > ...visualize training progress... > INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. RandomStart - randomize network before training or not: * True means that network is randomized and its initial state (one which was passed to the trainer object) is lost. * False means that training is started from the current state of the network OUTPUT PARAMETERS: Network - neural network which is ready for training (weights are initialized, preprocessor is initialized using current training set) NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpstarttraining(mlptrainer s, mlpbase.multilayerperceptron network, bool randomstart) { int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; alglib.ap.assert(s.npoints>=0, "MLPStartTraining: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTraining: type of input network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPStartTraining: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPStartTraining: number of outputs in trainer is not equal to number of outputs in the network."); // // Create LBFGS optimizer // minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, s.lbfgsfactor), network.weights, s.tstate); minlbfgs.minlbfgssetcond(s.tstate, 0.0, 0.0, s.wstep, s.maxits); minlbfgs.minlbfgssetxrep(s.tstate, true); // // Create duplicate of the network // mlpbase.mlpcopy(network, s.tnetwork); // // Train network // mlpstarttrainingx(s, network, s.tnetwork, s.tstate, randomstart, s.subset, -1); } /************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend you to use it unless you are pretty sure that you need ability to monitor training progress. This function performs step-by-step training of the neural network.
Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. This function performs one more iteration of the training and returns either True (training continues) or False (training stopped). In case True was returned, Network weights are updated according to the current state of the optimization progress. In case False was returned, no additional updates is performed (previous update of the network weights moved us to the final point, and no additional updates is needed). EXAMPLE: > > [initialize network and trainer object] > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > [visualize training progress] > INPUT PARAMETERS: S - trainer object Network - neural network structure, which is used to store current state of the training process. OUTPUT PARAMETERS: Network - weights of the neural network are rewritten by the current approximation. NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. NOTE: It is expected that Network is the same one which was passed to MLPStartTraining() function. However, THIS function checks only following: * that number of network inputs is consistent with trainer object settings * that number of network outputs/classes is consistent with trainer object settings * that number of network weights is the same as number of weights in the network passed to MLPStartTraining() function Exception is thrown when these conditions are violated. It is also expected that you do not change state of the network on your own - the only party who has right to change network during its training is a trainer object. Any attempt to interfere with trainer may lead to unpredictable results. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static bool mlpcontinuetraining(mlptrainer s, mlpbase.multilayerperceptron network) { bool result = new bool(); int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; alglib.ap.assert(s.npoints>=0, "MLPContinueTraining: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPContinueTraining: type of input network is not similar to network type in trainer object."); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPContinueTraining: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPContinueTraining: number of outputs in trainer is not equal to number of outputs in the network."); result = mlpcontinuetrainingx(s, network, s.tnetwork, s.tstate, s.subset, -1, ref s.ngradbatch); return result; } /************************************************************************* Training neural networks ensemble using bootstrap aggregating (bagging). Modified Levenberg-Marquardt algorithm is used as base training method. 
INPUT PARAMETERS: Ensemble - model with initialized geometry XY - training set NPoints - training set size Decay - weight decay coefficient, >=0.001 Restarts - restarts, >0. OUTPUT PARAMETERS: Ensemble - trained model Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report. OOBErrors - out-of-bag generalization error estimate -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpebagginglm(mlpe.mlpensemble ensemble, double[,] xy, int npoints, double decay, int restarts, ref int info, mlpreport rep, mlpcvreport ooberrors) { info = 0; mlpebagginginternal(ensemble, xy, npoints, decay, restarts, 0.0, 0, true, ref info, rep, ooberrors); } /************************************************************************* Training neural networks ensemble using bootstrap aggregating (bagging). L-BFGS algorithm is used as base training method. INPUT PARAMETERS: Ensemble - model with initialized geometry XY - training set NPoints - training set size Decay - weight decay coefficient, >=0.001 Restarts - restarts, >0. WStep - stopping criterion, same as in MLPTrainLBFGS MaxIts - stopping criterion, same as in MLPTrainLBFGS OUTPUT PARAMETERS: Ensemble - trained model Info - return code: * -8, if both WStep=0 and MaxIts=0 * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report. OOBErrors - out-of-bag generalization error estimate -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpebagginglbfgs(mlpe.mlpensemble ensemble, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, ref int info, mlpreport rep, mlpcvreport ooberrors) { info = 0; mlpebagginginternal(ensemble, xy, npoints, decay, restarts, wstep, maxits, false, ref info, rep, ooberrors); } /************************************************************************* Training neural networks ensemble using early stopping. INPUT PARAMETERS: Ensemble - model with initialized geometry XY - training set NPoints - training set size Decay - weight decay coefficient, >=0.001 Restarts - restarts, >0. OUTPUT PARAMETERS: Ensemble - trained model Info - return code: * -2, if there is a point with class number outside of [0..NClasses-1]. * -1, if incorrect parameters were passed (NPoints<0, Restarts<1). * 6, if task has been solved. Rep - training report.
OOBErrors - out-of-bag generalization error estimate -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpetraines(mlpe.mlpensemble ensemble, double[,] xy, int npoints, double decay, int restarts, ref int info, mlpreport rep) { int i = 0; int k = 0; int ccount = 0; int pcount = 0; double[,] trnxy = new double[0,0]; double[,] valxy = new double[0,0]; int trnsize = 0; int valsize = 0; int tmpinfo = 0; mlpreport tmprep = new mlpreport(); int nin = 0; int nout = 0; int wcount = 0; int i_ = 0; int i1_ = 0; info = 0; nin = mlpbase.mlpgetinputscount(ensemble.network); nout = mlpbase.mlpgetoutputscount(ensemble.network); wcount = mlpbase.mlpgetweightscount(ensemble.network); if( (npoints<2 || restarts<1) || (double)(decay)<(double)(0) ) { info = -1; return; } if( mlpbase.mlpissoftmax(ensemble.network) ) { for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout ) { info = -2; return; } } } info = 6; // // allocate // if( mlpbase.mlpissoftmax(ensemble.network) ) { ccount = nin+1; pcount = nin; } else { ccount = nin+nout; pcount = nin+nout; } trnxy = new double[npoints, ccount]; valxy = new double[npoints, ccount]; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; // // train networks // for(k=0; k<=ensemble.ensemblesize-1; k++) { // // Split set // do { trnsize = 0; valsize = 0; for(i=0; i<=npoints-1; i++) { if( (double)(math.randomreal())<(double)(0.66) ) { // // Assign sample to training set // for(i_=0; i_<=ccount-1;i_++) { trnxy[trnsize,i_] = xy[i,i_]; } trnsize = trnsize+1; } else { // // Assign sample to validation set // for(i_=0; i_<=ccount-1;i_++) { valxy[valsize,i_] = xy[i,i_]; } valsize = valsize+1; } } } while( !(trnsize!=0 && valsize!=0) ); // // Train // mlptraines(ensemble.network, trnxy, trnsize, valxy, valsize, decay, restarts, ref tmpinfo, tmprep); if( tmpinfo<0 ) { info = tmpinfo; return; } // // save results // i1_ = (0) - (k*wcount); for(i_=k*wcount; i_<=(k+1)*wcount-1;i_++) { ensemble.weights[i_] = ensemble.network.weights[i_+i1_]; } i1_ = (0) - (k*pcount); for(i_=k*pcount; i_<=(k+1)*pcount-1;i_++) { ensemble.columnmeans[i_] = ensemble.network.columnmeans[i_+i1_]; } i1_ = (0) - (k*pcount); for(i_=k*pcount; i_<=(k+1)*pcount-1;i_++) { ensemble.columnsigmas[i_] = ensemble.network.columnsigmas[i_+i1_]; } rep.ngrad = rep.ngrad+tmprep.ngrad; rep.nhess = rep.nhess+tmprep.nhess; rep.ncholesky = rep.ncholesky+tmprep.ncholesky; } } /************************************************************************* This function trains neural network ensemble passed to this function using current dataset and early stopping training algorithm. Each early stopping round performs NRestarts random restarts (thus, EnsembleSize*NRestarts training rounds are performed in total). INPUT PARAMETERS: S - trainer object; Ensemble - neural network ensemble. It must have same number of inputs and outputs/classes as was specified during creation of the trainer object. NRestarts - number of restarts, >=0: * NRestarts>0 means that specified number of random restarts are performed during each ES round; * NRestarts=0 is silently replaced by 1. OUTPUT PARAMETERS: Ensemble - trained ensemble; Rep - it contains all types of errors. NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), or single-point dataset was passed, ensemble is filled by zero values. NOTE: this method uses sum-of-squares error function for training.
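EXAMPLE (illustrative sketch only, not from the ALGLIB Reference Manual; Trainer is configured as usual and Ensemble is created beforehand, e.g. with MLPECreate1()):
>
> MLPTrainEnsembleES(Trainer, Ensemble, 3, Rep)
> ...Rep now holds the errors of the trained ensemble...
>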
-- ALGLIB -- Copyright 22.08.2012 by Bochkanov Sergey *************************************************************************/ public static void mlptrainensemblees(mlptrainer s, mlpe.mlpensemble ensemble, int nrestarts, mlpreport rep) { int pcount = 0; mlpreport tmprep = new mlpreport(); int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; int i = 0; int k = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(s.npoints>=0, "MLPTrainEnsembleES: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( !mlpe.mlpeissoftmax(ensemble) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainEnsembleES: internal error - type of input network is not similar to network type in trainer object"); nin = mlpbase.mlpgetinputscount(ensemble.network); alglib.ap.assert(s.nin==nin, "MLPTrainEnsembleES: number of inputs in trainer is not equal to number of inputs in ensemble network"); nout = mlpbase.mlpgetoutputscount(ensemble.network); alglib.ap.assert(s.nout==nout, "MLPTrainEnsembleES: number of outputs in trainer is not equal to number of outputs in ensemble network"); alglib.ap.assert(nrestarts>=0, "MLPTrainEnsembleES: NRestarts<0."); wcount = mlpbase.mlpgetweightscount(ensemble.network); // // Initialize parameter Rep // rep.relclserror = 0; rep.avgce = 0; rep.rmserror = 0; rep.avgerror = 0; rep.avgrelerror = 0; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; // // Allocate // if( mlpbase.mlpissoftmax(ensemble.network) ) { pcount = nin; } else { pcount = nin+nout; } apserv.ivectorsetlengthatleast(ref s.subset, s.npoints); apserv.ivectorsetlengthatleast(ref s.valsubset, s.npoints); apserv.rvectorsetlengthatleast(ref s.wbest, wcount); apserv.rvectorsetlengthatleast(ref s.wfinal, wcount); // // Create LBFGS optimizer // minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, s.lbfgsfactor), ensemble.network.weights, s.tstate); minlbfgs.minlbfgssetcond(s.tstate, 0.0, 0.0, s.wstep, s.maxits); minlbfgs.minlbfgssetxrep(s.tstate, true); mlpbase.mlpcopy(ensemble.network, s.tnetwork); // // Train networks // if( (s.datatype==0 || s.datatype==1) && s.npoints>1 ) { for(k=0; k<=ensemble.ensemblesize-1; k++) { // // Split set // do { s.subsetsize = 0; s.valsubsetsize = 0; for(i=0; i<=s.npoints-1; i++) { if( (double)(math.randomreal())<(double)(0.66) ) { // // Assign sample to training set // s.subset[s.subsetsize] = i; s.subsetsize = s.subsetsize+1; } else { // // Assign sample to validation set // s.valsubset[s.valsubsetsize] = i; s.valsubsetsize = s.valsubsetsize+1; } } } while( !(s.subsetsize!=0 && s.valsubsetsize!=0) ); // // Train // mlptrainnetworkx(s, ensemble.network, s.tnetwork, s.tstate, nrestarts, s.subset, s.subsetsize, s.valsubset, s.valsubsetsize, s.wbest, s.wfinal, tmprep); rep.ngrad = rep.ngrad+tmprep.ngrad; // // Save results // i1_ = (0) - (k*wcount); for(i_=k*wcount; i_<=(k+1)*wcount-1;i_++) { ensemble.weights[i_] = ensemble.network.weights[i_+i1_]; } i1_ = (0) - (k*pcount); for(i_=k*pcount; i_<=(k+1)*pcount-1;i_++) { ensemble.columnmeans[i_] = ensemble.network.columnmeans[i_+i1_]; } i1_ = (0) - (k*pcount); for(i_=k*pcount; i_<=(k+1)*pcount-1;i_++) { ensemble.columnsigmas[i_] = ensemble.network.columnsigmas[i_+i1_]; } } } else { for(i=0; i<=ensemble.ensemblesize*wcount-1; i++) { ensemble.network.weights[i] = 0.0; ensemble.columnmeans[i] = 0.0; ensemble.columnsigmas[i] = 1.0; } } // // Calculate errors. 
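// (dense and sparse datasets are routed to different error-computation routines below)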
// if( s.datatype==0 ) { mlpe.mlpeallerrors(ensemble, s.densexy, s.npoints, ref rep.relclserror, ref rep.avgce, ref rep.rmserror, ref rep.avgerror, ref rep.avgrelerror); } if( s.datatype==1 ) { mlpe.mlpeallerrorssparse(ensemble, s.sparsexy, s.npoints, ref rep.relclserror, ref rep.avgce, ref rep.rmserror, ref rep.avgerror, ref rep.avgrelerror); } } /************************************************************************* Internal cross-validation subroutine *************************************************************************/ private static void mlpkfoldcvgeneral(mlpbase.multilayerperceptron n, double[,] xy, int npoints, double decay, int restarts, int foldscount, bool lmalgorithm, double wstep, int maxits, ref int info, mlpreport rep, mlpcvreport cvrep) { int i = 0; int fold = 0; int j = 0; int k = 0; mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron(); int nin = 0; int nout = 0; int rowlen = 0; int wcount = 0; int nclasses = 0; int tssize = 0; int cvssize = 0; double[,] cvset = new double[0,0]; double[,] testset = new double[0,0]; int[] folds = new int[0]; int relcnt = 0; mlpreport internalrep = new mlpreport(); double[] x = new double[0]; double[] y = new double[0]; int i_ = 0; info = 0; // // Read network geometry, test parameters // mlpbase.mlpproperties(n, ref nin, ref nout, ref wcount); if( mlpbase.mlpissoftmax(n) ) { nclasses = nout; rowlen = nin+1; } else { nclasses = -nout; rowlen = nin+nout; } if( (npoints<=0 || foldscount<2) || foldscount>npoints ) { info = -1; return; } mlpbase.mlpcopy(n, network); // // K-fold out cross-validation. // First, estimate generalization error // testset = new double[npoints-1+1, rowlen-1+1]; cvset = new double[npoints-1+1, rowlen-1+1]; x = new double[nin-1+1]; y = new double[nout-1+1]; mlpkfoldsplit(xy, npoints, nclasses, foldscount, false, ref folds); cvrep.relclserror = 0; cvrep.avgce = 0; cvrep.rmserror = 0; cvrep.avgerror = 0; cvrep.avgrelerror = 0; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; relcnt = 0; for(fold=0; fold<=foldscount-1; fold++) { // // Separate set // tssize = 0; cvssize = 0; for(i=0; i<=npoints-1; i++) { if( folds[i]==fold ) { for(i_=0; i_<=rowlen-1;i_++) { testset[tssize,i_] = xy[i,i_]; } tssize = tssize+1; } else { for(i_=0; i_<=rowlen-1;i_++) { cvset[cvssize,i_] = xy[i,i_]; } cvssize = cvssize+1; } } // // Train on CV training set // if( lmalgorithm ) { mlptrainlm(network, cvset, cvssize, decay, restarts, ref info, internalrep); } else { mlptrainlbfgs(network, cvset, cvssize, decay, restarts, wstep, maxits, ref info, internalrep); } if( info<0 ) { cvrep.relclserror = 0; cvrep.avgce = 0; cvrep.rmserror = 0; cvrep.avgerror = 0; cvrep.avgrelerror = 0; return; } rep.ngrad = rep.ngrad+internalrep.ngrad; rep.nhess = rep.nhess+internalrep.nhess; rep.ncholesky = rep.ncholesky+internalrep.ncholesky; // // Estimate error using CV test set // if( mlpbase.mlpissoftmax(network) ) { // // classification-only code // cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(network, testset, tssize); cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(network, testset, tssize); } for(i=0; i<=tssize-1; i++) { for(i_=0; i_<=nin-1;i_++) { x[i_] = testset[i,i_]; } mlpbase.mlpprocess(network, x, ref y); if( mlpbase.mlpissoftmax(network) ) { // // Classification-specific code // k = (int)Math.Round(testset[i,nin]); for(j=0; j<=nout-1; j++) { if( j==k ) { cvrep.rmserror = cvrep.rmserror+math.sqr(y[j]-1); cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1); cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1); relcnt = 
relcnt+1; } else { cvrep.rmserror = cvrep.rmserror+math.sqr(y[j]); cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]); } } } else { // // Regression-specific code // for(j=0; j<=nout-1; j++) { cvrep.rmserror = cvrep.rmserror+math.sqr(y[j]-testset[i,nin+j]); cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]); if( (double)(testset[i,nin+j])!=(double)(0) ) { cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]); relcnt = relcnt+1; } } } } } if( mlpbase.mlpissoftmax(network) ) { cvrep.relclserror = cvrep.relclserror/npoints; cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints); } cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout)); cvrep.avgerror = cvrep.avgerror/(npoints*nout); if( relcnt>0 ) { cvrep.avgrelerror = cvrep.avgrelerror/relcnt; } info = 1; } /************************************************************************* Subroutine prepares K-fold split of the training set. NOTES: "NClasses>0" means that we have classification task. "NClasses<0" means regression task with -NClasses real outputs. *************************************************************************/ private static void mlpkfoldsplit(double[,] xy, int npoints, int nclasses, int foldscount, bool stratifiedsplits, ref int[] folds) { int i = 0; int j = 0; int k = 0; folds = new int[0]; // // test parameters // alglib.ap.assert(npoints>0, "MLPKFoldSplit: wrong NPoints!"); alglib.ap.assert(nclasses>1 || nclasses<0, "MLPKFoldSplit: wrong NClasses!"); alglib.ap.assert(foldscount>=2 && foldscount<=npoints, "MLPKFoldSplit: wrong FoldsCount!"); alglib.ap.assert(!stratifiedsplits, "MLPKFoldSplit: stratified splits are not supported!"); // // Folds // folds = new int[npoints-1+1]; for(i=0; i<=npoints-1; i++) { folds[i] = i*foldscount/npoints; } for(i=0; i<=npoints-2; i++) { j = i+math.randominteger(npoints-i); if( j!=i ) { k = folds[i]; folds[i] = folds[j]; folds[j] = k; } } } private static void mthreadcv(mlptrainer s, int rowsize, int nrestarts, int[] folds, int fold, int dfold, double[,] cvy, alglib.smp.shared_pool pooldatacv) { mlpparallelizationcv datacv = null; int i = 0; int i_ = 0; if( fold==dfold-1 ) { // // Separate set // alglib.smp.ae_shared_pool_retrieve(pooldatacv, ref datacv); datacv.subsetsize = 0; for(i=0; i<=s.npoints-1; i++) { if( folds[i]!=fold ) { datacv.subset[datacv.subsetsize] = i; datacv.subsetsize = datacv.subsetsize+1; } } // // Train on CV training set // mlptrainnetworkx(s, datacv.network, datacv.tnetwork, datacv.state, nrestarts, datacv.subset, datacv.subsetsize, datacv.subset, 0, datacv.bufwbest, datacv.bufwfinal, datacv.rep); datacv.ngrad = datacv.ngrad+datacv.rep.ngrad; // // Estimate error using CV test set // for(i=0; i<=s.npoints-1; i++) { if( folds[i]==fold ) { if( s.datatype==0 ) { for(i_=0; i_<=rowsize-1;i_++) { datacv.xyrow[i_] = s.densexy[i,i_]; } } if( s.datatype==1 ) { sparse.sparsegetrow(s.sparsexy, i, ref datacv.xyrow); } mlpbase.mlpprocess(datacv.network, datacv.xyrow, ref datacv.y); for(i_=0; i_<=s.nout-1;i_++) { cvy[i,i_] = datacv.y[i_]; } } } alglib.smp.ae_shared_pool_recycle(pooldatacv, ref datacv); } else { alglib.ap.assert(fold<dfold-1, "MThreadCV: internal error(Fold>DFold-1)."); mthreadcv(s, rowsize, nrestarts, folds, fold, (fold+dfold)/2, cvy, pooldatacv); mthreadcv(s, rowsize, nrestarts, folds, (fold+dfold)/2, dfold, cvy, pooldatacv); } } /************************************************************************* This function trains neural network passed to this function, using current dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset()) and current
training settings. Training from NRestarts random starting positions is performed, best network is chosen. Training is performed using current training algorithm. INPUT PARAMETERS: S - trainer object; Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object; TNetwork - the network which is actually trained. The user may inspect weights via the Network parameter while training continues. TNetwork must have the same architecture as Network (copy Network or create a new network with the same architecture); State - created LBFGS optimizer; NRestarts - number of restarts, >=0: * NRestarts>0 means that specified number of random restarts are performed, best network is chosen after training * NRestarts=0 means that current state of the network is used for training. TrnSubset - some subset from training set (it stores row numbers), used as training set; TrnSubsetSize- size of subset (if TrnSubsetSize<0, the full dataset is used); when TrnSubsetSize=0, network is filled by zero values, and the ValSubset parameter is IGNORED; ValSubset - some subset from training set (it stores row numbers), used as validation set; ValSubsetSize- size of subset (if ValSubsetSize<0, the full dataset is used); when ValSubsetSize<>0, the early stopping training algorithm is used (see the NOTE on early stopping below); BufWBest - buffer for storing interim results (BufWBest[0:WCount-1]; it must be allocated by the user); BufWFinal - buffer for storing interim results (BufWFinal[0:WCount-1]; it must be allocated by the user). OUTPUT PARAMETERS: Network - trained network; Rep - training report. NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), network is filled by zero values. Same behavior for functions MLPStartTraining and MLPContinueTraining. NOTE: this method uses sum-of-squares error function for training.
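NOTE: when a validation set is supplied (ValSubsetSize<>0), training uses early stopping: the best validation error seen so far is tracked, and the optimization loop is terminated once ItCnt>30 and ItCnt>1.5*ItBest, i.e. once no validation improvement has been seen for a substantial fraction of the iterations performed so far.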
-- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static void mlptrainnetworkx(mlptrainer s, mlpbase.multilayerperceptron network, mlpbase.multilayerperceptron tnetwork, minlbfgs.minlbfgsstate state, int nrestarts, int[] trnsubset, int trnsubsetsize, int[] valsubset, int valsubsetsize, double[] bufwbest, double[] bufwfinal, mlpreport rep) { mlpbase.modelerrors modrep = new mlpbase.modelerrors(); double eval = 0; double v = 0; double ebestcur = 0; double efinal = 0; int ngradbatch = 0; int nin = 0; int nout = 0; int wcount = 0; int twcount = 0; int itbest = 0; int itcnt = 0; int ntype = 0; int ttype = 0; bool rndstart = new bool(); int pass = 0; int i = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the resulting network is not similar to network type in trainer object"); if( !mlpbase.mlpissoftmax(tnetwork) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the network."); mlpbase.mlpproperties(tnetwork, ref nin, ref nout, ref twcount); alglib.ap.assert(s.nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(twcount==wcount, "MLPTrainNetworkX: internal error - number of weights in the resulting network is not equal to number of weights in the training network."); alglib.ap.assert(nrestarts>=0, "MLPTrainNetworkX: internal error - NRestarts<0."); alglib.ap.assert(alglib.ap.len(trnsubset)>=trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)"); for(i=0; i<=trnsubsetsize-1; i++) { alglib.ap.assert(trnsubset[i]>=0 && trnsubset[i]<=s.npoints-1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)"); } alglib.ap.assert(alglib.ap.len(valsubset)>=valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)"); for(i=0; i<=valsubsetsize-1; i++) { alglib.ap.assert(valsubset[i]>=0 && valsubset[i]<=s.npoints-1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or ValSubset[I]>S.NPoints-1)"); } // // Initialize parameter Rep // rep.relclserror = 0; rep.avgce = 0; rep.rmserror = 0; rep.avgerror = 0; rep.avgrelerror = 0; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && trnsubsetsize!=0 ) { // // Prepare // efinal = math.maxrealnumber; if( nrestarts!=0 ) { rndstart = true; } else { rndstart = false; nrestarts = 1; } ngradbatch = 0; eval = 0; ebestcur = 0; for(pass=1; pass<=nrestarts; pass++) { mlpstarttrainingx(s, network, tnetwork, state, rndstart, trnsubset,
trnsubsetsize); itbest = 0; itcnt = 0; if( s.datatype==0 ) { ebestcur = mlpbase.mlperrorsubset(network, s.densexy, s.npoints, valsubset, valsubsetsize); } if( s.datatype==1 ) { ebestcur = mlpbase.mlperrorsparsesubset(network, s.sparsexy, s.npoints, valsubset, valsubsetsize); } for(i_=0; i_<=wcount-1;i_++) { bufwbest[i_] = network.weights[i_]; } while( mlpcontinuetrainingx(s, network, tnetwork, state, trnsubset, trnsubsetsize, ref ngradbatch) ) { if( s.datatype==0 ) { eval = mlpbase.mlperrorsubset(network, s.densexy, s.npoints, valsubset, valsubsetsize); } if( s.datatype==1 ) { eval = mlpbase.mlperrorsparsesubset(network, s.sparsexy, s.npoints, valsubset, valsubsetsize); } if( (double)(eval)<=(double)(ebestcur) ) { for(i_=0; i_<=wcount-1;i_++) { bufwbest[i_] = network.weights[i_]; } ebestcur = eval; itbest = itcnt; } if( itcnt>30 && (double)(itcnt)>(double)(1.5*itbest) ) { break; } itcnt = itcnt+1; } for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = bufwbest[i_]; } // // Compare with the final (best) answer. // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += bufwbest[i_]*bufwbest[i_]; } if( s.datatype==0 ) { ebestcur = mlpbase.mlperrorsubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize)+0.5*s.decay*v; } if( s.datatype==1 ) { ebestcur = mlpbase.mlperrorsparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize)+0.5*s.decay*v; } if( (double)(ebestcur)<(double)(efinal) ) { for(i_=0; i_<=wcount-1;i_++) { bufwfinal[i_] = bufwbest[i_]; } efinal = ebestcur; } } // // Final network // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = bufwfinal[i_]; } rep.ngrad = ngradbatch; } else { for(i=0; i<=wcount-1; i++) { network.weights[i] = 0; } } // // Calculate errors. // if( s.datatype==0 ) { mlpbase.mlpallerrorssubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep); } if( s.datatype==1 ) { mlpbase.mlpallerrorssparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep); } rep.relclserror = modrep.relclserror; rep.avgce = modrep.avgce; rep.rmserror = modrep.rmserror; rep.avgerror = modrep.avgerror; rep.avgrelerror = modrep.avgrelerror; } /************************************************************************* This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTrainingX call, and then user subsequently calls MLPContinueTrainingX to perform one more iteration of the training. After call to this function trainer object remembers network and is ready to train it. However, no training is performed until first call to MLPContinueTraining() function. Subsequent calls to MLPContinueTraining() will advance training progress one iteration further. EXAMPLE: > > ...initialize network and trainer object.... > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > ...visualize training progress... > INPUT PARAMETERS: S - trainer object; Network - neural network which receives A COPY of the actual network which is trained by the algorithm. After each training round, state of the network being trained is copied to this variable. It must have same number of inputs and output/classes as was specified during creation of the trainer object and it must have exactly same architecture as the second network (TNetwork). TNetwork - neural network being trained. State - LBFGS optimizer, already initialized, number of dimensions must be equal to number of weights in the networks.
RandomStart - randomize network before training or not: * True means that network is randomized and its initial state (one which was passed to the trainer object) is lost; * False means that training is started from the current state of the network. Subset - some subset from training set (it stores row numbers); SubsetSize - size of subset (if SubsetSize<0, the full dataset is used). OUTPUT PARAMETERS: Network - neural network which is ready for training (weights are initialized, preprocessor is initialized using current training set) NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. -- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static void mlpstarttrainingx(mlptrainer s, mlpbase.multilayerperceptron network, mlpbase.multilayerperceptron tnetwork, minlbfgs.minlbfgsstate state, bool randomstart, int[] subset, int subsetsize) { int nin = 0; int nout = 0; int wcount = 0; int twcount = 0; int ntype = 0; int ttype = 0; int i = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPStartTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTrainingX: internal error - type of the resulting network is not similar to network type in trainer object"); if( !mlpbase.mlpissoftmax(tnetwork) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTrainingX: internal error - type of the training network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPStartTrainingX: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPStartTrainingX: number of outputs in trainer is not equal to number of outputs in the network."); mlpbase.mlpproperties(tnetwork, ref nin, ref nout, ref twcount); alglib.ap.assert(s.nin==nin, "MLPStartTrainingX: number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPStartTrainingX: number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(twcount==wcount, "MLPStartTrainingX: number of weights in the resulting network is not equal to number of weights in the training network."); alglib.ap.assert(alglib.ap.len(subset)>=subsetsize, "MLPStartTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize)"); for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(subset[i]>=0 && subset[i]<=s.npoints-1, "MLPStartTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1)"); } if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && subsetsize!=0 ) { // // Prepare // if( s.datatype==0 ) { mlpbase.mlpinitpreprocessorsubset(network, s.densexy, s.npoints, subset, subsetsize); mlpbase.mlpinitpreprocessorsubset(tnetwork, s.densexy, s.npoints, subset, subsetsize); } if( s.datatype==1 ) { mlpbase.mlpinitpreprocessorsparsesubset(network, s.sparsexy, s.npoints, subset, subsetsize);
mlpbase.mlpinitpreprocessorsparsesubset(tnetwork, s.sparsexy, s.npoints, subset, subsetsize); } // // Process // if( randomstart ) { mlpbase.mlprandomize(network); } minlbfgs.minlbfgsrestartfrom(state, network.weights); } else { for(i=0; i<=wcount-1; i++) { network.weights[i] = 0; } } // // Copy weights // for(i_=0; i_<=wcount-1;i_++) { tnetwork.weights[i_] = network.weights[i_]; } } /************************************************************************* This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTrainingX call, and then user subsequently calls MLPContinueTrainingX to perform one more iteration of the training. This function performs one more iteration of the training and returns either True (training continues) or False (training stopped). In case True was returned, Network weights are updated according to the current state of the optimization progress. In case False was returned, no additional update is performed (the previous update of the network weights moved us to the final point, and no additional update is needed). EXAMPLE: > > [initialize network and trainer object] > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > [visualize training progress] > INPUT PARAMETERS: S - trainer object Network - neural network which receives A COPY of the actual network which is trained by the algorithm. After each training round, state of the network being trained is copied to this variable. It must have same number of inputs and output/classes as was specified during creation of the trainer object and it must have exactly same architecture as the second network (TNetwork). TNetwork - neural network being trained. State - LBFGS optimizer, already initialized, number of dimensions must be equal to number of weights in the networks. Subset - some subset from training set (it stores row numbers); SubsetSize - size of subset (if SubsetSize<0, the full dataset is used). NGradBatch - number of calls to the MLPGradBatch function. Initial value is zero; OUTPUT PARAMETERS: Network - weights of the neural network are rewritten by the current approximation; NGradBatch - number of calls to the MLPGradBatch function after training. NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. NOTE: It is expected that Network is the same one which was passed to MLPStartTraining() function. However, THIS function checks only following: * that number of network inputs is consistent with trainer object settings * that number of network outputs/classes is consistent with trainer object settings * that number of network weights is the same as number of weights in the network passed to MLPStartTraining() function Exception is thrown when these conditions are violated. It is also expected that you do not change state of the network on your own - the only party who has the right to change network during its training is the trainer object. Any attempt to interfere with trainer may lead to unpredictable results.
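NOTE: the weight decay term is applied inside this function: after each batch gradient evaluation, 0.5*Decay*||W||^2 is added to the function value and Decay*W to the gradient before they are handed back to the L-BFGS optimizer.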
-- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static bool mlpcontinuetrainingx(mlptrainer s, mlpbase.multilayerperceptron network, mlpbase.multilayerperceptron tnetwork, minlbfgs.minlbfgsstate state, int[] subset, int subsetsize, ref int ngradbatch) { bool result = new bool(); int nin = 0; int nout = 0; int wcount = 0; int twcount = 0; int ntype = 0; int ttype = 0; double decay = 0; double v = 0; int i = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPContinueTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)."); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPContinueTrainingX: internal error - type of the resulting network is not similar to network type in trainer object."); if( !mlpbase.mlpissoftmax(tnetwork) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPContinueTrainingX: internal error - type of the training network is not similar to network type in trainer object."); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPContinueTrainingX: internal error - number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPContinueTrainingX: internal error - number of outputs in trainer is not equal to number of outputs in the network."); mlpbase.mlpproperties(tnetwork, ref nin, ref nout, ref twcount); alglib.ap.assert(s.nin==nin, "MLPContinueTrainingX: internal error - number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPContinueTrainingX: internal error - number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(twcount==wcount, "MLPContinueTrainingX: internal error - number of weights in the resulting network is not equal to number of weights in the training network."); alglib.ap.assert(alglib.ap.len(subset)>=subsetsize, "MLPContinueTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize)."); for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(subset[i]>=0 && subset[i]<=s.npoints-1, "MLPContinueTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1)."); } if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && subsetsize!=0 ) { decay = s.decay; while( minlbfgs.minlbfgsiteration(state) ) { if( state.xupdated ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = tnetwork.weights[i_]; } result = true; return result; } for(i_=0; i_<=wcount-1;i_++) { tnetwork.weights[i_] = state.x[i_]; } if( s.datatype==0 ) { mlpbase.mlpgradbatchsubset(tnetwork, s.densexy, s.npoints, subset, subsetsize, ref state.f, ref state.g); } if( s.datatype==1 ) { mlpbase.mlpgradbatchsparsesubset(tnetwork, s.sparsexy, s.npoints, subset, subsetsize, ref state.f, ref state.g); } // // Increment number of operations performed on batch gradient // ngradbatch = ngradbatch+1; v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += tnetwork.weights[i_]*tnetwork.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*tnetwork.weights[i_]; } } for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = tnetwork.weights[i_]; } } result = false; return result; } /************************************************************************* Internal bagging subroutine.
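NOTE: each ensemble member is trained on a bootstrap sample of the dataset (NPoints rows drawn with replacement); rows not drawn for a given member are used to accumulate its out-of-bag (OOB) error contribution.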
          -- ALGLIB --
             Copyright 19.02.2009 by Bochkanov Sergey
        *************************************************************************/
        private static void mlpebagginginternal(mlpe.mlpensemble ensemble,
            double[,] xy,
            int npoints,
            double decay,
            int restarts,
            double wstep,
            int maxits,
            bool lmalgorithm,
            ref int info,
            mlpreport rep,
            mlpcvreport ooberrors)
        {
            double[,] xys = new double[0,0];
            bool[] s = new bool[0];
            double[,] oobbuf = new double[0,0];
            int[] oobcntbuf = new int[0];
            double[] x = new double[0];
            double[] y = new double[0];
            double[] dy = new double[0];
            double[] dsbuf = new double[0];
            int ccnt = 0;
            int pcnt = 0;
            int i = 0;
            int j = 0;
            int k = 0;
            double v = 0;
            mlpreport tmprep = new mlpreport();
            int nin = 0;
            int nout = 0;
            int wcount = 0;
            int i_ = 0;
            int i1_ = 0;

            info = 0;
            nin = mlpbase.mlpgetinputscount(ensemble.network);
            nout = mlpbase.mlpgetoutputscount(ensemble.network);
            wcount = mlpbase.mlpgetweightscount(ensemble.network);

            //
            // Test for inputs
            //
            if( (!lmalgorithm && (double)(wstep)==(double)(0)) && maxits==0 )
            {
                info = -8;
                return;
            }
            if( ((npoints<=0 || restarts<1) || (double)(wstep)<(double)(0)) || maxits<0 )
            {
                info = -1;
                return;
            }
            if( mlpbase.mlpissoftmax(ensemble.network) )
            {
                for(i=0; i<=npoints-1; i++)
                {
                    if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout )
                    {
                        info = -2;
                        return;
                    }
                }
            }

            //
            // allocate temporaries
            //
            info = 2;
            rep.ngrad = 0;
            rep.nhess = 0;
            rep.ncholesky = 0;
            ooberrors.relclserror = 0;
            ooberrors.avgce = 0;
            ooberrors.rmserror = 0;
            ooberrors.avgerror = 0;
            ooberrors.avgrelerror = 0;
            if( mlpbase.mlpissoftmax(ensemble.network) )
            {
                ccnt = nin+1;
                pcnt = nin;
            }
            else
            {
                ccnt = nin+nout;
                pcnt = nin+nout;
            }
            xys = new double[npoints, ccnt];
            s = new bool[npoints];
            oobbuf = new double[npoints, nout];
            oobcntbuf = new int[npoints];
            x = new double[nin];
            y = new double[nout];
            if( mlpbase.mlpissoftmax(ensemble.network) )
            {
                dy = new double[1];
            }
            else
            {
                dy = new double[nout];
            }
            for(i=0; i<=npoints-1; i++)
            {
                for(j=0; j<=nout-1; j++)
                {
                    oobbuf[i,j] = 0;
                }
            }
            for(i=0; i<=npoints-1; i++)
            {
                oobcntbuf[i] = 0;
            }

            //
            // main bagging cycle
            //
            for(k=0; k<=ensemble.ensemblesize-1; k++)
            {
                //
                // prepare dataset
                //
                for(i=0; i<=npoints-1; i++)
                {
                    s[i] = false;
                }
                for(i=0; i<=npoints-1; i++)
                {
                    j = math.randominteger(npoints);
                    s[j] = true;
                    for(i_=0; i_<=ccnt-1; i_++)
                    {
                        xys[i,i_] = xy[j,i_];
                    }
                }

                //
                // train
                //
                if( lmalgorithm )
                {
                    mlptrainlm(ensemble.network, xys, npoints, decay, restarts, ref info, tmprep);
                }
                else
                {
                    mlptrainlbfgs(ensemble.network, xys, npoints, decay, restarts, wstep, maxits, ref info, tmprep);
                }
                if( info<0 )
                {
                    return;
                }

                //
                // save results
                //
                rep.ngrad = rep.ngrad+tmprep.ngrad;
                rep.nhess = rep.nhess+tmprep.nhess;
                rep.ncholesky = rep.ncholesky+tmprep.ncholesky;
                i1_ = (0) - (k*wcount);
                for(i_=k*wcount; i_<=(k+1)*wcount-1; i_++)
                {
                    ensemble.weights[i_] = ensemble.network.weights[i_+i1_];
                }
                i1_ = (0) - (k*pcnt);
                for(i_=k*pcnt; i_<=(k+1)*pcnt-1; i_++)
                {
                    ensemble.columnmeans[i_] = ensemble.network.columnmeans[i_+i1_];
                }
                i1_ = (0) - (k*pcnt);
                for(i_=k*pcnt; i_<=(k+1)*pcnt-1; i_++)
                {
                    ensemble.columnsigmas[i_] = ensemble.network.columnsigmas[i_+i1_];
                }

                //
                // OOB estimates
                //
                for(i=0; i<=npoints-1; i++)
                {
                    if( !s[i] )
                    {
                        for(i_=0; i_<=nin-1; i_++)
                        {
                            x[i_] = xy[i,i_];
                        }
                        mlpbase.mlpprocess(ensemble.network, x, ref y);
                        for(i_=0; i_<=nout-1; i_++)
                        {
                            oobbuf[i,i_] = oobbuf[i,i_]+y[i_];
                        }
                        oobcntbuf[i] = oobcntbuf[i]+1;
                    }
                }
            }

            //
            // OOB estimates
            //
            if( mlpbase.mlpissoftmax(ensemble.network) )
            {
                bdss.dserrallocate(nout, ref dsbuf);
            }
            else
            {
                bdss.dserrallocate(-nout, ref dsbuf);
            }
            for(i=0; i<=npoints-1; i++)
            {
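                //
                // Row I was out-of-bag for OOBCntBuf[I] of the ensemble members:
                // average their accumulated predictions from OOBBuf[I,*] and pass
                // the result, together with the desired output DY, to the dserr*
                // accumulator which computes the error statistics.
                //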
                if( oobcntbuf[i]!=0 )
                {
                    v = (double)1/(double)oobcntbuf[i];
                    for(i_=0; i_<=nout-1; i_++)
                    {
                        y[i_] = v*oobbuf[i,i_];
                    }
                    if( mlpbase.mlpissoftmax(ensemble.network) )
                    {
                        dy[0] = xy[i,nin];
                    }
                    else
                    {
                        i1_ = (nin) - (0);
                        for(i_=0; i_<=nout-1; i_++)
                        {
                            dy[i_] = v*xy[i,i_+i1_];
                        }
                    }
                    bdss.dserraccumulate(ref dsbuf, y, dy);
                }
            }
            bdss.dserrfinish(ref dsbuf);
            ooberrors.relclserror = dsbuf[0];
            ooberrors.avgce = dsbuf[1];
            ooberrors.rmserror = dsbuf[2];
            ooberrors.avgerror = dsbuf[3];
            ooberrors.avgrelerror = dsbuf[4];
        }
    }
    public class pca
    {
        /*************************************************************************
        Principal components analysis

        The subroutine builds an orthogonal basis where the first axis
        corresponds to the direction of maximum variance, the second axis
        maximizes variance in the subspace orthogonal to the first axis, and
        so on.

        It should be noted that, unlike LDA, PCA does not use class labels.

        INPUT PARAMETERS:
            X           -   dataset, array[0..NPoints-1,0..NVars-1].
                            matrix contains ONLY INDEPENDENT VARIABLES.
            NPoints     -   dataset size, NPoints>=0
            NVars       -   number of independent variables, NVars>=1

        OUTPUT PARAMETERS:
            Info        -   return code:
                            * -4, if the SVD subroutine hasn't converged
                            * -1, if wrong parameters have been passed
                                  (NPoints<0, NVars<1)
                            *  1, if the task is solved
            S2          -   array[0..NVars-1]. variance values corresponding
                            to basis vectors.
            V           -   array[0..NVars-1,0..NVars-1]
                            matrix whose columns store basis vectors.

          -- ALGLIB --
             Copyright 25.08.2008 by Bochkanov Sergey
        *************************************************************************/
        public static void pcabuildbasis(double[,] x,
            int npoints,
            int nvars,
            ref int info,
            ref double[] s2,
            ref double[,] v)
        {
            double[,] a = new double[0,0];
            double[,] u = new double[0,0];
            double[,] vt = new double[0,0];
            double[] m = new double[0];
            double[] t = new double[0];
            int i = 0;
            int j = 0;
            double mean = 0;
            double variance = 0;
            double skewness = 0;
            double kurtosis = 0;
            int i_ = 0;

            info = 0;
            s2 = new double[0];
            v = new double[0,0];

            //
            // Check input data
            //
            if( npoints<0 || nvars<1 )
            {
                info = -1;
                return;
            }
            info = 1;

            //
            // Special case: NPoints=0
            //
            if( npoints==0 )
            {
                s2 = new double[nvars-1+1];
                v = new double[nvars-1+1, nvars-1+1];
                for(i=0; i<=nvars-1; i++)
                {
                    s2[i] = 0;
                }
                for(i=0; i<=nvars-1; i++)
                {
                    for(j=0; j<=nvars-1; j++)
                    {
                        if( i==j )
                        {
                            v[i,j] = 1;
                        }
                        else
                        {
                            v[i,j] = 0;
                        }
                    }
                }
                return;
            }

            //
            // Calculate means
            //
            m = new double[nvars-1+1];
            t = new double[npoints-1+1];
            for(j=0; j<=nvars-1; j++)
            {
                for(i_=0; i_<=npoints-1; i_++)
                {
                    t[i_] = x[i_,j];
                }
                basestat.samplemoments(t, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
                m[j] = mean;
            }

            //
            // Center, apply SVD, prepare output
            //
            a = new double[Math.Max(npoints, nvars)-1+1, nvars-1+1];
            for(i=0; i<=npoints-1; i++)
            {
                for(i_=0; i_<=nvars-1; i_++)
                {
                    a[i,i_] = x[i,i_];
                }
                for(i_=0; i_<=nvars-1; i_++)
                {
                    a[i,i_] = a[i,i_]-m[i_];
                }
            }
            for(i=npoints; i<=nvars-1; i++)
            {
                for(j=0; j<=nvars-1; j++)
                {
                    a[i,j] = 0;
                }
            }
            if( !svd.rmatrixsvd(a, Math.Max(npoints, nvars), nvars, 0, 1, 2, ref s2, ref u, ref vt) )
            {
                info = -4;
                return;
            }
            if( npoints!=1 )
            {
                for(i=0; i<=nvars-1; i++)
                {
                    s2[i] = math.sqr(s2[i])/(npoints-1);
                }
            }
            v = new double[nvars-1+1, nvars-1+1];
            blas.copyandtranspose(vt, 0, nvars-1, 0, nvars-1, ref v, 0, nvars-1, 0, nvars-1);
        }
    }
}
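
/*************************************************************************
Usage sketch for pcabuildbasis (hypothetical illustration, not part of the
ALGLIB sources; the 5x2 dataset is made up for demonstration):

    > double[,] x = new double[,]{{2.5,2.4},{0.5,0.7},{2.2,2.9},{1.9,2.2},{3.1,3.0}};
    > int info = 0;
    > double[] s2 = new double[0];
    > double[,] v = new double[0,0];
    > alglib.pca.pcabuildbasis(x, 5, 2, ref info, ref s2, ref v);
    > // Info==1 on success; S2 holds variances in decreasing order and the
    > // columns of V store the corresponding basis vectors.
*************************************************************************/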