[3839] | 1 | /*************************************************************************
|
---|
| 2 | Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).
|
---|
| 3 |
|
---|
| 4 | >>> SOURCE LICENSE >>>
|
---|
| 5 | This program is free software; you can redistribute it and/or modify
|
---|
| 6 | it under the terms of the GNU General Public License as published by
|
---|
| 7 | the Free Software Foundation (www.fsf.org); either version 2 of the
|
---|
| 8 | License, or (at your option) any later version.
|
---|
| 9 |
|
---|
| 10 | This program is distributed in the hope that it will be useful,
|
---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 13 | GNU General Public License for more details.
|
---|
| 14 |
|
---|
| 15 | A copy of the GNU General Public License is available at
|
---|
| 16 | http://www.fsf.org/licensing/licenses
|
---|
| 17 |
|
---|
| 18 | >>> END OF LICENSE >>>
|
---|
| 19 | *************************************************************************/
|
---|
| 20 |
|
---|
| 21 | using System;
|
---|
| 22 |
|
---|
| 23 | namespace alglib
|
---|
| 24 | {
|
---|
| 25 | public class pca
|
---|
| 26 | {
|
---|
| 27 | /*************************************************************************
|
---|
| 28 | Principal components analysis
|
---|
| 29 |
|
---|
| 30 | Subroutine builds orthogonal basis where first axis corresponds to
|
---|
| 31 | direction with maximum variance, second axis maximizes variance in subspace
|
---|
| 32 | orthogonal to first axis and so on.
|
---|
| 33 |
|
---|
| 34 | It should be noted that, unlike LDA, PCA does not use class labels.
|
---|
| 35 |
|
---|
| 36 | INPUT PARAMETERS:
|
---|
| 37 | X - dataset, array[0..NPoints-1,0..NVars-1].
|
---|
| 38 | matrix contains ONLY INDEPENDENT VARIABLES.
|
---|
| 39 | NPoints - dataset size, NPoints>=0
|
---|
| 40 | NVars - number of independent variables, NVars>=1
|
---|
| 41 |
|
---|
| 42 | ÂÛÕÎÄÍÛÅ ÏÀÐÀÌÅÒÐÛ:
|
---|
| 43 | Info - return code:
|
---|
| 44 | * -4, if SVD subroutine haven't converged
|
---|
| 45 | * -1, if wrong parameters has been passed (NPoints<0,
|
---|
| 46 | NVars<1)
|
---|
| 47 | * 1, if task is solved
|
---|
| 48 | S2 - array[0..NVars-1]. variance values corresponding
|
---|
| 49 | to basis vectors.
|
---|
| 50 | V - array[0..NVars-1,0..NVars-1]
|
---|
| 51 | matrix, whose columns store basis vectors.
|
---|
| 52 |
|
---|
| 53 | -- ALGLIB --
|
---|
| 54 | Copyright 25.08.2008 by Bochkanov Sergey
|
---|
| 55 | *************************************************************************/
|
---|
| 56 | public static void pcabuildbasis(ref double[,] x,
|
---|
| 57 | int npoints,
|
---|
| 58 | int nvars,
|
---|
| 59 | ref int info,
|
---|
| 60 | ref double[] s2,
|
---|
| 61 | ref double[,] v)
|
---|
| 62 | {
|
---|
| 63 | double[,] a = new double[0,0];
|
---|
| 64 | double[,] u = new double[0,0];
|
---|
| 65 | double[,] vt = new double[0,0];
|
---|
| 66 | double[] m = new double[0];
|
---|
| 67 | double[] t = new double[0];
|
---|
| 68 | int i = 0;
|
---|
| 69 | int j = 0;
|
---|
| 70 | double mean = 0;
|
---|
| 71 | double variance = 0;
|
---|
| 72 | double skewness = 0;
|
---|
| 73 | double kurtosis = 0;
|
---|
| 74 | int i_ = 0;
|
---|
| 75 |
|
---|
| 76 |
|
---|
| 77 | //
|
---|
| 78 | // Check input data
|
---|
| 79 | //
|
---|
| 80 | if( npoints<0 | nvars<1 )
|
---|
| 81 | {
|
---|
| 82 | info = -1;
|
---|
| 83 | return;
|
---|
| 84 | }
|
---|
| 85 | info = 1;
|
---|
| 86 |
|
---|
| 87 | //
|
---|
| 88 | // Special case: NPoints=0
|
---|
| 89 | //
|
---|
| 90 | if( npoints==0 )
|
---|
| 91 | {
|
---|
| 92 | s2 = new double[nvars-1+1];
|
---|
| 93 | v = new double[nvars-1+1, nvars-1+1];
|
---|
| 94 | for(i=0; i<=nvars-1; i++)
|
---|
| 95 | {
|
---|
| 96 | s2[i] = 0;
|
---|
| 97 | }
|
---|
| 98 | for(i=0; i<=nvars-1; i++)
|
---|
| 99 | {
|
---|
| 100 | for(j=0; j<=nvars-1; j++)
|
---|
| 101 | {
|
---|
| 102 | if( i==j )
|
---|
| 103 | {
|
---|
| 104 | v[i,j] = 1;
|
---|
| 105 | }
|
---|
| 106 | else
|
---|
| 107 | {
|
---|
| 108 | v[i,j] = 0;
|
---|
| 109 | }
|
---|
| 110 | }
|
---|
| 111 | }
|
---|
| 112 | return;
|
---|
| 113 | }
|
---|
| 114 |
|
---|
| 115 | //
|
---|
| 116 | // Calculate means
|
---|
| 117 | //
|
---|
| 118 | m = new double[nvars-1+1];
|
---|
| 119 | t = new double[npoints-1+1];
|
---|
| 120 | for(j=0; j<=nvars-1; j++)
|
---|
| 121 | {
|
---|
| 122 | for(i_=0; i_<=npoints-1;i_++)
|
---|
| 123 | {
|
---|
| 124 | t[i_] = x[i_,j];
|
---|
| 125 | }
|
---|
| 126 | descriptivestatistics.calculatemoments(ref t, npoints, ref mean, ref variance, ref skewness, ref kurtosis);
|
---|
| 127 | m[j] = mean;
|
---|
| 128 | }
|
---|
| 129 |
|
---|
| 130 | //
|
---|
| 131 | // Center, apply SVD, prepare output
|
---|
| 132 | //
|
---|
| 133 | a = new double[Math.Max(npoints, nvars)-1+1, nvars-1+1];
|
---|
| 134 | for(i=0; i<=npoints-1; i++)
|
---|
| 135 | {
|
---|
| 136 | for(i_=0; i_<=nvars-1;i_++)
|
---|
| 137 | {
|
---|
| 138 | a[i,i_] = x[i,i_];
|
---|
| 139 | }
|
---|
| 140 | for(i_=0; i_<=nvars-1;i_++)
|
---|
| 141 | {
|
---|
| 142 | a[i,i_] = a[i,i_] - m[i_];
|
---|
| 143 | }
|
---|
| 144 | }
|
---|
| 145 | for(i=npoints; i<=nvars-1; i++)
|
---|
| 146 | {
|
---|
| 147 | for(j=0; j<=nvars-1; j++)
|
---|
| 148 | {
|
---|
| 149 | a[i,j] = 0;
|
---|
| 150 | }
|
---|
| 151 | }
|
---|
| 152 | if( !svd.rmatrixsvd(a, Math.Max(npoints, nvars), nvars, 0, 1, 2, ref s2, ref u, ref vt) )
|
---|
| 153 | {
|
---|
| 154 | info = -4;
|
---|
| 155 | return;
|
---|
| 156 | }
|
---|
| 157 | if( npoints!=1 )
|
---|
| 158 | {
|
---|
| 159 | for(i=0; i<=nvars-1; i++)
|
---|
| 160 | {
|
---|
| 161 | s2[i] = AP.Math.Sqr(s2[i])/(npoints-1);
|
---|
| 162 | }
|
---|
| 163 | }
|
---|
| 164 | v = new double[nvars-1+1, nvars-1+1];
|
---|
| 165 | blas.copyandtranspose(ref vt, 0, nvars-1, 0, nvars-1, ref v, 0, nvars-1, 0, nvars-1);
|
---|
| 166 | }
|
---|
| 167 | }
|
---|
| 168 | }
|
---|