Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GaussianProcessTuning/ILNumerics.2.14.4735.573/Algorithms/MachineLearning/ILPCA.cs @ 10617

Last change on this file since 10617 was 9102, checked in by gkronber, 12 years ago

#1967: ILNumerics source for experimentation

File size: 11.5 KB
Line 
1///
2///    This file is part of ILNumerics Community Edition.
3///
4///    ILNumerics Community Edition - high performance computing for applications.
5///    Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
6///
7///    ILNumerics Community Edition is free software: you can redistribute it and/or modify
8///    it under the terms of the GNU General Public License version 3 as published by
9///    the Free Software Foundation.
10///
11///    ILNumerics Community Edition is distributed in the hope that it will be useful,
12///    but WITHOUT ANY WARRANTY; without even the implied warranty of
13///    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14///    GNU General Public License for more details.
15///
16///    You should have received a copy of the GNU General Public License
17///    along with ILNumerics Community Edition. See the file License.txt in the root
18///    of your distribution package. If not, see <http://www.gnu.org/licenses/>.
19///
20///    In addition this software uses the following components and/or licenses:
21///
22///    =================================================================================
23///    The Open Toolkit Library License
24///   
25///    Copyright (c) 2006 - 2009 the Open Toolkit library.
26///   
27///    Permission is hereby granted, free of charge, to any person obtaining a copy
28///    of this software and associated documentation files (the "Software"), to deal
29///    in the Software without restriction, including without limitation the rights to
30///    use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
31///    the Software, and to permit persons to whom the Software is furnished to do
32///    so, subject to the following conditions:
33///
34///    The above copyright notice and this permission notice shall be included in all
35///    copies or substantial portions of the Software.
36///
37///    =================================================================================
38///   
39
40using System;
41using System.Collections.Generic;
42using System.Linq;
43using System.Text;
44
namespace ILNumerics {
    // TODO: add a documented usage example for PCA.
    public partial class ILMath {

        /// <summary>
        /// Principal Component Analysis (PCA), double precision.
        /// </summary>
        /// <param name="A">Data matrix, size (m,n); each of the n observations is expected in a column of m variables.</param>
        /// <param name="outWeights">[Output, optional] Weights for scaling the components according to the original data.</param>
        /// <param name="outCenter">[Output, optional] Vector pointing to the center of the input data <paramref name="A"/>.</param>
        /// <param name="outScores">[Output, optional] Scaling factors for the components needed to recreate the original data.</param>
        /// <returns>PCA components; weights, center and scores are returned via the optional output parameters.
        /// For an empty <paramref name="A"/> an empty array is returned and all provided outputs are set to empty.</returns>
        /// <exception cref="Exceptions.ILArgumentException">Thrown if <paramref name="A"/> is null.</exception>
        /// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
        /// a number of 'principal components' which span a space of orthogonal directions. These directions are chosen
        /// such that the variance of the original data is maximized once the data are projected onto them.
        /// One can pick only a subset of components associated with a high variance and leave out the other components,
        /// which do not contribute much to the distribution of the original data. The resulting subspace has fewer
        /// dimensions than the original space while keeping the reconstruction error small. Therefore, PCA is commonly used
        /// for visualizing higher dimensional data in two or three dimensional plots. It helps analyzing datasets which
        /// otherwise could only be visualized by picking individual dimensions. With the help of PCA, 'interesting'
        /// directions in the data are identified.
        /// <para>All output parameters are optional and may be omitted or provided as null:
        /// <list type="bullet">
        /// <item><b>components</b> (return value): principal components. Matrix of size m x m, the m components are provided
        /// in columns. The first component marks the direction in the data A which corresponds to the largest variance
        /// (i.e. projecting the data onto that direction yields the largest variance). The components are all orthogonal
        /// to each other and are ordered in columns of decreasing variance.</item>
        /// <item><b>weights</b>: while the components returned are normalized to length 1, the 'weights' vector
        /// contains the factors needed to scale the components in order to reflect the real spatial distances in the
        /// original data. They are computed as the eigenvalues of <c>centeredA * centeredA.T</c>, in the same order
        /// as the components.</item>
        /// <item><b>center</b> of the original data: the mean over all observations (columns) of A.</item>
        /// <item><b>scores</b>: matrix of size m by n. For each data point given in A, it contains the factor for each component
        /// needed to reproduce the original (centered) data point in terms of the components. Row i corresponds to the
        /// i-th returned component.</item>
        /// </list></para>
        /// </remarks>
        public static ILRetArray<double> pca(ILInArray<double> A, ILOutArray<double> outWeights = null,
                                              ILOutArray<double> outCenter = null, ILOutArray<double> outScores = null) {
            using (ILScope.Enter(A)) {
                if (isnull(A)) {
                    throw new Exceptions.ILArgumentException("data input argument A must not be null");
                }

                // Nothing to analyze: hand back empty arrays on every requested output.
                if (A.IsEmpty) {
                    if (!isnull(outWeights)) {
                        outWeights.a = empty<double>();
                    }
                    if (!isnull(outCenter)) {
                        outCenter.a = empty<double>();
                    }
                    if (!isnull(outScores)) {
                        outScores.a = empty<double>();
                    }
                    return empty<double>();
                }

                // Mean over the observations (dimension 1), then shift the data to the origin.
                ILArray<double> center = sum(A, 1) / A.S[1];
                ILArray<double> centeredA = A - center;

                // Eigen-decomposition of the scatter matrix; eigenvectors are written into 'components'.
                ILArray<double> components = empty<double>();
                if (!isnull(outWeights)) {
                    outWeights.a = eigSymm(multiply(centeredA, centeredA.T), components);
                    // eigSymm may deliver the eigenvalues as a matrix; reduce it to its diagonal then.
                    if (!outWeights.IsVector) {
                        outWeights.a = diag(outWeights);
                    }
                    // Reverse the order so weights match the flipped components below
                    // (presumably eigSymm sorts ascending, the documented result is descending).
                    outWeights.a = flipud<double>(outWeights);
                } else {
                    // Eigenvalues are not requested; release them right away.
                    eigSymm(multiply(centeredA, centeredA.T), components).Dispose();
                }

                // Flip BEFORE computing the scores so score rows line up with the returned component columns.
                components = fliplr<double>(components);
                if (!isnull(outScores)) {
                    outScores.a = multiply(components.T, centeredA);
                }
                if (!isnull(outCenter)) {
                    outCenter.a = center;
                }
                return components;
            }
        }

        /// <summary>
        /// Principal Component Analysis (PCA), single precision.
        /// </summary>
        /// <param name="A">Data matrix, size (m,n); each of the n observations is expected in a column of m variables.</param>
        /// <param name="outWeights">[Output, optional] Weights for scaling the components according to the original data.</param>
        /// <param name="outCenter">[Output, optional] Vector pointing to the center of the input data <paramref name="A"/>.</param>
        /// <param name="outScores">[Output, optional] Scaling factors for the components needed to recreate the original data.</param>
        /// <returns>PCA components; weights, center and scores are returned via the optional output parameters.
        /// For an empty <paramref name="A"/> an empty array is returned and all provided outputs are set to empty.</returns>
        /// <exception cref="Exceptions.ILArgumentException">Thrown if <paramref name="A"/> is null.</exception>
        /// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
        /// a number of 'principal components' which span a space of orthogonal directions. These directions are chosen
        /// such that the variance of the original data is maximized once the data are projected onto them.
        /// One can pick only a subset of components associated with a high variance and leave out the other components,
        /// which do not contribute much to the distribution of the original data. The resulting subspace has fewer
        /// dimensions than the original space while keeping the reconstruction error small. Therefore, PCA is commonly used
        /// for visualizing higher dimensional data in two or three dimensional plots. It helps analyzing datasets which
        /// otherwise could only be visualized by picking individual dimensions. With the help of PCA, 'interesting'
        /// directions in the data are identified.
        /// <para>All output parameters are optional and may be omitted or provided as null:
        /// <list type="bullet">
        /// <item><b>components</b> (return value): principal components. Matrix of size m x m, the m components are provided
        /// in columns. The first component marks the direction in the data A which corresponds to the largest variance
        /// (i.e. projecting the data onto that direction yields the largest variance). The components are all orthogonal
        /// to each other and are ordered in columns of decreasing variance.</item>
        /// <item><b>weights</b>: while the components returned are normalized to length 1, the 'weights' vector
        /// contains the factors needed to scale the components in order to reflect the real spatial distances in the
        /// original data. They are computed as the eigenvalues of <c>centeredA * centeredA.T</c>, in the same order
        /// as the components.</item>
        /// <item><b>center</b> of the original data: the mean over all observations (columns) of A.</item>
        /// <item><b>scores</b>: matrix of size m by n. For each data point given in A, it contains the factor for each component
        /// needed to reproduce the original (centered) data point in terms of the components. Row i corresponds to the
        /// i-th returned component.</item>
        /// </list></para>
        /// </remarks>
        public static ILRetArray<float> pca(ILInArray<float> A, ILOutArray<float> outWeights = null,
                                              ILOutArray<float> outCenter = null, ILOutArray<float> outScores = null) {
            using (ILScope.Enter(A)) {
                if (isnull(A)) {
                    throw new Exceptions.ILArgumentException("data input argument A must not be null");
                }

                // Nothing to analyze: hand back empty arrays on every requested output.
                if (A.IsEmpty) {
                    if (!isnull(outWeights)) {
                        outWeights.a = empty<float>();
                    }
                    if (!isnull(outCenter)) {
                        outCenter.a = empty<float>();
                    }
                    if (!isnull(outScores)) {
                        outScores.a = empty<float>();
                    }
                    return empty<float>();
                }

                // Mean over the observations (dimension 1), then shift the data to the origin.
                ILArray<float> center = sum(A, 1) / A.S[1];
                ILArray<float> centeredA = A - center;

                // Eigen-decomposition of the scatter matrix; eigenvectors are written into 'components'.
                ILArray<float> components = empty<float>();
                if (!isnull(outWeights)) {
                    outWeights.a = eigSymm(multiply(centeredA, centeredA.T), components);
                    // eigSymm may deliver the eigenvalues as a matrix; reduce it to its diagonal then.
                    if (!outWeights.IsVector) {
                        outWeights.a = diag(outWeights);
                    }
                    // Reverse the order so weights match the flipped components below
                    // (presumably eigSymm sorts ascending, the documented result is descending).
                    outWeights.a = flipud<float>(outWeights);
                } else {
                    // Eigenvalues are not requested; release them right away.
                    eigSymm(multiply(centeredA, centeredA.T), components).Dispose();
                }

                // Flip BEFORE computing the scores. Previously the scores were projected onto the
                // unflipped eigenvectors while the flipped ones were returned, so score rows were in
                // reverse order relative to the returned components (and to the double overload).
                components = fliplr<float>(components);
                if (!isnull(outScores)) {
                    outScores.a = multiply(components.T, centeredA);
                }
                if (!isnull(outCenter)) {
                    outCenter.a = center;
                }
                return components;
            }
        }
    }
}
Note: See TracBrowser for help on using the repository browser.