///
/// This file is part of ILNumerics Community Edition.
///
/// ILNumerics Community Edition - high performance computing for applications.
/// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
///
/// ILNumerics Community Edition is free software: you can redistribute it and/or modify
/// it under the terms of the GNU General Public License version 3 as published by
/// the Free Software Foundation.
///
/// ILNumerics Community Edition is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with ILNumerics Community Edition. See the file License.txt in the root
/// of your distribution package. If not, see <http://www.gnu.org/licenses/>.
///
/// In addition this software uses the following components and/or licenses:
///
/// =================================================================================
/// The Open Toolkit Library License
///
/// Copyright (c) 2006 - 2009 the Open Toolkit library.
///
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to deal
/// in the Software without restriction, including without limitation the rights to
/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
/// the Software, and to permit persons to whom the Software is furnished to do
/// so, subject to the following conditions:
///
/// The above copyright notice and this permission notice shall be included in all
/// copies or substantial portions of the Software.
///
/// =================================================================================
///

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace ILNumerics {
    // ToDo: DOKU Example PCA
    public partial class ILMath {

        /// <summary>
        /// Principal Component Analysis (PCA)
        /// </summary>
        /// <param name="A">Data matrix of size (m,n); each of the n observations is expected as a column of m variables</param>
        /// <param name="outWeights">[Output] Weights for scaling the components according to the original data</param>
        /// <param name="outCenter">[Output] Vector pointing to the center of the input data A</param>
        /// <param name="outScores">[Output] Scaling factors for the components, needed to recreate the original data</param>
        /// <returns>PCA components; weights, center and scores are returned via the optional output parameters</returns>
        /// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
        /// a number of 'principal components' which span a set of orthogonal directions. These directions are chosen
        /// such that the variance of the original data, once projected onto them, is maximized. One can therefore keep
        /// only the subset of components associated with a high variance and leave out those components which do not
        /// contribute much to the distribution of the original data. The resulting subspace has fewer dimensions than
        /// the original space, while introducing only a small reconstruction error. PCA is therefore commonly used for
        /// visualizing higher dimensional data in two or three dimensional plots. It helps to analyze datasets which
        /// otherwise could only be visualized by picking individual dimensions: PCA identifies the 'interesting'
        /// directions in the data.
        /// <para>All output parameters are optional and may be omitted or provided as null:
        /// <list type="bullet">
        /// <item><b>components</b> (return value): principal components. Matrix of size m x m; the m components are provided
        /// in columns. The first component marks the direction in the data A which corresponds to the largest variance
        /// (i.e. projecting the data onto that direction yields the largest variance). All components are orthogonal to
        /// each other. The components are ordered in columns of decreasing variance.</item>
        /// <item><b>weights</b> vector. While the components returned are normalized to length 1, the 'weights' vector
        /// contains the factors needed to scale the components in order to reflect the real spatial distances in the
        /// original data.</item>
        /// <item><b>center</b> of the original data. The vector points to the mean of the observations in A.</item>
        /// <item><b>scores</b> is a matrix of size m by n. For each data point given in A, it contains the factors for each
        /// component needed to reproduce the original data point in terms of the components.</item>
        /// </list></para>
        /// </remarks>
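        /// <example>
        /// A minimal usage sketch (hypothetical data; it assumes the usual ILNumerics pattern of passing locally
        /// defined ILArray instances for the output parameters): all outputs are requested and the original data
        /// is reconstructed from components, scores and center.
        /// <code>
        /// ILArray&lt;double&gt; A = ILMath.rand(3, 100);            // 100 observations of 3 variables
        /// ILArray&lt;double&gt; weights = ILMath.empty();
        /// ILArray&lt;double&gt; center = ILMath.empty();
        /// ILArray&lt;double&gt; scores = ILMath.empty();
        /// ILArray&lt;double&gt; components = ILMath.pca(A, weights, center, scores);
        /// // with all m components used, the original data can be recovered:
        /// ILArray&lt;double&gt; B = ILMath.multiply(components, scores) + center;   // B ~ A (up to round off)
        /// </code>
        /// </example>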
        public static ILRetArray<double> pca(ILInArray<double> A, ILOutArray<double> outWeights = null,
                                             ILOutArray<double> outCenter = null, ILOutArray<double> outScores = null) {
            using (ILScope.Enter(A)) {
                if (isnull(A))
                    throw new Exceptions.ILArgumentException("data input argument A must not be null");
                ILArray<double> ret = empty();
                if (!A.IsEmpty) {
                    // center the data: subtract the mean of the n observations from every column
                    ILArray<double> Center = sum(A, 1) / A.S[1];
                    ILArray<double> centeredA = A - Center;
                    // eigendecomposition of the scatter matrix; the eigenvectors (returned in 'ret') are
                    // the components, the eigenvalues (in ascending order) are the corresponding weights
                    if (!isnull(outWeights)) {
                        outWeights.a = eigSymm(multiply(centeredA, centeredA.T), ret);
                        if (!outWeights.IsVector) {
                            outWeights.a = diag(outWeights);
                        }
                        // reorder weights to match the components, ordered by decreasing variance
                        outWeights.a = flipud(outWeights);
                    } else {
                        eigSymm(multiply(centeredA, centeredA.T), ret).Dispose();
                    }
                    // reorder components by decreasing variance
                    ret = fliplr(ret);
                    if (!isnull(outScores))
                        // project the centered data onto the components
                        outScores.a = multiply(ret.T, centeredA);
                    if (!isnull(outCenter))
                        outCenter.a = Center;
                    return ret;
                } else {
                    // empty input: return empty arrays for all requested outputs
                    if (!isnull(outWeights))
                        outWeights.a = empty<double>();
                    if (!isnull(outCenter))
                        outCenter.a = empty<double>();
                    if (!isnull(outScores))
                        outScores.a = empty<double>();
                    return empty<double>();
                }
            }
        }

        /// <summary>
        /// Principal Component Analysis (PCA)
        /// </summary>
        /// <param name="A">Data matrix of size (m,n); each of the n observations is expected as a column of m variables</param>
        /// <param name="outWeights">[Output] Weights for scaling the components according to the original data</param>
        /// <param name="outCenter">[Output] Vector pointing to the center of the input data A</param>
        /// <param name="outScores">[Output] Scaling factors for the components, needed to recreate the original data</param>
        /// <returns>PCA components; weights, center and scores are returned via the optional output parameters</returns>
        /// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
        /// a number of 'principal components' which span a set of orthogonal directions. These directions are chosen
        /// such that the variance of the original data, once projected onto them, is maximized. One can therefore keep
        /// only the subset of components associated with a high variance and leave out those components which do not
        /// contribute much to the distribution of the original data. The resulting subspace has fewer dimensions than
        /// the original space, while introducing only a small reconstruction error. PCA is therefore commonly used for
        /// visualizing higher dimensional data in two or three dimensional plots. It helps to analyze datasets which
        /// otherwise could only be visualized by picking individual dimensions: PCA identifies the 'interesting'
        /// directions in the data.
        /// <para>All output parameters are optional and may be omitted or provided as null:
        /// <list type="bullet">
        /// <item><b>components</b> (return value): principal components. Matrix of size m x m; the m components are provided
        /// in columns. The first component marks the direction in the data A which corresponds to the largest variance
        /// (i.e. projecting the data onto that direction yields the largest variance). All components are orthogonal to
        /// each other. The components are ordered in columns of decreasing variance.</item>
        /// <item><b>weights</b> vector. While the components returned are normalized to length 1, the 'weights' vector
        /// contains the factors needed to scale the components in order to reflect the real spatial distances in the
        /// original data.</item>
        /// <item><b>center</b> of the original data. The vector points to the mean of the observations in A.</item>
        /// <item><b>scores</b> is a matrix of size m by n. For each data point given in A, it contains the factors for each
        /// component needed to reproduce the original data point in terms of the components.</item>
        /// </list></para>
        /// </remarks>
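        /// <example>
        /// A minimal dimension reduction sketch (hypothetical data; the helpers tosingle(), r() and full are
        /// assumed to be available as for other ILMath functions): only the scores along the two strongest
        /// components are kept, e.g. for a 2D plot of a 5 dimensional dataset.
        /// <code>
        /// ILArray&lt;float&gt; A = ILMath.tosingle(ILMath.rand(5, 200));   // 200 observations of 5 variables
        /// ILArray&lt;float&gt; scores = ILMath.empty&lt;float&gt;();
        /// ILMath.pca(A, outScores: scores);
        /// // rows 0 and 1 of 'scores' hold the coordinates along the two strongest components
        /// ILArray&lt;float&gt; projected2D = scores[ILMath.r(0, 1), ILMath.full];
        /// </code>
        /// </example>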
        public static ILRetArray<float> pca(ILInArray<float> A, ILOutArray<float> outWeights = null,
                                            ILOutArray<float> outCenter = null, ILOutArray<float> outScores = null) {
            using (ILScope.Enter(A)) {
                if (isnull(A))
                    throw new Exceptions.ILArgumentException("data input argument A must not be null");
                ILArray<float> ret = empty<float>();
                if (!A.IsEmpty) {
                    // center the data: subtract the mean of the n observations from every column
                    ILArray<float> Center = sum(A, 1) / A.S[1];
                    ILArray<float> centeredA = A - Center;
                    // eigendecomposition of the scatter matrix; the eigenvectors (returned in 'ret') are
                    // the components, the eigenvalues (in ascending order) are the corresponding weights
                    if (!isnull(outWeights)) {
                        outWeights.a = eigSymm(multiply(centeredA, centeredA.T), ret);
                        if (!outWeights.IsVector) {
                            outWeights.a = diag(outWeights);
                        }
                        // reorder weights to match the components, ordered by decreasing variance
                        outWeights.a = flipud<float>(outWeights);
                    } else {
                        eigSymm(multiply(centeredA, centeredA.T), ret).Dispose();
                    }
                    // reorder components by decreasing variance (before computing the scores, so the
                    // rows of the scores correspond to the returned component order)
                    ret = fliplr<float>(ret);
                    if (!isnull(outScores))
                        // project the centered data onto the components
                        outScores.a = multiply(ret.T, centeredA);
                    if (!isnull(outCenter))
                        outCenter.a = Center;
                    return ret;
                } else {
                    // empty input: return empty arrays for all requested outputs
                    if (!isnull(outWeights))
                        outWeights.a = empty<float>();
                    if (!isnull(outCenter))
                        outCenter.a = empty<float>();
                    if (!isnull(outScores))
                        outScores.a = empty<float>();
                    return empty<float>();
                }
            }
        }
    }
}