[9102] | 1 | ///
|
---|
| 2 | /// This file is part of ILNumerics Community Edition.
|
---|
| 3 | ///
|
---|
| 4 | /// ILNumerics Community Edition - high performance computing for applications.
|
---|
| 5 | /// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
|
---|
| 6 | ///
|
---|
| 7 | /// ILNumerics Community Edition is free software: you can redistribute it and/or modify
|
---|
| 8 | /// it under the terms of the GNU General Public License version 3 as published by
|
---|
| 9 | /// the Free Software Foundation.
|
---|
| 10 | ///
|
---|
| 11 | /// ILNumerics Community Edition is distributed in the hope that it will be useful,
|
---|
| 12 | /// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | /// GNU General Public License for more details.
|
---|
| 15 | ///
|
---|
| 16 | /// You should have received a copy of the GNU General Public License
|
---|
| 17 | /// along with ILNumerics Community Edition. See the file License.txt in the root
|
---|
| 18 | /// of your distribution package. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | ///
|
---|
| 20 | /// In addition this software uses the following components and/or licenses:
|
---|
| 21 | ///
|
---|
| 22 | /// =================================================================================
|
---|
| 23 | /// The Open Toolkit Library License
|
---|
| 24 | ///
|
---|
| 25 | /// Copyright (c) 2006 - 2009 the Open Toolkit library.
|
---|
| 26 | ///
|
---|
| 27 | /// Permission is hereby granted, free of charge, to any person obtaining a copy
|
---|
| 28 | /// of this software and associated documentation files (the "Software"), to deal
|
---|
| 29 | /// in the Software without restriction, including without limitation the rights to
|
---|
| 30 | /// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
---|
| 31 | /// the Software, and to permit persons to whom the Software is furnished to do
|
---|
| 32 | /// so, subject to the following conditions:
|
---|
| 33 | ///
|
---|
| 34 | /// The above copyright notice and this permission notice shall be included in all
|
---|
| 35 | /// copies or substantial portions of the Software.
|
---|
| 36 | ///
|
---|
| 37 | /// =================================================================================
|
---|
| 38 | ///
|
---|
| 39 |
|
---|
| 40 | using System;
|
---|
| 41 | using System.Collections.Generic;
|
---|
| 42 | using System.Linq;
|
---|
| 43 | using System.Text;
|
---|
| 44 |
|
---|
| 45 | namespace ILNumerics {
|
---|
| 46 | // ToDo: DOKU Example PCA
|
---|
| 47 | public partial class ILMath {
|
---|
| 48 |
|
---|
/// <summary>
/// Principal Component Analysis (PCA)
/// </summary>
/// <param name="A">Data matrix, size (m,n); each of the n observations is expected in a column of m variables</param>
/// <param name="outWeights">[Output, optional] Weights for scaling the components according to the original data</param>
/// <param name="outCenter">[Output, optional] Vector pointing to the center of the input data A</param>
/// <param name="outScores">[Output, optional] Scaling factors for the components, needed to recreate the original data</param>
/// <returns>PCA components; weights, center and scores are returned via the optional output parameters</returns>
/// <exception cref="Exceptions.ILArgumentException">Thrown if <paramref name="A"/> is null</exception>
/// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
/// a number of 'principal components' which span a space of orthogonal directions. These directions are chosen
/// such that the variance of the original data is maximized once the data are projected onto them.
/// One can pick only the subset of components associated with a high variance and leave out the components which
/// do not contribute much to the distribution of the original data. The resulting subspace has fewer
/// dimensions than the original space while keeping the reconstruction error small. Therefore, PCA is commonly
/// used for visualizing higher dimensional data in two or three dimensional plots. It helps analyzing datasets
/// which otherwise could only be visualized by picking individual dimensions. With the help of PCA, 'interesting'
/// directions in the data are identified.
/// <para>All output parameters are optional and may be omitted or provided as null:
/// <list type="bullet">
/// <item><b>components</b> (return value) principal components. Matrix of size m x m, the m components are provided
/// in columns. The first component marks the direction in the data A which corresponds to the largest variance
/// (i.e. projecting the data onto that direction creates the largest variance). All components are orthogonal
/// to each other. The components are ordered in columns of decreasing variance.</item>
/// <item><b>weights</b> vector. While the components returned are normalized to length 1, the 'weights' vector
/// contains the factors needed to scale the components in order to reflect the real spatial distances in the
/// original data.</item>
/// <item><b>center</b> of the original data. The vector is computed as sum(A, 1) divided by the number of columns of A.</item>
/// <item><b>scores</b> is a matrix of size m by n. For each data point given in A, it contains the factors for each
/// component needed to reproduce the original data point in terms of the components.</item>
/// </list></para>
/// <para>If A is empty, the return value and all requested outputs are empty arrays.</para>
/// </remarks>
public static ILRetArray<double> pca(ILInArray<double> A,ILOutArray<double> outWeights = null,
        ILOutArray<double> outCenter = null, ILOutArray<double> outScores = null) {
    using (ILScope.Enter(A)) {
        if (isnull(A)) {
            throw new Exceptions.ILArgumentException("data input argument A must not be null");
        }

        // Empty input: nothing to decompose, hand back empty arrays for everything requested.
        if (A.IsEmpty) {
            if (!isnull(outWeights)) {
                outWeights.a = empty<double>();
            }
            if (!isnull(outCenter)) {
                outCenter.a = empty<double>();
            }
            if (!isnull(outScores)) {
                outScores.a = empty<double>();
            }
            return empty<double>();
        }

        // Receives the eigenvectors from eigSymm below.
        ILArray<double> components = empty();

        // Subtract the mean over all observations (columns) from the data.
        ILArray<double> mean = sum(A, 1) / A.S[1];
        ILArray<double> deviations = A - mean;

        if (isnull(outWeights)) {
            // Only the eigenvectors are needed; release the returned eigenvalues right away.
            eigSymm(multiply(deviations, deviations.T), components).Dispose();
        } else {
            outWeights.a = eigSymm(multiply(deviations, deviations.T), components);
            if (!outWeights.IsVector) {
                // Eigenvalues arrived as a matrix: reduce them to their diagonal.
                outWeights.a = diag(outWeights);
            }
            // Reverse the order of the weights to match the reversed component columns.
            outWeights.a = flipud(outWeights);
        }

        // Reverse the column order of the components before they are used for the scores.
        components = fliplr(components);

        if (!isnull(outScores)) {
            // Project the centered data onto the (reordered) components.
            outScores.a = multiply(components.T, deviations);
        }
        if (!isnull(outCenter)) {
            outCenter.a = mean;
        }
        return components;
    }
}
|
---|
| 115 |
|
---|
/// <summary>
/// Principal Component Analysis (PCA)
/// </summary>
/// <param name="A">Data matrix, size (m,n); each of the n observations is expected in a column of m variables</param>
/// <param name="outWeights">[Output, optional] Weights for scaling the components according to the original data</param>
/// <param name="outCenter">[Output, optional] Vector pointing to the center of the input data A</param>
/// <param name="outScores">[Output, optional] Scaling factors for the components, needed to recreate the original data;
/// row i corresponds to column i of the returned components</param>
/// <returns>PCA components; weights, center and scores are returned via the optional output parameters</returns>
/// <exception cref="Exceptions.ILArgumentException">Thrown if <paramref name="A"/> is null</exception>
/// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
/// a number of 'principal components' which span a space of orthogonal directions. These directions are chosen
/// such that the variance of the original data is maximized once the data are projected onto them.
/// One can pick only the subset of components associated with a high variance and leave out the components which
/// do not contribute much to the distribution of the original data. The resulting subspace has fewer
/// dimensions than the original space while keeping the reconstruction error small. Therefore, PCA is commonly
/// used for visualizing higher dimensional data in two or three dimensional plots. It helps analyzing datasets
/// which otherwise could only be visualized by picking individual dimensions. With the help of PCA, 'interesting'
/// directions in the data are identified.
/// <para>All output parameters are optional and may be omitted or provided as null:
/// <list type="bullet">
/// <item><b>components</b> (return value) principal components. Matrix of size m x m, the m components are provided
/// in columns. The first component marks the direction in the data A which corresponds to the largest variance
/// (i.e. projecting the data onto that direction creates the largest variance). All components are orthogonal
/// to each other. The components are ordered in columns of decreasing variance.</item>
/// <item><b>weights</b> vector. While the components returned are normalized to length 1, the 'weights' vector
/// contains the factors needed to scale the components in order to reflect the real spatial distances in the
/// original data.</item>
/// <item><b>center</b> of the original data. The vector is computed as sum(A, 1) divided by the number of columns of A.</item>
/// <item><b>scores</b> is a matrix of size m by n. For each data point given in A, it contains the factors for each
/// component needed to reproduce the original data point in terms of the components.</item>
/// </list></para>
/// <para>If A is empty, the return value and all requested outputs are empty arrays.</para>
/// </remarks>
public static ILRetArray<float> pca(ILInArray<float> A, ILOutArray<float> outWeights = null,
        ILOutArray<float> outCenter = null, ILOutArray<float> outScores = null) {
    using (ILScope.Enter(A)) {
        if (isnull(A))
            throw new Exceptions.ILArgumentException("data input argument A must not be null");
        // receives the eigenvectors from eigSymm below
        ILArray<float> ret = empty<float>();
        if (!A.IsEmpty) {
            // subtract the mean over all observations (columns) from the data
            ILArray<float> Center = sum(A, 1) / A.S[1];
            ILArray<float> centeredA = A - Center;
            if (!isnull(outWeights)) {
                outWeights.a = eigSymm(multiply(centeredA, centeredA.T), ret);
                if (!outWeights.IsVector) {
                    // eigenvalues arrived as a matrix: reduce them to their diagonal
                    outWeights.a = diag(outWeights);
                }
                // reverse the order of the weights to match the reversed component columns
                outWeights.a = flipud<float>(outWeights);
            } else {
                // only the eigenvectors are needed; release the returned eigenvalues right away
                eigSymm(multiply(centeredA, centeredA.T), ret).Dispose();
            }
            // Reorder the components BEFORE computing the scores. Previously the scores were
            // projected onto the un-flipped eigenvectors while the flipped ones were returned,
            // so score row i did not belong to returned component column i (unlike the
            // double precision overload, which flips first).
            ret = fliplr<float>(ret);
            if (!isnull(outScores))
                outScores.a = multiply(ret.T, centeredA);
            if (!isnull(outCenter))
                outCenter.a = Center;
            return ret;
        } else {
            // empty input: nothing to decompose, hand back empty arrays for everything requested
            if (!isnull(outWeights))
                outWeights.a = empty<float>();
            if (!isnull(outCenter))
                outCenter.a = empty<float>();
            if (!isnull(outScores))
                outScores.a = empty<float>();
            return empty<float>();
        }
    }
}
|
---|
| 181 | }
|
---|
| 182 | }
|
---|