///
/// This file is part of ILNumerics Community Edition.
///
/// ILNumerics Community Edition - high performance computing for applications.
/// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
///
/// ILNumerics Community Edition is free software: you can redistribute it and/or modify
/// it under the terms of the GNU General Public License version 3 as published by
/// the Free Software Foundation.
///
/// ILNumerics Community Edition is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with ILNumerics Community Edition. See the file License.txt in the root
/// of your distribution package. If not, see <http://www.gnu.org/licenses/>.
///
/// In addition this software uses the following components and/or licenses:
///
/// =================================================================================
/// The Open Toolkit Library License
///
/// Copyright (c) 2006 - 2009 the Open Toolkit library.
///
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to deal
/// in the Software without restriction, including without limitation the rights to
/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
/// the Software, and to permit persons to whom the Software is furnished to do
/// so, subject to the following conditions:
///
/// The above copyright notice and this permission notice shall be included in all
/// copies or substantial portions of the Software.
///
/// =================================================================================
///
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ILNumerics {
    // ToDo: DOKU Example PCA
    public partial class ILMath {
        /// <summary>
        /// Principal Component Analysis (PCA)
        /// </summary>
        /// <param name="A">Data matrix of size (m,n); each of the n observations is expected as a column of m variables</param>
        /// <param name="outWeights">[Output] Weights for scaling the components according to the original data</param>
        /// <param name="outCenter">[Output] Vector pointing to the center of the input data A</param>
        /// <param name="outScores">[Output] Scaling factors for the components, needed to recreate the original data</param>
        /// <returns>PCA components; weights, center and scores are returned as optional output parameters</returns>
        /// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
        /// a number of 'principal components' which span a space of orthogonal directions. These directions are
        /// chosen so as to maximize the variance of the original data once it is projected onto them.
        /// One may pick only the subset of components associated with a high variance and leave out those components which do
        /// not contribute much to the distribution of the original data. The resulting subspace has fewer
        /// dimensions than the original space - at the cost of only a small reconstruction error. Therefore, PCA is commonly used
        /// for visualizing higher dimensional data in two or three dimensional plots. It helps analyzing datasets which
        /// otherwise could only be visualized by picking individual dimensions. With the help of PCA, 'interesting' directions
        /// in the data are identified.
        /// <para>All output parameters are optional and may be omitted or provided as null:</para>
        /// <list type="bullet">
        /// <item>components (return value): the principal components. Matrix of size m x m; the m components are provided in columns. The first
        /// component marks the direction in the data A which corresponds to the largest variance (i.e. projecting the data onto
        /// that direction yields the largest variance). All components are mutually orthogonal and are
        /// ordered in columns of decreasing variance; see the usage sketch in the example below.</item>
        /// <item>weights vector. While the components returned are normalized to length 1, the 'weights' vector
        /// contains the factors needed to scale the components in order to reflect the real spatial distances in the
        /// original data.</item>
        /// <item>center of the original data. The vector points to the center (mean) of the data points in A.</item>
        /// <item>scores is a matrix of size m by n. For each data point given in A, it contains the factors for each component
        /// needed to reproduce the original data point in terms of the components.</item>
        /// </list>
        /// </remarks>
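        /// <example>
        /// <para>A minimal usage sketch (not from the original documentation; the data, its size and the call to
        /// rand() are illustrative assumptions, and the calling class is assumed to derive from ILMath): reduce
        /// 5-dimensional observations to their two strongest directions.</para>
        /// <code>
        /// ILArray&lt;double&gt; A = rand(5, 200);                          // 5 variables, 200 observations
        /// ILArray&lt;double&gt; weights = empty(), center = empty(), scores = empty();
        /// ILArray&lt;double&gt; comps = pca(A, weights, center, scores);   // components in columns, strongest first
        /// // project the centered data onto the two strongest components -> 2 x 200 matrix, e.g. for plotting
        /// ILArray&lt;double&gt; reduced = multiply(comps[full, r(0, 1)].T, A - center);
        /// </code>
        /// </example>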
        public static ILRetArray<double> pca(ILInArray<double> A, ILOutArray<double> outWeights = null,
            ILOutArray<double> outCenter = null, ILOutArray<double> outScores = null) {
            using (ILScope.Enter(A)) {
                if (isnull(A))
                    throw new Exceptions.ILArgumentException("data input argument A must not be null");
                ILArray<double> ret = empty();
                if (!A.IsEmpty) {
                    // center of the data: column-wise mean over all n observations
                    ILArray<double> Center = sum(A, 1) / A.S[1];
                    ILArray<double> centeredA = A - Center;
                    if (!isnull(outWeights)) {
                        // eigenvalues of the (unnormalized) covariance matrix serve as weights,
                        // the corresponding eigenvectors are returned in 'ret'
                        outWeights.a = eigSymm(multiply(centeredA, centeredA.T), ret);
                        if (!outWeights.IsVector) {
                            outWeights.a = diag(outWeights);
                        }
                        // eigSymm delivers eigenvalues in ascending order -> flip to decreasing variance
                        outWeights.a = flipud(outWeights);
                    } else {
                        eigSymm(multiply(centeredA, centeredA.T), ret).Dispose();
                    }
                    // reorder the components to match the (descending) order of the weights
                    ret = fliplr(ret);
                    if (!isnull(outScores))
                        outScores.a = multiply(ret.T, centeredA);
                    if (!isnull(outCenter))
                        outCenter.a = Center;
                    return ret;
                } else {
                    if (!isnull(outWeights))
                        outWeights.a = empty();
                    if (!isnull(outCenter))
                        outCenter.a = empty();
                    if (!isnull(outScores))
                        outScores.a = empty();
                    return empty();
                }
            }
        }
        /// <summary>
        /// Principal Component Analysis (PCA)
        /// </summary>
        /// <param name="A">Data matrix of size (m,n); each of the n observations is expected as a column of m variables</param>
        /// <param name="outWeights">[Output] Weights for scaling the components according to the original data</param>
        /// <param name="outCenter">[Output] Vector pointing to the center of the input data A</param>
        /// <param name="outScores">[Output] Scaling factors for the components, needed to recreate the original data</param>
        /// <returns>PCA components; weights, center and scores are returned as optional output parameters</returns>
        /// <remarks>Principal Component Analysis (PCA) is commonly used as a method for dimension reduction. It computes
        /// a number of 'principal components' which span a space of orthogonal directions. These directions are
        /// chosen so as to maximize the variance of the original data once it is projected onto them.
        /// One may pick only the subset of components associated with a high variance and leave out those components which do
        /// not contribute much to the distribution of the original data. The resulting subspace has fewer
        /// dimensions than the original space - at the cost of only a small reconstruction error. Therefore, PCA is commonly used
        /// for visualizing higher dimensional data in two or three dimensional plots. It helps analyzing datasets which
        /// otherwise could only be visualized by picking individual dimensions. With the help of PCA, 'interesting' directions
        /// in the data are identified.
        /// <para>All output parameters are optional and may be omitted or provided as null:</para>
        /// <list type="bullet">
        /// <item>components (return value): the principal components. Matrix of size m x m; the m components are provided in columns. The first
        /// component marks the direction in the data A which corresponds to the largest variance (i.e. projecting the data onto
        /// that direction yields the largest variance). All components are mutually orthogonal and are
        /// ordered in columns of decreasing variance; see the reconstruction sketch in the example below.</item>
        /// <item>weights vector. While the components returned are normalized to length 1, the 'weights' vector
        /// contains the factors needed to scale the components in order to reflect the real spatial distances in the
        /// original data.</item>
        /// <item>center of the original data. The vector points to the center (mean) of the data points in A.</item>
        /// <item>scores is a matrix of size m by n. For each data point given in A, it contains the factors for each component
        /// needed to reproduce the original data point in terms of the components.</item>
        /// </list>
        /// </remarks>
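        /// <example>
        /// <para>A minimal sketch (illustrative only; the sample data is an assumption and the calling class is
        /// assumed to derive from ILMath): rebuild the original data from components, scores and center. Since the
        /// components are orthonormal, multiplying them with the scores and adding the center reproduces A up to
        /// numerical precision.</para>
        /// <code>
        /// ILArray&lt;float&gt; A = tosingle(rand(4, 50));                  // 4 variables, 50 observations
        /// ILArray&lt;float&gt; weights = empty&lt;float&gt;(), center = empty&lt;float&gt;(), scores = empty&lt;float&gt;();
        /// ILArray&lt;float&gt; comps = pca(A, weights, center, scores);
        /// ILArray&lt;float&gt; rebuilt = multiply(comps, scores) + center;  // rebuilt ~ A
        /// </code>
        /// </example>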
        public static ILRetArray<float> pca(ILInArray<float> A, ILOutArray<float> outWeights = null,
            ILOutArray<float> outCenter = null, ILOutArray<float> outScores = null) {
            using (ILScope.Enter(A)) {
                if (isnull(A))
                    throw new Exceptions.ILArgumentException("data input argument A must not be null");
                ILArray<float> ret = empty<float>();
                if (!A.IsEmpty) {
                    // center of the data: column-wise mean over all n observations
                    ILArray<float> Center = sum(A, 1) / A.S[1];
                    ILArray<float> centeredA = A - Center;
                    if (!isnull(outWeights)) {
                        // eigenvalues of the (unnormalized) covariance matrix serve as weights,
                        // the corresponding eigenvectors are returned in 'ret'
                        outWeights.a = eigSymm(multiply(centeredA, centeredA.T), ret);
                        if (!outWeights.IsVector) {
                            outWeights.a = diag(outWeights);
                        }
                        // eigSymm delivers eigenvalues in ascending order -> flip to decreasing variance
                        outWeights.a = flipud(outWeights);
                    } else {
                        eigSymm(multiply(centeredA, centeredA.T), ret).Dispose();
                    }
                    // reorder the components to match the (descending) order of the weights,
                    // so the scores refer to the same column order as the returned components
                    ret = fliplr(ret);
                    if (!isnull(outScores))
                        outScores.a = multiply(ret.T, centeredA);
                    if (!isnull(outCenter))
                        outCenter.a = Center;
                    return ret;
                } else {
                    if (!isnull(outWeights))
                        outWeights.a = empty<float>();
                    if (!isnull(outCenter))
                        outCenter.a = empty<float>();
                    if (!isnull(outScores))
                        outScores.a = empty<float>();
                    return empty<float>();
                }
            }
        }
    }
}