Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GaussianProcessTuning/ILNumerics.2.14.4735.573/Algorithms/MachineLearning/ILEM.cs @ 11826

Last change on this file since 11826 was 9102, checked in by gkronber, 12 years ago

#1967: ILNumerics source for experimentation

File size: 15.8 KB
Line 
1///
2///    This file is part of ILNumerics Community Edition.
3///
4///    ILNumerics Community Edition - high performance computing for applications.
5///    Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
6///
7///    ILNumerics Community Edition is free software: you can redistribute it and/or modify
8///    it under the terms of the GNU General Public License version 3 as published by
9///    the Free Software Foundation.
10///
11///    ILNumerics Community Edition is distributed in the hope that it will be useful,
12///    but WITHOUT ANY WARRANTY; without even the implied warranty of
13///    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14///    GNU General Public License for more details.
15///
16///    You should have received a copy of the GNU General Public License
17///    along with ILNumerics Community Edition. See the file License.txt in the root
18///    of your distribution package. If not, see <http://www.gnu.org/licenses/>.
19///
20///    In addition this software uses the following components and/or licenses:
21///
22///    =================================================================================
23///    The Open Toolkit Library License
24///   
25///    Copyright (c) 2006 - 2009 the Open Toolkit library.
26///   
27///    Permission is hereby granted, free of charge, to any person obtaining a copy
28///    of this software and associated documentation files (the "Software"), to deal
29///    in the Software without restriction, including without limitation the rights to
30///    use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
31///    the Software, and to permit persons to whom the Software is furnished to do
32///    so, subject to the following conditions:
33///
34///    The above copyright notice and this permission notice shall be included in all
35///    copies or substantial portions of the Software.
36///
37///    =================================================================================
38///   
39
40using System;
41using System.Collections.Generic;
42using System.Linq;
43using System.Text;
44using ILNumerics.Exceptions;
45
46
47namespace ILNumerics {
48    public partial class ILMath {
49
50        /// <summary>
51        /// Determine method of center initialization for EM algorithm
52        /// </summary>
53        public enum EMInitializationMethod {
54            /// <summary>
55            /// Use the kmeans algorithm, choose random samples as centers for start
56            /// </summary>
57            KMeans_random,
58            /// <summary>
59            /// Use the kmeans algorithm, choose first k samples as centers for start
60            /// </summary>
61            KMeans_firstK,
62            /// <summary>
63            /// Provide custom centers in the 'InitCenter' argument
64            /// </summary>
65            User
66        }
67
68
69
70        /// <summary>
71        /// Expectation maximization algorithm
72        /// </summary>
73        /// <param name="Samples">Input data, data points in columns</param>
74        /// <param name="k">Number of clusters</param>
75        /// <param name="method">[Optional] Method used for initializing the cluster centers, default: kmeans_random</param>
76        /// <param name="UserCenters">[Optional] For method 'user': initial cluster centers, size samples.D[0] x k, for other methods ignored</param>
77        /// <param name="maxiterexit">[Optional] Break after that number of iterations, if no convergence was reached</param>
78        /// <param name="Sigma">[Output] Covariance estimation for all clusters, size d x d x k, d = samples.D[0]</param>
79        /// <param name="centerconverg_exit">[Optional] Exit iteration if norm(L) falls below that value, default: 0.001</param>
80        /// <returns>Estimated centers for all clusters, size samples.D[0] x k</returns>
81        /// <remarks><para>The EM algorithm expects the data samples to be drawn from <paramref name="k"/> multivariate normal distributions.
82        /// It estimates the parameters 'center' and 'sigma (covariance)' of every distribution. Therefore, the position and 'shape'
83        /// of each distribution is calculated in such a way, that the likelyhood of generating the given sample points is maximized.</para>
84        /// <para>The parameter k must be determined by the user. This reflects the a priori knowledge of the number of distributions
85        /// or clusters in the data.</para>
86        /// <para>The algorithm exits, if one of the exit criteria is reached:
87        /// <list type="bullet"><item>norm(L) &lt; 'centerconverg_exit' - where L is the difference between
88        /// the centers from the last step and the centers just computed in the current step</item>
89        /// <item>the number of iteration steps exceeds the limit of 'maxiterexit' iterations.</item></list></para>
90        /// </remarks>
91        public static ILRetArray<double> em(ILInArray<double> Samples,
92                                    int k, ILOutArray<double> Sigma = null, EMInitializationMethod method = EMInitializationMethod.KMeans_random,
93                                    ILInArray<double> UserCenters = null, int maxiterexit = 10000, double centerconverg_exit = 0.001) {
94            using (ILScope.Enter(Samples, UserCenters)) {
95
96                ILArray<double> samples = check(Samples);
97                ILArray<double> userCenters = check(UserCenters, allowNullInput: true);
98                if (k < 0)
99                    throw new ILArgumentException("k must be greater or equal 0");
100                if (isnull(samples))
101                    throw new ILArgumentException("input argument 'samples' must not be null");
102                if (method == EMInitializationMethod.User) {
103                    if (isnull(userCenters))
104                        throw new ILArgumentException("if initialization method 'user' was choosen, the parameter 'userCenters' must be used to provide custom initialization centers");
105                    if (userCenters.S[0] != samples.S[0])
106                        throw new ILArgumentException("the dimensionality (number of rows) of 'samples' and 'userCenters' must match");
107                }
108                // initialization
109                int d = samples.S[0], n = samples.S[1], count = 0;
110                ILArray<double> mu = empty<double>();
111                ILArray<double> sigm = repmat(eye<double>(d, d), 1, 1, k);
112                ILArray<double> gamma = zeros<double>(k, n);
113                ILArray<double> priors = ones<double>(1, k) / k;
114                ILArray<double> oldmu = zeros<double>(d, k);
115                switch (method) {
116                    case EMInitializationMethod.KMeans_random:
117                        kMeansClust(samples, k, outCenters: mu, centerInitRandom: true).Dispose();
118                        break;
119                    case EMInitializationMethod.KMeans_firstK:
120                        kMeansClust(samples, k, outCenters: mu, centerInitRandom: false).Dispose();
121                        break;
122                    case EMInitializationMethod.User:
123                        mu.a = userCenters;
124                        break;
125                    default:
126                        throw new ILArgumentException("invalid 'method' argument given");
127                }
128                // main loop
129                while (true) {
130                    using (ILScope.Enter()) {
131                        // E - Step
132                        for (int i = 0; i < k; i++) {
133                            gamma[i, full] = gauss(samples, mu[full, i], sigm[full, full, i]) / priors[i];
134                        }
135                        gamma.a = gamma / max(sum(gamma, 0), MachineParameterDouble.eps);
136
137                        // M - Step
138                        ILArray<double> Nk = sum(gamma, 1);
139                        priors.a = Nk / n;
140                        for (int i = 0; i < k; i++) {
141                            using (ILScope.Enter()) {
142                                mu[full, i] = sum(gamma[i, full] * samples, 1) / Nk[i];
143                                ILArray<double> tmpXnMinMuk = samples - mu[full, i];
144                                sigm[full, full, i] = multiply((gamma[i, full] * tmpXnMinMuk), tmpXnMinMuk.T) / Nk[i];
145                            }
146                        }
147
148                        // check exit condition
149                        if (norm(oldmu - mu) < (double)centerconverg_exit || count > maxiterexit)
150                            break;
151                        else {
152                            oldmu = mu;
153                            count = count + 1;
154                        }
155                    }
156                }
157                if (!isnull(Sigma)) {
158                    Sigma.a = sigm;
159                }
160                return mu;
161            }
162        }
163
164        private static ILRetArray<double> gauss(ILInArray<double> samples, ILInArray<double> mu, ILRetArray<double> sigma) {
165            using (ILScope.Enter(samples, mu, sigma)) {
166                int d = samples.S[0], n = samples.S[1];
167                ILArray<double> sampMinMu = samples - mu;
168                ILArray<double> sigInv = linsolve(eye<double>(d, d), sigma[full,full]);
169                return 1 / ((double)pow(sqrt(2 * pi), d) * det(sigma)) * exp((double)-0.5 * sum(multiply(sigInv.T, sampMinMu) * sampMinMu, 0)); 
170            }
171        }
172             
173
174#region HYCALPER AUTO GENERATED CODE
175
176
177        /// <summary>
178        /// Expectation maximization algorithm
179        /// </summary>
180        /// <param name="Samples">Input data, data points in columns</param>
181        /// <param name="k">Number of clusters</param>
182        /// <param name="method">[Optional] Method used for initializing the cluster centers, default: kmeans_random</param>
183        /// <param name="UserCenters">[Optional] For method 'user': initial cluster centers, size samples.D[0] x k, for other methods ignored</param>
184        /// <param name="maxiterexit">[Optional] Break after that number of iterations, if no convergence was reached</param>
185        /// <param name="Sigma">[Output] Covariance estimation for all clusters, size d x d x k, d = samples.D[0]</param>
186        /// <param name="centerconverg_exit">[Optional] Exit iteration if norm(L) falls below that value, default: 0.001</param>
187        /// <returns>Estimated centers for all clusters, size samples.D[0] x k</returns>
188        /// <remarks><para>The EM algorithm expects the data samples to be drawn from <paramref name="k"/> multivariate normal distributions.
189        /// It estimates the parameters 'center' and 'sigma (covariance)' of every distribution. Therefore, the position and 'shape'
190        /// of each distribution is calculated in such a way, that the likelyhood of generating the given sample points is maximized.</para>
191        /// <para>The parameter k must be determined by the user. This reflects the a priori knowledge of the number of distributions
192        /// or clusters in the data.</para>
193        /// <para>The algorithm exits, if one of the exit criteria is reached:
194        /// <list type="bullet"><item>norm(L) &lt; 'centerconverg_exit' - where L is the difference between
195        /// the centers from the last step and the centers just computed in the current step</item>
196        /// <item>the number of iteration steps exceeds the limit of 'maxiterexit' iterations.</item></list></para>
197        /// </remarks>
198        public static ILRetArray<float> em(ILInArray<float> Samples,
199                                    int k, ILOutArray<float> Sigma = null, EMInitializationMethod method = EMInitializationMethod.KMeans_random,
200                                    ILInArray<float> UserCenters = null, int maxiterexit = 10000, double centerconverg_exit = 0.001) {
201            using (ILScope.Enter(Samples, UserCenters)) {
202
203                ILArray<float> samples = check(Samples);
204                ILArray<float> userCenters = check(UserCenters, allowNullInput: true);
205                if (k < 0)
206                    throw new ILArgumentException("k must be greater or equal 0");
207                if (isnull(samples))
208                    throw new ILArgumentException("input argument 'samples' must not be null");
209                if (method == EMInitializationMethod.User) {
210                    if (isnull(userCenters))
211                        throw new ILArgumentException("if initialization method 'user' was choosen, the parameter 'userCenters' must be used to provide custom initialization centers");
212                    if (userCenters.S[0] != samples.S[0])
213                        throw new ILArgumentException("the dimensionality (number of rows) of 'samples' and 'userCenters' must match");
214                }
215                // initialization
216                int d = samples.S[0], n = samples.S[1], count = 0;
217                ILArray<float> mu = empty<float>();
218                ILArray<float> sigm = repmat(eye<float>(d, d), 1, 1, k);
219                ILArray<float> gamma = zeros<float>(k, n);
220                ILArray<float> priors = ones<float>(1, k) / k;
221                ILArray<float> oldmu = zeros<float>(d, k);
222                switch (method) {
223                    case EMInitializationMethod.KMeans_random:
224                        kMeansClust(samples, k, outCenters: mu, centerInitRandom: true).Dispose();
225                        break;
226                    case EMInitializationMethod.KMeans_firstK:
227                        kMeansClust(samples, k, outCenters: mu, centerInitRandom: false).Dispose();
228                        break;
229                    case EMInitializationMethod.User:
230                        mu.a = userCenters;
231                        break;
232                    default:
233                        throw new ILArgumentException("invalid 'method' argument given");
234                }
235                // main loop
236                while (true) {
237                    using (ILScope.Enter()) {
238                        // E - Step
239                        for (int i = 0; i < k; i++) {
240                            gamma[i, full] = gauss(samples, mu[full, i], sigm[full, full, i]) / priors[i];
241                        }
242                        gamma.a = gamma / max(sum(gamma, 0), MachineParameterSingle.eps);
243
244                        // M - Step
245                        ILArray<float> Nk = sum(gamma, 1);
246                        priors.a = Nk / n;
247                        for (int i = 0; i < k; i++) {
248                            using (ILScope.Enter()) {
249                                mu[full, i] = sum(gamma[i, full] * samples, 1) / Nk[i];
250                                ILArray<float> tmpXnMinMuk = samples - mu[full, i];
251                                sigm[full, full, i] = multiply((gamma[i, full] * tmpXnMinMuk), tmpXnMinMuk.T) / Nk[i];
252                            }
253                        }
254
255                        // check exit condition
256                        if (norm(oldmu - mu) < (float)centerconverg_exit || count > maxiterexit)
257                            break;
258                        else {
259                            oldmu = mu;
260                            count = count + 1;
261                        }
262                    }
263                }
264                if (!isnull(Sigma)) {
265                    Sigma.a = sigm;
266                }
267                return mu;
268            }
269        }
270
271        private static ILRetArray<float> gauss(ILInArray<float> samples, ILInArray<float> mu, ILRetArray<float> sigma) {
272            using (ILScope.Enter(samples, mu, sigma)) {
273                int d = samples.S[0], n = samples.S[1];
274                ILArray<float> sampMinMu = samples - mu;
275                ILArray<float> sigInv = linsolve(eye<float>(d, d), sigma[full,full]);
276                return 1 / ((float)pow(sqrt(2 * pi), d) * det(sigma)) * exp((float)-0.5 * sum(multiply(sigInv.T, sampMinMu) * sampMinMu, 0)); 
277            }
278        }
279             
280
281#endregion HYCALPER AUTO GENERATED CODE
282   }
283}
Note: See TracBrowser for help on using the repository browser.