1 | ///
|
---|
2 | /// This file is part of ILNumerics Community Edition.
|
---|
3 | ///
|
---|
4 | /// ILNumerics Community Edition - high performance computing for applications.
|
---|
5 | /// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
|
---|
6 | ///
|
---|
7 | /// ILNumerics Community Edition is free software: you can redistribute it and/or modify
|
---|
8 | /// it under the terms of the GNU General Public License version 3 as published by
|
---|
9 | /// the Free Software Foundation.
|
---|
10 | ///
|
---|
11 | /// ILNumerics Community Edition is distributed in the hope that it will be useful,
|
---|
12 | /// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | /// GNU General Public License for more details.
|
---|
15 | ///
|
---|
16 | /// You should have received a copy of the GNU General Public License
|
---|
17 | /// along with ILNumerics Community Edition. See the file License.txt in the root
|
---|
18 | /// of your distribution package. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | ///
|
---|
20 | /// In addition this software uses the following components and/or licenses:
|
---|
21 | ///
|
---|
22 | /// =================================================================================
|
---|
23 | /// The Open Toolkit Library License
|
---|
24 | ///
|
---|
25 | /// Copyright (c) 2006 - 2009 the Open Toolkit library.
|
---|
26 | ///
|
---|
27 | /// Permission is hereby granted, free of charge, to any person obtaining a copy
|
---|
28 | /// of this software and associated documentation files (the "Software"), to deal
|
---|
29 | /// in the Software without restriction, including without limitation the rights to
|
---|
30 | /// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
---|
31 | /// the Software, and to permit persons to whom the Software is furnished to do
|
---|
32 | /// so, subject to the following conditions:
|
---|
33 | ///
|
---|
34 | /// The above copyright notice and this permission notice shall be included in all
|
---|
35 | /// copies or substantial portions of the Software.
|
---|
36 | ///
|
---|
37 | /// =================================================================================
|
---|
38 | ///
|
---|
39 |
|
---|
40 | using System;
|
---|
41 | using System.Collections.Generic;
|
---|
42 | using System.Linq;
|
---|
43 | using System.Text;
|
---|
44 | using ILNumerics.Exceptions;
|
---|
45 |
|
---|
46 |
|
---|
47 | namespace ILNumerics {
|
---|
48 | public partial class ILMath {
|
---|
49 |
|
---|
50 | /// <summary>
|
---|
51 | /// Determine method of center initialization for EM algorithm
|
---|
52 | /// </summary>
|
---|
53 | public enum EMInitializationMethod {
|
---|
54 | /// <summary>
|
---|
55 | /// Use the kmeans algorithm, choose random samples as centers for start
|
---|
56 | /// </summary>
|
---|
57 | KMeans_random,
|
---|
58 | /// <summary>
|
---|
59 | /// Use the kmeans algorithm, choose first k samples as centers for start
|
---|
60 | /// </summary>
|
---|
61 | KMeans_firstK,
|
---|
62 | /// <summary>
|
---|
63 | /// Provide custom centers in the 'InitCenter' argument
|
---|
64 | /// </summary>
|
---|
65 | User
|
---|
66 | }
|
---|
67 |
|
---|
68 | |
---|
69 |
|
---|
70 | /// <summary>
|
---|
71 | /// Expectation maximization algorithm
|
---|
72 | /// </summary>
|
---|
73 | /// <param name="Samples">Input data, data points in columns</param>
|
---|
74 | /// <param name="k">Number of clusters</param>
|
---|
75 | /// <param name="method">[Optional] Method used for initializing the cluster centers, default: kmeans_random</param>
|
---|
76 | /// <param name="UserCenters">[Optional] For method 'user': initial cluster centers, size samples.D[0] x k, for other methods ignored</param>
|
---|
77 | /// <param name="maxiterexit">[Optional] Break after that number of iterations, if no convergence was reached</param>
|
---|
78 | /// <param name="Sigma">[Output] Covariance estimation for all clusters, size d x d x k, d = samples.D[0]</param>
|
---|
79 | /// <param name="centerconverg_exit">[Optional] Exit iteration if norm(L) falls below that value, default: 0.001</param>
|
---|
80 | /// <returns>Estimated centers for all clusters, size samples.D[0] x k</returns>
|
---|
81 | /// <remarks><para>The EM algorithm expects the data samples to be drawn from <paramref name="k"/> multivariate normal distributions.
|
---|
82 | /// It estimates the parameters 'center' and 'sigma (covariance)' of every distribution. Therefore, the position and 'shape'
|
---|
83 | /// of each distribution is calculated in such a way, that the likelyhood of generating the given sample points is maximized.</para>
|
---|
84 | /// <para>The parameter k must be determined by the user. This reflects the a priori knowledge of the number of distributions
|
---|
85 | /// or clusters in the data.</para>
|
---|
86 | /// <para>The algorithm exits, if one of the exit criteria is reached:
|
---|
87 | /// <list type="bullet"><item>norm(L) < 'centerconverg_exit' - where L is the difference between
|
---|
88 | /// the centers from the last step and the centers just computed in the current step</item>
|
---|
89 | /// <item>the number of iteration steps exceeds the limit of 'maxiterexit' iterations.</item></list></para>
|
---|
90 | /// </remarks>
|
---|
91 | public static ILRetArray<double> em(ILInArray<double> Samples,
|
---|
92 | int k, ILOutArray<double> Sigma = null, EMInitializationMethod method = EMInitializationMethod.KMeans_random,
|
---|
93 | ILInArray<double> UserCenters = null, int maxiterexit = 10000, double centerconverg_exit = 0.001) {
|
---|
94 | using (ILScope.Enter(Samples, UserCenters)) {
|
---|
95 |
|
---|
96 | ILArray<double> samples = check(Samples);
|
---|
97 | ILArray<double> userCenters = check(UserCenters, allowNullInput: true);
|
---|
98 | if (k < 0)
|
---|
99 | throw new ILArgumentException("k must be greater or equal 0");
|
---|
100 | if (isnull(samples))
|
---|
101 | throw new ILArgumentException("input argument 'samples' must not be null");
|
---|
102 | if (method == EMInitializationMethod.User) {
|
---|
103 | if (isnull(userCenters))
|
---|
104 | throw new ILArgumentException("if initialization method 'user' was choosen, the parameter 'userCenters' must be used to provide custom initialization centers");
|
---|
105 | if (userCenters.S[0] != samples.S[0])
|
---|
106 | throw new ILArgumentException("the dimensionality (number of rows) of 'samples' and 'userCenters' must match");
|
---|
107 | }
|
---|
108 | // initialization
|
---|
109 | int d = samples.S[0], n = samples.S[1], count = 0;
|
---|
110 | ILArray<double> mu = empty<double>();
|
---|
111 | ILArray<double> sigm = repmat(eye<double>(d, d), 1, 1, k);
|
---|
112 | ILArray<double> gamma = zeros<double>(k, n);
|
---|
113 | ILArray<double> priors = ones<double>(1, k) / k;
|
---|
114 | ILArray<double> oldmu = zeros<double>(d, k);
|
---|
115 | switch (method) {
|
---|
116 | case EMInitializationMethod.KMeans_random:
|
---|
117 | kMeansClust(samples, k, outCenters: mu, centerInitRandom: true).Dispose();
|
---|
118 | break;
|
---|
119 | case EMInitializationMethod.KMeans_firstK:
|
---|
120 | kMeansClust(samples, k, outCenters: mu, centerInitRandom: false).Dispose();
|
---|
121 | break;
|
---|
122 | case EMInitializationMethod.User:
|
---|
123 | mu.a = userCenters;
|
---|
124 | break;
|
---|
125 | default:
|
---|
126 | throw new ILArgumentException("invalid 'method' argument given");
|
---|
127 | }
|
---|
128 | // main loop
|
---|
129 | while (true) {
|
---|
130 | using (ILScope.Enter()) {
|
---|
131 | // E - Step
|
---|
132 | for (int i = 0; i < k; i++) {
|
---|
133 | gamma[i, full] = gauss(samples, mu[full, i], sigm[full, full, i]) / priors[i];
|
---|
134 | }
|
---|
135 | gamma.a = gamma / max(sum(gamma, 0), MachineParameterDouble.eps);
|
---|
136 |
|
---|
137 | // M - Step
|
---|
138 | ILArray<double> Nk = sum(gamma, 1);
|
---|
139 | priors.a = Nk / n;
|
---|
140 | for (int i = 0; i < k; i++) {
|
---|
141 | using (ILScope.Enter()) {
|
---|
142 | mu[full, i] = sum(gamma[i, full] * samples, 1) / Nk[i];
|
---|
143 | ILArray<double> tmpXnMinMuk = samples - mu[full, i];
|
---|
144 | sigm[full, full, i] = multiply((gamma[i, full] * tmpXnMinMuk), tmpXnMinMuk.T) / Nk[i];
|
---|
145 | }
|
---|
146 | }
|
---|
147 |
|
---|
148 | // check exit condition
|
---|
149 | if (norm(oldmu - mu) < (double)centerconverg_exit || count > maxiterexit)
|
---|
150 | break;
|
---|
151 | else {
|
---|
152 | oldmu = mu;
|
---|
153 | count = count + 1;
|
---|
154 | }
|
---|
155 | }
|
---|
156 | }
|
---|
157 | if (!isnull(Sigma)) {
|
---|
158 | Sigma.a = sigm;
|
---|
159 | }
|
---|
160 | return mu;
|
---|
161 | }
|
---|
162 | }
|
---|
163 |
|
---|
164 | private static ILRetArray<double> gauss(ILInArray<double> samples, ILInArray<double> mu, ILRetArray<double> sigma) {
|
---|
165 | using (ILScope.Enter(samples, mu, sigma)) {
|
---|
166 | int d = samples.S[0], n = samples.S[1];
|
---|
167 | ILArray<double> sampMinMu = samples - mu;
|
---|
168 | ILArray<double> sigInv = linsolve(eye<double>(d, d), sigma[full,full]);
|
---|
169 | return 1 / ((double)pow(sqrt(2 * pi), d) * det(sigma)) * exp((double)-0.5 * sum(multiply(sigInv.T, sampMinMu) * sampMinMu, 0));
|
---|
170 | }
|
---|
171 | }
|
---|
172 |
|
---|
173 | |
---|
174 | #region HYCALPER AUTO GENERATED CODE
|
---|
175 | |
---|
176 |
|
---|
177 | /// <summary>
|
---|
178 | /// Expectation maximization algorithm
|
---|
179 | /// </summary>
|
---|
180 | /// <param name="Samples">Input data, data points in columns</param>
|
---|
181 | /// <param name="k">Number of clusters</param>
|
---|
182 | /// <param name="method">[Optional] Method used for initializing the cluster centers, default: kmeans_random</param>
|
---|
183 | /// <param name="UserCenters">[Optional] For method 'user': initial cluster centers, size samples.D[0] x k, for other methods ignored</param>
|
---|
184 | /// <param name="maxiterexit">[Optional] Break after that number of iterations, if no convergence was reached</param>
|
---|
185 | /// <param name="Sigma">[Output] Covariance estimation for all clusters, size d x d x k, d = samples.D[0]</param>
|
---|
186 | /// <param name="centerconverg_exit">[Optional] Exit iteration if norm(L) falls below that value, default: 0.001</param>
|
---|
187 | /// <returns>Estimated centers for all clusters, size samples.D[0] x k</returns>
|
---|
188 | /// <remarks><para>The EM algorithm expects the data samples to be drawn from <paramref name="k"/> multivariate normal distributions.
|
---|
189 | /// It estimates the parameters 'center' and 'sigma (covariance)' of every distribution. Therefore, the position and 'shape'
|
---|
190 | /// of each distribution is calculated in such a way, that the likelyhood of generating the given sample points is maximized.</para>
|
---|
191 | /// <para>The parameter k must be determined by the user. This reflects the a priori knowledge of the number of distributions
|
---|
192 | /// or clusters in the data.</para>
|
---|
193 | /// <para>The algorithm exits, if one of the exit criteria is reached:
|
---|
194 | /// <list type="bullet"><item>norm(L) < 'centerconverg_exit' - where L is the difference between
|
---|
195 | /// the centers from the last step and the centers just computed in the current step</item>
|
---|
196 | /// <item>the number of iteration steps exceeds the limit of 'maxiterexit' iterations.</item></list></para>
|
---|
197 | /// </remarks>
|
---|
198 | public static ILRetArray<float> em(ILInArray<float> Samples,
|
---|
199 | int k, ILOutArray<float> Sigma = null, EMInitializationMethod method = EMInitializationMethod.KMeans_random,
|
---|
200 | ILInArray<float> UserCenters = null, int maxiterexit = 10000, double centerconverg_exit = 0.001) {
|
---|
201 | using (ILScope.Enter(Samples, UserCenters)) {
|
---|
202 |
|
---|
203 | ILArray<float> samples = check(Samples);
|
---|
204 | ILArray<float> userCenters = check(UserCenters, allowNullInput: true);
|
---|
205 | if (k < 0)
|
---|
206 | throw new ILArgumentException("k must be greater or equal 0");
|
---|
207 | if (isnull(samples))
|
---|
208 | throw new ILArgumentException("input argument 'samples' must not be null");
|
---|
209 | if (method == EMInitializationMethod.User) {
|
---|
210 | if (isnull(userCenters))
|
---|
211 | throw new ILArgumentException("if initialization method 'user' was choosen, the parameter 'userCenters' must be used to provide custom initialization centers");
|
---|
212 | if (userCenters.S[0] != samples.S[0])
|
---|
213 | throw new ILArgumentException("the dimensionality (number of rows) of 'samples' and 'userCenters' must match");
|
---|
214 | }
|
---|
215 | // initialization
|
---|
216 | int d = samples.S[0], n = samples.S[1], count = 0;
|
---|
217 | ILArray<float> mu = empty<float>();
|
---|
218 | ILArray<float> sigm = repmat(eye<float>(d, d), 1, 1, k);
|
---|
219 | ILArray<float> gamma = zeros<float>(k, n);
|
---|
220 | ILArray<float> priors = ones<float>(1, k) / k;
|
---|
221 | ILArray<float> oldmu = zeros<float>(d, k);
|
---|
222 | switch (method) {
|
---|
223 | case EMInitializationMethod.KMeans_random:
|
---|
224 | kMeansClust(samples, k, outCenters: mu, centerInitRandom: true).Dispose();
|
---|
225 | break;
|
---|
226 | case EMInitializationMethod.KMeans_firstK:
|
---|
227 | kMeansClust(samples, k, outCenters: mu, centerInitRandom: false).Dispose();
|
---|
228 | break;
|
---|
229 | case EMInitializationMethod.User:
|
---|
230 | mu.a = userCenters;
|
---|
231 | break;
|
---|
232 | default:
|
---|
233 | throw new ILArgumentException("invalid 'method' argument given");
|
---|
234 | }
|
---|
235 | // main loop
|
---|
236 | while (true) {
|
---|
237 | using (ILScope.Enter()) {
|
---|
238 | // E - Step
|
---|
239 | for (int i = 0; i < k; i++) {
|
---|
240 | gamma[i, full] = gauss(samples, mu[full, i], sigm[full, full, i]) / priors[i];
|
---|
241 | }
|
---|
242 | gamma.a = gamma / max(sum(gamma, 0), MachineParameterSingle.eps);
|
---|
243 |
|
---|
244 | // M - Step
|
---|
245 | ILArray<float> Nk = sum(gamma, 1);
|
---|
246 | priors.a = Nk / n;
|
---|
247 | for (int i = 0; i < k; i++) {
|
---|
248 | using (ILScope.Enter()) {
|
---|
249 | mu[full, i] = sum(gamma[i, full] * samples, 1) / Nk[i];
|
---|
250 | ILArray<float> tmpXnMinMuk = samples - mu[full, i];
|
---|
251 | sigm[full, full, i] = multiply((gamma[i, full] * tmpXnMinMuk), tmpXnMinMuk.T) / Nk[i];
|
---|
252 | }
|
---|
253 | }
|
---|
254 |
|
---|
255 | // check exit condition
|
---|
256 | if (norm(oldmu - mu) < (float)centerconverg_exit || count > maxiterexit)
|
---|
257 | break;
|
---|
258 | else {
|
---|
259 | oldmu = mu;
|
---|
260 | count = count + 1;
|
---|
261 | }
|
---|
262 | }
|
---|
263 | }
|
---|
264 | if (!isnull(Sigma)) {
|
---|
265 | Sigma.a = sigm;
|
---|
266 | }
|
---|
267 | return mu;
|
---|
268 | }
|
---|
269 | }
|
---|
270 |
|
---|
271 | private static ILRetArray<float> gauss(ILInArray<float> samples, ILInArray<float> mu, ILRetArray<float> sigma) {
|
---|
272 | using (ILScope.Enter(samples, mu, sigma)) {
|
---|
273 | int d = samples.S[0], n = samples.S[1];
|
---|
274 | ILArray<float> sampMinMu = samples - mu;
|
---|
275 | ILArray<float> sigInv = linsolve(eye<float>(d, d), sigma[full,full]);
|
---|
276 | return 1 / ((float)pow(sqrt(2 * pi), d) * det(sigma)) * exp((float)-0.5 * sum(multiply(sigInv.T, sampMinMu) * sampMinMu, 0));
|
---|
277 | }
|
---|
278 | }
|
---|
279 |
|
---|
280 |
|
---|
281 | #endregion HYCALPER AUTO GENERATED CODE
|
---|
282 | }
|
---|
283 | }
|
---|