Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GaussianProcessTuning/ILNumerics.2.14.4735.573/Algorithms/MachineLearning/kmeansclust.cs @ 10884

Last change on this file since 10884 was 9102, checked in by gkronber, 12 years ago

#1967: ILNumerics source for experimentation

File size: 13.7 KB
Line 
1///
2///    This file is part of ILNumerics Community Edition.
3///
4///    ILNumerics Community Edition - high performance computing for applications.
5///    Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
6///
7///    ILNumerics Community Edition is free software: you can redistribute it and/or modify
8///    it under the terms of the GNU General Public License version 3 as published by
9///    the Free Software Foundation.
10///
11///    ILNumerics Community Edition is distributed in the hope that it will be useful,
12///    but WITHOUT ANY WARRANTY; without even the implied warranty of
13///    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14///    GNU General Public License for more details.
15///
16///    You should have received a copy of the GNU General Public License
17///    along with ILNumerics Community Edition. See the file License.txt in the root
18///    of your distribution package. If not, see <http://www.gnu.org/licenses/>.
19///
20///    In addition this software uses the following components and/or licenses:
21///
22///    =================================================================================
23///    The Open Toolkit Library License
24///   
25///    Copyright (c) 2006 - 2009 the Open Toolkit library.
26///   
27///    Permission is hereby granted, free of charge, to any person obtaining a copy
28///    of this software and associated documentation files (the "Software"), to deal
29///    in the Software without restriction, including without limitation the rights to
30///    use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
31///    the Software, and to permit persons to whom the Software is furnished to do
32///    so, subject to the following conditions:
33///
34///    The above copyright notice and this permission notice shall be included in all
35///    copies or substantial portions of the Software.
36///
37///    =================================================================================
38///   
39
40using System;
41using System.Collections.Generic;
42using System.Text;
43using ILNumerics.Exceptions;
44
45
46
47namespace ILNumerics {
48
49
50    public partial class ILMath {
51
52
53        /// <summary>
54        /// K-Means clustering: find clusters in data matrix X
55        /// </summary>
56        /// <param name="X">Data matrix, data points are given as columns</param>
57        /// <param name="k">Initial number of clusters expected</param>
58        /// <param name="centerInitRandom">[Optional] false: pick the first k data points as initial centers, true: pick random datapoints (default)</param>
59        /// <param name="maxIterations">[Optional] Maximum number of iterations, the computation will exit after that many iterations, default: 10.000</param>
60        /// <param name="outCenters">[Input/Output/Optional] If not null on entry, outCenters will contain the centers of the clusters found, default: null</param>
61        /// <returns>Vector of length n with with indices of the clustersm which were assigned to each datapoint</returns>
62        /// <remarks><para>If <paramref name="outCenters"/> is given not null on input, the algorithm returns the computed centers in that parameter. A
63        /// matrix may be given on input, in order to give a hint of the initial center positions. This may help to find correct cluster centers - even if
64        /// the initial hint is not exact. In order to do so, the matrix given must be of the correct size (X.D[0] by k) and <paramref name="centerInitRandom"/>
65        /// must be set to <c>false</c>.</para></remarks>
66        public static ILRetArray<double> kMeansClust(ILInArray<double> X, ILBaseArray k, int maxIterations = 10000,
67                                                                    bool centerInitRandom = true, ILOutArray<double> outCenters = null) {
68            using (ILScope.Enter(X, k)) {
69                if (object.Equals(X, null)) {
70                    throw new ILArgumentException("X must be data matrix (not null)");
71                }
72                if (X.IsEmpty) {
73                    if (!object.Equals(outCenters, null)) {
74                        if (X.S[0] > 0) {
75                            outCenters.a = empty<double>(new ILSize(X.S[0], 0));
76                        } else {
77                            outCenters.a = empty<double>(new ILSize(0, X.S[1]));
78                        }
79                        return empty<double>(X.S);
80                    }
81                }
82                if (object.Equals(k, null) || !k.IsScalar || !k.IsNumeric) {
83                    throw new ILArgumentException("number of clusters k must be numeric scalar");
84                }
85                int iK = toint32(k).GetValue(0);
86                if (X.S[1] < iK) {
87                    throw new ILArgumentException("too few datapoints provided for " + iK.ToString() + " clusters");
88                }
89                if (iK < 0) {
90                    throw new ILArgumentException("number of clusters must be positive");
91                }
92                int d = X.S[0], n = X.S[1];
93                if (iK == 0) {
94                    if (!object.Equals(outCenters, null)) {
95                        outCenters.a = empty<double>(new ILSize(d, iK));
96                    }
97                    return empty<double>(new ILSize(0, n));
98                }
99
100                // initialize centers by using random datapoints
101                ILArray<double> centers = empty<double>();
102                if (centerInitRandom) {
103                    ILArray<double> pickIndices = empty();
104                    sort(rand(1, n), pickIndices, 1, false).Dispose();
105                    centers.a = X[full, pickIndices[r(0, iK - 1)]];
106                } else {
107                    if (!isnull(outCenters) && outCenters.S[0] == d && outCenters.S[1] == iK) {
108                        centers.a = outCenters;
109                    } else {
110                        centers.a = X[full, r(0, iK - 1)];
111                    }
112                }
113
114                ILArray<double> classes = zeros<double>(1, n);
115                ILArray<double> oldCenters = centers.C;
116#if KMEANSVERBOSE
117                System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
118#endif
119                while (maxIterations-- > 0) {
120#if KMEANSVERBOSE
121                    sw.Restart();
122#endif
123                    for (int i = 0; i < n; i++) {
124                        // find cluster affiliates
125                        using (ILScope.Enter()) {
126                            ILArray<double> minDistIdx = empty();
127                            //min(sum(abs(centers - X[full, i])), minDistIdx, 1).Dispose();
128                            min(distL1(centers, X[full, i]), minDistIdx, 1).Dispose();
129                            classes[i] = (double)minDistIdx[0];
130                        }
131                    }
132                    System.Diagnostics.Debug.Print("kmeans: 1 of {0} MemoryPool.Info: {1}", maxIterations, ILMemoryPool.Pool.Info(true));
133                    for (int i = 0; i < iK; i++) {
134                        using (ILScope.Enter())
135                        {
136                            ILArray<double> inClass = X[full, find(classes == i)];
137                            if (inClass.IsEmpty) {
138                                centers[full, i] = double.NaN;
139                            } else {
140                                centers[full, i] = mean(inClass, 1);
141                            }
142                        }
143                    }
144#if KMEANSVERBOSE
145                    sw.Stop();
146                    Console.Out.WriteLine("Changed centers: {0} elapsed: {1}ms",(double)sum(any(oldCenters != centers)), sw.ElapsedMilliseconds);
147#endif
148                    if (allall(oldCenters == centers)) break;
149                    oldCenters.a = centers.C;
150                }
151                if (!object.Equals(outCenters, null))
152                    outCenters.a = centers;
153                return classes;
154            }
155        }
156
157#region HYCALPER AUTO GENERATED CODE
158
159        /// <summary>
160        /// K-Means clustering: find clusters in data matrix X
161        /// </summary>
162        /// <param name="X">Data matrix, data points are given as columns</param>
163        /// <param name="k">Initial number of clusters expected</param>
164        /// <param name="centerInitRandom">[Optional] false: pick the first k data points as initial centers, true: pick random datapoints (default)</param>
165        /// <param name="maxIterations">[Optional] Maximum number of iterations, the computation will exit after that many iterations, default: 10.000</param>
166        /// <param name="outCenters">[Input/Output/Optional] If not null on entry, outCenters will contain the centers of the clusters found, default: null</param>
167        /// <returns>Vector of length n with with indices of the clustersm which were assigned to each datapoint</returns>
168        /// <remarks><para>If <paramref name="outCenters"/> is given not null on input, the algorithm returns the computed centers in that parameter. A
169        /// matrix may be given on input, in order to give a hint of the initial center positions. This may help to find correct cluster centers - even if
170        /// the initial hint is not exact. In order to do so, the matrix given must be of the correct size (X.D[0] by k) and <paramref name="centerInitRandom"/>
171        /// must be set to <c>false</c>.</para></remarks>
172        public static ILRetArray<float> kMeansClust(ILInArray<float> X, ILBaseArray k, int maxIterations = 10000,
173                                                                    bool centerInitRandom = true, ILOutArray<float> outCenters = null) {
174            using (ILScope.Enter(X, k)) {
175                if (object.Equals(X, null)) {
176                    throw new ILArgumentException("X must be data matrix (not null)");
177                }
178                if (X.IsEmpty) {
179                    if (!object.Equals(outCenters, null)) {
180                        if (X.S[0] > 0) {
181                            outCenters.a = empty<float>(new ILSize(X.S[0], 0));
182                        } else {
183                            outCenters.a = empty<float>(new ILSize(0, X.S[1]));
184                        }
185                        return empty<float>(X.S);
186                    }
187                }
188                if (object.Equals(k, null) || !k.IsScalar || !k.IsNumeric) {
189                    throw new ILArgumentException("number of clusters k must be numeric scalar");
190                }
191                int iK = toint32(k).GetValue(0);
192                if (X.S[1] < iK) {
193                    throw new ILArgumentException("too few datapoints provided for " + iK.ToString() + " clusters");
194                }
195                if (iK < 0) {
196                    throw new ILArgumentException("number of clusters must be positive");
197                }
198                int d = X.S[0], n = X.S[1];
199                if (iK == 0) {
200                    if (!object.Equals(outCenters, null)) {
201                        outCenters.a = empty<float>(new ILSize(d, iK));
202                    }
203                    return empty<float>(new ILSize(0, n));
204                }
205
206                // initialize centers by using random datapoints
207                ILArray<float> centers = empty<float>();
208                if (centerInitRandom) {
209                    ILArray<double> pickIndices = empty();
210                    sort(rand(1, n), pickIndices, 1, false).Dispose();
211                    centers.a = X[full, pickIndices[r(0, iK - 1)]];
212                } else {
213                    if (!isnull(outCenters) && outCenters.S[0] == d && outCenters.S[1] == iK) {
214                        centers.a = outCenters;
215                    } else {
216                        centers.a = X[full, r(0, iK - 1)];
217                    }
218                }
219
220                ILArray<float> classes = zeros<float>(1, n);
221                ILArray<float> oldCenters = centers.C;
222#if KMEANSVERBOSE
223                System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
224#endif
225                while (maxIterations-- > 0) {
226#if KMEANSVERBOSE
227                    sw.Restart();
228#endif
229                    for (int i = 0; i < n; i++) {
230                        // find cluster affiliates
231                        using (ILScope.Enter()) {
232                            ILArray<double> minDistIdx = empty();
233                            //min(sum(abs(centers - X[full, i])), minDistIdx, 1).Dispose();
234                            min(distL1(centers, X[full, i]), minDistIdx, 1).Dispose();
235                            classes[i] = (float)minDistIdx[0];
236                        }
237                    }
238                    System.Diagnostics.Debug.Print("kmeans: 1 of {0} MemoryPool.Info: {1}", maxIterations, ILMemoryPool.Pool.Info(true));
239                    for (int i = 0; i < iK; i++) {
240                        using (ILScope.Enter())
241                        {
242                            ILArray<float> inClass = X[full, find(classes == i)];
243                            if (inClass.IsEmpty) {
244                                centers[full, i] = float.NaN;
245                            } else {
246                                centers[full, i] = mean(inClass, 1);
247                            }
248                        }
249                    }
250#if KMEANSVERBOSE
251                    sw.Stop();
252                    Console.Out.WriteLine("Changed centers: {0} elapsed: {1}ms",(double)sum(any(oldCenters != centers)), sw.ElapsedMilliseconds);
253#endif
254                    if (allall(oldCenters == centers)) break;
255                    oldCenters.a = centers.C;
256                }
257                if (!object.Equals(outCenters, null))
258                    outCenters.a = centers;
259                return classes;
260            }
261        }
262
263#endregion HYCALPER AUTO GENERATED CODE
264
265    }
266}
Note: See TracBrowser for help on using the repository browser.