Context Navigation

source: branches/ParameterBinding/HeuristicLab.ExtLibs/HeuristicLab.ALGLIB/2.5.0/ALGLIB-2.5.0/kmeans.cs @ 12547

Visit:

Last change on this file since 12547 was 4068, checked in by swagner, 14 years ago
Sorted usings and removed unused usings in entire solution (#1094)
File size: 10.3 KB

Line
1	/*************************************************************************
2	Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).
3
4	>>> SOURCE LICENSE >>>
5	This program is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation (www.fsf.org); either version 2 of the
8	License, or (at your option) any later version.
9
10	This program is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	A copy of the GNU General Public License is available at
16	http://www.fsf.org/licensing/licenses
17
18	>>> END OF LICENSE >>>
19	*************************************************************************/
20
21
22	namespace alglib {
23	public class kmeans {
24	/*************************************************************************
25	k-means++ clusterization
26
27	INPUT PARAMETERS:
28	XY - dataset, array [0..NPoints-1,0..NVars-1].
29	NPoints - dataset size, NPoints>=K
30	NVars - number of variables, NVars>=1
31	K - desired number of clusters, K>=1
32	Restarts - number of restarts, Restarts>=1
33
34	OUTPUT PARAMETERS:
35	Info - return code:
36	* -3, if taskis degenerate (number of distinct points is
37	less than K)
38	* -1, if incorrect NPoints/NFeatures/K/Restarts was passed
39	* 1, if subroutine finished successfully
40	C - array[0..NVars-1,0..K-1].matrix whose columns store
41	cluster's centers
42	XYC - array which contains number of clusters dataset points
43	belong to.
44
45	-- ALGLIB --
46	Copyright 21.03.2009 by Bochkanov Sergey
47	*************************************************************************/
48	public static void kmeansgenerate(ref double[,] xy,
49	int npoints,
50	int nvars,
51	int k,
52	int restarts,
53	ref int info,
54	ref double[,] c,
55	ref int[] xyc) {
56	int i = 0;
57	int j = 0;
58	double[,] ct = new double[0, 0];
59	double[,] ctbest = new double[0, 0];
60	double e = 0;
61	double ebest = 0;
62	double[] x = new double[0];
63	double[] tmp = new double[0];
64	double[] d2 = new double[0];
65	double[] p = new double[0];
66	int[] csizes = new int[0];
67	bool[] cbusy = new bool[0];
68	double v = 0;
69	int cclosest = 0;
70	double dclosest = 0;
71	double[] work = new double[0];
72	bool waschanges = new bool();
73	bool zerosizeclusters = new bool();
74	int pass = 0;
75	int i_ = 0;
76
77
78	//
79	// Test parameters
80	//
81	if (npoints < k \| nvars < 1 \| k < 1 \| restarts < 1) {
82	info = -1;
83	return;
84	}
85
86	//
87	// TODO: special case K=1
88	// TODO: special case K=NPoints
89	//
90	info = 1;
91
92	//
93	// Multiple passes of k-means++ algorithm
94	//
95	ct = new double[k - 1 + 1, nvars - 1 + 1];
96	ctbest = new double[k - 1 + 1, nvars - 1 + 1];
97	xyc = new int[npoints - 1 + 1];
98	d2 = new double[npoints - 1 + 1];
99	p = new double[npoints - 1 + 1];
100	tmp = new double[nvars - 1 + 1];
101	csizes = new int[k - 1 + 1];
102	cbusy = new bool[k - 1 + 1];
103	ebest = AP.Math.MaxRealNumber;
104	for (pass = 1; pass <= restarts; pass++) {
105
106	//
107	// Select initial centers using k-means++ algorithm
108	// 1. Choose first center at random
109	// 2. Choose next centers using their distance from centers already chosen
110	//
111	// Note that for performance reasons centers are stored in ROWS of CT, not
112	// in columns. We'll transpose CT in the end and store it in the C.
113	//
114	i = AP.Math.RandomInteger(npoints);
115	for (i_ = 0; i_ <= nvars - 1; i_++) {
116	ct[0, i_] = xy[i, i_];
117	}
118	cbusy[0] = true;
119	for (i = 1; i <= k - 1; i++) {
120	cbusy[i] = false;
121	}
122	if (!selectcenterpp(ref xy, npoints, nvars, ref ct, cbusy, k, ref d2, ref p, ref tmp)) {
123	info = -3;
124	return;
125	}
126
127	//
128	// Update centers:
129	// 2. update center positions
130	//
131	while (true) {
132
133	//
134	// fill XYC with center numbers
135	//
136	waschanges = false;
137	for (i = 0; i <= npoints - 1; i++) {
138	cclosest = -1;
139	dclosest = AP.Math.MaxRealNumber;
140	for (j = 0; j <= k - 1; j++) {
141	for (i_ = 0; i_ <= nvars - 1; i_++) {
142	tmp[i_] = xy[i, i_];
143	}
144	for (i_ = 0; i_ <= nvars - 1; i_++) {
145	tmp[i_] = tmp[i_] - ct[j, i_];
146	}
147	v = 0.0;
148	for (i_ = 0; i_ <= nvars - 1; i_++) {
149	v += tmp[i_] * tmp[i_];
150	}
151	if ((double)(v) < (double)(dclosest)) {
152	cclosest = j;
153	dclosest = v;
154	}
155	}
156	if (xyc[i] != cclosest) {
157	waschanges = true;
158	}
159	xyc[i] = cclosest;
160	}
161
162	//
163	// Update centers
164	//
165	for (j = 0; j <= k - 1; j++) {
166	csizes[j] = 0;
167	}
168	for (i = 0; i <= k - 1; i++) {
169	for (j = 0; j <= nvars - 1; j++) {
170	ct[i, j] = 0;
171	}
172	}
173	for (i = 0; i <= npoints - 1; i++) {
174	csizes[xyc[i]] = csizes[xyc[i]] + 1;
175	for (i_ = 0; i_ <= nvars - 1; i_++) {
176	ct[xyc[i], i_] = ct[xyc[i], i_] + xy[i, i_];
177	}
178	}
179	zerosizeclusters = false;
180	for (i = 0; i <= k - 1; i++) {
181	cbusy[i] = csizes[i] != 0;
182	zerosizeclusters = zerosizeclusters \| csizes[i] == 0;
183	}
184	if (zerosizeclusters) {
185
186	//
187	// Some clusters have zero size - rare, but possible.
188	// We'll choose new centers for such clusters using k-means++ rule
189	// and restart algorithm
190	//
191	if (!selectcenterpp(ref xy, npoints, nvars, ref ct, cbusy, k, ref d2, ref p, ref tmp)) {
192	info = -3;
193	return;
194	}
195	continue;
196	}
197	for (j = 0; j <= k - 1; j++) {
198	v = (double)(1) / (double)(csizes[j]);
199	for (i_ = 0; i_ <= nvars - 1; i_++) {
200	ct[j, i_] = v * ct[j, i_];
201	}
202	}
203
204	//
205	// if nothing has changed during iteration
206	//
207	if (!waschanges) {
208	break;
209	}
210	}
211
212	//
213	// 3. Calculate E, compare with best centers found so far
214	//
215	e = 0;
216	for (i = 0; i <= npoints - 1; i++) {
217	for (i_ = 0; i_ <= nvars - 1; i_++) {
218	tmp[i_] = xy[i, i_];
219	}
220	for (i_ = 0; i_ <= nvars - 1; i_++) {
221	tmp[i_] = tmp[i_] - ct[xyc[i], i_];
222	}
223	v = 0.0;
224	for (i_ = 0; i_ <= nvars - 1; i_++) {
225	v += tmp[i_] * tmp[i_];
226	}
227	e = e + v;
228	}
229	if ((double)(e) < (double)(ebest)) {
230
231	//
232	// store partition
233	//
234	blas.copymatrix(ref ct, 0, k - 1, 0, nvars - 1, ref ctbest, 0, k - 1, 0, nvars - 1);
235	}
236	}
237
238	//
239	// Copy and transpose
240	//
241	c = new double[nvars - 1 + 1, k - 1 + 1];
242	blas.copyandtranspose(ref ctbest, 0, k - 1, 0, nvars - 1, ref c, 0, nvars - 1, 0, k - 1);
243	}
244
245
246	/*************************************************************************
247	Select center for a new cluster using k-means++ rule
248	*************************************************************************/
249	private static bool selectcenterpp(ref double[,] xy,
250	int npoints,
251	int nvars,
252	ref double[,] centers,
253	bool[] busycenters,
254	int ccnt,
255	ref double[] d2,
256	ref double[] p,
257	ref double[] tmp) {
258	bool result = new bool();
259	int i = 0;
260	int j = 0;
261	int cc = 0;
262	double v = 0;
263	double s = 0;
264	int i_ = 0;
265
266	busycenters = (bool[])busycenters.Clone();
267
268	result = true;
269	for (cc = 0; cc <= ccnt - 1; cc++) {
270	if (!busycenters[cc]) {
271
272	//
273	// fill D2
274	//
275	for (i = 0; i <= npoints - 1; i++) {
276	d2[i] = AP.Math.MaxRealNumber;
277	for (j = 0; j <= ccnt - 1; j++) {
278	if (busycenters[j]) {
279	for (i_ = 0; i_ <= nvars - 1; i_++) {
280	tmp[i_] = xy[i, i_];
281	}
282	for (i_ = 0; i_ <= nvars - 1; i_++) {
283	tmp[i_] = tmp[i_] - centers[j, i_];
284	}
285	v = 0.0;
286	for (i_ = 0; i_ <= nvars - 1; i_++) {
287	v += tmp[i_] * tmp[i_];
288	}
289	if ((double)(v) < (double)(d2[i])) {
290	d2[i] = v;
291	}
292	}
293	}
294	}
295
296	//
297	// calculate P (non-cumulative)
298	//
299	s = 0;
300	for (i = 0; i <= npoints - 1; i++) {
301	s = s + d2[i];
302	}
303	if ((double)(s) == (double)(0)) {
304	result = false;
305	return result;
306	}
307	s = 1 / s;
308	for (i_ = 0; i_ <= npoints - 1; i_++) {
309	p[i_] = s * d2[i_];
310	}
311
312	//
313	// choose one of points with probability P
314	// random number within (0,1) is generated and
315	// inverse empirical CDF is used to randomly choose a point.
316	//
317	s = 0;
318	v = AP.Math.RandomReal();
319	for (i = 0; i <= npoints - 1; i++) {
320	s = s + p[i];
321	if ((double)(v) <= (double)(s) \| i == npoints - 1) {
322	for (i_ = 0; i_ <= nvars - 1; i_++) {
323	centers[cc, i_] = xy[i, i_];
324	}
325	busycenters[cc] = true;
326	break;
327	}
328	}
329	}
330	}
331	return result;
332	}
333	}
334	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences