/// /// This file is part of ILNumerics Community Edition. /// /// ILNumerics Community Edition - high performance computing for applications. /// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net /// /// ILNumerics Community Edition is free software: you can redistribute it and/or modify /// it under the terms of the GNU General Public License version 3 as published by /// the Free Software Foundation. /// /// ILNumerics Community Edition is distributed in the hope that it will be useful, /// but WITHOUT ANY WARRANTY; without even the implied warranty of /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the /// GNU General Public License for more details. /// /// You should have received a copy of the GNU General Public License /// along with ILNumerics Community Edition. See the file License.txt in the root /// of your distribution package. If not, see . /// /// In addition this software uses the following components and/or licenses: /// /// ================================================================================= /// The Open Toolkit Library License /// /// Copyright (c) 2006 - 2009 the Open Toolkit library. /// /// Permission is hereby granted, free of charge, to any person obtaining a copy /// of this software and associated documentation files (the "Software"), to deal /// in the Software without restriction, including without limitation the rights to /// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of /// the Software, and to permit persons to whom the Software is furnished to do /// so, subject to the following conditions: /// /// The above copyright notice and this permission notice shall be included in all /// copies or substantial portions of the Software. 
///
/// =================================================================================
///
using System;
using System.Collections.Generic;
using System.Text;
using ILNumerics.Exceptions;

namespace ILNumerics {
    public partial class ILMath {
        /// <summary>
        /// find clusters for data matrix X
        /// </summary>
        /// <param name="X">data matrix, data points are given as columns</param>
        /// <param name="k">initial number of clusters expected</param>
        /// <param name="maxIterations">maximum number of iterations, the computation will exit after that many iterations.</param>
        /// <param name="centerInitRandom">false: pick the first k data points as initial centers, true: pick random datapoints</param>
        /// <returns>vector of length n with indices of clusters assigned to each datapoint</returns>
        /// <remarks>Forwards to the five argument overload, passing null for outCenters.</remarks>
        public static ILRetArray kMeansClust(ILInArray X, ILBaseArray k, int maxIterations, bool centerInitRandom) {
            return kMeansClust(X, k, maxIterations, centerInitRandom, null);
        }

        /// <summary>
        /// find clusters for data matrix X
        /// </summary>
        /// <param name="X">data matrix, data points are given as columns</param>
        /// <param name="k">initial number of clusters expected, must be a numeric scalar</param>
        /// <param name="maxIterations">maximum number of iterations, the computation will exit after that many iterations.</param>
        /// <param name="centerInitRandom">false: pick the first k data points as initial centers, true: pick random datapoints</param>
        /// <param name="outCenters">return type. if assigned on entry, outCenters will contain the centers of the clusters found.</param>
        /// <returns>vector of length n with indices of clusters assigned to each datapoint</returns>
        /// <exception cref="ILNumerics.Exceptions.ILArgumentException">X is null, k is null or not a numeric scalar,
        /// X has fewer columns than k, or k is negative</exception>
        /// <remarks>An empty X yields an empty result (and empty outCenters, if given) without looking at k.
        /// k == 0 yields an empty result as well.</remarks>
        public static ILRetArray kMeansClust (ILInArray X, ILBaseArray k, int maxIterations, bool centerInitRandom, ILOutArray outCenters) {
            using (ILScope.Enter(X, k)) {
                if (object.Equals(X, null)) {
                    throw new ILArgumentException("X must be data matrix (not null)");
                }
                if (X.IsEmpty) {
                    if (!object.Equals(outCenters, null)) {
                        if (X.D[0] > 0) {
                            outCenters.a = empty(new ILSize(X.D[0], 0));
                        } else {
                            outCenters.a = empty(new ILSize(0, X.D[1]));
                        }
                    }
                    // Return for every empty input. Formerly this return was nested inside the
                    // outCenters check above, so empty data fell through into the clustering
                    // code whenever the caller did not request the centers.
                    return empty(X.D);
                }
                if (object.Equals(k, null) || !k.IsScalar || !k.IsNumeric) {
                    throw new ILArgumentException("number of clusters k must be numeric scalar");
                }
                int iK = toint32(k).GetValue(0);
                if (X.D[1] < iK) {
                    throw new ILArgumentException("too few datapoints provided for " + iK.ToString() + " clusters");
                }
                if (iK < 0) {
                    throw new ILArgumentException("number of clusters must be positive");
                }
                // d: length of a single data point (rows of X), n: number of data points (columns of X)
                int d = X.D[0], n = X.D[1];
                if (iK == 0) {
                    if (!object.Equals(outCenters, null)) {
                        outCenters.a = empty(new ILSize(d, iK));
                    }
                    return empty(new ILSize(0, n));
                }
                // initialize centers: either iK columns selected via pickIndices, or the first iK columns
                ILArray centers = empty();
                if (centerInitRandom) {
                    // pickIndices is filled by sorting n random values; its first iK entries select the columns
                    ILArray pickIndices = empty();
                    sort(rand(1, n), pickIndices, 1, false).Dispose();
                    centers.a = X[full, pickIndices[r(0, iK - 1)]];
                } else {
                    centers.a = X[full, r(0, iK - 1)];
                }
                ILArray classes = zeros(1, n);
                ILArray oldCenters = centers.C;
#if KMEANSVERBOSE
                System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
#endif
                while (maxIterations-- > 0) {
#if KMEANSVERBOSE
                    sw.Restart();
#endif
                    // assignment step, done on the raw element arrays;
                    // element (row, col) is read from index col * d + row
                    double[] Xarr = X.GetArrayForRead();
                    double[] Carr = classes.GetArrayForWrite();
                    double[] CentArr = centers.GetArrayForRead();
                    double[] Xcur = ILMemoryPool.Pool.New(d);
                    for (int i = 0; i < n; i++) {
                        // copy current data point X[full, i]
                        int startInd = i * d;
                        for (int a = d; a-- > 0; ) {
                            Xcur[a] = Xarr[startInd + a];
                        }
                        // squared euclidean distance to every center, keep the nearest
                        double dist = double.MaxValue;
                        for (int c = 0; c < iK; c++) {
                            double tmp = 0, tmp1 = 0;
                            startInd = c * d;
                            for (int c1 = d; c1-- > 0; ) {
                                tmp = CentArr[c1 + startInd] - Xcur[c1];
                                tmp1 += tmp * tmp;
                            }
                            if (tmp1 < dist) {
                                dist = tmp1;
                                Carr[i] = c;
                                // the point coincides with this center, no center can be nearer
                                if (dist == 0) break;
                            }
                        }
                    }
                    // hand the scratch buffer back to the pool
                    ILMemoryPool.Pool.RegisterObject(Xcur);

                    System.Diagnostics.Debug.Print("kmeans: 1 of {0} MemoryPool.Info: {1}", maxIterations, ILMemoryPool.Pool.Info(true));

                    // update step: every center becomes the mean of the data points assigned to it;
                    // a cluster without any data point gets NaN as center
                    for (int i = 0; i < iK; i++) {
                        using (EnterScope()) {
                            ILArray inClass = X[full, find(classes == i)];
                            if (inClass.IsEmpty) {
                                centers[full, i] = double.NaN;
                            } else {
                                centers[full, i] = mean(inClass, 1);
                            }
                        }
                    }
#if KMEANSVERBOSE
                    sw.Stop();
                    Console.Out.WriteLine("Changed centers: {0} elapsed: {1}ms", (double)sum(any(oldCenters != centers)), sw.ElapsedMilliseconds);
#endif
                    // converged: no center moved during this iteration
                    // NOTE(review): if '==' treats NaN as unequal to NaN, this test cannot succeed once
                    // a cluster is empty and the loop then runs until maxIterations is used up - confirm.
                    if (allall(oldCenters == centers)) break;
                    oldCenters.a = centers.C;
                }
                if (!object.Equals(outCenters, null)) {
                    outCenters.a = centers;
                }
                return classes;
            }
        }
    }
}