/// /// This file is part of ILNumerics Community Edition. /// /// ILNumerics Community Edition - high performance computing for applications. /// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net /// /// ILNumerics Community Edition is free software: you can redistribute it and/or modify /// it under the terms of the GNU General Public License version 3 as published by /// the Free Software Foundation. /// /// ILNumerics Community Edition is distributed in the hope that it will be useful, /// but WITHOUT ANY WARRANTY; without even the implied warranty of /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the /// GNU General Public License for more details. /// /// You should have received a copy of the GNU General Public License /// along with ILNumerics Community Edition. See the file License.txt in the root /// of your distribution package. If not, see . /// /// In addition this software uses the following components and/or licenses: /// /// ================================================================================= /// The Open Toolkit Library License /// /// Copyright (c) 2006 - 2009 the Open Toolkit library. /// /// Permission is hereby granted, free of charge, to any person obtaining a copy /// of this software and associated documentation files (the "Software"), to deal /// in the Software without restriction, including without limitation the rights to /// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of /// the Software, and to permit persons to whom the Software is furnished to do /// so, subject to the following conditions: /// /// The above copyright notice and this permission notice shall be included in all /// copies or substantial portions of the Software. /// /// ================================================================================= /// using System; using System.Collections.Generic; using System.Text; using ILNumerics.Exceptions; namespace ILNumerics { public partial class ILMath { /// /// K-Means clustering: find clusters in data matrix X /// /// Data matrix, data points are given as columns /// Initial number of clusters expected /// [Optional] false: pick the first k data points as initial centers, true: pick random datapoints (default) /// [Optional] Maximum number of iterations, the computation will exit after that many iterations, default: 10.000 /// [Input/Output/Optional] If not null on entry, outCenters will contain the centers of the clusters found, default: null /// Vector of length n with with indices of the clustersm which were assigned to each datapoint /// If is given not null on input, the algorithm returns the computed centers in that parameter. A /// matrix may be given on input, in order to give a hint of the initial center positions. This may help to find correct cluster centers - even if /// the initial hint is not exact. In order to do so, the matrix given must be of the correct size (X.D[0] by k) and /// must be set to false. public static ILRetArray kMeansClust(ILInArray X, ILBaseArray k, int maxIterations = 10000, bool centerInitRandom = true, ILOutArray outCenters = null) { using (ILScope.Enter(X, k)) { if (object.Equals(X, null)) { throw new ILArgumentException("X must be data matrix (not null)"); } if (X.IsEmpty) { if (!object.Equals(outCenters, null)) { if (X.S[0] > 0) { outCenters.a = empty(new ILSize(X.S[0], 0)); } else { outCenters.a = empty(new ILSize(0, X.S[1])); } return empty(X.S); } } if (object.Equals(k, null) || !k.IsScalar || !k.IsNumeric) { throw new ILArgumentException("number of clusters k must be numeric scalar"); } int iK = toint32(k).GetValue(0); if (X.S[1] < iK) { throw new ILArgumentException("too few datapoints provided for " + iK.ToString() + " clusters"); } if (iK < 0) { throw new ILArgumentException("number of clusters must be positive"); } int d = X.S[0], n = X.S[1]; if (iK == 0) { if (!object.Equals(outCenters, null)) { outCenters.a = empty(new ILSize(d, iK)); } return empty(new ILSize(0, n)); } // initialize centers by using random datapoints ILArray centers = empty(); if (centerInitRandom) { ILArray pickIndices = empty(); sort(rand(1, n), pickIndices, 1, false).Dispose(); centers.a = X[full, pickIndices[r(0, iK - 1)]]; } else { if (!isnull(outCenters) && outCenters.S[0] == d && outCenters.S[1] == iK) { centers.a = outCenters; } else { centers.a = X[full, r(0, iK - 1)]; } } ILArray classes = zeros(1, n); ILArray oldCenters = centers.C; #if KMEANSVERBOSE System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch(); #endif while (maxIterations-- > 0) { #if KMEANSVERBOSE sw.Restart(); #endif for (int i = 0; i < n; i++) { // find cluster affiliates using (ILScope.Enter()) { ILArray minDistIdx = empty(); //min(sum(abs(centers - X[full, i])), minDistIdx, 1).Dispose(); min(distL1(centers, X[full, i]), minDistIdx, 1).Dispose(); classes[i] = (double)minDistIdx[0]; } } System.Diagnostics.Debug.Print("kmeans: 1 of {0} MemoryPool.Info: {1}", maxIterations, ILMemoryPool.Pool.Info(true)); for (int i = 0; i < iK; i++) { using (ILScope.Enter()) { ILArray inClass = X[full, find(classes == i)]; if (inClass.IsEmpty) { centers[full, i] = double.NaN; } else { centers[full, i] = mean(inClass, 1); } } } #if KMEANSVERBOSE sw.Stop(); Console.Out.WriteLine("Changed centers: {0} elapsed: {1}ms",(double)sum(any(oldCenters != centers)), sw.ElapsedMilliseconds); #endif if (allall(oldCenters == centers)) break; oldCenters.a = centers.C; } if (!object.Equals(outCenters, null)) outCenters.a = centers; return classes; } } #region HYCALPER AUTO GENERATED CODE /// /// K-Means clustering: find clusters in data matrix X /// /// Data matrix, data points are given as columns /// Initial number of clusters expected /// [Optional] false: pick the first k data points as initial centers, true: pick random datapoints (default) /// [Optional] Maximum number of iterations, the computation will exit after that many iterations, default: 10.000 /// [Input/Output/Optional] If not null on entry, outCenters will contain the centers of the clusters found, default: null /// Vector of length n with with indices of the clustersm which were assigned to each datapoint /// If is given not null on input, the algorithm returns the computed centers in that parameter. A /// matrix may be given on input, in order to give a hint of the initial center positions. This may help to find correct cluster centers - even if /// the initial hint is not exact. In order to do so, the matrix given must be of the correct size (X.D[0] by k) and /// must be set to false. public static ILRetArray kMeansClust(ILInArray X, ILBaseArray k, int maxIterations = 10000, bool centerInitRandom = true, ILOutArray outCenters = null) { using (ILScope.Enter(X, k)) { if (object.Equals(X, null)) { throw new ILArgumentException("X must be data matrix (not null)"); } if (X.IsEmpty) { if (!object.Equals(outCenters, null)) { if (X.S[0] > 0) { outCenters.a = empty(new ILSize(X.S[0], 0)); } else { outCenters.a = empty(new ILSize(0, X.S[1])); } return empty(X.S); } } if (object.Equals(k, null) || !k.IsScalar || !k.IsNumeric) { throw new ILArgumentException("number of clusters k must be numeric scalar"); } int iK = toint32(k).GetValue(0); if (X.S[1] < iK) { throw new ILArgumentException("too few datapoints provided for " + iK.ToString() + " clusters"); } if (iK < 0) { throw new ILArgumentException("number of clusters must be positive"); } int d = X.S[0], n = X.S[1]; if (iK == 0) { if (!object.Equals(outCenters, null)) { outCenters.a = empty(new ILSize(d, iK)); } return empty(new ILSize(0, n)); } // initialize centers by using random datapoints ILArray centers = empty(); if (centerInitRandom) { ILArray pickIndices = empty(); sort(rand(1, n), pickIndices, 1, false).Dispose(); centers.a = X[full, pickIndices[r(0, iK - 1)]]; } else { if (!isnull(outCenters) && outCenters.S[0] == d && outCenters.S[1] == iK) { centers.a = outCenters; } else { centers.a = X[full, r(0, iK - 1)]; } } ILArray classes = zeros(1, n); ILArray oldCenters = centers.C; #if KMEANSVERBOSE System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch(); #endif while (maxIterations-- > 0) { #if KMEANSVERBOSE sw.Restart(); #endif for (int i = 0; i < n; i++) { // find cluster affiliates using (ILScope.Enter()) { ILArray minDistIdx = empty(); //min(sum(abs(centers - X[full, i])), minDistIdx, 1).Dispose(); min(distL1(centers, X[full, i]), minDistIdx, 1).Dispose(); classes[i] = (float)minDistIdx[0]; } } System.Diagnostics.Debug.Print("kmeans: 1 of {0} MemoryPool.Info: {1}", maxIterations, ILMemoryPool.Pool.Info(true)); for (int i = 0; i < iK; i++) { using (ILScope.Enter()) { ILArray inClass = X[full, find(classes == i)]; if (inClass.IsEmpty) { centers[full, i] = float.NaN; } else { centers[full, i] = mean(inClass, 1); } } } #if KMEANSVERBOSE sw.Stop(); Console.Out.WriteLine("Changed centers: {0} elapsed: {1}ms",(double)sum(any(oldCenters != centers)), sw.ElapsedMilliseconds); #endif if (allall(oldCenters == centers)) break; oldCenters.a = centers.C; } if (!object.Equals(outCenters, null)) outCenters.a = centers; return classes; } } #endregion HYCALPER AUTO GENERATED CODE } }