/// /// This file is part of ILNumerics Community Edition. /// /// ILNumerics Community Edition - high performance computing for applications. /// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net /// /// ILNumerics Community Edition is free software: you can redistribute it and/or modify /// it under the terms of the GNU General Public License version 3 as published by /// the Free Software Foundation. /// /// ILNumerics Community Edition is distributed in the hope that it will be useful, /// but WITHOUT ANY WARRANTY; without even the implied warranty of /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the /// GNU General Public License for more details. /// /// You should have received a copy of the GNU General Public License /// along with ILNumerics Community Edition. See the file License.txt in the root /// of your distribution package. If not, see . /// /// In addition this software uses the following components and/or licenses: /// /// ================================================================================= /// The Open Toolkit Library License /// /// Copyright (c) 2006 - 2009 the Open Toolkit library. /// /// Permission is hereby granted, free of charge, to any person obtaining a copy /// of this software and associated documentation files (the "Software"), to deal /// in the Software without restriction, including without limitation the rights to /// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of /// the Software, and to permit persons to whom the Software is furnished to do /// so, subject to the following conditions: /// /// The above copyright notice and this permission notice shall be included in all /// copies or substantial portions of the Software. 
///
/// =================================================================================
///

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ILNumerics.Exceptions;

namespace ILNumerics {

    public partial class ILMath {

        /// <summary>
        /// Distance metrics which can be requested from knn. Only a subset is implemented there:
        /// Euclidian_L2, Manhattan_L1, Minkowski, Cosine, Pearsons and Hamming. All other members
        /// make knn throw an ILArgumentException.
        /// </summary>
        public enum DistanceMetrics {
            Euclidian_L2,
            Mahalanobis,
            Manhattan_L1,
            Minkowski,
            Chebychev,
            Cosine,
            Pearsons,
            Hamming,
            Jaccard,
            Spearman
        }

        // NOTE(review): both knn overloads and both testStable overloads are spelled with identical,
        // non-generic signatures in this file. The (double)/(float) casts and the
        // MachineParameterDouble/MachineParameterSingle constants suggest that element type
        // arguments were lost somewhere - confirm against the template this code is generated from.

        /// <summary>
        /// Search for the k nearest neighbors of every sample in <paramref name="Samples"/>.
        /// </summary>
        /// <param name="Samples">Samples matrix, samples in columns; the number of rows (dimensionality) must match the number of rows in <paramref name="Neighbors"/>.</param>
        /// <param name="Neighbors">Matrix of training samples / neighbors which is searched for matching points; rows: dimensionality, columns: number of points.</param>
        /// <param name="k">[Optional] Number of neighbors to return; must lie in the range 0 ≤ k ≤ number of columns of <paramref name="Neighbors"/>; default: 10.</param>
        /// <param name="metric">[Optional] Distance metric, one out of the enumeration. Supported are: Euclidian_L2, Manhattan_L1,
        /// Minkowski, Cosine, Pearsons and Hamming distances; default: 'Euclidian_L2'.</param>
        /// <param name="minkowski_parameter">[Optional] Exponent for the Minkowski distance; default: 2.</param>
        /// <param name="unstable_error">[Optional] For cosine and pearson distances: if some samples lead to numerical instabilities, an exception is generated; default: true.</param>
        /// <returns>Matrix of size k x (number of columns of <paramref name="Samples"/>); column i holds the indices of the
        /// points in <paramref name="Neighbors"/> selected for sample i.</returns>
        /// <exception cref="ILArgumentException">k is negative or larger than the number of neighbor points; 'Neighbors' is null or empty;
        /// 'Samples' is null; the row counts differ; the data fail the stability test (cosine / pearson, only if
        /// <paramref name="unstable_error"/> is set); the data are not binary (hamming); the metric is not supported.</exception>
        public static ILRetArray knn(ILInArray Samples, ILInArray Neighbors, int k = 10, DistanceMetrics metric = DistanceMetrics.Euclidian_L2, double minkowski_parameter = 2.0, bool unstable_error = true) {
            using (ILScope.Enter(Samples, Neighbors)) {
                ILArray samples = Samples;
                ILArray neighbors = Neighbors;
                if (k < 0) {
                    throw new ILArgumentException("k must be greater or equal 0");
                }
                if (isnullorempty(neighbors)) {
                    throw new ILArgumentException("input argument 'neighbors' must not be null or empty");
                }
                if (isnull(samples)) {
                    throw new ILArgumentException("input argument 'samples' must not be null");
                }
                if (samples.S[0] != neighbors.S[0]) {
                    throw new ILArgumentException("number of rows for 'neighbors' and 'samples' must match");
                }
                if (k > neighbors.S[1]) {
                    throw new ILArgumentException("k must be smaller or equal to the number of datapoints (number of columns) in A");
                }
                // am: dimensionality (rows), sn: number of samples (columns)
                int am = neighbors.S[0], sn = samples.S[1];
                ILArray ret = zeros(k, sn);
                switch (metric) {
                    case DistanceMetrics.Euclidian_L2:
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = neighbors - samples[full, i];
                                // squared distances: no square root is taken, the values are used for ranking only
                                dist.a = sum(dist * dist, 0);
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Manhattan_L1:
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = neighbors - samples[full, i];
                                dist.a = sum(abs(dist), 0);
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Minkowski:
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = neighbors - samples[full, i];
                                // abs(): the coordinate differences are signed; raising them to the exponent directly
                                // gives negative terms for odd exponents and undefined values for fractional ones.
                                // The final root is omitted, it does not change the ranking.
                                dist.a = sum(pow(abs(dist), (double)minkowski_parameter), 0);
                                ILArray indices = empty();
                                // dist was reduced along dimension 0, so search along dimension 1 -
                                // same as in the Euclidian_L2 and Manhattan_L1 branches
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Cosine:
                        // per-column norms of samples and neighbors
                        ILArray samples_normalized = sqrt(sum(samples * samples, 0));
                        ILArray neighbs_normalized = sqrt(sum(neighbors * neighbors, 0));
                        if (unstable_error && !testStable(samples_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: some samples are too close to 0. Try using a different metric instead!");
                        }
                        if (unstable_error && !testStable(neighbs_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: some neighbors are too close to 0. Try using a different metric instead!");
                        }
                        // from here on 'neighbs_normalized' holds the neighbors divided by their norms
                        neighbs_normalized.a = neighbors / neighbs_normalized;
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                // the product with the transposed neighbors is searched along dimension 0
                                ILArray dist = 1 - multiply(neighbs_normalized.T, samples[full, i]) / samples_normalized[i];
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 0).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 0, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Pearsons:
                        // same computation as for Cosine, applied to data with the column means removed
                        ILArray samples_centered = samples - mean(samples, 0);
                        ILArray neighbs_centered = neighbors - mean(neighbors, 0);
                        // both variables are declared in the Cosine section (same switch scope)
                        samples_normalized = sqrt(sum(samples_centered * samples_centered, 0));
                        neighbs_normalized = sqrt(sum(neighbs_centered * neighbs_centered, 0));
                        if (unstable_error && !testStable(samples_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: standard deviation for some sample points is close to zero. Try using a different metric instead!");
                        }
                        if (unstable_error && !testStable(neighbs_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
                        }
                        neighbs_normalized.a = neighbs_centered / neighbs_normalized;
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = 1 - multiply(neighbs_normalized.T, samples_centered[full, i]) / samples_normalized[i];
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 0).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 0, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Hamming:
                        // only binary data are accepted
                        if (samples.Any((a) => { return a != 0 && a != 1; })) {
                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'samples'");
                        }
                        if (neighbors.Any((a) => { return a != 0 && a != 1; })) {
                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'neighbors'");
                        }
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                // number of differing elements, divided by the dimensionality
                                ILArray dist = sum(abs(neighbors - samples[full, i]), 0) / am;
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    default:
                        throw new ILArgumentException("the selected distance is not supported");
                }
                return ret;
            }
        }

        /// <summary>
        /// Test for numerical instability, expects positive data only!
        /// </summary>
        /// <param name="samples_normalized">Input data</param>
        /// <returns>true: no instability detected (the lower limit of the data is greater than the upper limit scaled
        /// by MachineParameterDouble.eps); false: possible instability</returns>
        private static bool testStable(ILInArray samples_normalized) {
            using (ILScope.Enter(samples_normalized)) {
                double max, min;
                samples_normalized.GetLimits(out min, out max);
                return min > MachineParameterDouble.eps * max;
            }
        }

        #region HYCALPER AUTO GENERATED CODE

        /// <summary>
        /// Search for the k nearest neighbors of every sample in <paramref name="Samples"/>.
        /// </summary>
        /// <param name="Samples">Samples matrix, samples in columns; the number of rows (dimensionality) must match the number of rows in <paramref name="Neighbors"/>.</param>
        /// <param name="Neighbors">Matrix of training samples / neighbors which is searched for matching points; rows: dimensionality, columns: number of points.</param>
        /// <param name="k">[Optional] Number of neighbors to return; must lie in the range 0 ≤ k ≤ number of columns of <paramref name="Neighbors"/>; default: 10.</param>
        /// <param name="metric">[Optional] Distance metric, one out of the enumeration. Supported are: Euclidian_L2, Manhattan_L1,
        /// Minkowski, Cosine, Pearsons and Hamming distances; default: 'Euclidian_L2'.</param>
        /// <param name="minkowski_parameter">[Optional] Exponent for the Minkowski distance; default: 2.</param>
        /// <param name="unstable_error">[Optional] For cosine and pearson distances: if some samples lead to numerical instabilities, an exception is generated; default: true.</param>
        /// <returns>Matrix of size k x (number of columns of <paramref name="Samples"/>); column i holds the indices of the
        /// points in <paramref name="Neighbors"/> selected for sample i.</returns>
        /// <exception cref="ILArgumentException">k is negative or larger than the number of neighbor points; 'Neighbors' is null or empty;
        /// 'Samples' is null; the row counts differ; the data fail the stability test (cosine / pearson, only if
        /// <paramref name="unstable_error"/> is set); the data are not binary (hamming); the metric is not supported.</exception>
        public static ILRetArray knn(ILInArray Samples, ILInArray Neighbors, int k = 10, DistanceMetrics metric = DistanceMetrics.Euclidian_L2, double minkowski_parameter = 2.0, bool unstable_error = true) {
            using (ILScope.Enter(Samples, Neighbors)) {
                ILArray samples = Samples;
                ILArray neighbors = Neighbors;
                if (k < 0) {
                    throw new ILArgumentException("k must be greater or equal 0");
                }
                if (isnullorempty(neighbors)) {
                    throw new ILArgumentException("input argument 'neighbors' must not be null or empty");
                }
                if (isnull(samples)) {
                    throw new ILArgumentException("input argument 'samples' must not be null");
                }
                if (samples.S[0] != neighbors.S[0]) {
                    throw new ILArgumentException("number of rows for 'neighbors' and 'samples' must match");
                }
                if (k > neighbors.S[1]) {
                    throw new ILArgumentException("k must be smaller or equal to the number of datapoints (number of columns) in A");
                }
                // am: dimensionality (rows), sn: number of samples (columns)
                int am = neighbors.S[0], sn = samples.S[1];
                ILArray ret = zeros(k, sn);
                switch (metric) {
                    case DistanceMetrics.Euclidian_L2:
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = neighbors - samples[full, i];
                                // squared distances: no square root is taken, the values are used for ranking only
                                dist.a = sum(dist * dist, 0);
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Manhattan_L1:
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = neighbors - samples[full, i];
                                dist.a = sum(abs(dist), 0);
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Minkowski:
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = neighbors - samples[full, i];
                                // abs(): the coordinate differences are signed; raising them to the exponent directly
                                // gives negative terms for odd exponents and undefined values for fractional ones.
                                // The final root is omitted, it does not change the ranking.
                                dist.a = sum(pow(abs(dist), (float)minkowski_parameter), 0);
                                ILArray indices = empty();
                                // dist was reduced along dimension 0, so search along dimension 1 -
                                // same as in the Euclidian_L2 and Manhattan_L1 branches
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Cosine:
                        // per-column norms of samples and neighbors
                        ILArray samples_normalized = sqrt(sum(samples * samples, 0));
                        ILArray neighbs_normalized = sqrt(sum(neighbors * neighbors, 0));
                        if (unstable_error && !testStable(samples_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: some samples are too close to 0. Try using a different metric instead!");
                        }
                        if (unstable_error && !testStable(neighbs_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: some neighbors are too close to 0. Try using a different metric instead!");
                        }
                        // from here on 'neighbs_normalized' holds the neighbors divided by their norms
                        neighbs_normalized.a = neighbors / neighbs_normalized;
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                // the product with the transposed neighbors is searched along dimension 0
                                ILArray dist = 1 - multiply(neighbs_normalized.T, samples[full, i]) / samples_normalized[i];
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 0).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 0, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Pearsons:
                        // same computation as for Cosine, applied to data with the column means removed
                        ILArray samples_centered = samples - mean(samples, 0);
                        ILArray neighbs_centered = neighbors - mean(neighbors, 0);
                        // both variables are declared in the Cosine section (same switch scope)
                        samples_normalized = sqrt(sum(samples_centered * samples_centered, 0));
                        neighbs_normalized = sqrt(sum(neighbs_centered * neighbs_centered, 0));
                        if (unstable_error && !testStable(samples_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: standard deviation for some sample points is close to zero. Try using a different metric instead!");
                        }
                        if (unstable_error && !testStable(neighbs_normalized)) {
                            throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
                        }
                        neighbs_normalized.a = neighbs_centered / neighbs_normalized;
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                ILArray dist = 1 - multiply(neighbs_normalized.T, samples_centered[full, i]) / samples_normalized[i];
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 0).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 0, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    case DistanceMetrics.Hamming:
                        // only binary data are accepted
                        if (samples.Any((a) => { return a != 0 && a != 1; })) {
                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'samples'");
                        }
                        if (neighbors.Any((a) => { return a != 0 && a != 1; })) {
                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'neighbors'");
                        }
                        for (int i = 0; i < sn; i++) {
                            using (ILScope.Enter()) {
                                // number of differing elements, divided by the dimensionality
                                ILArray dist = sum(abs(neighbors - samples[full, i]), 0) / am;
                                ILArray indices = empty();
                                if (k == 1) {
                                    min(dist, indices, 1).Dispose();
                                    ret[full, i] = indices[0];
                                } else {
                                    sort(dist, indices, 1, false).Dispose();
                                    ret[full, i] = indices[r(0, k - 1)];
                                }
                            }
                        }
                        break;
                    default:
                        throw new ILArgumentException("the selected distance is not supported");
                }
                return ret;
            }
        }

        /// <summary>
        /// Test for numerical instability, expects positive data only!
        /// </summary>
        /// <param name="samples_normalized">Input data</param>
        /// <returns>true: no instability detected (the lower limit of the data is greater than the upper limit scaled
        /// by MachineParameterSingle.eps); false: possible instability</returns>
        private static bool testStable(ILInArray samples_normalized) {
            using (ILScope.Enter(samples_normalized)) {
                float max, min;
                samples_normalized.GetLimits(out min, out max);
                return min > MachineParameterSingle.eps * max;
            }
        }

        #endregion HYCALPER AUTO GENERATED CODE
    }
}