///
/// This file is part of ILNumerics Community Edition.
///
/// ILNumerics Community Edition - high performance computing for applications.
/// Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
///
/// ILNumerics Community Edition is free software: you can redistribute it and/or modify
/// it under the terms of the GNU General Public License version 3 as published by
/// the Free Software Foundation.
///
/// ILNumerics Community Edition is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with ILNumerics Community Edition. See the file License.txt in the root
/// of your distribution package. If not, see <http://www.gnu.org/licenses/>.
///
/// In addition this software uses the following components and/or licenses:
///
/// =================================================================================
/// The Open Toolkit Library License
///
/// Copyright (c) 2006 - 2009 the Open Toolkit library.
///
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to deal
/// in the Software without restriction, including without limitation the rights to
/// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
/// the Software, and to permit persons to whom the Software is furnished to do
/// so, subject to the following conditions:
///
/// The above copyright notice and this permission notice shall be included in all
/// copies or substantial portions of the Software.
///
/// =================================================================================
///
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ILNumerics.Exceptions;
namespace ILNumerics {
public partial class ILMath {
/// <summary>
/// Distance metrics which can be requested for the nearest neighbor search (knn).
/// </summary>
/// <remarks>
/// The knn implementation in this file handles only: Euclidian_L2, Manhattan_L1, Minkowski,
/// Cosine, Pearsons and Hamming. Requesting any other member makes knn throw an
/// ILArgumentException ("the selected distance is not supported").
/// </remarks>
public enum DistanceMetrics {
/// <summary>Euclidean (L2) distance; handled by knn (ranked by the sum of squared differences).</summary>
Euclidian_L2,
/// <summary>Mahalanobis distance; not handled by knn in this file.</summary>
Mahalanobis,
/// <summary>Manhattan (L1) distance; handled by knn (sum of absolute differences).</summary>
Manhattan_L1,
/// <summary>Minkowski distance; handled by knn, the exponent is given by the 'minkowski_parameter' argument.</summary>
Minkowski,
/// <summary>Chebychev distance; not handled by knn in this file.</summary>
Chebychev,
/// <summary>Cosine distance; handled by knn (1 - cosine of the angle between the points).</summary>
Cosine,
/// <summary>Pearson correlation distance; handled by knn (1 - correlation of the mean-centered points).</summary>
Pearsons,
/// <summary>Hamming distance; handled by knn, which requires all elements to be 0 or 1.</summary>
Hamming,
/// <summary>Jaccard distance; not handled by knn in this file.</summary>
Jaccard,
/// <summary>Spearman distance; not handled by knn in this file.</summary>
Spearman
}
/// <summary>
/// Search for the k nearest neighbors of every sample in <paramref name="Samples"/>.
/// </summary>
/// <param name="Samples">Samples matrix, samples in columns; the number of rows (dimensionality) must match the number of rows in <paramref name="Neighbors"/>.</param>
/// <param name="Neighbors">Matrix of training samples / neighbors, this will be searched for matching points; rows: dimensionality, columns: number of points.</param>
/// <param name="k">[Optional] Number of neighbors to return, k must lay in range: 0 ≤ k ≤ Neighbors.S[1]; default: 10.</param>
/// <param name="metric">[Optional] Distance metric, one out of the <see cref="DistanceMetrics"/> enumeration. Supported are: Euclidian_L2, Manhattan_L1,
/// Minkowski, Cosine, Pearsons and Hamming distances; default: 'Euclidian_L2'.</param>
/// <param name="minkowski_parameter">[Optional] Exponent for the Minkowski distance; default: 2.</param>
/// <param name="unstable_error">[Optional] For cosine and pearson distances: if some samples lead to numerical instabilities, an exception is generated; default: true.</param>
/// <returns>Matrix of nearest neighbors, size: k x Samples.S[1]; column i holds the indices (column numbers in
/// <paramref name="Neighbors"/>) of the neighbors found for sample i, ordered by increasing distance.</returns>
/// <exception cref="ILArgumentException">Thrown if k is out of range, an input is null (or Neighbors is empty),
/// the row counts differ, the metric is not supported, Hamming input contains values other than 0 and 1,
/// or a numerical instability was detected while <paramref name="unstable_error"/> is true.</exception>
public static ILRetArray knn(ILInArray Samples, ILInArray Neighbors, int k = 10,
        DistanceMetrics metric = DistanceMetrics.Euclidian_L2, double minkowski_parameter = 2.0,
        bool unstable_error = true) {
    using (ILScope.Enter(Samples, Neighbors)) {
        ILArray samples = Samples;
        ILArray neighbors = Neighbors;
        if (k < 0) {
            throw new ILArgumentException("k must be greater or equal 0");
        }
        if (isnullorempty(neighbors)) {
            throw new ILArgumentException("input argument 'neighbors' must not be null or empty");
        }
        if (isnull(samples)) {
            throw new ILArgumentException("input argument 'samples' must not be null");
        }
        if (samples.S[0] != neighbors.S[0]) {
            throw new ILArgumentException("number of rows for 'neighbors' and 'samples' must match");
        }
        if (k > neighbors.S[1]) {
            throw new ILArgumentException("k must be smaller or equal to the number of datapoints (number of columns) in 'neighbors'");
        }
        // am: dimensionality of the points, sn: number of samples to find neighbors for
        int am = neighbors.S[0], sn = samples.S[1];
        // k == 0 requests no neighbors: all metric specific argument checks are still performed,
        // but the search loops are skipped, since r(0, k - 1) would address the invalid range [0, -1]
        int searchCount = (k == 0) ? 0 : sn;
        ILArray ret = zeros(k, sn);
        switch (metric) {
            case DistanceMetrics.Euclidian_L2:
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        ILArray dist = neighbors - samples[full, i];
                        // the square root is omitted: it does not change the order of the distances
                        dist.a = sum(dist * dist, 0);
                        ILArray indices = empty();
                        // dist is a row vector (1 x number of neighbors): search along dimension 1
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Manhattan_L1:
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        ILArray dist = neighbors - samples[full, i];
                        dist.a = sum(abs(dist), 0);
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Minkowski:
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        ILArray dist = neighbors - samples[full, i];
                        // absolute differences are required: odd or fractional exponents would otherwise
                        // give negative or NaN terms. The p-th root is omitted, it does not change the order.
                        dist.a = sum(pow(abs(dist), (double)minkowski_parameter), 0);
                        ILArray indices = empty();
                        // dist is a row vector here (result of sum along dimension 0): search along dimension 1
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Cosine:
                // euclidean length of every sample / neighbor point
                ILArray samples_normalized = sqrt(sum(samples * samples, 0));
                ILArray neighbs_normalized = sqrt(sum(neighbors * neighbors, 0));
                if (unstable_error && !testStable(samples_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: some samples are too close to 0. Try using a different metric instead!");
                }
                if (unstable_error && !testStable(neighbs_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: some neighbors are too close to 0. Try using a different metric instead!");
                }
                // from here on: neighbor points scaled to unit length
                neighbs_normalized.a = neighbors / neighbs_normalized;
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        // the matrix product gives a column vector (number of neighbors x 1): search along dimension 0
                        ILArray dist = 1 - multiply(neighbs_normalized.T, samples[full, i]) / samples_normalized[i];
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 0).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 0, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Pearsons:
                // remove the mean of every point, afterwards proceed like for the cosine distance
                ILArray samples_centered = samples - mean(samples, 0);
                ILArray neighbs_centered = neighbors - mean(neighbors, 0);
                samples_normalized = sqrt(sum(samples_centered * samples_centered, 0));
                neighbs_normalized = sqrt(sum(neighbs_centered * neighbs_centered, 0));
                if (unstable_error && !testStable(samples_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: standard deviation for some sample points is close to zero. Try using a different metric instead!");
                }
                if (unstable_error && !testStable(neighbs_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
                }
                neighbs_normalized.a = neighbs_centered / neighbs_normalized;
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        // column vector result: search along dimension 0
                        ILArray dist = 1 - multiply(neighbs_normalized.T, samples_centered[full, i]) / samples_normalized[i];
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 0).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 0, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Hamming:
                if (samples.Any((a) => { return a != 0 && a != 1; })) {
                    throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'samples'");
                }
                if (neighbors.Any((a) => { return a != 0 && a != 1; })) {
                    throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'neighbors'");
                }
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        // fraction of differing elements; row vector: search along dimension 1
                        ILArray dist = sum(abs(neighbors - samples[full, i]), 0) / am;
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            default:
                throw new ILArgumentException("the selected distance is not supported");
        }
        return ret;
    }
}
/// <summary>
/// Test for numerical instability, expects positive data only!
/// </summary>
/// <param name="samples_normalized">Input data</param>
/// <returns>true: no instability detected (the smallest element is larger than the machine
/// epsilon scaled by the largest element); false: possible instability</returns>
private static bool testStable(ILInArray samples_normalized) {
    using (ILScope.Enter(samples_normalized)) {
        double lowest, highest;
        samples_normalized.GetLimits(out lowest, out highest);
        // stable only if the smallest value is not negligible relative to the largest one
        bool wellSeparated = lowest > MachineParameterDouble.eps * highest;
        return wellSeparated;
    }
}
#region HYCALPER AUTO GENERATED CODE
/// <summary>
/// Search for the k nearest neighbors of every sample in <paramref name="Samples"/>.
/// </summary>
/// <param name="Samples">Samples matrix, samples in columns; the number of rows (dimensionality) must match the number of rows in <paramref name="Neighbors"/>.</param>
/// <param name="Neighbors">Matrix of training samples / neighbors, this will be searched for matching points; rows: dimensionality, columns: number of points.</param>
/// <param name="k">[Optional] Number of neighbors to return, k must lay in range: 0 ≤ k ≤ Neighbors.S[1]; default: 10.</param>
/// <param name="metric">[Optional] Distance metric, one out of the <see cref="DistanceMetrics"/> enumeration. Supported are: Euclidian_L2, Manhattan_L1,
/// Minkowski, Cosine, Pearsons and Hamming distances; default: 'Euclidian_L2'.</param>
/// <param name="minkowski_parameter">[Optional] Exponent for the Minkowski distance (converted to float before use); default: 2.</param>
/// <param name="unstable_error">[Optional] For cosine and pearson distances: if some samples lead to numerical instabilities, an exception is generated; default: true.</param>
/// <returns>Matrix of nearest neighbors, size: k x Samples.S[1]; column i holds the indices (column numbers in
/// <paramref name="Neighbors"/>) of the neighbors found for sample i, ordered by increasing distance.</returns>
/// <exception cref="ILArgumentException">Thrown if k is out of range, an input is null (or Neighbors is empty),
/// the row counts differ, the metric is not supported, Hamming input contains values other than 0 and 1,
/// or a numerical instability was detected while <paramref name="unstable_error"/> is true.</exception>
public static ILRetArray knn(ILInArray Samples, ILInArray Neighbors, int k = 10,
        DistanceMetrics metric = DistanceMetrics.Euclidian_L2, double minkowski_parameter = 2.0,
        bool unstable_error = true) {
    using (ILScope.Enter(Samples, Neighbors)) {
        ILArray samples = Samples;
        ILArray neighbors = Neighbors;
        if (k < 0) {
            throw new ILArgumentException("k must be greater or equal 0");
        }
        if (isnullorempty(neighbors)) {
            throw new ILArgumentException("input argument 'neighbors' must not be null or empty");
        }
        if (isnull(samples)) {
            throw new ILArgumentException("input argument 'samples' must not be null");
        }
        if (samples.S[0] != neighbors.S[0]) {
            throw new ILArgumentException("number of rows for 'neighbors' and 'samples' must match");
        }
        if (k > neighbors.S[1]) {
            throw new ILArgumentException("k must be smaller or equal to the number of datapoints (number of columns) in 'neighbors'");
        }
        // am: dimensionality of the points, sn: number of samples to find neighbors for
        int am = neighbors.S[0], sn = samples.S[1];
        // k == 0 requests no neighbors: all metric specific argument checks are still performed,
        // but the search loops are skipped, since r(0, k - 1) would address the invalid range [0, -1]
        int searchCount = (k == 0) ? 0 : sn;
        ILArray ret = zeros(k, sn);
        switch (metric) {
            case DistanceMetrics.Euclidian_L2:
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        ILArray dist = neighbors - samples[full, i];
                        // the square root is omitted: it does not change the order of the distances
                        dist.a = sum(dist * dist, 0);
                        ILArray indices = empty();
                        // dist is a row vector (1 x number of neighbors): search along dimension 1
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Manhattan_L1:
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        ILArray dist = neighbors - samples[full, i];
                        dist.a = sum(abs(dist), 0);
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Minkowski:
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        ILArray dist = neighbors - samples[full, i];
                        // absolute differences are required: odd or fractional exponents would otherwise
                        // give negative or NaN terms. The p-th root is omitted, it does not change the order.
                        dist.a = sum(pow(abs(dist), (float)minkowski_parameter), 0);
                        ILArray indices = empty();
                        // dist is a row vector here (result of sum along dimension 0): search along dimension 1
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Cosine:
                // euclidean length of every sample / neighbor point
                ILArray samples_normalized = sqrt(sum(samples * samples, 0));
                ILArray neighbs_normalized = sqrt(sum(neighbors * neighbors, 0));
                if (unstable_error && !testStable(samples_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: some samples are too close to 0. Try using a different metric instead!");
                }
                if (unstable_error && !testStable(neighbs_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: some neighbors are too close to 0. Try using a different metric instead!");
                }
                // from here on: neighbor points scaled to unit length
                neighbs_normalized.a = neighbors / neighbs_normalized;
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        // the matrix product gives a column vector (number of neighbors x 1): search along dimension 0
                        ILArray dist = 1 - multiply(neighbs_normalized.T, samples[full, i]) / samples_normalized[i];
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 0).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 0, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Pearsons:
                // remove the mean of every point, afterwards proceed like for the cosine distance
                ILArray samples_centered = samples - mean(samples, 0);
                ILArray neighbs_centered = neighbors - mean(neighbors, 0);
                samples_normalized = sqrt(sum(samples_centered * samples_centered, 0));
                neighbs_normalized = sqrt(sum(neighbs_centered * neighbs_centered, 0));
                if (unstable_error && !testStable(samples_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: standard deviation for some sample points is close to zero. Try using a different metric instead!");
                }
                if (unstable_error && !testStable(neighbs_normalized)) {
                    throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
                }
                neighbs_normalized.a = neighbs_centered / neighbs_normalized;
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        // column vector result: search along dimension 0
                        ILArray dist = 1 - multiply(neighbs_normalized.T, samples_centered[full, i]) / samples_normalized[i];
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 0).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 0, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            case DistanceMetrics.Hamming:
                if (samples.Any((a) => { return a != 0 && a != 1; })) {
                    throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'samples'");
                }
                if (neighbors.Any((a) => { return a != 0 && a != 1; })) {
                    throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'neighbors'");
                }
                for (int i = 0; i < searchCount; i++) {
                    using (ILScope.Enter()) {
                        // fraction of differing elements; row vector: search along dimension 1
                        ILArray dist = sum(abs(neighbors - samples[full, i]), 0) / am;
                        ILArray indices = empty();
                        if (k == 1) {
                            min(dist, indices, 1).Dispose();
                            ret[full, i] = indices[0];
                        } else {
                            sort(dist, indices, 1, false).Dispose();
                            ret[full, i] = indices[r(0, k - 1)];
                        }
                    }
                }
                break;
            default:
                throw new ILArgumentException("the selected distance is not supported");
        }
        return ret;
    }
}
/// <summary>
/// Test for numerical instability, expects positive data only!
/// </summary>
/// <param name="samples_normalized">Input data</param>
/// <returns>true: no instability detected (the smallest element is larger than the single precision
/// machine epsilon scaled by the largest element); false: possible instability</returns>
private static bool testStable(ILInArray samples_normalized) {
    using (ILScope.Enter(samples_normalized)) {
        float lowest, highest;
        samples_normalized.GetLimits(out lowest, out highest);
        // stable only if the smallest value is not negligible relative to the largest one
        bool wellSeparated = lowest > MachineParameterSingle.eps * highest;
        return wellSeparated;
    }
}
#endregion HYCALPER AUTO GENERATED CODE
}
}