Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GaussianProcessTuning/ILNumerics.2.14.4735.573/Algorithms/MachineLearning/ILKNN.cs @ 11826

Last change on this file since 11826 was 9102, checked in by gkronber, 12 years ago

#1967: ILNumerics source for experimentation

File size: 24.7 KB
Line 
1///
2///    This file is part of ILNumerics Community Edition.
3///
4///    ILNumerics Community Edition - high performance computing for applications.
5///    Copyright (C) 2006 - 2012 Haymo Kutschbach, http://ilnumerics.net
6///
7///    ILNumerics Community Edition is free software: you can redistribute it and/or modify
8///    it under the terms of the GNU General Public License version 3 as published by
9///    the Free Software Foundation.
10///
11///    ILNumerics Community Edition is distributed in the hope that it will be useful,
12///    but WITHOUT ANY WARRANTY; without even the implied warranty of
13///    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14///    GNU General Public License for more details.
15///
16///    You should have received a copy of the GNU General Public License
17///    along with ILNumerics Community Edition. See the file License.txt in the root
18///    of your distribution package. If not, see <http://www.gnu.org/licenses/>.
19///
20///    In addition this software uses the following components and/or licenses:
21///
22///    =================================================================================
23///    The Open Toolkit Library License
24///   
25///    Copyright (c) 2006 - 2009 the Open Toolkit library.
26///   
27///    Permission is hereby granted, free of charge, to any person obtaining a copy
28///    of this software and associated documentation files (the "Software"), to deal
29///    in the Software without restriction, including without limitation the rights to
30///    use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
31///    the Software, and to permit persons to whom the Software is furnished to do
32///    so, subject to the following conditions:
33///
34///    The above copyright notice and this permission notice shall be included in all
35///    copies or substantial portions of the Software.
36///
37///    =================================================================================
38///   
39
40using System;
41using System.Collections.Generic;
42using System.Linq;
43using System.Text;
44using ILNumerics.Exceptions;
45
46
47namespace ILNumerics {
48    public partial class ILMath {
49
50        public enum DistanceMetrics {
51            Euclidian_L2,
52            Mahalanobis,
53            Manhattan_L1,
54            Minkowski,
55            Chebychev,
56            Cosine,
57            Pearsons,
58            Hamming,
59            Jaccard,
60            Spearman
61        }
62
63
64        /// <summary>
65        /// Search for k nearest neighbors for every sample in <paramref name="Samples"/> samples
66        /// </summary>
67        /// <param name="Samples">Samples matrix, samples in columns, the number of rows (dimensionality) must match the number of rows in <paramref name="Neighbors"/> </param>
68        /// <param name="Neighbors">Matrix of training samples/ neighbors, this will be searched for matching points, rows: dimensionality, columns: number of points</param>
69        /// <param name="k">[Optional] Number of neighbors to return, k must lay in range: 0 &le; k &lt neighbors.D[1]; default: 1</param>
70        /// <param name="metric">[Optional] Distance metric, one out of the <see cref="ILNumerics.ILMath.DistanceMetrics"/> enumeration. Supported are: Euclidian_L2,Manhattan_L1,
71        /// Minkowski, Cosine, Pearsons and Hamming distances; default: 'Euclidian_L2'</param>
72        /// <param name="minkowski_parameter">[Optional] Exponent for minkowski distance; default: 2</param>
73        /// <param name="unstable_error">[Optional] For cosine and pearson distances: if some samples lead to numerical instabilities, an exception is generated; default: true</param>
74        /// <returns>Matrix of nearest neighbors, size: k x samples.D[1]; indices of points in <paramref name="Neighbors"/> matrix</returns>
75        public static ILRetArray<double> knn(ILInArray<double> Samples, ILInArray<double> Neighbors, int k = 10,
76                                                DistanceMetrics metric = DistanceMetrics.Euclidian_L2, double minkowski_parameter = 2.0,
77                                                bool unstable_error = true) {
78            using (ILScope.Enter(Samples, Neighbors)) {
79
80                ILArray<double> samples = Samples;
81                ILArray<double> neighbors = Neighbors;
82
83                if (k < 0) {
84                    throw new ILArgumentException("k must be greater or equal 0"); 
85                }
86                if (isnullorempty(neighbors)) {
87                    throw new ILArgumentException("input argument 'neighbors' must not be null or empty");
88                }
89                if (isnull(samples)) {
90                    throw new ILArgumentException("input argument 'samples' must not be null");
91                }
92                if (samples.S[0] != neighbors.S[0])
93                    throw new ILArgumentException("number of rows for 'neighbors' and 'samples' must match");
94                if (k > neighbors.S[1])
95                    throw new ILArgumentException("k must be smaller or equal to the number of datapoints (number of columns) in A");
96                int nn = neighbors.S[1], am = neighbors.S[0], sn = samples.S[1];
97                ILArray<double> ret = zeros<double>(k, sn);
98                switch (metric) {
99                    case DistanceMetrics.Euclidian_L2:
100                        for (int i = 0; i < sn; i++) {
101                            using (ILScope.Enter()) {
102                                ILArray<double> dist = neighbors - samples[full, i];
103                                dist.a = sum(dist * dist, 0);
104                                ILArray<double> indices = empty<double>();
105                                if (k == 1) {
106                                    min(dist, indices, 1).Dispose();
107                                    ret[full, i] = indices[0];
108                                } else {
109                                    sort(dist, indices, 1, false).Dispose();
110                                    ret[full, i] = indices[r(0, k - 1)];
111                                }
112                            }
113                        }
114                        break;
115                    case DistanceMetrics.Manhattan_L1:
116                        for (int i = 0; i < sn; i++) {
117                            using (ILScope.Enter()) {
118                                ILArray<double> dist = neighbors - samples[full, i];
119                                dist.a = sum(abs(dist), 0);
120                                ILArray<double> indices = empty<double>();
121                                if (k == 1) {
122                                    min(dist, indices, 1).Dispose();
123                                    ret[full, i] = indices[0];
124                                } else {
125                                    sort(dist, indices, 1, false).Dispose();
126                                    ret[full, i] = indices[r(0, k - 1)];
127                                }
128                            }
129                        }
130                        break;
131                    case DistanceMetrics.Minkowski:
132                        for (int i = 0; i < sn; i++) {
133                            using (ILScope.Enter()) {
134                                ILArray<double> dist = neighbors - samples[full, i];
135                                dist.a = sum(pow(dist,(double)minkowski_parameter), 0);
136                                ILArray<double> indices = empty<double>();
137                                if (k == 1) {
138                                    min(dist, indices, 0).Dispose();
139                                    ret[full, i] = indices[0];
140                                } else {
141                                    sort(dist, indices, 0, false).Dispose();
142                                    ret[full, i] = indices[r(0, k - 1)];
143                                }
144                            }
145                        }
146                        break;
147                    case DistanceMetrics.Cosine:
148                        ILArray<double> samples_normalized = sqrt(sum(samples * samples, 0));
149                        ILArray<double> neighbs_normalized = sqrt(sum(neighbors * neighbors, 0));
150                        if (unstable_error && !testStable(samples_normalized)) {
151                            throw new ILArgumentException("possibly numerical instability: some samples are too close to 0. Try using a different metric instead!");
152                        }
153                        if (unstable_error && !testStable(neighbs_normalized)) {
154                            throw new ILArgumentException("possibly numerical instability: some neighbors are too close to 0. Try using a different metric instead!");
155                        }
156                        neighbs_normalized.a = neighbors / neighbs_normalized;
157                        for (int i = 0; i < sn; i++) {
158                            using (ILScope.Enter()) {
159                                ILArray<double> dist = 1 - multiply(neighbs_normalized.T, samples[full, i]) / samples_normalized[i];
160                                ILArray<double> indices = empty<double>();
161                                if (k == 1) {
162                                    min(dist, indices, 0).Dispose();
163                                    ret[full, i] = indices[0];
164                                } else {
165                                    sort(dist, indices, 0, false).Dispose();
166                                    ret[full, i] = indices[r(0, k - 1)];
167                                }
168                            }
169                        }
170                        break;
171                    case DistanceMetrics.Pearsons:
172                        ILArray<double> samples_centered = samples - mean(samples, 0);
173                        ILArray<double> neighbs_centered = neighbors - mean(neighbors, 0);
174                        samples_normalized = sqrt(sum(samples_centered * samples_centered, 0));
175                        neighbs_normalized = sqrt(sum(neighbs_centered * neighbs_centered, 0));
176                        if (unstable_error && !testStable(samples_normalized)) {
177                            throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
178                        }
179                        if (unstable_error && !testStable(neighbs_normalized)) {
180                            throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
181                        }
182                        neighbs_normalized.a = neighbs_centered / neighbs_normalized;
183                        for (int i = 0; i < sn; i++) {
184                            using (ILScope.Enter()) {
185                                ILArray<double> dist = 1 - multiply(neighbs_normalized.T, samples_centered[full, i]) / samples_normalized[i];
186                                ILArray<double> indices = empty<double>();
187                                if (k == 1) {
188                                    min(dist,indices,0).Dispose();
189                                    ret[full, i] = indices[0];
190                                } else {
191                                    sort(dist, indices, 0, false).Dispose();
192                                    ret[full, i] = indices[r(0, k - 1)];
193                                }
194                            }
195                        }
196                        break;
197                    case DistanceMetrics.Hamming:
198                        if (samples.Any((a) => { return a != 0 && a != 1; })) {
199                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'samples'");
200                        }
201                        if (neighbors.Any((a) => { return a != 0 && a != 1; })) {
202                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'neighbors'");
203                        }
204                        for (int i = 0; i < sn; i++) {
205                            using (ILScope.Enter()) {
206                                ILArray<double> dist = sum(abs(neighbors - samples[full, i]), 0) / am;
207                                ILArray<double> indices = empty<double>();
208                                if (k == 1) {
209                                    min(dist, indices, 1).Dispose();
210                                    ret[full, i] = indices[0];
211                                } else {
212                                    sort(dist, indices, 1, false).Dispose();
213                                    ret[full, i] = indices[r(0, k - 1)];
214                                }
215                            }
216                        }
217                        break;
218                    default:
219                        throw new ILArgumentException("the selected distance is not supported");
220                }
221                return ret;
222            }
223             
224        }
225        /// <summary>
226        /// Test for numerical instability, expects positive data only!
227        /// </summary>
228        /// <param name="samples_normalized">Input data</param>
229        /// <returns>true: no instability detected, false, possible instablility</returns>
230        private static bool testStable(ILInArray<double> samples_normalized) {
231            using (ILScope.Enter(samples_normalized)) {
232               
233                double max, min;
234                samples_normalized.GetLimits(out min, out max);
235                return min > MachineParameterDouble.eps * max;
236            }
237        }
238
239#region HYCALPER AUTO GENERATED CODE
240
241        /// <summary>
242        /// Search for k nearest neighbors for every sample in <paramref name="Samples"/> samples
243        /// </summary>
244        /// <param name="Samples">Samples matrix, samples in columns, the number of rows (dimensionality) must match the number of rows in <paramref name="Neighbors"/> </param>
245        /// <param name="Neighbors">Matrix of training samples/ neighbors, this will be searched for matching points, rows: dimensionality, columns: number of points</param>
246        /// <param name="k">[Optional] Number of neighbors to return, k must lay in range: 0 &le; k &lt neighbors.D[1]; default: 1</param>
247        /// <param name="metric">[Optional] Distance metric, one out of the <see cref="ILNumerics.ILMath.DistanceMetrics"/> enumeration. Supported are: Euclidian_L2,Manhattan_L1,
248        /// Minkowski, Cosine, Pearsons and Hamming distances; default: 'Euclidian_L2'</param>
249        /// <param name="minkowski_parameter">[Optional] Exponent for minkowski distance; default: 2</param>
250        /// <param name="unstable_error">[Optional] For cosine and pearson distances: if some samples lead to numerical instabilities, an exception is generated; default: true</param>
251        /// <returns>Matrix of nearest neighbors, size: k x samples.D[1]; indices of points in <paramref name="Neighbors"/> matrix</returns>
252        public static ILRetArray<double> knn(ILInArray<float> Samples, ILInArray<float> Neighbors, int k = 10,
253                                                DistanceMetrics metric = DistanceMetrics.Euclidian_L2, double minkowski_parameter = 2.0,
254                                                bool unstable_error = true) {
255            using (ILScope.Enter(Samples, Neighbors)) {
256
257                ILArray<float> samples = Samples;
258                ILArray<float> neighbors = Neighbors;
259
260                if (k < 0) {
261                    throw new ILArgumentException("k must be greater or equal 0"); 
262                }
263                if (isnullorempty(neighbors)) {
264                    throw new ILArgumentException("input argument 'neighbors' must not be null or empty");
265                }
266                if (isnull(samples)) {
267                    throw new ILArgumentException("input argument 'samples' must not be null");
268                }
269                if (samples.S[0] != neighbors.S[0])
270                    throw new ILArgumentException("number of rows for 'neighbors' and 'samples' must match");
271                if (k > neighbors.S[1])
272                    throw new ILArgumentException("k must be smaller or equal to the number of datapoints (number of columns) in A");
273                int nn = neighbors.S[1], am = neighbors.S[0], sn = samples.S[1];
274                ILArray<double> ret = zeros<double>(k, sn);
275                switch (metric) {
276                    case DistanceMetrics.Euclidian_L2:
277                        for (int i = 0; i < sn; i++) {
278                            using (ILScope.Enter()) {
279                                ILArray<float> dist = neighbors - samples[full, i];
280                                dist.a = sum(dist * dist, 0);
281                                ILArray<double> indices = empty<double>();
282                                if (k == 1) {
283                                    min(dist, indices, 1).Dispose();
284                                    ret[full, i] = indices[0];
285                                } else {
286                                    sort(dist, indices, 1, false).Dispose();
287                                    ret[full, i] = indices[r(0, k - 1)];
288                                }
289                            }
290                        }
291                        break;
292                    case DistanceMetrics.Manhattan_L1:
293                        for (int i = 0; i < sn; i++) {
294                            using (ILScope.Enter()) {
295                                ILArray<float> dist = neighbors - samples[full, i];
296                                dist.a = sum(abs(dist), 0);
297                                ILArray<double> indices = empty<double>();
298                                if (k == 1) {
299                                    min(dist, indices, 1).Dispose();
300                                    ret[full, i] = indices[0];
301                                } else {
302                                    sort(dist, indices, 1, false).Dispose();
303                                    ret[full, i] = indices[r(0, k - 1)];
304                                }
305                            }
306                        }
307                        break;
308                    case DistanceMetrics.Minkowski:
309                        for (int i = 0; i < sn; i++) {
310                            using (ILScope.Enter()) {
311                                ILArray<float> dist = neighbors - samples[full, i];
312                                dist.a = sum(pow(dist,(float)minkowski_parameter), 0);
313                                ILArray<double> indices = empty<double>();
314                                if (k == 1) {
315                                    min(dist, indices, 0).Dispose();
316                                    ret[full, i] = indices[0];
317                                } else {
318                                    sort(dist, indices, 0, false).Dispose();
319                                    ret[full, i] = indices[r(0, k - 1)];
320                                }
321                            }
322                        }
323                        break;
324                    case DistanceMetrics.Cosine:
325                        ILArray<float> samples_normalized = sqrt(sum(samples * samples, 0));
326                        ILArray<float> neighbs_normalized = sqrt(sum(neighbors * neighbors, 0));
327                        if (unstable_error && !testStable(samples_normalized)) {
328                            throw new ILArgumentException("possibly numerical instability: some samples are too close to 0. Try using a different metric instead!");
329                        }
330                        if (unstable_error && !testStable(neighbs_normalized)) {
331                            throw new ILArgumentException("possibly numerical instability: some neighbors are too close to 0. Try using a different metric instead!");
332                        }
333                        neighbs_normalized.a = neighbors / neighbs_normalized;
334                        for (int i = 0; i < sn; i++) {
335                            using (ILScope.Enter()) {
336                                ILArray<float> dist = 1 - multiply(neighbs_normalized.T, samples[full, i]) / samples_normalized[i];
337                                ILArray<double> indices = empty<double>();
338                                if (k == 1) {
339                                    min(dist, indices, 0).Dispose();
340                                    ret[full, i] = indices[0];
341                                } else {
342                                    sort(dist, indices, 0, false).Dispose();
343                                    ret[full, i] = indices[r(0, k - 1)];
344                                }
345                            }
346                        }
347                        break;
348                    case DistanceMetrics.Pearsons:
349                        ILArray<float> samples_centered = samples - mean(samples, 0);
350                        ILArray<float> neighbs_centered = neighbors - mean(neighbors, 0);
351                        samples_normalized = sqrt(sum(samples_centered * samples_centered, 0));
352                        neighbs_normalized = sqrt(sum(neighbs_centered * neighbs_centered, 0));
353                        if (unstable_error && !testStable(samples_normalized)) {
354                            throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
355                        }
356                        if (unstable_error && !testStable(neighbs_normalized)) {
357                            throw new ILArgumentException("possibly numerical instability: standard deviation for some neighbor points is close to zero. Try using a different metric instead!");
358                        }
359                        neighbs_normalized.a = neighbs_centered / neighbs_normalized;
360                        for (int i = 0; i < sn; i++) {
361                            using (ILScope.Enter()) {
362                                ILArray<float> dist = 1 - multiply(neighbs_normalized.T, samples_centered[full, i]) / samples_normalized[i];
363                                ILArray<double> indices = empty<double>();
364                                if (k == 1) {
365                                    min(dist,indices,0).Dispose();
366                                    ret[full, i] = indices[0];
367                                } else {
368                                    sort(dist, indices, 0, false).Dispose();
369                                    ret[full, i] = indices[r(0, k - 1)];
370                                }
371                            }
372                        }
373                        break;
374                    case DistanceMetrics.Hamming:
375                        if (samples.Any((a) => { return a != 0 && a != 1; })) {
376                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'samples'");
377                        }
378                        if (neighbors.Any((a) => { return a != 0 && a != 1; })) {
379                            throw new ILArgumentException("hamming distance requires 0 and 1 as value for all elements of 'neighbors'");
380                        }
381                        for (int i = 0; i < sn; i++) {
382                            using (ILScope.Enter()) {
383                                ILArray<float> dist = sum(abs(neighbors - samples[full, i]), 0) / am;
384                                ILArray<double> indices = empty<double>();
385                                if (k == 1) {
386                                    min(dist, indices, 1).Dispose();
387                                    ret[full, i] = indices[0];
388                                } else {
389                                    sort(dist, indices, 1, false).Dispose();
390                                    ret[full, i] = indices[r(0, k - 1)];
391                                }
392                            }
393                        }
394                        break;
395                    default:
396                        throw new ILArgumentException("the selected distance is not supported");
397                }
398                return ret;
399            }
400             
401        }
402        /// <summary>
403        /// Test for numerical instability, expects positive data only!
404        /// </summary>
405        /// <param name="samples_normalized">Input data</param>
406        /// <returns>true: no instability detected, false, possible instablility</returns>
407        private static bool testStable(ILInArray<float> samples_normalized) {
408            using (ILScope.Enter(samples_normalized)) {
409               
410                float max, min;
411                samples_normalized.GetLimits(out min, out max);
412                return min > MachineParameterSingle.eps * max;
413            }
414        }
415
416#endregion HYCALPER AUTO GENERATED CODE
417   }
418}
Note: See TracBrowser for help on using the repository browser.