Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/ExpressionClustering/flann/include/flann/util/cuda/result_set.h @ 15840

Last change on this file since 15840 was 15840, checked in by gkronber, 6 years ago

#2886 added utility console program for clustering of expressions

File size: 15.5 KB
Line 
1/**********************************************************************
2 * Software License Agreement (BSD License)
3 *
4 * Copyright 2011  Andreas Muetzel (amuetzel@uni-koblenz.de). All rights reserved.
5 *
6 * THE BSD LICENSE
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *************************************************************************/
29#ifndef FLANN_UTIL_CUDA_RESULTSET_H
30#define FLANN_UTIL_CUDA_RESULTSET_H
31
32#include <flann/util/cuda/heap.h>
33#include <limits>
34
35__device__ __forceinline__
36float infinity()
37{
38       return __int_as_float(0x7f800000);
39}
40
41#ifndef INFINITY
42#define INFINITY infinity()
43#endif
44
45namespace flann
46{
47namespace cuda
48{
49//! result set for the 1nn search. Doesn't do any global memory accesses on its own,
50template< typename DistanceType >
51struct SingleResultSet
52{
53    int bestIndex;
54    DistanceType bestDist;
55    const DistanceType epsError;
56
57    __device__
58    SingleResultSet( DistanceType eps ) : bestIndex(-1),bestDist(INFINITY), epsError(eps){ }
59
60    __device__
61    inline float
62    worstDist()
63    {
64        return bestDist;
65    }
66
67    __device__
68    inline void
69    insert(int index, DistanceType dist)
70    {
71        if( dist <= bestDist ) {
72            bestIndex=index;
73            bestDist=dist;
74        }
75    }
76
77    DistanceType* resultDist;
78    int* resultIndex;
79
80    __device__
81    inline void
82    setResultLocation( DistanceType* dists, int* index, int thread, int stride )
83    {
84        resultDist=dists+thread*stride;
85        resultIndex=index+thread*stride;
86        if( stride != 1 ) {
87            for( int i=1; i<stride; i++ ) {
88                resultDist[i]=INFINITY;
89                resultIndex[i]=-1;
90            }
91        }
92    }
93
94    __device__
95    inline void
96    finish()
97    {
98        resultDist[0]=bestDist;
99        resultIndex[0]=bestIndex;
100    }
101};
102
103template< typename DistanceType >
104struct GreaterThan
105{
106    __device__
107    bool operator()(DistanceType a, DistanceType b)
108    {
109        return a>b;
110    }
111};
112
113
114// using this and the template uses 2 or 3 registers more than the direct implementation in the kNearestKernel, but
115// there is no speed difference.
116// Setting useHeap as a template parameter leads to a whole lot of things being
117// optimized away by nvcc.
118// Register counts are the same as when removing not-needed variables in explicit specializations
119// and the "if( useHeap )" branches are eliminated at compile time.
120// The downside of this: a bit more complex kernel launch code.
121template< typename DistanceType, bool useHeap >
122struct KnnResultSet
123{
124    int foundNeighbors;
125    DistanceType largestHeapDist;
126    int maxDistIndex;
127    const int k;
128    const bool sorted;
129    const DistanceType epsError;
130
131
132    __device__
133    KnnResultSet(int knn, bool sortResults, DistanceType eps) : foundNeighbors(0),largestHeapDist(INFINITY),k(knn), sorted(sortResults), epsError(eps){ }
134
135    //          __host__ __device__
136    //          KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
137
138    __device__
139    inline DistanceType
140    worstDist()
141    {
142        return largestHeapDist;
143    }
144
145    __device__
146    inline void
147    insert(int index, DistanceType dist)
148    {
149        if( foundNeighbors<k ) {
150            resultDist[foundNeighbors]=dist;
151            resultIndex[foundNeighbors]=index;
152            if( foundNeighbors==k-1) {
153                if( useHeap ) {
154                    flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
155                    largestHeapDist=resultDist[0];
156                }
157                else {
158                    findLargestDistIndex();
159                }
160
161            }
162            foundNeighbors++;
163        }
164        else if( dist < largestHeapDist ) {
165            if( useHeap ) {
166                resultDist[0]=dist;
167                resultIndex[0]=index;
168                flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
169                largestHeapDist=resultDist[0];
170            }
171            else {
172                resultDist[maxDistIndex]=dist;
173                resultIndex[maxDistIndex]=index;
174                findLargestDistIndex();
175            }
176
177        }
178    }
179
180    __device__
181    void
182    findLargestDistIndex( )
183    {
184        largestHeapDist=resultDist[0];
185        maxDistIndex=0;
186        for( int i=1; i<k; i++ )
187            if( resultDist[i] > largestHeapDist ) {
188                maxDistIndex=i;
189                largestHeapDist=resultDist[i];
190            }
191    }
192
193    float* resultDist;
194    int* resultIndex;
195
196    __device__
197    inline void
198    setResultLocation( DistanceType* dists, int* index, int thread, int stride )
199    {
200        resultDist=dists+stride*thread;
201        resultIndex=index+stride*thread;
202        for( int i=0; i<stride; i++ ) {
203            resultDist[i]=INFINITY;
204            resultIndex[i]=-1;
205            //                  resultIndex[tid+i*blockDim.x]=-1;
206            //                  resultDist[tid+i*blockDim.x]=INFINITY;
207        }
208    }
209
210    __host__ __device__
211    inline void
212    finish()
213    {
214        if( sorted ) {
215            if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
216            for( int i=k-1; i>0; i-- ) {
217                flann::cuda::swap( resultDist[0], resultDist[i] );
218                flann::cuda::swap( resultIndex[0], resultIndex[i] );
219                flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
220            }
221        }
222    }
223};
224
225template <typename DistanceType>
226struct CountingRadiusResultSet
227{
228    int count_;
229    DistanceType radius_sq_;
230    int max_neighbors_;
231
232    __device__
233    CountingRadiusResultSet(DistanceType radius, int max_neighbors) : count_(0),radius_sq_(radius), max_neighbors_(max_neighbors){ }
234
235    __device__
236    inline DistanceType
237    worstDist()
238    {
239        return radius_sq_;
240    }
241
242    __device__
243    inline void
244    insert(int index, float dist)
245    {
246        if( dist < radius_sq_ ) {
247            count_++;
248        }
249    }
250
251    int* resultIndex;
252
253    __device__
254    inline void
255    setResultLocation( DistanceType* /*dists*/, int* count, int thread, int stride )
256    {
257        resultIndex=count+thread*stride;
258    }
259
260    __device__
261    inline void
262    finish()
263    {
264        if(( max_neighbors_<=0) ||( count_<=max_neighbors_) ) resultIndex[0]=count_;
265        else resultIndex[0]=max_neighbors_;
266    }
267};
268
269template<typename DistanceType, bool useHeap>
270struct RadiusKnnResultSet
271{
272    int foundNeighbors;
273    DistanceType largestHeapDist;
274    int maxDistElem;
275    const int k;
276    const bool sorted;
277    const DistanceType radius_sq_;
278    int* segment_starts_;
279    //          int count_;
280
281
282    __device__
283    RadiusKnnResultSet(DistanceType radius, int knn, int* segment_starts, bool sortResults) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), radius_sq_(radius),segment_starts_(segment_starts) { }
284
285    //          __host__ __device__
286    //          KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
287
288    __device__
289    inline DistanceType
290    worstDist()
291    {
292        return largestHeapDist;
293    }
294
295    __device__
296    inline void
297    insert(int index, DistanceType dist)
298    {
299        if( dist < radius_sq_ ) {
300            if( foundNeighbors<k ) {
301                resultDist[foundNeighbors]=dist;
302                resultIndex[foundNeighbors]=index;
303                if(( foundNeighbors==k-1) && useHeap) {
304                    if( useHeap ) {
305                        flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
306                        largestHeapDist=resultDist[0];
307                    }
308                    else {
309                        findLargestDistIndex();
310                    }
311                }
312                foundNeighbors++;
313
314            }
315            else if( dist < largestHeapDist ) {
316                if( useHeap ) {
317                    resultDist[0]=dist;
318                    resultIndex[0]=index;
319                    flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
320                    largestHeapDist=resultDist[0];
321                }
322                else {
323                    resultDist[maxDistElem]=dist;
324                    resultIndex[maxDistElem]=index;
325                    findLargestDistIndex();
326                }
327            }
328        }
329    }
330
331    __device__
332    void
333    findLargestDistIndex( )
334    {
335        largestHeapDist=resultDist[0];
336        maxDistElem=0;
337        for( int i=1; i<k; i++ )
338            if( resultDist[i] > largestHeapDist ) {
339                maxDistElem=i;
340                largestHeapDist=resultDist[i];
341            }
342    }
343
344
345    DistanceType* resultDist;
346    int* resultIndex;
347
348    __device__
349    inline void
350    setResultLocation( DistanceType* dists, int* index, int thread, int /*stride*/ )
351    {
352        resultDist=dists+segment_starts_[thread];
353        resultIndex=index+segment_starts_[thread];
354    }
355
356    __device__
357    inline void
358    finish()
359    {
360        if( sorted ) {
361            if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
362            for( int i=foundNeighbors-1; i>0; i-- ) {
363                flann::cuda::swap( resultDist[0], resultDist[i] );
364                flann::cuda::swap( resultIndex[0], resultIndex[i] );
365                flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
366            }
367        }
368    }
369};
370
371// Difference to RadiusKnnResultSet: Works like KnnResultSet, doesn't pack the results densely (as the RadiusResultSet does)
372template <typename DistanceType, bool useHeap>
373struct KnnRadiusResultSet
374{
375    int foundNeighbors;
376    DistanceType largestHeapDist;
377    int maxDistIndex;
378    const int k;
379    const bool sorted;
380    const DistanceType epsError;
381    const DistanceType radius_sq;
382
383
384    __device__
385    KnnRadiusResultSet(int knn, bool sortResults, DistanceType eps, DistanceType radius) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), epsError(eps),radius_sq(radius){ }
386
387    //          __host__ __device__
388    //          KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
389
390    __device__
391    inline DistanceType
392    worstDist()
393    {
394        return largestHeapDist;
395    }
396
397    __device__
398    inline void
399    insert(int index, DistanceType dist)
400    {
401        if( dist < largestHeapDist ) {
402            if( foundNeighbors<k ) {
403                resultDist[foundNeighbors]=dist;
404                resultIndex[foundNeighbors]=index;
405                if( foundNeighbors==k-1 ) {
406                    if( useHeap ) {
407                        flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
408                        largestHeapDist=resultDist[0];
409                    }
410                    else {
411                        findLargestDistIndex();
412                    }
413                }
414                foundNeighbors++;
415            }
416            else { //if( dist < largestHeapDist )
417                if( useHeap ) {
418                    resultDist[0]=dist;
419                    resultIndex[0]=index;
420                    flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
421                    largestHeapDist=resultDist[0];
422                }
423                else {
424                    resultDist[maxDistIndex]=dist;
425                    resultIndex[maxDistIndex]=index;
426                    findLargestDistIndex();
427                }
428            }
429        }
430    }
431    __device__
432    void
433    findLargestDistIndex( )
434    {
435        largestHeapDist=resultDist[0];
436        maxDistIndex=0;
437        for( int i=1; i<k; i++ )
438            if( resultDist[i] > largestHeapDist ) {
439                maxDistIndex=i;
440                largestHeapDist=resultDist[i];
441            }
442    }
443
444    DistanceType* resultDist;
445    int* resultIndex;
446
447    __device__
448    inline void
449    setResultLocation( DistanceType* dists, int* index, int thread, int stride )
450    {
451        resultDist=dists+stride*thread;
452        resultIndex=index+stride*thread;
453        for( int i=0; i<stride; i++ ) {
454            resultDist[i]=INFINITY;
455            resultIndex[i]=-1;
456            //                  resultIndex[tid+i*blockDim.x]=-1;
457            //                  resultDist[tid+i*blockDim.x]=INFINITY;
458        }
459    }
460
461    __device__
462    inline void
463    finish()
464    {
465        if( sorted ) {
466            if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
467            for( int i=k-1; i>0; i-- ) {
468                flann::cuda::swap( resultDist[0], resultDist[i] );
469                flann::cuda::swap( resultIndex[0], resultIndex[i] );
470                flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
471            }
472        }
473    }
474};
475
476//! fills the radius output buffer.
477//! IMPORTANT ASSERTION: ASSUMES THAT THERE IS ENOUGH SPACE FOR EVERY NEIGHBOR! IF THIS ISN'T
478//! TRUE, USE KnnRadiusResultSet! (Otherwise, the neighbors of one element might overflow into the next element, or past the buffer.)
479template< typename DistanceType >
480struct RadiusResultSet
481{
482    DistanceType radius_sq_;
483    int* segment_starts_;
484    int count_;
485    bool sorted_;
486
487    __device__
488    RadiusResultSet(DistanceType radius, int* segment_starts, bool sorted) : radius_sq_(radius), segment_starts_(segment_starts), count_(0), sorted_(sorted){ }
489
490    __device__
491    inline DistanceType
492    worstDist()
493    {
494        return radius_sq_;
495    }
496
497    __device__
498    inline void
499    insert(int index, DistanceType dist)
500    {
501        if( dist < radius_sq_ ) {
502            resultIndex[count_]=index;
503            resultDist[count_]=dist;
504            count_++;
505        }
506    }
507
508    int* resultIndex;
509    DistanceType* resultDist;
510
511    __device__
512    inline void
513    setResultLocation( DistanceType* dists, int* index, int thread, int /*stride*/ )
514    {
515        resultIndex=index+segment_starts_[thread];
516        resultDist=dists+segment_starts_[thread];
517    }
518
519    __device__
520    inline void
521    finish()
522    {
523        if( sorted_ ) {
524            flann::cuda::heap::make_heap( resultDist,resultIndex, count_, GreaterThan<DistanceType>() );
525            for( int i=count_-1; i>0; i-- ) {
526                flann::cuda::swap( resultDist[0], resultDist[i] );
527                flann::cuda::swap( resultIndex[0], resultIndex[i] );
528                flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
529            }
530        }
531    }
532};
533}
534}
535
536#endif
Note: See TracBrowser for help on using the repository browser.