Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs @ 15207

Last change on this file since 15207 was 15207, checked in by bwerth, 7 years ago

#2700 worked on TSNE (mostly removing unused code)

File size: 7.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21//Code is based on an implementation from Laurens van der Maaten
22
23/*
24*
25* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
26* All rights reserved.
27*
28* Redistribution and use in source and binary forms, with or without
29* modification, are permitted provided that the following conditions are met:
30* 1. Redistributions of source code must retain the above copyright
31*    notice, this list of conditions and the following disclaimer.
32* 2. Redistributions in binary form must reproduce the above copyright
33*    notice, this list of conditions and the following disclaimer in the
34*    documentation and/or other materials provided with the distribution.
35* 3. All advertising materials mentioning features or use of this software
36*    must display the following acknowledgement:
37*    This product includes software developed by the Delft University of Technology.
38* 4. Neither the name of the Delft University of Technology nor the names of
39*    its contributors may be used to endorse or promote products derived from
40*    this software without specific prior written permission.
41*
42* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
43* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
45* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
47* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
48* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
49* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
50* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
51* OF SUCH DAMAGE.
52*
53*/
54#endregion
55
56using System.Collections.Generic;
57using System.Linq;
58using HeuristicLab.Collections;
59using HeuristicLab.Random;
60
61namespace HeuristicLab.Algorithms.DataAnalysis {
62  /// <summary>
63  /// Vantage point tree  (or VP tree) is a metric tree that segregates data in a metric space by choosing
64  /// a position in the space (the "vantage point") and partitioning the data points into two parts:
65  /// those points that are nearer to the vantage point than a threshold, and those points that are not.
66  /// By recursively applying this procedure to partition the data into smaller and smaller sets, a tree
67  /// data structure is created where neighbors in the tree are likely to be neighbors in the space.
68  /// </summary>
69  /// <typeparam name="T"></typeparam>
70  public class VantagePointTree<T> {
71    private readonly List<T> items;
72    private readonly Node root;
73    private readonly IDistance<T> distance;
74
75    public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() {
76      root = null;
77      this.distance = distance;
78      this.items = items.Select(x => x).ToList();
79      root = BuildFromPoints(0, this.items.Count);
80    }
81
82    /// <summary>
83    /// provides the k-nearest neighbours to a certain target element
84    /// </summary>
85    /// <param name="target">The target element</param>
86    /// <param name="k">How many neighbours</param>
87    /// <param name="results">The nearest neighbouring elements</param>
88    /// <param name="distances">The distances form the target corresponding to the neighbouring elements</param>
89    public void Search(T target, int k, out IList<T> results, out IList<double> distances) {
90      var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k);
91      var tau = double.MaxValue;
92      Search(root, target, k, heap, ref tau);
93      var res = new List<T>();
94      var dist = new List<double>();
95      while (heap.Size > 0) {
96        res.Add(items[heap.PeekMinValue().Index]);// actually max distance
97        dist.Add(heap.PeekMinValue().Value);
98        heap.RemoveMin();
99      }
100      res.Reverse();
101      dist.Reverse();
102      results = res;
103      distances = dist;
104    }
105
106    private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, ref double tau) {
107      if (node == null) return;
108      var dist = distance.Get(items[node.index], target);
109      if (dist < tau) {
110        if (heap.Size == k) heap.RemoveMin();   // remove furthest node from result list (if we already have k results)
111        heap.Insert(-dist, new IndexedItem<double>(node.index, dist));
112        if (heap.Size == k) tau = heap.PeekMinValue().Value;
113      }
114      if (node.left == null && node.right == null) return;
115
116      if (dist < node.threshold) {
117        if (dist - tau <= node.threshold) Search(node.left, target, k, heap, ref tau);   // if there can still be neighbors inside the ball, recursively search left child first
118        if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau);  // if there can still be neighbors outside the ball, recursively search right child
119      } else {
120        if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau);  // if there can still be neighbors outside the ball, recursively search right child first
121        if (dist - tau <= node.threshold) Search(node.left, target, k, heap, ref tau);   // if there can still be neighbors inside the ball, recursively search left child
122      }
123    }
124
125    private Node BuildFromPoints(int lower, int upper) {
126      if (upper == lower)      // indicates that we're done here!
127        return null;
128
129      // Lower index is center of current node
130      var node = new Node { index = lower };
131      var r = new MersenneTwister(); //outer behaviour does not change with the random seed => no need to take the IRandom from the algorithm
132      if (upper - lower <= 1) return node;
133
134      // if we did not arrive at leaf yet
135      // Choose an arbitrary point and move it to the start
136      var i = (int)(r.NextDouble() * (upper - lower - 1)) + lower;
137      items.Swap(lower, i);
138
139      // Partition around the median distance
140      var median = (upper + lower) / 2;
141      items.NthElement(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower]));
142
143      // Threshold of the new node will be the distance to the median
144      node.threshold = distance.Get(items[lower], items[median]);
145
146      // Recursively build tree
147      node.index = lower;
148      node.left = BuildFromPoints(lower + 1, median);
149      node.right = BuildFromPoints(median, upper);
150
151      // Return result
152      return node;
153    }
154
155    private sealed class Node {
156      public int index;
157      public double threshold;
158      public Node left;
159      public Node right;
160
161      internal Node() {
162        index = 0;
163        threshold = 0;
164        left = null;
165        right = null;
166      }
167    }
168  }
169}
Note: See TracBrowser for help on using the repository browser.