1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20 


21  //Code is based on an implementation from Laurens van der Maaten


22 


23  /*


24  *


25  * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)


26  * All rights reserved.


27  *


28  * Redistribution and use in source and binary forms, with or without


29  * modification, are permitted provided that the following conditions are met:


30  * 1. Redistributions of source code must retain the above copyright


31  * notice, this list of conditions and the following disclaimer.


32  * 2. Redistributions in binary form must reproduce the above copyright


33  * notice, this list of conditions and the following disclaimer in the


34  * documentation and/or other materials provided with the distribution.


35  * 3. All advertising materials mentioning features or use of this software


36  * must display the following acknowledgement:


37  * This product includes software developed by the Delft University of Technology.


38  * 4. Neither the name of the Delft University of Technology nor the names of


39  * its contributors may be used to endorse or promote products derived from


40  * this software without specific prior written permission.


41  *


42  * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS


43  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES


44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO


45  * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,


46  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,


47  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR


48  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN


49  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING


50  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY


51  * OF SUCH DAMAGE.


52  *


53  */


54  #endregion


55 


56  using System.Collections.Generic;


57  using System.Linq;


58  using HeuristicLab.Collections;


59  using HeuristicLab.Random;


60 


61  namespace HeuristicLab.Algorithms.DataAnalysis {


62  /// <summary>


63  /// Vantage point tree (or VP tree) is a metric tree that segregates data in a metric space by choosing


64  /// a position in the space (the "vantage point") and partitioning the data points into two parts:


65  /// those points that are nearer to the vantage point than a threshold, and those points that are not.


66  /// By recursively applying this procedure to partition the data into smaller and smaller sets, a tree


67  /// data structure is created where neighbors in the tree are likely to be neighbors in the space.


68  /// </summary>


69  /// <typeparam name="T"></typeparam>


70  public class VantagePointTree<T> : IVantagePointTree<T> {


71  #region properties


72  private readonly List<T> items;


73  private readonly Node root;


74  private readonly IDistance<T> distance;


75  #endregion


76 


77  public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() {


78  root = null;


79  this.distance = distance;


80  this.items = items.Select(x => x).ToList();


81  root = BuildFromPoints(0, this.items.Count);


82  }


83 


84  public void Search(T target, int k, out IList<T> results, out IList<double> distances) {


85  var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k);


86  double tau = double.MaxValue;


87  Search(root, target, k, heap, ref tau);


88  var res = new List<T>();


89  var dist = new List<double>();


90  while (heap.Size > 0) {


91  res.Add(items[heap.PeekMinValue().Index]); // actually max distance


92  dist.Add(heap.PeekMinValue().Value);


93  heap.RemoveMin();


94  }


95  res.Reverse();


96  dist.Reverse();


97  results = res;


98  distances = dist;


99  }


100 


101  private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, ref double tau) {


102  if (node == null) return;


103  var dist = distance.Get(items[node.index], target);


104  if (dist < tau) {


105  if (heap.Size == k) heap.RemoveMin(); // remove furthest node from result list (if we already have k results)


106  heap.Insert(dist, new IndexedItem<double>(node.index, dist));


107  if (heap.Size == k) tau = heap.PeekMinValue().Value;


108  }


109  if (node.left == null && node.right == null) return;


110 


111  if (dist < node.threshold) {


112  if (dist  tau <= node.threshold) Search(node.left, target, k, heap, ref tau); // if there can still be neighbors inside the ball, recursively search left child first


113  if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau); // if there can still be neighbors outside the ball, recursively search right child


114  } else {


115  if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau); // if there can still be neighbors outside the ball, recursively search right child first


116  if (dist  tau <= node.threshold) Search(node.left, target, k, heap, ref tau); // if there can still be neighbors inside the ball, recursively search left child


117  }


118  }


119 


120  private Node BuildFromPoints(int lower, int upper) {


121  if (upper == lower) // indicates that we're done here!


122  return null;


123 


124  // Lower index is center of current node


125  var node = new Node { index = lower };


126  var r = new MersenneTwister(); //outer behaviour does not change with the random seed => no need to take the IRandom from the algorithm


127  if (upper  lower <= 1) return node;


128 


129  // if we did not arrive at leaf yet


130  // Choose an arbitrary point and move it to the start


131  var i = (int)(r.NextDouble() * (upper  lower  1)) + lower;


132  items.Swap(lower, i);


133 


134  // Partition around the median distance


135  var median = (upper + lower) / 2;


136  items.NthElement(lower + 1, upper  1, median, distance.GetDistanceComparer(items[lower]));


137 


138  // Threshold of the new node will be the distance to the median


139  node.threshold = distance.Get(items[lower], items[median]);


140 


141  // Recursively build tree


142  node.index = lower;


143  node.left = BuildFromPoints(lower + 1, median);


144  node.right = BuildFromPoints(median, upper);


145 


146  // Return result


147  return node;


148  }


149 


150  private sealed class Node {


151  public int index;


152  public double threshold;


153  public Node left;


154  public Node right;


155 


156  internal Node() {


157  index = 0;


158  threshold = 0;


159  left = null;


160  right = null;


161  }


162  }


163  }


164  }

