Changeset 14855


Ignore:
Timestamp:
04/12/17 12:15:37 (2 weeks ago)
Author:
gkronber
Message:

#2700: made some changes / bug-fixes while reviewing

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs

    r14806 r14855  
    7676    private Cell boundary;
    7777
     78    private double[,] data;
     79
    7880    // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children
    79     private double[,] data;
    80 
    8181    private double[] centerOfMass;
    8282    private readonly int[] index = new int[QT_NODE_CAPACITY];
     
    9191      var meanY = new double[d];
    9292      var minY = new double[d];
    93       for(var i = 0; i < d; i++) minY[i] = double.MaxValue;
     93      for (var i = 0; i < d; i++) minY[i] = double.MaxValue;
    9494      var maxY = new double[d];
    95       for(var i = 0; i < d; i++) maxY[i] = double.MinValue;
    96       for(uint i = 0; i < n; i++) {
    97         for(uint j = 0; j < d; j++) {
    98           meanY[j] = inpData[i, j];
    99           if(inpData[i, j] < minY[j]) minY[j] = inpData[i, j];
    100           if(inpData[i, j] > maxY[j]) maxY[j] = inpData[i, j];
    101         }
    102       }
    103       for(var i = 0; i < d; i++) meanY[i] /= n;
     95      for (var i = 0; i < d; i++) maxY[i] = double.MinValue;
     96      for (uint i = 0; i < n; i++) {
     97        for (uint j = 0; j < d; j++) {
     98          meanY[j] += inpData[i, j];
     99          if (inpData[i, j] < minY[j]) minY[j] = inpData[i, j];
     100          if (inpData[i, j] > maxY[j]) maxY[j] = inpData[i, j];
     101        }
     102      }
     103      for (var i = 0; i < d; i++) meanY[i] /= n;
    104104      var width = new double[d];
    105       for(var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - maxY[i]) + 1e-5; // TODO constant?
     105      for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - minY[i]) + 1e-5;
    106106      Init(null, inpData, meanY, width);
    107107      Fill(n);
     
    123123      var point = new double[dimension];
    124124      Buffer.BlockCopy(data, sizeof(double) * dimension * newIndex, point, 0, sizeof(double) * dimension);
    125       if(!boundary.ContainsPoint(point)) return false;
     125      if (!boundary.ContainsPoint(point)) return false;
    126126      cumulativeSize++;
    127127      // Online update of cumulative size and center-of-mass
    128128      var mult1 = (double)(cumulativeSize - 1) / cumulativeSize;
    129129      var mult2 = 1.0 / cumulativeSize;
    130       for(var i = 0; i < dimension; i++) centerOfMass[i] *= mult1;
    131       for(var i = 0; i < dimension; i++) centerOfMass[i] += mult2 * point[i];
     130      for (var i = 0; i < dimension; i++) centerOfMass[i] *= mult1;
     131      for (var i = 0; i < dimension; i++) centerOfMass[i] += mult2 * point[i];
    132132
    133133      // If there is space in this quad tree and it is a leaf, add the object here
    134       if(isLeaf && size < QT_NODE_CAPACITY) {
     134      if (isLeaf && size < QT_NODE_CAPACITY) {
    135135        index[size] = newIndex;
    136136        size++;
     
    140140      // Don't add duplicates for now (this is not very nice)
    141141      var anyDuplicate = false;
    142       for(uint n = 0; n < size; n++) {
     142      for (uint n = 0; n < size; n++) {
    143143        var duplicate = true;
    144         for(var d = 0; d < dimension; d++) {
    145           if(Math.Abs(point[d] - data[index[n], d]) < double.Epsilon) continue;
     144        for (var d = 0; d < dimension; d++) {
     145          if (Math.Abs(point[d] - data[index[n], d]) < double.Epsilon) continue;
    146146          duplicate = false; break;
    147147        }
    148148        anyDuplicate = anyDuplicate | duplicate;
    149149      }
    150       if(anyDuplicate) return true;
     150      if (anyDuplicate) return true;
    151151
    152152      // Otherwise, we need to subdivide the current cell
    153       if(isLeaf) Subdivide();
     153      if (isLeaf) Subdivide();
    154154      // Find out where the point can be inserted
    155       for(var i = 0; i < noChildren; i++) {
    156         if(children[i].Insert(newIndex)) return true;
     155      for (var i = 0; i < noChildren; i++) {
     156        if (children[i].Insert(newIndex)) return true;
    157157      }
    158158
     
    165165      var newCorner = new double[dimension];
    166166      var newWidth = new double[dimension];
    167       for(var i = 0; i < noChildren; i++) {
     167      for (var i = 0; i < noChildren; i++) {
    168168        var div = 1;
    169         for(var d = 0; d < dimension; d++) {
     169        for (var d = 0; d < dimension; d++) {
    170170          newWidth[d] = .5 * boundary.GetWidth(d);
    171           if((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);
     171          if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);
    172172          else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d);
    173173          div *= 2;
     
    177177
    178178      // Move existing points to correct children
    179       for(var i = 0; i < size; i++) {
     179      for (var i = 0; i < size; i++) {
    180180        var success = false;
    181         for(var j = 0; j < noChildren; j++) {
    182           if(!success) success = children[j].Insert(index[i]);
    183         }
    184         index[i] = int.MaxValue;
     181        for (var j = 0; j < noChildren; j++) {
     182          if (!success) success = children[j].Insert(index[i]);
     183        }
     184        index[i] = -1; // as in tSNE implementation by van der Maaten
    185185      }
    186186
     
    192192    public bool IsCorrect() {
    193193      var row = new double[dimension];
    194       for(var n = 0; n < size; n++) Buffer.BlockCopy(data, sizeof(double) * dimension * n, row, 0, sizeof(double) * dimension);
    195       if(!boundary.ContainsPoint(row)) return false;
    196       if(isLeaf) return true;
     194      for (var n = 0; n < size; n++) {
     195        Buffer.BlockCopy(data, sizeof(double) * dimension * index[n], row, 0, sizeof(double) * dimension);
     196        if (!boundary.ContainsPoint(row)) return false;
     197      }
     198      if (isLeaf) return true;
    197199      var correct = true;
    198       for(var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect();
     200      for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect();
    199201      return correct;
    200202    }
     
    206208    public int GetAllIndices(int[] indices, int loc) {
    207209      // Gather indices in current quadrant
    208       for(var i = 0; i < size; i++) indices[loc + i] = index[i];
     210      for (var i = 0; i < size; i++) indices[loc + i] = index[i];
    209211      loc += (int)size;
    210212      // Gather indices in children
    211       if(isLeaf) return loc;
    212       for(var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc);
     213      if (isLeaf) return loc;
     214      for (var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc);
    213215      return loc;
    214216    }
     
    220222    public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ) {
    221223      // Make sure that we spend no time on empty nodes or self-interactions
    222       if(cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return;
     224      if (cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return;
    223225
    224226      // Compute distance between point and center-of-mass
    225       // TODO: squared distance with normalized axes is used here!
    226227      var D = .0;
    227       for(var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d];
    228       for(var d = 0; d < dimension; d++) D += buff[d] * buff[d];
     228      for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d];
     229      for (var d = 0; d < dimension; d++) D += buff[d] * buff[d];
    229230
    230231      // Check whether we can use this node as a "summary"
    231232      var maxWidth = 0.0;
    232       for(var d = 0; d < dimension; d++) {
     233      for (var d = 0; d < dimension; d++) {
    233234        var curWidth = boundary.GetWidth(d);
    234235        maxWidth = (maxWidth > curWidth) ? maxWidth : curWidth;
    235236      }
    236       if(isLeaf || maxWidth / Math.Sqrt(D) < theta) {
     237      if (isLeaf || maxWidth / Math.Sqrt(D) < theta) {
    237238
    238239        // Compute and add t-SNE force between point and current node
     
    241242        sumQ += mult;
    242243        mult *= D;
    243         for(var d = 0; d < dimension; d++) negF[d] += mult * buff[d];
     244        for (var d = 0; d < dimension; d++) negF[d] += mult * buff[d];
    244245      } else {
    245246
    246247        // Recursively apply Barnes-Hut to children
    247         for(var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ);
     248        for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ);
    248249      }
    249250    }
     
    251252    public void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF) {
    252253      // Loop over all edges in the graph
    253       for(var k = 0; k < n; k++) {
    254         for(var i = rowP[k]; i < rowP[k + 1]; i++) {
     254      for (var k = 0; k < n; k++) {
     255        for (var i = rowP[k]; i < rowP[k + 1]; i++) {
    255256
    256257          // Compute pairwise distance and Q-value
    257258          // uses squared distance
    258259          var d = 1.0;
    259           for(var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j];
    260           for(var j = 0; j < dimension; j++) d += buff[j] * buff[j];
     260          for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j];
     261          for (var j = 0; j < dimension; j++) d += buff[j] * buff[j];
    261262          d = valP[i] / d;
    262263
    263264          // Sum positive force
    264           for(var j = 0; j < dimension; j++) posF[k, j] += d * buff[j];
     265          for (var j = 0; j < dimension; j++) posF[k, j] += d * buff[j];
    265266        }
    266267      }
     
    269270    #region Helpers
    270271    private void Fill(int n) {
    271       for(var i = 0; i < n; i++) Insert(i);
     272      for (var i = 0; i < n; i++) Insert(i);
    272273    }
    273274    private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) {
     
    275276      dimension = inpData.GetLength(1);
    276277      noChildren = 2;
    277       for(uint i = 1; i < dimension; i++) noChildren *= 2;
     278      for (uint i = 1; i < dimension; i++) noChildren *= 2;
    278279      data = inpData;
    279280      isLeaf = true;
     
    316317      }
    317318      public bool ContainsPoint(double[] point) {
    318         for(var d = 0; d < dimension; d++)
    319           if(corner[d] - width[d] > point[d] || corner[d] + width[d] < point[d]) return false;
     319        for (var d = 0; d < dimension; d++)
     320          if (corner[d] - width[d] > point[d] || corner[d] + width[d] < point[d]) return false;
    320321        return true;
    321322      }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs

    r14837 r14855  
    285285        } else if((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
    286286          var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray();
    287           var max = classValues.Max() + 0.1;     // TODO consts
     287          var max = classValues.Max() + 0.1;   
    288288          var min = classValues.Min() - 0.1;
    289289          const int contours = 8;
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs

    r14837 r14855  
    285285          var minBeta = double.MinValue;
    286286          var maxBeta = double.MaxValue;
    287           const double tol = 1e-5;  // TODO: why 1e-5?
     287          const double tol = 1e-5; 
    288288
    289289          // Iterate until we found a good perplexity
    290290          var iter = 0; double sumP = 0;
    291           while(!found && iter < 200) {  // TODO 200 iterations always ok?
     291          while(!found && iter < 200) { 
    292292
    293293            // Compute Gaussian kernel row
    294             for(var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); // TODO distances m+1?
     294            for(var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
    295295
    296296            // Compute entropy of current row
     
    298298            for(var m = 0; m < k; m++) sumP += curP[m];
    299299            var h = .0;
    300             for(var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]); // TODO: distances m+1?
     300            for(var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
    301301            h = h / sumP + Math.Log(sumP);
    302302
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs

    r14787 r14855  
    8484    public void Search(T target, int k, out IList<T> results, out IList<double> distances) {
    8585      var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k);
    86       Search(root, target, k, heap, double.MaxValue);
     86      double tau = double.MaxValue;
     87      Search(root, target, k, heap, ref tau);
    8788      var res = new List<T>();
    8889      var dist = new List<double>();
    8990      while (heap.Size > 0) {
    90         res.Add(items[heap.PeekMinValue().Index]);
     91        res.Add(items[heap.PeekMinValue().Index]);          // actually max distance
    9192        dist.Add(heap.PeekMinValue().Value);
    9293        heap.RemoveMin();
    9394      }
    94       res.Reverse();
     95      res.Reverse(); 
    9596      dist.Reverse();
    9697      results = res;
     
    9899    }
    99100
    100     private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, double tau = double.MaxValue) {
     101    private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, ref double tau) {
    101102      if (node == null) return;
    102103      var dist = distance.Get(items[node.index], target);
    103104      if (dist < tau) {
    104         if (heap.Size == k) heap.RemoveMin();
    105         heap.Insert(-dist, new IndexedItem<double>(node.index, dist));//TODO check if minheap or maxheap should be used here
     105        if (heap.Size == k) heap.RemoveMin();   // remove furthest node from result list (if we already have k results)
     106        heap.Insert(-dist, new IndexedItem<double>(node.index, dist));     
    106107        if (heap.Size == k) tau = heap.PeekMinValue().Value;
    107108      }
     
    109110
    110111      if (dist < node.threshold) {
    111         if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau);   // if there can still be neighbors inside the ball, recursively search left child first
    112         if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau);  // if there can still be neighbors outside the ball, recursively search right child
     112        if (dist - tau <= node.threshold) Search(node.left, target, k, heap, ref tau);   // if there can still be neighbors inside the ball, recursively search left child first
     113        if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau);  // if there can still be neighbors outside the ball, recursively search right child
    113114      } else {
    114         if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau);  // if there can still be neighbors outside the ball, recursively search right child first
    115         if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau);   // if there can still be neighbors inside the ball, recursively search left child
     115        if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau);  // if there can still be neighbors outside the ball, recursively search right child first
     116        if (dist - tau <= node.threshold) Search(node.left, target, k, heap, ref tau);   // if there can still be neighbors inside the ball, recursively search left child
    116117      }
    117118    }
     
    124125      var node = new Node { index = lower };
    125126      var r = new MersenneTwister(); //outer behaviour does not change with the random seed => no need to take the IRandom from the algorithm
    126       if (upper - lower <= 1) return node; // if we did not arrive at leaf yet
     127      if (upper - lower <= 1) return node;
    127128
     129      // if we did not arrive at leaf yet
    128130      // Choose an arbitrary point and move it to the start
    129       var i = (int)(r.NextDouble() / 1 * (upper - lower - 1)) + lower;
     131      var i = (int)(r.NextDouble() * (upper - lower - 1)) + lower;
    130132      items.Swap(lower, i);
    131133
Note: See TracChangeset for help on using the changeset viewer.