- Timestamp:
- 04/12/17 12:15:37 (8 years ago)
- Location:
- branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs
r14806 r14855 76 76 private Cell boundary; 77 77 78 private double[,] data; 79 78 80 // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children 79 private double[,] data;80 81 81 private double[] centerOfMass; 82 82 private readonly int[] index = new int[QT_NODE_CAPACITY]; … … 91 91 var meanY = new double[d]; 92 92 var minY = new double[d]; 93 for (var i = 0; i < d; i++) minY[i] = double.MaxValue;93 for (var i = 0; i < d; i++) minY[i] = double.MaxValue; 94 94 var maxY = new double[d]; 95 for (var i = 0; i < d; i++) maxY[i] = double.MinValue;96 for (uint i = 0; i < n; i++) {97 for (uint j = 0; j < d; j++) {98 meanY[j] = inpData[i, j];99 if (inpData[i, j] < minY[j]) minY[j] = inpData[i, j];100 if (inpData[i, j] > maxY[j]) maxY[j] = inpData[i, j];101 } 102 } 103 for (var i = 0; i < d; i++) meanY[i] /= n;95 for (var i = 0; i < d; i++) maxY[i] = double.MinValue; 96 for (uint i = 0; i < n; i++) { 97 for (uint j = 0; j < d; j++) { 98 meanY[j] += inpData[i, j]; 99 if (inpData[i, j] < minY[j]) minY[j] = inpData[i, j]; 100 if (inpData[i, j] > maxY[j]) maxY[j] = inpData[i, j]; 101 } 102 } 103 for (var i = 0; i < d; i++) meanY[i] /= n; 104 104 var width = new double[d]; 105 for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - maxY[i]) + 1e-5; // TODO constant?105 for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - minY[i]) + 1e-5; 106 106 Init(null, inpData, meanY, width); 107 107 Fill(n); … … 123 123 var point = new double[dimension]; 124 124 Buffer.BlockCopy(data, sizeof(double) * dimension * newIndex, point, 0, sizeof(double) * dimension); 125 if (!boundary.ContainsPoint(point)) return false;125 if (!boundary.ContainsPoint(point)) return false; 126 126 cumulativeSize++; 127 127 // Online update of cumulative size and center-of-mass 128 128 var mult1 = (double)(cumulativeSize - 1) / cumulativeSize; 129 129 var mult2 = 1.0 / cumulativeSize; 130 for (var i = 0; i < dimension; i++) centerOfMass[i] *= mult1;131 for (var i = 0; i < dimension; i++) centerOfMass[i] += mult2 * point[i];130 for (var i = 0; i < dimension; i++) centerOfMass[i] *= mult1; 131 for (var i = 0; i < dimension; i++) centerOfMass[i] += mult2 * point[i]; 132 132 133 133 // If there is space in this quad tree and it is a leaf, add the object here 134 if (isLeaf && size < QT_NODE_CAPACITY) {134 if (isLeaf && size < QT_NODE_CAPACITY) { 135 135 index[size] = newIndex; 136 136 size++; … … 140 140 // Don't add duplicates for now (this is not very nice) 141 141 var anyDuplicate = false; 142 for (uint n = 0; n < size; n++) {142 for (uint n = 0; n < size; n++) { 143 143 var duplicate = true; 144 for (var d = 0; d < dimension; d++) {145 if (Math.Abs(point[d] - data[index[n], d]) < double.Epsilon) continue;144 for (var d = 0; d < dimension; d++) { 145 if (Math.Abs(point[d] - data[index[n], d]) < double.Epsilon) continue; 146 146 duplicate = false; break; 147 147 } 148 148 anyDuplicate = anyDuplicate | duplicate; 149 149 } 150 if (anyDuplicate) return true;150 if (anyDuplicate) return true; 151 151 152 152 // Otherwise, we need to subdivide the current cell 153 if (isLeaf) Subdivide();153 if (isLeaf) Subdivide(); 154 154 // Find out where the point can be inserted 155 for (var i = 0; i < noChildren; i++) {156 if (children[i].Insert(newIndex)) return true;155 for (var i = 0; i < noChildren; i++) { 156 if (children[i].Insert(newIndex)) return true; 157 157 } 158 158 … … 165 165 var newCorner = new double[dimension]; 166 166 var newWidth = new double[dimension]; 167 for (var i = 0; i < noChildren; i++) {167 for (var i = 0; i < noChildren; i++) { 168 168 var div = 1; 169 for (var d = 0; d < dimension; d++) {169 for (var d = 0; d < dimension; d++) { 170 170 newWidth[d] = .5 * boundary.GetWidth(d); 171 if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);171 if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d); 172 172 else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d); 173 173 div *= 2; … … 177 177 178 178 // Move existing points to correct children 179 for (var i = 0; i < size; i++) {179 for (var i = 0; i < size; i++) { 180 180 var success = false; 181 for (var j = 0; j < noChildren; j++) {182 if (!success) success = children[j].Insert(index[i]);183 } 184 index[i] = int.MaxValue;181 for (var j = 0; j < noChildren; j++) { 182 if (!success) success = children[j].Insert(index[i]); 183 } 184 index[i] = -1; // as in tSNE implementation by van der Maaten 185 185 } 186 186 … … 192 192 public bool IsCorrect() { 193 193 var row = new double[dimension]; 194 for(var n = 0; n < size; n++) Buffer.BlockCopy(data, sizeof(double) * dimension * n, row, 0, sizeof(double) * dimension); 195 if(!boundary.ContainsPoint(row)) return false; 196 if(isLeaf) return true; 194 for (var n = 0; n < size; n++) { 195 Buffer.BlockCopy(data, sizeof(double) * dimension * index[n], row, 0, sizeof(double) * dimension); 196 if (!boundary.ContainsPoint(row)) return false; 197 } 198 if (isLeaf) return true; 197 199 var correct = true; 198 for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect();200 for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect(); 199 201 return correct; 200 202 } … … 206 208 public int GetAllIndices(int[] indices, int loc) { 207 209 // Gather indices in current quadrant 208 for (var i = 0; i < size; i++) indices[loc + i] = index[i];210 for (var i = 0; i < size; i++) indices[loc + i] = index[i]; 209 211 loc += (int)size; 210 212 // Gather indices in children 211 if (isLeaf) return loc;212 for (var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc);213 if (isLeaf) return loc; 214 for (var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc); 213 215 return loc; 214 216 } … … 220 222 public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ) { 221 223 // Make sure that we spend no time on empty nodes or self-interactions 222 if (cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return;224 if (cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return; 223 225 224 226 // Compute distance between point and center-of-mass 225 // TODO: squared distance with normalized axes is used here!226 227 var D = .0; 227 for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d];228 for (var d = 0; d < dimension; d++) D += buff[d] * buff[d];228 for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d]; 229 for (var d = 0; d < dimension; d++) D += buff[d] * buff[d]; 229 230 230 231 // Check whether we can use this node as a "summary" 231 232 var maxWidth = 0.0; 232 for (var d = 0; d < dimension; d++) {233 for (var d = 0; d < dimension; d++) { 233 234 var curWidth = boundary.GetWidth(d); 234 235 maxWidth = (maxWidth > curWidth) ? maxWidth : curWidth; 235 236 } 236 if (isLeaf || maxWidth / Math.Sqrt(D) < theta) {237 if (isLeaf || maxWidth / Math.Sqrt(D) < theta) { 237 238 238 239 // Compute and add t-SNE force between point and current node … … 241 242 sumQ += mult; 242 243 mult *= D; 243 for (var d = 0; d < dimension; d++) negF[d] += mult * buff[d];244 for (var d = 0; d < dimension; d++) negF[d] += mult * buff[d]; 244 245 } else { 245 246 246 247 // Recursively apply Barnes-Hut to children 247 for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ);248 for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ); 248 249 } 249 250 } … … 251 252 public void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF) { 252 253 // Loop over all edges in the graph 253 for (var k = 0; k < n; k++) {254 for (var i = rowP[k]; i < rowP[k + 1]; i++) {254 for (var k = 0; k < n; k++) { 255 for (var i = rowP[k]; i < rowP[k + 1]; i++) { 255 256 256 257 // Compute pairwise distance and Q-value 257 258 // uses squared distance 258 259 var d = 1.0; 259 for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j];260 for (var j = 0; j < dimension; j++) d += buff[j] * buff[j];260 for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j]; 261 for (var j = 0; j < dimension; j++) d += buff[j] * buff[j]; 261 262 d = valP[i] / d; 262 263 263 264 // Sum positive force 264 for (var j = 0; j < dimension; j++) posF[k, j] += d * buff[j];265 for (var j = 0; j < dimension; j++) posF[k, j] += d * buff[j]; 265 266 } 266 267 } … … 269 270 #region Helpers 270 271 private void Fill(int n) { 271 for (var i = 0; i < n; i++) Insert(i);272 for (var i = 0; i < n; i++) Insert(i); 272 273 } 273 274 private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) { … … 275 276 dimension = inpData.GetLength(1); 276 277 noChildren = 2; 277 for (uint i = 1; i < dimension; i++) noChildren *= 2;278 for (uint i = 1; i < dimension; i++) noChildren *= 2; 278 279 data = inpData; 279 280 isLeaf = true; … … 316 317 } 317 318 public bool ContainsPoint(double[] point) { 318 for (var d = 0; d < dimension; d++)319 if (corner[d] - width[d] > point[d] || corner[d] + width[d] < point[d]) return false;319 for (var d = 0; d < dimension; d++) 320 if (corner[d] - width[d] > point[d] || corner[d] + width[d] < point[d]) return false; 320 321 return true; 321 322 } -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r14837 r14855 285 285 } else if((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) { 286 286 var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray(); 287 var max = classValues.Max() + 0.1; // TODO consts287 var max = classValues.Max() + 0.1; 288 288 var min = classValues.Min() - 0.1; 289 289 const int contours = 8; -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r14837 r14855 285 285 var minBeta = double.MinValue; 286 286 var maxBeta = double.MaxValue; 287 const double tol = 1e-5; // TODO: why 1e-5?287 const double tol = 1e-5; 288 288 289 289 // Iterate until we found a good perplexity 290 290 var iter = 0; double sumP = 0; 291 while(!found && iter < 200) { // TODO 200 iterations always ok?291 while(!found && iter < 200) { 292 292 293 293 // Compute Gaussian kernel row 294 for(var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); // TODO distances m+1?294 for(var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); 295 295 296 296 // Compute entropy of current row … … 298 298 for(var m = 0; m < k; m++) sumP += curP[m]; 299 299 var h = .0; 300 for(var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]); // TODO: distances m+1?300 for(var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]); 301 301 h = h / sumP + Math.Log(sumP); 302 302 -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs
r14787 r14855 84 84 public void Search(T target, int k, out IList<T> results, out IList<double> distances) { 85 85 var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k); 86 Search(root, target, k, heap, double.MaxValue); 86 double tau = double.MaxValue; 87 Search(root, target, k, heap, ref tau); 87 88 var res = new List<T>(); 88 89 var dist = new List<double>(); 89 90 while (heap.Size > 0) { 90 res.Add(items[heap.PeekMinValue().Index]); 91 res.Add(items[heap.PeekMinValue().Index]); // actually max distance 91 92 dist.Add(heap.PeekMinValue().Value); 92 93 heap.RemoveMin(); 93 94 } 94 res.Reverse(); 95 res.Reverse(); 95 96 dist.Reverse(); 96 97 results = res; … … 98 99 } 99 100 100 private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, double tau = double.MaxValue) {101 private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, ref double tau) { 101 102 if (node == null) return; 102 103 var dist = distance.Get(items[node.index], target); 103 104 if (dist < tau) { 104 if (heap.Size == k) heap.RemoveMin(); 105 heap.Insert(-dist, new IndexedItem<double>(node.index, dist)); //TODO check if minheap or maxheap should be used here105 if (heap.Size == k) heap.RemoveMin(); // remove furthest node from result list (if we already have k results) 106 heap.Insert(-dist, new IndexedItem<double>(node.index, dist)); 106 107 if (heap.Size == k) tau = heap.PeekMinValue().Value; 107 108 } … … 109 110 110 111 if (dist < node.threshold) { 111 if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau); // if there can still be neighbors inside the ball, recursively search left child first112 if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau); // if there can still be neighbors outside the ball, recursively search right child112 if (dist - tau <= node.threshold) Search(node.left, target, k, heap, ref tau); // if there can still be neighbors inside the ball, recursively search left child first 113 if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau); // if there can still be neighbors outside the ball, recursively search right child 113 114 } else { 114 if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau); // if there can still be neighbors outside the ball, recursively search right child first115 if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau); // if there can still be neighbors inside the ball, recursively search left child115 if (dist + tau >= node.threshold) Search(node.right, target, k, heap, ref tau); // if there can still be neighbors outside the ball, recursively search right child first 116 if (dist - tau <= node.threshold) Search(node.left, target, k, heap, ref tau); // if there can still be neighbors inside the ball, recursively search left child 116 117 } 117 118 } … … 124 125 var node = new Node { index = lower }; 125 126 var r = new MersenneTwister(); //outer behaviour does not change with the random seed => no need to take the IRandom from the algorithm 126 if (upper - lower <= 1) return node; // if we did not arrive at leaf yet127 if (upper - lower <= 1) return node; 127 128 129 // if we did not arrive at leaf yet 128 130 // Choose an arbitrary point and move it to the start 129 var i = (int)(r.NextDouble() / 1* (upper - lower - 1)) + lower;131 var i = (int)(r.NextDouble() * (upper - lower - 1)) + lower; 130 132 items.Swap(lower, i); 131 133
Note: See TracChangeset
for help on using the changeset viewer.