Changeset 14855 for branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs
- Timestamp:
- 04/12/17 12:15:37 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs
r14806 r14855 76 76 private Cell boundary; 77 77 78 private double[,] data; 79 78 80 // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children 79 private double[,] data;80 81 81 private double[] centerOfMass; 82 82 private readonly int[] index = new int[QT_NODE_CAPACITY]; … … 91 91 var meanY = new double[d]; 92 92 var minY = new double[d]; 93 for (var i = 0; i < d; i++) minY[i] = double.MaxValue;93 for (var i = 0; i < d; i++) minY[i] = double.MaxValue; 94 94 var maxY = new double[d]; 95 for (var i = 0; i < d; i++) maxY[i] = double.MinValue;96 for (uint i = 0; i < n; i++) {97 for (uint j = 0; j < d; j++) {98 meanY[j] = inpData[i, j];99 if (inpData[i, j] < minY[j]) minY[j] = inpData[i, j];100 if (inpData[i, j] > maxY[j]) maxY[j] = inpData[i, j];101 } 102 } 103 for (var i = 0; i < d; i++) meanY[i] /= n;95 for (var i = 0; i < d; i++) maxY[i] = double.MinValue; 96 for (uint i = 0; i < n; i++) { 97 for (uint j = 0; j < d; j++) { 98 meanY[j] += inpData[i, j]; 99 if (inpData[i, j] < minY[j]) minY[j] = inpData[i, j]; 100 if (inpData[i, j] > maxY[j]) maxY[j] = inpData[i, j]; 101 } 102 } 103 for (var i = 0; i < d; i++) meanY[i] /= n; 104 104 var width = new double[d]; 105 for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - maxY[i]) + 1e-5; // TODO constant?105 for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - minY[i]) + 1e-5; 106 106 Init(null, inpData, meanY, width); 107 107 Fill(n); … … 123 123 var point = new double[dimension]; 124 124 Buffer.BlockCopy(data, sizeof(double) * dimension * newIndex, point, 0, sizeof(double) * dimension); 125 if (!boundary.ContainsPoint(point)) return false;125 if (!boundary.ContainsPoint(point)) return false; 126 126 cumulativeSize++; 127 127 // Online update of cumulative size and center-of-mass 128 128 var mult1 = (double)(cumulativeSize - 1) / cumulativeSize; 129 129 var mult2 = 1.0 / cumulativeSize; 130 for (var i = 0; i < dimension; i++) centerOfMass[i] *= mult1;131 for (var i = 0; i < dimension; i++) centerOfMass[i] += mult2 * point[i];130 for (var i = 0; i < dimension; i++) centerOfMass[i] *= mult1; 131 for (var i = 0; i < dimension; i++) centerOfMass[i] += mult2 * point[i]; 132 132 133 133 // If there is space in this quad tree and it is a leaf, add the object here 134 if (isLeaf && size < QT_NODE_CAPACITY) {134 if (isLeaf && size < QT_NODE_CAPACITY) { 135 135 index[size] = newIndex; 136 136 size++; … … 140 140 // Don't add duplicates for now (this is not very nice) 141 141 var anyDuplicate = false; 142 for (uint n = 0; n < size; n++) {142 for (uint n = 0; n < size; n++) { 143 143 var duplicate = true; 144 for (var d = 0; d < dimension; d++) {145 if (Math.Abs(point[d] - data[index[n], d]) < double.Epsilon) continue;144 for (var d = 0; d < dimension; d++) { 145 if (Math.Abs(point[d] - data[index[n], d]) < double.Epsilon) continue; 146 146 duplicate = false; break; 147 147 } 148 148 anyDuplicate = anyDuplicate | duplicate; 149 149 } 150 if (anyDuplicate) return true;150 if (anyDuplicate) return true; 151 151 152 152 // Otherwise, we need to subdivide the current cell 153 if (isLeaf) Subdivide();153 if (isLeaf) Subdivide(); 154 154 // Find out where the point can be inserted 155 for (var i = 0; i < noChildren; i++) {156 if (children[i].Insert(newIndex)) return true;155 for (var i = 0; i < noChildren; i++) { 156 if (children[i].Insert(newIndex)) return true; 157 157 } 158 158 … … 165 165 var newCorner = new double[dimension]; 166 166 var newWidth = new double[dimension]; 167 for (var i = 0; i < noChildren; i++) {167 for (var i = 0; i < noChildren; i++) { 168 168 var div = 1; 169 for (var d = 0; d < dimension; d++) {169 for (var d = 0; d < dimension; d++) { 170 170 newWidth[d] = .5 * boundary.GetWidth(d); 171 if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);171 if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d); 172 172 else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d); 173 173 div *= 2; … … 177 177 178 178 // Move existing points to correct children 179 for (var i = 0; i < size; i++) {179 for (var i = 0; i < size; i++) { 180 180 var success = false; 181 for (var j = 0; j < noChildren; j++) {182 if (!success) success = children[j].Insert(index[i]);183 } 184 index[i] = int.MaxValue;181 for (var j = 0; j < noChildren; j++) { 182 if (!success) success = children[j].Insert(index[i]); 183 } 184 index[i] = -1; // as in tSNE implementation by van der Maaten 185 185 } 186 186 … … 192 192 public bool IsCorrect() { 193 193 var row = new double[dimension]; 194 for(var n = 0; n < size; n++) Buffer.BlockCopy(data, sizeof(double) * dimension * n, row, 0, sizeof(double) * dimension); 195 if(!boundary.ContainsPoint(row)) return false; 196 if(isLeaf) return true; 194 for (var n = 0; n < size; n++) { 195 Buffer.BlockCopy(data, sizeof(double) * dimension * index[n], row, 0, sizeof(double) * dimension); 196 if (!boundary.ContainsPoint(row)) return false; 197 } 198 if (isLeaf) return true; 197 199 var correct = true; 198 for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect();200 for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect(); 199 201 return correct; 200 202 } … … 206 208 public int GetAllIndices(int[] indices, int loc) { 207 209 // Gather indices in current quadrant 208 for (var i = 0; i < size; i++) indices[loc + i] = index[i];210 for (var i = 0; i < size; i++) indices[loc + i] = index[i]; 209 211 loc += (int)size; 210 212 // Gather indices in children 211 if (isLeaf) return loc;212 for (var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc);213 if (isLeaf) return loc; 214 for (var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc); 213 215 return loc; 214 216 } … … 220 222 public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ) { 221 223 // Make sure that we spend no time on empty nodes or self-interactions 222 if (cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return;224 if (cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return; 223 225 224 226 // Compute distance between point and center-of-mass 225 // TODO: squared distance with normalized axes is used here!226 227 var D = .0; 227 for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d];228 for (var d = 0; d < dimension; d++) D += buff[d] * buff[d];228 for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d]; 229 for (var d = 0; d < dimension; d++) D += buff[d] * buff[d]; 229 230 230 231 // Check whether we can use this node as a "summary" 231 232 var maxWidth = 0.0; 232 for (var d = 0; d < dimension; d++) {233 for (var d = 0; d < dimension; d++) { 233 234 var curWidth = boundary.GetWidth(d); 234 235 maxWidth = (maxWidth > curWidth) ? maxWidth : curWidth; 235 236 } 236 if (isLeaf || maxWidth / Math.Sqrt(D) < theta) {237 if (isLeaf || maxWidth / Math.Sqrt(D) < theta) { 237 238 238 239 // Compute and add t-SNE force between point and current node … … 241 242 sumQ += mult; 242 243 mult *= D; 243 for (var d = 0; d < dimension; d++) negF[d] += mult * buff[d];244 for (var d = 0; d < dimension; d++) negF[d] += mult * buff[d]; 244 245 } else { 245 246 246 247 // Recursively apply Barnes-Hut to children 247 for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ);248 for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ); 248 249 } 249 250 } … … 251 252 public void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF) { 252 253 // Loop over all edges in the graph 253 for (var k = 0; k < n; k++) {254 for (var i = rowP[k]; i < rowP[k + 1]; i++) {254 for (var k = 0; k < n; k++) { 255 for (var i = rowP[k]; i < rowP[k + 1]; i++) { 255 256 256 257 // Compute pairwise distance and Q-value 257 258 // uses squared distance 258 259 var d = 1.0; 259 for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j];260 for (var j = 0; j < dimension; j++) d += buff[j] * buff[j];260 for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j]; 261 for (var j = 0; j < dimension; j++) d += buff[j] * buff[j]; 261 262 d = valP[i] / d; 262 263 263 264 // Sum positive force 264 for (var j = 0; j < dimension; j++) posF[k, j] += d * buff[j];265 for (var j = 0; j < dimension; j++) posF[k, j] += d * buff[j]; 265 266 } 266 267 } … … 269 270 #region Helpers 270 271 private void Fill(int n) { 271 for (var i = 0; i < n; i++) Insert(i);272 for (var i = 0; i < n; i++) Insert(i); 272 273 } 273 274 private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) { … … 275 276 dimension = inpData.GetLength(1); 276 277 noChildren = 2; 277 for (uint i = 1; i < dimension; i++) noChildren *= 2;278 for (uint i = 1; i < dimension; i++) noChildren *= 2; 278 279 data = inpData; 279 280 isLeaf = true; … … 316 317 } 317 318 public bool ContainsPoint(double[] point) { 318 for (var d = 0; d < dimension; d++)319 if (corner[d] - width[d] > point[d] || corner[d] + width[d] < point[d]) return false;319 for (var d = 0; d < dimension; d++) 320 if (corner[d] - width[d] > point[d] || corner[d] + width[d] < point[d]) return false; 320 321 return true; 321 322 }
Note: See TracChangeset
for help on using the changeset viewer.