Changeset 16308 for branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
- Timestamp:
- 11/20/18 13:52:40 (6 years ago)
- Location:
- branches/2845_EnhancedProgress
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2845_EnhancedProgress
- Property svn:mergeinfo changed
/stable reverse-merged: 15587-15588 /trunk/sources removed
- Property svn:mergeinfo changed
-
branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/stable/HeuristicLab.Algorithms.DataAnalysis reverse-merged: 15587 /trunk/sources/HeuristicLab.Algorithms.DataAnalysis removed
- Property svn:mergeinfo changed
-
branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo deleted
-
branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r16307 r16308 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 8Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 65 65 [StorableClass] 66 66 public class TSNEStatic<T> { 67 67 68 [StorableClass] 68 69 public sealed class TSNEState : DeepCloneable { … … 169 170 [StorableConstructor] 170 171 public TSNEState(bool deserializing) { } 171 172 public TSNEState(IReadOnlyList<T> data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, 173 double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta, bool randomInit) { 172 public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) { 174 173 this.distance = distance; 175 174 this.random = random; … … 184 183 185 184 // initialize 186 noDatapoints = data. Count;185 noDatapoints = data.Length; 187 186 if (noDatapoints - 1 < 3 * perplexity) 188 187 throw new ArgumentException("Perplexity too large for the number of data points!"); … … 194 193 gains = new double[noDatapoints, newDimensions]; 195 194 for (var i = 0; i < noDatapoints; i++) 196 for (var j = 0; j < newDimensions; j++)197 gains[i, j] = 1.0;195 for (var j = 0; j < newDimensions; j++) 196 gains[i, j] = 1.0; 198 197 199 198 p = null; … … 213 212 var rand = new NormalDistributedRandom(random, 0, 1); 214 213 for (var i = 0; i < noDatapoints; i++) 215 for (var j = 0; j < newDimensions; j++) 216 newData[i, j] = rand.NextDouble() * .0001; 217 218 if (!(data[0] is IReadOnlyList<double>) || randomInit) return; 219 for (var i = 0; i < noDatapoints; i++) 220 for (var j = 0; j < newDimensions; j++) { 221 var row = (IReadOnlyList<double>) data[i]; 222 newData[i, j] = row[j % row.Count]; 223 } 214 for (var j = 0; j < newDimensions; j++) 215 newData[i, j] = rand.NextDouble() * .0001; 224 216 } 225 217 #endregion 226 218 227 219 public double EvaluateError() { 228 return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 220 return exact ? 221 EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : 222 EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 229 223 } 230 224 231 225 #region Helpers 232 private static void CalculateApproximateSimilarities( IReadOnlyList<T>data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {226 private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) { 233 227 // Compute asymmetric pairwise input similarities 234 ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) 228 ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity)); 235 229 // Symmetrize input similarities 236 230 int[] sRowP, symColP; … … 241 235 valP = sValP; 242 236 var sumP = .0; 243 for (var i = 0; i < rowP[data.Count]; i++) sumP += valP[i]; 244 for (var i = 0; i < rowP[data.Count]; i++) valP[i] /= sumP; 245 } 246 private static double[,] CalculateExactSimilarites(IReadOnlyList<T> data, IDistance<T> distance, double perplexity) { 237 for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i]; 238 for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP; 239 } 240 241 private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) { 247 242 // Compute similarities 248 var p = new double[data. Count, data.Count];243 var p = new double[data.Length, data.Length]; 249 244 ComputeGaussianPerplexity(data, distance, p, perplexity); 250 245 // Symmetrize input similarities 251 for (var n = 0; n < data. Count; n++) {252 for (var m = n + 1; m < data. Count; m++) {246 for (var n = 0; n < data.Length; n++) { 247 for (var m = n + 1; m < data.Length; m++) { 253 248 p[n, m] += p[m, n]; 254 249 p[m, n] = p[n, m]; … … 256 251 } 257 252 var sumP = .0; 258 for (var i = 0; i < data.Count; i++) { 259 for (var j = 0; j < data.Count; j++) { 260 sumP += p[i, j]; 261 } 262 } 263 for (var i = 0; i < data.Count; i++) { 264 for (var j = 0; j < data.Count; j++) { 265 p[i, j] /= sumP; 266 } 267 } 253 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; 254 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; 268 255 return p; 269 256 } 257 270 258 private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) { 271 259 if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!"); … … 302 290 303 291 // Iterate until we found a good perplexity 304 var iter = 0; 305 double sumP = 0; 292 var iter = 0; double sumP = 0; 306 293 while (!found && iter < 200) { 294 307 295 // Compute Gaussian kernel row 308 296 for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); … … 319 307 if (hdiff < tol && -hdiff < tol) { 320 308 found = true; 321 } 322 else { 309 } else { 323 310 if (hdiff > 0) { 324 311 minBeta = beta; … … 327 314 else 328 315 beta = (beta + maxBeta) / 2.0; 329 } 330 else { 316 } else { 331 317 maxBeta = beta; 332 318 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) … … 349 335 } 350 336 } 351 private static void ComputeGaussianPerplexity( IReadOnlyList<T>x, IDistance<T> distance, double[,] p, double perplexity) {337 private static void ComputeGaussianPerplexity(T[] x, IDistance<T> distance, double[,] p, double perplexity) { 352 338 // Compute the distance matrix 353 339 var dd = ComputeDistances(x, distance); 354 340 355 var n = x. Count;341 var n = x.Length; 356 342 // Compute the Gaussian kernel row by row 357 343 for (var i = 0; i < n; i++) { … … 366 352 // Iterate until we found a good perplexity 367 353 var iter = 0; 368 while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten354 while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten 369 355 370 356 // Compute Gaussian kernel row … … 383 369 if (hdiff < tol && -hdiff < tol) { 384 370 found = true; 385 } 386 else { 371 } else { 387 372 if (hdiff > 0) { 388 373 minBeta = beta; … … 391 376 else 392 377 beta = (beta + maxBeta) / 2.0; 393 } 394 else { 378 } else { 395 379 maxBeta = beta; 396 380 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) … … 409 393 } 410 394 } 411 private static double[][] ComputeDistances(IReadOnlyList<T> x, IDistance<T> distance) { 412 var res = new double[x.Count][]; 413 for (var r = 0; r < x.Count; r++) { 414 var rowV = new double[x.Count]; 395 396 private static double[][] ComputeDistances(T[] x, IDistance<T> distance) { 397 var res = new double[x.Length][]; 398 for (var r = 0; r < x.Length; r++) { 399 var rowV = new double[x.Length]; 415 400 // all distances must be symmetric 416 401 for (var c = 0; c < r; c++) { … … 418 403 } 419 404 rowV[r] = 0.0; // distance to self is zero for all distances 420 for (var c = r + 1; c < x. Count; c++) {405 for (var c = r + 1; c < x.Length; c++) { 421 406 rowV[c] = distance.Get(x[r], x[c]); 422 407 } … … 426 411 // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray(); 427 412 } 413 428 414 private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) { 429 415 // Compute the squared Euclidean distance matrix … … 439 425 q[n1, m] = 1 / (1 + dd[n1, m]); 440 426 sumQ += q[n1, m]; 441 } 442 else q[n1, m] = double.Epsilon; 427 } else q[n1, m] = double.Epsilon; 443 428 } 444 429 } … … 448 433 var c = .0; 449 434 for (var i = 0; i < n; i++) 450 for (var j = 0; j < n; j++) {451 c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));452 }435 for (var j = 0; j < n; j++) { 436 c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon)); 437 } 453 438 return c; 454 439 } 440 455 441 private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) { 456 442 // Get estimate of normalization term … … 477 463 } 478 464 private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) { 465 479 466 // Count number of elements and row counts of symmetric matrix 480 467 var n = rowP.Count - 1; … … 482 469 for (var j = 0; j < n; j++) { 483 470 for (var i = rowP[j]; i < rowP[j + 1]; i++) { 471 484 472 // Check whether element (col_P[i], n) is present 485 473 var present = false; … … 509 497 var offset = new int[n]; 510 498 for (var j = 0; j < n; j++) { 511 for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])499 for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i]) 512 500 513 501 // Check whether element (col_P[i], n) is present … … 561 549 public static double[,] Run(T[] data, IDistance<T> distance, IRandom random, 562 550 int newDimensions = 2, double perplexity = 25, int iterations = 1000, 563 double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 551 double theta = 0, 552 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 564 553 double finalMomentum = .8, double eta = 10.0 565 ) {554 ) { 566 555 var state = CreateState(data, distance, random, newDimensions, perplexity, 567 556 theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta); … … 576 565 int newDimensions = 2, double perplexity = 25, double theta = 0, 577 566 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 578 double finalMomentum = .8, double eta = 10.0 , bool randomInit = true579 ) {580 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta , randomInit);567 double finalMomentum = .8, double eta = 10.0 568 ) { 569 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta); 581 570 } 582 571 … … 591 580 for (var j = 0; j < state.newDimensions; j++) { 592 581 state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j]) 593 ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct582 ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct 594 583 : state.gains[i, j] * .8; 584 595 585 if (state.gains[i, j] < .01) state.gains[i, j] = .01; 596 586 } 597 587 } 588 598 589 599 590 // Perform gradient update (with momentum and gains) 600 591 for (var i = 0; i < state.noDatapoints; i++) 601 for (var j = 0; j < state.newDimensions; j++)602 state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];592 for (var j = 0; j < state.newDimensions; j++) 593 state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j]; 603 594 604 595 for (var i = 0; i < state.noDatapoints; i++) 605 for (var j = 0; j < state.newDimensions; j++)606 state.newData[i, j] = state.newData[i, j] + state.uY[i, j];596 for (var j = 0; j < state.newDimensions; j++) 597 state.newData[i, j] = state.newData[i, j] + state.uY[i, j]; 607 598 608 599 // Make solution zero-mean … … 613 604 if (state.exact) 614 605 for (var i = 0; i < state.noDatapoints; i++) 615 for (var j = 0; j < state.noDatapoints; j++)616 state.p[i, j] /= 12.0;606 for (var j = 0; j < state.noDatapoints; j++) 607 state.p[i, j] /= 12.0; 617 608 else 618 609 for (var i = 0; i < state.rowP[state.noDatapoints]; i++) … … 643 634 // Compute final t-SNE gradient 644 635 for (var i = 0; i < n; i++) 645 for (var j = 0; j < d; j++) {646 dC[i, j] = posF[i, j] - negF[i, j] / sumQ;647 }636 for (var j = 0; j < d; j++) { 637 dC[i, j] = posF[i, j] - negF[i, j] / sumQ; 638 } 648 639 } 649 640
Note: See TracChangeset
for help on using the changeset viewer.