1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20 


21  //Code is based on an implementation from Laurens van der Maaten


22 


23  /*


24  *


25  * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)


26  * All rights reserved.


27  *


28  * Redistribution and use in source and binary forms, with or without


29  * modification, are permitted provided that the following conditions are met:


30  * 1. Redistributions of source code must retain the above copyright


31  * notice, this list of conditions and the following disclaimer.


32  * 2. Redistributions in binary form must reproduce the above copyright


33  * notice, this list of conditions and the following disclaimer in the


34  * documentation and/or other materials provided with the distribution.


35  * 3. All advertising materials mentioning features or use of this software


36  * must display the following acknowledgement:


37  * This product includes software developed by the Delft University of Technology.


38  * 4. Neither the name of the Delft University of Technology nor the names of


39  * its contributors may be used to endorse or promote products derived from


40  * this software without specific prior written permission.


41  *


42  * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS


43  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES


44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO


45  * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,


46  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,


47  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR


48  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN


49  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING


50  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY


51  * OF SUCH DAMAGE.


52  *


53  */


54  #endregion


55 


56  using System;


57  using System.Collections.Generic;


58  using HeuristicLab.Collections;


59  using HeuristicLab.Common;


60  using HeuristicLab.Core;


61  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


62  using HeuristicLab.Random;


63 


64  namespace HeuristicLab.Algorithms.DataAnalysis {


65  [StorableClass]


66  public class TSNEStatic<T> {


67  [StorableClass]


68  public sealed class TSNEState : DeepCloneable {


69  #region Storables


70  // initialized once


71  [Storable]


72  public IDistance<T> distance;


73  [Storable]


74  public IRandom random;


75  [Storable]


76  public double perplexity;


77  [Storable]


78  public bool exact;


79  [Storable]


80  public int noDatapoints;


81  [Storable]


82  public double finalMomentum;


83  [Storable]


84  public int momSwitchIter;


85  [Storable]


86  public int stopLyingIter;


87  [Storable]


88  public double theta;


89  [Storable]


90  public double eta;


91  [Storable]


92  public int newDimensions;


93 


94  // for approximate version: sparse representation of similarity/distance matrix


95  [Storable]


96  public double[] valP; // similarity/distance


97  [Storable]


98  public int[] rowP; // row index


99  [Storable]


100  public int[] colP; // col index


101 


102  // for exact version: dense representation of distance/similarity matrix


103  [Storable]


104  public double[,] p;


105 


106  // mapped data


107  [Storable]


108  public double[,] newData;


109 


110  [Storable]


111  public int iter;


112  [Storable]


113  public double currentMomentum;


114 


115  // helper variables (updated in each iteration)


116  [Storable]


117  public double[,] gains;


118  [Storable]


119  public double[,] uY;


120  [Storable]


121  public double[,] dY;


122  #endregion


123 


124  #region Constructors & Cloning


125  private TSNEState(TSNEState original, Cloner cloner) : base(original, cloner) {


126  distance = cloner.Clone(original.distance);


127  random = cloner.Clone(original.random);


128  perplexity = original.perplexity;


129  exact = original.exact;


130  noDatapoints = original.noDatapoints;


131  finalMomentum = original.finalMomentum;


132  momSwitchIter = original.momSwitchIter;


133  stopLyingIter = original.stopLyingIter;


134  theta = original.theta;


135  eta = original.eta;


136  newDimensions = original.newDimensions;


137  if (original.valP != null) {


138  valP = new double[original.valP.Length];


139  Array.Copy(original.valP, valP, valP.Length);


140  }


141  if (original.rowP != null) {


142  rowP = new int[original.rowP.Length];


143  Array.Copy(original.rowP, rowP, rowP.Length);


144  }


145  if (original.colP != null) {


146  colP = new int[original.colP.Length];


147  Array.Copy(original.colP, colP, colP.Length);


148  }


149  if (original.p != null) {


150  p = new double[original.p.GetLength(0), original.p.GetLength(1)];


151  Array.Copy(original.p, p, p.Length);


152  }


153  newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)];


154  Array.Copy(original.newData, newData, newData.Length);


155  iter = original.iter;


156  currentMomentum = original.currentMomentum;


157  gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];


158  Array.Copy(original.gains, gains, gains.Length);


159  uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];


160  Array.Copy(original.uY, uY, uY.Length);


161  dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];


162  Array.Copy(original.dY, dY, dY.Length);


163  }


164 


165  public override IDeepCloneable Clone(Cloner cloner) {


166  return new TSNEState(this, cloner);


167  }


168 


169  [StorableConstructor]


170  public TSNEState(bool deserializing) { }


171 


172  public TSNEState(IReadOnlyList<T> data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity,


173  double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta, bool randomInit) {


174  this.distance = distance;


175  this.random = random;


176  this.newDimensions = newDimensions;


177  this.perplexity = perplexity;


178  this.theta = theta;


179  this.stopLyingIter = stopLyingIter;


180  this.momSwitchIter = momSwitchIter;


181  currentMomentum = momentum;


182  this.finalMomentum = finalMomentum;


183  this.eta = eta;


184 


185  // initialize


186  noDatapoints = data.Count;


187  if (noDatapoints  1 < 3 * perplexity)


188  throw new ArgumentException("Perplexity too large for the number of data points!");


189 


190  exact = Math.Abs(theta) < double.Epsilon;


191  newData = new double[noDatapoints, newDimensions];


192  dY = new double[noDatapoints, newDimensions];


193  uY = new double[noDatapoints, newDimensions];


194  gains = new double[noDatapoints, newDimensions];


195  for (var i = 0; i < noDatapoints; i++)


196  for (var j = 0; j < newDimensions; j++)


197  gains[i, j] = 1.0;


198 


199  p = null;


200  rowP = null;


201  colP = null;


202  valP = null;


203 


204  //Calculate Similarities


205  if (exact) p = CalculateExactSimilarites(data, distance, perplexity);


206  else CalculateApproximateSimilarities(data, distance, perplexity, out rowP, out colP, out valP);


207 


208  // Lie about the Pvalues (factor is 4 in the MATLAB implementation)


209  if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;


210  else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;


211 


212  // Initialize solution (randomly)


213  var rand = new NormalDistributedRandom(random, 0, 1);


214  for (var i = 0; i < noDatapoints; i++)


215  for (var j = 0; j < newDimensions; j++)


216  newData[i, j] = rand.NextDouble() * .0001;


217 


218  if (!(data[0] is IReadOnlyList<double>)  randomInit) return;


219  for (var i = 0; i < noDatapoints; i++)


220  for (var j = 0; j < newDimensions; j++) {


221  var row = (IReadOnlyList<double>) data[i];


222  newData[i, j] = row[j % row.Count];


223  }


224  }


225  #endregion


226 


227  public double EvaluateError() {


228  return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);


229  }


230 


231  #region Helpers


232  private static void CalculateApproximateSimilarities(IReadOnlyList<T> data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {


233  // Compute asymmetric pairwise input similarities


234  ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity));


235  // Symmetrize input similarities


236  int[] sRowP, symColP;


237  double[] sValP;


238  SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);


239  rowP = sRowP;


240  colP = symColP;


241  valP = sValP;


242  var sumP = .0;


243  for (var i = 0; i < rowP[data.Count]; i++) sumP += valP[i];


244  for (var i = 0; i < rowP[data.Count]; i++) valP[i] /= sumP;


245  }


246  private static double[,] CalculateExactSimilarites(IReadOnlyList<T> data, IDistance<T> distance, double perplexity) {


247  // Compute similarities


248  var p = new double[data.Count, data.Count];


249  ComputeGaussianPerplexity(data, distance, p, perplexity);


250  // Symmetrize input similarities


251  for (var n = 0; n < data.Count; n++) {


252  for (var m = n + 1; m < data.Count; m++) {


253  p[n, m] += p[m, n];


254  p[m, n] = p[n, m];


255  }


256  }


257  var sumP = .0;


258  for (var i = 0; i < data.Count; i++) {


259  for (var j = 0; j < data.Count; j++) {


260  sumP += p[i, j];


261  }


262  }


263  for (var i = 0; i < data.Count; i++) {


264  for (var j = 0; j < data.Count; j++) {


265  p[i, j] /= sumP;


266  }


267  }


268  return p;


269  }


270  private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {


271  if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");


272 


273  var n = x.Count;


274  // Allocate the memory we need


275  rowP = new int[n + 1];


276  colP = new int[n * k];


277  valP = new double[n * k];


278  var curP = new double[n  1];


279  rowP[0] = 0;


280  for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;


281 


282  var objX = new List<IndexedItem<T>>();


283  for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));


284 


285  // Build ball tree on data set


286  var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX);


287 


288  // Loop over all points to find nearest neighbors


289  for (var i = 0; i < n; i++) {


290  IList<IndexedItem<T>> indices;


291  IList<double> distances;


292 


293  // Find nearest neighbors


294  tree.Search(objX[i], k + 1, out indices, out distances);


295 


296  // Initialize some variables for binary search


297  var found = false;


298  var beta = 1.0;


299  var minBeta = double.MinValue;


300  var maxBeta = double.MaxValue;


301  const double tol = 1e5;


302 


303  // Iterate until we found a good perplexity


304  var iter = 0;


305  double sumP = 0;


306  while (!found && iter < 200) {


307  // Compute Gaussian kernel row


308  for (var m = 0; m < k; m++) curP[m] = Math.Exp(beta * distances[m + 1]);


309 


310  // Compute entropy of current row


311  sumP = double.Epsilon;


312  for (var m = 0; m < k; m++) sumP += curP[m];


313  var h = .0;


314  for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);


315  h = h / sumP + Math.Log(sumP);


316 


317  // Evaluate whether the entropy is within the tolerance level


318  var hdiff = h  Math.Log(perplexity);


319  if (hdiff < tol && hdiff < tol) {


320  found = true;


321  }


322  else {


323  if (hdiff > 0) {


324  minBeta = beta;


325  if (maxBeta.IsAlmost(double.MaxValue)  maxBeta.IsAlmost(double.MinValue))


326  beta *= 2.0;


327  else


328  beta = (beta + maxBeta) / 2.0;


329  }


330  else {


331  maxBeta = beta;


332  if (minBeta.IsAlmost(double.MinValue)  minBeta.IsAlmost(double.MaxValue))


333  beta /= 2.0;


334  else


335  beta = (beta + minBeta) / 2.0;


336  }


337  }


338 


339  // Update iteration counter


340  iter++;


341  }


342 


343  // Rownormalize current row of P and store in matrix


344  for (var m = 0; m < k; m++) curP[m] /= sumP;


345  for (var m = 0; m < k; m++) {


346  colP[rowP[i] + m] = indices[m + 1].Index;


347  valP[rowP[i] + m] = curP[m];


348  }


349  }


350  }


351  private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, double[,] p, double perplexity) {


352  // Compute the distance matrix


353  var dd = ComputeDistances(x, distance);


354 


355  var n = x.Count;


356  // Compute the Gaussian kernel row by row


357  for (var i = 0; i < n; i++) {


358  // Initialize some variables


359  var found = false;


360  var beta = 1.0;


361  var minBeta = double.MinValue;


362  var maxBeta = double.MaxValue;


363  const double tol = 1e5;


364  double sumP = 0;


365 


366  // Iterate until we found a good perplexity


367  var iter = 0;


368  while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten


369 


370  // Compute Gaussian kernel row


371  for (var m = 0; m < n; m++) p[i, m] = Math.Exp(beta * dd[i][m]);


372  p[i, i] = double.Epsilon;


373 


374  // Compute entropy of current row


375  sumP = double.Epsilon;


376  for (var m = 0; m < n; m++) sumP += p[i, m];


377  var h = 0.0;


378  for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);


379  h = h / sumP + Math.Log(sumP);


380 


381  // Evaluate whether the entropy is within the tolerance level


382  var hdiff = h  Math.Log(perplexity);


383  if (hdiff < tol && hdiff < tol) {


384  found = true;


385  }


386  else {


387  if (hdiff > 0) {


388  minBeta = beta;


389  if (maxBeta.IsAlmost(double.MaxValue)  maxBeta.IsAlmost(double.MinValue))


390  beta *= 2.0;


391  else


392  beta = (beta + maxBeta) / 2.0;


393  }


394  else {


395  maxBeta = beta;


396  if (minBeta.IsAlmost(double.MinValue)  minBeta.IsAlmost(double.MaxValue))


397  beta /= 2.0;


398  else


399  beta = (beta + minBeta) / 2.0;


400  }


401  }


402 


403  // Update iteration counter


404  iter++;


405  }


406 


407  // Row normalize P


408  for (var m = 0; m < n; m++) p[i, m] /= sumP;


409  }


410  }


411  private static double[][] ComputeDistances(IReadOnlyList<T> x, IDistance<T> distance) {


412  var res = new double[x.Count][];


413  for (var r = 0; r < x.Count; r++) {


414  var rowV = new double[x.Count];


415  // all distances must be symmetric


416  for (var c = 0; c < r; c++) {


417  rowV[c] = res[c][r];


418  }


419  rowV[r] = 0.0; // distance to self is zero for all distances


420  for (var c = r + 1; c < x.Count; c++) {


421  rowV[c] = distance.Get(x[r], x[c]);


422  }


423  res[r] = rowV;


424  }


425  return res;


426  // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();


427  }


428  private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {


429  // Compute the squared Euclidean distance matrix


430  var dd = new double[n, n];


431  var q = new double[n, n];


432  ComputeSquaredEuclideanDistance(y, n, d, dd);


433 


434  // Compute Qmatrix and normalization sum


435  var sumQ = double.Epsilon;


436  for (var n1 = 0; n1 < n; n1++) {


437  for (var m = 0; m < n; m++) {


438  if (n1 != m) {


439  q[n1, m] = 1 / (1 + dd[n1, m]);


440  sumQ += q[n1, m];


441  }


442  else q[n1, m] = double.Epsilon;


443  }


444  }


445  for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;


446 


447  // Sum tSNE error


448  var c = .0;


449  for (var i = 0; i < n; i++)


450  for (var j = 0; j < n; j++) {


451  c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));


452  }


453  return c;


454  }


455  private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {


456  // Get estimate of normalization term


457  var n = y.GetLength(0);


458  var d = y.GetLength(1);


459  var tree = new SpacePartitioningTree(y);


460  var buff = new double[d];


461  var sumQ = 0.0;


462  for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);


463 


464  // Loop over all edges to compute tSNE error


465  var c = .0;


466  for (var k = 0; k < n; k++) {


467  for (var i = rowP[k]; i < rowP[k + 1]; i++) {


468  var q = .0;


469  for (var j = 0; j < d; j++) buff[j] = y[k, j];


470  for (var j = 0; j < d; j++) buff[j] = y[colP[i], j];


471  for (var j = 0; j < d; j++) q += buff[j] * buff[j];


472  q = (1.0 / (1.0 + q)) / sumQ;


473  c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));


474  }


475  }


476  return c;


477  }


478  private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {


479  // Count number of elements and row counts of symmetric matrix


480  var n = rowP.Count  1;


481  var rowCounts = new int[n];


482  for (var j = 0; j < n; j++) {


483  for (var i = rowP[j]; i < rowP[j + 1]; i++) {


484  // Check whether element (col_P[i], n) is present


485  var present = false;


486  for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {


487  if (colP[m] == j) present = true;


488  }


489  if (present) rowCounts[j]++;


490  else {


491  rowCounts[j]++;


492  rowCounts[colP[i]]++;


493  }


494  }


495  }


496  var noElem = 0;


497  for (var i = 0; i < n; i++) noElem += rowCounts[i];


498 


499  // Allocate memory for symmetrized matrix


500  symRowP = new int[n + 1];


501  symColP = new int[noElem];


502  symValP = new double[noElem];


503 


504  // Construct new row indices for symmetric matrix


505  symRowP[0] = 0;


506  for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];


507 


508  // Fill the result matrix


509  var offset = new int[n];


510  for (var j = 0; j < n; j++) {


511  for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])


512 


513  // Check whether element (col_P[i], n) is present


514  var present = false;


515  for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {


516  if (colP[m] != j) continue;


517  present = true;


518  if (j > colP[i]) continue; // make sure we do not add elements twice


519  symColP[symRowP[j] + offset[j]] = colP[i];


520  symColP[symRowP[colP[i]] + offset[colP[i]]] = j;


521  symValP[symRowP[j] + offset[j]] = valP[i] + valP[m];


522  symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i] + valP[m];


523  }


524 


525  // If (colP[i], n) is not present, there is no addition involved


526  if (!present) {


527  symColP[symRowP[j] + offset[j]] = colP[i];


528  symColP[symRowP[colP[i]] + offset[colP[i]]] = j;


529  symValP[symRowP[j] + offset[j]] = valP[i];


530  symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i];


531  }


532 


533  // Update offsets


534  if (present && (j > colP[i])) continue;


535  offset[j]++;


536  if (colP[i] != j) offset[colP[i]]++;


537  }


538  }


539 


540  for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;


541  }


542  #endregion


543  }


544 


545  /// <summary>


546  /// Static interface to tSNE


547  /// </summary>


548  /// <param name="data"></param>


549  /// <param name="distance">The distance function used to differentiate similar from nonsimilar points, e.g. Euclidean distance.</param>


550  /// <param name="random">Random number generator</param>


551  /// <param name="newDimensions">Dimensionality of projected space (usually 2 for easy visual analysis).</param>


552  /// <param name="perplexity">Perplexity parameter of tSNE. Comparable to k in a knearest neighbour algorithm. Recommended value is floor(number of points /3) or lower</param>


553  /// <param name="iterations">Maximum number of iterations for gradient descent.</param>


554  /// <param name="theta">Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. CAUTION: exact calculation of forces requires building a nonsparse N*N matrix where N is the number of data points. This may exceed memory limitations.</param>


555  /// <param name="stopLyingIter">Number of iterations after which p is no longer approximated.</param>


556  /// <param name="momSwitchIter">Number of iterations after which the momentum in the gradient descent is switched.</param>


557  /// <param name="momentum">The initial momentum in the gradient descent.</param>


558  /// <param name="finalMomentum">The final momentum in gradient descent (after momentum switch).</param>


559  /// <param name="eta">Gradient descent learning rate.</param>


560  /// <returns></returns>


561  public static double[,] Run(T[] data, IDistance<T> distance, IRandom random,


562  int newDimensions = 2, double perplexity = 25, int iterations = 1000,


563  double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,


564  double finalMomentum = .8, double eta = 10.0


565  ) {


566  var state = CreateState(data, distance, random, newDimensions, perplexity,


567  theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);


568 


569  for (var i = 0; i < iterations  1; i++) {


570  Iterate(state);


571  }


572  return Iterate(state);


573  }


574 


575  public static TSNEState CreateState(T[] data, IDistance<T> distance, IRandom random,


576  int newDimensions = 2, double perplexity = 25, double theta = 0,


577  int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,


578  double finalMomentum = .8, double eta = 10.0, bool randomInit = true


579  ) {


580  return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta, randomInit);


581  }


582 


583  public static double[,] Iterate(TSNEState state) {


584  if (state.exact)


585  ComputeExactGradient(state.p, state.newData, state.noDatapoints, state.newDimensions, state.dY);


586  else


587  ComputeApproximateGradient(state.rowP, state.colP, state.valP, state.newData, state.noDatapoints, state.newDimensions, state.dY, state.theta);


588 


589  // Update gains


590  for (var i = 0; i < state.noDatapoints; i++) {


591  for (var j = 0; j < state.newDimensions; j++) {


592  state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])


593  ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE > seems to be correct


594  : state.gains[i, j] * .8;


595  if (state.gains[i, j] < .01) state.gains[i, j] = .01;


596  }


597  }


598 


599  // Perform gradient update (with momentum and gains)


600  for (var i = 0; i < state.noDatapoints; i++)


601  for (var j = 0; j < state.newDimensions; j++)


602  state.uY[i, j] = state.currentMomentum * state.uY[i, j]  state.eta * state.gains[i, j] * state.dY[i, j];


603 


604  for (var i = 0; i < state.noDatapoints; i++)


605  for (var j = 0; j < state.newDimensions; j++)


606  state.newData[i, j] = state.newData[i, j] + state.uY[i, j];


607 


608  // Make solution zeromean


609  ZeroMean(state.newData);


610 


611  // Stop lying about the Pvalues after a while, and switch momentum


612  if (state.iter == state.stopLyingIter) {


613  if (state.exact)


614  for (var i = 0; i < state.noDatapoints; i++)


615  for (var j = 0; j < state.noDatapoints; j++)


616  state.p[i, j] /= 12.0;


617  else


618  for (var i = 0; i < state.rowP[state.noDatapoints]; i++)


619  state.valP[i] /= 12.0;


620  }


621 


622  if (state.iter == state.momSwitchIter)


623  state.currentMomentum = state.finalMomentum;


624 


625  state.iter++;


626  return state.newData;


627  }


628 


629  #region Helpers


630  private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {


631  var tree = new SpacePartitioningTree(y);


632  var sumQ = 0.0;


633  var posF = new double[n, d];


634  var negF = new double[n, d];


635  SpacePartitioningTree.ComputeEdgeForces(rowP, colP, valP, n, posF, y, d);


636  var row = new double[d];


637  for (var n1 = 0; n1 < n; n1++) {


638  Array.Clear(row, 0, row.Length);


639  tree.ComputeNonEdgeForces(n1, theta, row, ref sumQ);


640  Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d * sizeof(double));


641  }


642 


643  // Compute final tSNE gradient


644  for (var i = 0; i < n; i++)


645  for (var j = 0; j < d; j++) {


646  dC[i, j] = posF[i, j]  negF[i, j] / sumQ;


647  }


648  }


649 


650  private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {


651  // Make sure the current gradient contains zeros


652  for (var i = 0; i < n; i++) for (var j = 0; j < d; j++) dC[i, j] = 0.0;


653 


654  // Compute the squared Euclidean distance matrix


655  var dd = new double[n, n];


656  ComputeSquaredEuclideanDistance(y, n, d, dd);


657 


658  // Compute Qmatrix and normalization sum


659  var q = new double[n, n];


660  var sumQ = .0;


661  for (var n1 = 0; n1 < n; n1++) {


662  for (var m = 0; m < n; m++) {


663  if (n1 == m) continue;


664  q[n1, m] = 1 / (1 + dd[n1, m]);


665  sumQ += q[n1, m];


666  }


667  }


668 


669  // Perform the computation of the gradient


670  for (var n1 = 0; n1 < n; n1++) {


671  for (var m = 0; m < n; m++) {


672  if (n1 == m) continue;


673  var mult = (p[n1, m]  q[n1, m] / sumQ) * q[n1, m];


674  for (var d1 = 0; d1 < d; d1++) {


675  dC[n1, d1] += (y[n1, d1]  y[m, d1]) * mult;


676  }


677  }


678  }


679  }


680 


681  private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {


682  var dataSums = new double[n];


683  for (var i = 0; i < n; i++) {


684  for (var j = 0; j < d; j++) {


685  dataSums[i] += x[i, j] * x[i, j];


686  }


687  }


688  for (var i = 0; i < n; i++) {


689  for (var m = 0; m < n; m++) {


690  dd[i, m] = dataSums[i] + dataSums[m];


691  }


692  }


693  for (var i = 0; i < n; i++) {


694  dd[i, i] = 0.0;


695  for (var m = i + 1; m < n; m++) {


696  dd[i, m] = 0.0;


697  for (var j = 0; j < d; j++) {


698  dd[i, m] += (x[i, j]  x[m, j]) * (x[i, j]  x[m, j]);


699  }


700  dd[m, i] = dd[i, m];


701  }


702  }


703  }


704 


705  private static void ZeroMean(double[,] x) {


706  // Compute data mean


707  var n = x.GetLength(0);


708  var d = x.GetLength(1);


709  var mean = new double[d];


710  for (var i = 0; i < n; i++) {


711  for (var j = 0; j < d; j++) {


712  mean[j] += x[i, j];


713  }


714  }


715  for (var i = 0; i < d; i++) {


716  mean[i] /= n;


717  }


718  // Subtract data mean


719  for (var i = 0; i < n; i++) {


720  for (var j = 0; j < d; j++) {


721  x[i, j] = mean[j];


722  }


723  }


724  }


725  #endregion


726  }


727  } 
