1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20 


21  //Code is based on an implementation from Laurens van der Maaten


22 


23  /*


24  *


25  * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)


26  * All rights reserved.


27  *


28  * Redistribution and use in source and binary forms, with or without


29  * modification, are permitted provided that the following conditions are met:


30  * 1. Redistributions of source code must retain the above copyright


31  * notice, this list of conditions and the following disclaimer.


32  * 2. Redistributions in binary form must reproduce the above copyright


33  * notice, this list of conditions and the following disclaimer in the


34  * documentation and/or other materials provided with the distribution.


35  * 3. All advertising materials mentioning features or use of this software


36  * must display the following acknowledgement:


37  * This product includes software developed by the Delft University of Technology.


38  * 4. Neither the name of the Delft University of Technology nor the names of


39  * its contributors may be used to endorse or promote products derived from


40  * this software without specific prior written permission.


41  *


42  * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS


43  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES


44  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO


45  * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,


46  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,


47  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR


48  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN


49  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING


50  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY


51  * OF SUCH DAMAGE.


52  *


53  */


54  #endregion


55 


56  using System;


57  using System.Collections.Generic;


58  using HeuristicLab.Collections;


59  using HeuristicLab.Common;


60  using HeuristicLab.Core;


61  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


62  using HeuristicLab.Random;


63 


64  namespace HeuristicLab.Algorithms.DataAnalysis {


65  [StorableClass]


66  public class TSNEStatic<T> {


67 


68  [StorableClass]


69  public sealed class TSNEState : DeepCloneable {


70  #region Storables


71  // initialized once


72  [Storable]


73  public IDistance<T> distance;


74  [Storable]


75  public IRandom random;


76  [Storable]


77  public double perplexity;


78  [Storable]


79  public bool exact;


80  [Storable]


81  public int noDatapoints;


82  [Storable]


83  public double finalMomentum;


84  [Storable]


85  public int momSwitchIter;


86  [Storable]


87  public int stopLyingIter;


88  [Storable]


89  public double theta;


90  [Storable]


91  public double eta;


92  [Storable]


93  public int newDimensions;


94 


95  // for approximate version: sparse representation of similarity/distance matrix


96  [Storable]


97  public double[] valP; // similarity/distance


98  [Storable]


99  public int[] rowP; // row index


100  [Storable]


101  public int[] colP; // col index


102 


103  // for exact version: dense representation of distance/similarity matrix


104  [Storable]


105  public double[,] p;


106 


107  // mapped data


108  [Storable]


109  public double[,] newData;


110 


111  [Storable]


112  public int iter;


113  [Storable]


114  public double currentMomentum;


115 


116  // helper variables (updated in each iteration)


117  [Storable]


118  public double[,] gains;


119  [Storable]


120  public double[,] uY;


121  [Storable]


122  public double[,] dY;


123  #endregion


124 


125  #region Constructors & Cloning


126  private TSNEState(TSNEState original, Cloner cloner) : base(original, cloner) {


127  distance = cloner.Clone(original.distance);


128  random = cloner.Clone(original.random);


129  perplexity = original.perplexity;


130  exact = original.exact;


131  noDatapoints = original.noDatapoints;


132  finalMomentum = original.finalMomentum;


133  momSwitchIter = original.momSwitchIter;


134  stopLyingIter = original.stopLyingIter;


135  theta = original.theta;


136  eta = original.eta;


137  newDimensions = original.newDimensions;


138  if (original.valP != null) {


139  valP = new double[original.valP.Length];


140  Array.Copy(original.valP, valP, valP.Length);


141  }


142  if (original.rowP != null) {


143  rowP = new int[original.rowP.Length];


144  Array.Copy(original.rowP, rowP, rowP.Length);


145  }


146  if (original.colP != null) {


147  colP = new int[original.colP.Length];


148  Array.Copy(original.colP, colP, colP.Length);


149  }


150  if (original.p != null) {


151  p = new double[original.p.GetLength(0), original.p.GetLength(1)];


152  Array.Copy(original.p, p, p.Length);


153  }


154  newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)];


155  Array.Copy(original.newData, newData, newData.Length);


156  iter = original.iter;


157  currentMomentum = original.currentMomentum;


158  gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];


159  Array.Copy(original.gains, gains, gains.Length);


160  uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];


161  Array.Copy(original.uY, uY, uY.Length);


162  dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];


163  Array.Copy(original.dY, dY, dY.Length);


164  }


165 


166  public override IDeepCloneable Clone(Cloner cloner) {


167  return new TSNEState(this, cloner);


168  }


169 


170  [StorableConstructor]


171  public TSNEState(bool deserializing) { }


172  public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) {


173  this.distance = distance;


174  this.random = random;


175  this.newDimensions = newDimensions;


176  this.perplexity = perplexity;


177  this.theta = theta;


178  this.stopLyingIter = stopLyingIter;


179  this.momSwitchIter = momSwitchIter;


180  currentMomentum = momentum;


181  this.finalMomentum = finalMomentum;


182  this.eta = eta;


183 


184  // initialize


185  noDatapoints = data.Length;


186  if (noDatapoints  1 < 3 * perplexity)


187  throw new ArgumentException("Perplexity too large for the number of data points!");


188 


189  exact = Math.Abs(theta) < double.Epsilon;


190  newData = new double[noDatapoints, newDimensions];


191  dY = new double[noDatapoints, newDimensions];


192  uY = new double[noDatapoints, newDimensions];


193  gains = new double[noDatapoints, newDimensions];


194  for (var i = 0; i < noDatapoints; i++)


195  for (var j = 0; j < newDimensions; j++)


196  gains[i, j] = 1.0;


197 


198  p = null;


199  rowP = null;


200  colP = null;


201  valP = null;


202 


203  //Calculate Similarities


204  if (exact) p = CalculateExactSimilarites(data, distance, perplexity);


205  else CalculateApproximateSimilarities(data, distance, perplexity, out rowP, out colP, out valP);


206 


207  // Lie about the Pvalues (factor is 4 in the MATLAB implementation)


208  if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;


209  else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;


210 


211  // Initialize solution (randomly)


212  var rand = new NormalDistributedRandom(random, 0, 1);


213  for (var i = 0; i < noDatapoints; i++)


214  for (var j = 0; j < newDimensions; j++)


215  newData[i, j] = rand.NextDouble() * .0001;


216  }


217  #endregion


218 


219  public double EvaluateError() {


220  return exact ?


221  EvaluateErrorExact(p, newData, noDatapoints, newDimensions) :


222  EvaluateErrorApproximate(rowP, colP, valP, newData, theta);


223  }


224 


225  #region Helpers


226  private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {


227  // Compute asymmetric pairwise input similarities


228  ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));


229  // Symmetrize input similarities


230  int[] sRowP, symColP;


231  double[] sValP;


232  SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);


233  rowP = sRowP;


234  colP = symColP;


235  valP = sValP;


236  var sumP = .0;


237  for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];


238  for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;


239  }


240 


241  private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) {


242  // Compute similarities


243  var p = new double[data.Length, data.Length];


244  ComputeGaussianPerplexity(data, distance, p, perplexity);


245  // Symmetrize input similarities


246  for (var n = 0; n < data.Length; n++) {


247  for (var m = n + 1; m < data.Length; m++) {


248  p[n, m] += p[m, n];


249  p[m, n] = p[n, m];


250  }


251  }


252  var sumP = .0;


253  for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];


254  for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;


255  return p;


256  }


257 


258  private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {


259  if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");


260 


261  var n = x.Count;


262  // Allocate the memory we need


263  rowP = new int[n + 1];


264  colP = new int[n * k];


265  valP = new double[n * k];


266  var curP = new double[n  1];


267  rowP[0] = 0;


268  for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;


269 


270  var objX = new List<IndexedItem<T>>();


271  for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));


272 


273  // Build ball tree on data set


274  var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX);


275 


276  // Loop over all points to find nearest neighbors


277  for (var i = 0; i < n; i++) {


278  IList<IndexedItem<T>> indices;


279  IList<double> distances;


280 


281  // Find nearest neighbors


282  tree.Search(objX[i], k + 1, out indices, out distances);


283 


284  // Initialize some variables for binary search


285  var found = false;


286  var beta = 1.0;


287  var minBeta = double.MinValue;


288  var maxBeta = double.MaxValue;


289  const double tol = 1e5;


290 


291  // Iterate until we found a good perplexity


292  var iter = 0; double sumP = 0;


293  while (!found && iter < 200) {


294 


295  // Compute Gaussian kernel row


296  for (var m = 0; m < k; m++) curP[m] = Math.Exp(beta * distances[m + 1]);


297 


298  // Compute entropy of current row


299  sumP = double.Epsilon;


300  for (var m = 0; m < k; m++) sumP += curP[m];


301  var h = .0;


302  for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);


303  h = h / sumP + Math.Log(sumP);


304 


305  // Evaluate whether the entropy is within the tolerance level


306  var hdiff = h  Math.Log(perplexity);


307  if (hdiff < tol && hdiff < tol) {


308  found = true;


309  } else {


310  if (hdiff > 0) {


311  minBeta = beta;


312  if (maxBeta.IsAlmost(double.MaxValue)  maxBeta.IsAlmost(double.MinValue))


313  beta *= 2.0;


314  else


315  beta = (beta + maxBeta) / 2.0;


316  } else {


317  maxBeta = beta;


318  if (minBeta.IsAlmost(double.MinValue)  minBeta.IsAlmost(double.MaxValue))


319  beta /= 2.0;


320  else


321  beta = (beta + minBeta) / 2.0;


322  }


323  }


324 


325  // Update iteration counter


326  iter++;


327  }


328 


329  // Rownormalize current row of P and store in matrix


330  for (var m = 0; m < k; m++) curP[m] /= sumP;


331  for (var m = 0; m < k; m++) {


332  colP[rowP[i] + m] = indices[m + 1].Index;


333  valP[rowP[i] + m] = curP[m];


334  }


335  }


336  }


337  private static void ComputeGaussianPerplexity(T[] x, IDistance<T> distance, double[,] p, double perplexity) {


338  // Compute the distance matrix


339  var dd = ComputeDistances(x, distance);


340 


341  var n = x.Length;


342  // Compute the Gaussian kernel row by row


343  for (var i = 0; i < n; i++) {


344  // Initialize some variables


345  var found = false;


346  var beta = 1.0;


347  var minBeta = double.MinValue;


348  var maxBeta = double.MaxValue;


349  const double tol = 1e5;


350  double sumP = 0;


351 


352  // Iterate until we found a good perplexity


353  var iter = 0;


354  while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten


355 


356  // Compute Gaussian kernel row


357  for (var m = 0; m < n; m++) p[i, m] = Math.Exp(beta * dd[i][m]);


358  p[i, i] = double.Epsilon;


359 


360  // Compute entropy of current row


361  sumP = double.Epsilon;


362  for (var m = 0; m < n; m++) sumP += p[i, m];


363  var h = 0.0;


364  for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);


365  h = h / sumP + Math.Log(sumP);


366 


367  // Evaluate whether the entropy is within the tolerance level


368  var hdiff = h  Math.Log(perplexity);


369  if (hdiff < tol && hdiff < tol) {


370  found = true;


371  } else {


372  if (hdiff > 0) {


373  minBeta = beta;


374  if (maxBeta.IsAlmost(double.MaxValue)  maxBeta.IsAlmost(double.MinValue))


375  beta *= 2.0;


376  else


377  beta = (beta + maxBeta) / 2.0;


378  } else {


379  maxBeta = beta;


380  if (minBeta.IsAlmost(double.MinValue)  minBeta.IsAlmost(double.MaxValue))


381  beta /= 2.0;


382  else


383  beta = (beta + minBeta) / 2.0;


384  }


385  }


386 


387  // Update iteration counter


388  iter++;


389  }


390 


391  // Row normalize P


392  for (var m = 0; m < n; m++) p[i, m] /= sumP;


393  }


394  }


395 


396  private static double[][] ComputeDistances(T[] x, IDistance<T> distance) {


397  var res = new double[x.Length][];


398  for (var r = 0; r < x.Length; r++) {


399  var rowV = new double[x.Length];


400  // all distances must be symmetric


401  for (var c = 0; c < r; c++) {


402  rowV[c] = res[c][r];


403  }


404  rowV[r] = 0.0; // distance to self is zero for all distances


405  for (var c = r + 1; c < x.Length; c++) {


406  rowV[c] = distance.Get(x[r], x[c]);


407  }


408  res[r] = rowV;


409  }


410  return res;


411  // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();


412  }


413 


414  private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {


415  // Compute the squared Euclidean distance matrix


416  var dd = new double[n, n];


417  var q = new double[n, n];


418  ComputeSquaredEuclideanDistance(y, n, d, dd);


419 


420  // Compute Qmatrix and normalization sum


421  var sumQ = double.Epsilon;


422  for (var n1 = 0; n1 < n; n1++) {


423  for (var m = 0; m < n; m++) {


424  if (n1 != m) {


425  q[n1, m] = 1 / (1 + dd[n1, m]);


426  sumQ += q[n1, m];


427  } else q[n1, m] = double.Epsilon;


428  }


429  }


430  for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;


431 


432  // Sum tSNE error


433  var c = .0;


434  for (var i = 0; i < n; i++)


435  for (var j = 0; j < n; j++) {


436  c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));


437  }


438  return c;


439  }


440 


441  private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {


442  // Get estimate of normalization term


443  var n = y.GetLength(0);


444  var d = y.GetLength(1);


445  var tree = new SpacePartitioningTree(y);


446  var buff = new double[d];


447  var sumQ = 0.0;


448  for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);


449 


450  // Loop over all edges to compute tSNE error


451  var c = .0;


452  for (var k = 0; k < n; k++) {


453  for (var i = rowP[k]; i < rowP[k + 1]; i++) {


454  var q = .0;


455  for (var j = 0; j < d; j++) buff[j] = y[k, j];


456  for (var j = 0; j < d; j++) buff[j] = y[colP[i], j];


457  for (var j = 0; j < d; j++) q += buff[j] * buff[j];


458  q = (1.0 / (1.0 + q)) / sumQ;


459  c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));


460  }


461  }


462  return c;


463  }


464  private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {


465 


466  // Count number of elements and row counts of symmetric matrix


467  var n = rowP.Count  1;


468  var rowCounts = new int[n];


469  for (var j = 0; j < n; j++) {


470  for (var i = rowP[j]; i < rowP[j + 1]; i++) {


471 


472  // Check whether element (col_P[i], n) is present


473  var present = false;


474  for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {


475  if (colP[m] == j) present = true;


476  }


477  if (present) rowCounts[j]++;


478  else {


479  rowCounts[j]++;


480  rowCounts[colP[i]]++;


481  }


482  }


483  }


484  var noElem = 0;


485  for (var i = 0; i < n; i++) noElem += rowCounts[i];


486 


487  // Allocate memory for symmetrized matrix


488  symRowP = new int[n + 1];


489  symColP = new int[noElem];


490  symValP = new double[noElem];


491 


492  // Construct new row indices for symmetric matrix


493  symRowP[0] = 0;


494  for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];


495 


496  // Fill the result matrix


497  var offset = new int[n];


498  for (var j = 0; j < n; j++) {


499  for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])


500 


501  // Check whether element (col_P[i], n) is present


502  var present = false;


503  for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {


504  if (colP[m] != j) continue;


505  present = true;


506  if (j > colP[i]) continue; // make sure we do not add elements twice


507  symColP[symRowP[j] + offset[j]] = colP[i];


508  symColP[symRowP[colP[i]] + offset[colP[i]]] = j;


509  symValP[symRowP[j] + offset[j]] = valP[i] + valP[m];


510  symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i] + valP[m];


511  }


512 


513  // If (colP[i], n) is not present, there is no addition involved


514  if (!present) {


515  symColP[symRowP[j] + offset[j]] = colP[i];


516  symColP[symRowP[colP[i]] + offset[colP[i]]] = j;


517  symValP[symRowP[j] + offset[j]] = valP[i];


518  symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i];


519  }


520 


521  // Update offsets


522  if (present && (j > colP[i])) continue;


523  offset[j]++;


524  if (colP[i] != j) offset[colP[i]]++;


525  }


526  }


527 


528  for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;


529  }


530  #endregion


531  }


532 


533  /// <summary>


534  /// Static interface to tSNE


535  /// </summary>


536  /// <param name="data"></param>


537  /// <param name="distance">The distance function used to differentiate similar from nonsimilar points, e.g. Euclidean distance.</param>


538  /// <param name="random">Random number generator</param>


539  /// <param name="newDimensions">Dimensionality of projected space (usually 2 for easy visual analysis).</param>


540  /// <param name="perplexity">Perplexity parameter of tSNE. Comparable to k in a knearest neighbour algorithm. Recommended value is floor(number of points /3) or lower</param>


541  /// <param name="iterations">Maximum number of iterations for gradient descent.</param>


542  /// <param name="theta">Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. CAUTION: exact calculation of forces requires building a nonsparse N*N matrix where N is the number of data points. This may exceed memory limitations.</param>


543  /// <param name="stopLyingIter">Number of iterations after which p is no longer approximated.</param>


544  /// <param name="momSwitchIter">Number of iterations after which the momentum in the gradient descent is switched.</param>


545  /// <param name="momentum">The initial momentum in the gradient descent.</param>


546  /// <param name="finalMomentum">The final momentum in gradient descent (after momentum switch).</param>


547  /// <param name="eta">Gradient descent learning rate.</param>


548  /// <returns></returns>


549  public static double[,] Run(T[] data, IDistance<T> distance, IRandom random,


550  int newDimensions = 2, double perplexity = 25, int iterations = 1000,


551  double theta = 0,


552  int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,


553  double finalMomentum = .8, double eta = 10.0


554  ) {


555  var state = CreateState(data, distance, random, newDimensions, perplexity,


556  theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);


557 


558  for (var i = 0; i < iterations  1; i++) {


559  Iterate(state);


560  }


561  return Iterate(state);


562  }


563 


564  public static TSNEState CreateState(T[] data, IDistance<T> distance, IRandom random,


565  int newDimensions = 2, double perplexity = 25, double theta = 0,


566  int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,


567  double finalMomentum = .8, double eta = 10.0


568  ) {


569  return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);


570  }


571 


572  public static double[,] Iterate(TSNEState state) {


573  if (state.exact)


574  ComputeExactGradient(state.p, state.newData, state.noDatapoints, state.newDimensions, state.dY);


575  else


576  ComputeApproximateGradient(state.rowP, state.colP, state.valP, state.newData, state.noDatapoints, state.newDimensions, state.dY, state.theta);


577 


578  // Update gains


579  for (var i = 0; i < state.noDatapoints; i++) {


580  for (var j = 0; j < state.newDimensions; j++) {


581  state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])


582  ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE > seems to be correct


583  : state.gains[i, j] * .8;


584 


585  if (state.gains[i, j] < .01) state.gains[i, j] = .01;


586  }


587  }


588 


589 


590  // Perform gradient update (with momentum and gains)


591  for (var i = 0; i < state.noDatapoints; i++)


592  for (var j = 0; j < state.newDimensions; j++)


593  state.uY[i, j] = state.currentMomentum * state.uY[i, j]  state.eta * state.gains[i, j] * state.dY[i, j];


594 


595  for (var i = 0; i < state.noDatapoints; i++)


596  for (var j = 0; j < state.newDimensions; j++)


597  state.newData[i, j] = state.newData[i, j] + state.uY[i, j];


598 


599  // Make solution zeromean


600  ZeroMean(state.newData);


601 


602  // Stop lying about the Pvalues after a while, and switch momentum


603  if (state.iter == state.stopLyingIter) {


604  if (state.exact)


605  for (var i = 0; i < state.noDatapoints; i++)


606  for (var j = 0; j < state.noDatapoints; j++)


607  state.p[i, j] /= 12.0;


608  else


609  for (var i = 0; i < state.rowP[state.noDatapoints]; i++)


610  state.valP[i] /= 12.0;


611  }


612 


613  if (state.iter == state.momSwitchIter)


614  state.currentMomentum = state.finalMomentum;


615 


616  state.iter++;


617  return state.newData;


618  }


619 


620  #region Helpers


621  private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {


622  var tree = new SpacePartitioningTree(y);


623  var sumQ = 0.0;


624  var posF = new double[n, d];


625  var negF = new double[n, d];


626  SpacePartitioningTree.ComputeEdgeForces(rowP, colP, valP, n, posF, y, d);


627  var row = new double[d];


628  for (var n1 = 0; n1 < n; n1++) {


629  Array.Clear(row, 0, row.Length);


630  tree.ComputeNonEdgeForces(n1, theta, row, ref sumQ);


631  Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d * sizeof(double));


632  }


633 


634  // Compute final tSNE gradient


635  for (var i = 0; i < n; i++)


636  for (var j = 0; j < d; j++) {


637  dC[i, j] = posF[i, j]  negF[i, j] / sumQ;


638  }


639  }


640 


641  private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {


642  // Make sure the current gradient contains zeros


643  for (var i = 0; i < n; i++) for (var j = 0; j < d; j++) dC[i, j] = 0.0;


644 


645  // Compute the squared Euclidean distance matrix


646  var dd = new double[n, n];


647  ComputeSquaredEuclideanDistance(y, n, d, dd);


648 


649  // Compute Qmatrix and normalization sum


650  var q = new double[n, n];


651  var sumQ = .0;


652  for (var n1 = 0; n1 < n; n1++) {


653  for (var m = 0; m < n; m++) {


654  if (n1 == m) continue;


655  q[n1, m] = 1 / (1 + dd[n1, m]);


656  sumQ += q[n1, m];


657  }


658  }


659 


660  // Perform the computation of the gradient


661  for (var n1 = 0; n1 < n; n1++) {


662  for (var m = 0; m < n; m++) {


663  if (n1 == m) continue;


664  var mult = (p[n1, m]  q[n1, m] / sumQ) * q[n1, m];


665  for (var d1 = 0; d1 < d; d1++) {


666  dC[n1, d1] += (y[n1, d1]  y[m, d1]) * mult;


667  }


668  }


669  }


670  }


671 


672  private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {


673  var dataSums = new double[n];


674  for (var i = 0; i < n; i++) {


675  for (var j = 0; j < d; j++) {


676  dataSums[i] += x[i, j] * x[i, j];


677  }


678  }


679  for (var i = 0; i < n; i++) {


680  for (var m = 0; m < n; m++) {


681  dd[i, m] = dataSums[i] + dataSums[m];


682  }


683  }


684  for (var i = 0; i < n; i++) {


685  dd[i, i] = 0.0;


686  for (var m = i + 1; m < n; m++) {


687  dd[i, m] = 0.0;


688  for (var j = 0; j < d; j++) {


689  dd[i, m] += (x[i, j]  x[m, j]) * (x[i, j]  x[m, j]);


690  }


691  dd[m, i] = dd[i, m];


692  }


693  }


694  }


695 


696  private static void ZeroMean(double[,] x) {


697  // Compute data mean


698  var n = x.GetLength(0);


699  var d = x.GetLength(1);


700  var mean = new double[d];


701  for (var i = 0; i < n; i++) {


702  for (var j = 0; j < d; j++) {


703  mean[j] += x[i, j];


704  }


705  }


706  for (var i = 0; i < d; i++) {


707  mean[i] /= n;


708  }


709  // Subtract data mean


710  for (var i = 0; i < n; i++) {


711  for (var j = 0; j < d; j++) {


712  x[i, j] = mean[j];


713  }


714  }


715  }


716  #endregion


717  }


718  }

