
Changeset 14788 for branches


Timestamp: 03/27/17 17:27:03 (8 years ago)
Author: gkronber
Message: #2700: refactoring
Location: branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
Files: 4 edited

  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ISpacePartitioningTree.cs

    r14785 r14788  
    3333    void GetAllIndices(int[] indices);
    3434    int GetAllIndices(int[] indices, int loc);
    35     void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, double[] sumQ);
     35    void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ);
    3636    void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF);
    3737
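
The interface change above replaces the one-element array that ComputeNonEdgeForces used as an output accumulator with a ref parameter. A minimal sketch of the two accumulator styles (hypothetical Accumulate methods, not part of the changeset) shows the difference at the call site:

    public static class RefAccumulatorSketch {
      // Before: a one-element array serves as a mutable cell for the running sum.
      public static void AccumulateOld(double[] sumQ) { sumQ[0] += 1.0; }

      // After: a ref parameter expresses the same in/out intent directly,
      // without the extra allocation and the sumQ[0] indexing at every call site.
      public static void AccumulateNew(ref double sumQ) { sumQ += 1.0; }
    }

    // Usage: var s = new[] { 0.0 }; AccumulateOld(s);      // s[0] == 1.0
    //        double q = 0.0;        AccumulateNew(ref q);  // q  == 1.0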
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs

    r14787 r14788  
    218218    }
    219219
    220     public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, double[] sumQ) {
     220    public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ)
     221    {
    221222      // Make sure that we spend no time on empty nodes or self-interactions
    222223      if (cumulativeSize == 0 || (isLeaf && size == 1 && index[0] == pointIndex)) return;
     
    238239        D = 1.0 / (1.0 + D);
    239240        var mult = cumulativeSize * D;
    240         sumQ[0] += mult;
     241        sumQ += mult;
    241242        mult *= D;
    242243        for (var d = 0; d < dimension; d++) negF[d] += mult * buff[d];
     
    244245
    245246        // Recursively apply Barnes-Hut to children
    246         for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, sumQ);
     247        for (var i = 0; i < noChildren; i++) children[i].ComputeNonEdgeForces(pointIndex, theta, negF, ref sumQ);
    247248      }
    248249    }
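
In the updated ComputeNonEdgeForces above, the normalization term sumQ is now threaded through the recursion by ref instead of through the sumQ[0] array cell. Reading the visible lines (the distance computation and the Barnes-Hut acceptance test lie outside the hunk, so this is an interpretation, not changeset text): a tree node summarized as a single point at its center of mass y_cell contributes, for query point y_i and cumulativeSize N_cell,

    q_{ic} = \frac{1}{1 + \lVert y_i - y_{\text{cell}} \rVert^2},
    \qquad \text{sumQ} \mathrel{+}= N_{\text{cell}}\, q_{ic},
    \qquad \text{negF} \mathrel{+}= N_{\text{cell}}\, q_{ic}^{2}\, (y_i - y_{\text{cell}}),

which is exactly the sequence D = 1.0 / (1.0 + D); mult = cumulativeSize * D; sumQ += mult; mult *= D; negF[d] += mult * buff[d].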
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs

    r14785 r14788  
    2929using HeuristicLab.Core;
    3030using HeuristicLab.Data;
    31 using HeuristicLab.Encodings.RealVectorEncoding;
    3231using HeuristicLab.Optimization;
    3332using HeuristicLab.Parameters;
     
    7271    private const string ClassesParameterName = "ClassNames";
    7372    private const string NormalizationParameterName = "Normalization";
     73    #endregion
     74
     75    #region result names
     76    private const string IterationResultName = "Iteration";
     77    private const string ErrorResultName = "Error";
     78    private const string ErrorPlotResultName = "Error plot";
     79    private const string ScatterPlotResultName = "Scatterplot";
     80    private const string DataResultName = "Projected data";
    7481    #endregion
    7582
     
    209216    #endregion
    210217
    211     public override void Stop() {
    212       base.Stop();
    213       if (tsne != null) tsne.Running = false;
    214     }
     218    [Storable]
     219    private Dictionary<string, List<int>> dataRowNames;    // TODO
     220    [Storable]
     221    private Dictionary<string, ScatterPlotDataRow> dataRows; // TODO
     222
    215223
    216224    protected override void Run(CancellationToken cancellationToken) {
    217       var dataRowNames = new Dictionary<string, List<int>>();
    218       var rows = new Dictionary<string, ScatterPlotDataRow>();
     225      var problemData = Problem.ProblemData;
     226
     227      // set up and run tSNE
     228      if (SetSeedRandomly) Seed = new System.Random().Next();
     229      var random = new MersenneTwister((uint)Seed);
     230      var dataset = problemData.Dataset;
     231      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
     232      var data = new double[dataset.Rows][];
     233      for (var row = 0; row < dataset.Rows; row++) data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray();
     234      if (Normalization) data = NormalizeData(data);
     235
     236      var tsneState = TSNE<double[]>.CreateState(data, Distance, random, NewDimensions, Perplexity, Theta, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);
     237
     238      SetUpResults(data);
     239      for (int iter = 0; iter < MaxIterations && !cancellationToken.IsCancellationRequested; iter++)
     240      {
     241        TSNE<double[]>.Iterate(tsneState);
     242        Analyze(tsneState);
     243      }
     244    }
     245
     246    private void SetUpResults(IReadOnlyCollection<double[]> data) {
     247      if (Results == null) return;
     248      var results = Results;
     249      dataRowNames = new Dictionary<string, List<int>>();
     250      dataRows = new Dictionary<string, ScatterPlotDataRow>();
    219251      var problemData = Problem.ProblemData;
    220252
     
    235267            var contourname = GetContourName(i, min, max, contours);
    236268            dataRowNames.Add(contourname, new List<int>());
    237             rows.Add(contourname, new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>()));
    238             rows[contourname].VisualProperties.Color = GetHeatMapColor(i, contours);
    239             rows[contourname].VisualProperties.PointSize = i + 3;
     269            dataRows.Add(contourname, new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>()));
     270            dataRows[contourname].VisualProperties.Color = GetHeatMapColor(i, contours);
     271            dataRows[contourname].VisualProperties.PointSize = i + 3;
    240272          }
    241273          for (var i = 0; i < classValues.Length; i++) {
     
    248280      }
    249281
    250       // set up and run tSNE
    251       if (SetSeedRandomly) Seed = new System.Random().Next();
    252       var random = new MersenneTwister((uint)Seed);
    253       tsne = new TSNE<double[]>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows);
    254       var dataset = problemData.Dataset;
    255       var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
    256       var data = new double[dataset.Rows][];
    257       for (var row = 0; row < dataset.Rows; row++) data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray();
    258       if (Normalization) data = NormalizeData(data);
    259       tsne.Run(data, NewDimensions, Perplexity, Theta);
     282      if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0)));
     283      else ((IntValue)results[IterationResultName].Value).Value = 0;
     284
     285      if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0)));
     286      else ((DoubleValue)results[ErrorResultName].Value).Value = 0;
     287
     288      if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent")));
     289      else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent");
     290
     291      var plot = results[ErrorPlotResultName].Value as DataTable;
     292      if (plot == null) throw new ArgumentException("could not create/access error data table in results collection");
     293
     294      if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors"));
     295      plot.Rows["errors"].Values.Clear();
     296
     297      results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, "")));
     298      results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix()));
     299    }
     300
     301    private void Analyze(TSNE<double[]>.TSNEState tsneState) {
     302      if (Results == null) return;
     303      var results = Results;
     304      var plot = results[ErrorPlotResultName].Value as DataTable;
     305      if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection.");
     306      var errors = plot.Rows["errors"].Values;
     307      var c = tsneState.EvaluateError();
     308      errors.Add(c);
     309      ((IntValue)results[IterationResultName].Value).Value = tsneState.iter + 1;
     310      ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last();
     311
     312      var ndata = Normalize(tsneState.newData);
     313      results[DataResultName].Value = new DoubleMatrix(ndata);
     314      var splot = results[ScatterPlotResultName].Value as ScatterPlot;
     315      FillScatterPlot(ndata, splot);
     316    }
     317
     318    private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) {
     319      foreach (var rowName in dataRowNames.Keys) {
     320        if (!plot.Rows.ContainsKey(rowName))
     321          plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>()));
     322        plot.Rows[rowName].Points.Replace(dataRowNames[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])));
     323      }
     324    }
     325
     326    private static double[,] Normalize(double[,] data) {
     327      var max = new double[data.GetLength(1)];
     328      var min = new double[data.GetLength(1)];
     329      var res = new double[data.GetLength(0), data.GetLength(1)];
     330      for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
     331      for (var i = 0; i < data.GetLength(0); i++)
     332        for (var j = 0; j < data.GetLength(1); j++) {
     333          var v = data[i, j];
     334          max[j] = Math.Max(max[j], v);
     335          min[j] = Math.Min(min[j], v);
     336        }
     337      for (var i = 0; i < data.GetLength(0); i++) {
     338        for (var j = 0; j < data.GetLength(1); j++) {
     339          res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
     340        }
     341      }
     342      return res;
    260343    }
    261344
     
    276359      return nData;
    277360    }
     361
    278362    private static Color GetHeatMapColor(int contourNr, int noContours) {
    279363      var q = (double)contourNr / noContours;  // q in [0,1]
     
    281365      return c;
    282366    }
     367
    283368    private static string GetContourName(double value, double min, double max, int noContours) {
    284369      var size = (max - min) / noContours;
     
    286371      return GetContourName(contourNr, min, max, noContours);
    287372    }
     373
    288374    private static string GetContourName(int i, double min, double max, int noContours) {
    289375      var size = (max - min) / noContours;
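
TSNEAlgorithm.Run now drives the new static API instead of the removed stateful tsne.Run call: a TSNEState is created once and then advanced one gradient-descent step per iteration, with Analyze reporting after every step. A condensed usage sketch of that pattern (seed and parameter values are illustrative, not taken from the changeset):

    // Assumes the HeuristicLab types referenced in the diff (IDistance<double[]>,
    // MersenneTwister, TSNE<T>); data is double[][], distance an IDistance<double[]>.
    var random = new MersenneTwister((uint)42);                 // illustrative seed
    var state = TSNE<double[]>.CreateState(
      data, distance, random,
      newDimensions: 2, perplexity: 25, theta: 0.5,
      stopLyingIter: 250, momSwitchIter: 250,
      momentum: .5, finalMomentum: .8, eta: 200.0);

    for (var iter = 0; iter < maxIterations && !cancellationToken.IsCancellationRequested; iter++) {
      double[,] projected = TSNE<double[]>.Iterate(state);      // one gradient-descent step
      double error = state.EvaluateError();                     // KL divergence of the current embedding
    }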
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs

    r14785 r14788  
    5757using System.Collections.Generic;
    5858using System.Linq;
    59 using HeuristicLab.Analysis;
    6059using HeuristicLab.Collections;
    6160using HeuristicLab.Common;
    6261using HeuristicLab.Core;
    63 using HeuristicLab.Data;
    64 using HeuristicLab.Optimization;
    6562using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    6663using HeuristicLab.Random;
     
    6865namespace HeuristicLab.Algorithms.DataAnalysis {
    6966  [StorableClass]
    70   public class TSNE<T> : DeepCloneable /*where T : class, IDeepCloneable*/ {
    71 
    72     private const string IterationResultName = "Iteration";
    73     private const string ErrorResultName = "Error";
    74     private const string ErrorPlotResultName = "ErrorPlot";
    75     private const string ScatterPlotResultName = "Scatterplot";
    76     private const string DataResultName = "Projected Data";
    77 
    78     #region Properties
    79     [Storable]
    80     private IDistance<T> distance;
    81     [Storable]
    82     private int maxIter;
    83     [Storable]
    84     private int stopLyingIter;
    85     [Storable]
    86     private int momSwitchIter;
    87     [Storable]
    88     double momentum;
    89     [Storable]
    90     private double finalMomentum;
    91     [Storable]
    92     private double eta;
    93     [Storable]
    94     private IRandom random;
    95     [Storable]
    96     private ResultCollection results;
    97     [Storable]
    98     private Dictionary<string, List<int>> dataRowLookup;
    99     [Storable]
    100     private Dictionary<string, ScatterPlotDataRow> dataRows;
    101     #endregion
    102 
    103     #region Stopping
    104     public volatile bool Running;      // TODO
    105     #endregion
    106 
    107     #region HLConstructors & Cloning
    108     [StorableConstructor]
    109     protected TSNE(bool deserializing) { }
    110     protected TSNE(TSNE<T> original, Cloner cloner) : base(original, cloner) {
    111       distance = cloner.Clone(original.distance);
    112       maxIter = original.maxIter;
    113       stopLyingIter = original.stopLyingIter;
    114       momSwitchIter = original.momSwitchIter;
    115       momentum = original.momentum;
    116       finalMomentum = original.finalMomentum;
    117       eta = original.eta;
    118       random = cloner.Clone(random);
    119       results = cloner.Clone(results);
    120       dataRowLookup = original.dataRowLookup.ToDictionary(entry => entry.Key, entry => entry.Value.Select(x => x).ToList());
    121       dataRows = original.dataRows.ToDictionary(entry => entry.Key, entry => cloner.Clone(entry.Value));
     67  public class TSNE<T> {
     68
     69    [StorableClass]
     70    public sealed class TSNEState : DeepCloneable {
     71      // initialized once
     72      public IDistance<T> distance;
     73      public IRandom random;
     74      public double perplexity;
     75      public bool exact;
     76      public int noDatapoints;
     77      public double finalMomentum;
     78      public int momSwitchIter;
     79      public int stopLyingIter;
     80      public double theta;
     81      public double eta;
     82      public int newDimensions;
     83
     84      // for approximate version: sparse representation of similarity/distance matrix
     85      public double[] valP; // similarity/distance
     86      public int[] rowP; // row index
     87      public int[] colP; // col index
     88
     89      // for exact version: dense representation of distance/similarity matrix
     90      public double[,] p;
     91
     92      // mapped data
     93      public double[,] newData;
     94
     95      public int iter;
     96      public double currentMomentum;
     97
     98      // helper variables (updated in each iteration)
     99      public double[,] gains;
     100      public double[,] uY;
     101      public double[,] dY;
     102
     103      private TSNEState(TSNEState original, Cloner cloner) : base(original, cloner) {
     104      }
     105      public override IDeepCloneable Clone(Cloner cloner) {
     106        return new TSNEState(this, cloner);
     107      }
     108
     109      public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) {
     110        this.distance = distance;
     111        this.random = random;
     112        this.newDimensions = newDimensions;
     113        this.perplexity = perplexity;
     114        this.theta = theta;
     115        this.stopLyingIter = stopLyingIter;
     116        this.momSwitchIter = momSwitchIter;
     117        this.currentMomentum = momentum;
     118        this.finalMomentum = finalMomentum;
     119        this.eta = eta;
     120
     121
     122        // initialize
     123        noDatapoints = data.Length;
     124        if (noDatapoints - 1 < 3 * perplexity) throw new ArgumentException("Perplexity too large for the number of data points!");
     125
     126        exact = Math.Abs(theta) < double.Epsilon;
     127        newData = new double[noDatapoints, newDimensions];
     128        dY = new double[noDatapoints, newDimensions];
     129        uY = new double[noDatapoints, newDimensions];
     130        gains = new double[noDatapoints, newDimensions];
     131        for (var i = 0; i < noDatapoints; i++)
     132          for (var j = 0; j < newDimensions; j++)
     133            gains[i, j] = 1.0;
     134
     135        p = null;
     136        rowP = null;
     137        colP = null;
     138        valP = null;
     139
     140        //Calculate Similarities
     141        if (exact) p = CalculateExactSimilarites(data, distance, perplexity);
     142        else CalculateApproximateSimilarities(data, distance, perplexity, out rowP, out colP, out valP);
     143
     144        // Lie about the P-values
     145        if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
     146        else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
     147
     148        // Initialize solution (randomly)
     149        var rand = new NormalDistributedRandom(random, 0, 1);
     150        for (var i = 0; i < noDatapoints; i++)
     151          for (var j = 0; j < newDimensions; j++)
     152            newData[i, j] = rand.NextDouble() * .0001;  // TODO const
     153      }
     154
     155      public double EvaluateError() {
     156        return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
     157      }
     158
     159      private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
     160        // Compute asymmetric pairwise input similarities
     161        ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));        // TODO: why 3?
     162        // Symmetrize input similarities
     163        int[] sRowP, symColP;
     164        double[] sValP;
     165        SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);
     166        rowP = sRowP;
     167        colP = symColP;
     168        valP = sValP;
     169        var sumP = .0;
     170        for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
     171        for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
     172      }
     173      private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) {
     174        // Compute similarities
     175        var p = new double[data.Length, data.Length];
     176        ComputeGaussianPerplexity(data, distance, p, perplexity);
     177        // Symmetrize input similarities
     178        for (var n = 0; n < data.Length; n++) {
     179          for (var m = n + 1; m < data.Length; m++) {
     180            p[n, m] += p[m, n];
     181            p[m, n] = p[n, m];
     182          }
     183        }
     184        var sumP = .0;
     185        for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
     186        for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
     187        return p;
     188      }
     189
     190      private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
     191        if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");
     192
     193        int n = x.Count;
     194        // Allocate the memory we need
     195        rowP = new int[n + 1];
     196        colP = new int[n * k];
     197        valP = new double[n * k];
     198        var curP = new double[n - 1];
     199        rowP[0] = 0;
     200        for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
     201
     202        var objX = new List<IndexedItem<T>>();
     203        for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
     204
     205        // Build ball tree on data set
     206        var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX);           // do we really want to re-create the tree on each call?
     207
     208        // Loop over all points to find nearest neighbors
     209        for (var i = 0; i < n; i++) {
     210          IList<IndexedItem<T>> indices;
     211          IList<double> distances;
     212
     213          // Find nearest neighbors
     214          tree.Search(objX[i], k + 1, out indices, out distances);
     215
     216          // Initialize some variables for binary search
     217          var found = false;
     218          var beta = 1.0;
     219          var minBeta = double.MinValue;
     220          var maxBeta = double.MaxValue;
     221          const double tol = 1e-5;  // TODO: why 1e-5?
     222
     223          // Iterate until we found a good perplexity
     224          var iter = 0; double sumP = 0;
     225          while (!found && iter < 200) {
     226
     227            // Compute Gaussian kernel row
     228            for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
     229
     230            // Compute entropy of current row
     231            sumP = double.Epsilon;
     232            for (var m = 0; m < k; m++) sumP += curP[m];
     233            var h = .0;
     234            for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
     235            h = h / sumP + Math.Log(sumP);
     236
     237            // Evaluate whether the entropy is within the tolerance level
     238            var hdiff = h - Math.Log(perplexity);
     239            if (hdiff < tol && -hdiff < tol) {
     240              found = true;
     241            } else {
     242              if (hdiff > 0) {
     243                minBeta = beta;
     244                if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
     245                  beta *= 2.0;
     246                else
     247                  beta = (beta + maxBeta) / 2.0;
     248              } else {
     249                maxBeta = beta;
     250                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     251                  beta /= 2.0;
     252                else
     253                  beta = (beta + minBeta) / 2.0;
     254              }
     255            }
     256
     257            // Update iteration counter
     258            iter++;
     259          }
     260
     261          // Row-normalize current row of P and store in matrix
     262          for (var m = 0; m < k; m++) curP[m] /= sumP;
     263          for (var m = 0; m < k; m++) {
     264            colP[rowP[i] + m] = indices[m + 1].Index;
     265            valP[rowP[i] + m] = curP[m];
     266          }
     267        }
     268      }
     269      private static void ComputeGaussianPerplexity(T[] x, IDistance<T> distance, double[,] p, double perplexity) {
     270        // Compute the distance matrix
     271        var dd = ComputeDistances(x, distance);
     272
     273        int n = x.Length;
     274        // Compute the Gaussian kernel row by row
     275        for (var i = 0; i < n; i++) {
     276          // Initialize some variables
     277          var found = false;
     278          var beta = 1.0;
     279          var minBeta = -double.MaxValue;
     280          var maxBeta = double.MaxValue;
     281          const double tol = 1e-5;
     282          double sumP = 0;
     283
     284          // Iterate until we found a good perplexity
     285          var iter = 0;
     286          while (!found && iter < 200) {       // TODO constant
     287
     288            // Compute Gaussian kernel row
     289            for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
     290            p[i, i] = double.Epsilon;
     291
     292            // Compute entropy of current row
     293            sumP = double.Epsilon;
     294            for (var m = 0; m < n; m++) sumP += p[i, m];
     295            var h = 0.0;
     296            for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
     297            h = h / sumP + Math.Log(sumP);
     298
     299            // Evaluate whether the entropy is within the tolerance level
     300            var hdiff = h - Math.Log(perplexity);
     301            if (hdiff < tol && -hdiff < tol) {
     302              found = true;
     303            } else {
     304              if (hdiff > 0) {
     305                minBeta = beta;
     306                if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
     307                  beta *= 2.0;
     308                else
     309                  beta = (beta + maxBeta) / 2.0;
     310              } else {
     311                maxBeta = beta;
     312                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     313                  beta /= 2.0;
     314                else
     315                  beta = (beta + minBeta) / 2.0;
     316              }
     317            }
     318
     319            // Update iteration counter
     320            iter++;
     321          }
     322
     323          // Row normalize P
     324          for (var m = 0; m < n; m++) p[i, m] /= sumP;
     325        }
     326      }
     327
     328      private static double[][] ComputeDistances(T[] x, IDistance<T> distance) {
     329        return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
     330      }
     331
     332
     333
     334      private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
     335        // Compute the squared Euclidean distance matrix
     336        var dd = new double[n, n];
     337        var q = new double[n, n];
     338        ComputeSquaredEuclideanDistance(y, n, d, dd);
     339
     340        // Compute Q-matrix and normalization sum
     341        var sumQ = double.Epsilon;
     342        for (var n1 = 0; n1 < n; n1++) {
     343          for (var m = 0; m < n; m++) {
     344            if (n1 != m) {
     345              q[n1, m] = 1 / (1 + dd[n1, m]);
     346              sumQ += q[n1, m];
     347            } else q[n1, m] = double.Epsilon;
     348          }
     349        }
     350        for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;
     351
     352        // Sum t-SNE error
     353        var c = .0;
     354        for (var i = 0; i < n; i++)
     355          for (var j = 0; j < n; j++) {
     356            c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
     357          }
     358        return c;
     359      }
     360      private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
     361        // Get estimate of normalization term
     362        var n = y.GetLength(0);
     363        var d = y.GetLength(1);
     364        var tree = new SpacePartitioningTree(y);
     365        var buff = new double[d];
     366        double sumQ = 0.0;
     367        for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);
     368
     369        // Loop over all edges to compute t-SNE error
     370        var c = .0;
     371        for (var k = 0; k < n; k++) {
     372          for (var i = rowP[k]; i < rowP[k + 1]; i++) {
     373            var q = .0;
     374            for (var j = 0; j < d; j++) buff[j] = y[k, j];
     375            for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
     376            for (var j = 0; j < d; j++) q += buff[j] * buff[j];
     377            q = 1.0 / (1.0 + q) / sumQ;
     378            c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));
     379          }
     380        }
     381        return c;
     382      }
     383      private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {
     384
     385        // Count number of elements and row counts of symmetric matrix
     386        var n = rowP.Count - 1;
     387        var rowCounts = new int[n];
     388        for (var j = 0; j < n; j++) {
     389          for (var i = rowP[j]; i < rowP[j + 1]; i++) {
     390
     391            // Check whether element (col_P[i], n) is present
     392            var present = false;
     393            for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
     394              if (colP[m] == j) present = true;
     395            }
     396            if (present) rowCounts[j]++;
     397            else {
     398              rowCounts[j]++;
     399              rowCounts[colP[i]]++;
     400            }
     401          }
     402        }
     403        var noElem = 0;
     404        for (var i = 0; i < n; i++) noElem += rowCounts[i];
     405
     406        // Allocate memory for symmetrized matrix
     407        symRowP = new int[n + 1];
     408        symColP = new int[noElem];
     409        symValP = new double[noElem];
     410
     411        // Construct new row indices for symmetric matrix
     412        symRowP[0] = 0;
     413        for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];
     414
     415        // Fill the result matrix
     416        var offset = new int[n];
     417        for (var j = 0; j < n; j++) {
     418          for (var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
     419
     420            // Check whether element (col_P[i], n) is present
     421            var present = false;
     422            for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
     423              if (colP[m] != j) continue;
     424              present = true;
     425              if (j > colP[i]) continue; // make sure we do not add elements twice
     426              symColP[symRowP[j] + offset[j]] = colP[i];
     427              symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
     428              symValP[symRowP[j] + offset[j]] = valP[i] + valP[m];
     429              symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i] + valP[m];
     430            }
     431
     432            // If (colP[i], n) is not present, there is no addition involved
     433            if (!present) {
     434              symColP[symRowP[j] + offset[j]] = colP[i];
     435              symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
     436              symValP[symRowP[j] + offset[j]] = valP[i];
     437              symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i];
     438            }
     439
     440            // Update offsets
     441            if (present && (j > colP[i])) continue;
     442            offset[j]++;
     443            if (colP[i] != j) offset[colP[i]]++;
     444          }
     445        }
     446
     447        // Divide the result by two
     448        for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;
     449      }
     450
    122451    }
    123     public override IDeepCloneable Clone(Cloner cloner) { return new TSNE<T>(this, cloner); }
    124     public TSNE(IDistance<T> distance, IRandom random, ResultCollection results = null, int maxIter = 1000, int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0, Dictionary<string, List<int>> dataRowLookup = null, Dictionary<string, ScatterPlotDataRow> dataRows = null) {
    125       this.distance = distance;
    126       this.maxIter = maxIter;
    127       this.stopLyingIter = stopLyingIter;
    128       this.momSwitchIter = momSwitchIter;
    129       this.momentum = momentum;
    130       this.finalMomentum = finalMomentum;
    131       this.eta = eta;
    132       this.random = random;
    133       this.results = results;
    134       this.dataRowLookup = dataRowLookup;
    135       if (dataRows != null) this.dataRows = dataRows;
    136       else { this.dataRows = new Dictionary<string, ScatterPlotDataRow>(); }
     452
     453    public static TSNEState CreateState(T[] data, IDistance<T> distance, IRandom random, int newDimensions = 2, double perplexity = 25, double theta = 0,
     454      int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0
     455      ) {
     456      return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
    137457    }
    138     #endregion
    139 
    140     public double[,] Run(T[] data, int newDimensions, double perplexity, double theta) {
    141       var currentMomentum = momentum;
    142       var noDatapoints = data.Length;
    143       if (noDatapoints - 1 < 3 * perplexity) throw new ArgumentException("Perplexity too large for the number of data points!");
    144       SetUpResults(data);
    145       Running = true;
    146       var exact = Math.Abs(theta) < double.Epsilon;
    147       var newData = new double[noDatapoints, newDimensions];
    148       var dY = new double[noDatapoints, newDimensions];
    149       var uY = new double[noDatapoints, newDimensions];
    150       var gains = new double[noDatapoints, newDimensions];
    151       for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = 1.0;
    152       double[,] p = null;
    153       int[] rowP = null;
    154       int[] colP = null;
    155       double[] valP = null;
    156       var rand = new NormalDistributedRandom(random, 0, 1);
    157 
    158       //Calculate Similarities
    159       if (exact) p = CalculateExactSimilarites(data, perplexity);
    160       else CalculateApproximateSimilarities(data, perplexity, out rowP, out colP, out valP);
    161 
    162       // Lie about the P-values
    163       if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
    164       else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
    165 
    166       // Initialize solution (randomly)
    167       for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001;  // TODO const
    168 
    169       // Perform main training loop
    170       for (var iter = 0; iter < maxIter && Running; iter++) {
    171         if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY);
    172         else ComputeApproximateGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);
    173         // Update gains
    174         for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8;
    175         for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) if (gains[i, j] < .01) gains[i, j] = .01;
    176         // Perform gradient update (with momentum and gains)
    177         for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) uY[i, j] = currentMomentum * uY[i, j] - eta * gains[i, j] * dY[i, j];
    178         for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = newData[i, j] + uY[i, j];
    179         // Make solution zero-mean
    180         ZeroMean(newData);
    181         // Stop lying about the P-values after a while, and switch momentum
    182         if (iter == stopLyingIter) {
    183           if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= 12.0;
    184           else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= 12.0;
    185         }
    186         if (iter == momSwitchIter) currentMomentum = finalMomentum;
    187 
    188         Analyze(exact, iter, p, rowP, colP, valP, newData, noDatapoints, newDimensions, theta);
    189       }
    190       return newData;
     458
     459
     460    public static double[,] Iterate(TSNEState state) {
     461      if (state.exact)
     462        ComputeExactGradient(state.p, state.newData, state.noDatapoints, state.newDimensions, state.dY);
     463      else
     464        ComputeApproximateGradient(state.rowP, state.colP, state.valP, state.newData, state.noDatapoints, state.newDimensions, state.dY, state.theta);
     465
     466      // Update gains
     467      for (var i = 0; i < state.noDatapoints; i++) {
     468        for (var j = 0; j < state.newDimensions; j++) {
     469          state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])
     470            ? state.gains[i, j] + .2
     471            : state.gains[i, j] * .8; // 20% up or 20% down // TODO: +0.2?!
     472
     473          if (state.gains[i, j] < .01) state.gains[i, j] = .01;
     474        }
     475      }
     476
     477
     478      // Perform gradient update (with momentum and gains)
     479      for (var i = 0; i < state.noDatapoints; i++)
     480        for (var j = 0; j < state.newDimensions; j++)
     481          state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];
     482
     483      for (var i = 0; i < state.noDatapoints; i++)
     484        for (var j = 0; j < state.newDimensions; j++)
     485          state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
     486
     487      // Make solution zero-mean
     488      ZeroMean(state.newData);
     489      // Stop lying about the P-values after a while, and switch momentum
     490
     491      if (state.iter == state.stopLyingIter) {
     492        if (state.exact)
     493          for (var i = 0; i < state.noDatapoints; i++) for (var j = 0; j < state.noDatapoints; j++) state.p[i, j] /= 12.0;                                   //XXX why 12?
     494        else
     495          for (var i = 0; i < state.rowP[state.noDatapoints]; i++) state.valP[i] /= 12.0;                       // XXX are we not scaling all values?
     496      }
     497
     498      if (state.iter == state.momSwitchIter)
     499        state.currentMomentum = state.finalMomentum;
     500
     501      state.iter++;
     502      return state.newData;
    191503    }
    192504
    193     #region helpers
    194 
    195     private void SetUpResults(IReadOnlyCollection<T> data) {
    196       if (dataRowLookup == null) dataRowLookup = new Dictionary<string, List<int>> { { "Data", Enumerable.Range(0, data.Count).ToList() } };
    197       if (results == null) return;
    198 
    199       if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0)));
    200       else ((IntValue)results[IterationResultName].Value).Value = 0;
    201 
    202       if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0)));
    203       else ((DoubleValue)results[ErrorResultName].Value).Value = 0;
    204 
    205       if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent")));
    206       else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent");
    207 
    208       var plot = results[ErrorPlotResultName].Value as DataTable;
    209       if (plot == null) throw new ArgumentException("could not create/access error data table in results collection");
    210 
    211       if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors"));
    212       plot.Rows["errors"].Values.Clear();
    213 
    214       results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, "")));
    215       results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix()));
    216 
    217     }
    218     private void Analyze(bool exact, int iter, double[,] p, int[] rowP, int[] colP, double[] valP, double[,] newData, int noDatapoints, int newDimensions, double theta) {
    219       if (results == null) return;
    220       var plot = results[ErrorPlotResultName].Value as DataTable;
    221       if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection. Was it removed by some effect?");
    222       var errors = plot.Rows["errors"].Values;
    223       var c = exact
    224         ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions)
    225         : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
    226       errors.Add(c);
    227       ((IntValue)results[IterationResultName].Value).Value = iter + 1;
    228       ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last();
    229 
    230       var ndata = Normalize(newData);
    231       results[DataResultName].Value = new DoubleMatrix(ndata);
    232       var splot = results[ScatterPlotResultName].Value as ScatterPlot;
    233       FillScatterPlot(ndata, splot);
    234 
    235 
    236     }
    237     private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) {
    238       foreach (var rowName in dataRowLookup.Keys) {
    239         if (!plot.Rows.ContainsKey(rowName))
    240           plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>()));
    241         plot.Rows[rowName].Points.Replace(dataRowLookup[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])));
    242       }
    243     }
    244     private static double[,] Normalize(double[,] data) {
    245       var max = new double[data.GetLength(1)];
    246       var min = new double[data.GetLength(1)];
    247       var res = new double[data.GetLength(0), data.GetLength(1)];
    248       for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
    249       for (var i = 0; i < data.GetLength(0); i++)
    250         for (var j = 0; j < data.GetLength(1); j++) {
    251           var v = data[i, j];
    252           max[j] = Math.Max(max[j], v);
    253           min[j] = Math.Min(min[j], v);
    254         }
    255       for (var i = 0; i < data.GetLength(0); i++) {
    256         for (var j = 0; j < data.GetLength(1); j++) {
    257           res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
    258         }
    259       }
    260       return res;
    261     }
    262     private void CalculateApproximateSimilarities(T[] data, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
    263       // Compute asymmetric pairwise input similarities
    264       ComputeGaussianPerplexity(data, data.Length, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
    265       // Symmetrize input similarities
    266       int[] sRowP, symColP;
    267       double[] sValP;
    268       SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);
    269       rowP = sRowP;
    270       colP = symColP;
    271       valP = sValP;
    272       var sumP = .0;
    273       for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
    274       for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
    275     }
    276     private double[,] CalculateExactSimilarites(T[] data, double perplexity) {
    277       // Compute similarities
    278       var p = new double[data.Length, data.Length];
    279       ComputeGaussianPerplexity(data, data.Length, p, perplexity);
    280       // Symmetrize input similarities
    281       for (var n = 0; n < data.Length; n++) {
    282         for (var m = n + 1; m < data.Length; m++) {
    283           p[n, m] += p[m, n];
    284           p[m, n] = p[n, m];
    285         }
    286       }
    287       var sumP = .0;
    288       for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
    289       for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
    290       return p;
    291     }
    292 
    293     private void ComputeGaussianPerplexity(IReadOnlyList<T> x, int n, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
    294       if (perplexity > k) throw new ArgumentException("Perplexity should be lower than K!");
    295 
    296       // Allocate the memory we need
    297       rowP = new int[n + 1];
    298       colP = new int[n * k];
    299       valP = new double[n * k];
    300       var curP = new double[n - 1];
    301       rowP[0] = 0;
    302       for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
    303 
    304       var objX = new List<IndexedItem<T>>();
    305       for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
    306 
    307       // Build ball tree on data set
    308       var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX);           // do we really want to re-create the tree on each call?
    309 
    310       // Loop over all points to find nearest neighbors
    311       for (var i = 0; i < n; i++) {
    312         IList<IndexedItem<T>> indices;
    313         IList<double> distances;
    314 
    315         // Find nearest neighbors
    316         tree.Search(objX[i], k + 1, out indices, out distances);
    317 
    318         // Initialize some variables for binary search
    319         var found = false;
    320         var beta = 1.0;
    321         var minBeta = double.MinValue;
    322         var maxBeta = double.MaxValue;
    323         const double tol = 1e-5;  // TODO: why 1e-5?
    324 
    325         // Iterate until we found a good perplexity
    326         var iter = 0; double sumP = 0;
    327         while (!found && iter < 200) {
    328 
    329           // Compute Gaussian kernel row
    330           for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
    331 
    332           // Compute entropy of current row
    333           sumP = double.Epsilon;
    334           for (var m = 0; m < k; m++) sumP += curP[m];
    335           var h = .0;
    336           for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
    337           h = h / sumP + Math.Log(sumP);
    338 
    339           // Evaluate whether the entropy is within the tolerance level
    340           var hdiff = h - Math.Log(perplexity);
    341           if (hdiff < tol && -hdiff < tol) {
    342             found = true;
    343           } else {
    344             if (hdiff > 0) {
    345               minBeta = beta;
    346               if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
    347                 beta *= 2.0;
    348               else
    349                 beta = (beta + maxBeta) / 2.0;
    350             } else {
    351               maxBeta = beta;
    352               if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
    353                 beta /= 2.0;
    354               else
    355                 beta = (beta + minBeta) / 2.0;
    356             }
    357           }
    358 
    359           // Update iteration counter
    360           iter++;
    361         }
    362 
    363         // Row-normalize current row of P and store in matrix
    364         for (var m = 0; m < k; m++) curP[m] /= sumP;
    365         for (var m = 0; m < k; m++) {
    366           colP[rowP[i] + m] = indices[m + 1].Index;
    367           valP[rowP[i] + m] = curP[m];
    368         }
    369       }
    370     }
    371     private void ComputeGaussianPerplexity(T[] x, int n, double[,] p, double perplexity) {
    372       // Compute the distance matrix
    373       var dd = ComputeDistances(x);
    374 
    375       // Compute the Gaussian kernel row by row
    376       for (var i = 0; i < n; i++) {
    377         // Initialize some variables
    378         var found = false;
    379         var beta = 1.0;
    380         var minBeta = -double.MaxValue;
    381         var maxBeta = double.MaxValue;
    382         const double tol = 1e-5;
    383         double sumP = 0;
    384 
    385         // Iterate until we found a good perplexity
    386         var iter = 0;
    387         while (!found && iter < 200) {       // TODO constant
    388 
    389           // Compute Gaussian kernel row
    390           for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
    391           p[i, i] = double.Epsilon;
    392 
    393           // Compute entropy of current row
    394           sumP = double.Epsilon;
    395           for (var m = 0; m < n; m++) sumP += p[i, m];
    396           var h = 0.0;
    397           for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
    398           h = h / sumP + Math.Log(sumP);
    399 
    400           // Evaluate whether the entropy is within the tolerance level
    401           var hdiff = h - Math.Log(perplexity);
    402           if (hdiff < tol && -hdiff < tol) {
    403             found = true;
    404           } else {
    405             if (hdiff > 0) {
    406               minBeta = beta;
    407               if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
    408                 beta *= 2.0;
    409               else
    410                 beta = (beta + maxBeta) / 2.0;
    411             } else {
    412               maxBeta = beta;
    413               if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
    414                 beta /= 2.0;
    415               else
    416                 beta = (beta + minBeta) / 2.0;
    417             }
    418           }
    419 
    420           // Update iteration counter
    421           iter++;
    422         }
    423 
    424         // Row normalize P
    425         for (var m = 0; m < n; m++) p[i, m] /= sumP;
    426       }
    427     }
    428 
    429     private double[][] ComputeDistances(T[] x) {
    430       return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
     505
     506    private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
     507      var tree = new SpacePartitioningTree(y);
     508      double sumQ = 0.0;
     509      var posF = new double[n, d];
     510      var negF = new double[n, d];
     511      tree.ComputeEdgeForces(rowP, colP, valP, n, posF);
     512      var row = new double[d];
     513      for (var n1 = 0; n1 < n; n1++) {
     514        Buffer.BlockCopy(negF, (sizeof(double) * n1 * d), row, 0, d);
     515        tree.ComputeNonEdgeForces(n1, theta, row, ref sumQ);
     516      }
     517
     518      // Compute final t-SNE gradient
     519      for (var i = 0; i < n; i++)
     520        for (var j = 0; j < d; j++) {
     521          dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
     522        }
    431523    }
    432524
     
    462554      }
    463555    }
     556
    464557    private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {
    465558      var dataSums = new double[n];
     
    485578      }
    486579    }
    487     private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
    488       var tree = new SpacePartitioningTree(y);
    489       double[] sumQ = { 0 };
    490       var posF = new double[n, d];
    491       var negF = new double[n, d];
    492       tree.ComputeEdgeForces(rowP, colP, valP, n, posF);
    493       var row = new double[d];
    494       for (var n1 = 0; n1 < n; n1++) {
    495         Buffer.BlockCopy(negF, (sizeof(double) * n1 * d), row, 0, d);
    496         tree.ComputeNonEdgeForces(n1, theta, row, sumQ);
    497       }
    498 
    499       // Compute final t-SNE gradient
    500       for (var i = 0; i < n; i++)
    501         for (var j = 0; j < d; j++) {
    502           dC[i, j] = (posF[i, j] - negF[i, j]) / sumQ[0]; // TODO: check parenthesis
    503         }
    504     }
    505 
    506     private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
    507       // Compute the squared Euclidean distance matrix
    508       var dd = new double[n, n];
    509       var q = new double[n, n];
    510       ComputeSquaredEuclideanDistance(y, n, d, dd);
    511 
    512       // Compute Q-matrix and normalization sum
    513       var sumQ = double.Epsilon;
    514       for (var n1 = 0; n1 < n; n1++) {
    515         for (var m = 0; m < n; m++) {
    516           if (n1 != m) {
    517             q[n1, m] = 1 / (1 + dd[n1, m]);
    518             sumQ += q[n1, m];
    519           } else q[n1, m] = double.Epsilon;
    520         }
    521       }
    522       for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;
    523 
    524       // Sum t-SNE error
    525       var c = .0;
    526       for (var i = 0; i < n; i++)
    527         for (var j = 0; j < n; j++) {
    528           c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
    529         }
    530       return c;
    531     }
    532     private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
    533       // Get estimate of normalization term
    534       var n = y.GetLength(0);
    535       var d = y.GetLength(1);
    536       var tree = new SpacePartitioningTree(y);
    537       var buff = new double[d];
    538       double[] sumQ = { 0 };
    539       for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, sumQ);
    540 
    541       // Loop over all edges to compute t-SNE error
    542       var c = .0;
    543       for (var k = 0; k < n; k++) {
    544         for (var i = rowP[k]; i < rowP[k + 1]; i++) {
    545           var q = .0;
    546           for (var j = 0; j < d; j++) buff[j] = y[k, j];
    547           for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
    548           for (var j = 0; j < d; j++) q += buff[j] * buff[j];
    549           q = 1.0 / (1.0 + q) / sumQ[0];
    550           c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));
    551         }
    552       }
    553       return c;
    554     }
    555     private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {
    556 
    557       // Count number of elements and row counts of symmetric matrix
    558       var n = rowP.Count - 1;
    559       var rowCounts = new int[n];
    560       for (var j = 0; j < n; j++) {
    561         for (var i = rowP[j]; i < rowP[j + 1]; i++) {
    562 
    563           // Check whether element (col_P[i], n) is present
    564           var present = false;
    565           for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
    566             if (colP[m] == j) present = true;
    567           }
    568           if (present) rowCounts[j]++;
    569           else {
    570             rowCounts[j]++;
    571             rowCounts[colP[i]]++;
    572           }
    573         }
    574       }
    575       var noElem = 0;
    576       for (var i = 0; i < n; i++) noElem += rowCounts[i];
    577 
    578       // Allocate memory for symmetrized matrix
    579       symRowP = new int[n + 1];
    580       symColP = new int[noElem];
    581       symValP = new double[noElem];
    582 
    583       // Construct new row indices for symmetric matrix
    584       symRowP[0] = 0;
    585       for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];
    586 
    587       // Fill the result matrix
    588       var offset = new int[n];
    589       for (var j = 0; j < n; j++) {
    590         for (var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
    591 
    592           // Check whether element (col_P[i], n) is present
    593           var present = false;
    594           for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
    595             if (colP[m] != j) continue;
    596             present = true;
    597             if (j > colP[i]) continue; // make sure we do not add elements twice
    598             symColP[symRowP[j] + offset[j]] = colP[i];
    599             symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
    600             symValP[symRowP[j] + offset[j]] = valP[i] + valP[m];
    601             symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i] + valP[m];
    602           }
    603 
    604           // If (colP[i], n) is not present, there is no addition involved
    605           if (!present) {
    606             symColP[symRowP[j] + offset[j]] = colP[i];
    607             symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
    608             symValP[symRowP[j] + offset[j]] = valP[i];
    609             symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i];
    610           }
    611 
    612           // Update offsets
    613           if (present && (j > colP[i])) continue;
    614           offset[j]++;
    615           if (colP[i] != j) offset[colP[i]]++;
    616         }
    617       }
    618 
    619       // Divide the result by two
    620       for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;
    621     }
     580
    622581    private static void ZeroMean(double[,] x) {
    623582      // Compute data mean
     
    640599      }
    641600    }
    642     #endregion
    643601  }
    644602}
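
For reference, the binary search over beta in both ComputeGaussianPerplexity overloads above targets a fixed Shannon entropy rather than a fixed bandwidth. Writing d_{ij} for the precomputed distance between points i and j (notation introduced here as a reading of the code, not text from the changeset), each row uses

    p_{j|i} = \frac{\exp(-\beta d_{ij})}{\sum_m \exp(-\beta d_{im})},
    \qquad H(P_i) = \beta \sum_j d_{ij}\, p_{j|i} + \log\!\Big(\sum_m \exp(-\beta d_{im})\Big),

and beta is doubled, halved, or bisected depending on the sign of H(P_i) - log(perplexity) until the difference is within tol = 1e-5 or 200 iterations have passed; the accepted row is then normalized by sumP and stored.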