Free cookie consent management tool by TermsFeed Policy Generator

source: branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs @ 14785

Last change on this file since 14785 was 14785, checked in by gkronber, 7 years ago

#2700: changes and while reviewing

File size: 26.4 KB
RevLine 
[14414]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21//Code is based on an implementation from Laurens van der Maaten
22
23/*
24*
25* Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
26* All rights reserved.
27*
28* Redistribution and use in source and binary forms, with or without
29* modification, are permitted provided that the following conditions are met:
30* 1. Redistributions of source code must retain the above copyright
31*    notice, this list of conditions and the following disclaimer.
32* 2. Redistributions in binary form must reproduce the above copyright
33*    notice, this list of conditions and the following disclaimer in the
34*    documentation and/or other materials provided with the distribution.
35* 3. All advertising materials mentioning features or use of this software
36*    must display the following acknowledgement:
37*    This product includes software developed by the Delft University of Technology.
38* 4. Neither the name of the Delft University of Technology nor the names of
39*    its contributors may be used to endorse or promote products derived from
40*    this software without specific prior written permission.
41*
42* THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
43* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
45* EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
47* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
48* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
49* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
50* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
51* OF SUCH DAMAGE.
52*
53*/
54#endregion
55
56using System;
57using System.Collections.Generic;
58using System.Linq;
[14512]59using HeuristicLab.Analysis;
[14785]60using HeuristicLab.Collections;
[14414]61using HeuristicLab.Common;
62using HeuristicLab.Core;
63using HeuristicLab.Data;
64using HeuristicLab.Optimization;
65using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
66using HeuristicLab.Random;
67
68namespace HeuristicLab.Algorithms.DataAnalysis {
69  [StorableClass]
[14785]70  public class TSNE<T> : DeepCloneable /*where T : class, IDeepCloneable*/ {
[14414]71
72    private const string IterationResultName = "Iteration";
73    private const string ErrorResultName = "Error";
[14512]74    private const string ErrorPlotResultName = "ErrorPlot";
75    private const string ScatterPlotResultName = "Scatterplot";
76    private const string DataResultName = "Projected Data";
[14414]77
78    #region Properties
79    [Storable]
80    private IDistance<T> distance;
81    [Storable]
82    private int maxIter;
83    [Storable]
84    private int stopLyingIter;
85    [Storable]
86    private int momSwitchIter;
87    [Storable]
88    double momentum;
89    [Storable]
90    private double finalMomentum;
91    [Storable]
92    private double eta;
93    [Storable]
94    private IRandom random;
95    [Storable]
96    private ResultCollection results;
[14512]97    [Storable]
98    private Dictionary<string, List<int>> dataRowLookup;
99    [Storable]
100    private Dictionary<string, ScatterPlotDataRow> dataRows;
[14414]101    #endregion
102
[14512]103    #region Stopping
[14785]104    public volatile bool Running;      // TODO
[14512]105    #endregion
106
[14414]107    #region HLConstructors & Cloning
108    [StorableConstructor]
[14518]109    protected TSNE(bool deserializing) { }
[14414]110    protected TSNE(TSNE<T> original, Cloner cloner) : base(original, cloner) {
111      distance = cloner.Clone(original.distance);
112      maxIter = original.maxIter;
113      stopLyingIter = original.stopLyingIter;
114      momSwitchIter = original.momSwitchIter;
115      momentum = original.momentum;
116      finalMomentum = original.finalMomentum;
117      eta = original.eta;
118      random = cloner.Clone(random);
119      results = cloner.Clone(results);
[14512]120      dataRowLookup = original.dataRowLookup.ToDictionary(entry => entry.Key, entry => entry.Value.Select(x => x).ToList());
121      dataRows = original.dataRows.ToDictionary(entry => entry.Key, entry => cloner.Clone(entry.Value));
[14414]122    }
123    public override IDeepCloneable Clone(Cloner cloner) { return new TSNE<T>(this, cloner); }
[14512]124    public TSNE(IDistance<T> distance, IRandom random, ResultCollection results = null, int maxIter = 1000, int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0, Dictionary<string, List<int>> dataRowLookup = null, Dictionary<string, ScatterPlotDataRow> dataRows = null) {
[14414]125      this.distance = distance;
126      this.maxIter = maxIter;
127      this.stopLyingIter = stopLyingIter;
128      this.momSwitchIter = momSwitchIter;
129      this.momentum = momentum;
130      this.finalMomentum = finalMomentum;
131      this.eta = eta;
132      this.random = random;
133      this.results = results;
[14512]134      this.dataRowLookup = dataRowLookup;
[14742]135      if (dataRows != null) this.dataRows = dataRows;
[14512]136      else { this.dataRows = new Dictionary<string, ScatterPlotDataRow>(); }
[14414]137    }
138    #endregion
139
140    public double[,] Run(T[] data, int newDimensions, double perplexity, double theta) {
141      var currentMomentum = momentum;
142      var noDatapoints = data.Length;
143      if (noDatapoints - 1 < 3 * perplexity) throw new ArgumentException("Perplexity too large for the number of data points!");
[14512]144      SetUpResults(data);
145      Running = true;
[14414]146      var exact = Math.Abs(theta) < double.Epsilon;
147      var newData = new double[noDatapoints, newDimensions];
148      var dY = new double[noDatapoints, newDimensions];
149      var uY = new double[noDatapoints, newDimensions];
150      var gains = new double[noDatapoints, newDimensions];
151      for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = 1.0;
152      double[,] p = null;
153      int[] rowP = null;
154      int[] colP = null;
155      double[] valP = null;
[14512]156      var rand = new NormalDistributedRandom(random, 0, 1);
[14414]157
[14512]158      //Calculate Similarities
159      if (exact) p = CalculateExactSimilarites(data, perplexity);
160      else CalculateApproximateSimilarities(data, perplexity, out rowP, out colP, out valP);
161
[14414]162      // Lie about the P-values
[14512]163      if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
164      else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
[14414]165
166      // Initialize solution (randomly)
[14785]167      for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001;  // TODO const
[14414]168
169      // Perform main training loop
[14512]170      for (var iter = 0; iter < maxIter && Running; iter++) {
[14414]171        if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY);
[14785]172        else ComputeApproximateGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);
[14414]173        // Update gains
[14512]174        for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8;
[14414]175        for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) if (gains[i, j] < .01) gains[i, j] = .01;
176        // Perform gradient update (with momentum and gains)
177        for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) uY[i, j] = currentMomentum * uY[i, j] - eta * gains[i, j] * dY[i, j];
178        for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = newData[i, j] + uY[i, j];
179        // Make solution zero-mean
180        ZeroMean(newData);
181        // Stop lying about the P-values after a while, and switch momentum
182        if (iter == stopLyingIter) {
[14512]183          if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= 12.0;
184          else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= 12.0;
[14414]185        }
186        if (iter == momSwitchIter) currentMomentum = finalMomentum;
187
[14512]188        Analyze(exact, iter, p, rowP, colP, valP, newData, noDatapoints, newDimensions, theta);
[14414]189      }
190      return newData;
191    }
192
193    #region helpers
[14512]194
195    private void SetUpResults(IReadOnlyCollection<T> data) {
[14742]196      if (dataRowLookup == null) dataRowLookup = new Dictionary<string, List<int>> { { "Data", Enumerable.Range(0, data.Count).ToList() } };
[14512]197      if (results == null) return;
[14742]198
[14512]199      if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0)));
200      else ((IntValue)results[IterationResultName].Value).Value = 0;
201
202      if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0)));
203      else ((DoubleValue)results[ErrorResultName].Value).Value = 0;
204
[14785]205      if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent")));
206      else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent");
[14512]207
208      var plot = results[ErrorPlotResultName].Value as DataTable;
[14785]209      if (plot == null) throw new ArgumentException("could not create/access error data table in results collection");
[14742]210
211      if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors"));
[14512]212      plot.Rows["errors"].Values.Clear();
[14742]213
[14512]214      results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, "")));
215      results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix()));
216
217    }
218    private void Analyze(bool exact, int iter, double[,] p, int[] rowP, int[] colP, double[] valP, double[,] newData, int noDatapoints, int newDimensions, double theta) {
219      if (results == null) return;
220      var plot = results[ErrorPlotResultName].Value as DataTable;
[14767]221      if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection. Was it removed by some effect?");
[14512]222      var errors = plot.Rows["errors"].Values;
223      var c = exact
[14785]224        ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions)
225        : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
[14512]226      errors.Add(c);
227      ((IntValue)results[IterationResultName].Value).Value = iter + 1;
228      ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last();
229
230      var ndata = Normalize(newData);
231      results[DataResultName].Value = new DoubleMatrix(ndata);
232      var splot = results[ScatterPlotResultName].Value as ScatterPlot;
233      FillScatterPlot(ndata, splot);
234
235
236    }
237    private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) {
238      foreach (var rowName in dataRowLookup.Keys) {
[14742]239        if (!plot.Rows.ContainsKey(rowName))
[14512]240          plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>()));
[14558]241        plot.Rows[rowName].Points.Replace(dataRowLookup[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])));
[14512]242      }
243    }
244    private static double[,] Normalize(double[,] data) {
245      var max = new double[data.GetLength(1)];
246      var min = new double[data.GetLength(1)];
247      var res = new double[data.GetLength(0), data.GetLength(1)];
248      for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
249      for (var i = 0; i < data.GetLength(0); i++)
250        for (var j = 0; j < data.GetLength(1); j++) {
251          var v = data[i, j];
252          max[j] = Math.Max(max[j], v);
253          min[j] = Math.Min(min[j], v);
254        }
255      for (var i = 0; i < data.GetLength(0); i++) {
256        for (var j = 0; j < data.GetLength(1); j++) {
257          res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
258        }
259      }
260      return res;
261    }
262    private void CalculateApproximateSimilarities(T[] data, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
263      // Compute asymmetric pairwise input similarities
264      ComputeGaussianPerplexity(data, data.Length, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
265      // Symmetrize input similarities
266      int[] sRowP, symColP;
267      double[] sValP;
268      SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);
269      rowP = sRowP;
270      colP = symColP;
271      valP = sValP;
272      var sumP = .0;
273      for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
274      for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
275    }
276    private double[,] CalculateExactSimilarites(T[] data, double perplexity) {
277      // Compute similarities
278      var p = new double[data.Length, data.Length];
279      ComputeGaussianPerplexity(data, data.Length, p, perplexity);
280      // Symmetrize input similarities
281      for (var n = 0; n < data.Length; n++) {
282        for (var m = n + 1; m < data.Length; m++) {
283          p[n, m] += p[m, n];
284          p[m, n] = p[n, m];
285        }
286      }
287      var sumP = .0;
288      for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
289      for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
290      return p;
291    }
292
[14414]293    private void ComputeGaussianPerplexity(IReadOnlyList<T> x, int n, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
294      if (perplexity > k) throw new ArgumentException("Perplexity should be lower than K!");
295
296      // Allocate the memory we need
297      rowP = new int[n + 1];
298      colP = new int[n * k];
299      valP = new double[n * k];
300      var curP = new double[n - 1];
301      rowP[0] = 0;
302      for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
303
[14785]304      var objX = new List<IndexedItem<T>>();
305      for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
306
[14414]307      // Build ball tree on data set
[14785]308      var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX);           // do we really want to re-create the tree on each call?
[14414]309
310      // Loop over all points to find nearest neighbors
311      for (var i = 0; i < n; i++) {
[14785]312        IList<IndexedItem<T>> indices;
313        IList<double> distances;
[14414]314
315        // Find nearest neighbors
316        tree.Search(objX[i], k + 1, out indices, out distances);
317
318        // Initialize some variables for binary search
319        var found = false;
320        var beta = 1.0;
[14785]321        var minBeta = double.MinValue;
[14414]322        var maxBeta = double.MaxValue;
[14785]323        const double tol = 1e-5;  // TODO: why 1e-5?
[14414]324
325        // Iterate until we found a good perplexity
326        var iter = 0; double sumP = 0;
327        while (!found && iter < 200) {
328
329          // Compute Gaussian kernel row
330          for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
331
332          // Compute entropy of current row
333          sumP = double.Epsilon;
334          for (var m = 0; m < k; m++) sumP += curP[m];
335          var h = .0;
336          for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
337          h = h / sumP + Math.Log(sumP);
338
339          // Evaluate whether the entropy is within the tolerance level
340          var hdiff = h - Math.Log(perplexity);
341          if (hdiff < tol && -hdiff < tol) {
342            found = true;
343          } else {
344            if (hdiff > 0) {
345              minBeta = beta;
346              if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
347                beta *= 2.0;
348              else
349                beta = (beta + maxBeta) / 2.0;
350            } else {
351              maxBeta = beta;
352              if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
353                beta /= 2.0;
354              else
355                beta = (beta + minBeta) / 2.0;
356            }
357          }
358
359          // Update iteration counter
360          iter++;
361        }
362
363        // Row-normalize current row of P and store in matrix
364        for (var m = 0; m < k; m++) curP[m] /= sumP;
365        for (var m = 0; m < k; m++) {
366          colP[rowP[i] + m] = indices[m + 1].Index;
367          valP[rowP[i] + m] = curP[m];
368        }
369      }
370    }
371    private void ComputeGaussianPerplexity(T[] x, int n, double[,] p, double perplexity) {
[14785]372      // Compute the distance matrix
[14414]373      var dd = ComputeDistances(x);
[14785]374
[14414]375      // Compute the Gaussian kernel row by row
376      for (var i = 0; i < n; i++) {
377        // Initialize some variables
378        var found = false;
379        var beta = 1.0;
380        var minBeta = -double.MaxValue;
381        var maxBeta = double.MaxValue;
382        const double tol = 1e-5;
383        double sumP = 0;
384
385        // Iterate until we found a good perplexity
386        var iter = 0;
[14785]387        while (!found && iter < 200) {       // TODO constant
[14414]388
389          // Compute Gaussian kernel row
390          for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
391          p[i, i] = double.Epsilon;
392
393          // Compute entropy of current row
394          sumP = double.Epsilon;
395          for (var m = 0; m < n; m++) sumP += p[i, m];
396          var h = 0.0;
397          for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
398          h = h / sumP + Math.Log(sumP);
399
400          // Evaluate whether the entropy is within the tolerance level
401          var hdiff = h - Math.Log(perplexity);
402          if (hdiff < tol && -hdiff < tol) {
403            found = true;
404          } else {
405            if (hdiff > 0) {
406              minBeta = beta;
407              if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
408                beta *= 2.0;
409              else
410                beta = (beta + maxBeta) / 2.0;
411            } else {
412              maxBeta = beta;
413              if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
414                beta /= 2.0;
415              else
416                beta = (beta + minBeta) / 2.0;
417            }
418          }
419
420          // Update iteration counter
421          iter++;
422        }
423
424        // Row normalize P
425        for (var m = 0; m < n; m++) p[i, m] /= sumP;
426      }
427    }
[14785]428
[14414]429    private double[][] ComputeDistances(T[] x) {
430      return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
431    }
[14785]432
[14414]433    private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {
434
435      // Make sure the current gradient contains zeros
436      for (var i = 0; i < n; i++) for (var j = 0; j < d; j++) dC[i, j] = 0.0;
437
438      // Compute the squared Euclidean distance matrix
439      var dd = new double[n, n];
440      ComputeSquaredEuclideanDistance(y, n, d, dd);
441
442      // Compute Q-matrix and normalization sum
443      var q = new double[n, n];
444      var sumQ = .0;
445      for (var n1 = 0; n1 < n; n1++) {
446        for (var m = 0; m < n; m++) {
447          if (n1 == m) continue;
448          q[n1, m] = 1 / (1 + dd[n1, m]);
449          sumQ += q[n1, m];
450        }
451      }
452
453      // Perform the computation of the gradient
454      for (var n1 = 0; n1 < n; n1++) {
455        for (var m = 0; m < n; m++) {
456          if (n1 == m) continue;
457          var mult = (p[n1, m] - q[n1, m] / sumQ) * q[n1, m];
458          for (var d1 = 0; d1 < d; d1++) {
459            dC[n1, d1] += (y[n1, d1] - y[m, d1]) * mult;
460          }
461        }
462      }
463    }
464    private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {
465      var dataSums = new double[n];
466      for (var i = 0; i < n; i++) {
467        for (var j = 0; j < d; j++) {
468          dataSums[i] += x[i, j] * x[i, j];
469        }
470      }
471      for (var i = 0; i < n; i++) {
472        for (var m = 0; m < n; m++) {
473          dd[i, m] = dataSums[i] + dataSums[m];
474        }
475      }
476      for (var i = 0; i < n; i++) {
477        dd[i, i] = 0.0;
478        for (var m = i + 1; m < n; m++) {
479          dd[i, m] = 0.0;
480          for (var j = 0; j < d; j++) {
481            dd[i, m] += (x[i, j] - x[m, j]) * (x[i, j] - x[m, j]);
482          }
483          dd[m, i] = dd[i, m];
484        }
485      }
486    }
[14785]487    private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
488      var tree = new SpacePartitioningTree(y);
[14414]489      double[] sumQ = { 0 };
490      var posF = new double[n, d];
491      var negF = new double[n, d];
492      tree.ComputeEdgeForces(rowP, colP, valP, n, posF);
493      var row = new double[d];
[14742]494      for (var n1 = 0; n1 < n; n1++) {
[14414]495        Buffer.BlockCopy(negF, (sizeof(double) * n1 * d), row, 0, d);
496        tree.ComputeNonEdgeForces(n1, theta, row, sumQ);
497      }
498
499      // Compute final t-SNE gradient
500      for (var i = 0; i < n; i++)
501        for (var j = 0; j < d; j++) {
[14785]502          dC[i, j] = (posF[i, j] - negF[i, j]) / sumQ[0]; // TODO: check parenthesis
[14414]503        }
504    }
[14785]505
506    private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
[14414]507      // Compute the squared Euclidean distance matrix
508      var dd = new double[n, n];
509      var q = new double[n, n];
510      ComputeSquaredEuclideanDistance(y, n, d, dd);
511
512      // Compute Q-matrix and normalization sum
513      var sumQ = double.Epsilon;
514      for (var n1 = 0; n1 < n; n1++) {
515        for (var m = 0; m < n; m++) {
516          if (n1 != m) {
517            q[n1, m] = 1 / (1 + dd[n1, m]);
518            sumQ += q[n1, m];
519          } else q[n1, m] = double.Epsilon;
520        }
521      }
522      for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;
523
524      // Sum t-SNE error
525      var c = .0;
526      for (var i = 0; i < n; i++)
527        for (var j = 0; j < n; j++) {
528          c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
529        }
530      return c;
531    }
[14785]532    private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
[14414]533      // Get estimate of normalization term
534      var n = y.GetLength(0);
535      var d = y.GetLength(1);
[14785]536      var tree = new SpacePartitioningTree(y);
[14414]537      var buff = new double[d];
538      double[] sumQ = { 0 };
539      for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, sumQ);
540
541      // Loop over all edges to compute t-SNE error
542      var c = .0;
543      for (var k = 0; k < n; k++) {
544        for (var i = rowP[k]; i < rowP[k + 1]; i++) {
545          var q = .0;
546          for (var j = 0; j < d; j++) buff[j] = y[k, j];
547          for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
548          for (var j = 0; j < d; j++) q += buff[j] * buff[j];
549          q = 1.0 / (1.0 + q) / sumQ[0];
550          c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));
551        }
552      }
553      return c;
554    }
555    private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {
556
557      // Count number of elements and row counts of symmetric matrix
558      var n = rowP.Count - 1;
559      var rowCounts = new int[n];
560      for (var j = 0; j < n; j++) {
561        for (var i = rowP[j]; i < rowP[j + 1]; i++) {
562
563          // Check whether element (col_P[i], n) is present
564          var present = false;
565          for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
566            if (colP[m] == j) present = true;
567          }
568          if (present) rowCounts[j]++;
569          else {
570            rowCounts[j]++;
571            rowCounts[colP[i]]++;
572          }
573        }
574      }
575      var noElem = 0;
576      for (var i = 0; i < n; i++) noElem += rowCounts[i];
577
578      // Allocate memory for symmetrized matrix
579      symRowP = new int[n + 1];
580      symColP = new int[noElem];
581      symValP = new double[noElem];
582
583      // Construct new row indices for symmetric matrix
584      symRowP[0] = 0;
585      for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];
586
587      // Fill the result matrix
588      var offset = new int[n];
589      for (var j = 0; j < n; j++) {
590        for (var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
591
592          // Check whether element (col_P[i], n) is present
593          var present = false;
594          for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
595            if (colP[m] != j) continue;
596            present = true;
597            if (j > colP[i]) continue; // make sure we do not add elements twice
598            symColP[symRowP[j] + offset[j]] = colP[i];
599            symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
600            symValP[symRowP[j] + offset[j]] = valP[i] + valP[m];
601            symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i] + valP[m];
602          }
603
604          // If (colP[i], n) is not present, there is no addition involved
605          if (!present) {
606            symColP[symRowP[j] + offset[j]] = colP[i];
607            symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
608            symValP[symRowP[j] + offset[j]] = valP[i];
609            symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i];
610          }
611
612          // Update offsets
613          if (present && (j > colP[i])) continue;
614          offset[j]++;
615          if (colP[i] != j) offset[colP[i]]++;
616        }
617      }
618
619      // Divide the result by two
620      for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;
621    }
622    private static void ZeroMean(double[,] x) {
623      // Compute data mean
624      var n = x.GetLength(0);
625      var d = x.GetLength(1);
626      var mean = new double[d];
627      for (var i = 0; i < n; i++) {
628        for (var j = 0; j < d; j++) {
629          mean[j] += x[i, j];
630        }
631      }
632      for (var i = 0; i < d; i++) {
633        mean[i] /= n;
634      }
635      // Subtract data mean
636      for (var i = 0; i < n; i++) {
637        for (var j = 0; j < d; j++) {
638          x[i, j] -= mean[j];
639        }
640      }
641    }
642    #endregion
643  }
644}
Note: See TracBrowser for help on using the repository browser.