#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */

// Code is based on an implementation from Laurens van der Maaten
/*
 * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the Delft University of Technology.
 * 4. Neither the name of the Delft University of Technology nor the names of
 *    its contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Random;

namespace HeuristicLab.Algorithms.DataAnalysis {
  [StorableClass]
  public class TSNE<T> : DeepCloneable /*where T : class, IDeepCloneable*/ {
    private const string IterationResultName = "Iteration";
    private const string ErrorResultName = "Error";
    private const string ErrorPlotResultName = "ErrorPlot";
    private const string ScatterPlotResultName = "Scatterplot";
    private const string DataResultName = "Projected Data";

    #region Properties
    [Storable]
    private IDistance<T> distance;
    [Storable]
    private int maxIter;
    [Storable]
    private int stopLyingIter;
    [Storable]
    private int momSwitchIter;
    [Storable]
    private double momentum;
    [Storable]
    private double finalMomentum;
    [Storable]
    private double eta;
    [Storable]
    private IRandom random;
    [Storable]
    private ResultCollection results;
    [Storable]
    private Dictionary<string, List<int>> dataRowLookup;
    [Storable]
    private Dictionary<string, ScatterPlotDataRow> dataRows;
    #endregion

    #region Stopping
    public volatile bool Running; // TODO
    #endregion

    #region HLConstructors & Cloning
    [StorableConstructor]
    protected TSNE(bool deserializing) { }
    protected TSNE(TSNE<T> original, Cloner cloner) : base(original, cloner) {
      distance = cloner.Clone(original.distance);
      maxIter = original.maxIter;
      stopLyingIter = original.stopLyingIter;
      momSwitchIter = original.momSwitchIter;
      momentum = original.momentum;
      finalMomentum = original.finalMomentum;
      eta = original.eta;
      random = cloner.Clone(original.random);
      results = cloner.Clone(original.results);
      dataRowLookup = original.dataRowLookup.ToDictionary(entry => entry.Key, entry => entry.Value.ToList());
      dataRows = original.dataRows.ToDictionary(entry => entry.Key, entry => cloner.Clone(entry.Value));
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new TSNE<T>(this, cloner); }

    public TSNE(IDistance<T> distance, IRandom random, ResultCollection results = null, int maxIter = 1000,
      int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8,
      double eta = 200.0, Dictionary<string, List<int>> dataRowLookup = null,
      Dictionary<string, ScatterPlotDataRow> dataRows = null) {
      this.distance = distance;
      this.maxIter = maxIter;
      this.stopLyingIter = stopLyingIter;
      this.momSwitchIter = momSwitchIter;
      this.momentum = momentum;
      this.finalMomentum = finalMomentum;
      this.eta = eta;
      this.random = random;
      this.results = results;
      this.dataRowLookup = dataRowLookup;
      this.dataRows = dataRows ?? new Dictionary<string, ScatterPlotDataRow>();
    }
    #endregion

    public double[,] Run(T[] data, int newDimensions, double perplexity, double theta) {
      var currentMomentum = momentum;
      var noDatapoints = data.Length;
      if (noDatapoints - 1 < 3 * perplexity)
        throw new ArgumentException("Perplexity too large for the number of data points!");
      SetUpResults(data);

      Running = true;
      var exact = Math.Abs(theta) < double.Epsilon;
      var newData = new double[noDatapoints, newDimensions];
      var dY = new double[noDatapoints, newDimensions];
      var uY = new double[noDatapoints, newDimensions];
      var gains = new double[noDatapoints, newDimensions];
      for (var i = 0; i < noDatapoints; i++)
        for (var j = 0; j < newDimensions; j++)
          gains[i, j] = 1.0;

      double[,] p = null;
      int[] rowP = null;
      int[] colP = null;
      double[] valP = null;
      var rand = new NormalDistributedRandom(random, 0, 1);

      // Calculate similarities
      if (exact) p = CalculateExactSimilarities(data, perplexity);
      else CalculateApproximateSimilarities(data, perplexity, out rowP, out colP, out valP);
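
      // "Lying" about the P-values below is t-SNE's early-exaggeration phase: every p_ij is
      // inflated by a factor of 12 until iteration stopLyingIter, which strengthens attractive
      // forces so that clusters can form early; the factor matches the one used in van der
      // Maaten's Barnes-Hut reference implementation.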
      // Lie about the P-values
      if (exact)
        for (var i = 0; i < noDatapoints; i++)
          for (var j = 0; j < noDatapoints; j++)
            p[i, j] *= 12.0;
      else
        for (var i = 0; i < rowP[noDatapoints]; i++)
          valP[i] *= 12.0;

      // Initialize solution (randomly)
      for (var i = 0; i < noDatapoints; i++)
        for (var j = 0; j < newDimensions; j++)
          newData[i, j] = rand.NextDouble() * .0001; // TODO const

      // Perform main training loop
      for (var iter = 0; iter < maxIter && Running; iter++) {
        if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY);
        else ComputeApproximateGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);

        // Update gains: grow a gain while the gradient direction stays stable, shrink it when it flips
        for (var i = 0; i < noDatapoints; i++)
          for (var j = 0; j < newDimensions; j++)
            gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8;
        for (var i = 0; i < noDatapoints; i++)
          for (var j = 0; j < newDimensions; j++)
            if (gains[i, j] < .01) gains[i, j] = .01;

        // Perform gradient update (with momentum and gains)
        for (var i = 0; i < noDatapoints; i++)
          for (var j = 0; j < newDimensions; j++)
            uY[i, j] = currentMomentum * uY[i, j] - eta * gains[i, j] * dY[i, j];
        for (var i = 0; i < noDatapoints; i++)
          for (var j = 0; j < newDimensions; j++)
            newData[i, j] = newData[i, j] + uY[i, j];

        // Make solution zero-mean
        ZeroMean(newData);

        // Stop lying about the P-values after a while, and switch momentum
        if (iter == stopLyingIter) {
          if (exact)
            for (var i = 0; i < noDatapoints; i++)
              for (var j = 0; j < noDatapoints; j++)
                p[i, j] /= 12.0;
          else
            for (var i = 0; i < rowP[noDatapoints]; i++)
              valP[i] /= 12.0;
        }
        if (iter == momSwitchIter) currentMomentum = finalMomentum;

        Analyze(exact, iter, p, rowP, colP, valP, newData, noDatapoints, newDimensions, theta);
      }
      return newData;
    }

    #region helpers
    private void SetUpResults(IReadOnlyCollection<T> data) {
      if (dataRowLookup == null)
        dataRowLookup = new Dictionary<string, List<int>> { { "Data", Enumerable.Range(0, data.Count).ToList() } };
      if (results == null) return;

      if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0)));
      else ((IntValue)results[IterationResultName].Value).Value = 0;

      if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0)));
      else ((DoubleValue)results[ErrorResultName].Value).Value = 0;

      if (!results.ContainsKey(ErrorPlotResultName))
        results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent")));
      else
        results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent");

      var plot = results[ErrorPlotResultName].Value as DataTable;
      if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection.");
      if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors"));
      plot.Rows["errors"].Values.Clear();

      // Guard these additions as well; adding an already present result would throw on a second run
      if (!results.ContainsKey(ScatterPlotResultName))
        results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, "")));
      if (!results.ContainsKey(DataResultName))
        results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix()));
    }
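
    // Called once per iteration: appends the current KL divergence to the error plot,
    // updates the iteration and error results, and refreshes the scatter plot and data
    // matrix with the normalized projection.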
    private void Analyze(bool exact, int iter, double[,] p, int[] rowP, int[] colP, double[] valP,
      double[,] newData, int noDatapoints, int newDimensions, double theta) {
      if (results == null) return;
      var plot = results[ErrorPlotResultName].Value as DataTable;
      if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection. Was it removed by some effect?");
      var errors = plot.Rows["errors"].Values;
      var c = exact
        ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions)
        : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
      errors.Add(c);
      ((IntValue)results[IterationResultName].Value).Value = iter + 1;
      ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last();

      var ndata = Normalize(newData);
      results[DataResultName].Value = new DoubleMatrix(ndata);
      var splot = results[ScatterPlotResultName].Value as ScatterPlot;
      FillScatterPlot(ndata, splot);
    }

    private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) {
      foreach (var rowName in dataRowLookup.Keys) {
        if (!plot.Rows.ContainsKey(rowName))
          plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>()));
        plot.Rows[rowName].Points.Replace(dataRowLookup[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])));
      }
    }

    // Rescales every column to the interval [-0.5, 0.5] for plotting.
    private static double[,] Normalize(double[,] data) {
      var max = new double[data.GetLength(1)];
      var min = new double[data.GetLength(1)];
      var res = new double[data.GetLength(0), data.GetLength(1)];
      for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
      for (var i = 0; i < data.GetLength(0); i++) {
        for (var j = 0; j < data.GetLength(1); j++) {
          var v = data[i, j];
          max[j] = Math.Max(max[j], v);
          min[j] = Math.Min(min[j], v);
        }
      }
      for (var i = 0; i < data.GetLength(0); i++) {
        for (var j = 0; j < data.GetLength(1); j++) {
          res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
        }
      }
      return res;
    }

    private void CalculateApproximateSimilarities(T[] data, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
      // Compute asymmetric pairwise input similarities
      ComputeGaussianPerplexity(data, data.Length, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
      // Symmetrize input similarities
      int[] sRowP, symColP;
      double[] sValP;
      SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);
      rowP = sRowP;
      colP = symColP;
      valP = sValP;
      var sumP = .0;
      for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
      for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
    }

    private double[,] CalculateExactSimilarities(T[] data, double perplexity) {
      // Compute similarities
      var p = new double[data.Length, data.Length];
      ComputeGaussianPerplexity(data, data.Length, p, perplexity);
      // Symmetrize input similarities
      for (var n = 0; n < data.Length; n++) {
        for (var m = n + 1; m < data.Length; m++) {
          p[n, m] += p[m, n];
          p[m, n] = p[n, m];
        }
      }
      var sumP = .0;
      for (var i = 0; i < data.Length; i++)
        for (var j = 0; j < data.Length; j++)
          sumP += p[i, j];
      for (var i = 0; i < data.Length; i++)
        for (var j = 0; j < data.Length; j++)
          p[i, j] /= sumP;
      return p;
    }

    private void ComputeGaussianPerplexity(IReadOnlyList<T> x, int n, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
      if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");

      // Allocate the memory we need
      rowP = new int[n + 1];
      colP = new int[n * k];
      valP = new double[n * k];
      var curP = new double[n - 1];
      rowP[0] = 0;
      for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;

      var objX = new List<IndexedItem<T>>();
      for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));

      // Build VP-tree on data set
      var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX); // do we really want to re-create the tree on each call?
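
      // For each point i, the binary search below tunes the Gaussian precision
      // beta_i = 1 / (2 * sigma_i^2) so that the Shannon entropy H (in nats) of the
      // conditional distribution over the k nearest neighbors satisfies exp(H) = perplexity;
      // perplexity can thus be read as a smooth count of effective neighbors.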
      // Loop over all points to find nearest neighbors
      for (var i = 0; i < n; i++) {
        IList<IndexedItem<T>> indices;
        IList<double> distances;

        // Find nearest neighbors
        tree.Search(objX[i], k + 1, out indices, out distances);

        // Initialize some variables for binary search
        var found = false;
        var beta = 1.0;
        var minBeta = double.MinValue;
        var maxBeta = double.MaxValue;
        const double tol = 1e-5; // TODO: why 1e-5?

        // Iterate until we find a good perplexity
        var iter = 0;
        double sumP = 0;
        while (!found && iter < 200) {
          // Compute Gaussian kernel row
          for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);

          // Compute entropy of current row
          sumP = double.Epsilon;
          for (var m = 0; m < k; m++) sumP += curP[m];
          var h = .0;
          for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
          h = h / sumP + Math.Log(sumP);

          // Evaluate whether the entropy is within the tolerance level
          var hdiff = h - Math.Log(perplexity);
          if (hdiff < tol && -hdiff < tol) {
            found = true;
          } else {
            if (hdiff > 0) {
              minBeta = beta;
              if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue)) beta *= 2.0;
              else beta = (beta + maxBeta) / 2.0;
            } else {
              maxBeta = beta;
              if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) beta /= 2.0;
              else beta = (beta + minBeta) / 2.0;
            }
          }
          // Update iteration counter
          iter++;
        }

        // Row-normalize current row of P and store in matrix
        for (var m = 0; m < k; m++) curP[m] /= sumP;
        for (var m = 0; m < k; m++) {
          colP[rowP[i] + m] = indices[m + 1].Index;
          valP[rowP[i] + m] = curP[m];
        }
      }
    }

    private void ComputeGaussianPerplexity(T[] x, int n, double[,] p, double perplexity) {
      // Compute the distance matrix
      var dd = ComputeDistances(x);

      // Compute the Gaussian kernel row by row
      for (var i = 0; i < n; i++) {
        // Initialize some variables
        var found = false;
        var beta = 1.0;
        var minBeta = -double.MaxValue;
        var maxBeta = double.MaxValue;
        const double tol = 1e-5;
        double sumP = 0;

        // Iterate until we find a good perplexity
        var iter = 0;
        while (!found && iter < 200) { // TODO constant
          // Compute Gaussian kernel row
          for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
          p[i, i] = double.Epsilon;

          // Compute entropy of current row
          sumP = double.Epsilon;
          for (var m = 0; m < n; m++) sumP += p[i, m];
          var h = 0.0;
          for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
          h = h / sumP + Math.Log(sumP);

          // Evaluate whether the entropy is within the tolerance level
          var hdiff = h - Math.Log(perplexity);
          if (hdiff < tol && -hdiff < tol) {
            found = true;
          } else {
            if (hdiff > 0) {
              minBeta = beta;
              if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue)) beta *= 2.0;
              else beta = (beta + maxBeta) / 2.0;
            } else {
              maxBeta = beta;
              if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) beta /= 2.0;
              else beta = (beta + minBeta) / 2.0;
            }
          }
          // Update iteration counter
          iter++;
        }

        // Row normalize P
        for (var m = 0; m < n; m++) p[i, m] /= sumP;
      }
    }

    private double[][] ComputeDistances(T[] x) {
      return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
    }
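
    // The exact t-SNE gradient (van der Maaten & Hinton, 2008) is
    //   dC/dy_i = 4 * sum_j (p_ij - q_ij) * (y_i - y_j) / (1 + |y_i - y_j|^2).
    // ComputeExactGradient below evaluates this with the constant factor 4 dropped, as in the
    // reference implementation, where it is effectively absorbed into the learning rate eta;
    // q[n1, m] holds the unnormalized affinity 1 / (1 + |y_n1 - y_m|^2) and sumQ its normalizer.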
    private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {
      // Make sure the current gradient contains zeros
      for (var i = 0; i < n; i++)
        for (var j = 0; j < d; j++)
          dC[i, j] = 0.0;

      // Compute the squared Euclidean distance matrix
      var dd = new double[n, n];
      ComputeSquaredEuclideanDistance(y, n, d, dd);

      // Compute Q-matrix and normalization sum
      var q = new double[n, n];
      var sumQ = .0;
      for (var n1 = 0; n1 < n; n1++) {
        for (var m = 0; m < n; m++) {
          if (n1 == m) continue;
          q[n1, m] = 1 / (1 + dd[n1, m]);
          sumQ += q[n1, m];
        }
      }

      // Perform the computation of the gradient
      for (var n1 = 0; n1 < n; n1++) {
        for (var m = 0; m < n; m++) {
          if (n1 == m) continue;
          var mult = (p[n1, m] - q[n1, m] / sumQ) * q[n1, m];
          for (var d1 = 0; d1 < d; d1++) {
            dC[n1, d1] += (y[n1, d1] - y[m, d1]) * mult;
          }
        }
      }
    }

    private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {
      var dataSums = new double[n];
      for (var i = 0; i < n; i++) {
        for (var j = 0; j < d; j++) {
          dataSums[i] += x[i, j] * x[i, j];
        }
      }
      for (var i = 0; i < n; i++) {
        for (var m = 0; m < n; m++) {
          dd[i, m] = dataSums[i] + dataSums[m];
        }
      }
      for (var i = 0; i < n; i++) {
        dd[i, i] = 0.0;
        for (var m = i + 1; m < n; m++) {
          dd[i, m] = 0.0;
          for (var j = 0; j < d; j++) {
            dd[i, m] += (x[i, j] - x[m, j]) * (x[i, j] - x[m, j]);
          }
          dd[m, i] = dd[i, m];
        }
      }
    }

    private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
      var tree = new SpacePartitioningTree(y);
      double[] sumQ = { 0 };
      var posF = new double[n, d];
      var negF = new double[n, d];
      tree.ComputeEdgeForces(rowP, colP, valP, n, posF);
      var row = new double[d];
      for (var n1 = 0; n1 < n; n1++) {
        // Accumulate the repulsive forces for point n1 in a scratch row and write them back
        // to negF (BlockCopy counts bytes, hence the sizeof(double) factors)
        Buffer.BlockCopy(negF, sizeof(double) * n1 * d, row, 0, sizeof(double) * d);
        tree.ComputeNonEdgeForces(n1, theta, row, sumQ);
        Buffer.BlockCopy(row, 0, negF, sizeof(double) * n1 * d, sizeof(double) * d);
      }

      // Compute final t-SNE gradient: attractive minus repulsive forces,
      // where only the repulsive part is normalized by sumQ (as in the reference implementation)
      for (var i = 0; i < n; i++)
        for (var j = 0; j < d; j++) {
          dC[i, j] = posF[i, j] - negF[i, j] / sumQ[0];
        }
    }

    private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
      // Compute the squared Euclidean distance matrix
      var dd = new double[n, n];
      var q = new double[n, n];
      ComputeSquaredEuclideanDistance(y, n, d, dd);

      // Compute Q-matrix and normalization sum
      var sumQ = double.Epsilon;
      for (var n1 = 0; n1 < n; n1++) {
        for (var m = 0; m < n; m++) {
          if (n1 != m) {
            q[n1, m] = 1 / (1 + dd[n1, m]);
            sumQ += q[n1, m];
          } else q[n1, m] = double.Epsilon;
        }
      }
      for (var i = 0; i < n; i++)
        for (var j = 0; j < n; j++)
          q[i, j] /= sumQ;

      // Sum t-SNE error: the Kullback-Leibler divergence KL(P || Q)
      var c = .0;
      for (var i = 0; i < n; i++)
        for (var j = 0; j < n; j++) {
          c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
        }
      return c;
    }

    private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
      // Get estimate of normalization term
      var n = y.GetLength(0);
      var d = y.GetLength(1);
      var tree = new SpacePartitioningTree(y);
      var buff = new double[d];
      double[] sumQ = { 0 };
      for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, sumQ);

      // Loop over all edges to compute t-SNE error
      var c = .0;
      for (var k = 0; k < n; k++) {
        for (var i = rowP[k]; i < rowP[k + 1]; i++) {
          var q = .0;
          for (var j = 0; j < d; j++) buff[j] = y[k, j];
          for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
          for (var j = 0; j < d; j++) q += buff[j] * buff[j];
          q = 1.0 / (1.0 + q) / sumQ[0];
          c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));
        }
      }
      return c;
    }
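
    // SymmetrizeMatrix turns the asymmetric conditional similarities p_{j|i}, stored in
    // compressed sparse row form (rowP/colP/valP), into symmetric p_ij = (p_{j|i} + p_{i|j}) / 2;
    // the division by two happens at the very end, and the caller subsequently normalizes the
    // result so that all p_ij sum to one.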
    private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {
      // Count number of elements and row counts of symmetric matrix
      var n = rowP.Count - 1;
      var rowCounts = new int[n];
      for (var j = 0; j < n; j++) {
        for (var i = rowP[j]; i < rowP[j + 1]; i++) {
          // Check whether element (colP[i], j) is present
          var present = false;
          for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
            if (colP[m] == j) present = true;
          }
          if (present) rowCounts[j]++;
          else {
            rowCounts[j]++;
            rowCounts[colP[i]]++;
          }
        }
      }
      var noElem = 0;
      for (var i = 0; i < n; i++) noElem += rowCounts[i];

      // Allocate memory for symmetrized matrix
      symRowP = new int[n + 1];
      symColP = new int[noElem];
      symValP = new double[noElem];

      // Construct new row indices for symmetric matrix
      symRowP[0] = 0;
      for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];

      // Fill the result matrix
      var offset = new int[n];
      for (var j = 0; j < n; j++) {
        for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element (j, colP[i])
          // Check whether element (colP[i], j) is present
          var present = false;
          for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
            if (colP[m] != j) continue;
            present = true;
            if (j > colP[i]) continue; // make sure we do not add elements twice
            symColP[symRowP[j] + offset[j]] = colP[i];
            symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
            symValP[symRowP[j] + offset[j]] = valP[i] + valP[m];
            symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i] + valP[m];
          }

          // If (colP[i], j) is not present, there is no addition involved
          if (!present) {
            symColP[symRowP[j] + offset[j]] = colP[i];
            symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
            symValP[symRowP[j] + offset[j]] = valP[i];
            symValP[symRowP[colP[i]] + offset[colP[i]]] = valP[i];
          }

          // Update offsets
          if (present && (j > colP[i])) continue;
          offset[j]++;
          if (colP[i] != j) offset[colP[i]]++;
        }
      }

      // Divide the result by two
      for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;
    }

    private static void ZeroMean(double[,] x) {
      // Compute data mean
      var n = x.GetLength(0);
      var d = x.GetLength(1);
      var mean = new double[d];
      for (var i = 0; i < n; i++) {
        for (var j = 0; j < d; j++) {
          mean[j] += x[i, j];
        }
      }
      for (var i = 0; i < d; i++) {
        mean[i] /= n;
      }
      // Subtract data mean
      for (var i = 0; i < n; i++) {
        for (var j = 0; j < d; j++) {
          x[i, j] -= mean[j];
        }
      }
    }
    #endregion
  }
}
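
// Hypothetical usage sketch. The concrete IDistance<T> and IRandom implementations are
// assumptions (e.g. HeuristicLab's EuclideanDistance over double sequences and a
// MersenneTwister), not something this file prescribes:
//
//   IEnumerable<double>[] data = LoadData(); // hypothetical helper
//   var tsne = new TSNE<IEnumerable<double>>(new EuclideanDistance(), new MersenneTwister(42));
//   double[,] projected = tsne.Run(data, newDimensions: 2, perplexity: 25, theta: 0.5);
//
// theta == 0 selects the exact O(n^2) gradient; theta > 0 the Barnes-Hut approximation.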