Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/20/18 13:52:40 (6 years ago)
Author:
pfleck
Message:

#2845 reverted the last merge (r16307) because some revisions were missing

Location:
branches/2845_EnhancedProgress
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2845_EnhancedProgress

  • branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis

  • branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4

    • Property svn:mergeinfo deleted
  • branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs

    r16307 r16308  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    6565  [StorableClass]
    6666  public class TSNEStatic<T> {
     67
    6768    [StorableClass]
    6869    public sealed class TSNEState : DeepCloneable {
     
    169170      [StorableConstructor]
    170171      public TSNEState(bool deserializing) { }
    171 
    172       public TSNEState(IReadOnlyList<T> data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity,
    173         double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta, bool randomInit) {
     172      public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) {
    174173        this.distance = distance;
    175174        this.random = random;
     
    184183
    185184        // initialize
    186         noDatapoints = data.Count;
     185        noDatapoints = data.Length;
    187186        if (noDatapoints - 1 < 3 * perplexity)
    188187          throw new ArgumentException("Perplexity too large for the number of data points!");
     
    194193        gains = new double[noDatapoints, newDimensions];
    195194        for (var i = 0; i < noDatapoints; i++)
    196         for (var j = 0; j < newDimensions; j++)
    197           gains[i, j] = 1.0;
     195          for (var j = 0; j < newDimensions; j++)
     196            gains[i, j] = 1.0;
    198197
    199198        p = null;
     
    213212        var rand = new NormalDistributedRandom(random, 0, 1);
    214213        for (var i = 0; i < noDatapoints; i++)
    215         for (var j = 0; j < newDimensions; j++)
    216           newData[i, j] = rand.NextDouble() * .0001;
    217 
    218         if (!(data[0] is IReadOnlyList<double>) || randomInit) return;
    219         for (var i = 0; i < noDatapoints; i++)
    220         for (var j = 0; j < newDimensions; j++) {
    221           var row = (IReadOnlyList<double>) data[i];
    222           newData[i, j] = row[j % row.Count];
    223         }
     214          for (var j = 0; j < newDimensions; j++)
     215            newData[i, j] = rand.NextDouble() * .0001;
    224216      }
    225217      #endregion
    226218
    227219      public double EvaluateError() {
    228         return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
     220        return exact ?
     221          EvaluateErrorExact(p, newData, noDatapoints, newDimensions) :
     222          EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
    229223      }
    230224
    231225      #region Helpers
    232       private static void CalculateApproximateSimilarities(IReadOnlyList<T> data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
     226      private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
    233227        // Compute asymmetric pairwise input similarities
    234         ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity));
     228        ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
    235229        // Symmetrize input similarities
    236230        int[] sRowP, symColP;
     
    241235        valP = sValP;
    242236        var sumP = .0;
    243         for (var i = 0; i < rowP[data.Count]; i++) sumP += valP[i];
    244         for (var i = 0; i < rowP[data.Count]; i++) valP[i] /= sumP;
    245       }
    246       private static double[,] CalculateExactSimilarites(IReadOnlyList<T> data, IDistance<T> distance, double perplexity) {
     237        for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
     238        for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
     239      }
     240
     241      private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) {
    247242        // Compute similarities
    248         var p = new double[data.Count, data.Count];
     243        var p = new double[data.Length, data.Length];
    249244        ComputeGaussianPerplexity(data, distance, p, perplexity);
    250245        // Symmetrize input similarities
    251         for (var n = 0; n < data.Count; n++) {
    252           for (var m = n + 1; m < data.Count; m++) {
     246        for (var n = 0; n < data.Length; n++) {
     247          for (var m = n + 1; m < data.Length; m++) {
    253248            p[n, m] += p[m, n];
    254249            p[m, n] = p[n, m];
     
    256251        }
    257252        var sumP = .0;
    258         for (var i = 0; i < data.Count; i++) {
    259           for (var j = 0; j < data.Count; j++) {
    260             sumP += p[i, j];
    261           }
    262         }
    263         for (var i = 0; i < data.Count; i++) {
    264           for (var j = 0; j < data.Count; j++) {
    265             p[i, j] /= sumP;
    266           }
    267         }
     253        for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
     254        for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
    268255        return p;
    269256      }
     257
    270258      private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
    271259        if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");
     
    302290
    303291          // Iterate until we found a good perplexity
    304           var iter = 0;
    305           double sumP = 0;
     292          var iter = 0; double sumP = 0;
    306293          while (!found && iter < 200) {
     294
    307295            // Compute Gaussian kernel row
    308296            for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
     
    319307            if (hdiff < tol && -hdiff < tol) {
    320308              found = true;
    321             }
    322             else {
     309            } else {
    323310              if (hdiff > 0) {
    324311                minBeta = beta;
     
    327314                else
    328315                  beta = (beta + maxBeta) / 2.0;
    329               }
    330               else {
     316              } else {
    331317                maxBeta = beta;
    332318                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     
    349335        }
    350336      }
    351       private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, double[,] p, double perplexity) {
     337      private static void ComputeGaussianPerplexity(T[] x, IDistance<T> distance, double[,] p, double perplexity) {
    352338        // Compute the distance matrix
    353339        var dd = ComputeDistances(x, distance);
    354340
    355         var n = x.Count;
     341        var n = x.Length;
    356342        // Compute the Gaussian kernel row by row
    357343        for (var i = 0; i < n; i++) {
     
    366352          // Iterate until we found a good perplexity
    367353          var iter = 0;
    368           while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten
     354          while (!found && iter < 200) {      // 200 iterations as in tSNE implementation by van der Maarten
    369355
    370356            // Compute Gaussian kernel row
     
    383369            if (hdiff < tol && -hdiff < tol) {
    384370              found = true;
    385             }
    386             else {
     371            } else {
    387372              if (hdiff > 0) {
    388373                minBeta = beta;
     
    391376                else
    392377                  beta = (beta + maxBeta) / 2.0;
    393               }
    394               else {
     378              } else {
    395379                maxBeta = beta;
    396380                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     
    409393        }
    410394      }
    411       private static double[][] ComputeDistances(IReadOnlyList<T> x, IDistance<T> distance) {
    412         var res = new double[x.Count][];
    413         for (var r = 0; r < x.Count; r++) {
    414           var rowV = new double[x.Count];
     395
     396      private static double[][] ComputeDistances(T[] x, IDistance<T> distance) {
     397        var res = new double[x.Length][];
     398        for (var r = 0; r < x.Length; r++) {
     399          var rowV = new double[x.Length];
    415400          // all distances must be symmetric
    416401          for (var c = 0; c < r; c++) {
     
    418403          }
    419404          rowV[r] = 0.0; // distance to self is zero for all distances
    420           for (var c = r + 1; c < x.Count; c++) {
     405          for (var c = r + 1; c < x.Length; c++) {
    421406            rowV[c] = distance.Get(x[r], x[c]);
    422407          }
     
    426411        // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
    427412      }
     413
    428414      private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
    429415        // Compute the squared Euclidean distance matrix
     
    439425              q[n1, m] = 1 / (1 + dd[n1, m]);
    440426              sumQ += q[n1, m];
    441             }
    442             else q[n1, m] = double.Epsilon;
     427            } else q[n1, m] = double.Epsilon;
    443428          }
    444429        }
     
    448433        var c = .0;
    449434        for (var i = 0; i < n; i++)
    450         for (var j = 0; j < n; j++) {
    451           c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
    452         }
     435          for (var j = 0; j < n; j++) {
     436            c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
     437          }
    453438        return c;
    454439      }
     440
    455441      private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
    456442        // Get estimate of normalization term
     
    477463      }
    478464      private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {
     465
    479466        // Count number of elements and row counts of symmetric matrix
    480467        var n = rowP.Count - 1;
     
    482469        for (var j = 0; j < n; j++) {
    483470          for (var i = rowP[j]; i < rowP[j + 1]; i++) {
     471
    484472            // Check whether element (col_P[i], n) is present
    485473            var present = false;
     
    509497        var offset = new int[n];
    510498        for (var j = 0; j < n; j++) {
    511           for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])
     499          for (var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
    512500
    513501            // Check whether element (col_P[i], n) is present
     
    561549    public static double[,] Run(T[] data, IDistance<T> distance, IRandom random,
    562550      int newDimensions = 2, double perplexity = 25, int iterations = 1000,
    563       double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
     551      double theta = 0,
     552      int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
    564553      double finalMomentum = .8, double eta = 10.0
    565     ) {
     554      ) {
    566555      var state = CreateState(data, distance, random, newDimensions, perplexity,
    567556        theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
     
    576565      int newDimensions = 2, double perplexity = 25, double theta = 0,
    577566      int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
    578       double finalMomentum = .8, double eta = 10.0, bool randomInit = true
    579     ) {
    580       return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta, randomInit);
     567      double finalMomentum = .8, double eta = 10.0
     568      ) {
     569      return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
    581570    }
    582571
     
    591580        for (var j = 0; j < state.newDimensions; j++) {
    592581          state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])
    593             ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct
     582            ? state.gains[i, j] + .2  // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct
    594583            : state.gains[i, j] * .8;
     584
    595585          if (state.gains[i, j] < .01) state.gains[i, j] = .01;
    596586        }
    597587      }
     588
    598589
    599590      // Perform gradient update (with momentum and gains)
    600591      for (var i = 0; i < state.noDatapoints; i++)
    601       for (var j = 0; j < state.newDimensions; j++)
    602         state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];
     592        for (var j = 0; j < state.newDimensions; j++)
     593          state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];
    603594
    604595      for (var i = 0; i < state.noDatapoints; i++)
    605       for (var j = 0; j < state.newDimensions; j++)
    606         state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
     596        for (var j = 0; j < state.newDimensions; j++)
     597          state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
    607598
    608599      // Make solution zero-mean
     
    613604        if (state.exact)
    614605          for (var i = 0; i < state.noDatapoints; i++)
    615           for (var j = 0; j < state.noDatapoints; j++)
    616             state.p[i, j] /= 12.0;
     606            for (var j = 0; j < state.noDatapoints; j++)
     607              state.p[i, j] /= 12.0;
    617608        else
    618609          for (var i = 0; i < state.rowP[state.noDatapoints]; i++)
     
    643634      // Compute final t-SNE gradient
    644635      for (var i = 0; i < n; i++)
    645       for (var j = 0; j < d; j++) {
    646         dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
    647       }
     636        for (var j = 0; j < d; j++) {
     637          dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
     638        }
    648639    }
    649640
Note: See TracChangeset for help on using the changeset viewer.