Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/20/18 15:26:57 (5 years ago)
Author:
pfleck
Message:

#2845 Merged trunk changes into branch (15406-15681, 15683-16308)

Location:
branches/2845_EnhancedProgress
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2845_EnhancedProgress

  • branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis

  • branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4

    • Property svn:mergeinfo set to (toggle deleted branches)
      /branches/2839_HiveProjectManagement/HeuristicLab.Algorithms.DataAnalysis/3.4mergedeligible
      /stable/HeuristicLab.Algorithms.DataAnalysis/3.4mergedeligible
      /trunk/HeuristicLab.Algorithms.DataAnalysis/3.4mergedeligible
      /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.410321-10322
      /branches/Async/HeuristicLab.Algorithms.DataAnalysis/3.413329-15286
      /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.46917-7005
      /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.49070-13099
      /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.44656-4721
      /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.45471-5808
      /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.45815-6180
      /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.44458-4459,​4462,​4464
      /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.410085-11101
      /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.46284-6795
      /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.45060
      /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.411570-12508
      /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.411130-12721
      /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.413819-14091
      /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.48116-8789
      /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.410202-10483
      /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.45138-5162
      /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.45175-5192
      /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.47773-7810
      /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.46350-6627
      /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.46828
      /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.410204-10479
      /branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.45370-5682
      /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.46829-6865
      /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.45594-5752
      /branches/Weighted TSNE/3.415451-15531
      /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.45959-6341
      /branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.414232-14825
      /trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.415406-15681
  • branches/2845_EnhancedProgress/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs

    r16308 r16311  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    6565  [StorableClass]
    6666  public class TSNEStatic<T> {
    67 
    6867    [StorableClass]
    6968    public sealed class TSNEState : DeepCloneable {
     
    170169      [StorableConstructor]
    171170      public TSNEState(bool deserializing) { }
    172       public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) {
     171
     172      public TSNEState(IReadOnlyList<T> data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity,
     173        double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta, bool randomInit) {
    173174        this.distance = distance;
    174175        this.random = random;
     
    183184
    184185        // initialize
    185         noDatapoints = data.Length;
     186        noDatapoints = data.Count;
    186187        if (noDatapoints - 1 < 3 * perplexity)
    187188          throw new ArgumentException("Perplexity too large for the number of data points!");
     
    193194        gains = new double[noDatapoints, newDimensions];
    194195        for (var i = 0; i < noDatapoints; i++)
    195           for (var j = 0; j < newDimensions; j++)
    196             gains[i, j] = 1.0;
     196        for (var j = 0; j < newDimensions; j++)
     197          gains[i, j] = 1.0;
    197198
    198199        p = null;
     
    212213        var rand = new NormalDistributedRandom(random, 0, 1);
    213214        for (var i = 0; i < noDatapoints; i++)
    214           for (var j = 0; j < newDimensions; j++)
    215             newData[i, j] = rand.NextDouble() * .0001;
     215        for (var j = 0; j < newDimensions; j++)
     216          newData[i, j] = rand.NextDouble() * .0001;
     217
     218        if (!(data[0] is IReadOnlyList<double>) || randomInit) return;
     219        for (var i = 0; i < noDatapoints; i++)
     220        for (var j = 0; j < newDimensions; j++) {
     221          var row = (IReadOnlyList<double>) data[i];
     222          newData[i, j] = row[j % row.Count];
     223        }
    216224      }
    217225      #endregion
    218226
    219227      public double EvaluateError() {
    220         return exact ?
    221           EvaluateErrorExact(p, newData, noDatapoints, newDimensions) :
    222           EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
     228        return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
    223229      }
    224230
    225231      #region Helpers
    226       private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
     232      private static void CalculateApproximateSimilarities(IReadOnlyList<T> data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
    227233        // Compute asymmetric pairwise input similarities
    228         ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
     234        ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity));
    229235        // Symmetrize input similarities
    230236        int[] sRowP, symColP;
     
    235241        valP = sValP;
    236242        var sumP = .0;
    237         for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
    238         for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
    239       }
    240 
    241       private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) {
     243        for (var i = 0; i < rowP[data.Count]; i++) sumP += valP[i];
     244        for (var i = 0; i < rowP[data.Count]; i++) valP[i] /= sumP;
     245      }
     246      private static double[,] CalculateExactSimilarites(IReadOnlyList<T> data, IDistance<T> distance, double perplexity) {
    242247        // Compute similarities
    243         var p = new double[data.Length, data.Length];
     248        var p = new double[data.Count, data.Count];
    244249        ComputeGaussianPerplexity(data, distance, p, perplexity);
    245250        // Symmetrize input similarities
    246         for (var n = 0; n < data.Length; n++) {
    247           for (var m = n + 1; m < data.Length; m++) {
     251        for (var n = 0; n < data.Count; n++) {
     252          for (var m = n + 1; m < data.Count; m++) {
    248253            p[n, m] += p[m, n];
    249254            p[m, n] = p[n, m];
     
    251256        }
    252257        var sumP = .0;
    253         for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
    254         for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
     258        for (var i = 0; i < data.Count; i++) {
     259          for (var j = 0; j < data.Count; j++) {
     260            sumP += p[i, j];
     261          }
     262        }
     263        for (var i = 0; i < data.Count; i++) {
     264          for (var j = 0; j < data.Count; j++) {
     265            p[i, j] /= sumP;
     266          }
     267        }
    255268        return p;
    256269      }
    257 
    258270      private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
    259271        if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");
     
    290302
    291303          // Iterate until we found a good perplexity
    292           var iter = 0; double sumP = 0;
     304          var iter = 0;
     305          double sumP = 0;
    293306          while (!found && iter < 200) {
    294 
    295307            // Compute Gaussian kernel row
    296308            for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
     
    307319            if (hdiff < tol && -hdiff < tol) {
    308320              found = true;
    309             } else {
     321            }
     322            else {
    310323              if (hdiff > 0) {
    311324                minBeta = beta;
     
    314327                else
    315328                  beta = (beta + maxBeta) / 2.0;
    316               } else {
     329              }
     330              else {
    317331                maxBeta = beta;
    318332                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     
    335349        }
    336350      }
    337       private static void ComputeGaussianPerplexity(T[] x, IDistance<T> distance, double[,] p, double perplexity) {
     351      private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, double[,] p, double perplexity) {
    338352        // Compute the distance matrix
    339353        var dd = ComputeDistances(x, distance);
    340354
    341         var n = x.Length;
     355        var n = x.Count;
    342356        // Compute the Gaussian kernel row by row
    343357        for (var i = 0; i < n; i++) {
     
    352366          // Iterate until we found a good perplexity
    353367          var iter = 0;
    354           while (!found && iter < 200) {      // 200 iterations as in tSNE implementation by van der Maarten
     368          while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten
    355369
    356370            // Compute Gaussian kernel row
     
    369383            if (hdiff < tol && -hdiff < tol) {
    370384              found = true;
    371             } else {
     385            }
     386            else {
    372387              if (hdiff > 0) {
    373388                minBeta = beta;
     
    376391                else
    377392                  beta = (beta + maxBeta) / 2.0;
    378               } else {
     393              }
     394              else {
    379395                maxBeta = beta;
    380396                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     
    393409        }
    394410      }
    395 
    396       private static double[][] ComputeDistances(T[] x, IDistance<T> distance) {
    397         var res = new double[x.Length][];
    398         for (var r = 0; r < x.Length; r++) {
    399           var rowV = new double[x.Length];
     411      private static double[][] ComputeDistances(IReadOnlyList<T> x, IDistance<T> distance) {
     412        var res = new double[x.Count][];
     413        for (var r = 0; r < x.Count; r++) {
     414          var rowV = new double[x.Count];
    400415          // all distances must be symmetric
    401416          for (var c = 0; c < r; c++) {
     
    403418          }
    404419          rowV[r] = 0.0; // distance to self is zero for all distances
    405           for (var c = r + 1; c < x.Length; c++) {
     420          for (var c = r + 1; c < x.Count; c++) {
    406421            rowV[c] = distance.Get(x[r], x[c]);
    407422          }
     
    411426        // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
    412427      }
    413 
    414428      private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
    415429        // Compute the squared Euclidean distance matrix
     
    425439              q[n1, m] = 1 / (1 + dd[n1, m]);
    426440              sumQ += q[n1, m];
    427             } else q[n1, m] = double.Epsilon;
     441            }
     442            else q[n1, m] = double.Epsilon;
    428443          }
    429444        }
     
    433448        var c = .0;
    434449        for (var i = 0; i < n; i++)
    435           for (var j = 0; j < n; j++) {
    436             c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
    437           }
     450        for (var j = 0; j < n; j++) {
     451          c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
     452        }
    438453        return c;
    439454      }
    440 
    441455      private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
    442456        // Get estimate of normalization term
     
    463477      }
    464478      private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) {
    465 
    466479        // Count number of elements and row counts of symmetric matrix
    467480        var n = rowP.Count - 1;
     
    469482        for (var j = 0; j < n; j++) {
    470483          for (var i = rowP[j]; i < rowP[j + 1]; i++) {
    471 
    472484            // Check whether element (col_P[i], n) is present
    473485            var present = false;
     
    497509        var offset = new int[n];
    498510        for (var j = 0; j < n; j++) {
    499           for (var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
     511          for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])
    500512
    501513            // Check whether element (col_P[i], n) is present
     
    549561    public static double[,] Run(T[] data, IDistance<T> distance, IRandom random,
    550562      int newDimensions = 2, double perplexity = 25, int iterations = 1000,
    551       double theta = 0,
    552       int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
     563      double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
    553564      double finalMomentum = .8, double eta = 10.0
    554       ) {
     565    ) {
    555566      var state = CreateState(data, distance, random, newDimensions, perplexity,
    556567        theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
     
    565576      int newDimensions = 2, double perplexity = 25, double theta = 0,
    566577      int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
    567       double finalMomentum = .8, double eta = 10.0
    568       ) {
    569       return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
     578      double finalMomentum = .8, double eta = 10.0, bool randomInit = true
     579    ) {
     580      return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta, randomInit);
    570581    }
    571582
     
    580591        for (var j = 0; j < state.newDimensions; j++) {
    581592          state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])
    582             ? state.gains[i, j] + .2  // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct
     593            ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct
    583594            : state.gains[i, j] * .8;
    584 
    585595          if (state.gains[i, j] < .01) state.gains[i, j] = .01;
    586596        }
    587597      }
    588 
    589598
    590599      // Perform gradient update (with momentum and gains)
    591600      for (var i = 0; i < state.noDatapoints; i++)
    592         for (var j = 0; j < state.newDimensions; j++)
    593           state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];
     601      for (var j = 0; j < state.newDimensions; j++)
     602        state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];
    594603
    595604      for (var i = 0; i < state.noDatapoints; i++)
    596         for (var j = 0; j < state.newDimensions; j++)
    597           state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
     605      for (var j = 0; j < state.newDimensions; j++)
     606        state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
    598607
    599608      // Make solution zero-mean
     
    604613        if (state.exact)
    605614          for (var i = 0; i < state.noDatapoints; i++)
    606             for (var j = 0; j < state.noDatapoints; j++)
    607               state.p[i, j] /= 12.0;
     615          for (var j = 0; j < state.noDatapoints; j++)
     616            state.p[i, j] /= 12.0;
    608617        else
    609618          for (var i = 0; i < state.rowP[state.noDatapoints]; i++)
     
    634643      // Compute final t-SNE gradient
    635644      for (var i = 0; i < n; i++)
    636         for (var j = 0; j < d; j++) {
    637           dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
    638         }
     645      for (var j = 0; j < d; j++) {
     646        dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
     647      }
    639648    }
    640649
Note: See TracChangeset for help on using the changeset viewer.