Changeset 14767


Ignore:
Timestamp:
03/19/17 18:47:00 (3 months ago)
Author:
gkronber
Message:

#2700: made some changes while reviewing the code

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
6 deleted
11 edited

Legend:

Unmodified
Added
Removed
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14682 r14767  
    331331    <Compile Include="Interfaces\ISupportVectorMachineSolution.cs" />
    332332    <Compile Include="Interfaces\IDataAnalysisAlgorithm.cs" />
    333     <Compile Include="Interfaces\TSNEInterfaces\IKernelFunction.cs" />
     333    <Compile Include="Interfaces\TSNEInterfaces\ICell.cs" />
     334    <Compile Include="Interfaces\TSNEInterfaces\IDataPoint.cs" />
     335    <Compile Include="Interfaces\TSNEInterfaces\IDistance.cs" />
     336    <Compile Include="Interfaces\TSNEInterfaces\ISPTree.cs" />
     337    <Compile Include="Interfaces\TSNEInterfaces\IVPTree.cs" />
    334338    <Compile Include="kMeans\KMeansClustering.cs" />
    335339    <Compile Include="kMeans\KMeansClusteringModel.cs" />
     
    416420    <Compile Include="TSNE\Cell.cs" />
    417421    <Compile Include="TSNE\DataPoint.cs" />
    418     <Compile Include="TSNE\Distances\FuctionalDistance.cs" />
    419422    <Compile Include="TSNE\Distances\DistanceBase.cs" />
    420423    <Compile Include="TSNE\Distances\DataPointDistance.cs" />
    421424    <Compile Include="TSNE\Distances\EuclidianDistance.cs" />
    422     <Compile Include="Interfaces\TSNEInterfaces\IDistance.cs" />
    423425    <Compile Include="TSNE\Distances\InnerProductDistance.cs" />
    424426    <Compile Include="TSNE\TSNEAnalysis.cs" />
     
    426428    <Compile Include="TSNE\SPtree.cs" />
    427429    <Compile Include="TSNE\TSNE.cs" />
    428     <Compile Include="Interfaces\TSNEInterfaces\ICell.cs" />
    429     <Compile Include="Interfaces\TSNEInterfaces\IDataPoint.cs" />
    430     <Compile Include="Interfaces\TSNEInterfaces\IHeap.cs" />
    431     <Compile Include="Interfaces\TSNEInterfaces\ISPTree.cs" />
    432     <Compile Include="Interfaces\TSNEInterfaces\ITSNE.cs" />
    433     <Compile Include="Interfaces\TSNEInterfaces\IVPTree.cs" />
    434430    <Compile Include="TSNE\TSNEUtils.cs" />
    435431    <Compile Include="TSNE\VPTree.cs" />
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDistance.cs

    r14512 r14767  
    2626  public interface IDistance<in T> : IItem {
    2727    /// <summary>
    28     /// Calculates a distance measure between two arbitrary Vectors. The distance value d is
    29     /// 1.) positive  d(x,y)>=0
     28    /// Calculates a distance measure between two objects.
     29    /// 1.) non-negative d(x,y) >= 0
    3030    /// 2.) symmetric d(x,y) = d(y,x)
    31     /// 3.) zero-reflexive d(x,x) =0;
     31    /// 3.) zero-reflexive d(x,x) = 0;
    3232    /// </summary>
    33     /// <param name="a">an array representing point x</param>
    34     /// <param name="b">>an array representing point y</param>
    3533    /// <returns>d(x,y)</returns>
    36     double Get(T a, T b);
    37 
    38     /// <summary>
    39     /// Calculates the correct kernel measure if it is smaller than threshold, but any value greater then threshold if the correct distance is greater.
    40     /// This is for performace only
    41     /// </summary>
    42     /// <param name="a"></param>
    43     /// <param name="b"></param>
    44     /// <param name="threshold"></param>
    45     /// <returns></returns>
    46     double Get(T a, T b, double threshold);
     34    double Get(T x, T y);
    4735
    4836    /// <summary>
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/DataPoint.cs

    r14518 r14767  
    8888    #endregion
    8989
    90 
    91 
    9290  }
    9391}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DataPointDistance.cs

    r14512 r14767  
    4949      return dist.Get(a.X, b.X);
    5050    }
    51 
    52     public override double Get(IDataPoint<T> a, IDataPoint<T> b, double threshold) {
    53       return dist.Get(a.X, b.X, threshold);
    54     }
    5551  }
    5652}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs

    r14512 r14767  
    3737
    3838    public abstract double Get(T a, T b);
    39     public abstract double Get(T a, T b, double threshold);
    4039
    4140    public IComparer<T> GetDistanceComparer(T item) {
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclidianDistance.cs

    r14512 r14767  
    4545      return Math.Sqrt(point1.Zip(point2, (a1, b1) => (a1 - b1) * (a1 - b1)).Sum());
    4646    }
    47     public static double GetDistance(IEnumerable<double> a, IEnumerable<double> b, double threshold) {
    48       double sum = 0;
    49       var it1 = a.GetEnumerator();
    50       var it2 = b.GetEnumerator();
    51       while (sum > threshold * threshold && it1.MoveNext() && it2.MoveNext()) {
    52         var d = it1.Current - it2.Current;
    53         sum += d * d;
    54       }
    55       it1.Dispose();
    56       it2.Dispose();
    57       return sum;
    58     }
    5947    #endregion
    6048
     
    6250      return GetDistance(a.ToArray(), b.ToArray());
    6351    }
    64     public override double Get(IEnumerable<double> a, IEnumerable<double> b, double threshold) {
    65       return GetDistance(a, b, threshold);
    66     }
    6752  }
    6853}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/InnerProductDistance.cs

    r14512 r14767  
    3030
    3131  /// <summary>
    32   /// this is not a proper distance
     32  /// The angular distance, defined as a normalized distance measure dependent on the angle between two vectors.
     33  /// It is designed for vectors with all positive coordinates.
    3334  /// </summary>
    3435  [StorableClass]
    35   [Item("InnerProductDistance", "The Angluar Distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]
    36   public class InnerProductDistance : DistanceBase<IReadOnlyList<double>> {
     36  [Item("InnerProductDistance", "The angluar distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]
     37  public class InnerProductDistance : DistanceBase<IEnumerable<double>> {
    3738
    3839    #region HLConstructors
     
    4849
    4950    #region statics
    50     public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) {
    51       if (point1.Count != point2.Count) throw new ArgumentException("Inner Product distance not defined on vectors of different length");
    52       return point1.Zip(point2, (x, y) => x * y).Sum();
    53     }
    54     public static double GetDistance(IEnumerable<double> a, IEnumerable<double> b, double threshold) {
    55       return GetDistance(a.ToArray(), b.ToArray());     //no shortcut evaluation for Inner Product (summands may be negative => no way of telling if threshold is reached or not)
     51    public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) {
     52      var xs = point1.GetEnumerator();
     53      var ys = point2.GetEnumerator();
     54      var sum = 0.0;
     55      while(xs.MoveNext() & ys.MoveNext()) {
     56        if(xs.Current < 0 || ys.Current < 0) throw new ArgumentException("Inner product distance is only defined for vectors with non-negative elements");
     57        sum += xs.Current * ys.Current;
     58      }
     59      if(xs.MoveNext() || ys.MoveNext()) throw new ArgumentException("Enumerables contain a different number of elements");
     60      return sum;
    5661    }
    5762    #endregion
    58     public override double Get(IReadOnlyList<double> a, IReadOnlyList<double> b) {
     63    public override double Get(IEnumerable<double> a, IEnumerable<double> b) {
    5964      return GetDistance(a, b);
    60     }
    61     public override double Get(IReadOnlyList<double> a, IReadOnlyList<double> b, double threshold) {
    62       return GetDistance(a, b, threshold);
    6365    }
    6466  }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs

    r14414 r14767  
    2929  // implementation based on C++ version from Peter Sanders
    3030  // http://www.mpi-inf.mpg.de/~sanders/programs/spq/
    31   public sealed class PriorityQueue<TK, TV> : IHeap<TK, TV> where TK : IComparable {
     31  public sealed class PriorityQueue<TK, TV> where TK : IComparable {
    3232    private class KNElement {
    3333      public TK Key { get; set; }
     
    3535    }
    3636
    37     private int capacity;
     37    private readonly int capacity;
    3838    private int size; // index of last used element
    3939    private int finalLayerSize; // size of first layer with free space
    4040    private int finalLayerDist; // distance to end of layer
    4141                                //private KNElement[] rawData;
    42     private KNElement[] data;    // alligned version of rawData
     42    private readonly KNElement[] data;    // aligned version of rawData
    4343
    4444    public PriorityQueue(TK supremum, TK infimum, int cap) {
     
    5353    }
    5454
    55     public int Size
    56     {
     55    public int Size {
    5756      get { return size; }
    5857    }
    5958
    60     public TK Supremum
    61     {
     59    public TK Supremum {
    6260      get { return data[capacity + 1].Key; }
    6361    }
    6462
    6563    public KeyValuePair<TK, TV> PeekMin() {
    66       if (size == 0) {
     64      if(size == 0) {
    6765        throw new InvalidOperationException("Heap is empty");
    6866      }
     
    8785      size = sz - 1;
    8886      finalLayerDist++;
    89       if (finalLayerDist == finalLayerSize) {
     87      if(finalLayerDist == finalLayerSize) {
    9088        finalLayerSize >>= 2;
    9189        finalLayerDist = 0;
    9290      }
    9391
    94       while (succ < sz) {
     92      while(succ < sz) {
    9593        var minKey = data[succ].Key;
    9694        var delta = 0;
    9795
    9896        var otherKey = data[succ + 1].Key;
    99         if (otherKey.CompareTo(minKey) < 0) {
     97        if(otherKey.CompareTo(minKey) < 0) {
    10098          minKey = otherKey;
    10199          delta = 1;
    102100        }
    103101        otherKey = data[succ + 2].Key;
    104         if (otherKey.CompareTo(minKey) < 0) {
     102        if(otherKey.CompareTo(minKey) < 0) {
    105103          minKey = otherKey;
    106104          delta = 2;
    107105        }
    108106        otherKey = data[succ + 3].Key;
    109         if (otherKey.CompareTo(minKey) < 0) {
     107        if(otherKey.CompareTo(minKey) < 0) {
    110108          minKey = otherKey;
    111109          delta = 3;
     
    137135      pred = pred - layerDist; // finally preds index
    138136
    139       while (data[pred].Key.CompareTo(bubble) > 0) {  // must terminate since inf at root
     137      while(data[pred].Key.CompareTo(bubble) > 0) {  // must terminate since inf at root
    140138        data[hole].Key = data[pred].Key;
    141139        data[hole].Value = data[pred].Value;
     
    159157      finalLayerDist--;
    160158
    161       if (finalLayerDist == -1) { // layer full
    162                                   // start next layer
     159      if(finalLayerDist == -1) { // layer full
     160                                 // start next layer
    163161        finalLayerSize <<= 2;
    164162        finalLayerDist = finalLayerSize - 1;
     
    173171      pred = pred - layerDist; // finally preds index
    174172      var predKey = data[pred].Key;
    175       while (predKey.CompareTo(key) > 0) {
     173      while(predKey.CompareTo(key) > 0) {
    176174        data[hole].Key = predKey;
    177175        data[hole].Value = data[pred].Value;
     
    196194      var sup = Supremum;
    197195      var cap = capacity;
    198       for (var i = 1; i <= cap; i++) {
     196      for(var i = 1; i <= cap; i++) {
    199197        data[i].Key = sup;
    200198      }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs

    r14742 r14767  
    6767namespace HeuristicLab.Algorithms.DataAnalysis {
    6868  [StorableClass]
    69   public class TSNE<T> : DeepCloneable, ITSNE<T> where T : class, IDeepCloneable {
     69  public class TSNE<T> : DeepCloneable where T : class, IDeepCloneable {
    7070
    7171    private const string IterationResultName = "Iteration";
     
    192192      return new TSNE<TR>(distance, random).Run(data, newDimensions, perplexity, theta);
    193193    }
    194     public static double[,] Run<TR>(TR[] data, int newDimensions, double perplexity, double theta, Func<TR, TR, double> distance, IRandom random) where TR : class, IDeepCloneable {
    195       return new TSNE<TR>(new FuctionalDistance<TR>(distance), random).Run(data, newDimensions, perplexity, theta);
    196     }
    197194
    198195    #region helpers
     
    224221      if (results == null) return;
    225222      var plot = results[ErrorPlotResultName].Value as DataTable;
    226       if (plot == null) throw new ArgumentException("Could not create/access Error-DataTable in Results-Collection. Was it removed by some effect?");
     223      if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection. Was it removed by some effect?");
    227224      var errors = plot.Rows["errors"].Values;
    228225      var c = exact
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs

    r14742 r14767  
    3838namespace HeuristicLab.Algorithms.DataAnalysis {
    3939  /// <summary>
    40   /// Linear regression data analysis algorithm.
     40  /// t-distributed stochastic neighbourhood embedding (TSNE) projects the data in a low dimensional
     41  /// space to allow visual cluster identification.
    4142  /// </summary>
    42   [Item("TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low dimensional space to allow visual cluster identification")]
     43  [Item("TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " +
     44                "dimensional space to allow visual cluster identification.")]
    4345  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)]
    4446  [StorableClass]
    4547  public sealed class TSNEAnalysis : BasicAlgorithm {
    46     public override bool SupportsPause
    47     {
     48    public override bool SupportsPause {
    4849      get { return false; }
    4950    }
    50     public override Type ProblemType
    51     {
     51    public override Type ProblemType {
    5252      get { return typeof(IDataAnalysisProblem); }
    5353    }
    54     public new IDataAnalysisProblem Problem
    55     {
     54    public new IDataAnalysisProblem Problem {
    5655      get { return (IDataAnalysisProblem)base.Problem; }
    5756      set { base.Problem = value; }
     
    7675
    7776    #region Parameterproperties
    78     public IFixedValueParameter<DoubleValue> PerplexityParameter
    79     {
     77    public IFixedValueParameter<DoubleValue> PerplexityParameter {
    8078      get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
    8179    }
    82     public OptionalValueParameter<DoubleValue> ThetaParameter
    83     {
     80    public OptionalValueParameter<DoubleValue> ThetaParameter {
    8481      get { return Parameters[ThetaParameterName] as OptionalValueParameter<DoubleValue>; }
    8582    }
    86     public IFixedValueParameter<IntValue> NewDimensionsParameter
    87     {
     83    public IFixedValueParameter<IntValue> NewDimensionsParameter {
    8884      get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }
    8985    }
    90     public IValueParameter<IDistance<RealVector>> DistanceParameter
    91     {
     86    public IValueParameter<IDistance<RealVector>> DistanceParameter {
    9287      get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<RealVector>>; }
    9388    }
    94     public IFixedValueParameter<IntValue> MaxIterationsParameter
    95     {
     89    public IFixedValueParameter<IntValue> MaxIterationsParameter {
    9690      get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter<IntValue>; }
    9791    }
    98     public IFixedValueParameter<IntValue> StopLyingIterationParameter
    99     {
     92    public IFixedValueParameter<IntValue> StopLyingIterationParameter {
    10093      get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter<IntValue>; }
    10194    }
    102     public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter
    103     {
     95    public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter {
    10496      get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter<IntValue>; }
    10597    }
    106     public IFixedValueParameter<DoubleValue> InitialMomentumParameter
    107     {
     98    public IFixedValueParameter<DoubleValue> InitialMomentumParameter {
    10899      get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter<DoubleValue>; }
    109100    }
    110     public IFixedValueParameter<DoubleValue> FinalMomentumParameter
    111     {
     101    public IFixedValueParameter<DoubleValue> FinalMomentumParameter {
    112102      get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter<DoubleValue>; }
    113103    }
    114     public IFixedValueParameter<DoubleValue> EtaParameter
    115     {
     104    public IFixedValueParameter<DoubleValue> EtaParameter {
    116105      get { return Parameters[EtaParameterName] as IFixedValueParameter<DoubleValue>; }
    117106    }
    118     public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter
    119     {
     107    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter {
    120108      get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }
    121109    }
    122     public IFixedValueParameter<IntValue> SeedParameter
    123     {
     110    public IFixedValueParameter<IntValue> SeedParameter {
    124111      get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
    125112    }
    126     public IFixedValueParameter<StringValue> ClassesParameter
    127     {
     113    public IFixedValueParameter<StringValue> ClassesParameter {
    128114      get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; }
    129115    }
    130     public IFixedValueParameter<BoolValue> NormalizationParameter
    131     {
     116    public IFixedValueParameter<BoolValue> NormalizationParameter {
    132117      get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; }
    133118    }
     
    135120
    136121    #region  Properties
    137     public IDistance<RealVector> Distance
    138     {
     122    public IDistance<RealVector> Distance {
    139123      get { return DistanceParameter.Value; }
    140124    }
    141     public double Perplexity
    142     {
     125    public double Perplexity {
    143126      get { return PerplexityParameter.Value.Value; }
    144127    }
    145     public double Theta
    146     {
     128    public double Theta {
    147129      get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; }
    148130    }
    149     public int NewDimensions
    150     {
     131    public int NewDimensions {
    151132      get { return NewDimensionsParameter.Value.Value; }
    152133    }
    153     public int MaxIterations
    154     {
     134    public int MaxIterations {
    155135      get { return MaxIterationsParameter.Value.Value; }
    156136    }
    157     public int StopLyingIteration
    158     {
     137    public int StopLyingIteration {
    159138      get { return StopLyingIterationParameter.Value.Value; }
    160139    }
    161     public int MomentumSwitchIteration
    162     {
     140    public int MomentumSwitchIteration {
    163141      get { return MomentumSwitchIterationParameter.Value.Value; }
    164142    }
    165     public double InitialMomentum
    166     {
     143    public double InitialMomentum {
    167144      get { return InitialMomentumParameter.Value.Value; }
    168145    }
    169     public double FinalMomentum
    170     {
     146    public double FinalMomentum {
    171147      get { return FinalMomentumParameter.Value.Value; }
    172148    }
    173     public double Eta
    174     {
    175       get
    176       {
     149    public double Eta {
     150      get {
    177151        return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value;
    178152      }
    179153    }
    180     public bool SetSeedRandomly
    181     {
     154    public bool SetSeedRandomly {
    182155      get { return SetSeedRandomlyParameter.Value.Value; }
    183156    }
    184     public uint Seed
    185     {
     157    public uint Seed {
    186158      get { return (uint)SeedParameter.Value.Value; }
    187159    }
    188     public string Classes
    189     {
     160    public string Classes {
    190161      get { return ClassesParameter.Value.Value; }
    191162    }
    192     public bool Normalization
    193     {
     163    public bool Normalization {
    194164      get { return NormalizationParameter.Value.Value; }
    195165    }
     
    230200    public override void Stop() {
    231201      base.Stop();
    232       if (tsne != null) tsne.Running = false;
     202      if(tsne != null) tsne.Running = false;
    233203    }
    234204
     
    239209
    240210      //color datapoints according to Classes-Variable (be it double or string)
    241       if (problemData.Dataset.VariableNames.Contains(Classes)) {
    242         if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {
     211      if(problemData.Dataset.VariableNames.Contains(Classes)) {
     212        if((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {
    243213          var classes = problemData.Dataset.GetStringValues(Classes).ToArray();
    244           for (var i = 0; i < classes.Length; i++) {
    245             if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());
     214          for(var i = 0; i < classes.Length; i++) {
     215            if(!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());
    246216            dataRowNames[classes[i]].Add(i);
    247217          }
    248         } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
     218        } else if((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
    249219          var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray();
    250220          var max = classValues.Max() + 0.1;
    251221          var min = classValues.Min() - 0.1;
    252222          const int contours = 8;
    253           for (var i = 0; i < contours; i++) {
     223          for(var i = 0; i < contours; i++) {
    254224            var contourname = GetContourName(i, min, max, contours);
    255225            dataRowNames.Add(contourname, new List<int>());
     
    258228            rows[contourname].VisualProperties.PointSize = i + 3;
    259229          }
    260           for (var i = 0; i < classValues.Length; i++) {
     230          for(var i = 0; i < classValues.Length; i++) {
    261231            dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i);
    262232          }
     
    268238
    269239      //Set up and run TSNE
    270       if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next();
     240      if(SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next();
    271241      var random = new MersenneTwister(Seed);
    272242      tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows);
     
    274244      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
    275245      var data = new RealVector[dataset.Rows];
    276       for (var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray());
    277       if (Normalization) data = NormalizeData(data);
     246      for(var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray());
     247      if(Normalization) data = NormalizeData(data);
    278248      tsne.Run(data, NewDimensions, Perplexity, Theta);
    279249    }
     
    284254      var sd = new double[n];
    285255      var nData = new RealVector[data.Count];
    286       for (var i = 0; i < n; i++) {
     256      for(var i = 0; i < n; i++) {
    287257        var i1 = i;
    288258        sd[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).StandardDeviation();
    289259        mean[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).Average();
    290260      }
    291       for (var i = 0; i < data.Count; i++) {
     261      for(var i = 0; i < data.Count; i++) {
    292262        nData[i] = new RealVector(n);
    293         for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j];
     263        for(var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j];
    294264      }
    295265      return nData;
     
    309279      return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")";
    310280    }
    311 
    312281  }
    313282}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VPTree.cs

    r14518 r14767  
    9999
    100100    public void Search(T target, int k, out List<T> results, out List<double> distances) {
    101       IHeap<double, HeapItem> heap = new PriorityQueue<double, HeapItem>(double.MaxValue, double.MinValue, k);
     101      var heap = new PriorityQueue<double, HeapItem>(double.MaxValue, double.MinValue, k);
    102102      tau = double.MaxValue;
    103103      Search(root, target, k, heap);
     
    142142    }
    143143
    144     private void Search(Node node, T target, int k, IHeap<double, HeapItem> heap) {
     144    private void Search(Node node, T target, int k, PriorityQueue<double, HeapItem> heap) {
    145145      if (node == null) return;
    146146      var dist = distance.Get(items[node.index], target);
    147147      if (dist < tau) {
    148148        if (heap.Size == k) heap.RemoveMin();
    149         heap.Insert(-dist, new HeapItem(node.index, dist));//TODO check if minheap or maxheap schould be used here
     149        heap.Insert(-dist, new HeapItem(node.index, dist));//TODO check if minheap or maxheap should be used here
    150150        if (heap.Size == k) tau = heap.PeekMinValue().Dist;
    151151      }
Note: See TracChangeset for help on using the changeset viewer.