Changeset 14785


Ignore:
Timestamp:
03/27/17 15:15:23 (5 years ago)
Author:
gkronber
Message:

#2700: changes and while reviewing

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 deleted
3 edited
6 moved

Legend:

Unmodified
Added
Removed
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14784 r14785  
    334334    <Compile Include="Interfaces\TSNEInterfaces\IDataPoint.cs" />
    335335    <Compile Include="Interfaces\TSNEInterfaces\IDistance.cs" />
    336     <Compile Include="Interfaces\TSNEInterfaces\ISPTree.cs" />
    337     <Compile Include="Interfaces\TSNEInterfaces\IVPTree.cs" />
     336    <Compile Include="Interfaces\TSNEInterfaces\ISpacePartitioningTree.cs" />
     337    <Compile Include="Interfaces\TSNEInterfaces\IVantagePointTree.cs" />
    338338    <Compile Include="kMeans\KMeansClustering.cs" />
    339339    <Compile Include="kMeans\KMeansClusteringModel.cs" />
     
    419419    <Compile Include="TimeSeries\AutoregressiveModeling.cs" />
    420420    <Compile Include="TSNE\Cell.cs" />
    421     <Compile Include="TSNE\DataPoint.cs" />
    422421    <Compile Include="TSNE\Distances\DistanceBase.cs" />
    423     <Compile Include="TSNE\Distances\DataPointDistance.cs" />
    424422    <Compile Include="TSNE\Distances\EuclideanDistance.cs" />
     423    <Compile Include="TSNE\Distances\IndexedItemDistance.cs" />
    425424    <Compile Include="TSNE\Distances\InnerProductDistance.cs" />
    426425    <Compile Include="TSNE\PriorityQueue.cs" />
    427     <Compile Include="TSNE\SPtree.cs" />
    428     <Compile Include="TSNE\TSNE.cs" />
     426    <Compile Include="TSNE\SpacePartitioningTree.cs" />
     427    <Compile Include="TSNE\TSNEAlgorithm.cs" />
    429428    <Compile Include="TSNE\TSNEStatic.cs" />
    430429    <Compile Include="TSNE\TSNEUtils.cs" />
    431     <Compile Include="TSNE\VPTree.cs" />
     430    <Compile Include="TSNE\VantagePointTree.cs" />
    432431  </ItemGroup>
    433432  <ItemGroup>
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ISpacePartitioningTree.cs

    r14784 r14785  
    2323
    2424namespace HeuristicLab.Algorithms.DataAnalysis {
    25   public interface ISPTree : IDeepCloneable {
     25  public interface ISpacePartitioningTree {
    2626
    2727    double[,] Data { set; }
    2828    int GetDepth();
    29     ISPTree GetParent();
     29    ISpacePartitioningTree GetParent();
    3030    bool Insert(int newIndex);
    3131    void Subdivide();
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IVantagePointTree.cs

    r14784 r14785  
    2424
    2525namespace HeuristicLab.Algorithms.DataAnalysis {
    26   public interface IVPTree<T> : IDeepCloneable {
    27     void Create(IEnumerable<T> items);
    28     void Search(T target, int k, out List<T> results, out List<double> distances);
     26  public interface IVantagePointTree<T> {
     27    void Search(T target, int k, out IList<T> results, out IList<double> distances);
    2928  }
    3029}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/IndexedItemDistance.cs

    r14784 r14785  
    2020#endregion
    2121
     22using HeuristicLab.Collections;
    2223using HeuristicLab.Common;
    2324using HeuristicLab.Core;
     
    2627namespace HeuristicLab.Algorithms.DataAnalysis {
    2728  [StorableClass]
    28   [Item("DataPointDistance", "A distance wrapper for DataPoints")]
    29   internal class DataPointDistance<T> : DistanceBase<IDataPoint<T>> where T : class {
    30 
    31     #region properties
     29  [Item("IndexedItemDistance", "A distance wrapper for indexed items")]
     30  internal class IndexedItemDistance<T> : DistanceBase<IndexedItem<T>> {
    3231    [Storable]
    33     private IDistance<T> dist;
    34     #endregion
     32    private readonly IDistance<T> dist;
    3533
    3634    #region HLConstructors & Cloning
    3735    [StorableConstructor]
    38     protected DataPointDistance(bool deserializing) : base(deserializing) { }
    39     protected DataPointDistance(DataPointDistance<T> original, Cloner cloner) : base(original, cloner) {
     36    protected IndexedItemDistance(bool deserializing) : base(deserializing) { }
     37    protected IndexedItemDistance(IndexedItemDistance<T> original, Cloner cloner) : base(original, cloner) {
    4038      dist = cloner.Clone(original.dist);
    4139    }
    42     public override IDeepCloneable Clone(Cloner cloner) { return new DataPointDistance<T>(this, cloner); }
    43     public DataPointDistance(IDistance<T> distance) {
     40    public override IDeepCloneable Clone(Cloner cloner) { return new IndexedItemDistance<T>(this, cloner); }
     41    public IndexedItemDistance(IDistance<T> distance) {
    4442      dist = distance;
    4543    }
    4644    #endregion
    4745
    48     public override double Get(IDataPoint<T> a, IDataPoint<T> b) {
    49       return dist.Get(a.X, b.X);
     46    public override double Get(IndexedItem<T> a, IndexedItem<T> b) {
     47      return dist.Get(a.Value, b.Value);
    5048    }
    5149  }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs

    r14767 r14785  
    2525
    2626namespace HeuristicLab.Algorithms.DataAnalysis {
     27  // TODO: move to HeuristicLab.Collections
    2728
    2829  // Code Taken from branch: RoutePlanning, Heap4
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs

    r14784 r14785  
    6262namespace HeuristicLab.Algorithms.DataAnalysis {
    6363  /// <summary>
    64   /// Space partitioning tree
     64  /// Space partitioning tree (SPTree)
    6565  /// </summary>
    6666  [StorableClass]
    67   public class SPTree : DeepCloneable, ISPTree {
     67  public class SpacePartitioningTree : DeepCloneable, ISpacePartitioningTree {
    6868    private const uint QT_NODE_CAPACITY = 1;
    6969
     
    7272    private double[] buff;
    7373    [Storable]
    74     private ISPTree parent;
     74    private SpacePartitioningTree parent;
    7575    [Storable]
    7676    private int dimension;
     
    9595
    9696    // Children
    97     private ISPTree[] children;
     97    private SpacePartitioningTree[] children;
    9898    [Storable]
    9999    private uint noChildren;
     
    102102    #region HLConstructors & Cloning
    103103    [StorableConstructor]
    104     protected SPTree(bool deserializing) { }
    105     protected SPTree(SPTree original, Cloner cloner)
     104    protected SpacePartitioningTree(bool deserializing) { }
     105    protected SpacePartitioningTree(SpacePartitioningTree original, Cloner cloner)
    106106      : base(original, cloner) {
    107107      buff = original.buff.Select(x => x).ToArray();
     
    121121      noChildren = original.noChildren;
    122122    }
    123     public override IDeepCloneable Clone(Cloner cloner) { return new SPTree(this, cloner); }
    124 
    125     public SPTree(double[,] inpData) {
     123    public override IDeepCloneable Clone(Cloner cloner) { return new SpacePartitioningTree(this, cloner); }
     124
     125    public SpacePartitioningTree(double[,] inpData) {
    126126      var d = inpData.GetLength(1);
    127127      var n = inpData.GetLength(0);
     
    145145    }
    146146
    147     public SPTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
     147    public SpacePartitioningTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
    148148      Init(null, inpData, impCorner, impWith);
    149149    }
    150     public SPTree(ISPTree parent, double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
     150    public SpacePartitioningTree(SpacePartitioningTree parent, double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
    151151      Init(parent, inpData, impCorner, impWith);
    152152    }
    153153    #endregion
    154154
    155     public ISPTree GetParent() {
     155    public ISpacePartitioningTree GetParent() {
    156156      return parent;
    157157    }
     
    211211          div *= 2;
    212212        }
    213         children[i] = new SPTree(this, Data, newCorner, newWidth);
     213        children[i] = new SpacePartitioningTree(this, Data, newCorner, newWidth);
    214214      }
    215215
     
    307307      for (var i = 0; i < n; i++) Insert(i);
    308308    }
    309     private void Init(ISPTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) {
     309    private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) {
    310310      parent = p;
    311311      dimension = inpData.GetLength(1);
     
    320320      inpWidth.ForEach((i, x) => boundary.SetWidth(i, x));
    321321
    322       children = new ISPTree[noChildren];
     322      children = new SpacePartitioningTree[noChildren];
    323323      centerOfMass = new double[dimension];
    324324      buff = new double[dimension];
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs

    r14784 r14785  
    3838namespace HeuristicLab.Algorithms.DataAnalysis {
    3939  /// <summary>
    40   /// t-distributed stochastic neighbourhood embedding (TSNE) projects the data in a low dimensional
     40  /// t-distributed stochastic neighbourhood embedding (tSNE) projects the data in a low dimensional
    4141  /// space to allow visual cluster identification.
    4242  /// </summary>
    43   [Item("TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " +
     43  [Item("tSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " +
    4444                "dimensional space to allow visual cluster identification.")]
    4545  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)]
    4646  [StorableClass]
    47   public sealed class TSNEAnalysis : BasicAlgorithm {
     47  public sealed class TSNEAlgorithm : BasicAlgorithm {
    4848    public override bool SupportsPause {
    4949      get { return false; }
     
    5757    }
    5858
    59     #region Parameternames
     59    #region parameter names
    6060    private const string DistanceParameterName = "DistanceFunction";
    6161    private const string PerplexityParameterName = "Perplexity";
     
    7474    #endregion
    7575
    76     #region Parameterproperties
     76    #region parameter properties
    7777    public IFixedValueParameter<DoubleValue> PerplexityParameter {
    7878      get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
    7979    }
    80     public OptionalValueParameter<DoubleValue> ThetaParameter {
    81       get { return Parameters[ThetaParameterName] as OptionalValueParameter<DoubleValue>; }
     80    public IFixedValueParameter<DoubleValue> ThetaParameter {
     81      get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }
    8282    }
    8383    public IFixedValueParameter<IntValue> NewDimensionsParameter {
    8484      get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }
    8585    }
    86     public IValueParameter<IDistance<RealVector>> DistanceParameter {
    87       get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<RealVector>>; }
     86    public IValueParameter<IDistance<double[]>> DistanceParameter {
     87      get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<double[]>>; }
    8888    }
    8989    public IFixedValueParameter<IntValue> MaxIterationsParameter {
     
    120120
    121121    #region  Properties
    122     public IDistance<RealVector> Distance {
     122    public IDistance<double[]> Distance {
    123123      get { return DistanceParameter.Value; }
    124124    }
    125125    public double Perplexity {
    126126      get { return PerplexityParameter.Value.Value; }
     127      set { PerplexityParameter.Value.Value = value; }
    127128    }
    128129    public double Theta {
    129       get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; }
     130      get { return ThetaParameter.Value.Value; }
     131      set { ThetaParameter.Value.Value = value; }
    130132    }
    131133    public int NewDimensions {
    132134      get { return NewDimensionsParameter.Value.Value; }
     135      set { NewDimensionsParameter.Value.Value = value; }
    133136    }
    134137    public int MaxIterations {
    135138      get { return MaxIterationsParameter.Value.Value; }
     139      set { MaxIterationsParameter.Value.Value = value; }
    136140    }
    137141    public int StopLyingIteration {
    138142      get { return StopLyingIterationParameter.Value.Value; }
     143      set { StopLyingIterationParameter.Value.Value = value; }
    139144    }
    140145    public int MomentumSwitchIteration {
    141146      get { return MomentumSwitchIterationParameter.Value.Value; }
     147      set { MomentumSwitchIterationParameter.Value.Value = value; }
    142148    }
    143149    public double InitialMomentum {
    144150      get { return InitialMomentumParameter.Value.Value; }
     151      set { InitialMomentumParameter.Value.Value = value; }
    145152    }
    146153    public double FinalMomentum {
    147154      get { return FinalMomentumParameter.Value.Value; }
     155      set { FinalMomentumParameter.Value.Value = value; }
    148156    }
    149157    public double Eta {
    150       get {
    151         return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value;
    152       }
     158      get { return EtaParameter.Value.Value; }
     159      set { EtaParameter.Value.Value = value; }
    153160    }
    154161    public bool SetSeedRandomly {
    155162      get { return SetSeedRandomlyParameter.Value.Value; }
    156     }
    157     public uint Seed {
    158       get { return (uint)SeedParameter.Value.Value; }
     163      set { SetSeedRandomlyParameter.Value.Value = value; }
     164    }
     165    public int Seed {
     166      get { return SeedParameter.Value.Value; }
     167      set { SeedParameter.Value.Value = value; }
    159168    }
    160169    public string Classes {
    161170      get { return ClassesParameter.Value.Value; }
     171      set { ClassesParameter.Value.Value = value; }
    162172    }
    163173    public bool Normalization {
    164174      get { return NormalizationParameter.Value.Value; }
     175      set { NormalizationParameter.Value.Value = value; }
    165176    }
    166177    [Storable]
    167     public TSNE<RealVector> tsne;
     178    public TSNE<double[]> tsne;
    168179    #endregion
    169180
    170181    #region Constructors & Cloning
    171182    [StorableConstructor]
    172     private TSNEAnalysis(bool deserializing) : base(deserializing) { }
    173     private TSNEAnalysis(TSNEAnalysis original, Cloner cloner) : base(original, cloner) { }
    174     public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAnalysis(this, cloner); }
    175     public TSNEAnalysis() {
     183    private TSNEAlgorithm(bool deserializing) : base(deserializing) { }
     184    private TSNEAlgorithm(TSNEAlgorithm original, Cloner cloner) : base(original, cloner) { }
     185    public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); }
     186    public TSNEAlgorithm() {
    176187      Problem = new RegressionProblem();
    177       Parameters.Add(new ValueParameter<IDistance<RealVector>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance()));
    178       Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended Value is Floor(number of points /3) or lower", new DoubleValue(25)));
    179       Parameters.Add(new OptionalValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0.1)));
    180       Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2)));
     188      Parameters.Add(new ValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance()));
     189      Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25)));
     190      Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0)));
     191      Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2)));
    181192      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000)));
    182193      Parameters.Add(new FixedValueParameter<IntValue>(StopLyingIterationParameterName, "Number of iterations after which p is no longer approximated", new IntValue(0)));
     
    184195      Parameters.Add(new FixedValueParameter<DoubleValue>(InitialMomentumParameterName, "The initial momentum in the gradient descent", new DoubleValue(0.5)));
    185196      Parameters.Add(new FixedValueParameter<DoubleValue>(FinalMomentumParameterName, "The final momentum", new DoubleValue(0.8)));
    186       Parameters.Add(new FixedValueParameter<DoubleValue>(EtaParameterName, "Gradient Descent learning rate", new DoubleValue(200)));
     197      Parameters.Add(new FixedValueParameter<DoubleValue>(EtaParameterName, "Gradient descent learning rate", new DoubleValue(200)));
    187198      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true)));
    188199      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0)));
    189       Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found Training/Test is used as labels", new StringValue("none")));
    190       Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Wether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored", new BoolValue(true)));
     200      Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found training/test is used as labels", new StringValue("none")));
     201      Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored", new BoolValue(true)));
    191202
    192203      MomentumSwitchIterationParameter.Hidden = true;
     
    200211    public override void Stop() {
    201212      base.Stop();
    202       if(tsne != null) tsne.Running = false;
     213      if (tsne != null) tsne.Running = false;
    203214    }
    204215
     
    208219      var problemData = Problem.ProblemData;
    209220
    210       //color datapoints acording to Classes-Variable (be it double or string)
    211       if(problemData.Dataset.VariableNames.Contains(Classes)) {
    212         if((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {
     221      //color datapoints acording to classes variable (be it double or string)
     222      if (problemData.Dataset.VariableNames.Contains(Classes)) {
     223        if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {
    213224          var classes = problemData.Dataset.GetStringValues(Classes).ToArray();
    214           for(var i = 0; i < classes.Length; i++) {
    215             if(!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());
     225          for (var i = 0; i < classes.Length; i++) {
     226            if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());
    216227            dataRowNames[classes[i]].Add(i);
    217228          }
    218         } else if((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
     229        } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
    219230          var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray();
    220           var max = classValues.Max() + 0.1;
     231          var max = classValues.Max() + 0.1;     // TODO consts
    221232          var min = classValues.Min() - 0.1;
    222233          const int contours = 8;
    223           for(var i = 0; i < contours; i++) {
     234          for (var i = 0; i < contours; i++) {
    224235            var contourname = GetContourName(i, min, max, contours);
    225236            dataRowNames.Add(contourname, new List<int>());
     
    228239            rows[contourname].VisualProperties.PointSize = i + 3;
    229240          }
    230           for(var i = 0; i < classValues.Length; i++) {
     241          for (var i = 0; i < classValues.Length; i++) {
    231242            dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i);
    232243          }
     
    237248      }
    238249
    239       //Set up and run TSNE
    240       if(SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next();
    241       var random = new MersenneTwister(Seed);
    242       tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows);
     250      // set up and run tSNE
     251      if (SetSeedRandomly) Seed = new System.Random().Next();
     252      var random = new MersenneTwister((uint)Seed);
     253      tsne = new TSNE<double[]>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows);
    243254      var dataset = problemData.Dataset;
    244255      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
    245       var data = new RealVector[dataset.Rows];
    246       for(var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray());
    247       if(Normalization) data = NormalizeData(data);
     256      var data = new double[dataset.Rows][];
     257      for (var row = 0; row < dataset.Rows; row++) data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray();
     258      if (Normalization) data = NormalizeData(data);
    248259      tsne.Run(data, NewDimensions, Perplexity, Theta);
    249260    }
    250261
    251     private static RealVector[] NormalizeData(IReadOnlyList<RealVector> data) {
     262    private static double[][] NormalizeData(IReadOnlyList<double[]> data) {
    252263      var n = data[0].Length;
    253264      var mean = new double[n];
    254265      var sd = new double[n];
    255       var nData = new RealVector[data.Count];
    256       for(var i = 0; i < n; i++) {
     266      var nData = new double[data.Count][];
     267      for (var i = 0; i < n; i++) {
    257268        var i1 = i;
    258269        sd[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).StandardDeviation();
    259270        mean[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).Average();
    260271      }
    261       for(var i = 0; i < data.Count; i++) {
    262         nData[i] = new RealVector(n);
    263         for(var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j];
     272      for (var i = 0; i < data.Count; i++) {
     273        nData[i] = new double[n];
     274        for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j];
    264275      }
    265276      return nData;
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs

    r14782 r14785  
    5858using System.Linq;
    5959using HeuristicLab.Analysis;
     60using HeuristicLab.Collections;
    6061using HeuristicLab.Common;
    6162using HeuristicLab.Core;
     
    6768namespace HeuristicLab.Algorithms.DataAnalysis {
    6869  [StorableClass]
    69   public class TSNE<T> : DeepCloneable where T : class, IDeepCloneable {
     70  public class TSNE<T> : DeepCloneable /*where T : class, IDeepCloneable*/ {
    7071
    7172    private const string IterationResultName = "Iteration";
     
    101102
    102103    #region Stopping
    103     public volatile bool Running;
     104    public volatile bool Running;      // TODO
    104105    #endregion
    105106
     
    164165
    165166      // Initialize solution (randomly)
    166       for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001;
     167      for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001;  // TODO const
    167168
    168169      // Perform main training loop
    169170      for (var iter = 0; iter < maxIter && Running; iter++) {
    170171        if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY);
    171         else ComputeGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);
     172        else ComputeApproximateGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);
    172173        // Update gains
    173174        for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8;
     
    189190      return newData;
    190191    }
    191     public static double[,] Run<TR>(TR[] data, int newDimensions, double perplexity, double theta, IDistance<TR> distance, IRandom random) where TR : class, IDeepCloneable {
    192       return new TSNE<TR>(distance, random).Run(data, newDimensions, perplexity, theta);
    193     }
    194192
    195193    #region helpers
     
    205203      else ((DoubleValue)results[ErrorResultName].Value).Value = 0;
    206204
    207       if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent")));
    208       else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent");
     205      if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent")));
     206      else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent");
    209207
    210208      var plot = results[ErrorPlotResultName].Value as DataTable;
    211       if (plot == null) throw new ArgumentException("could not create/access Error-DataTable in Results-Collection");
     209      if (plot == null) throw new ArgumentException("could not create/access error data table in results collection");
    212210
    213211      if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors"));
     
    224222      var errors = plot.Rows["errors"].Values;
    225223      var c = exact
    226         ? EvaluateError(p, newData, noDatapoints, newDimensions)
    227         : EvaluateError(rowP, colP, valP, newData, theta);
     224        ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions)
     225        : EvaluateErrorApproximate(rowP, colP, valP, newData, theta);
    228226      errors.Add(c);
    229227      ((IntValue)results[IterationResultName].Value).Value = iter + 1;
     
    304302      for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
    305303
     304      var objX = new List<IndexedItem<T>>();
     305      for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
     306
    306307      // Build ball tree on data set
    307       var tree = new VPTree<IDataPoint<T>>(new DataPointDistance<T>(distance));
    308       var objX = new List<IDataPoint<T>>();
    309       for (var i = 0; i < n; i++) objX.Add(new DataPoint<T>(i, x[i]));
    310       tree.Create(objX);
     308      var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX);           // do we really want to re-create the tree on each call?
    311309
    312310      // Loop over all points to find nearest neighbors
    313       var indices = new List<IDataPoint<T>>();
    314       var distances = new List<double>();
    315311      for (var i = 0; i < n; i++) {
     312        IList<IndexedItem<T>> indices;
     313        IList<double> distances;
    316314
    317315        // Find nearest neighbors
    318         indices.Clear();
    319         distances.Clear();
    320316        tree.Search(objX[i], k + 1, out indices, out distances);
    321317
     
    323319        var found = false;
    324320        var beta = 1.0;
    325         var minBeta = -double.MaxValue;
     321        var minBeta = double.MinValue;
    326322        var maxBeta = double.MaxValue;
    327         const double tol = 1e-5;
     323        const double tol = 1e-5;  // TODO: why 1e-5?
    328324
    329325        // Iterate until we found a good perplexity
     
    374370    }
    375371    private void ComputeGaussianPerplexity(T[] x, int n, double[,] p, double perplexity) {
    376       // Compute the squared Euclidean distance matrix
     372      // Compute the distance matrix
    377373      var dd = ComputeDistances(x);
     374
    378375      // Compute the Gaussian kernel row by row
    379 
    380376      for (var i = 0; i < n; i++) {
    381377        // Initialize some variables
     
    389385        // Iterate until we found a good perplexity
    390386        var iter = 0;
    391         while (!found && iter < 200) {
     387        while (!found && iter < 200) {       // TODO constant
    392388
    393389          // Compute Gaussian kernel row
     
    430426      }
    431427    }
     428
    432429    private double[][] ComputeDistances(T[] x) {
    433430      return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray();
    434431    }
     432
    435433    private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {
    436434
     
    487485      }
    488486    }
    489     private static void ComputeGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
    490       var tree = new SPTree(y);
     487    private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
     488      var tree = new SpacePartitioningTree(y);
    491489      double[] sumQ = { 0 };
    492490      var posF = new double[n, d];
     
    502500      for (var i = 0; i < n; i++)
    503501        for (var j = 0; j < d; j++) {
    504           dC[i, j] = posF[i, j] - negF[i, j] / sumQ[0];
    505         }
    506     }
    507     private static double EvaluateError(double[,] p, double[,] y, int n, int d) {
     502          dC[i, j] = (posF[i, j] - negF[i, j]) / sumQ[0]; // TODO: check parenthesis
     503        }
     504    }
     505
     506    private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) {
    508507      // Compute the squared Euclidean distance matrix
    509508      var dd = new double[n, n];
     
    531530      return c;
    532531    }
    533     private static double EvaluateError(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
     532    private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
    534533      // Get estimate of normalization term
    535534      var n = y.GetLength(0);
    536535      var d = y.GetLength(1);
    537       var tree = new SPTree(y);
     536      var tree = new SpacePartitioningTree(y);
    538537      var buff = new double[d];
    539538      double[] sumQ = { 0 };
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs

    r14784 r14785  
    5757using System.Collections.Generic;
    5858using System.Linq;
     59using HeuristicLab.Collections;
    5960using HeuristicLab.Common;
    6061using HeuristicLab.Core;
     
    7273  /// <typeparam name="T"></typeparam>
    7374  [StorableClass]
    74   public class VPTree<T> : DeepCloneable, IVPTree<T> where T : class, IDeepCloneable {
     75  public class VantagePointTree<T> : IVantagePointTree<T> {
    7576    #region properties
    76     [Storable]
    77     private List<T> items;
    78     [Storable]
    79     private double tau;
    80     [Storable]
    81     private Node root;
    82     [Storable]
    83     private IDistance<T> distance;
     77    private readonly List<T> items;
     78    private readonly Node root;
     79    private readonly IDistance<T> distance;
    8480    #endregion
    8581
    86     #region HLConstructors & Cloning
    87     [StorableConstructor]
    88     protected VPTree(bool deserializing) { }
    89     protected VPTree(VPTree<T> original, Cloner cloner)
    90       : base(original, cloner) {
    91       items = original.items.Select(cloner.Clone).ToList();
    92       tau = original.tau;
    93       root = cloner.Clone(original.root);
    94       distance = cloner.Clone(distance);
    95     }
    96     public override IDeepCloneable Clone(Cloner cloner) { return new VPTree<T>(this, cloner); }
    97     public VPTree(IDistance<T> distance) {
     82    public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() {
    9883      root = null;
    9984      this.distance = distance;
    100     }
    101     #endregion
    102 
    103     public void Create(IEnumerable<T> items) {
    10485      this.items = items.Select(x => x).ToList();
    10586      root = BuildFromPoints(0, this.items.Count);
    10687    }
    10788
    108     public void Search(T target, int k, out List<T> results, out List<double> distances) {
    109       var heap = new PriorityQueue<double, HeapItem>(double.MaxValue, double.MinValue, k);
    110       tau = double.MaxValue;
    111       Search(root, target, k, heap);
    112       results = new List<T>();
    113       distances = new List<double>();
     89    public void Search(T target, int k, out IList<T> results, out IList<double> distances) {
     90      var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k);
     91      Search(root, target, k, heap, double.MaxValue);
     92      var res = new List<T>();
     93      var dist = new List<double>();
    11494      while (heap.Size > 0) {
    115         results.Add(items[heap.PeekMinValue().Index]);
    116         distances.Add(heap.PeekMinValue().Dist);
     95        res.Add(items[heap.PeekMinValue().Index]);
     96        dist.Add(heap.PeekMinValue().Value);
    11797        heap.RemoveMin();
    11898      }
    119       results.Reverse();
    120       distances.Reverse();
     99      res.Reverse();
     100      dist.Reverse();
     101      results = res;
     102      distances = dist;
     103    }
     104
     105    private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, double tau = double.MaxValue) {
     106      if (node == null) return;
     107      var dist = distance.Get(items[node.index], target);
     108      if (dist < tau) {
     109        if (heap.Size == k) heap.RemoveMin();
     110        heap.Insert(-dist, new IndexedItem<double>(node.index, dist));//TODO check if minheap or maxheap should be used here
     111        if (heap.Size == k) tau = heap.PeekMinValue().Value;
     112      }
     113      if (node.left == null && node.right == null) return;
     114
     115      if (dist < node.threshold) {
     116        if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau);   // if there can still be neighbors inside the ball, recursively search left child first
     117        if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau);  // if there can still be neighbors outside the ball, recursively search right child
     118      } else {
     119        if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau);  // if there can still be neighbors outside the ball, recursively search right child first
     120        if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau);   // if there can still be neighbors inside the ball, recursively search left child
     121      }
    121122    }
    122123
     
    150151    }
    151152
    152     private void Search(Node node, T target, int k, PriorityQueue<double, HeapItem> heap) {
    153       if (node == null) return;
    154       var dist = distance.Get(items[node.index], target);
    155       if (dist < tau) {
    156         if (heap.Size == k) heap.RemoveMin();
    157         heap.Insert(-dist, new HeapItem(node.index, dist));//TODO check if minheap or maxheap should be used here
    158         if (heap.Size == k) tau = heap.PeekMinValue().Dist;
    159       }
    160       if (node.left == null && node.right == null) return;
    161 
    162       if (dist < node.threshold) {
    163         if (dist - tau <= node.threshold) Search(node.left, target, k, heap);   // if there can still be neighbors inside the ball, recursively search left child first
    164         if (dist + tau >= node.threshold) Search(node.right, target, k, heap);  // if there can still be neighbors outside the ball, recursively search right child
    165       } else {
    166         if (dist + tau >= node.threshold) Search(node.right, target, k, heap);  // if there can still be neighbors outside the ball, recursively search right child first
    167         if (dist - tau <= node.threshold) Search(node.left, target, k, heap);   // if there can still be neighbors inside the ball, recursively search left child
    168       }
    169 
    170     }
    171 
    172     [StorableClass]
    173     private sealed class Node : Item {
    174       [Storable]
     153    private sealed class Node {
    175154      public int index;
    176       [Storable]
    177155      public double threshold;
    178       [Storable]
    179156      public Node left;
    180       [Storable]
    181157      public Node right;
    182158
    183       #region HLConstructors & Cloning
    184       [StorableConstructor]
    185       private Node(bool deserializing) : base(deserializing) { }
    186       private Node(Node original, Cloner cloner) : base(original, cloner) {
    187         index = original.index;
    188         threshold = original.threshold;
    189         left = (Node)original.left.Clone(cloner);
    190         right = (Node)original.right.Clone(cloner);
    191       }
    192159      internal Node() {
    193160        index = 0;
     
    196163        right = null;
    197164      }
    198       public override IDeepCloneable Clone(Cloner cloner) {
    199         return new Node(this, cloner);
    200       }
    201       #endregion
    202     }
    203 
    204     [StorableClass]
    205     private sealed class HeapItem : Item, IComparable<HeapItem> {
    206       [Storable]
    207       public int Index;
    208       [Storable]
    209       public double Dist;
    210 
    211       #region HLConstructors & Cloning
    212       [StorableConstructor]
    213       private HeapItem(bool deserializing) : base(deserializing) { }
    214       private HeapItem(HeapItem original, Cloner cloner)
    215       : base(original, cloner) {
    216         Index = original.Index;
    217         Dist = original.Dist;
    218       }
    219       public override IDeepCloneable Clone(Cloner cloner) { return new HeapItem(this, cloner); }
    220       public HeapItem(int index, double dist) {
    221         Index = index;
    222         Dist = dist;
    223       }
    224       #endregion
    225 
    226       public int CompareTo(HeapItem other) {
    227         return Dist.CompareTo(Dist);
    228       }
    229165    }
    230166  }
Note: See TracChangeset for help on using the changeset viewer.