Timestamp:
07/15/17 10:29:40
Author:
gkronber
Message:

#2699, #2700: merged r14862, r14863, r14911, r14936, r15156, r15157, r15158, r15164, r15169, r15207:15209, r15225, r15227, r15234, r15248 from trunk to stable

Location:
stable
Files:
2 deleted
20 edited
5 copied

  • stable

  • stable/HeuristicLab.Algorithms.DataAnalysis

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15142 → r15249

@@ ... @@
     <Compile Include="Interfaces\ISupportVectorMachineSolution.cs" />
     <Compile Include="Interfaces\IDataAnalysisAlgorithm.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\CicularKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\GaussianKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\IKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\InverseMultiquadraticKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\KernelBase.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\MultiquadraticKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\PolysplineKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelFunctions\ThinPlatePolysplineKernel.cs" />
+    <Compile Include="KernelRidgeRegression\KernelRidgeRegression.cs" />
+    <Compile Include="KernelRidgeRegression\KernelRidgeRegressionModel.cs" />
     <Compile Include="kMeans\KMeansClustering.cs" />
     <Compile Include="kMeans\KMeansClusteringModel.cs" />
@@ ... @@
     </Compile>
     <Compile Include="TimeSeries\AutoregressiveModeling.cs" />
+    <Compile Include="TSNE\Distances\CosineDistance.cs" />
+    <Compile Include="TSNE\Distances\DistanceBase.cs" />
+    <Compile Include="TSNE\Distances\EuclideanDistance.cs" />
+    <Compile Include="TSNE\Distances\IndexedItemDistance.cs" />
+    <Compile Include="TSNE\Distances\ManhattanDistance.cs" />
+    <Compile Include="TSNE\Distances\IDistance.cs" />
+    <Compile Include="TSNE\PriorityQueue.cs" />
+    <Compile Include="TSNE\SpacePartitioningTree.cs" />
+    <Compile Include="TSNE\TSNEAlgorithm.cs" />
+    <Compile Include="TSNE\TSNEStatic.cs" />
+    <Compile Include="TSNE\TSNEUtils.cs" />
+    <Compile Include="TSNE\VantagePointTree.cs" />
   </ItemGroup>
   <ItemGroup>
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/CicularKernel.cs

    r14892 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   [Item("CircularKernel", "A circular kernel function 2*pi*(acos(-d)-d*(1-d²)^(0.5)) where n = ||x-c|| and d = n/beta \n  As described in http://crsouza.com/2010/03/17/kernel-functions-for-machine-learning-applications/")]
   public class CircularKernel : KernelBase {
-
-    #region HLConstructors & Boilerplate
     [StorableConstructor]
     protected CircularKernel(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
+
     protected CircularKernel(CircularKernel original, Cloner cloner) : base(original, cloner) { }
-    public CircularKernel() {
-    }
+
+    public CircularKernel() { }
+
     public override IDeepCloneable Clone(Cloner cloner) {
       return new CircularKernel(this, cloner);
     }
-    #endregion
 
     protected override double Get(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
@@ ... @@
     // 4*pi*n^3 / (beta^4 * sqrt(1-n^2/beta^2))
     protected override double GetGradient(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/GaussianKernel.cs

    r14891 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   [Item("GaussianKernel", "A kernel function that uses Gaussian function exp(-n²/beta²). As described in http://crsouza.com/2010/03/17/kernel-functions-for-machine-learning-applications/")]
   public class GaussianKernel : KernelBase {
-
-    #region HLConstructors & Boilerplate
     [StorableConstructor]
     protected GaussianKernel(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
+
     protected GaussianKernel(GaussianKernel original, Cloner cloner) : base(original, cloner) { }
+
     public GaussianKernel() {
     }
+
     public override IDeepCloneable Clone(Cloner cloner) {
       return new GaussianKernel(this, cloner);
     }
-    #endregion
 
     protected override double Get(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
@@ ... @@
     //2 * n²/b² * 1/b * exp(-n²/b²)
     protected override double GetGradient(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
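
    As a quick reference for the formulas in the comments above, here is a minimal standalone sketch (a hypothetical helper, not part of this changeset) of what GaussianKernel.Get and GetGradient compute after the merge, including the new Beta guards:

      using System;

      static class GaussianKernelSketch {
        // Kernel value k(n) = exp(-n²/β²), where n is the (possibly scaled) distance ||x - c||.
        public static double Get(double norm, double? beta) {
          if (beta == null) throw new InvalidOperationException("Beta is null"); // mirrors the added guard
          var b = beta.Value;
          if (Math.Abs(b) < double.Epsilon) return double.NaN;                  // degenerate beta
          return Math.Exp(-norm * norm / (b * b));
        }

        // Gradient w.r.t. beta: dk/dβ = 2n²/β³ · exp(-n²/β²), cf. the "2 * n²/b² * 1/b * exp(-n²/b²)" comment.
        public static double GetGradient(double norm, double? beta) {
          if (beta == null) throw new InvalidOperationException("Beta is null");
          var b = beta.Value;
          if (Math.Abs(b) < double.Epsilon) return double.NaN;
          return 2 * norm * norm / (b * b * b) * Math.Exp(-norm * norm / (b * b));
        }
      }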
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/IKernel.cs

    r14887 → r15249

@@ ... @@
 
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+using System;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
   public interface IKernel : ICovarianceFunction {
     double? Beta { get; set; } // a kernel parameter
     IDistance Distance { get; set; } // the distance function to use
+
+    event EventHandler BetaChanged;
+    event EventHandler DistanceChanged;
   }
 }
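
    A sketch of how the two new events can be consumed (hypothetical wiring; any IKernel implementation such as GaussianKernel behaves the same way, with the events raised by KernelBase as shown further below):

      IKernel kernel = new GaussianKernel();
      kernel.BetaChanged += (sender, args) => Console.WriteLine("Beta is now " + kernel.Beta);
      kernel.DistanceChanged += (sender, args) => Console.WriteLine("Distance function was replaced");
      kernel.Beta = 2.0;                          // raises BetaChanged
      kernel.Distance = new ManhattanDistance();  // raises DistanceChanged via DistanceParameter.ValueChanged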
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/InverseMultiquadraticKernel.cs

    r14891 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   [Item("InverseMultiquadraticKernel", "A kernel function that uses the inverse multi-quadratic function 1 / sqrt(1+||x-c||²/beta²). Similar to http://crsouza.com/2010/03/17/kernel-functions-for-machine-learning-applications/ with beta as a scaling factor.")]
   public class InverseMultiquadraticKernel : KernelBase {
+    private const double C = 1.0;
 
-    private const double C = 1.0;
-    #region HLConstructors & Boilerplate
     [StorableConstructor]
     protected InverseMultiquadraticKernel(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
+
     protected InverseMultiquadraticKernel(InverseMultiquadraticKernel original, Cloner cloner) : base(original, cloner) { }
+
     public InverseMultiquadraticKernel() { }
+
     public override IDeepCloneable Clone(Cloner cloner) {
       return new InverseMultiquadraticKernel(this, cloner);
     }
-    #endregion
 
     protected override double Get(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
@@ ... @@
     //n²/(b³(n²/b² + C)^1.5)
     protected override double GetGradient(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/KernelBase.cs

    r14887 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   public abstract class KernelBase : ParameterizedNamedItem, IKernel {
 
-    #region Parameternames
     private const string DistanceParameterName = "Distance";
-    #endregion
-    #region Parameterproperties
-    public ValueParameter<IDistance> DistanceParameter {
-      get { return Parameters[DistanceParameterName] as ValueParameter<IDistance>; }
+
+    public IValueParameter<IDistance> DistanceParameter {
+      get { return (IValueParameter<IDistance>)Parameters[DistanceParameterName]; }
     }
 
     [Storable]
-    public double? Beta { get; set; }
-    #endregion
-    #region Properties
+    private double? beta;
+    public double? Beta {
+      get { return beta; }
+      set {
+        if (value != beta) {
+          beta = value;
+          RaiseBetaChanged();
+        }
+      }
+    }
+
     public IDistance Distance {
       get { return DistanceParameter.Value; }
-      set { DistanceParameter.Value = value; }
+      set {
+        if (DistanceParameter.Value != value) {
+          DistanceParameter.Value = value;
+        }
+      }
     }
-
-    #endregion
 
     [StorableConstructor]
     protected KernelBase(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
 
     protected KernelBase(KernelBase original, Cloner cloner)
       : base(original, cloner) {
-      Beta = original.Beta;
+      beta = original.beta;
+      RegisterEvents();
     }
 
@@ ... @@
       Parameters.Add(new ValueParameter<IDistance>(DistanceParameterName, "The distance function used for kernel calculation"));
       DistanceParameter.Value = new EuclideanDistance();
+      RegisterEvents();
+    }
+
+    [StorableHook(HookType.AfterDeserialization)]
+    private void AfterDeserialization() {
+      RegisterEvents();
+    }
+
+    private void RegisterEvents() {
+      DistanceParameter.ValueChanged += (sender, args) => RaiseDistanceChanged();
     }
 
@@ ... @@
     public ParameterizedCovarianceFunction GetParameterizedCovarianceFunction(double[] p, int[] columnIndices) {
       if (p.Length != GetNumberOfParameters(columnIndices.Length)) throw new ArgumentException("Illegal parametrization");
-      var myClone = (KernelBase)Clone(new Cloner());
+      var myClone = (KernelBase)Clone();
       myClone.SetParameter(p);
       var cov = new ParameterizedCovarianceFunction {
@@ ... @@
       return dist.Get(r1, r2);
     }
+
+    #region events
+    public event EventHandler BetaChanged;
+    public event EventHandler DistanceChanged;
+
+    protected void RaiseBetaChanged() {
+      var handler = BetaChanged;
+      if (handler != null) handler(this, EventArgs.Empty);
+    }
+
+    protected void RaiseDistanceChanged() {
+      var handler = DistanceChanged;
+      if (handler != null) handler(this, EventArgs.Empty);
+    }
+    #endregion
   }
 }
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/MultiquadraticKernel.cs

    r14891 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   // conditionally positive definite. (need to add polynomials) see http://num.math.uni-goettingen.de/schaback/teaching/sc.pdf
@@ ... @@
 
     private const double C = 1.0;
-    #region HLConstructors & Boilerplate
+
     [StorableConstructor]
     protected MultiquadraticKernel(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
-    protected MultiquadraticKernel(MultiquadraticKernel original, Cloner cloner)
-                : base(original, cloner) { }
 
-    public MultiquadraticKernel() {
-    }
+    protected MultiquadraticKernel(MultiquadraticKernel original, Cloner cloner) : base(original, cloner) { }
+
+    public MultiquadraticKernel() { }
+
     public override IDeepCloneable Clone(Cloner cloner) {
       return new MultiquadraticKernel(this, cloner);
     }
-    #endregion
+
     protected override double Get(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
@@ ... @@
     //-n²/(d³*sqrt(C+n²/d²))
     protected override double GetGradient(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/PolysplineKernel.cs

    r14892 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   // conditionally positive definite. (need to add polynomials) see http://num.math.uni-goettingen.de/schaback/teaching/sc.pdf
@@ ... @@
   public class PolysplineKernel : KernelBase {
 
-    #region Parameternames
     private const string DegreeParameterName = "Degree";
-    #endregion
-    #region Parameterproperties
+
     public IFixedValueParameter<DoubleValue> DegreeParameter {
-      get { return Parameters[DegreeParameterName] as IFixedValueParameter<DoubleValue>; }
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[DegreeParameterName]; }
     }
-    #endregion
-    #region Properties
+
     public DoubleValue Degree {
       get { return DegreeParameter.Value; }
     }
-    #endregion
 
-    #region HLConstructors & Boilerplate
     [StorableConstructor]
     protected PolysplineKernel(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
+
     protected PolysplineKernel(PolysplineKernel original, Cloner cloner) : base(original, cloner) { }
+
     public PolysplineKernel() {
       Parameters.Add(new FixedValueParameter<DoubleValue>(DegreeParameterName, "The degree of the kernel. Needs to be greater than zero.", new DoubleValue(1.0)));
     }
+
     public override IDeepCloneable Clone(Cloner cloner) {
       return new PolysplineKernel(this, cloner);
     }
-    #endregion
 
     protected override double Get(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
@@ ... @@
     //-degree/beta * (norm/beta)^degree
     protected override double GetGradient(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/ThinPlatePolysplineKernel.cs

    r14892 → r15249

@@ ... @@
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   // conditionally positive definite. (need to add polynomials) see http://num.math.uni-goettingen.de/schaback/teaching/sc.pdf
@@ ... @@
   public class ThinPlatePolysplineKernel : KernelBase {
 
-    #region Parameternames
     private const string DegreeParameterName = "Degree";
-    #endregion
-    #region Parameterproperties
+
     public IFixedValueParameter<DoubleValue> DegreeParameter {
-      get { return Parameters[DegreeParameterName] as IFixedValueParameter<DoubleValue>; }
+      get { return (IFixedValueParameter<DoubleValue>)Parameters[DegreeParameterName]; }
     }
-    #endregion
-    #region Properties
     public DoubleValue Degree {
       get { return DegreeParameter.Value; }
     }
-    #endregion
 
-    #region HLConstructors & Boilerplate
     [StorableConstructor]
     protected ThinPlatePolysplineKernel(bool deserializing) : base(deserializing) { }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
+
     protected ThinPlatePolysplineKernel(ThinPlatePolysplineKernel original, Cloner cloner) : base(original, cloner) { }
+
     public ThinPlatePolysplineKernel() {
       Parameters.Add(new FixedValueParameter<DoubleValue>(DegreeParameterName, "The degree of the kernel. Needs to be greater than zero.", new DoubleValue(2.0)));
     }
+
     public override IDeepCloneable Clone(Cloner cloner) {
       return new ThinPlatePolysplineKernel(this, cloner);
     }
-    #endregion
 
     protected override double Get(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
@@ ... @@
     // (Degree/beta) * (norm/beta)^Degree * log(norm/beta)
     protected override double GetGradient(double norm) {
+      if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance gradient while Beta is null");
       var beta = Beta.Value;
       if (Math.Abs(beta) < double.Epsilon) return double.NaN;
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegression.cs

    r14888 → r15249

@@ ... @@
 using HeuristicLab.Parameters;
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+using HeuristicLab.PluginInfrastructure;
 using HeuristicLab.Problems.DataAnalysis;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [Item("Kernel Ridge Regression", "Kernelized ridge regression e.g. for radial basis function (RBF) regression.")]
   [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 100)]
@@ ... @@
 
     #region parameter properties
-    public ValueParameter<IKernel> KernelParameter {
-      get { return (ValueParameter<IKernel>)Parameters[KernelParameterName]; }
+    public IConstrainedValueParameter<IKernel> KernelParameter {
+      get { return (IConstrainedValueParameter<IKernel>)Parameters[KernelParameterName]; }
     }
 
@@ ... @@
     public KernelRidgeRegression() {
       Problem = new RegressionProblem();
-      Parameters.Add(new ValueParameter<IKernel>(KernelParameterName, "The kernel", new GaussianKernel()));
+      var values = new ItemSet<IKernel>(ApplicationManager.Manager.GetInstances<IKernel>());
+      Parameters.Add(new ConstrainedValueParameter<IKernel>(KernelParameterName, "The kernel", values, values.OfType<GaussianKernel>().FirstOrDefault()));
       Parameters.Add(new FixedValueParameter<BoolValue>(ScaleInputVariablesParameterName, "Set to true if the input variables should be scaled to the interval [0..1]", new BoolValue(true)));
       Parameters.Add(new FixedValueParameter<DoubleValue>(LambdaParameterName, "The log10-transformed weight for the regularization term lambda [-inf..+inf]. Small values produce more complex models, large values produce models with larger errors. Set to very small value (e.g. -1.0e15) for almost exact approximation", new DoubleValue(-2)));
-      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta parameter for the kernel", new DoubleValue(2)));
+      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The inverse width of the kernel ]0..+inf]. The distance between points is divided by this value before being plugged into the kernel.", new DoubleValue(2)));
     }
-    [StorableHook(HookType.AfterDeserialization)]
-    private void AfterDeserialization() { }
 
     public override IDeepCloneable Clone(Cloner cloner) {
@@ ... @@
 
     public static IRegressionSolution CreateRadialBasisRegressionSolution(IRegressionProblemData problemData, ICovarianceFunction kernel, double lambda, bool scaleInputs, out double rmsError, out double looCvRMSE) {
-      var model = new KernelRidgeRegressionModel(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, scaleInputs, kernel, lambda);
+      var model = KernelRidgeRegressionModel.Create(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, scaleInputs, kernel, lambda);
       rmsError = double.NaN;
       if (problemData.TestIndices.Any()) {
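
    For context, a sketch of the model this algorithm configures: with Gram matrix K over the training points and regularizer λ (the Lambda parameter above stores log10 of λ), kernel ridge regression solves

    $$ \alpha = (K + \lambda I)^{-1} y, \qquad f(x_\ast) = \sum_{i=1}^{n} \alpha_i \, k(x_i, x_\ast). $$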
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegressionModel.cs

    r14892 → r15249

@@ ... @@
 using HeuristicLab.Problems.DataAnalysis;
 
-namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression {
+namespace HeuristicLab.Algorithms.DataAnalysis {
   [StorableClass]
   [Item("KernelRidgeRegressionModel", "A kernel ridge regression model")]
@@ ... @@
     private readonly string[] allowedInputVariables;
     public string[] AllowedInputVariables {
-      get { return allowedInputVariables; }
+      get { return allowedInputVariables.ToArray(); }
     }
 
@@ ... @@
       yOffset = original.yOffset;
       yScale = original.yScale;
-      if (original.kernel != null)
-        kernel = cloner.Clone(original.kernel);
+      kernel = original.kernel;
     }
     public override IDeepCloneable Clone(Cloner cloner) {
@@ ... @@
     }
 
-    public KernelRidgeRegressionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
-      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) : base(targetVariable) {
-      if (kernel.GetNumberOfParameters(allowedInputVariables.Count()) > 0) throw new ArgumentException("All parameters in the kernel function must be specified.");
-      name = ItemName;
-      description = ItemDescription;
-      this.allowedInputVariables = allowedInputVariables.ToArray();
+    public static KernelRidgeRegressionModel Create(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
+      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) {
       var trainingRows = rows.ToArray();
-      this.kernel = (ICovarianceFunction)kernel.Clone();
-      this.lambda = lambda;
+      var model = new KernelRidgeRegressionModel(dataset, targetVariable, allowedInputVariables, trainingRows, scaleInputs, kernel, lambda);
+
       try {
-        if (scaleInputs)
-          scaling = CreateScaling(dataset, trainingRows);
-        trainX = ExtractData(dataset, trainingRows, scaling);
+        int info;
+        int n = model.trainX.GetLength(0);
+        alglib.densesolverreport denseSolveRep;
+        var gram = BuildGramMatrix(model.trainX, lambda, kernel);
+        var l = new double[n, n];
+        Array.Copy(gram, l, l.Length);
+
+        double[] alpha = new double[n];
+        double[,] invG;
         var y = dataset.GetDoubleValues(targetVariable, trainingRows).ToArray();
-        yOffset = y.Average();
-        yScale = 1.0 / y.StandardDeviation();
         for (int i = 0; i < y.Length; i++) {
-          y[i] -= yOffset;
-          y[i] *= yScale;
-        }
-        int info;
-        int n = trainX.GetLength(0);
-        alglib.densesolverreport denseSolveRep;
-        var gram = BuildGramMatrix(trainX, lambda);
-        var l = new double[n, n]; Array.Copy(gram, l, l.Length);
-
-        double[,] invG;
+          y[i] -= model.yOffset;
+          y[i] *= model.yScale;
+        }
         // cholesky decomposition
         var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
-        if (res == false) { //throw new ArgumentException("Could not decompose matrix. Is it quadratic symmetric positive definite?");
+        if (res == false) { // try LU decomposition if Cholesky failed
           int[] pivots;
           var lua = new double[n, n];
@@ ... @@
           invG = lua;  // rename
           alglib.rmatrixluinverse(ref invG, pivots, n, out info, out rep);
-          if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");
         } else {
           alglib.spdmatrixcholeskysolve(l, n, false, y, out info, out denseSolveRep, out alpha);
@@ ... @@
           invG = l;   // rename
           alglib.spdmatrixcholeskyinverse(ref invG, n, false, out info, out rep);
-          if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");
-        }
+        }
+        if (info != 1) throw new ArgumentException("Could not invert Gram matrix.");
 
         var ssqLooError = 0.0;
@@ ... @@
           var pred_i = Util.ScalarProd(Util.GetRow(gram, i).ToArray(), alpha);
           var looPred_i = pred_i - alpha[i] / invG[i, i];
-          var error = (y[i] - looPred_i) / yScale;
+          var error = (y[i] - looPred_i) / model.yScale;
           ssqLooError += error * error;
         }
-        LooCvRMSE = Math.Sqrt(ssqLooError / n);
+
+        Array.Copy(alpha, model.alpha, n);
+        model.LooCvRMSE = Math.Sqrt(ssqLooError / n);
       } catch (alglib.alglibexception ae) {
         // wrap exception so that calling code doesn't have to know about alglib implementation
         throw new ArgumentException("There was a problem in the calculation of the kernel ridge regression model", ae);
       }
+      return model;
+    }
+
+    private KernelRidgeRegressionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int[] rows,
+      bool scaleInputs, ICovarianceFunction kernel, double lambda = 0.1) : base(targetVariable) {
+      this.allowedInputVariables = allowedInputVariables.ToArray();
+      if (kernel.GetNumberOfParameters(this.allowedInputVariables.Length) > 0) throw new ArgumentException("All parameters in the kernel function must be specified.");
+      name = ItemName;
+      description = ItemDescription;
+
+      this.kernel = (ICovarianceFunction)kernel.Clone();
+      this.lambda = lambda;
+      if (scaleInputs) scaling = CreateScaling(dataset, rows, this.allowedInputVariables);
+      trainX = ExtractData(dataset, rows, this.allowedInputVariables, scaling);
+      var y = dataset.GetDoubleValues(targetVariable, rows).ToArray();
+      yOffset = y.Average();
+      yScale = 1.0 / y.StandardDeviation();
+      alpha = new double[trainX.GetLength(0)];
     }
 
@@ ... @@
     #region IRegressionModel Members
     public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-      var newX = ExtractData(dataset, rows, scaling);
+      var newX = ExtractData(dataset, rows, allowedInputVariables, scaling);
       var dim = newX.GetLength(1);
       var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());
@@ ... @@
 
     #region helpers
-    private double[,] BuildGramMatrix(double[,] data, double lambda) {
+    private static double[,] BuildGramMatrix(double[,] data, double lambda, ICovarianceFunction kernel) {
       var n = data.GetLength(0);
       var dim = data.GetLength(1);
@@ ... @@
     }
 
-    private ITransformation<double>[] CreateScaling(IDataset dataset, int[] rows) {
-      var trans = new ITransformation<double>[allowedInputVariables.Length];
+    private static ITransformation<double>[] CreateScaling(IDataset dataset, int[] rows, IReadOnlyCollection<string> allowedInputVariables) {
+      var trans = new ITransformation<double>[allowedInputVariables.Count];
       int i = 0;
       foreach (var variable in allowedInputVariables) {
@@ ... @@
     }
 
-    private double[,] ExtractData(IDataset dataset, IEnumerable<int> rows, ITransformation<double>[] scaling = null) {
+    private static double[,] ExtractData(IDataset dataset, IEnumerable<int> rows, IReadOnlyCollection<string> allowedInputVariables, ITransformation<double>[] scaling = null) {
       double[][] variables;
       if (scaling != null) {
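
    The LooCvRMSE computed in Create above relies on the standard leave-one-out shortcut for ridge-type models rather than refitting n models; a sketch of the identity, assuming G = K + λI is the matrix returned by BuildGramMatrix and α = G⁻¹y:

    $$ y_i - \hat{y}_i^{(-i)} = \frac{\alpha_i}{[G^{-1}]_{ii}} \quad\Rightarrow\quad \text{LooCvRMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} \left( \frac{\alpha_i}{[G^{-1}]_{ii}\; s_y} \right)^{2}}, $$

    where dividing by s_y = yScale undoes the target scaling applied before solving; this is why each loop iteration divides the residual by model.yScale.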
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/CosineDistance.cs

    r15209 → r15249

@@ ... @@
 
   /// <summary>
-  /// The angluar distance as defined as a normalized distance measure dependent on the angle between two vectors.
-  /// It is designed for vectors with all positive coordinates.
+  /// The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.
   /// </summary>
   [StorableClass]
-  [Item("CosineDistance", "The angluar distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]
+  [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.")]
   public class CosineDistance : DistanceBase<IEnumerable<double>> {
 
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs

    r14767 → r15249

@@ ... @@
 #endregion
 
+using System.Collections;
 using System.Collections.Generic;
 using HeuristicLab.Common;
@@ ... @@
   public abstract class DistanceBase<T> : Item, IDistance<T> {
 
-    #region HLConstructors & Boilerplate
+    #region HLConstructors & Cloning
     [StorableConstructor]
     protected DistanceBase(bool deserializing) : base(deserializing) { }
@@ ... @@
     }
 
-    private class DistanceComparer : IComparer<T> {
+    public double Get(object x, object y) {
+      return Get((T)x, (T)y);
+    }
+
+    public IComparer GetDistanceComparer(object item) {
+      return new DistanceComparer((T)item, this);
+    }
+
+    private class DistanceComparer : IComparer<T>, IComparer {
       private readonly T item;
       private readonly IDistance<T> dist;
@@ ... @@
         return dist.Get(x, item).CompareTo(dist.Get(y, item));
       }
+
+      public int Compare(object x, object y) {
+        return Compare((T)x, (T)y);
+      }
     }
   }
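
    A sketch of why the non-generic overloads were added (hypothetical usage; EuclideanDistance stands in for any DistanceBase<IEnumerable<double>> subclass): callers that only hold object references, or that must hand an IComparer to non-generic framework APIs, can now reuse the same distance.

      var dist = new EuclideanDistance();
      object a = new double[] { 0.0, 0.0 };
      object b = new double[] { 3.0, 4.0 };
      var d = dist.Get(a, b);                 // 5.0, via the (T)-cast bridge
      var cmp = dist.GetDistanceComparer(a);  // non-generic IComparer, orders items by distance to a
      var pts = new object[] { b, a };
      Array.Sort(pts, cmp);                   // usable with legacy APIs such as Array.Sort(Array, IComparer)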
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclideanDistance.cs

    r14784 → r15249

@@ ... @@
   public class EuclideanDistance : DistanceBase<IEnumerable<double>> {
 
-    #region HLConstructors & Boilerplate
+    #region HLConstructors & Cloning
     [StorableConstructor]
     protected EuclideanDistance(bool deserializing) : base(deserializing) { }
@@ ... @@
     #endregion
 
-    public static double GetDistance(double[] point1, double[] point2) {
-      if (point1.Length != point2.Length) throw new ArgumentException("Euclidean distance not defined on vectors of different length");
-      return Math.Sqrt(point1.Zip(point2, (a1, b1) => (a1 - b1) * (a1 - b1)).Sum());
+    public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) {
+      if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length");
+      var sum = 0.0;
+      for (var i = 0; i < point1.Count; i++) {
+        var d = point1[i] - point2[i];
+        sum += d * d;
+      }
+
+      return Math.Sqrt(sum);
     }
 
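
    A small usage check of the reworked helper (the explicit loop avoids the per-call LINQ iterator allocations of the previous Zip-based version; double[] satisfies IReadOnlyList<double>):

      // 3-4-5 triangle: the distance should be exactly 5.0
      var d = EuclideanDistance.GetDistance(new[] { 1.0, 2.0 }, new[] { 4.0, 6.0 });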
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/ManhattanDistance.cs

    r14911 → r15249

@@ ... @@
   public class ManhattanDistance : DistanceBase<IEnumerable<double>> {
 
-    #region HLConstructors & Boilerplate
+    #region HLConstructors & Cloning
     [StorableConstructor]
     protected ManhattanDistance(bool deserializing) : base(deserializing) { }
@@ ... @@
     public static double GetDistance(double[] point1, double[] point2) {
       if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length");
-      return point1.Zip(point2, (a1, b1) => Math.Abs(a1 - b1)).Sum();
+      var sum = 0.0;
+      for (var i = 0; i < point1.Length; i++)
+        sum += Math.Abs(point1[i] - point2[i]);
+      return sum;
     }
 
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs

    r14785 → r15249

@@ ... @@
 
 namespace HeuristicLab.Algorithms.DataAnalysis {
-  // TODO: move to HeuristicLab.Collections
 
   // Code Taken from branch: RoutePlanning, Heap4
@@ ... @@
 
     public KeyValuePair<TK, TV> PeekMin() {
-      if(size == 0) {
-        throw new InvalidOperationException("Heap is empty");
-      }
+      if (size == 0) throw new InvalidOperationException("Heap is empty");
       return new KeyValuePair<TK, TV>(data[1].Key, data[1].Value);
     }
@@ ... @@
       size = sz - 1;
       finalLayerDist++;
-      if(finalLayerDist == finalLayerSize) {
+      if (finalLayerDist == finalLayerSize) {
         finalLayerSize >>= 2;
         finalLayerDist = 0;
       }
 
-      while(succ < sz) {
+      while (succ < sz) {
         var minKey = data[succ].Key;
         var delta = 0;
 
-        var otherKey = data[succ + 1].Key;
-        if(otherKey.CompareTo(minKey) < 0) {
+        for (var i = 1; i <= 3; i++) {
+          var otherKey = data[succ + i].Key;
+          if (otherKey.CompareTo(minKey) >= 0) continue;
           minKey = otherKey;
-          delta = 1;
+          delta = i;
         }
-        otherKey = data[succ + 2].Key;
-        if(otherKey.CompareTo(minKey) < 0) {
-          minKey = otherKey;
-          delta = 2;
-        }
-        otherKey = data[succ + 3].Key;
-        if(otherKey.CompareTo(minKey) < 0) {
-          minKey = otherKey;
-          delta = 3;
-        }
+
         succ += delta;
         layerPos += delta;
@@ ... @@
       pred = pred - layerDist; // finally preds index
 
-      while(data[pred].Key.CompareTo(bubble) > 0) {  // must terminate since inf at root
+      while (data[pred].Key.CompareTo(bubble) > 0) {  // must terminate since inf at root
         data[hole].Key = data[pred].Key;
         data[hole].Value = data[pred].Value;
@@ ... @@
       data[hole].Key = bubble;
       data[hole].Value = data[sz].Value;
-
       data[sz].Key = Supremum; // mark as deleted
     }
@@ ... @@
       finalLayerDist--;
 
-      if(finalLayerDist == -1) { // layer full
-                                 // start next layer
+      if (finalLayerDist == -1) { // layer full
+                                  // start next layer
         finalLayerSize <<= 2;
         finalLayerDist = finalLayerSize - 1;
@@ ... @@
       pred = pred - layerDist; // finally preds index
       var predKey = data[pred].Key;
-      while(predKey.CompareTo(key) > 0) {
+      while (predKey.CompareTo(key) > 0) {
         data[hole].Key = predKey;
         data[hole].Value = data[pred].Value;
@@ ... @@
       var sup = Supremum;
       var cap = capacity;
-      for(var i = 1; i <= cap; i++) {
+      for (var i = 1; i <= cap; i++) {
         data[i].Key = sup;
       }
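
    The loop introduced in the sift-down above exploits the Heap4 layout: all four children of a node occupy one consecutive block of the backing array, so locating the smallest child is a bounded linear scan instead of three hand-unrolled comparisons. A minimal sketch of the idea (hypothetical helper, not the class itself):

      using System;
      using System.Collections.Generic;

      static class Heap4Sketch {
        // Returns the offset (0..3) of the smallest key within the four-slot
        // child block starting at index succ; 0 means the first child is smallest.
        static int MinChildOffset<TK, TV>(KeyValuePair<TK, TV>[] data, int succ) where TK : IComparable<TK> {
          var delta = 0;
          var minKey = data[succ].Key;
          for (var i = 1; i <= 3; i++) {
            if (data[succ + i].Key.CompareTo(minKey) >= 0) continue;
            minKey = data[succ + i].Key;
            delta = i;
          }
          return delta;
        }
      }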
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs

    r14862 → r15249

@@ ... @@
 using System;
 using System.Collections.Generic;
-using System.Linq;
-using HeuristicLab.Common;
 
 namespace HeuristicLab.Algorithms.DataAnalysis {
@@ ... @@
   /// Space partitioning tree (SPTree)
   /// </summary>
-  public class SpacePartitioningTree : ISpacePartitioningTree {
-    private const uint QT_NODE_CAPACITY = 1;
-
-    private double[] buff;
-    private SpacePartitioningTree parent;
+  internal class SpacePartitioningTree {
+    private const uint QtNodeCapacity = 1;
+
+    #region Fields
     private int dimension;
     private bool isLeaf;
@@ ... @@
     // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children
     private double[] centerOfMass;
-    private readonly int[] index = new int[QT_NODE_CAPACITY];
+    private readonly int[] index = new int[QtNodeCapacity];
 
     // Children
     private SpacePartitioningTree[] children;
     private uint noChildren;
+    #endregion
 
     public SpacePartitioningTree(double[,] inpData) {
@@ ... @@
       var width = new double[d];
       for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - minY[i]) + 1e-5;
-      Init(null, inpData, meanY, width);
+      Init(inpData, meanY, width);
       Fill(n);
     }
 
-    public SpacePartitioningTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
-      Init(null, inpData, impCorner, impWith);
-    }
-    public SpacePartitioningTree(SpacePartitioningTree parent, double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
-      Init(parent, inpData, impCorner, impWith);
-    }
-
-    public ISpacePartitioningTree GetParent() {
-      return parent;
+    private SpacePartitioningTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {
+      Init(inpData, impCorner, impWith);
     }
 
@@ ... @@
 
       // If there is space in this quad tree and it is a leaf, add the object here
-      if (isLeaf && size < QT_NODE_CAPACITY) {
+      if (isLeaf && size < QtNodeCapacity) {
         index[size] = newIndex;
         size++;
@@ ... @@
       }
 
-      // Don't add duplicates for now (this is not very nice)
+      // Don't add duplicates
       var anyDuplicate = false;
       for (uint n = 0; n < size; n++) {
@@ ... @@
     }
 
-    public void Subdivide() {
-      // Create new children
-      var newCorner = new double[dimension];
-      var newWidth = new double[dimension];
-      for (var i = 0; i < noChildren; i++) {
-        var div = 1;
-        for (var d = 0; d < dimension; d++) {
-          newWidth[d] = .5 * boundary.GetWidth(d);
-          if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);
-          else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d);
-          div *= 2;
-        }
-        children[i] = new SpacePartitioningTree(this, data, newCorner, newWidth);
-      }
-
-      // Move existing points to correct children
-      for (var i = 0; i < size; i++) {
-        var success = false;
-        for (var j = 0; j < noChildren; j++) {
-          if (!success) success = children[j].Insert(index[i]);
-        }
-        index[i] = -1; // as in tSNE implementation by van der Maaten
-      }
-
-      // Empty parent node
-      size = 0;
-      isLeaf = false;
-    }
-
-    public bool IsCorrect() {
-      var row = new double[dimension];
-      for (var n = 0; n < size; n++) {
-        Buffer.BlockCopy(data, sizeof(double) * dimension * index[n], row, 0, sizeof(double) * dimension);
-        if (!boundary.ContainsPoint(row)) return false;
-      }
-      if (isLeaf) return true;
-      var correct = true;
-      for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect();
-      return correct;
-    }
-
-    public void GetAllIndices(int[] indices) {
-      GetAllIndices(indices, 0);
-    }
-
-    public int GetAllIndices(int[] indices, int loc) {
-      // Gather indices in current quadrant
-      for (var i = 0; i < size; i++) indices[loc + i] = index[i];
-      loc += (int)size;
-      // Gather indices in children
-      if (isLeaf) return loc;
-      for (var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc);
-      return loc;
-    }
-
-    public int GetDepth() {
-      return isLeaf ? 1 : 1 + children.Max(x => x.GetDepth());
-    }
-
     public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ) {
       // Make sure that we spend no time on empty nodes or self-interactions
@@ ... @@
       // Compute distance between point and center-of-mass
       var D = .0;
+      var buff = new double[dimension];
       for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d];
       for (var d = 0; d < dimension; d++) D += buff[d] * buff[d];
@@ ... @@
       for (var d = 0; d < dimension; d++) {
         var curWidth = boundary.GetWidth(d);
-        maxWidth = (maxWidth > curWidth) ? maxWidth : curWidth;
+        maxWidth = maxWidth > curWidth ? maxWidth : curWidth;
       }
       if (isLeaf || maxWidth / Math.Sqrt(D) < theta) {
@@ ... @@
     }
 
-    // does not use the tree
-    public void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF) {
+    public static void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF, double[,] data, int dimension) {
       // Loop over all edges in the graph
       for (var k = 0; k < n; k++) {
@@ ... @@
           // uses squared distance
           var d = 1.0;
+          var buff = new double[dimension];
           for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j];
           for (var j = 0; j < dimension; j++) d += buff[j] * buff[j];
@@ ... @@
       for (var i = 0; i < n; i++) Insert(i);
     }
-    private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) {
-      parent = p;
+
+    private void Init(double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) {
       dimension = inpData.GetLength(1);
       noChildren = 2;
@@ ... @@
       cumulativeSize = 0;
       boundary = new Cell((uint)dimension);
+
       inpCorner.ForEach((i, x) => boundary.SetCorner(i, x));
       inpWidth.ForEach((i, x) => boundary.SetWidth(i, x));
@@ ... @@
       children = new SpacePartitioningTree[noChildren];
       centerOfMass = new double[dimension];
-      buff = new double[dimension];
-
+    }
+
+    private void Subdivide() {
+      // Create new children
+      var newCorner = new double[dimension];
+      var newWidth = new double[dimension];
+      for (var i = 0; i < noChildren; i++) {
+        var div = 1;
+        for (var d = 0; d < dimension; d++) {
+          newWidth[d] = .5 * boundary.GetWidth(d);
+          if (i / div % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);
+          else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d);
+          div *= 2;
+        }
+        children[i] = new SpacePartitioningTree(data, newCorner, newWidth);
+      }
+
+      // Move existing points to correct children
+      for (var i = 0; i < size; i++) {
+        var success = false;
+        for (var j = 0; j < noChildren; j++) {
+          if (!success) success = children[j].Insert(index[i]);
+        }
+        index[i] = -1; // as in tSNE implementation by van der Maaten
+      }
+      // Empty parent node
+      size = 0;
+      isLeaf = false;
     }
     #endregion
-
 
     private class Cell {
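
    ComputeNonEdgeForces above implements the Barnes-Hut acceptance test: a cell's points may be summarized by their center of mass when the cell is small relative to its distance from the query point. A sketch of the criterion, assuming D is the squared distance to the center of mass and maxWidth the largest cell extent, as in the code:

      // theta = 0 disables summarization and yields the exact O(N²) force computation;
      // leaves are always summarized since they contain at most QtNodeCapacity points.
      static bool CanSummarize(bool isLeaf, double maxWidth, double D, double theta) {
        return isLeaf || maxWidth / Math.Sqrt(D) < theta;
      }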
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs

    r14863 r15249  
    3232using HeuristicLab.Parameters;
    3333using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     34using HeuristicLab.PluginInfrastructure;
    3435using HeuristicLab.Problems.DataAnalysis;
    3536using HeuristicLab.Random;
     
    4142  /// </summary>
    4243  [Item("tSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " +
    43                 "dimensional space to allow visual cluster identification.")]
     44                "dimensional space to allow visual cluster identification. Implemented similar to: https://lvdmaaten.github.io/tsne/#implementations (Barnes-Hut t-SNE). Described in : https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf")]
    4445  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)]
    4546  [StorableClass]
     
    5758
    5859    #region parameter names
    59     private const string DistanceParameterName = "DistanceFunction";
     60    private const string DistanceFunctionParameterName = "DistanceFunction";
    6061    private const string PerplexityParameterName = "Perplexity";
    6162    private const string ThetaParameterName = "Theta";
     
    6970    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
    7071    private const string SeedParameterName = "Seed";
    71     private const string ClassesParameterName = "ClassNames";
     72    private const string ClassesNameParameterName = "ClassesName";
    7273    private const string NormalizationParameterName = "Normalization";
    7374    private const string UpdateIntervalParameterName = "UpdateInterval";
     
    8687      get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
    8788    }
    88     public IFixedValueParameter<DoubleValue> ThetaParameter {
    89       get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }
     89    public IFixedValueParameter<PercentValue> ThetaParameter {
     90      get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; }
    9091    }
    9192    public IFixedValueParameter<IntValue> NewDimensionsParameter {
    9293      get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }
    9394    }
    94     public IValueParameter<IDistance<double[]>> DistanceParameter {
    95       get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<double[]>>; }
     95    public IConstrainedValueParameter<IDistance<double[]>> DistanceFunctionParameter {
     96      get { return Parameters[DistanceFunctionParameterName] as IConstrainedValueParameter<IDistance<double[]>>; }
    9697    }
    9798    public IFixedValueParameter<IntValue> MaxIterationsParameter {
     
    119120      get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
    120121    }
    121     public IFixedValueParameter<StringValue> ClassesParameter {
    122       get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; }
     122    public IConstrainedValueParameter<StringValue> ClassesNameParameter {
     123      get { return Parameters[ClassesNameParameterName] as IConstrainedValueParameter<StringValue>; }
    123124    }
    124125    public IFixedValueParameter<BoolValue> NormalizationParameter {
     
    131132
    132133    #region  Properties
    133     public IDistance<double[]> Distance {
    134       get { return DistanceParameter.Value; }
     134    public IDistance<double[]> DistanceFunction {
     135      get { return DistanceFunctionParameter.Value; }
    135136    }
    136137    public double Perplexity {
     
    178179      set { SeedParameter.Value.Value = value; }
    179180    }
    180     public string Classes {
    181       get { return ClassesParameter.Value.Value; }
    182       set { ClassesParameter.Value.Value = value; }
     181    public string ClassesName {
     182      get { return ClassesNameParameter.Value != null ? ClassesNameParameter.Value.Value : null; }
     183      set { ClassesNameParameter.Value.Value = value; }
    183184    }
    184185    public bool Normalization {
     
    198199
    199200    private TSNEAlgorithm(TSNEAlgorithm original, Cloner cloner) : base(original, cloner) {
    200       if(original.dataRowNames!=null)
    201       this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames);
     201      if (original.dataRowNames != null)
     202        this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames);
    202203      if (original.dataRows != null)
    203204        this.dataRows = original.dataRows.ToDictionary(kvp => kvp.Key, kvp => cloner.Clone(kvp.Value));
     
    208209    public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); }
    209210    public TSNEAlgorithm() {
    210       Problem = new RegressionProblem();
    211       Parameters.Add(new ValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance()));
     211      var distances = new ItemSet<IDistance<double[]>>(ApplicationManager.Manager.GetInstances<IDistance<double[]>>());
     212      Parameters.Add(new ConstrainedValueParameter<IDistance<double[]>>(DistanceFunctionParameterName, "The distance function used to differentiate similar from non-similar points", distances, distances.OfType<EuclideanDistance>().FirstOrDefault()));
    212213      Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25)));
    213       Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated " +
     214      Parameters.Add(new FixedValueParameter<PercentValue>(ThetaParameterName, "Value describing how much appoximated " +
    214215                                                                              "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " +
    215216                                                                              "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " +
    216217                                                                              "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " +
    217218                                                                              "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. It may give" +
    218                                                                               " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new DoubleValue(0)));
     219                                                                              " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0)));
    219220      Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2)));
    220221      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent.", new IntValue(1000)));
     
    226227      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random.", new BoolValue(true)));
    227228      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random.", new IntValue(0)));
    228       Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "Name of the column specifying the class labels of each data point. If the label column cannot be found, training/test is used as labels.", new StringValue("none")));
     229      Parameters.Add(new OptionalConstrainedValueParameter<StringValue>(ClassesNameParameterName, "Name of the column specifying the class labels of each data point. If this is not set, training/test is used as labels."));
    229230      Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true)));
    230       Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(50)));
     231      Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "The interval after which the results will be updated.", new IntValue(50)));
    231232      Parameters[UpdateIntervalParameterName].Hidden = true;
    232233
     
    236237      StopLyingIterationParameter.Hidden = true;
    237238      EtaParameter.Hidden = false;
     239      Problem = new RegressionProblem();
    238240    }
    239241    #endregion
     
    269271        if (Normalization) data = NormalizeData(data);
    270272
    271         state = TSNEStatic<double[]>.CreateState(data, Distance, random, NewDimensions, Perplexity, Theta,
     273        state = TSNEStatic<double[]>.CreateState(data, DistanceFunction, random, NewDimensions, Perplexity, Theta,
    272274          StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);
    273275
     
    283285    }
    284286
     287    #region Events
     288    protected override void OnProblemChanged() {
     289      base.OnProblemChanged();
     290      if (Problem == null) return;
     291      OnProblemDataChanged(this, null);
     292    }
     293
     294    protected override void RegisterProblemEvents() {
     295      base.RegisterProblemEvents();
     296      Problem.ProblemDataChanged += OnProblemDataChanged;
     297    }
     298    protected override void DeregisterProblemEvents() {
     299      base.DeregisterProblemEvents();
     300      Problem.ProblemDataChanged -= OnProblemDataChanged;
     301    }
     302
     303    private void OnProblemDataChanged(object sender, EventArgs args) {
     304      if (Problem == null || Problem.ProblemData == null) return;
     305      if (!Parameters.ContainsKey(ClassesNameParameterName)) return;
     306      ClassesNameParameter.ValidValues.Clear();
     307      foreach (var input in Problem.ProblemData.InputVariables) ClassesNameParameter.ValidValues.Add(input);
     308    }
     309
     310    #endregion
     311
     312    #region Helpers
    285313    private void SetUpResults(IReadOnlyCollection<double[]> data) {
    286314      if (Results == null) return;
     
    291319
    292320      // color datapoints according to the classes variable (be it double or string)
    293       if (problemData.Dataset.VariableNames.Contains(Classes)) {
    294         if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {
    295           var classes = problemData.Dataset.GetStringValues(Classes).ToArray();
     321      if (problemData.Dataset.VariableNames.Contains(ClassesName)) {
     322        if ((problemData.Dataset as Dataset).VariableHasType<string>(ClassesName)) {
     323          var classes = problemData.Dataset.GetStringValues(ClassesName).ToArray();
    296324          for (var i = 0; i < classes.Length; i++) {
    297325            if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());
    298326            dataRowNames[classes[i]].Add(i);
    299327          }
    300         } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
    301           var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray();
     328        } else if ((problemData.Dataset as Dataset).VariableHasType<double>(ClassesName)) {
     329          var classValues = problemData.Dataset.GetDoubleValues(ClassesName).ToArray();
    302330          var max = classValues.Max() + 0.1;
    303331          var min = classValues.Min() - 0.1;
     
    377405      for (var i = 0; i < data.GetLength(0); i++) {
    378406        for (var j = 0; j < data.GetLength(1); j++) {
    379           res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
     407          var d = max[j] - min[j];
     408          var s = data[i, j] - (max[j] + min[j]) / 2;  //shift data
     409          if (d.IsAlmost(0)) res[i, j] = data[i, j];   //no scaling possible
     410          else res[i, j] = s / d;  //scale data
    380411        }
    381412      }
     
    395426      for (var i = 0; i < data.Count; i++) {
    396427        nData[i] = new double[n];
    397         for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / max[j];
     428        for (var j = 0; j < n; j++) nData[i][j] = max[j].IsAlmost(0) ? data[i][j] - mean[j] : (data[i][j] - mean[j]) / max[j];
    398429      }
    399430      return nData;
     
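Note on the two normalization routines above: the matrix overload rescales every column linearly into [-0.5, 0.5], while the jagged-array overload centers each column on its mean and divides by a per-column maximum computed earlier (not shown in this hunk). As a worked example of the first: a column with min = 2 and max = 6 is shifted by (max + min) / 2 = 4 and divided by the range d = 4, so the values 2, 4, 6 map to -0.5, 0, 0.5; a constant column (d almost 0) is passed through unchanged to avoid a division by zero.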
    416447      return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")";
    417448    }
     449    #endregion
    418450  }
    419451}
  - stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs

    r14862 r15249  
    5656using System;
    5757using System.Collections.Generic;
    58 using System.Linq;
    5958using HeuristicLab.Collections;
    6059using HeuristicLab.Common;
    6160using HeuristicLab.Core;
    62 using HeuristicLab.Optimization;
    6361using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    6462using HeuristicLab.Random;
     
    7068    [StorableClass]
    7169    public sealed class TSNEState : DeepCloneable {
     70      #region Storables
    7271      // initialized once
    7372      [Storable]
     
    122121      [Storable]
    123122      public double[,] dY;
    124 
     123      #endregion
     124
     125      #region Constructors & Cloning
    125126      private TSNEState(TSNEState original, Cloner cloner) : base(original, cloner) {
    126         this.distance = cloner.Clone(original.distance);
    127         this.random = cloner.Clone(original.random);
    128         this.perplexity = original.perplexity;
    129         this.exact = original.exact;
    130         this.noDatapoints = original.noDatapoints;
    131         this.finalMomentum = original.finalMomentum;
    132         this.momSwitchIter = original.momSwitchIter;
    133         this.stopLyingIter = original.stopLyingIter;
    134         this.theta = original.theta;
    135         this.eta = original.eta;
    136         this.newDimensions = original.newDimensions;
    137         if(original.valP != null) {
    138           this.valP = new double[original.valP.Length];
    139           Array.Copy(original.valP, this.valP, this.valP.Length);
    140         }
    141         if(original.rowP != null) {
    142           this.rowP = new int[original.rowP.Length];
    143           Array.Copy(original.rowP, this.rowP, this.rowP.Length);
    144         }
    145         if(original.colP != null) {
    146           this.colP = new int[original.colP.Length];
    147           Array.Copy(original.colP, this.colP, this.colP.Length);
    148         }
    149         if(original.p != null) {
    150           this.p = new double[original.p.GetLength(0), original.p.GetLength(1)];
    151           Array.Copy(original.p, this.p, this.p.Length);
    152         }
    153         this.newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)];
    154         Array.Copy(original.newData, this.newData, this.newData.Length);
    155         this.iter = original.iter;
    156         this.currentMomentum = original.currentMomentum;
    157         this.gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];
    158         Array.Copy(original.gains, this.gains, this.gains.Length);
    159         this.uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];
    160         Array.Copy(original.uY, this.uY, this.uY.Length);
    161         this.dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];
    162         Array.Copy(original.dY, this.dY, this.dY.Length);
     127        distance = cloner.Clone(original.distance);
     128        random = cloner.Clone(original.random);
     129        perplexity = original.perplexity;
     130        exact = original.exact;
     131        noDatapoints = original.noDatapoints;
     132        finalMomentum = original.finalMomentum;
     133        momSwitchIter = original.momSwitchIter;
     134        stopLyingIter = original.stopLyingIter;
     135        theta = original.theta;
     136        eta = original.eta;
     137        newDimensions = original.newDimensions;
     138        if (original.valP != null) {
     139          valP = new double[original.valP.Length];
     140          Array.Copy(original.valP, valP, valP.Length);
     141        }
     142        if (original.rowP != null) {
     143          rowP = new int[original.rowP.Length];
     144          Array.Copy(original.rowP, rowP, rowP.Length);
     145        }
     146        if (original.colP != null) {
     147          colP = new int[original.colP.Length];
     148          Array.Copy(original.colP, colP, colP.Length);
     149        }
     150        if (original.p != null) {
     151          p = new double[original.p.GetLength(0), original.p.GetLength(1)];
     152          Array.Copy(original.p, p, p.Length);
     153        }
     154        newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)];
     155        Array.Copy(original.newData, newData, newData.Length);
     156        iter = original.iter;
     157        currentMomentum = original.currentMomentum;
     158        gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];
     159        Array.Copy(original.gains, gains, gains.Length);
     160        uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];
     161        Array.Copy(original.uY, uY, uY.Length);
     162        dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];
     163        Array.Copy(original.dY, dY, dY.Length);
    163164      }
    164165
     
    177178        this.stopLyingIter = stopLyingIter;
    178179        this.momSwitchIter = momSwitchIter;
    179         this.currentMomentum = momentum;
     180        currentMomentum = momentum;
    180181        this.finalMomentum = finalMomentum;
    181182        this.eta = eta;
    182183
    183 
    184184        // initialize
    185185        noDatapoints = data.Length;
    186         if(noDatapoints - 1 < 3 * perplexity)
     186        if (noDatapoints - 1 < 3 * perplexity)
    187187          throw new ArgumentException("Perplexity too large for the number of data points!");
    188188
     
    192192        uY = new double[noDatapoints, newDimensions];
    193193        gains = new double[noDatapoints, newDimensions];
    194         for(var i = 0; i < noDatapoints; i++)
    195           for(var j = 0; j < newDimensions; j++)
     194        for (var i = 0; i < noDatapoints; i++)
     195          for (var j = 0; j < newDimensions; j++)
    196196            gains[i, j] = 1.0;
    197197
     
    206206
    207207        // Lie about the P-values (factor is 4 in the MATLAB implementation)
    208         if(exact) for(var i = 0; i < noDatapoints; i++) for(var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
    209         else for(var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
     208        if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
     209        else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
    210210
    211211        // Initialize solution (randomly)
    212212        var rand = new NormalDistributedRandom(random, 0, 1);
    213         for(var i = 0; i < noDatapoints; i++)
    214           for(var j = 0; j < newDimensions; j++)
     213        for (var i = 0; i < noDatapoints; i++)
     214          for (var j = 0; j < newDimensions; j++)
    215215            newData[i, j] = rand.NextDouble() * .0001;
    216216      }
     217      #endregion
    217218
    218219      public double EvaluateError() {
     
    222223      }
    223224
     225      #region Helpers
    224226      private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
    225227        // Compute asymmetric pairwise input similarities
     
    233235        valP = sValP;
    234236        var sumP = .0;
    235         for(var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
    236         for(var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
     237        for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
     238        for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
    237239      }
    238240
     
    242244        ComputeGaussianPerplexity(data, distance, p, perplexity);
    243245        // Symmetrize input similarities
    244         for(var n = 0; n < data.Length; n++) {
    245           for(var m = n + 1; m < data.Length; m++) {
     246        for (var n = 0; n < data.Length; n++) {
     247          for (var m = n + 1; m < data.Length; m++) {
    246248            p[n, m] += p[m, n];
    247249            p[m, n] = p[n, m];
     
    249251        }
    250252        var sumP = .0;
    251         for(var i = 0; i < data.Length; i++) for(var j = 0; j < data.Length; j++) sumP += p[i, j];
    252         for(var i = 0; i < data.Length; i++) for(var j = 0; j < data.Length; j++) p[i, j] /= sumP;
     253        for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
     254        for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
    253255        return p;
    254256      }
    255257
    256258      private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
    257         if(perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");
    258 
    259         int n = x.Count;
     259        if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");
     260
     261        var n = x.Count;
    260262        // Allocate the memory we need
    261263        rowP = new int[n + 1];
     
    264266        var curP = new double[n - 1];
    265267        rowP[0] = 0;
    266         for(var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
     268        for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
    267269
    268270        var objX = new List<IndexedItem<T>>();
    269         for(var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
     271        for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
    270272
    271273        // Build ball tree on data set
     
    273275
    274276        // Loop over all points to find nearest neighbors
    275         for(var i = 0; i < n; i++) {
     277        for (var i = 0; i < n; i++) {
    276278          IList<IndexedItem<T>> indices;
    277279          IList<double> distances;
     
    285287          var minBeta = double.MinValue;
    286288          var maxBeta = double.MaxValue;
    287           const double tol = 1e-5; 
     289          const double tol = 1e-5;
    288290
    289291          // Iterate until we found a good perplexity
    290292          var iter = 0; double sumP = 0;
    291           while(!found && iter < 200) { 
     293          while (!found && iter < 200) {
    292294
    293295            // Compute Gaussian kernel row
    294             for(var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
     296            for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
    295297
    296298            // Compute entropy of current row
    297299            sumP = double.Epsilon;
    298             for(var m = 0; m < k; m++) sumP += curP[m];
     300            for (var m = 0; m < k; m++) sumP += curP[m];
    299301            var h = .0;
    300             for(var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
     302            for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
    301303            h = h / sumP + Math.Log(sumP);
    302304
    303305            // Evaluate whether the entropy is within the tolerance level
    304306            var hdiff = h - Math.Log(perplexity);
    305             if(hdiff < tol && -hdiff < tol) {
     307            if (hdiff < tol && -hdiff < tol) {
    306308              found = true;
    307309            } else {
    308               if(hdiff > 0) {
     310              if (hdiff > 0) {
    309311                minBeta = beta;
    310                 if(maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
     312                if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
    311313                  beta *= 2.0;
    312314                else
     
    314316              } else {
    315317                maxBeta = beta;
    316                 if(minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     318                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
    317319                  beta /= 2.0;
    318320                else
     
    326328
    327329          // Row-normalize current row of P and store in matrix
    328           for(var m = 0; m < k; m++) curP[m] /= sumP;
    329           for(var m = 0; m < k; m++) {
     330          for (var m = 0; m < k; m++) curP[m] /= sumP;
     331          for (var m = 0; m < k; m++) {
    330332            colP[rowP[i] + m] = indices[m + 1].Index;
    331333            valP[rowP[i] + m] = curP[m];
     
    337339        var dd = ComputeDistances(x, distance);
    338340
    339         int n = x.Length;
     341        var n = x.Length;
    340342        // Compute the Gaussian kernel row by row
    341         for(var i = 0; i < n; i++) {
     343        for (var i = 0; i < n; i++) {
    342344          // Initialize some variables
    343345          var found = false;
     
    350352          // Iterate until we found a good perplexity
    351353          var iter = 0;
    352           while(!found && iter < 200) {      // 200 iterations as in tSNE implementation by van der Maarten
     354          while (!found && iter < 200) {      // 200 iterations as in tSNE implementation by van der Maarten
    353355
    354356            // Compute Gaussian kernel row
    355             for(var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
     357            for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
    356358            p[i, i] = double.Epsilon;
    357359
    358360            // Compute entropy of current row
    359361            sumP = double.Epsilon;
    360             for(var m = 0; m < n; m++) sumP += p[i, m];
     362            for (var m = 0; m < n; m++) sumP += p[i, m];
    361363            var h = 0.0;
    362             for(var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
     364            for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
    363365            h = h / sumP + Math.Log(sumP);
    364366
    365367            // Evaluate whether the entropy is within the tolerance level
    366368            var hdiff = h - Math.Log(perplexity);
    367             if(hdiff < tol && -hdiff < tol) {
     369            if (hdiff < tol && -hdiff < tol) {
    368370              found = true;
    369371            } else {
    370               if(hdiff > 0) {
     372              if (hdiff > 0) {
    371373                minBeta = beta;
    372                 if(maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
     374                if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
    373375                  beta *= 2.0;
    374376                else
     
    376378              } else {
    377379                maxBeta = beta;
    378                 if(minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
     380                if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
    379381                  beta /= 2.0;
    380382                else
     
    388390
    389391          // Row normalize P
    390           for(var m = 0; m < n; m++) p[i, m] /= sumP;
     392          for (var m = 0; m < n; m++) p[i, m] /= sumP;
    391393        }
    392394      }
     
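Both ComputeGaussianPerplexity overloads share the same inner loop: a bisection on the kernel precision beta until the entropy h of the conditional distribution over neighbours matches log(perplexity). A minimal sketch of that search, where Entropy(...) is a hypothetical placeholder for the curP/sumP/h computation shown above:

    // Sketch only; Entropy(distances, beta) stands in for the Gaussian-row and
    // entropy computation in the loops above.
    var beta = 1.0;
    var minBeta = double.MinValue;
    var maxBeta = double.MaxValue;
    const double tol = 1e-5;
    for (var iter = 0; iter < 200; iter++) {    // 200 iterations as in van der Maaten's implementation
      var hdiff = Entropy(distances, beta) - Math.Log(perplexity);
      if (Math.Abs(hdiff) < tol) break;         // entropy matches -> beta found
      if (hdiff > 0) {                          // entropy too high -> increase precision
        minBeta = beta;
        beta = maxBeta.IsAlmost(double.MaxValue) ? beta * 2.0 : (beta + maxBeta) / 2.0;
      } else {                                  // entropy too low -> decrease precision
        maxBeta = beta;
        beta = minBeta.IsAlmost(double.MinValue) ? beta / 2.0 : (beta + minBeta) / 2.0;
      }
    }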
    394396      private static double[][] ComputeDistances(T[] x, IDistance<T> distance) {
    395397        var res = new double[x.Length][];
    396         for(int r = 0; r < x.Length; r++) {
     398        for (var r = 0; r < x.Length; r++) {
    397399          var rowV = new double[x.Length];
    398400          // all distances must be symmetric
    399           for(int c = 0; c < r; c++) {
     401          for (var c = 0; c < r; c++) {
    400402            rowV[c] = res[c][r];
    401403          }
    402404          rowV[r] = 0.0; // distance to self is zero for all distances
    403           for(int c = r + 1; c < x.Length; c++) {
     405          for (var c = r + 1; c < x.Length; c++) {
    404406            rowV[c] = distance.Get(x[r], x[c]);
    405407          }
     
    418420        // Compute Q-matrix and normalization sum
    419421        var sumQ = double.Epsilon;
    420         for(var n1 = 0; n1 < n; n1++) {
    421           for(var m = 0; m < n; m++) {
    422             if(n1 != m) {
     422        for (var n1 = 0; n1 < n; n1++) {
     423          for (var m = 0; m < n; m++) {
     424            if (n1 != m) {
    423425              q[n1, m] = 1 / (1 + dd[n1, m]);
    424426              sumQ += q[n1, m];
     
    426428          }
    427429        }
    428         for(var i = 0; i < n; i++) for(var j = 0; j < n; j++) q[i, j] /= sumQ;
     430        for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;
    429431
    430432        // Sum t-SNE error
    431433        var c = .0;
    432         for(var i = 0; i < n; i++)
    433           for(var j = 0; j < n; j++) {
     434        for (var i = 0; i < n; i++)
     435          for (var j = 0; j < n; j++) {
    434436            c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
    435437          }
     
    437439      }
    438440
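Both error evaluations compute the Kullback-Leibler divergence that tSNE minimizes, C = sum over i != j of p[i, j] * log(p[i, j] / q[i, j]), where q[i, j] is the Student-t kernel (1 + ||y_i - y_j||^2)^-1 normalized by sumQ. EvaluateError (above) builds the dense Q-matrix; EvaluateErrorApproximate (below) estimates sumQ with the space-partitioning tree and sums the divergence only over the edges stored in the sparse rowP/colP/valP structure.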
    439       // The mapping of the approximate tSNE looks good but the error curve never changes.
    440441      private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
    441442        // Get estimate of normalization term
     
    444445        var tree = new SpacePartitioningTree(y);
    445446        var buff = new double[d];
    446         double sumQ = 0.0;
    447         for(var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);
     447        var sumQ = 0.0;
     448        for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);
    448449
    449450        // Loop over all edges to compute t-SNE error
    450451        var c = .0;
    451         for(var k = 0; k < n; k++) {
    452           for(var i = rowP[k]; i < rowP[k + 1]; i++) {
     452        for (var k = 0; k < n; k++) {
     453          for (var i = rowP[k]; i < rowP[k + 1]; i++) {
    453454            var q = .0;
    454             for(var j = 0; j < d; j++) buff[j] = y[k, j];
    455             for(var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
    456             for(var j = 0; j < d; j++) q += buff[j] * buff[j];
     455            for (var j = 0; j < d; j++) buff[j] = y[k, j];
     456            for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
     457            for (var j = 0; j < d; j++) q += buff[j] * buff[j];
    457458            q = (1.0 / (1.0 + q)) / sumQ;
    458459            c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));
     
    466467        var n = rowP.Count - 1;
    467468        var rowCounts = new int[n];
    468         for(var j = 0; j < n; j++) {
    469           for(var i = rowP[j]; i < rowP[j + 1]; i++) {
     469        for (var j = 0; j < n; j++) {
     470          for (var i = rowP[j]; i < rowP[j + 1]; i++) {
    470471
    471472            // Check whether element (col_P[i], n) is present
    472473            var present = false;
    473             for(var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
    474               if(colP[m] == j) present = true;
     474            for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
     475              if (colP[m] == j) present = true;
    475476            }
    476             if(present) rowCounts[j]++;
     477            if (present) rowCounts[j]++;
    477478            else {
    478479              rowCounts[j]++;
     
    482483        }
    483484        var noElem = 0;
    484         for(var i = 0; i < n; i++) noElem += rowCounts[i];
     485        for (var i = 0; i < n; i++) noElem += rowCounts[i];
    485486
    486487        // Allocate memory for symmetrized matrix
     
    491492        // Construct new row indices for symmetric matrix
    492493        symRowP[0] = 0;
    493         for(var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];
     494        for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];
    494495
    495496        // Fill the result matrix
    496497        var offset = new int[n];
    497         for(var j = 0; j < n; j++) {
    498           for(var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
     498        for (var j = 0; j < n; j++) {
     499          for (var i = rowP[j]; i < rowP[j + 1]; i++) {                                  // considering element(n, colP[i])
    499500
    500501            // Check whether element (col_P[i], n) is present
    501502            var present = false;
    502             for(var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
    503               if(colP[m] != j) continue;
     503            for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
     504              if (colP[m] != j) continue;
    504505              present = true;
    505               if(j > colP[i]) continue; // make sure we do not add elements twice
     506              if (j > colP[i]) continue; // make sure we do not add elements twice
    506507              symColP[symRowP[j] + offset[j]] = colP[i];
    507508              symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
     
    511512
    512513            // If (colP[i], n) is not present, there is no addition involved
    513             if(!present) {
     514            if (!present) {
    514515              symColP[symRowP[j] + offset[j]] = colP[i];
    515516              symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
     
    519520
    520521            // Update offsets
    521             if(present && (j > colP[i])) continue;
     522            if (present && (j > colP[i])) continue;
    522523            offset[j]++;
    523             if(colP[i] != j) offset[colP[i]]++;
    524           }
    525         }
    526 
    527         for(var i = 0; i < noElem; i++) symValP[i] /= 2.0;
    528       }
     524            if (colP[i] != j) offset[colP[i]]++;
     525          }
     526        }
     527
     528        for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;
     529      }
     530      #endregion
    529531    }
    530532
    531533    /// <summary>
    532     /// Simple interface to tSNE
     534    /// Static interface to tSNE
    533535    /// </summary>
    534536    /// <param name="data"></param>
     
    548550      int newDimensions = 2, double perplexity = 25, int iterations = 1000,
    549551      double theta = 0,
    550       int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5,
    551       double finalMomentum = .8, double eta = 200.0
     552      int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
     553      double finalMomentum = .8, double eta = 10.0
    552554      ) {
    553555      var state = CreateState(data, distance, random, newDimensions, perplexity,
    554556        theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
    555557
    556       for(int i = 0; i < iterations - 1; i++) {
     558      for (var i = 0; i < iterations - 1; i++) {
    557559        Iterate(state);
    558560      }
     
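A hedged usage sketch of this static interface; MersenneTwister is assumed to be the IRandom implementation from HeuristicLab.Random, and data is assumed to be a double[][] of input vectors:

    // Illustrative only: project data to 2D with the static interface above.
    var random = new MersenneTwister(42);
    var state = TSNEStatic<double[]>.CreateState(
      data, new EuclideanDistance(), random,
      newDimensions: 2, perplexity: 25, theta: 0.5);    // theta > 0 selects the Barnes-Hut path
    double[,] projection = null;
    for (var i = 0; i < 1000; i++)
      projection = TSNEStatic<double[]>.Iterate(state); // returns the current embedding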
    562564    public static TSNEState CreateState(T[] data, IDistance<T> distance, IRandom random,
    563565      int newDimensions = 2, double perplexity = 25, double theta = 0,
    564       int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5,
    565       double finalMomentum = .8, double eta = 200.0
     566      int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
     567      double finalMomentum = .8, double eta = 10.0
    566568      ) {
    567569      return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
    568570    }
    569571
    570 
    571572    public static double[,] Iterate(TSNEState state) {
    572       if(state.exact)
     573      if (state.exact)
    573574        ComputeExactGradient(state.p, state.newData, state.noDatapoints, state.newDimensions, state.dY);
    574575      else
     
    576577
    577578      // Update gains
    578       for(var i = 0; i < state.noDatapoints; i++) {
    579         for(var j = 0; j < state.newDimensions; j++) {
     579      for (var i = 0; i < state.noDatapoints; i++) {
     580        for (var j = 0; j < state.newDimensions; j++) {
    580581          state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])
    581582            ? state.gains[i, j] + .2  // +0.2 and *0.8 are used in two separate implementations of tSNE -> seems to be correct
    582583            : state.gains[i, j] * .8;
    583584
    584           if(state.gains[i, j] < .01) state.gains[i, j] = .01;
     585          if (state.gains[i, j] < .01) state.gains[i, j] = .01;
    585586        }
    586587      }
     
    588589
    589590      // Perform gradient update (with momentum and gains)
    590       for(var i = 0; i < state.noDatapoints; i++)
    591         for(var j = 0; j < state.newDimensions; j++)
     591      for (var i = 0; i < state.noDatapoints; i++)
     592        for (var j = 0; j < state.newDimensions; j++)
    592593          state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];
    593594
    594       for(var i = 0; i < state.noDatapoints; i++)
    595         for(var j = 0; j < state.newDimensions; j++)
     595      for (var i = 0; i < state.noDatapoints; i++)
     596        for (var j = 0; j < state.newDimensions; j++)
    596597          state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
    597598
     
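The update above is gradient descent with momentum and per-component adaptive gains: gains[i, j] grows additively (+0.2) while the gradient dY keeps opposing the current velocity uY (a consistent descent direction) and shrinks multiplicatively (*0.8) otherwise, floored at 0.01; the velocity and position updates are then uY <- momentum * uY - eta * gains * dY and newData <- newData + uY.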
    600601
    601602      // Stop lying about the P-values after a while, and switch momentum
    602       if(state.iter == state.stopLyingIter) {
    603         if(state.exact)
    604           for(var i = 0; i < state.noDatapoints; i++)
    605             for(var j = 0; j < state.noDatapoints; j++)
     603      if (state.iter == state.stopLyingIter) {
     604        if (state.exact)
     605          for (var i = 0; i < state.noDatapoints; i++)
     606            for (var j = 0; j < state.noDatapoints; j++)
    606607              state.p[i, j] /= 12.0;
    607608        else
    608           for(var i = 0; i < state.rowP[state.noDatapoints]; i++)
     609          for (var i = 0; i < state.rowP[state.noDatapoints]; i++)
    609610            state.valP[i] /= 12.0;
    610611      }
    611612
    612       if(state.iter == state.momSwitchIter)
     613      if (state.iter == state.momSwitchIter)
    613614        state.currentMomentum = state.finalMomentum;
    614615
     
    617618    }
    618619
    619 
    620 
     620    #region Helpers
    621621    private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
    622622      var tree = new SpacePartitioningTree(y);
    623       double sumQ = 0.0;
     623      var sumQ = 0.0;
    624624      var posF = new double[n, d];
    625625      var negF = new double[n, d];
    626       tree.ComputeEdgeForces(rowP, colP, valP, n, posF);
     626      SpacePartitioningTree.ComputeEdgeForces(rowP, colP, valP, n, posF, y, d);
    627627      var row = new double[d];
    628       for(var n1 = 0; n1 < n; n1++) {
    629         Array.Clear(row,0,row.Length);
     628      for (var n1 = 0; n1 < n; n1++) {
     629        Array.Clear(row, 0, row.Length);
    630630        tree.ComputeNonEdgeForces(n1, theta, row, ref sumQ);
    631         Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d*sizeof(double));
     631        Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d * sizeof(double));
    632632      }
    633633
    634634      // Compute final t-SNE gradient
    635635      for (var i = 0; i < n; i++)
    636         for(var j = 0; j < d; j++) {
     636        for (var j = 0; j < d; j++) {
    637637          dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
    638638        }
     
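ComputeApproximateGradient splits the tSNE gradient into an attractive part posF, computed exactly over the sparse edges of P, and a repulsive part negF, estimated per point by the space-partitioning tree with an accuracy controlled by theta; the final gradient is dC[i, j] = posF[i, j] - negF[i, j] / sumQ, where sumQ is the running estimate of the Q-normalization accumulated by ComputeNonEdgeForces.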
    640640
    641641    private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {
    642 
    643642      // Make sure the current gradient contains zeros
    644       for(var i = 0; i < n; i++) for(var j = 0; j < d; j++) dC[i, j] = 0.0;
     643      for (var i = 0; i < n; i++) for (var j = 0; j < d; j++) dC[i, j] = 0.0;
    645644
    646645      // Compute the squared Euclidean distance matrix
     
    651650      var q = new double[n, n];
    652651      var sumQ = .0;
    653       for(var n1 = 0; n1 < n; n1++) {
    654         for(var m = 0; m < n; m++) {
    655           if(n1 == m) continue;
     652      for (var n1 = 0; n1 < n; n1++) {
     653        for (var m = 0; m < n; m++) {
     654          if (n1 == m) continue;
    656655          q[n1, m] = 1 / (1 + dd[n1, m]);
    657656          sumQ += q[n1, m];
     
    660659
    661660      // Perform the computation of the gradient
    662       for(var n1 = 0; n1 < n; n1++) {
    663         for(var m = 0; m < n; m++) {
    664           if(n1 == m) continue;
     661      for (var n1 = 0; n1 < n; n1++) {
     662        for (var m = 0; m < n; m++) {
     663          if (n1 == m) continue;
    665664          var mult = (p[n1, m] - q[n1, m] / sumQ) * q[n1, m];
    666           for(var d1 = 0; d1 < d; d1++) {
     665          for (var d1 = 0; d1 < d; d1++) {
    667666            dC[n1, d1] += (y[n1, d1] - y[m, d1]) * mult;
    668667          }
     
    673672    private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {
    674673      var dataSums = new double[n];
    675       for(var i = 0; i < n; i++) {
    676         for(var j = 0; j < d; j++) {
     674      for (var i = 0; i < n; i++) {
     675        for (var j = 0; j < d; j++) {
    677676          dataSums[i] += x[i, j] * x[i, j];
    678677        }
    679678      }
    680       for(var i = 0; i < n; i++) {
    681         for(var m = 0; m < n; m++) {
     679      for (var i = 0; i < n; i++) {
     680        for (var m = 0; m < n; m++) {
    682681          dd[i, m] = dataSums[i] + dataSums[m];
    683682        }
    684683      }
    685       for(var i = 0; i < n; i++) {
     684      for (var i = 0; i < n; i++) {
    686685        dd[i, i] = 0.0;
    687         for(var m = i + 1; m < n; m++) {
     686        for (var m = i + 1; m < n; m++) {
    688687          dd[i, m] = 0.0;
    689           for(var j = 0; j < d; j++) {
     688          for (var j = 0; j < d; j++) {
    690689            dd[i, m] += (x[i, j] - x[m, j]) * (x[i, j] - x[m, j]);
    691690          }
     
    700699      var d = x.GetLength(1);
    701700      var mean = new double[d];
    702       for(var i = 0; i < n; i++) {
    703         for(var j = 0; j < d; j++) {
     701      for (var i = 0; i < n; i++) {
     702        for (var j = 0; j < d; j++) {
    704703          mean[j] += x[i, j];
    705704        }
    706705      }
    707       for(var i = 0; i < d; i++) {
     706      for (var i = 0; i < d; i++) {
    708707        mean[i] /= n;
    709708      }
    710709      // Subtract data mean
    711       for(var i = 0; i < n; i++) {
    712         for(var j = 0; j < d; j++) {
     710      for (var i = 0; i < n; i++) {
     711        for (var j = 0; j < d; j++) {
    713712          x[i, j] -= mean[j];
    714713        }
    715714      }
    716715    }
     716    #endregion
    717717  }
    718718}
  - stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs

    r14862 r15249  
    6868  /// </summary>
    6969  /// <typeparam name="T"></typeparam>
    70   public class VantagePointTree<T> : IVantagePointTree<T> {
    71     #region properties
     70  public class VantagePointTree<T> {
    7271    private readonly List<T> items;
    7372    private readonly Node root;
    7473    private readonly IDistance<T> distance;
    75     #endregion
    7674
    7775    public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() {
     
    8280    }
    8381
     82    /// <summary>
     83    /// Provides the k nearest neighbours to a given target element
     84    /// </summary>
     85    /// <param name="target">The target element</param>
     86    /// <param name="k">How many neighbours</param>
     87    /// <param name="results">The nearest neighbouring elements</param>
     88    /// <param name="distances">The distances from the target corresponding to the neighbouring elements</param>
    8489    public void Search(T target, int k, out IList<T> results, out IList<double> distances) {
    8590      var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k);
    86       double tau = double.MaxValue;
     91      var tau = double.MaxValue;
    8792      Search(root, target, k, heap, ref tau);
    8893      var res = new List<T>();
    8994      var dist = new List<double>();
    9095      while (heap.Size > 0) {
    91         res.Add(items[heap.PeekMinValue().Index]);          // actually max distance
     96        res.Add(items[heap.PeekMinValue().Index]); // actually max distance
    9297        dist.Add(heap.PeekMinValue().Value);
    9398        heap.RemoveMin();
    9499      }
    95       res.Reverse(); 
     100      res.Reverse();
    96101      dist.Reverse();
    97102      results = res;
     
    104109      if (dist < tau) {
    105110        if (heap.Size == k) heap.RemoveMin();   // remove furthest node from result list (if we already have k results)
    106         heap.Insert(-dist, new IndexedItem<double>(node.index, dist));     
     111        heap.Insert(-dist, new IndexedItem<double>(node.index, dist));
    107112        if (heap.Size == k) tau = heap.PeekMinValue().Value;
    108113      }