Changeset 15225


Ignore:
Timestamp:
07/13/17 11:07:11 (11 days ago)
Author:
bwerth
Message:

#2700 worked on review comments for tsne

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs

    r15207 r15225  
    3232using HeuristicLab.Parameters;
    3333using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     34using HeuristicLab.PluginInfrastructure;
    3435using HeuristicLab.Problems.DataAnalysis;
    3536using HeuristicLab.Random;
     
    8687      get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
    8788    }
    88     public IFixedValueParameter<DoubleValue> ThetaParameter {
    89       get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }
     89    public IFixedValueParameter<PercentValue> ThetaParameter {
     90      get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; }
    9091    }
    9192    public IFixedValueParameter<IntValue> NewDimensionsParameter {
    9293      get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }
    9394    }
    94     public IValueParameter<IDistance<double[]>> DistanceParameter {
    95       get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<double[]>>; }
     95    public IConstrainedValueParameter<IDistance<double[]>> DistanceParameter {
     96      get { return Parameters[DistanceParameterName] as IConstrainedValueParameter<IDistance<double[]>>; }
    9697    }
    9798    public IFixedValueParameter<IntValue> MaxIterationsParameter {
     
    119120      get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
    120121    }
    121     public IFixedValueParameter<StringValue> ClassesParameter {
    122       get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; }
     122    public IConstrainedValueParameter<StringValue> ClassesParameter {
     123      get { return Parameters[ClassesParameterName] as IConstrainedValueParameter<StringValue>; }
    123124    }
    124125    public IFixedValueParameter<BoolValue> NormalizationParameter {
     
    179180    }
    180181    public string Classes {
    181       get { return ClassesParameter.Value.Value; }
     182      get { return ClassesParameter.Value != null ? ClassesParameter.Value.Value : null; }
    182183      set { ClassesParameter.Value.Value = value; }
    183184    }
     
    208209    public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); }
    209210    public TSNEAlgorithm() {
    210       Problem = new RegressionProblem();
    211       Parameters.Add(new ValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance()));
     211      var distances = new ItemSet<IDistance<double[]>>(ApplicationManager.Manager.GetInstances<IDistance<double[]>>());
     212      Parameters.Add(new ConstrainedValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", distances, distances.OfType<EuclideanDistance>().FirstOrDefault()));
    212213      Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25)));
    213       Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated " +
     214      Parameters.Add(new FixedValueParameter<PercentValue>(ThetaParameterName, "Value describing how much appoximated " +
    214215                                                                              "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " +
    215216                                                                              "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " +
    216217                                                                              "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " +
    217218                                                                              "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. It may give" +
    218                                                                               " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new DoubleValue(0)));
     219                                                                              " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0)));
    219220      Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2)));
    220221      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent.", new IntValue(1000)));
     
    226227      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random.", new BoolValue(true)));
    227228      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random.", new IntValue(0)));
    228       Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "Name of the column specifying the class lables of each data point. If the label column can not be found training/test is used as labels.", new StringValue("none")));
     229
     230      // Name of the column specifying the class labels of each data point. If the label column cannot be found, training/test is used as labels.
     231      Parameters.Add(new OptionalConstrainedValueParameter<StringValue>(ClassesParameterName, "Name of the column specifying the class lables of each data point."));
    229232      Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true)));
    230233      Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(50)));
     
    236239      StopLyingIterationParameter.Hidden = true;
    237240      EtaParameter.Hidden = false;
     241      Problem = new RegressionProblem();
    238242    }
    239243    #endregion
     
    283287    }
    284288
     289    #region Events
     290    protected override void OnProblemChanged() {
     291      base.OnProblemChanged();
     292      if (Problem == null) return;
     293      OnProblemDataChanged(this, null);
     294    }
     295
     296    protected override void RegisterProblemEvents() {
     297      base.RegisterProblemEvents();
     298      Problem.ProblemDataChanged += OnProblemDataChanged;
     299    }
     300    protected override void DeregisterProblemEvents() {
     301      base.DeregisterProblemEvents();
     302      Problem.ProblemDataChanged -= OnProblemDataChanged;
     303    }
     304
     305    private void OnProblemDataChanged(object sender, EventArgs args) {
     306      if (Problem == null || Problem.ProblemData == null) return;
     307      if (!Parameters.ContainsKey(ClassesParameterName)) return;
     308      ClassesParameter.ValidValues.Clear();
     309      foreach (var input in Problem.ProblemData.InputVariables) ClassesParameter.ValidValues.Add(input);
     310    }
     311
     312    #endregion
     313
     314    #region Helpers
    285315    private void SetUpResults(IReadOnlyCollection<double[]> data) {
    286316      if (Results == null) return;
     
    377407      for (var i = 0; i < data.GetLength(0); i++) {
    378408        for (var j = 0; j < data.GetLength(1); j++) {
    379           res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
     409          var d = max[j] - min[j];
     410          var s = data[i, j] - (max[j] + min[j]) / 2;  //shift data
     411          if (d.IsAlmost(0)) res[i, j] = data[i, j];   //no scaling possible
     412          else res[i, j] = s / d;  //scale data
    380413        }
    381414      }
     
    395428      for (var i = 0; i < data.Count; i++) {
    396429        nData[i] = new double[n];
    397         for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / max[j];
     430        for (var j = 0; j < n; j++) nData[i][j] = max[j].IsAlmost(0) ? data[i][j] - mean[j] : (data[i][j] - mean[j]) / max[j];
    398431      }
    399432      return nData;
     
    416449      return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")";
    417450    }
     451    #endregion
    418452  }
    419453}
Note: See TracChangeset for help on using the changeset viewer.