Changeset 15225 for trunk/sources/HeuristicLab.Algorithms.DataAnalysis
- Timestamp: 07/13/17 11:07:11 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r15207 r15225 32 32 using HeuristicLab.Parameters; 33 33 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 34 using HeuristicLab.PluginInfrastructure; 34 35 using HeuristicLab.Problems.DataAnalysis; 35 36 using HeuristicLab.Random; … … 86 87 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; } 87 88 } 88 public IFixedValueParameter< DoubleValue> ThetaParameter {89 get { return Parameters[ThetaParameterName] as IFixedValueParameter< DoubleValue>; }89 public IFixedValueParameter<PercentValue> ThetaParameter { 90 get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; } 90 91 } 91 92 public IFixedValueParameter<IntValue> NewDimensionsParameter { 92 93 get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; } 93 94 } 94 public I ValueParameter<IDistance<double[]>> DistanceParameter {95 get { return Parameters[DistanceParameterName] as I ValueParameter<IDistance<double[]>>; }95 public IConstrainedValueParameter<IDistance<double[]>> DistanceParameter { 96 get { return Parameters[DistanceParameterName] as IConstrainedValueParameter<IDistance<double[]>>; } 96 97 } 97 98 public IFixedValueParameter<IntValue> MaxIterationsParameter { … … 119 120 get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; } 120 121 } 121 public I FixedValueParameter<StringValue> ClassesParameter {122 get { return Parameters[ClassesParameterName] as I FixedValueParameter<StringValue>; }122 public IConstrainedValueParameter<StringValue> ClassesParameter { 123 get { return Parameters[ClassesParameterName] as IConstrainedValueParameter<StringValue>; } 123 124 } 124 125 public IFixedValueParameter<BoolValue> NormalizationParameter { … … 179 180 } 180 181 public string Classes { 181 get { return ClassesParameter.Value .Value; }182 get { return ClassesParameter.Value != null ? 
ClassesParameter.Value.Value : null; } 182 183 set { ClassesParameter.Value.Value = value; } 183 184 } … … 208 209 public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); } 209 210 public TSNEAlgorithm() { 210 Problem = new RegressionProblem();211 Parameters.Add(new ValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance()));211 var distances = new ItemSet<IDistance<double[]>>(ApplicationManager.Manager.GetInstances<IDistance<double[]>>()); 212 Parameters.Add(new ConstrainedValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", distances, distances.OfType<EuclideanDistance>().FirstOrDefault())); 212 213 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25))); 213 Parameters.Add(new FixedValueParameter< DoubleValue>(ThetaParameterName, "Value describing how much appoximated " +214 Parameters.Add(new FixedValueParameter<PercentValue>(ThetaParameterName, "Value describing how much appoximated " + 214 215 "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " + 215 216 "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " + 216 217 "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " + 217 218 "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. 
It may give" + 218 " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new DoubleValue(0)));219 " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0))); 219 220 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2))); 220 221 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent.", new IntValue(1000))); … … 226 227 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random.", new BoolValue(true))); 227 228 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random.", new IntValue(0))); 228 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "Name of the column specifying the class lables of each data point. If the label column can not be found training/test is used as labels.", new StringValue("none"))); 229 230 //Name of the column specifying the class lables of each data point.If the label column can not be found training/test is used as labels." 
231 Parameters.Add(new OptionalConstrainedValueParameter<StringValue>(ClassesParameterName, "Name of the column specifying the class lables of each data point.")); 229 232 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true))); 230 233 Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(50))); … … 236 239 StopLyingIterationParameter.Hidden = true; 237 240 EtaParameter.Hidden = false; 241 Problem = new RegressionProblem(); 238 242 } 239 243 #endregion … … 283 287 } 284 288 289 #region Events 290 protected override void OnProblemChanged() { 291 base.OnProblemChanged(); 292 if (Problem == null) return; 293 OnProblemDataChanged(this, null); 294 } 295 296 protected override void RegisterProblemEvents() { 297 base.RegisterProblemEvents(); 298 Problem.ProblemDataChanged += OnProblemDataChanged; 299 } 300 protected override void DeregisterProblemEvents() { 301 base.DeregisterProblemEvents(); 302 Problem.ProblemDataChanged -= OnProblemDataChanged; 303 } 304 305 private void OnProblemDataChanged(object sender, EventArgs args) { 306 if (Problem == null || Problem.ProblemData == null) return; 307 if (!Parameters.ContainsKey(ClassesParameterName)) return; 308 ClassesParameter.ValidValues.Clear(); 309 foreach (var input in Problem.ProblemData.InputVariables) ClassesParameter.ValidValues.Add(input); 310 } 311 312 #endregion 313 314 #region Helpers 285 315 private void SetUpResults(IReadOnlyCollection<double[]> data) { 286 316 if (Results == null) return; … … 377 407 for (var i = 0; i < data.GetLength(0); i++) { 378 408 for (var j = 0; j < data.GetLength(1); j++) { 379 res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]); 409 var d = max[j] - min[j]; 410 var s = data[i, j] - (max[j] + min[j]) / 2; //shift data 411 if (d.IsAlmost(0)) res[i, j] = data[i, j]; 
//no scaling possible 412 else res[i, j] = s / d; //scale data 380 413 } 381 414 } … … 395 428 for (var i = 0; i < data.Count; i++) { 396 429 nData[i] = new double[n]; 397 for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / max[j];430 for (var j = 0; j < n; j++) nData[i][j] = max[j].IsAlmost(0) ? data[i][j] - mean[j] : (data[i][j] - mean[j]) / max[j]; 398 431 } 399 432 return nData; … … 416 449 return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")"; 417 450 } 451 #endregion 418 452 } 419 453 }
Note: See TracChangeset for help on using the changeset viewer.