Changeset 15532 for trunk/sources/HeuristicLab.Algorithms.DataAnalysis
- Timestamp:
- 12/18/17 14:12:18 (7 years ago)
- Location:
- trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 13 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/branches/Weighted TSNE/3.4 merged eligible /stable/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.4 10321-10322 /branches/Async/HeuristicLab.Algorithms.DataAnalysis/3.4 13329-15286 /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 6917-7005 /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4 9070-13099 /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.4 5815-6180 /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 4458-4459,4462,4464 /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4 10085-11101 /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.4 5060 /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 11570-12508 /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.4 11130-12721 /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4 13819-14091 /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4 8116-8789 /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.4 10202-10483 /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 5138-5162 /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.4 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.4 7773-7810 /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.4 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.4 6828 /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.4 10204-10479 /branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 5370-5682 /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 6829-6865 /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.4 5594-5752 /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.4 5959-6341 /branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4 14232-14825
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs
r15287 r15532 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 31 [StorableClass] 32 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem {32 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm, IDataAnalysisAlgorithm<T> where T : class, IDataAnalysisProblem { 33 33 #region Properties 34 34 public override Type ProblemType { -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegression.cs
r14185 r15532 39 39 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 160)] 40 40 [StorableClass] 41 public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent {41 public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent, IDataAnalysisAlgorithm<IRegressionProblem> { 42 42 public string Filename { get; set; } 43 43 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15209 r15532 320 320 <Compile Include="TSNE\Distances\IndexedItemDistance.cs" /> 321 321 <Compile Include="TSNE\Distances\ManhattanDistance.cs" /> 322 <Compile Include="TSNE\Distances\WeightedEuclideanDistance.cs" /> 322 323 <Compile Include="TSNE\Distances\IDistance.cs" /> 323 324 <Compile Include="TSNE\PriorityQueue.cs" /> -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegression.cs
r15248 r15532 36 36 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 100)] 37 37 [StorableClass] 38 public sealed class KernelRidgeRegression : BasicAlgorithm {38 public sealed class KernelRidgeRegression : BasicAlgorithm, IDataAnalysisAlgorithm<IRegressionProblem> { 39 39 private const string SolutionResultName = "Kernel ridge regression solution"; 40 40 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/CosineDistance.cs
r15234 r15532 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Linq;25 24 using HeuristicLab.Common; 26 25 using HeuristicLab.Core; … … 28 27 29 28 namespace HeuristicLab.Algorithms.DataAnalysis { 30 31 29 /// <summary> 32 30 /// The angular distance as defined as a normalized distance measure dependent on the angle between two vectors. … … 35 33 [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.")] 36 34 public class CosineDistance : DistanceBase<IEnumerable<double>> { 37 38 35 #region HLConstructors & Cloning 39 36 [StorableConstructor] … … 48 45 49 46 #region statics 50 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 51 if (point1.Count != point2.Count) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 52 var innerprod = 0.0; 53 var length1 = 0.0; 54 var length2 = 0.0; 55 56 for (var i = 0; i < point1.Count; i++) { 57 double d1 = point1[i], d2 = point2[i]; 58 innerprod += d1 * d2; 59 length1 += d1 * d1; 60 length2 += d2 * d2; 47 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 48 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 49 var innerprod = 0.0; 50 var length1 = 0.0; 51 var length2 = 0.0; 52 while (p1Enum.MoveNext() & p2Enum.MoveNext()) { 53 double d1 = p1Enum.Current, d2 = p2Enum.Current; 54 innerprod += d1 * d2; 55 length1 += d1 * d1; 56 length2 += d2 * d2; 57 } 58 var divisor = Math.Sqrt(length1 * length2); 59 if (divisor.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0"); 60 if (p1Enum.MoveNext() || p2Enum.MoveNext()) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 61 return 1 - innerprod / divisor; 61 62 } 62 var l = Math.Sqrt(length1 * length2);63 if (l.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0");64 return 1 - innerprod / l;65 63 } 66 64 #endregion 67 65 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 68 return GetDistance(a .ToArray(), b.ToArray());66 return GetDistance(a, b); 69 67 } 70 68 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs
r15207 r15532 29 29 [StorableClass] 30 30 public abstract class DistanceBase<T> : Item, IDistance<T> { 31 32 31 #region HLConstructors & Cloning 33 32 [StorableConstructor] … … 44 43 45 44 public double Get(object x, object y) { 46 return Get((T) x, (T)y);45 return Get((T) x, (T) y); 47 46 } 48 47 49 48 public IComparer GetDistanceComparer(object item) { 50 return new DistanceComparer((T) item, this);49 return new DistanceComparer((T) item, this); 51 50 } 52 51 53 privateclass DistanceComparer : IComparer<T>, IComparer {52 internal class DistanceComparer : IComparer<T>, IComparer { 54 53 private readonly T item; 55 54 private readonly IDistance<T> dist; … … 65 64 66 65 public int Compare(object x, object y) { 67 return Compare((T) x, (T)y);66 return Compare((T) x, (T) y); 68 67 } 69 68 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclideanDistance.cs
r15207 r15532 31 31 [Item("EuclideanDistance", "A norm function that uses Euclidean distance")] 32 32 public class EuclideanDistance : DistanceBase<IEnumerable<double>> { 33 34 33 #region HLConstructors & Cloning 35 34 [StorableConstructor] 36 35 protected EuclideanDistance(bool deserializing) : base(deserializing) { } 37 36 protected EuclideanDistance(EuclideanDistance original, Cloner cloner) : base(original, cloner) { } 38 public override IDeepCloneable Clone(Cloner cloner) { return new EuclideanDistance(this, cloner); } 37 public override IDeepCloneable Clone(Cloner cloner) { 38 return new EuclideanDistance(this, cloner); 39 } 39 40 public EuclideanDistance() { } 40 41 #endregion 41 42 42 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 43 if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 44 var sum = 0.0; 45 for (var i = 0; i < point1.Count; i++) { 46 var d = point1[i] - point2[i]; 47 sum += d * d; 43 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 44 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 45 var sum = 0.0; 46 while (p1Enum.MoveNext() & p2Enum.MoveNext()) { 47 var d = p1Enum.Current - p2Enum.Current; 48 sum += d * d; 49 } 50 if (p1Enum.MoveNext() || p2Enum.MoveNext()) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 51 return Math.Sqrt(sum); 48 52 } 49 50 return Math.Sqrt(sum);51 53 } 52 54 53 55 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 54 return GetDistance(a .ToArray(), b.ToArray());56 return GetDistance(a, b); 55 57 } 56 58 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/ManhattanDistance.cs
r15207 r15532 31 31 [Item("ManhattanDistance", "A distance function that uses block distance")] 32 32 public class ManhattanDistance : DistanceBase<IEnumerable<double>> { 33 34 33 #region HLConstructors & Cloning 35 34 [StorableConstructor] … … 45 44 #endregion 46 45 47 public static double GetDistance(double[] point1, double[] point2) { 48 if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 49 var sum = 0.0; 50 for (var i = 0; i < point1.Length; i++) 51 sum += Math.Abs(point1[i] + point2[i]); 52 return sum; 46 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 47 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 48 var sum = 0.0; 49 while (p1Enum.MoveNext() & p2Enum.MoveNext()) 50 sum += Math.Abs(p1Enum.Current - p2Enum.Current); 51 if (p1Enum.MoveNext() || p2Enum.MoveNext()) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 52 return sum; 53 } 53 54 } 54 55 55 56 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 56 return GetDistance(a .ToArray(), b.ToArray());57 return GetDistance(a, b); 57 58 } 58 59 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r15234 r15532 53 53 } 54 54 public new IDataAnalysisProblem Problem { 55 get { return (IDataAnalysisProblem) base.Problem; }55 get { return (IDataAnalysisProblem) base.Problem; } 56 56 set { base.Problem = value; } 57 57 } 58 58 59 #region parameter names59 #region Parameter names 60 60 private const string DistanceFunctionParameterName = "DistanceFunction"; 61 61 private const string PerplexityParameterName = "Perplexity"; … … 72 72 private const string ClassesNameParameterName = "ClassesName"; 73 73 private const string NormalizationParameterName = "Normalization"; 74 private const string RandomInitializationParameterName = "RandomInitialization"; 74 75 private const string UpdateIntervalParameterName = "UpdateInterval"; 75 76 #endregion 76 77 77 #region result names78 #region Result names 78 79 private const string IterationResultName = "Iteration"; 79 80 private const string ErrorResultName = "Error"; … … 83 84 #endregion 84 85 85 #region parameter properties86 #region Parameter properties 86 87 public IFixedValueParameter<DoubleValue> PerplexityParameter { 87 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }88 get { return (IFixedValueParameter<DoubleValue>) Parameters[PerplexityParameterName]; } 88 89 } 89 90 public IFixedValueParameter<PercentValue> ThetaParameter { 90 get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; }91 get { return (IFixedValueParameter<PercentValue>) Parameters[ThetaParameterName]; } 91 92 } 92 93 public IFixedValueParameter<IntValue> NewDimensionsParameter { 93 get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }94 get { return (IFixedValueParameter<IntValue>) Parameters[NewDimensionsParameterName]; } 94 95 } 95 96 public IConstrainedValueParameter<IDistance<double[]>> DistanceFunctionParameter { 96 get { return Parameters[DistanceFunctionParameterName] as IConstrainedValueParameter<IDistance<double[]>>; }97 get { return (IConstrainedValueParameter<IDistance<double[]>>) Parameters[DistanceFunctionParameterName]; } 97 98 } 98 99 public IFixedValueParameter<IntValue> MaxIterationsParameter { 99 get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter<IntValue>; }100 get { return (IFixedValueParameter<IntValue>) Parameters[MaxIterationsParameterName]; } 100 101 } 101 102 public IFixedValueParameter<IntValue> StopLyingIterationParameter { 102 get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter<IntValue>; }103 get { return (IFixedValueParameter<IntValue>) Parameters[StopLyingIterationParameterName]; } 103 104 } 104 105 public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter { 105 get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter<IntValue>; }106 get { return (IFixedValueParameter<IntValue>) Parameters[MomentumSwitchIterationParameterName]; } 106 107 } 107 108 public IFixedValueParameter<DoubleValue> InitialMomentumParameter { 108 get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter<DoubleValue>; }109 get { return (IFixedValueParameter<DoubleValue>) Parameters[InitialMomentumParameterName]; } 109 110 } 110 111 public IFixedValueParameter<DoubleValue> FinalMomentumParameter { 111 get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter<DoubleValue>; }112 get { return (IFixedValueParameter<DoubleValue>) Parameters[FinalMomentumParameterName]; } 112 113 } 113 114 public IFixedValueParameter<DoubleValue> EtaParameter { 114 get { return Parameters[EtaParameterName] as IFixedValueParameter<DoubleValue>; }115 get { return (IFixedValueParameter<DoubleValue>) Parameters[EtaParameterName]; } 115 116 } 116 117 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter { 117 get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }118 get { return (IFixedValueParameter<BoolValue>) Parameters[SetSeedRandomlyParameterName]; } 118 119 } 119 120 public IFixedValueParameter<IntValue> SeedParameter { 120 get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }121 get { return (IFixedValueParameter<IntValue>) Parameters[SeedParameterName]; } 121 122 } 122 123 public IConstrainedValueParameter<StringValue> ClassesNameParameter { 123 get { return Parameters[ClassesNameParameterName] as IConstrainedValueParameter<StringValue>; }124 get { return (IConstrainedValueParameter<StringValue>) Parameters[ClassesNameParameterName]; } 124 125 } 125 126 public IFixedValueParameter<BoolValue> NormalizationParameter { 126 get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; } 127 get { return (IFixedValueParameter<BoolValue>) Parameters[NormalizationParameterName]; } 128 } 129 public IFixedValueParameter<BoolValue> RandomInitializationParameter { 130 get { return (IFixedValueParameter<BoolValue>) Parameters[RandomInitializationParameterName]; } 127 131 } 128 132 public IFixedValueParameter<IntValue> UpdateIntervalParameter { 129 get { return Parameters[UpdateIntervalParameterName] as IFixedValueParameter<IntValue>; }133 get { return (IFixedValueParameter<IntValue>) Parameters[UpdateIntervalParameterName]; } 130 134 } 131 135 #endregion … … 187 191 set { NormalizationParameter.Value.Value = value; } 188 192 } 189 193 public bool RandomInitialization { 194 get { return RandomInitializationParameter.Value.Value; } 195 set { RandomInitializationParameter.Value.Value = value; } 196 } 190 197 public int UpdateInterval { 191 198 get { return UpdateIntervalParameter.Value.Value; } … … 194 201 #endregion 195 202 203 #region Storable poperties 204 [Storable] 205 private Dictionary<string, List<int>> dataRowNames; 206 [Storable] 207 private Dictionary<string, ScatterPlotDataRow> dataRows; 208 [Storable] 209 private TSNEStatic<double[]>.TSNEState state; 210 [Storable] 211 private int iter; 212 #endregion 213 196 214 #region Constructors & Cloning 197 215 [StorableConstructor] 198 216 private TSNEAlgorithm(bool deserializing) : base(deserializing) { } 199 217 218 [StorableHook(HookType.AfterDeserialization)] 219 private void AfterDeserialization() { 220 if (Parameters.ContainsKey(RandomInitializationParameterName)) 221 Parameters.Add(new FixedValueParameter<BoolValue>(RandomInitializationParameterName, "Wether data points should be randomly initialized or according to the first 2 dimensions", new BoolValue(true))); 222 RegisterParameterEvents(); 223 } 200 224 private TSNEAlgorithm(TSNEAlgorithm original, Cloner cloner) : base(original, cloner) { 201 225 if (original.dataRowNames != null) 202 this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames);226 dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames); 203 227 if (original.dataRows != null) 204 this.dataRows = original.dataRows.ToDictionary(kvp => kvp.Key, kvp => cloner.Clone(kvp.Value));228 dataRows = original.dataRows.ToDictionary(kvp => kvp.Key, kvp => cloner.Clone(kvp.Value)); 205 229 if (original.state != null) 206 this.state = cloner.Clone(original.state); 207 this.iter = original.iter; 208 } 209 public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); } 230 state = cloner.Clone(original.state); 231 iter = original.iter; 232 } 233 public override IDeepCloneable Clone(Cloner cloner) { 234 return new TSNEAlgorithm(this, cloner); 235 } 210 236 public TSNEAlgorithm() { 211 237 var distances = new ItemSet<IDistance<double[]>>(ApplicationManager.Manager.GetInstances<IDistance<double[]>>()); … … 213 239 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25))); 214 240 Parameters.Add(new FixedValueParameter<PercentValue>(ThetaParameterName, "Value describing how much appoximated " + 215 "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " +216 "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " +217 "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " +218 "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. It may give" +219 " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0)));241 "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " + 242 "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " + 243 "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " + 244 "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. It may give" + 245 " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0))); 220 246 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2))); 221 247 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent.", new IntValue(1000))); … … 230 256 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true))); 231 257 Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "The interval after which the results will be updated.", new IntValue(50))); 258 Parameters.Add(new FixedValueParameter<BoolValue>(RandomInitializationParameterName, "Wether data points should be randomly initialized or according to the first 2 dimensions", new BoolValue(true))); 259 232 260 Parameters[UpdateIntervalParameterName].Hidden = true; 233 261 … … 238 266 EtaParameter.Hidden = false; 239 267 Problem = new RegressionProblem(); 240 } 241 #endregion 242 243 [Storable] 244 private Dictionary<string, List<int>> dataRowNames; 245 [Storable] 246 private Dictionary<string, ScatterPlotDataRow> dataRows; 247 [Storable] 248 private TSNEStatic<double[]>.TSNEState state; 249 [Storable] 250 private int iter; 268 RegisterParameterEvents(); 269 } 270 #endregion 251 271 252 272 public override void Prepare() { … … 259 279 protected override void Run(CancellationToken cancellationToken) { 260 280 var problemData = Problem.ProblemData; 261 // set up and initialized everything if necessary 281 // set up and initialize everything if necessary 282 var wdist = DistanceFunction as WeightedEuclideanDistance; 283 if (wdist != null) wdist.Initialize(problemData); 262 284 if (state == null) { 263 285 if (SetSeedRandomly) Seed = new System.Random().Next(); 264 var random = new MersenneTwister((uint) Seed);286 var random = new MersenneTwister((uint) Seed); 265 287 var dataset = problemData.Dataset; 266 288 var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); 267 var data = new double[dataset.Rows][]; 268 for (var row = 0; row < dataset.Rows; row++) 269 data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray(); 270 271 if (Normalization) data = NormalizeData(data); 272 273 state = TSNEStatic<double[]>.CreateState(data, DistanceFunction, random, NewDimensions, Perplexity, Theta, 274 StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta); 275 276 SetUpResults(data); 289 var allindices = Problem.ProblemData.AllIndices.ToArray(); 290 291 // jagged array is required to meet the static method declarations of TSNEStatic<T> 292 var data = Enumerable.Range(0, dataset.Rows).Select(x => new double[allowedInputVariables.Length]).ToArray(); 293 var col = 0; 294 foreach (var s in allowedInputVariables) { 295 var row = 0; 296 foreach (var d in dataset.GetDoubleValues(s)) { 297 data[row][col] = d; 298 row++; 299 } 300 col++; 301 } 302 303 if (Normalization) data = NormalizeInputData(data); 304 state = TSNEStatic<double[]>.CreateState(data, DistanceFunction, random, NewDimensions, Perplexity, Theta, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, RandomInitialization); 305 SetUpResults(allindices); 277 306 iter = 0; 278 307 } 279 308 for (; iter < MaxIterations && !cancellationToken.IsCancellationRequested; iter++) { 280 if (iter % UpdateInterval == 0) 281 Analyze(state); 309 if (iter % UpdateInterval == 0) Analyze(state); 282 310 TSNEStatic<double[]>.Iterate(state); 283 311 } … … 296 324 Problem.ProblemDataChanged += OnProblemDataChanged; 297 325 } 326 298 327 protected override void DeregisterProblemEvents() { 299 328 base.DeregisterProblemEvents(); … … 301 330 } 302 331 332 protected override void OnStopped() { 333 base.OnStopped(); 334 state = null; 335 dataRowNames = null; 336 dataRows = null; 337 } 338 303 339 private void OnProblemDataChanged(object sender, EventArgs args) { 304 340 if (Problem == null || Problem.ProblemData == null) return; 341 OnPerplexityChanged(this, null); 342 OnColumnsChanged(this, null); 343 Problem.ProblemData.Changed += OnPerplexityChanged; 344 Problem.ProblemData.Changed += OnColumnsChanged; 345 Problem.ProblemData.Dataset.RowsChanged += OnPerplexityChanged; 346 Problem.ProblemData.Dataset.ColumnsChanged += OnColumnsChanged; 305 347 if (!Parameters.ContainsKey(ClassesNameParameterName)) return; 306 348 ClassesNameParameter.ValidValues.Clear(); … … 308 350 } 309 351 352 private void OnColumnsChanged(object sender, EventArgs e) { 353 if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(DistanceFunctionParameterName)) return; 354 DistanceFunctionParameter.ValidValues.OfType<WeightedEuclideanDistance>().Single().AdaptToProblemData(Problem.ProblemData); 355 } 356 357 private void RegisterParameterEvents() { 358 PerplexityParameter.Value.ValueChanged -= OnPerplexityChanged; 359 PerplexityParameter.Value.ValueChanged += OnPerplexityChanged; 360 } 361 362 private void OnPerplexityChanged(object sender, EventArgs e) { 363 if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(PerplexityParameterName)) return; 364 PerplexityParameter.Value.ValueChanged -= OnPerplexityChanged; 365 PerplexityParameter.Value.Value = Math.Max(1, Math.Min((Problem.ProblemData.Dataset.Rows - 1) / 3.0, Perplexity)); 366 PerplexityParameter.Value.ValueChanged += OnPerplexityChanged; 367 } 310 368 #endregion 311 369 312 370 #region Helpers 313 private void SetUpResults(IReadOnly Collection<double[]> data) {371 private void SetUpResults(IReadOnlyList<int> allIndices) { 314 372 if (Results == null) return; 315 373 var results = Results; … … 318 376 var problemData = Problem.ProblemData; 319 377 378 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0))); 379 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0))); 380 if (!results.ContainsKey(ScatterPlotResultName)) results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, ""))); 381 if (!results.ContainsKey(DataResultName)) results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix())); 382 if (!results.ContainsKey(ErrorPlotResultName)) { 383 var errortable = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent") { 384 VisualProperties = { 385 XAxisTitle = "UpdateIntervall", 386 YAxisTitle = "Error", 387 YAxisLogScale = true 388 } 389 }; 390 errortable.Rows.Add(new DataRow("Errors")); 391 errortable.Rows["Errors"].VisualProperties.StartIndexZero = true; 392 results.Add(new Result(ErrorPlotResultName, errortable)); 393 } 394 320 395 //color datapoints acording to classes variable (be it double or string) 321 if (problemData.Dataset.VariableNames.Contains(ClassesName)) { 322 if ((problemData.Dataset as Dataset).VariableHasType<string>(ClassesName)) { 323 var classes = problemData.Dataset.GetStringValues(ClassesName).ToArray(); 324 for (var i = 0; i < classes.Length; i++) { 325 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>()); 326 dataRowNames[classes[i]].Add(i); 327 } 328 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(ClassesName)) { 329 var classValues = problemData.Dataset.GetDoubleValues(ClassesName).ToArray(); 330 var max = classValues.Max() + 0.1; 331 var min = classValues.Min() - 0.1; 332 const int contours = 8; 333 for (var i = 0; i < contours; i++) { 334 var contourname = GetContourName(i, min, max, contours); 335 dataRowNames.Add(contourname, new List<int>()); 336 dataRows.Add(contourname, new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>())); 337 dataRows[contourname].VisualProperties.Color = GetHeatMapColor(i, contours); 338 dataRows[contourname].VisualProperties.PointSize = i + 3; 339 } 340 for (var i = 0; i < classValues.Length; i++) { 341 dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); 342 } 343 } 344 } else { 396 if (!problemData.Dataset.VariableNames.Contains(ClassesName)) { 345 397 dataRowNames.Add("Training", problemData.TrainingIndices.ToList()); 346 398 dataRowNames.Add("Test", problemData.TestIndices.ToList()); 347 } 348 349 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0))); 350 else ((IntValue)results[IterationResultName].Value).Value = 0; 351 352 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0))); 353 else ((DoubleValue)results[ErrorResultName].Value).Value = 0; 354 355 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"))); 356 else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"); 357 358 var plot = results[ErrorPlotResultName].Value as DataTable; 359 if (plot == null) throw new ArgumentException("could not create/access error data table in results collection"); 360 361 if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors")); 362 plot.Rows["errors"].Values.Clear(); 363 plot.Rows["errors"].VisualProperties.StartIndexZero = true; 364 365 results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, ""))); 366 results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix())); 399 return; 400 } 401 var classificationData = problemData as ClassificationProblemData; 402 if (classificationData != null && classificationData.TargetVariable.Equals(ClassesName)) { 403 var classNames = classificationData.ClassValues.Zip(classificationData.ClassNames, (v, n) => new {v, n}).ToDictionary(x => x.v, x => x.n); 404 var classes = classificationData.Dataset.GetDoubleValues(classificationData.TargetVariable, allIndices).Select(v => classNames[v]).ToArray(); 405 for (var i = 0; i < classes.Length; i++) { 406 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>()); 407 dataRowNames[classes[i]].Add(i); 408 } 409 } 410 else if (((Dataset) problemData.Dataset).VariableHasType<string>(ClassesName)) { 411 var classes = problemData.Dataset.GetStringValues(ClassesName, allIndices).ToArray(); 412 for (var i = 0; i < classes.Length; i++) { 413 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>()); 414 dataRowNames[classes[i]].Add(i); 415 } 416 } 417 else if (((Dataset) problemData.Dataset).VariableHasType<double>(ClassesName)) { 418 var clusterdata = new Dataset(problemData.Dataset.DoubleVariables, problemData.Dataset.DoubleVariables.Select(v => problemData.Dataset.GetDoubleValues(v, allIndices).ToList())); 419 const int contours = 8; 420 Dictionary<int, string> contourMap; 421 IClusteringModel clusterModel; 422 double[][] borders; 423 CreateClusters(clusterdata, ClassesName, contours, out clusterModel, out contourMap, out borders); 424 var contourorder = borders.Select((x, i) => new {x, i}).OrderBy(x => x.x[0]).Select(x => x.i).ToArray(); 425 for (var i = 0; i < contours; i++) { 426 var c = contourorder[i]; 427 var contourname = contourMap[c]; 428 dataRowNames.Add(contourname, new List<int>()); 429 dataRows.Add(contourname, new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>())); 430 dataRows[contourname].VisualProperties.Color = GetHeatMapColor(i, contours); 431 } 432 var allClusters = clusterModel.GetClusterValues(clusterdata, Enumerable.Range(0, clusterdata.Rows)).ToArray(); 433 for (var i = 0; i < clusterdata.Rows; i++) dataRowNames[contourMap[allClusters[i] - 1]].Add(i); 434 } 435 else if (((Dataset) problemData.Dataset).VariableHasType<DateTime>(ClassesName)) { 436 var clusterdata = new Dataset(problemData.Dataset.DateTimeVariables, problemData.Dataset.DateTimeVariables.Select(v => problemData.Dataset.GetDoubleValues(v, allIndices).ToList())); 437 const int contours = 8; 438 Dictionary<int, string> contourMap; 439 IClusteringModel clusterModel; 440 double[][] borders; 441 CreateClusters(clusterdata, ClassesName, contours, out clusterModel, out contourMap, out borders); 442 var contourorder = borders.Select((x, i) => new {x, i}).OrderBy(x => x.x[0]).Select(x => x.i).ToArray(); 443 for (var i = 0; i < contours; i++) { 444 var c = contourorder[i]; 445 var contourname = contourMap[c]; 446 dataRowNames.Add(contourname, new List<int>()); 447 dataRows.Add(contourname, new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>())); 448 dataRows[contourname].VisualProperties.Color = GetHeatMapColor(i, contours); 449 } 450 var allClusters = clusterModel.GetClusterValues(clusterdata, Enumerable.Range(0, clusterdata.Rows)).ToArray(); 451 for (var i = 0; i < clusterdata.Rows; i++) dataRowNames[contourMap[allClusters[i] - 1]].Add(i); 452 } 453 else { 454 dataRowNames.Add("Training", problemData.TrainingIndices.ToList()); 455 dataRowNames.Add("Test", problemData.TestIndices.ToList()); 456 } 367 457 } 368 458 … … 372 462 var plot = results[ErrorPlotResultName].Value as DataTable; 373 463 if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection."); 374 var errors = plot.Rows[" errors"].Values;464 var errors = plot.Rows["Errors"].Values; 375 465 var c = tsneState.EvaluateError(); 376 466 errors.Add(c); 377 ((IntValue) results[IterationResultName].Value).Value = tsneState.iter;378 ((DoubleValue) results[ErrorResultName].Value).Value = errors.Last();379 380 var ndata = Normalize (tsneState.newData);467 ((IntValue) results[IterationResultName].Value).Value = tsneState.iter; 468 ((DoubleValue) results[ErrorResultName].Value).Value = errors.Last(); 469 470 var ndata = NormalizeProjectedData(tsneState.newData); 381 471 results[DataResultName].Value = new DoubleMatrix(ndata); 382 472 var splot = results[ScatterPlotResultName].Value as ScatterPlot; … … 386 476 private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) { 387 477 foreach (var rowName in dataRowNames.Keys) { 388 if (!plot.Rows.ContainsKey(rowName)) 478 if (!plot.Rows.ContainsKey(rowName)) { 389 479 plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>())); 480 plot.Rows[rowName].VisualProperties.PointSize = 8; 481 } 390 482 plot.Rows[rowName].Points.Replace(dataRowNames[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))); 391 483 } 392 484 } 393 485 394 private static double[,] Normalize (double[,] data) {486 private static double[,] NormalizeProjectedData(double[,] data) { 395 487 var max = new double[data.GetLength(1)]; 396 488 var min = new double[data.GetLength(1)]; … … 398 490 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i]; 399 491 for (var i = 0; i < data.GetLength(0); i++) 400 401 402 403 404 492 for (var j = 0; j < data.GetLength(1); j++) { 493 var v = data[i, j]; 494 max[j] = Math.Max(max[j], v); 495 min[j] = Math.Min(min[j], v); 496 } 405 497 for (var i = 0; i < data.GetLength(0); i++) { 406 498 for (var j = 0; j < data.GetLength(1); j++) { 407 499 var d = max[j] - min[j]; 408 var s = data[i, j] - (max[j] + min[j]) / 2; 409 if (d.IsAlmost(0)) res[i, j] = data[i, j]; 410 else res[i, j] = s / d; 500 var s = data[i, j] - (max[j] + min[j]) / 2; //shift data 501 if (d.IsAlmost(0)) res[i, j] = data[i, j]; //no scaling possible 502 else res[i, j] = s / d; //scale data 411 503 } 412 504 } … … 414 506 } 415 507 416 private static double[][] Normalize Data(IReadOnlyList<double[]> data) {508 private static double[][] NormalizeInputData(IReadOnlyList<IReadOnlyList<double>> data) { 417 509 // as in tSNE implementation by van der Maaten 418 var n = data[0]. Length;510 var n = data[0].Count; 419 511 var mean = new double[n]; 420 512 var max = new double[n]; … … 432 524 433 525 private static Color GetHeatMapColor(int contourNr, int noContours) { 434 var q = (double)contourNr / noContours; // q in [0,1] 435 var c = q < 0.5 ? Color.FromArgb((int)(q * 2 * 255), 255, 0) : Color.FromArgb(255, (int)((1 - q) * 2 * 255), 0); 436 return c; 437 } 438 439 private static string GetContourName(double value, double min, double max, int noContours) { 440 var size = (max - min) / noContours; 441 var contourNr = (int)((value - min) / size); 442 return GetContourName(contourNr, min, max, noContours); 443 } 444 445 private static string GetContourName(int i, double min, double max, int noContours) { 446 var size = (max - min) / noContours; 447 return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")"; 526 return ConvertTotalToRgb(0, noContours, contourNr); 527 } 528 529 private static void CreateClusters(IDataset data, string target, int contours, out IClusteringModel contourCluster, out Dictionary<int, string> contourNames, out double[][] borders) { 530 var cpd = new ClusteringProblemData((Dataset) data, new[] {target}); 531 contourCluster = KMeansClustering.CreateKMeansSolution(cpd, contours, 3).Model; 532 533 borders = Enumerable.Range(0, contours).Select(x => new[] {double.MaxValue, double.MinValue}).ToArray(); 534 var clusters = contourCluster.GetClusterValues(cpd.Dataset, cpd.AllIndices).ToArray(); 535 var targetvalues = cpd.Dataset.GetDoubleValues(target).ToArray(); 536 foreach (var i in cpd.AllIndices) { 537 var cl = clusters[i] - 1; 538 var clv = targetvalues[i]; 539 if (borders[cl][0] > clv) borders[cl][0] = clv; 540 if (borders[cl][1] < clv) borders[cl][1] = clv; 541 } 542 543 contourNames = new Dictionary<int, string>(); 544 for (var i = 0; i < contours; i++) 545 contourNames.Add(i, "[" + borders[i][0] + ";" + borders[i][1] + "]"); 546 } 547 548 private static Color ConvertTotalToRgb(double low, double high, double cell) { 549 var colorGradient = ColorGradient.Colors; 550 var range = high - low; 551 var h = Math.Min(cell / range * colorGradient.Count, colorGradient.Count - 1); 552 return colorGradient[(int) h]; 448 553 } 449 554 #endregion -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r15207 r15532 65 65 [StorableClass] 66 66 public class TSNEStatic<T> { 67 68 67 [StorableClass] 69 68 public sealed class TSNEState : DeepCloneable { … … 170 169 [StorableConstructor] 171 170 public TSNEState(bool deserializing) { } 172 public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) { 171 172 public TSNEState(IReadOnlyList<T> data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, 173 double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta, bool randomInit) { 173 174 this.distance = distance; 174 175 this.random = random; … … 183 184 184 185 // initialize 185 noDatapoints = data. Length;186 noDatapoints = data.Count; 186 187 if (noDatapoints - 1 < 3 * perplexity) 187 188 throw new ArgumentException("Perplexity too large for the number of data points!"); … … 193 194 gains = new double[noDatapoints, newDimensions]; 194 195 for (var i = 0; i < noDatapoints; i++) 195 196 196 for (var j = 0; j < newDimensions; j++) 197 gains[i, j] = 1.0; 197 198 198 199 p = null; … … 212 213 var rand = new NormalDistributedRandom(random, 0, 1); 213 214 for (var i = 0; i < noDatapoints; i++) 214 for (var j = 0; j < newDimensions; j++) 215 newData[i, j] = rand.NextDouble() * .0001; 215 for (var j = 0; j < newDimensions; j++) 216 newData[i, j] = rand.NextDouble() * .0001; 217 218 if (!(data[0] is IReadOnlyList<double>) || randomInit) return; 219 for (var i = 0; i < noDatapoints; i++) 220 for (var j = 0; j < newDimensions; j++) { 221 var row = (IReadOnlyList<double>) data[i]; 222 newData[i, j] = row[j % row.Count]; 223 } 216 224 } 217 225 #endregion 218 226 219 227 public double EvaluateError() { 220 return exact ? 221 EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : 222 EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 228 return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 223 229 } 224 230 225 231 #region Helpers 226 private static void CalculateApproximateSimilarities( T[]data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {232 private static void CalculateApproximateSimilarities(IReadOnlyList<T> data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) { 227 233 // Compute asymmetric pairwise input similarities 228 ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity));234 ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity)); 229 235 // Symmetrize input similarities 230 236 int[] sRowP, symColP; … … 235 241 valP = sValP; 236 242 var sumP = .0; 237 for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i]; 238 for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP; 239 } 240 241 private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) { 243 for (var i = 0; i < rowP[data.Count]; i++) sumP += valP[i]; 244 for (var i = 0; i < rowP[data.Count]; i++) valP[i] /= sumP; 245 } 246 private static double[,] CalculateExactSimilarites(IReadOnlyList<T> data, IDistance<T> distance, double perplexity) { 242 247 // Compute similarities 243 var p = new double[data. Length, data.Length];248 var p = new double[data.Count, data.Count]; 244 249 ComputeGaussianPerplexity(data, distance, p, perplexity); 245 250 // Symmetrize input similarities 246 for (var n = 0; n < data. Length; n++) {247 for (var m = n + 1; m < data. Length; m++) {251 for (var n = 0; n < data.Count; n++) { 252 for (var m = n + 1; m < data.Count; m++) { 248 253 p[n, m] += p[m, n]; 249 254 p[m, n] = p[n, m]; … … 251 256 } 252 257 var sumP = .0; 253 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; 254 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; 258 for (var i = 0; i < data.Count; i++) { 259 for (var j = 0; j < data.Count; j++) { 260 sumP += p[i, j]; 261 } 262 } 263 for (var i = 0; i < data.Count; i++) { 264 for (var j = 0; j < data.Count; j++) { 265 p[i, j] /= sumP; 266 } 267 } 255 268 return p; 256 269 } 257 258 270 private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) { 259 271 if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!"); … … 290 302 291 303 // Iterate until we found a good perplexity 292 var iter = 0; double sumP = 0; 304 var iter = 0; 305 double sumP = 0; 293 306 while (!found && iter < 200) { 294 295 307 // Compute Gaussian kernel row 296 308 for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); … … 307 319 if (hdiff < tol && -hdiff < tol) { 308 320 found = true; 309 } else { 321 } 322 else { 310 323 if (hdiff > 0) { 311 324 minBeta = beta; … … 314 327 else 315 328 beta = (beta + maxBeta) / 2.0; 316 } else { 329 } 330 else { 317 331 maxBeta = beta; 318 332 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) … … 335 349 } 336 350 } 337 private static void ComputeGaussianPerplexity( T[]x, IDistance<T> distance, double[,] p, double perplexity) {351 private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, double[,] p, double perplexity) { 338 352 // Compute the distance matrix 339 353 var dd = ComputeDistances(x, distance); 340 354 341 var n = x. Length;355 var n = x.Count; 342 356 // Compute the Gaussian kernel row by row 343 357 for (var i = 0; i < n; i++) { … … 352 366 // Iterate until we found a good perplexity 353 367 var iter = 0; 354 while (!found && iter < 200) { 368 while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten 355 369 356 370 // Compute Gaussian kernel row … … 369 383 if (hdiff < tol && -hdiff < tol) { 370 384 found = true; 371 } else { 385 } 386 else { 372 387 if (hdiff > 0) { 373 388 minBeta = beta; … … 376 391 else 377 392 beta = (beta + maxBeta) / 2.0; 378 } else { 393 } 394 else { 379 395 maxBeta = beta; 380 396 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) … … 393 409 } 394 410 } 395 396 private static double[][] ComputeDistances(T[] x, IDistance<T> distance) { 397 var res = new double[x.Length][]; 398 for (var r = 0; r < x.Length; r++) { 399 var rowV = new double[x.Length]; 411 private static double[][] ComputeDistances(IReadOnlyList<T> x, IDistance<T> distance) { 412 var res = new double[x.Count][]; 413 for (var r = 0; r < x.Count; r++) { 414 var rowV = new double[x.Count]; 400 415 // all distances must be symmetric 401 416 for (var c = 0; c < r; c++) { … … 403 418 } 404 419 rowV[r] = 0.0; // distance to self is zero for all distances 405 for (var c = r + 1; c < x. Length; c++) {420 for (var c = r + 1; c < x.Count; c++) { 406 421 rowV[c] = distance.Get(x[r], x[c]); 407 422 } … … 411 426 // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray(); 412 427 } 413 414 428 private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) { 415 429 // Compute the squared Euclidean distance matrix … … 425 439 q[n1, m] = 1 / (1 + dd[n1, m]); 426 440 sumQ += q[n1, m]; 427 } else q[n1, m] = double.Epsilon; 441 } 442 else q[n1, m] = double.Epsilon; 428 443 } 429 444 } … … 433 448 var c = .0; 434 449 for (var i = 0; i < n; i++) 435 436 437 450 for (var j = 0; j < n; j++) { 451 c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon)); 452 } 438 453 return c; 439 454 } 440 441 455 private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) { 442 456 // Get estimate of normalization term … … 463 477 } 464 478 private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) { 465 466 479 // Count number of elements and row counts of symmetric matrix 467 480 var n = rowP.Count - 1; … … 469 482 for (var j = 0; j < n; j++) { 470 483 for (var i = rowP[j]; i < rowP[j + 1]; i++) { 471 472 484 // Check whether element (col_P[i], n) is present 473 485 var present = false; … … 497 509 var offset = new int[n]; 498 510 for (var j = 0; j < n; j++) { 499 for (var i = rowP[j]; i < rowP[j + 1]; i++) { 511 for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i]) 500 512 501 513 // Check whether element (col_P[i], n) is present … … 549 561 public static double[,] Run(T[] data, IDistance<T> distance, IRandom random, 550 562 int newDimensions = 2, double perplexity = 25, int iterations = 1000, 551 double theta = 0, 552 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 563 double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 553 564 double finalMomentum = .8, double eta = 10.0 554 565 ) { 555 566 var state = CreateState(data, distance, random, newDimensions, perplexity, 556 567 theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta); … … 565 576 int newDimensions = 2, double perplexity = 25, double theta = 0, 566 577 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 567 double finalMomentum = .8, double eta = 10.0 568 569 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta );578 double finalMomentum = .8, double eta = 10.0, bool randomInit = true 579 ) { 580 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta, randomInit); 570 581 } 571 582 … … 580 591 for (var j = 0; j < state.newDimensions; j++) { 581 592 state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j]) 582 ? state.gains[i, j] + .2 593 ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct 583 594 : state.gains[i, j] * .8; 584 585 595 if (state.gains[i, j] < .01) state.gains[i, j] = .01; 586 596 } 587 597 } 588 589 598 590 599 // Perform gradient update (with momentum and gains) 591 600 for (var i = 0; i < state.noDatapoints; i++) 592 593 601 for (var j = 0; j < state.newDimensions; j++) 602 state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j]; 594 603 595 604 for (var i = 0; i < state.noDatapoints; i++) 596 597 605 for (var j = 0; j < state.newDimensions; j++) 606 state.newData[i, j] = state.newData[i, j] + state.uY[i, j]; 598 607 599 608 // Make solution zero-mean … … 604 613 if (state.exact) 605 614 for (var i = 0; i < state.noDatapoints; i++) 606 607 615 for (var j = 0; j < state.noDatapoints; j++) 616 state.p[i, j] /= 12.0; 608 617 else 609 618 for (var i = 0; i < state.rowP[state.noDatapoints]; i++) … … 634 643 // Compute final t-SNE gradient 635 644 for (var i = 0; i < n; i++) 636 637 638 645 for (var j = 0; j < d; j++) { 646 dC[i, j] = posF[i, j] - negF[i, j] / sumQ; 647 } 639 648 } 640 649 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEUtils.cs
r14414 r15532 35 35 } 36 36 37 internal static IList<T>Swap<T>(this IList<T> list, int indexA, int indexB) {37 internal static void Swap<T>(this IList<T> list, int indexA, int indexB) { 38 38 var tmp = list[indexA]; 39 39 list[indexA] = list[indexB]; 40 40 list[indexB] = tmp; 41 return list;42 41 } 43 42 44 internalstatic int Partition<T>(this IList<T> list, int left, int right, int pivotindex, IComparer<T> comparer) {43 private static int Partition<T>(this IList<T> list, int left, int right, int pivotindex, IComparer<T> comparer) { 45 44 var pivotValue = list[pivotindex]; 46 45 list.Swap(pivotindex, right); … … 67 66 /// <param name="comparer">comparer for list elemnts </param> 68 67 /// <returns></returns> 69 internal static T NthElement<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) {68 internal static void PartialSort<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) { 70 69 while (true) { 71 if (left == right) return list[left];72 var pivotindex = left + (int) Math.Floor(new System.Random().Next() % (right - (double)left + 1));70 if (left == right) return; 71 var pivotindex = left + (int) Math.Floor(new System.Random().Next() % (right - (double) left + 1)); 73 72 pivotindex = list.Partition(left, right, pivotindex, comparer); 74 if (n == pivotindex) return list[n];73 if (n == pivotindex) return; 75 74 if (n < pivotindex) right = pivotindex - 1; 76 75 else left = pivotindex + 1; -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs
r15207 r15532 139 139 // Partition around the median distance 140 140 var median = (upper + lower) / 2; 141 items. NthElement(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower]));141 items.PartialSort(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower])); 142 142 143 143 // Threshold of the new node will be the distance to the median
Note: See TracChangeset
for help on using the changeset viewer.