Changeset 15571
- Timestamp:
- 01/02/18 17:45:15 (7 years ago)
- Location:
- stable
- Files:
-
- 18 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
stable
- Property svn:mergeinfo changed
/trunk/sources merged: 15532,15545,15548,15551,15556
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Algorithms.DataAnalysis merged: 15532,15545,15548,15551,15556
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.4 10321-10322 /branches/Async/HeuristicLab.Algorithms.DataAnalysis/3.4 13329-15286 /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 6917-7005 /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4 9070-13099 /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.4 5815-6180 /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 4458-4459,4462,4464 /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4 10085-11101 /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.4 5060 /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 11570-12508 /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.4 11130-12721 /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4 13819-14091 /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4 8116-8789 /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.4 10202-10483 /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 5138-5162 /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.4 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.4 7773-7810 /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.4 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.4 6828 /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.4 10204-10479 /branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 5370-5682 /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 6829-6865 /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.4 5594-5752 /branches/Weighted TSNE/3.4 15451-15531 /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.4 5959-6341 /branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4 14232-14825
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs
r15292 r15571 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 31 [StorableClass] 32 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem {32 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm, IDataAnalysisAlgorithm<T> where T : class, IDataAnalysisProblem { 33 33 #region Properties 34 34 public override Type ProblemType { -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegression.cs
r14186 r15571 39 39 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 160)] 40 40 [StorableClass] 41 public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent {41 public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent, IDataAnalysisAlgorithm<IRegressionProblem> { 42 42 public string Filename { get; set; } 43 43 -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15249 r15571 320 320 <Compile Include="TSNE\Distances\IndexedItemDistance.cs" /> 321 321 <Compile Include="TSNE\Distances\ManhattanDistance.cs" /> 322 <Compile Include="TSNE\Distances\WeightedEuclideanDistance.cs" /> 322 323 <Compile Include="TSNE\Distances\IDistance.cs" /> 323 324 <Compile Include="TSNE\PriorityQueue.cs" /> -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegression.cs
r15249 r15571 36 36 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 100)] 37 37 [StorableClass] 38 public sealed class KernelRidgeRegression : BasicAlgorithm {38 public sealed class KernelRidgeRegression : BasicAlgorithm, IDataAnalysisAlgorithm<IRegressionProblem> { 39 39 private const string SolutionResultName = "Kernel ridge regression solution"; 40 40 -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/CosineDistance.cs
r15249 r15571 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Linq;25 24 using HeuristicLab.Common; 26 25 using HeuristicLab.Core; … … 28 27 29 28 namespace HeuristicLab.Algorithms.DataAnalysis { 30 31 29 /// <summary> 32 30 /// The angular distance as defined as a normalized distance measure dependent on the angle between two vectors. … … 35 33 [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.")] 36 34 public class CosineDistance : DistanceBase<IEnumerable<double>> { 37 38 35 #region HLConstructors & Cloning 39 36 [StorableConstructor] … … 48 45 49 46 #region statics 50 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 51 if (point1.Count != point2.Count) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 52 var innerprod = 0.0; 53 var length1 = 0.0; 54 var length2 = 0.0; 55 56 for (var i = 0; i < point1.Count; i++) { 57 double d1 = point1[i], d2 = point2[i]; 58 innerprod += d1 * d2; 59 length1 += d1 * d1; 60 length2 += d2 * d2; 47 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 48 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 49 var innerprod = 0.0; 50 var length1 = 0.0; 51 var length2 = 0.0; 52 while (p1Enum.MoveNext() & p2Enum.MoveNext()) { 53 double d1 = p1Enum.Current, d2 = p2Enum.Current; 54 innerprod += d1 * d2; 55 length1 += d1 * d1; 56 length2 += d2 * d2; 57 } 58 var divisor = Math.Sqrt(length1 * length2); 59 if (divisor.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0"); 60 if (p1Enum.MoveNext() || p2Enum.MoveNext()) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 61 return 1 - innerprod / divisor; 61 62 } 62 var l = Math.Sqrt(length1 * length2);63 if (l.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0");64 return 1 - innerprod / l;65 63 } 66 64 #endregion 67 65 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 68 return GetDistance(a .ToArray(), b.ToArray());66 return GetDistance(a, b); 69 67 } 70 68 } -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs
r15249 r15571 29 29 [StorableClass] 30 30 public abstract class DistanceBase<T> : Item, IDistance<T> { 31 32 31 #region HLConstructors & Cloning 33 32 [StorableConstructor] … … 44 43 45 44 public double Get(object x, object y) { 46 return Get((T) x, (T)y);45 return Get((T) x, (T) y); 47 46 } 48 47 49 48 public IComparer GetDistanceComparer(object item) { 50 return new DistanceComparer((T) item, this);49 return new DistanceComparer((T) item, this); 51 50 } 52 51 53 privateclass DistanceComparer : IComparer<T>, IComparer {52 internal class DistanceComparer : IComparer<T>, IComparer { 54 53 private readonly T item; 55 54 private readonly IDistance<T> dist; … … 65 64 66 65 public int Compare(object x, object y) { 67 return Compare((T) x, (T)y);66 return Compare((T) x, (T) y); 68 67 } 69 68 } -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclideanDistance.cs
r15249 r15571 31 31 [Item("EuclideanDistance", "A norm function that uses Euclidean distance")] 32 32 public class EuclideanDistance : DistanceBase<IEnumerable<double>> { 33 34 33 #region HLConstructors & Cloning 35 34 [StorableConstructor] 36 35 protected EuclideanDistance(bool deserializing) : base(deserializing) { } 37 36 protected EuclideanDistance(EuclideanDistance original, Cloner cloner) : base(original, cloner) { } 38 public override IDeepCloneable Clone(Cloner cloner) { return new EuclideanDistance(this, cloner); } 37 public override IDeepCloneable Clone(Cloner cloner) { 38 return new EuclideanDistance(this, cloner); 39 } 39 40 public EuclideanDistance() { } 40 41 #endregion 41 42 42 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 43 if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 44 var sum = 0.0; 45 for (var i = 0; i < point1.Count; i++) { 46 var d = point1[i] - point2[i]; 47 sum += d * d; 43 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 44 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 45 var sum = 0.0; 46 while (p1Enum.MoveNext() & p2Enum.MoveNext()) { 47 var d = p1Enum.Current - p2Enum.Current; 48 sum += d * d; 49 } 50 if (p1Enum.MoveNext() || p2Enum.MoveNext()) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 51 return Math.Sqrt(sum); 48 52 } 49 50 return Math.Sqrt(sum);51 53 } 52 54 53 55 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 54 return GetDistance(a .ToArray(), b.ToArray());56 return GetDistance(a, b); 55 57 } 56 58 } -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/ManhattanDistance.cs
r15249 r15571 31 31 [Item("ManhattanDistance", "A distance function that uses block distance")] 32 32 public class ManhattanDistance : DistanceBase<IEnumerable<double>> { 33 34 33 #region HLConstructors & Cloning 35 34 [StorableConstructor] … … 45 44 #endregion 46 45 47 public static double GetDistance(double[] point1, double[] point2) { 48 if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 49 var sum = 0.0; 50 for (var i = 0; i < point1.Length; i++) 51 sum += Math.Abs(point1[i] + point2[i]); 52 return sum; 46 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 47 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 48 var sum = 0.0; 49 while (p1Enum.MoveNext() & p2Enum.MoveNext()) 50 sum += Math.Abs(p1Enum.Current - p2Enum.Current); 51 if (p1Enum.MoveNext() || p2Enum.MoveNext()) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 52 return sum; 53 } 53 54 } 54 55 55 56 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 56 return GetDistance(a .ToArray(), b.ToArray());57 return GetDistance(a, b); 57 58 } 58 59 } -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/WeightedEuclideanDistance.cs
r15532 r15571 50 50 [StorableConstructor] 51 51 protected WeightedEuclideanDistance(bool deserializing) : base(deserializing) { } 52 [StorableHook(HookType.AfterDeserialization)] 52 53 private void AfterDeserialization() { 53 54 RegisterParameterEvents(); -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r15249 r15571 38 38 namespace HeuristicLab.Algorithms.DataAnalysis { 39 39 /// <summary> 40 /// t- distributed stochastic neighbourhood embedding (tSNE) projects the data in a low dimensional40 /// t-Distributed Stochastic Neighbor Embedding (tSNE) projects the data in a low dimensional 41 41 /// space to allow visual cluster identification. 42 42 /// </summary> 43 [Item("t SNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " +44 "dimensional space to allow visual cluster identification. Implemented similar to: https://lvdmaaten.github.io/tsne/#implementations (Barnes-Hut t-SNE). Described in : https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf")]43 [Item("t-Distributed Stochastic Neighbor Embedding (tSNE)", "t-Distributed Stochastic Neighbor Embedding projects the data in a low " + 44 "dimensional space to allow visual cluster identification. Implemented similar to: https://lvdmaaten.github.io/tsne/#implementations (Barnes-Hut t-SNE). Described in : https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf")] 45 45 [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)] 46 46 [StorableClass] … … 57 57 } 58 58 59 #region parameter names59 #region Parameter names 60 60 private const string DistanceFunctionParameterName = "DistanceFunction"; 61 61 private const string PerplexityParameterName = "Perplexity"; … … 72 72 private const string ClassesNameParameterName = "ClassesName"; 73 73 private const string NormalizationParameterName = "Normalization"; 74 private const string RandomInitializationParameterName = "RandomInitialization"; 74 75 private const string UpdateIntervalParameterName = "UpdateInterval"; 75 76 #endregion 76 77 77 #region result names78 #region Result names 78 79 private const string IterationResultName = "Iteration"; 79 80 private const string ErrorResultName = "Error"; … … 83 84 #endregion 84 85 85 #region parameter properties86 #region Parameter properties 86 87 public IFixedValueParameter<DoubleValue> PerplexityParameter { 87 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }88 get { return (IFixedValueParameter<DoubleValue>)Parameters[PerplexityParameterName]; } 88 89 } 89 90 public IFixedValueParameter<PercentValue> ThetaParameter { 90 get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; }91 get { return (IFixedValueParameter<PercentValue>)Parameters[ThetaParameterName]; } 91 92 } 92 93 public IFixedValueParameter<IntValue> NewDimensionsParameter { 93 get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }94 get { return (IFixedValueParameter<IntValue>)Parameters[NewDimensionsParameterName]; } 94 95 } 95 96 public IConstrainedValueParameter<IDistance<double[]>> DistanceFunctionParameter { 96 get { return Parameters[DistanceFunctionParameterName] as IConstrainedValueParameter<IDistance<double[]>>; }97 get { return (IConstrainedValueParameter<IDistance<double[]>>)Parameters[DistanceFunctionParameterName]; } 97 98 } 98 99 public IFixedValueParameter<IntValue> MaxIterationsParameter { 99 get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter<IntValue>; }100 get { return (IFixedValueParameter<IntValue>)Parameters[MaxIterationsParameterName]; } 100 101 } 101 102 public IFixedValueParameter<IntValue> StopLyingIterationParameter { 102 get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter<IntValue>; }103 get { return (IFixedValueParameter<IntValue>)Parameters[StopLyingIterationParameterName]; } 103 104 } 104 105 public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter { 105 get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter<IntValue>; }106 get { return (IFixedValueParameter<IntValue>)Parameters[MomentumSwitchIterationParameterName]; } 106 107 } 107 108 public IFixedValueParameter<DoubleValue> InitialMomentumParameter { 108 get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter<DoubleValue>; }109 get { return (IFixedValueParameter<DoubleValue>)Parameters[InitialMomentumParameterName]; } 109 110 } 110 111 public IFixedValueParameter<DoubleValue> FinalMomentumParameter { 111 get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter<DoubleValue>; }112 get { return (IFixedValueParameter<DoubleValue>)Parameters[FinalMomentumParameterName]; } 112 113 } 113 114 public IFixedValueParameter<DoubleValue> EtaParameter { 114 get { return Parameters[EtaParameterName] as IFixedValueParameter<DoubleValue>; }115 get { return (IFixedValueParameter<DoubleValue>)Parameters[EtaParameterName]; } 115 116 } 116 117 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter { 117 get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }118 get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; } 118 119 } 119 120 public IFixedValueParameter<IntValue> SeedParameter { 120 get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }121 get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; } 121 122 } 122 123 public IConstrainedValueParameter<StringValue> ClassesNameParameter { 123 get { return Parameters[ClassesNameParameterName] as IConstrainedValueParameter<StringValue>; }124 get { return (IConstrainedValueParameter<StringValue>)Parameters[ClassesNameParameterName]; } 124 125 } 125 126 public IFixedValueParameter<BoolValue> NormalizationParameter { 126 get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; } 127 get { return (IFixedValueParameter<BoolValue>)Parameters[NormalizationParameterName]; } 128 } 129 public IFixedValueParameter<BoolValue> RandomInitializationParameter { 130 get { return (IFixedValueParameter<BoolValue>)Parameters[RandomInitializationParameterName]; } 127 131 } 128 132 public IFixedValueParameter<IntValue> UpdateIntervalParameter { 129 get { return Parameters[UpdateIntervalParameterName] as IFixedValueParameter<IntValue>; }133 get { return (IFixedValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; } 130 134 } 131 135 #endregion … … 187 191 set { NormalizationParameter.Value.Value = value; } 188 192 } 189 193 public bool RandomInitialization { 194 get { return RandomInitializationParameter.Value.Value; } 195 set { RandomInitializationParameter.Value.Value = value; } 196 } 190 197 public int UpdateInterval { 191 198 get { return UpdateIntervalParameter.Value.Value; } … … 194 201 #endregion 195 202 203 #region Storable poperties 204 [Storable] 205 private Dictionary<string, IList<int>> dataRowIndices; 206 [Storable] 207 private TSNEStatic<double[]>.TSNEState state; 208 #endregion 209 196 210 #region Constructors & Cloning 197 211 [StorableConstructor] 198 212 private TSNEAlgorithm(bool deserializing) : base(deserializing) { } 199 213 214 [StorableHook(HookType.AfterDeserialization)] 215 private void AfterDeserialization() { 216 if (!Parameters.ContainsKey(RandomInitializationParameterName)) 217 Parameters.Add(new FixedValueParameter<BoolValue>(RandomInitializationParameterName, "Wether data points should be randomly initialized or according to the first 2 dimensions", new BoolValue(true))); 218 RegisterParameterEvents(); 219 } 200 220 private TSNEAlgorithm(TSNEAlgorithm original, Cloner cloner) : base(original, cloner) { 201 if (original.dataRowNames != null) 202 this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames); 203 if (original.dataRows != null) 204 this.dataRows = original.dataRows.ToDictionary(kvp => kvp.Key, kvp => cloner.Clone(kvp.Value)); 221 if (original.dataRowIndices != null) 222 dataRowIndices = new Dictionary<string, IList<int>>(original.dataRowIndices); 205 223 if (original.state != null) 206 this.state = cloner.Clone(original.state); 207 this.iter = original.iter; 208 } 209 public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); } 224 state = cloner.Clone(original.state); 225 RegisterParameterEvents(); 226 } 227 public override IDeepCloneable Clone(Cloner cloner) { 228 return new TSNEAlgorithm(this, cloner); 229 } 210 230 public TSNEAlgorithm() { 211 231 var distances = new ItemSet<IDistance<double[]>>(ApplicationManager.Manager.GetInstances<IDistance<double[]>>()); … … 213 233 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25))); 214 234 Parameters.Add(new FixedValueParameter<PercentValue>(ThetaParameterName, "Value describing how much appoximated " + 215 "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " +216 "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " +217 "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " +218 "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. It may give" +219 " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0)));235 "gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise. " + 236 "Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). CAUTION: exact calculation of " + 237 "forces requires building a non-sparse N*N matrix where N is the number of data points. This may " + 238 "exceed memory limitations. The function is designed to run on large (N > 5000) data sets. It may give" + 239 " poor performance on very small data sets(it is better to use a standard t - SNE implementation on such data).", new PercentValue(0))); 220 240 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2))); 221 241 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent.", new IntValue(1000))); … … 230 250 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true))); 231 251 Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "The interval after which the results will be updated.", new IntValue(50))); 232 Parameters[UpdateIntervalParameterName].Hidden = true; 233 252 Parameters.Add(new FixedValueParameter<BoolValue>(RandomInitializationParameterName, "Wether data points should be randomly initialized or according to the first 2 dimensions", new BoolValue(true))); 253 254 UpdateIntervalParameter.Hidden = true; 234 255 MomentumSwitchIterationParameter.Hidden = true; 235 256 InitialMomentumParameter.Hidden = true; … … 238 259 EtaParameter.Hidden = false; 239 260 Problem = new RegressionProblem(); 240 } 241 #endregion 242 243 [Storable] 244 private Dictionary<string, List<int>> dataRowNames; 245 [Storable] 246 private Dictionary<string, ScatterPlotDataRow> dataRows; 247 [Storable] 248 private TSNEStatic<double[]>.TSNEState state; 249 [Storable] 250 private int iter; 261 RegisterParameterEvents(); 262 } 263 #endregion 251 264 252 265 public override void Prepare() { 253 266 base.Prepare(); 254 dataRowNames = null; 255 dataRows = null; 267 dataRowIndices = null; 256 268 state = null; 257 269 } … … 259 271 protected override void Run(CancellationToken cancellationToken) { 260 272 var problemData = Problem.ProblemData; 261 // set up and initialized everything if necessary 273 // set up and initialize everything if necessary 274 var wdist = DistanceFunction as WeightedEuclideanDistance; 275 if (wdist != null) wdist.Initialize(problemData); 262 276 if (state == null) { 263 277 if (SetSeedRandomly) Seed = new System.Random().Next(); … … 265 279 var dataset = problemData.Dataset; 266 280 var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); 267 var data = new double[dataset.Rows][]; 268 for (var row = 0; row < dataset.Rows; row++) 269 data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray(); 270 271 if (Normalization) data = NormalizeData(data); 272 273 state = TSNEStatic<double[]>.CreateState(data, DistanceFunction, random, NewDimensions, Perplexity, Theta, 274 StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta); 275 276 SetUpResults(data); 277 iter = 0; 278 } 279 for (; iter < MaxIterations && !cancellationToken.IsCancellationRequested; iter++) { 280 if (iter % UpdateInterval == 0) 281 Analyze(state); 281 var allindices = Problem.ProblemData.AllIndices.ToArray(); 282 283 // jagged array is required to meet the static method declarations of TSNEStatic<T> 284 var data = Enumerable.Range(0, dataset.Rows).Select(x => new double[allowedInputVariables.Length]).ToArray(); 285 var col = 0; 286 foreach (var s in allowedInputVariables) { 287 var row = 0; 288 foreach (var d in dataset.GetDoubleValues(s)) { 289 data[row][col] = d; 290 row++; 291 } 292 col++; 293 } 294 if (Normalization) data = NormalizeInputData(data); 295 state = TSNEStatic<double[]>.CreateState(data, DistanceFunction, random, NewDimensions, Perplexity, Theta, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, RandomInitialization); 296 SetUpResults(allindices); 297 } 298 while (state.iter < MaxIterations && !cancellationToken.IsCancellationRequested) { 299 if (state.iter % UpdateInterval == 0) Analyze(state); 282 300 TSNEStatic<double[]>.Iterate(state); 283 301 } … … 294 312 protected override void RegisterProblemEvents() { 295 313 base.RegisterProblemEvents(); 314 if (Problem == null) return; 296 315 Problem.ProblemDataChanged += OnProblemDataChanged; 297 } 316 if (Problem.ProblemData == null) return; 317 Problem.ProblemData.Changed += OnPerplexityChanged; 318 Problem.ProblemData.Changed += OnColumnsChanged; 319 if (Problem.ProblemData.Dataset == null) return; 320 Problem.ProblemData.Dataset.RowsChanged += OnPerplexityChanged; 321 Problem.ProblemData.Dataset.ColumnsChanged += OnColumnsChanged; 322 } 323 298 324 protected override void DeregisterProblemEvents() { 299 325 base.DeregisterProblemEvents(); 326 if (Problem == null) return; 300 327 Problem.ProblemDataChanged -= OnProblemDataChanged; 328 if (Problem.ProblemData == null) return; 329 Problem.ProblemData.Changed -= OnPerplexityChanged; 330 Problem.ProblemData.Changed -= OnColumnsChanged; 331 if (Problem.ProblemData.Dataset == null) return; 332 Problem.ProblemData.Dataset.RowsChanged -= OnPerplexityChanged; 333 Problem.ProblemData.Dataset.ColumnsChanged -= OnColumnsChanged; 334 } 335 336 protected override void OnStopped() { 337 base.OnStopped(); 338 //bwerth: state objects can be very large; avoid state serialization 339 state = null; 340 dataRowIndices = null; 301 341 } 302 342 303 343 private void OnProblemDataChanged(object sender, EventArgs args) { 304 344 if (Problem == null || Problem.ProblemData == null) return; 345 OnPerplexityChanged(this, null); 346 OnColumnsChanged(this, null); 347 Problem.ProblemData.Changed += OnPerplexityChanged; 348 Problem.ProblemData.Changed += OnColumnsChanged; 349 if (Problem.ProblemData.Dataset == null) return; 350 Problem.ProblemData.Dataset.RowsChanged += OnPerplexityChanged; 351 Problem.ProblemData.Dataset.ColumnsChanged += OnColumnsChanged; 305 352 if (!Parameters.ContainsKey(ClassesNameParameterName)) return; 306 353 ClassesNameParameter.ValidValues.Clear(); … … 308 355 } 309 356 357 private void OnColumnsChanged(object sender, EventArgs e) { 358 if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(DistanceFunctionParameterName)) return; 359 DistanceFunctionParameter.ValidValues.OfType<WeightedEuclideanDistance>().Single().AdaptToProblemData(Problem.ProblemData); 360 } 361 362 private void RegisterParameterEvents() { 363 PerplexityParameter.Value.ValueChanged += OnPerplexityChanged; 364 } 365 366 private void OnPerplexityChanged(object sender, EventArgs e) { 367 if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(PerplexityParameterName)) return; 368 PerplexityParameter.Value.Value = Math.Max(1, Math.Min((Problem.ProblemData.Dataset.Rows - 1) / 3.0, Perplexity)); 369 } 310 370 #endregion 311 371 312 372 #region Helpers 313 private void SetUpResults(IReadOnly Collection<double[]> data) {373 private void SetUpResults(IReadOnlyList<int> allIndices) { 314 374 if (Results == null) return; 315 375 var results = Results; 316 dataRowNames = new Dictionary<string, List<int>>(); 317 dataRows = new Dictionary<string, ScatterPlotDataRow>(); 376 dataRowIndices = new Dictionary<string, IList<int>>(); 318 377 var problemData = Problem.ProblemData; 319 378 320 //color datapoints acording to classes variable (be it double or string) 321 if (problemData.Dataset.VariableNames.Contains(ClassesName)) { 322 if ((problemData.Dataset as Dataset).VariableHasType<string>(ClassesName)) { 323 var classes = problemData.Dataset.GetStringValues(ClassesName).ToArray(); 324 for (var i = 0; i < classes.Length; i++) { 325 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>()); 326 dataRowNames[classes[i]].Add(i); 379 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0))); 380 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0))); 381 if (!results.ContainsKey(ScatterPlotResultName)) results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, ""))); 382 if (!results.ContainsKey(DataResultName)) results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix())); 383 if (!results.ContainsKey(ErrorPlotResultName)) { 384 var errortable = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent") { 385 VisualProperties = { 386 XAxisTitle = "UpdateIntervall", 387 YAxisTitle = "Error", 388 YAxisLogScale = true 327 389 } 328 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(ClassesName)) { 329 var classValues = problemData.Dataset.GetDoubleValues(ClassesName).ToArray(); 330 var max = classValues.Max() + 0.1; 331 var min = classValues.Min() - 0.1; 332 const int contours = 8; 333 for (var i = 0; i < contours; i++) { 334 var contourname = GetContourName(i, min, max, contours); 335 dataRowNames.Add(contourname, new List<int>()); 336 dataRows.Add(contourname, new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>())); 337 dataRows[contourname].VisualProperties.Color = GetHeatMapColor(i, contours); 338 dataRows[contourname].VisualProperties.PointSize = i + 3; 339 } 340 for (var i = 0; i < classValues.Length; i++) { 341 dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); 342 } 343 } 390 }; 391 errortable.Rows.Add(new DataRow("Errors")); 392 errortable.Rows["Errors"].VisualProperties.StartIndexZero = true; 393 results.Add(new Result(ErrorPlotResultName, errortable)); 394 } 395 396 //color datapoints acording to classes variable (be it double, datetime or string) 397 if (!problemData.Dataset.VariableNames.Contains(ClassesName)) { 398 dataRowIndices.Add("Training", problemData.TrainingIndices.ToList()); 399 dataRowIndices.Add("Test", problemData.TestIndices.ToList()); 400 return; 401 } 402 403 var classificationData = problemData as ClassificationProblemData; 404 if (classificationData != null && classificationData.TargetVariable.Equals(ClassesName)) { 405 var classNames = classificationData.ClassValues.Zip(classificationData.ClassNames, (v, n) => new {v, n}).ToDictionary(x => x.v, x => x.n); 406 var classes = classificationData.Dataset.GetDoubleValues(classificationData.TargetVariable, allIndices).Select(v => classNames[v]).ToArray(); 407 for (var i = 0; i < classes.Length; i++) { 408 if (!dataRowIndices.ContainsKey(classes[i])) dataRowIndices.Add(classes[i], new List<int>()); 409 dataRowIndices[classes[i]].Add(i); 410 } 411 } else if (((Dataset)problemData.Dataset).VariableHasType<string>(ClassesName)) { 412 var classes = problemData.Dataset.GetStringValues(ClassesName, allIndices).ToArray(); 413 for (var i = 0; i < classes.Length; i++) { 414 if (!dataRowIndices.ContainsKey(classes[i])) dataRowIndices.Add(classes[i], new List<int>()); 415 dataRowIndices[classes[i]].Add(i); 416 } 417 } else if (((Dataset)problemData.Dataset).VariableHasType<double>(ClassesName)) { 418 var clusterdata = new Dataset(problemData.Dataset.DoubleVariables, problemData.Dataset.DoubleVariables.Select(v => problemData.Dataset.GetDoubleValues(v, allIndices).ToList())); 419 const int contours = 8; 420 Dictionary<int, string> contourMap; 421 IClusteringModel clusterModel; 422 double[][] borders; 423 CreateClusters(clusterdata, ClassesName, contours, out clusterModel, out contourMap, out borders); 424 var contourorder = borders.Select((x, i) => new {x, i}).OrderBy(x => x.x[0]).Select(x => x.i).ToArray(); 425 for (var i = 0; i < contours; i++) { 426 var c = contourorder[i]; 427 var contourname = contourMap[c]; 428 dataRowIndices.Add(contourname, new List<int>()); 429 var row = new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>()) {VisualProperties = {Color = GetHeatMapColor(i, contours), PointSize = 8}}; 430 ((ScatterPlot)results[ScatterPlotResultName].Value).Rows.Add(row); 431 } 432 var allClusters = clusterModel.GetClusterValues(clusterdata, Enumerable.Range(0, clusterdata.Rows)).ToArray(); 433 for (var i = 0; i < clusterdata.Rows; i++) dataRowIndices[contourMap[allClusters[i] - 1]].Add(i); 434 } else if (((Dataset)problemData.Dataset).VariableHasType<DateTime>(ClassesName)) { 435 var clusterdata = new Dataset(problemData.Dataset.DateTimeVariables, problemData.Dataset.DateTimeVariables.Select(v => problemData.Dataset.GetDoubleValues(v, allIndices).ToList())); 436 const int contours = 8; 437 Dictionary<int, string> contourMap; 438 IClusteringModel clusterModel; 439 double[][] borders; 440 CreateClusters(clusterdata, ClassesName, contours, out clusterModel, out contourMap, out borders); 441 var contourorder = borders.Select((x, i) => new {x, i}).OrderBy(x => x.x[0]).Select(x => x.i).ToArray(); 442 for (var i = 0; i < contours; i++) { 443 var c = contourorder[i]; 444 var contourname = contourMap[c]; 445 dataRowIndices.Add(contourname, new List<int>()); 446 var row = new ScatterPlotDataRow(contourname, "", new List<Point2D<double>>()) {VisualProperties = {Color = GetHeatMapColor(i, contours), PointSize = 8}}; 447 row.VisualProperties.PointSize = 8; 448 ((ScatterPlot)results[ScatterPlotResultName].Value).Rows.Add(row); 449 } 450 var allClusters = clusterModel.GetClusterValues(clusterdata, Enumerable.Range(0, clusterdata.Rows)).ToArray(); 451 for (var i = 0; i < clusterdata.Rows; i++) dataRowIndices[contourMap[allClusters[i] - 1]].Add(i); 344 452 } else { 345 dataRowNames.Add("Training", problemData.TrainingIndices.ToList()); 346 dataRowNames.Add("Test", problemData.TestIndices.ToList()); 347 } 348 349 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0))); 350 else ((IntValue)results[IterationResultName].Value).Value = 0; 351 352 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0))); 353 else ((DoubleValue)results[ErrorResultName].Value).Value = 0; 354 355 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"))); 356 else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"); 357 358 var plot = results[ErrorPlotResultName].Value as DataTable; 359 if (plot == null) throw new ArgumentException("could not create/access error data table in results collection"); 360 361 if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors")); 362 plot.Rows["errors"].Values.Clear(); 363 plot.Rows["errors"].VisualProperties.StartIndexZero = true; 364 365 results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, ""))); 366 results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix())); 453 dataRowIndices.Add("Training", problemData.TrainingIndices.ToList()); 454 dataRowIndices.Add("Test", problemData.TestIndices.ToList()); 455 } 367 456 } 368 457 … … 372 461 var plot = results[ErrorPlotResultName].Value as DataTable; 373 462 if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection."); 374 var errors = plot.Rows[" errors"].Values;463 var errors = plot.Rows["Errors"].Values; 375 464 var c = tsneState.EvaluateError(); 376 465 errors.Add(c); … … 378 467 ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last(); 379 468 380 var ndata = Normalize (tsneState.newData);469 var ndata = NormalizeProjectedData(tsneState.newData); 381 470 results[DataResultName].Value = new DoubleMatrix(ndata); 382 471 var splot = results[ScatterPlotResultName].Value as ScatterPlot; 383 FillScatterPlot(ndata, splot); 384 } 385 386 private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) { 387 foreach (var rowName in dataRowNames.Keys) { 388 if (!plot.Rows.ContainsKey(rowName)) 389 plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>())); 390 plot.Rows[rowName].Points.Replace(dataRowNames[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))); 391 } 392 } 393 394 private static double[,] Normalize(double[,] data) { 472 FillScatterPlot(ndata, splot, dataRowIndices); 473 } 474 475 private static void FillScatterPlot(double[,] lowDimData, ScatterPlot plot, Dictionary<string, IList<int>> dataRowIndices) { 476 foreach (var rowName in dataRowIndices.Keys) { 477 if (!plot.Rows.ContainsKey(rowName)) { 478 plot.Rows.Add(new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>())); 479 plot.Rows[rowName].VisualProperties.PointSize = 8; 480 } 481 plot.Rows[rowName].Points.Replace(dataRowIndices[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))); 482 } 483 } 484 485 private static double[,] NormalizeProjectedData(double[,] data) { 395 486 var max = new double[data.GetLength(1)]; 396 487 var min = new double[data.GetLength(1)]; … … 398 489 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i]; 399 490 for (var i = 0; i < data.GetLength(0); i++) 400 401 402 403 404 491 for (var j = 0; j < data.GetLength(1); j++) { 492 var v = data[i, j]; 493 max[j] = Math.Max(max[j], v); 494 min[j] = Math.Min(min[j], v); 495 } 405 496 for (var i = 0; i < data.GetLength(0); i++) { 406 497 for (var j = 0; j < data.GetLength(1); j++) { 407 498 var d = max[j] - min[j]; 408 var s = data[i, j] - (max[j] + min[j]) / 2; 409 if (d.IsAlmost(0)) res[i, j] = data[i, j]; 410 else res[i, j] = s / d; 499 var s = data[i, j] - (max[j] + min[j]) / 2; //shift data 500 if (d.IsAlmost(0)) res[i, j] = data[i, j]; //no scaling possible 501 else res[i, j] = s / d; //scale data 411 502 } 412 503 } … … 414 505 } 415 506 416 private static double[][] Normalize Data(IReadOnlyList<double[]> data) {507 private static double[][] NormalizeInputData(IReadOnlyList<IReadOnlyList<double>> data) { 417 508 // as in tSNE implementation by van der Maaten 418 var n = data[0]. Length;509 var n = data[0].Count; 419 510 var mean = new double[n]; 420 511 var max = new double[n]; … … 426 517 for (var i = 0; i < data.Count; i++) { 427 518 nData[i] = new double[n]; 428 for (var j = 0; j < n; j++) nData[i][j] = max[j].IsAlmost(0) ? data[i][j] - mean[j] : (data[i][j] - mean[j]) / max[j]; 519 for (var j = 0; j < n; j++) 520 nData[i][j] = max[j].IsAlmost(0) ? data[i][j] - mean[j] : (data[i][j] - mean[j]) / max[j]; 429 521 } 430 522 return nData; … … 432 524 433 525 private static Color GetHeatMapColor(int contourNr, int noContours) { 434 var q = (double)contourNr / noContours; // q in [0,1] 435 var c = q < 0.5 ? Color.FromArgb((int)(q * 2 * 255), 255, 0) : Color.FromArgb(255, (int)((1 - q) * 2 * 255), 0); 436 return c; 437 } 438 439 private static string GetContourName(double value, double min, double max, int noContours) { 440 var size = (max - min) / noContours; 441 var contourNr = (int)((value - min) / size); 442 return GetContourName(contourNr, min, max, noContours); 443 } 444 445 private static string GetContourName(int i, double min, double max, int noContours) { 446 var size = (max - min) / noContours; 447 return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")"; 526 return ConvertTotalToRgb(0, noContours, contourNr); 527 } 528 529 private static void CreateClusters(IDataset data, string target, int contours, out IClusteringModel contourCluster, out Dictionary<int, string> contourNames, out double[][] borders) { 530 var cpd = new ClusteringProblemData((Dataset)data, new[] {target}); 531 contourCluster = KMeansClustering.CreateKMeansSolution(cpd, contours, 3).Model; 532 533 borders = Enumerable.Range(0, contours).Select(x => new[] {double.MaxValue, double.MinValue}).ToArray(); 534 var clusters = contourCluster.GetClusterValues(cpd.Dataset, cpd.AllIndices).ToArray(); 535 var targetvalues = cpd.Dataset.GetDoubleValues(target).ToArray(); 536 foreach (var i in cpd.AllIndices) { 537 var cl = clusters[i] - 1; 538 var clv = targetvalues[i]; 539 if (borders[cl][0] > clv) borders[cl][0] = clv; 540 if (borders[cl][1] < clv) borders[cl][1] = clv; 541 } 542 543 contourNames = new Dictionary<int, string>(); 544 for (var i = 0; i < contours; i++) 545 contourNames.Add(i, "[" + borders[i][0] + ";" + borders[i][1] + "]"); 546 } 547 548 private static Color ConvertTotalToRgb(double low, double high, double cell) { 549 var colorGradient = ColorGradient.Colors; 550 var range = high - low; 551 var h = Math.Min(cell / range * colorGradient.Count, colorGradient.Count - 1); 552 return colorGradient[(int)h]; 448 553 } 449 554 #endregion -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r15249 r15571 65 65 [StorableClass] 66 66 public class TSNEStatic<T> { 67 68 67 [StorableClass] 69 68 public sealed class TSNEState : DeepCloneable { … … 170 169 [StorableConstructor] 171 170 public TSNEState(bool deserializing) { } 172 public TSNEState(T[] data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta) { 171 172 public TSNEState(IReadOnlyList<T> data, IDistance<T> distance, IRandom random, int newDimensions, double perplexity, 173 double theta, int stopLyingIter, int momSwitchIter, double momentum, double finalMomentum, double eta, bool randomInit) { 173 174 this.distance = distance; 174 175 this.random = random; … … 183 184 184 185 // initialize 185 noDatapoints = data. Length;186 noDatapoints = data.Count; 186 187 if (noDatapoints - 1 < 3 * perplexity) 187 188 throw new ArgumentException("Perplexity too large for the number of data points!"); … … 193 194 gains = new double[noDatapoints, newDimensions]; 194 195 for (var i = 0; i < noDatapoints; i++) 195 196 196 for (var j = 0; j < newDimensions; j++) 197 gains[i, j] = 1.0; 197 198 198 199 p = null; … … 212 213 var rand = new NormalDistributedRandom(random, 0, 1); 213 214 for (var i = 0; i < noDatapoints; i++) 214 for (var j = 0; j < newDimensions; j++) 215 newData[i, j] = rand.NextDouble() * .0001; 215 for (var j = 0; j < newDimensions; j++) 216 newData[i, j] = rand.NextDouble() * .0001; 217 218 if (!(data[0] is IReadOnlyList<double>) || randomInit) return; 219 for (var i = 0; i < noDatapoints; i++) 220 for (var j = 0; j < newDimensions; j++) { 221 var row = (IReadOnlyList<double>) data[i]; 222 newData[i, j] = row[j % row.Count]; 223 } 216 224 } 217 225 #endregion 218 226 219 227 public double EvaluateError() { 220 return exact ? 221 EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : 222 EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 228 return exact ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) : EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 223 229 } 224 230 225 231 #region Helpers 226 private static void CalculateApproximateSimilarities( T[]data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {232 private static void CalculateApproximateSimilarities(IReadOnlyList<T> data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) { 227 233 // Compute asymmetric pairwise input similarities 228 ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity));234 ComputeGaussianPerplexity(data, distance, out rowP, out colP, out valP, perplexity, (int) (3 * perplexity)); 229 235 // Symmetrize input similarities 230 236 int[] sRowP, symColP; … … 235 241 valP = sValP; 236 242 var sumP = .0; 237 for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i]; 238 for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP; 239 } 240 241 private static double[,] CalculateExactSimilarites(T[] data, IDistance<T> distance, double perplexity) { 243 for (var i = 0; i < rowP[data.Count]; i++) sumP += valP[i]; 244 for (var i = 0; i < rowP[data.Count]; i++) valP[i] /= sumP; 245 } 246 private static double[,] CalculateExactSimilarites(IReadOnlyList<T> data, IDistance<T> distance, double perplexity) { 242 247 // Compute similarities 243 var p = new double[data. Length, data.Length];248 var p = new double[data.Count, data.Count]; 244 249 ComputeGaussianPerplexity(data, distance, p, perplexity); 245 250 // Symmetrize input similarities 246 for (var n = 0; n < data. Length; n++) {247 for (var m = n + 1; m < data. Length; m++) {251 for (var n = 0; n < data.Count; n++) { 252 for (var m = n + 1; m < data.Count; m++) { 248 253 p[n, m] += p[m, n]; 249 254 p[m, n] = p[n, m]; … … 251 256 } 252 257 var sumP = .0; 253 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; 254 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; 258 for (var i = 0; i < data.Count; i++) { 259 for (var j = 0; j < data.Count; j++) { 260 sumP += p[i, j]; 261 } 262 } 263 for (var i = 0; i < data.Count; i++) { 264 for (var j = 0; j < data.Count; j++) { 265 p[i, j] /= sumP; 266 } 267 } 255 268 return p; 256 269 } 257 258 270 private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) { 259 271 if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!"); … … 290 302 291 303 // Iterate until we found a good perplexity 292 var iter = 0; double sumP = 0; 304 var iter = 0; 305 double sumP = 0; 293 306 while (!found && iter < 200) { 294 295 307 // Compute Gaussian kernel row 296 308 for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); … … 307 319 if (hdiff < tol && -hdiff < tol) { 308 320 found = true; 309 } else { 321 } 322 else { 310 323 if (hdiff > 0) { 311 324 minBeta = beta; … … 314 327 else 315 328 beta = (beta + maxBeta) / 2.0; 316 } else { 329 } 330 else { 317 331 maxBeta = beta; 318 332 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) … … 335 349 } 336 350 } 337 private static void ComputeGaussianPerplexity( T[]x, IDistance<T> distance, double[,] p, double perplexity) {351 private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, double[,] p, double perplexity) { 338 352 // Compute the distance matrix 339 353 var dd = ComputeDistances(x, distance); 340 354 341 var n = x. Length;355 var n = x.Count; 342 356 // Compute the Gaussian kernel row by row 343 357 for (var i = 0; i < n; i++) { … … 352 366 // Iterate until we found a good perplexity 353 367 var iter = 0; 354 while (!found && iter < 200) { 368 while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten 355 369 356 370 // Compute Gaussian kernel row … … 369 383 if (hdiff < tol && -hdiff < tol) { 370 384 found = true; 371 } else { 385 } 386 else { 372 387 if (hdiff > 0) { 373 388 minBeta = beta; … … 376 391 else 377 392 beta = (beta + maxBeta) / 2.0; 378 } else { 393 } 394 else { 379 395 maxBeta = beta; 380 396 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) … … 393 409 } 394 410 } 395 396 private static double[][] ComputeDistances(T[] x, IDistance<T> distance) { 397 var res = new double[x.Length][]; 398 for (var r = 0; r < x.Length; r++) { 399 var rowV = new double[x.Length]; 411 private static double[][] ComputeDistances(IReadOnlyList<T> x, IDistance<T> distance) { 412 var res = new double[x.Count][]; 413 for (var r = 0; r < x.Count; r++) { 414 var rowV = new double[x.Count]; 400 415 // all distances must be symmetric 401 416 for (var c = 0; c < r; c++) { … … 403 418 } 404 419 rowV[r] = 0.0; // distance to self is zero for all distances 405 for (var c = r + 1; c < x. Length; c++) {420 for (var c = r + 1; c < x.Count; c++) { 406 421 rowV[c] = distance.Get(x[r], x[c]); 407 422 } … … 411 426 // return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray(); 412 427 } 413 414 428 private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) { 415 429 // Compute the squared Euclidean distance matrix … … 425 439 q[n1, m] = 1 / (1 + dd[n1, m]); 426 440 sumQ += q[n1, m]; 427 } else q[n1, m] = double.Epsilon; 441 } 442 else q[n1, m] = double.Epsilon; 428 443 } 429 444 } … … 433 448 var c = .0; 434 449 for (var i = 0; i < n; i++) 435 436 437 450 for (var j = 0; j < n; j++) { 451 c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon)); 452 } 438 453 return c; 439 454 } 440 441 455 private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) { 442 456 // Get estimate of normalization term … … 463 477 } 464 478 private static void SymmetrizeMatrix(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, out int[] symRowP, out int[] symColP, out double[] symValP) { 465 466 479 // Count number of elements and row counts of symmetric matrix 467 480 var n = rowP.Count - 1; … … 469 482 for (var j = 0; j < n; j++) { 470 483 for (var i = rowP[j]; i < rowP[j + 1]; i++) { 471 472 484 // Check whether element (col_P[i], n) is present 473 485 var present = false; … … 497 509 var offset = new int[n]; 498 510 for (var j = 0; j < n; j++) { 499 for (var i = rowP[j]; i < rowP[j + 1]; i++) { 511 for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i]) 500 512 501 513 // Check whether element (col_P[i], n) is present … … 549 561 public static double[,] Run(T[] data, IDistance<T> distance, IRandom random, 550 562 int newDimensions = 2, double perplexity = 25, int iterations = 1000, 551 double theta = 0, 552 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 563 double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 553 564 double finalMomentum = .8, double eta = 10.0 554 565 ) { 555 566 var state = CreateState(data, distance, random, newDimensions, perplexity, 556 567 theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta); … … 565 576 int newDimensions = 2, double perplexity = 25, double theta = 0, 566 577 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 567 double finalMomentum = .8, double eta = 10.0 568 569 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta );578 double finalMomentum = .8, double eta = 10.0, bool randomInit = true 579 ) { 580 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta, randomInit); 570 581 } 571 582 … … 580 591 for (var j = 0; j < state.newDimensions; j++) { 581 592 state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j]) 582 ? state.gains[i, j] + .2 593 ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct 583 594 : state.gains[i, j] * .8; 584 585 595 if (state.gains[i, j] < .01) state.gains[i, j] = .01; 586 596 } 587 597 } 588 589 598 590 599 // Perform gradient update (with momentum and gains) 591 600 for (var i = 0; i < state.noDatapoints; i++) 592 593 601 for (var j = 0; j < state.newDimensions; j++) 602 state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j]; 594 603 595 604 for (var i = 0; i < state.noDatapoints; i++) 596 597 605 for (var j = 0; j < state.newDimensions; j++) 606 state.newData[i, j] = state.newData[i, j] + state.uY[i, j]; 598 607 599 608 // Make solution zero-mean … … 604 613 if (state.exact) 605 614 for (var i = 0; i < state.noDatapoints; i++) 606 607 615 for (var j = 0; j < state.noDatapoints; j++) 616 state.p[i, j] /= 12.0; 608 617 else 609 618 for (var i = 0; i < state.rowP[state.noDatapoints]; i++) … … 634 643 // Compute final t-SNE gradient 635 644 for (var i = 0; i < n; i++) 636 637 638 645 for (var j = 0; j < d; j++) { 646 dC[i, j] = posF[i, j] - negF[i, j] / sumQ; 647 } 639 648 } 640 649 -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEUtils.cs
r14414 r15571 35 35 } 36 36 37 internal static IList<T>Swap<T>(this IList<T> list, int indexA, int indexB) {37 internal static void Swap<T>(this IList<T> list, int indexA, int indexB) { 38 38 var tmp = list[indexA]; 39 39 list[indexA] = list[indexB]; 40 40 list[indexB] = tmp; 41 return list;42 41 } 43 42 44 internalstatic int Partition<T>(this IList<T> list, int left, int right, int pivotindex, IComparer<T> comparer) {43 private static int Partition<T>(this IList<T> list, int left, int right, int pivotindex, IComparer<T> comparer) { 45 44 var pivotValue = list[pivotindex]; 46 45 list.Swap(pivotindex, right); … … 67 66 /// <param name="comparer">comparer for list elemnts </param> 68 67 /// <returns></returns> 69 internal static T NthElement<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) {68 internal static void PartialSort<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) { 70 69 while (true) { 71 if (left == right) return list[left];72 var pivotindex = left + (int) Math.Floor(new System.Random().Next() % (right - (double)left + 1));70 if (left == right) return; 71 var pivotindex = left + (int) Math.Floor(new System.Random().Next() % (right - (double) left + 1)); 73 72 pivotindex = list.Partition(left, right, pivotindex, comparer); 74 if (n == pivotindex) return list[n];73 if (n == pivotindex) return; 75 74 if (n < pivotindex) right = pivotindex - 1; 76 75 else left = pivotindex + 1; -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs
r15249 r15571 139 139 // Partition around the median distance 140 140 var median = (upper + lower) / 2; 141 items. NthElement(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower]));141 items.PartialSort(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower])); 142 142 143 143 // Threshold of the new node will be the distance to the median -
stable/HeuristicLab.Data.Views
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Data.Views merged: 15548
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Data.Views/3.3
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Data.Views/3.3 merged: 15548
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Data.Views/3.3/StringConvertibleArrayView.Designer.cs
r14968 r15571 65 65 // lengthTextBox 66 66 // 67 this.lengthTextBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 67 this.lengthTextBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 68 68 | System.Windows.Forms.AnchorStyles.Right))); 69 69 this.lengthTextBox.Location = new System.Drawing.Point(52, 0); … … 79 79 this.dataGridView.AllowUserToAddRows = false; 80 80 this.dataGridView.AllowUserToDeleteRows = false; 81 this.dataGridView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 82 | System.Windows.Forms.AnchorStyles.Left) 81 this.dataGridView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 82 | System.Windows.Forms.AnchorStyles.Left) 83 83 | System.Windows.Forms.AnchorStyles.Right))); 84 84 this.dataGridView.ClipboardCopyMode = System.Windows.Forms.DataGridViewClipboardCopyMode.Disable; … … 87 87 this.dataGridView.Location = new System.Drawing.Point(0, 26); 88 88 this.dataGridView.Name = "dataGridView"; 89 this.dataGridView.RowHeadersWidthSizeMode = System.Windows.Forms.DataGridViewRowHeadersWidthSizeMode.AutoSizeToDisplayedHeaders; 89 90 this.dataGridView.Size = new System.Drawing.Size(424, 378); 90 91 this.dataGridView.TabIndex = 2;
Note: See TracChangeset
for help on using the changeset viewer.