Changeset 14785 for branches/TSNE
- Timestamp:
- 03/27/17 15:15:23 (8 years ago)
- Location:
- branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 1 deleted
- 3 edited
- 6 moved
Legend:
- Unmodified
- Added
- Removed
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r14784 r14785 334 334 <Compile Include="Interfaces\TSNEInterfaces\IDataPoint.cs" /> 335 335 <Compile Include="Interfaces\TSNEInterfaces\IDistance.cs" /> 336 <Compile Include="Interfaces\TSNEInterfaces\IS PTree.cs" />337 <Compile Include="Interfaces\TSNEInterfaces\IV PTree.cs" />336 <Compile Include="Interfaces\TSNEInterfaces\ISpacePartitioningTree.cs" /> 337 <Compile Include="Interfaces\TSNEInterfaces\IVantagePointTree.cs" /> 338 338 <Compile Include="kMeans\KMeansClustering.cs" /> 339 339 <Compile Include="kMeans\KMeansClusteringModel.cs" /> … … 419 419 <Compile Include="TimeSeries\AutoregressiveModeling.cs" /> 420 420 <Compile Include="TSNE\Cell.cs" /> 421 <Compile Include="TSNE\DataPoint.cs" />422 421 <Compile Include="TSNE\Distances\DistanceBase.cs" /> 423 <Compile Include="TSNE\Distances\DataPointDistance.cs" />424 422 <Compile Include="TSNE\Distances\EuclideanDistance.cs" /> 423 <Compile Include="TSNE\Distances\IndexedItemDistance.cs" /> 425 424 <Compile Include="TSNE\Distances\InnerProductDistance.cs" /> 426 425 <Compile Include="TSNE\PriorityQueue.cs" /> 427 <Compile Include="TSNE\S Ptree.cs" />428 <Compile Include="TSNE\TSNE .cs" />426 <Compile Include="TSNE\SpacePartitioningTree.cs" /> 427 <Compile Include="TSNE\TSNEAlgorithm.cs" /> 429 428 <Compile Include="TSNE\TSNEStatic.cs" /> 430 429 <Compile Include="TSNE\TSNEUtils.cs" /> 431 <Compile Include="TSNE\V PTree.cs" />430 <Compile Include="TSNE\VantagePointTree.cs" /> 432 431 </ItemGroup> 433 432 <ItemGroup> -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ISpacePartitioningTree.cs
r14784 r14785 23 23 24 24 namespace HeuristicLab.Algorithms.DataAnalysis { 25 public interface IS PTree : IDeepCloneable {25 public interface ISpacePartitioningTree { 26 26 27 27 double[,] Data { set; } 28 28 int GetDepth(); 29 IS PTree GetParent();29 ISpacePartitioningTree GetParent(); 30 30 bool Insert(int newIndex); 31 31 void Subdivide(); -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IVantagePointTree.cs
r14784 r14785 24 24 25 25 namespace HeuristicLab.Algorithms.DataAnalysis { 26 public interface IVPTree<T> : IDeepCloneable { 27 void Create(IEnumerable<T> items); 28 void Search(T target, int k, out List<T> results, out List<double> distances); 26 public interface IVantagePointTree<T> { 27 void Search(T target, int k, out IList<T> results, out IList<double> distances); 29 28 } 30 29 } -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/IndexedItemDistance.cs
r14784 r14785 20 20 #endregion 21 21 22 using HeuristicLab.Collections; 22 23 using HeuristicLab.Common; 23 24 using HeuristicLab.Core; … … 26 27 namespace HeuristicLab.Algorithms.DataAnalysis { 27 28 [StorableClass] 28 [Item("DataPointDistance", "A distance wrapper for DataPoints")] 29 internal class DataPointDistance<T> : DistanceBase<IDataPoint<T>> where T : class { 30 31 #region properties 29 [Item("IndexedItemDistance", "A distance wrapper for indexed items")] 30 internal class IndexedItemDistance<T> : DistanceBase<IndexedItem<T>> { 32 31 [Storable] 33 private IDistance<T> dist; 34 #endregion 32 private readonly IDistance<T> dist; 35 33 36 34 #region HLConstructors & Cloning 37 35 [StorableConstructor] 38 protected DataPointDistance(bool deserializing) : base(deserializing) { }39 protected DataPointDistance(DataPointDistance<T> original, Cloner cloner) : base(original, cloner) {36 protected IndexedItemDistance(bool deserializing) : base(deserializing) { } 37 protected IndexedItemDistance(IndexedItemDistance<T> original, Cloner cloner) : base(original, cloner) { 40 38 dist = cloner.Clone(original.dist); 41 39 } 42 public override IDeepCloneable Clone(Cloner cloner) { return new DataPointDistance<T>(this, cloner); }43 public DataPointDistance(IDistance<T> distance) {40 public override IDeepCloneable Clone(Cloner cloner) { return new IndexedItemDistance<T>(this, cloner); } 41 public IndexedItemDistance(IDistance<T> distance) { 44 42 dist = distance; 45 43 } 46 44 #endregion 47 45 48 public override double Get(I DataPoint<T> a, IDataPoint<T> b) {49 return dist.Get(a. X, b.X);46 public override double Get(IndexedItem<T> a, IndexedItem<T> b) { 47 return dist.Get(a.Value, b.Value); 50 48 } 51 49 } -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs
r14767 r14785 25 25 26 26 namespace HeuristicLab.Algorithms.DataAnalysis { 27 // TODO: move to HeuristicLab.Collections 27 28 28 29 // Code Taken from branch: RoutePlanning, Heap4 -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs
r14784 r14785 62 62 namespace HeuristicLab.Algorithms.DataAnalysis { 63 63 /// <summary> 64 /// Space partitioning tree 64 /// Space partitioning tree (SPTree) 65 65 /// </summary> 66 66 [StorableClass] 67 public class S PTree : DeepCloneable, ISPTree {67 public class SpacePartitioningTree : DeepCloneable, ISpacePartitioningTree { 68 68 private const uint QT_NODE_CAPACITY = 1; 69 69 … … 72 72 private double[] buff; 73 73 [Storable] 74 private ISPTree parent;74 private SpacePartitioningTree parent; 75 75 [Storable] 76 76 private int dimension; … … 95 95 96 96 // Children 97 private ISPTree[] children;97 private SpacePartitioningTree[] children; 98 98 [Storable] 99 99 private uint noChildren; … … 102 102 #region HLConstructors & Cloning 103 103 [StorableConstructor] 104 protected S PTree(bool deserializing) { }105 protected S PTree(SPTree original, Cloner cloner)104 protected SpacePartitioningTree(bool deserializing) { } 105 protected SpacePartitioningTree(SpacePartitioningTree original, Cloner cloner) 106 106 : base(original, cloner) { 107 107 buff = original.buff.Select(x => x).ToArray(); … … 121 121 noChildren = original.noChildren; 122 122 } 123 public override IDeepCloneable Clone(Cloner cloner) { return new S PTree(this, cloner); }124 125 public S PTree(double[,] inpData) {123 public override IDeepCloneable Clone(Cloner cloner) { return new SpacePartitioningTree(this, cloner); } 124 125 public SpacePartitioningTree(double[,] inpData) { 126 126 var d = inpData.GetLength(1); 127 127 var n = inpData.GetLength(0); … … 145 145 } 146 146 147 public S PTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {147 public SpacePartitioningTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) { 148 148 Init(null, inpData, impCorner, impWith); 149 149 } 150 public S PTree(ISPTree parent, double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) {150 public SpacePartitioningTree(SpacePartitioningTree parent, double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) { 151 151 Init(parent, inpData, impCorner, impWith); 152 152 } 153 153 #endregion 154 154 155 public IS PTree GetParent() {155 public ISpacePartitioningTree GetParent() { 156 156 return parent; 157 157 } … … 211 211 div *= 2; 212 212 } 213 children[i] = new S PTree(this, Data, newCorner, newWidth);213 children[i] = new SpacePartitioningTree(this, Data, newCorner, newWidth); 214 214 } 215 215 … … 307 307 for (var i = 0; i < n; i++) Insert(i); 308 308 } 309 private void Init( ISPTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) {309 private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) { 310 310 parent = p; 311 311 dimension = inpData.GetLength(1); … … 320 320 inpWidth.ForEach((i, x) => boundary.SetWidth(i, x)); 321 321 322 children = new ISPTree[noChildren];322 children = new SpacePartitioningTree[noChildren]; 323 323 centerOfMass = new double[dimension]; 324 324 buff = new double[dimension]; -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r14784 r14785 38 38 namespace HeuristicLab.Algorithms.DataAnalysis { 39 39 /// <summary> 40 /// t-distributed stochastic neighbourhood embedding ( TSNE) projects the data in a low dimensional40 /// t-distributed stochastic neighbourhood embedding (tSNE) projects the data in a low dimensional 41 41 /// space to allow visual cluster identification. 42 42 /// </summary> 43 [Item(" TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " +43 [Item("tSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " + 44 44 "dimensional space to allow visual cluster identification.")] 45 45 [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)] 46 46 [StorableClass] 47 public sealed class TSNEA nalysis: BasicAlgorithm {47 public sealed class TSNEAlgorithm : BasicAlgorithm { 48 48 public override bool SupportsPause { 49 49 get { return false; } … … 57 57 } 58 58 59 #region Parameternames59 #region parameter names 60 60 private const string DistanceParameterName = "DistanceFunction"; 61 61 private const string PerplexityParameterName = "Perplexity"; … … 74 74 #endregion 75 75 76 #region Parameterproperties76 #region parameter properties 77 77 public IFixedValueParameter<DoubleValue> PerplexityParameter { 78 78 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; } 79 79 } 80 public OptionalValueParameter<DoubleValue> ThetaParameter {81 get { return Parameters[ThetaParameterName] as OptionalValueParameter<DoubleValue>; }80 public IFixedValueParameter<DoubleValue> ThetaParameter { 81 get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; } 82 82 } 83 83 public IFixedValueParameter<IntValue> NewDimensionsParameter { 84 84 get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; } 85 85 } 86 public IValueParameter<IDistance< RealVector>> DistanceParameter {87 get { return Parameters[DistanceParameterName] as IValueParameter<IDistance< RealVector>>; }86 public IValueParameter<IDistance<double[]>> DistanceParameter { 87 get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<double[]>>; } 88 88 } 89 89 public IFixedValueParameter<IntValue> MaxIterationsParameter { … … 120 120 121 121 #region Properties 122 public IDistance< RealVector> Distance {122 public IDistance<double[]> Distance { 123 123 get { return DistanceParameter.Value; } 124 124 } 125 125 public double Perplexity { 126 126 get { return PerplexityParameter.Value.Value; } 127 set { PerplexityParameter.Value.Value = value; } 127 128 } 128 129 public double Theta { 129 get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; } 130 get { return ThetaParameter.Value.Value; } 131 set { ThetaParameter.Value.Value = value; } 130 132 } 131 133 public int NewDimensions { 132 134 get { return NewDimensionsParameter.Value.Value; } 135 set { NewDimensionsParameter.Value.Value = value; } 133 136 } 134 137 public int MaxIterations { 135 138 get { return MaxIterationsParameter.Value.Value; } 139 set { MaxIterationsParameter.Value.Value = value; } 136 140 } 137 141 public int StopLyingIteration { 138 142 get { return StopLyingIterationParameter.Value.Value; } 143 set { StopLyingIterationParameter.Value.Value = value; } 139 144 } 140 145 public int MomentumSwitchIteration { 141 146 get { return MomentumSwitchIterationParameter.Value.Value; } 147 set { MomentumSwitchIterationParameter.Value.Value = value; } 142 148 } 143 149 public double InitialMomentum { 144 150 get { return InitialMomentumParameter.Value.Value; } 151 set { InitialMomentumParameter.Value.Value = value; } 145 152 } 146 153 public double FinalMomentum { 147 154 get { return FinalMomentumParameter.Value.Value; } 155 set { FinalMomentumParameter.Value.Value = value; } 148 156 } 149 157 public double Eta { 150 get { 151 return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value; 152 } 158 get { return EtaParameter.Value.Value; } 159 set { EtaParameter.Value.Value = value; } 153 160 } 154 161 public bool SetSeedRandomly { 155 162 get { return SetSeedRandomlyParameter.Value.Value; } 156 } 157 public uint Seed { 158 get { return (uint)SeedParameter.Value.Value; } 163 set { SetSeedRandomlyParameter.Value.Value = value; } 164 } 165 public int Seed { 166 get { return SeedParameter.Value.Value; } 167 set { SeedParameter.Value.Value = value; } 159 168 } 160 169 public string Classes { 161 170 get { return ClassesParameter.Value.Value; } 171 set { ClassesParameter.Value.Value = value; } 162 172 } 163 173 public bool Normalization { 164 174 get { return NormalizationParameter.Value.Value; } 175 set { NormalizationParameter.Value.Value = value; } 165 176 } 166 177 [Storable] 167 public TSNE< RealVector> tsne;178 public TSNE<double[]> tsne; 168 179 #endregion 169 180 170 181 #region Constructors & Cloning 171 182 [StorableConstructor] 172 private TSNEA nalysis(bool deserializing) : base(deserializing) { }173 private TSNEA nalysis(TSNEAnalysisoriginal, Cloner cloner) : base(original, cloner) { }174 public override IDeepCloneable Clone(Cloner cloner) { return new TSNEA nalysis(this, cloner); }175 public TSNEA nalysis() {183 private TSNEAlgorithm(bool deserializing) : base(deserializing) { } 184 private TSNEAlgorithm(TSNEAlgorithm original, Cloner cloner) : base(original, cloner) { } 185 public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAlgorithm(this, cloner); } 186 public TSNEAlgorithm() { 176 187 Problem = new RegressionProblem(); 177 Parameters.Add(new ValueParameter<IDistance< RealVector>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance()));178 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended Value is Floor(number of points /3) or lower", new DoubleValue(25)));179 Parameters.Add(new OptionalValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0.1)));180 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis ", new IntValue(2)));188 Parameters.Add(new ValueParameter<IDistance<double[]>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclideanDistance())); 189 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of tSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended value is floor(number of points /3) or lower", new DoubleValue(25))); 190 Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0))); 191 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis)", new IntValue(2))); 181 192 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000))); 182 193 Parameters.Add(new FixedValueParameter<IntValue>(StopLyingIterationParameterName, "Number of iterations after which p is no longer approximated", new IntValue(0))); … … 184 195 Parameters.Add(new FixedValueParameter<DoubleValue>(InitialMomentumParameterName, "The initial momentum in the gradient descent", new DoubleValue(0.5))); 185 196 Parameters.Add(new FixedValueParameter<DoubleValue>(FinalMomentumParameterName, "The final momentum", new DoubleValue(0.8))); 186 Parameters.Add(new FixedValueParameter<DoubleValue>(EtaParameterName, "Gradient Descent learning rate", new DoubleValue(200)));197 Parameters.Add(new FixedValueParameter<DoubleValue>(EtaParameterName, "Gradient descent learning rate", new DoubleValue(200))); 187 198 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true))); 188 199 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0))); 189 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found Training/Test is used as labels", new StringValue("none")));190 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "W ether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored", new BoolValue(true)));200 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found training/test is used as labels", new StringValue("none"))); 201 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored", new BoolValue(true))); 191 202 192 203 MomentumSwitchIterationParameter.Hidden = true; … … 200 211 public override void Stop() { 201 212 base.Stop(); 202 if (tsne != null) tsne.Running = false;213 if (tsne != null) tsne.Running = false; 203 214 } 204 215 … … 208 219 var problemData = Problem.ProblemData; 209 220 210 //color datapoints acording to Classes-Variable (be it double or string)211 if (problemData.Dataset.VariableNames.Contains(Classes)) {212 if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {221 //color datapoints acording to classes variable (be it double or string) 222 if (problemData.Dataset.VariableNames.Contains(Classes)) { 223 if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) { 213 224 var classes = problemData.Dataset.GetStringValues(Classes).ToArray(); 214 for (var i = 0; i < classes.Length; i++) {215 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());225 for (var i = 0; i < classes.Length; i++) { 226 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>()); 216 227 dataRowNames[classes[i]].Add(i); 217 228 } 218 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {229 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) { 219 230 var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray(); 220 var max = classValues.Max() + 0.1; 231 var max = classValues.Max() + 0.1; // TODO consts 221 232 var min = classValues.Min() - 0.1; 222 233 const int contours = 8; 223 for (var i = 0; i < contours; i++) {234 for (var i = 0; i < contours; i++) { 224 235 var contourname = GetContourName(i, min, max, contours); 225 236 dataRowNames.Add(contourname, new List<int>()); … … 228 239 rows[contourname].VisualProperties.PointSize = i + 3; 229 240 } 230 for (var i = 0; i < classValues.Length; i++) {241 for (var i = 0; i < classValues.Length; i++) { 231 242 dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); 232 243 } … … 237 248 } 238 249 239 // Set up and run TSNE240 if (SetSeedRandomly) SeedParameter.Value.Value= new System.Random().Next();241 var random = new MersenneTwister( Seed);242 tsne = new TSNE< RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows);250 // set up and run tSNE 251 if (SetSeedRandomly) Seed = new System.Random().Next(); 252 var random = new MersenneTwister((uint)Seed); 253 tsne = new TSNE<double[]>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows); 243 254 var dataset = problemData.Dataset; 244 255 var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); 245 var data = new RealVector[dataset.Rows];246 for (var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray());247 if (Normalization) data = NormalizeData(data);256 var data = new double[dataset.Rows][]; 257 for (var row = 0; row < dataset.Rows; row++) data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray(); 258 if (Normalization) data = NormalizeData(data); 248 259 tsne.Run(data, NewDimensions, Perplexity, Theta); 249 260 } 250 261 251 private static RealVector[] NormalizeData(IReadOnlyList<RealVector> data) {262 private static double[][] NormalizeData(IReadOnlyList<double[]> data) { 252 263 var n = data[0].Length; 253 264 var mean = new double[n]; 254 265 var sd = new double[n]; 255 var nData = new RealVector[data.Count];256 for (var i = 0; i < n; i++) {266 var nData = new double[data.Count][]; 267 for (var i = 0; i < n; i++) { 257 268 var i1 = i; 258 269 sd[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).StandardDeviation(); 259 270 mean[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).Average(); 260 271 } 261 for (var i = 0; i < data.Count; i++) {262 nData[i] = new RealVector(n);263 for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j];272 for (var i = 0; i < data.Count; i++) { 273 nData[i] = new double[n]; 274 for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j]; 264 275 } 265 276 return nData; -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r14782 r14785 58 58 using System.Linq; 59 59 using HeuristicLab.Analysis; 60 using HeuristicLab.Collections; 60 61 using HeuristicLab.Common; 61 62 using HeuristicLab.Core; … … 67 68 namespace HeuristicLab.Algorithms.DataAnalysis { 68 69 [StorableClass] 69 public class TSNE<T> : DeepCloneable where T : class, IDeepCloneable{70 public class TSNE<T> : DeepCloneable /*where T : class, IDeepCloneable*/ { 70 71 71 72 private const string IterationResultName = "Iteration"; … … 101 102 102 103 #region Stopping 103 public volatile bool Running; 104 public volatile bool Running; // TODO 104 105 #endregion 105 106 … … 164 165 165 166 // Initialize solution (randomly) 166 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001; 167 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001; // TODO const 167 168 168 169 // Perform main training loop 169 170 for (var iter = 0; iter < maxIter && Running; iter++) { 170 171 if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY); 171 else Compute Gradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);172 else ComputeApproximateGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta); 172 173 // Update gains 173 174 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8; … … 189 190 return newData; 190 191 } 191 public static double[,] Run<TR>(TR[] data, int newDimensions, double perplexity, double theta, IDistance<TR> distance, IRandom random) where TR : class, IDeepCloneable {192 return new TSNE<TR>(distance, random).Run(data, newDimensions, perplexity, theta);193 }194 192 195 193 #region helpers … … 205 203 else ((DoubleValue)results[ErrorResultName].Value).Value = 0; 206 204 207 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent")));208 else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent");205 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"))); 206 else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"); 209 207 210 208 var plot = results[ErrorPlotResultName].Value as DataTable; 211 if (plot == null) throw new ArgumentException("could not create/access Error-DataTable in Results-Collection");209 if (plot == null) throw new ArgumentException("could not create/access error data table in results collection"); 212 210 213 211 if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors")); … … 224 222 var errors = plot.Rows["errors"].Values; 225 223 var c = exact 226 ? EvaluateError (p, newData, noDatapoints, newDimensions)227 : EvaluateError (rowP, colP, valP, newData, theta);224 ? EvaluateErrorExact(p, newData, noDatapoints, newDimensions) 225 : EvaluateErrorApproximate(rowP, colP, valP, newData, theta); 228 226 errors.Add(c); 229 227 ((IntValue)results[IterationResultName].Value).Value = iter + 1; … … 304 302 for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k; 305 303 304 var objX = new List<IndexedItem<T>>(); 305 for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i])); 306 306 307 // Build ball tree on data set 307 var tree = new VPTree<IDataPoint<T>>(new DataPointDistance<T>(distance)); 308 var objX = new List<IDataPoint<T>>(); 309 for (var i = 0; i < n; i++) objX.Add(new DataPoint<T>(i, x[i])); 310 tree.Create(objX); 308 var tree = new VantagePointTree<IndexedItem<T>>(new IndexedItemDistance<T>(distance), objX); // do we really want to re-create the tree on each call? 311 309 312 310 // Loop over all points to find nearest neighbors 313 var indices = new List<IDataPoint<T>>();314 var distances = new List<double>();315 311 for (var i = 0; i < n; i++) { 312 IList<IndexedItem<T>> indices; 313 IList<double> distances; 316 314 317 315 // Find nearest neighbors 318 indices.Clear();319 distances.Clear();320 316 tree.Search(objX[i], k + 1, out indices, out distances); 321 317 … … 323 319 var found = false; 324 320 var beta = 1.0; 325 var minBeta = -double.MaxValue;321 var minBeta = double.MinValue; 326 322 var maxBeta = double.MaxValue; 327 const double tol = 1e-5; 323 const double tol = 1e-5; // TODO: why 1e-5? 328 324 329 325 // Iterate until we found a good perplexity … … 374 370 } 375 371 private void ComputeGaussianPerplexity(T[] x, int n, double[,] p, double perplexity) { 376 // Compute the squared Euclideandistance matrix372 // Compute the distance matrix 377 373 var dd = ComputeDistances(x); 374 378 375 // Compute the Gaussian kernel row by row 379 380 376 for (var i = 0; i < n; i++) { 381 377 // Initialize some variables … … 389 385 // Iterate until we found a good perplexity 390 386 var iter = 0; 391 while (!found && iter < 200) { 387 while (!found && iter < 200) { // TODO constant 392 388 393 389 // Compute Gaussian kernel row … … 430 426 } 431 427 } 428 432 429 private double[][] ComputeDistances(T[] x) { 433 430 return x.Select(m => x.Select(n => distance.Get(m, n)).ToArray()).ToArray(); 434 431 } 432 435 433 private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) { 436 434 … … 487 485 } 488 486 } 489 private static void Compute Gradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {490 var tree = new S PTree(y);487 private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) { 488 var tree = new SpacePartitioningTree(y); 491 489 double[] sumQ = { 0 }; 492 490 var posF = new double[n, d]; … … 502 500 for (var i = 0; i < n; i++) 503 501 for (var j = 0; j < d; j++) { 504 dC[i, j] = posF[i, j] - negF[i, j] / sumQ[0]; 505 } 506 } 507 private static double EvaluateError(double[,] p, double[,] y, int n, int d) { 502 dC[i, j] = (posF[i, j] - negF[i, j]) / sumQ[0]; // TODO: check parenthesis 503 } 504 } 505 506 private static double EvaluateErrorExact(double[,] p, double[,] y, int n, int d) { 508 507 // Compute the squared Euclidean distance matrix 509 508 var dd = new double[n, n]; … … 531 530 return c; 532 531 } 533 private static double EvaluateError (IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {532 private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) { 534 533 // Get estimate of normalization term 535 534 var n = y.GetLength(0); 536 535 var d = y.GetLength(1); 537 var tree = new S PTree(y);536 var tree = new SpacePartitioningTree(y); 538 537 var buff = new double[d]; 539 538 double[] sumQ = { 0 }; -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs
r14784 r14785 57 57 using System.Collections.Generic; 58 58 using System.Linq; 59 using HeuristicLab.Collections; 59 60 using HeuristicLab.Common; 60 61 using HeuristicLab.Core; … … 72 73 /// <typeparam name="T"></typeparam> 73 74 [StorableClass] 74 public class V PTree<T> : DeepCloneable, IVPTree<T> where T : class, IDeepCloneable{75 public class VantagePointTree<T> : IVantagePointTree<T> { 75 76 #region properties 76 [Storable] 77 private List<T> items; 78 [Storable] 79 private double tau; 80 [Storable] 81 private Node root; 82 [Storable] 83 private IDistance<T> distance; 77 private readonly List<T> items; 78 private readonly Node root; 79 private readonly IDistance<T> distance; 84 80 #endregion 85 81 86 #region HLConstructors & Cloning 87 [StorableConstructor] 88 protected VPTree(bool deserializing) { } 89 protected VPTree(VPTree<T> original, Cloner cloner) 90 : base(original, cloner) { 91 items = original.items.Select(cloner.Clone).ToList(); 92 tau = original.tau; 93 root = cloner.Clone(original.root); 94 distance = cloner.Clone(distance); 95 } 96 public override IDeepCloneable Clone(Cloner cloner) { return new VPTree<T>(this, cloner); } 97 public VPTree(IDistance<T> distance) { 82 public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() { 98 83 root = null; 99 84 this.distance = distance; 100 }101 #endregion102 103 public void Create(IEnumerable<T> items) {104 85 this.items = items.Select(x => x).ToList(); 105 86 root = BuildFromPoints(0, this.items.Count); 106 87 } 107 88 108 public void Search(T target, int k, out List<T> results, out List<double> distances) { 109 var heap = new PriorityQueue<double, HeapItem>(double.MaxValue, double.MinValue, k); 110 tau = double.MaxValue; 111 Search(root, target, k, heap); 112 results = new List<T>(); 113 distances = new List<double>(); 89 public void Search(T target, int k, out IList<T> results, out IList<double> distances) { 90 var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k); 91 Search(root, target, k, heap, double.MaxValue); 92 var res = new List<T>(); 93 var dist = new List<double>(); 114 94 while (heap.Size > 0) { 115 res ults.Add(items[heap.PeekMinValue().Index]);116 dist ances.Add(heap.PeekMinValue().Dist);95 res.Add(items[heap.PeekMinValue().Index]); 96 dist.Add(heap.PeekMinValue().Value); 117 97 heap.RemoveMin(); 118 98 } 119 results.Reverse(); 120 distances.Reverse(); 99 res.Reverse(); 100 dist.Reverse(); 101 results = res; 102 distances = dist; 103 } 104 105 private void Search(Node node, T target, int k, PriorityQueue<double, IndexedItem<double>> heap, double tau = double.MaxValue) { 106 if (node == null) return; 107 var dist = distance.Get(items[node.index], target); 108 if (dist < tau) { 109 if (heap.Size == k) heap.RemoveMin(); 110 heap.Insert(-dist, new IndexedItem<double>(node.index, dist));//TODO check if minheap or maxheap should be used here 111 if (heap.Size == k) tau = heap.PeekMinValue().Value; 112 } 113 if (node.left == null && node.right == null) return; 114 115 if (dist < node.threshold) { 116 if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau); // if there can still be neighbors inside the ball, recursively search left child first 117 if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau); // if there can still be neighbors outside the ball, recursively search right child 118 } else { 119 if (dist + tau >= node.threshold) Search(node.right, target, k, heap, tau); // if there can still be neighbors outside the ball, recursively search right child first 120 if (dist - tau <= node.threshold) Search(node.left, target, k, heap, tau); // if there can still be neighbors inside the ball, recursively search left child 121 } 121 122 } 122 123 … … 150 151 } 151 152 152 private void Search(Node node, T target, int k, PriorityQueue<double, HeapItem> heap) { 153 if (node == null) return; 154 var dist = distance.Get(items[node.index], target); 155 if (dist < tau) { 156 if (heap.Size == k) heap.RemoveMin(); 157 heap.Insert(-dist, new HeapItem(node.index, dist));//TODO check if minheap or maxheap should be used here 158 if (heap.Size == k) tau = heap.PeekMinValue().Dist; 159 } 160 if (node.left == null && node.right == null) return; 161 162 if (dist < node.threshold) { 163 if (dist - tau <= node.threshold) Search(node.left, target, k, heap); // if there can still be neighbors inside the ball, recursively search left child first 164 if (dist + tau >= node.threshold) Search(node.right, target, k, heap); // if there can still be neighbors outside the ball, recursively search right child 165 } else { 166 if (dist + tau >= node.threshold) Search(node.right, target, k, heap); // if there can still be neighbors outside the ball, recursively search right child first 167 if (dist - tau <= node.threshold) Search(node.left, target, k, heap); // if there can still be neighbors inside the ball, recursively search left child 168 } 169 170 } 171 172 [StorableClass] 173 private sealed class Node : Item { 174 [Storable] 153 private sealed class Node { 175 154 public int index; 176 [Storable]177 155 public double threshold; 178 [Storable]179 156 public Node left; 180 [Storable]181 157 public Node right; 182 158 183 #region HLConstructors & Cloning184 [StorableConstructor]185 private Node(bool deserializing) : base(deserializing) { }186 private Node(Node original, Cloner cloner) : base(original, cloner) {187 index = original.index;188 threshold = original.threshold;189 left = (Node)original.left.Clone(cloner);190 right = (Node)original.right.Clone(cloner);191 }192 159 internal Node() { 193 160 index = 0; … … 196 163 right = null; 197 164 } 198 public override IDeepCloneable Clone(Cloner cloner) {199 return new Node(this, cloner);200 }201 #endregion202 }203 204 [StorableClass]205 private sealed class HeapItem : Item, IComparable<HeapItem> {206 [Storable]207 public int Index;208 [Storable]209 public double Dist;210 211 #region HLConstructors & Cloning212 [StorableConstructor]213 private HeapItem(bool deserializing) : base(deserializing) { }214 private HeapItem(HeapItem original, Cloner cloner)215 : base(original, cloner) {216 Index = original.Index;217 Dist = original.Dist;218 }219 public override IDeepCloneable Clone(Cloner cloner) { return new HeapItem(this, cloner); }220 public HeapItem(int index, double dist) {221 Index = index;222 Dist = dist;223 }224 #endregion225 226 public int CompareTo(HeapItem other) {227 return Dist.CompareTo(Dist);228 }229 165 } 230 166 }
Note: See TracChangeset
for help on using the changeset viewer.