Changeset 14512 for branches/TSNE/HeuristicLab.Algorithms.DataAnalysis
- Timestamp:
- 12/20/16 15:50:11 (8 years ago)
- Location:
- branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 18 added
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:ignore
-
old  new
     1    *.user
     2    *.vs10x
     3    .vs
     4    HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user
     5    HeuristicLabAlgorithmsDataAnalysisPlugin.cs
     6    Plugin.cs
1    7    bin
2    8    obj
3         HeuristicLabAlgorithmsDataAnalysisPlugin.cs
4         HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user
5         *.vs10x
6         Plugin.cs
7         *.user
8         .vs
-
- Property svn:ignore
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r14413 r14512 331 331 <Compile Include="Interfaces\ISupportVectorMachineSolution.cs" /> 332 332 <Compile Include="Interfaces\IDataAnalysisAlgorithm.cs" /> 333 <Compile Include="Interfaces\TSNEInterfaces\IKernelFunction.cs" /> 333 334 <Compile Include="kMeans\KMeansClustering.cs" /> 334 335 <Compile Include="kMeans\KMeansClusteringModel.cs" /> … … 337 338 <SubType>Code</SubType> 338 339 </Compile> 340 <Compile Include="KPCA\SelfOrganizingMap.cs" /> 341 <Compile Include="KPCA\KernelFunctions\CicularKernel.cs" /> 342 <Compile Include="KPCA\KernelFunctions\GaussianKernel.cs" /> 343 <Compile Include="KPCA\KernelFunctions\InverseMultiquadraticKernel .cs" /> 344 <Compile Include="KPCA\KernelFunctions\LaplacianKernel.cs" /> 345 <Compile Include="KPCA\KernelFunctions\MultiquadraticKernel.cs" /> 346 <Compile Include="KPCA\KernelFunctions\NoKernel.cs" /> 347 <Compile Include="KPCA\KernelFunctions\PolysplineKernel.cs" /> 348 <Compile Include="KPCA\KernelFunctions\RadialBasisKernelBase.cs" /> 349 <Compile Include="KPCA\KernelFunctions\ThinPlatePolysplineKernel.cs" /> 350 <Compile Include="KPCA\KernelFunctions\TricubicKernel.cs" /> 351 <Compile Include="KPCA\KernelPrincipleComponentAnalysis.cs" /> 352 <Compile Include="KPCA\Isomap.cs" /> 353 <Compile Include="KPCA\KPCA.cs" /> 354 <Compile Include="KPCA\MatrixUtilities.cs" /> 339 355 <Compile Include="Linear\AlglibUtil.cs" /> 340 356 <Compile Include="Linear\Scaling.cs" /> … … 415 431 <Compile Include="TSNE\Cell.cs" /> 416 432 <Compile Include="TSNE\DataPoint.cs" /> 433 <Compile Include="TSNE\Distances\FuctionalDistance.cs" /> 417 434 <Compile Include="TSNE\Distances\DistanceBase.cs" /> 418 435 <Compile Include="TSNE\Distances\DataPointDistance.cs" /> 419 436 <Compile Include="TSNE\Distances\EuclidianDistance.cs" /> 420 <Compile Include="TSNE\TSNEInterfaces\IDistance.cs" /> 437 <Compile Include="Interfaces\TSNEInterfaces\IDistance.cs" /> 438 <Compile Include="TSNE\Distances\InnerProductDistance.cs" /> 421 439 <Compile 
Include="TSNE\TSNEAnalysis.cs" /> 422 440 <Compile Include="TSNE\PriorityQueue.cs" /> 423 441 <Compile Include="TSNE\SPtree.cs" /> 424 442 <Compile Include="TSNE\TSNE.cs" /> 425 <Compile Include=" TSNE\TSNEInterfaces\ICell.cs" />426 <Compile Include=" TSNE\TSNEInterfaces\IDataPoint.cs" />427 <Compile Include=" TSNE\TSNEInterfaces\IHeap.cs" />428 <Compile Include=" TSNE\TSNEInterfaces\ISPTree.cs" />429 <Compile Include=" TSNE\TSNEInterfaces\ITSNE.cs" />430 <Compile Include=" TSNE\TSNEInterfaces\IVPTree.cs" />443 <Compile Include="Interfaces\TSNEInterfaces\ICell.cs" /> 444 <Compile Include="Interfaces\TSNEInterfaces\IDataPoint.cs" /> 445 <Compile Include="Interfaces\TSNEInterfaces\IHeap.cs" /> 446 <Compile Include="Interfaces\TSNEInterfaces\ISPTree.cs" /> 447 <Compile Include="Interfaces\TSNEInterfaces\ITSNE.cs" /> 448 <Compile Include="Interfaces\TSNEInterfaces\IVPTree.cs" /> 431 449 <Compile Include="TSNE\TSNEUtils.cs" /> 432 450 <Compile Include="TSNE\VPTree.cs" /> -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DataPointDistance.cs
r14414 r14512 20 20 #endregion 21 21 22 using HeuristicLab.Algorithms.DataAnalysis.Distances;23 22 using HeuristicLab.Common; 24 23 using HeuristicLab.Core; -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs
r14414 r14512 25 25 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 26 26 27 namespace HeuristicLab.Algorithms.DataAnalysis .Distances{27 namespace HeuristicLab.Algorithms.DataAnalysis { 28 28 [StorableClass] 29 29 public abstract class DistanceBase<T> : Item, IDistance<T> { -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclidianDistance.cs
r14414 r14512 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HeuristicLab.Algorithms.DataAnalysis.Distances;26 25 using HeuristicLab.Common; 27 26 using HeuristicLab.Core; -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs
r14414 r14512 57 57 using System.Collections.Generic; 58 58 using System.Linq; 59 using HeuristicLab.Analysis; 59 60 using HeuristicLab.Common; 60 61 using HeuristicLab.Core; … … 70 71 private const string IterationResultName = "Iteration"; 71 72 private const string ErrorResultName = "Error"; 73 private const string ErrorPlotResultName = "ErrorPlot"; 74 private const string ScatterPlotResultName = "Scatterplot"; 75 private const string DataResultName = "Projected Data"; 72 76 73 77 #region Properties … … 90 94 [Storable] 91 95 private ResultCollection results; 96 [Storable] 97 private Dictionary<string, List<int>> dataRowLookup; 98 [Storable] 99 private Dictionary<string, ScatterPlotDataRow> dataRows; 100 #endregion 101 102 #region Stopping 103 public volatile bool Running; 92 104 #endregion 93 105 … … 105 117 random = cloner.Clone(random); 106 118 results = cloner.Clone(results); 119 dataRowLookup = original.dataRowLookup.ToDictionary(entry => entry.Key, entry => entry.Value.Select(x => x).ToList()); 120 dataRows = original.dataRows.ToDictionary(entry => entry.Key, entry => cloner.Clone(entry.Value)); 107 121 } 108 122 public override IDeepCloneable Clone(Cloner cloner) { return new TSNE<T>(this, cloner); } 109 public TSNE(IDistance<T> distance, IRandom random, ResultCollection results = null, int maxIter = 1000, int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0 ) {123 public TSNE(IDistance<T> distance, IRandom random, ResultCollection results = null, int maxIter = 1000, int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0, Dictionary<string, List<int>> dataRowLookup = null, Dictionary<string, ScatterPlotDataRow> dataRows = null) { 110 124 this.distance = distance; 111 125 this.maxIter = maxIter; … … 117 131 this.random = random; 118 132 this.results = results; 133 this.dataRowLookup = dataRowLookup; 134 if (dataRows != null) 135 
this.dataRows = dataRows; 136 else { this.dataRows = new Dictionary<string, ScatterPlotDataRow>(); } 119 137 } 120 138 #endregion … … 124 142 var noDatapoints = data.Length; 125 143 if (noDatapoints - 1 < 3 * perplexity) throw new ArgumentException("Perplexity too large for the number of data points!"); 126 127 if (results != null) { 128 if (!results.ContainsKey(IterationResultName)) { 129 results.Add(new Result(IterationResultName, new IntValue(0))); 130 } else ((IntValue)results[IterationResultName].Value).Value = 0; 131 if (!results.ContainsKey(ErrorResultName)) { 132 results.Add(new Result(ErrorResultName, new DoubleValue(0))); 133 } else ((DoubleValue)results[ErrorResultName].Value).Value = 0; 134 } 135 136 // Determine whether we are using an exact algorithm 144 SetUpResults(data); 145 Running = true; 137 146 var exact = Math.Abs(theta) < double.Epsilon; 138 147 var newData = new double[noDatapoints, newDimensions]; … … 141 150 var gains = new double[noDatapoints, newDimensions]; 142 151 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = 1.0; 143 144 // Compute input similarities for exact t-SNE145 152 double[,] p = null; 146 153 int[] rowP = null; 147 154 int[] colP = null; 148 155 double[] valP = null; 149 if (exact) { 150 // Compute similarities 151 p = new double[noDatapoints, noDatapoints]; 152 ComputeGaussianPerplexity(data, noDatapoints, p, perplexity); 153 // Symmetrize input similarities 154 for (var n = 0; n < noDatapoints; n++) { 155 for (var m = n + 1; m < noDatapoints; m++) { 156 p[n, m] += p[m, n]; 157 p[m, n] = p[n, m]; 158 } 159 } 160 var sumP = .0; 161 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) sumP += p[i, j]; 162 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= sumP; 163 } // Compute input similarities for approximate t-SNE 164 else { 165 // Compute asymmetric pairwise input similarities 166 ComputeGaussianPerplexity(data, 
noDatapoints, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity)); 167 // Symmetrize input similarities 168 int[] sRowP, symColP; 169 double[] sValP; 170 SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP); 171 rowP = sRowP; 172 colP = symColP; 173 valP = sValP; 174 var sumP = .0; 175 for (var i = 0; i < rowP[noDatapoints]; i++) sumP += valP[i]; 176 for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= sumP; 177 } 156 var rand = new NormalDistributedRandom(random, 0, 1); 157 158 //Calculate Similarities 159 if (exact) p = CalculateExactSimilarites(data, perplexity); 160 else CalculateApproximateSimilarities(data, perplexity, out rowP, out colP, out valP); 178 161 179 162 // Lie about the P-values 180 if (exact) { 181 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0; 182 } else { 183 for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0; 184 } 185 186 var rand = new NormalDistributedRandom(random, 0, 1); 163 if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0; 164 else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0; 165 187 166 // Initialize solution (randomly) 188 for (var i = 0; i < noDatapoints; i++) 189 for (var j = 0; j < newDimensions; j++) 190 newData[i, j] = rand.NextDouble() * .0001; 167 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001; 191 168 192 169 // Perform main training loop 193 for (var iter = 0; iter < maxIter; iter++) { 194 195 // Compute (approximate) gradient 170 for (var iter = 0; iter < maxIter && Running; iter++) { 196 171 if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY); 197 172 else ComputeGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta); 198 199 173 // Update gains 200 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) 201 gains[i, j] = 
Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8; 174 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8; 202 175 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) if (gains[i, j] < .01) gains[i, j] = .01; 203 204 176 // Perform gradient update (with momentum and gains) 205 177 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) uY[i, j] = currentMomentum * uY[i, j] - eta * gains[i, j] * dY[i, j]; 206 178 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = newData[i, j] + uY[i, j]; 207 208 179 // Make solution zero-mean 209 180 ZeroMean(newData); 210 211 181 // Stop lying about the P-values after a while, and switch momentum 212 182 if (iter == stopLyingIter) { 213 if (exact) { 214 for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= 12.0; 215 } else { 216 for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= 12.0; 217 } 183 if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= 12.0; 184 else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= 12.0; 218 185 } 219 186 if (iter == momSwitchIter) currentMomentum = finalMomentum; 220 187 221 if (results == null) continue; 222 var errors = new List<double>(); 223 // Print out progress 224 var c = exact 225 ? 
EvaluateError(p, newData, noDatapoints, newDimensions) 226 : EvaluateError(rowP, colP, valP, newData, theta); 227 errors.Add(c); 228 ((IntValue)results[IterationResultName].Value).Value = iter + 1; 229 ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last(); 188 Analyze(exact, iter, p, rowP, colP, valP, newData, noDatapoints, newDimensions, theta); 230 189 } 231 190 return newData; … … 234 193 return new TSNE<TR>(distance, random).Run(data, newDimensions, perplexity, theta); 235 194 } 195 public static double[,] Run<TR>(TR[] data, int newDimensions, double perplexity, double theta, Func<TR, TR, double> distance, IRandom random) where TR : class, IDeepCloneable { 196 return new TSNE<TR>(new FuctionalDistance<TR>(distance), random).Run(data, newDimensions, perplexity, theta); 197 } 236 198 237 199 #region helpers 200 201 private void SetUpResults(IReadOnlyCollection<T> data) { 202 if (dataRowLookup == null) { 203 dataRowLookup = new Dictionary<string, List<int>>(); 204 dataRowLookup.Add("Data", Enumerable.Range(0, data.Count).ToList()); 205 } 206 if (results == null) return; 207 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0))); 208 else ((IntValue)results[IterationResultName].Value).Value = 0; 209 210 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0))); 211 else ((DoubleValue)results[ErrorResultName].Value).Value = 0; 212 213 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent"))); 214 else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent"); 215 216 var plot = results[ErrorPlotResultName].Value as DataTable; 217 if (plot == null) throw new ArgumentException("could not create/access Error-DataTable in Results-Collection"); 218 if 
(!plot.Rows.ContainsKey("errors")) { 219 plot.Rows.Add(new DataRow("errors")); 220 } 221 plot.Rows["errors"].Values.Clear(); 222 results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, ""))); 223 results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix())); 224 225 } 226 private void Analyze(bool exact, int iter, double[,] p, int[] rowP, int[] colP, double[] valP, double[,] newData, int noDatapoints, int newDimensions, double theta) { 227 if (results == null) return; 228 var plot = results[ErrorPlotResultName].Value as DataTable; 229 if (plot == null) throw new ArgumentException("Could not create/access Error-DataTable in Results-Collection. Was it removed by some effect?"); 230 var errors = plot.Rows["errors"].Values; 231 var c = exact 232 ? EvaluateError(p, newData, noDatapoints, newDimensions) 233 : EvaluateError(rowP, colP, valP, newData, theta); 234 errors.Add(c); 235 ((IntValue)results[IterationResultName].Value).Value = iter + 1; 236 ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last(); 237 238 var ndata = Normalize(newData); 239 results[DataResultName].Value = new DoubleMatrix(ndata); 240 var splot = results[ScatterPlotResultName].Value as ScatterPlot; 241 FillScatterPlot(ndata, splot); 242 243 244 } 245 private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) { 246 foreach (var rowName in dataRowLookup.Keys) { 247 if (!plot.Rows.ContainsKey(rowName)) { 248 plot.Rows.Add(dataRows.ContainsKey(rowName) ? 
dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>())); 249 } else plot.Rows[rowName].Points.Clear(); 250 plot.Rows[rowName].Points.AddRange(dataRowLookup[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))); 251 } 252 } 253 private static double[,] Normalize(double[,] data) { 254 var max = new double[data.GetLength(1)]; 255 var min = new double[data.GetLength(1)]; 256 var res = new double[data.GetLength(0), data.GetLength(1)]; 257 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i]; 258 for (var i = 0; i < data.GetLength(0); i++) 259 for (var j = 0; j < data.GetLength(1); j++) { 260 var v = data[i, j]; 261 max[j] = Math.Max(max[j], v); 262 min[j] = Math.Min(min[j], v); 263 } 264 for (var i = 0; i < data.GetLength(0); i++) { 265 for (var j = 0; j < data.GetLength(1); j++) { 266 res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]); 267 } 268 } 269 return res; 270 } 271 private void CalculateApproximateSimilarities(T[] data, double perplexity, out int[] rowP, out int[] colP, out double[] valP) { 272 // Compute asymmetric pairwise input similarities 273 ComputeGaussianPerplexity(data, data.Length, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity)); 274 // Symmetrize input similarities 275 int[] sRowP, symColP; 276 double[] sValP; 277 SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP); 278 rowP = sRowP; 279 colP = symColP; 280 valP = sValP; 281 var sumP = .0; 282 for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i]; 283 for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP; 284 } 285 private double[,] CalculateExactSimilarites(T[] data, double perplexity) { 286 // Compute similarities 287 var p = new double[data.Length, data.Length]; 288 ComputeGaussianPerplexity(data, data.Length, p, perplexity); 289 // Symmetrize input similarities 290 for (var n = 0; n < data.Length; n++) { 291 for (var m = n + 1; m < data.Length; m++) { 292 p[n, m] += 
p[m, n]; 293 p[m, n] = p[n, m]; 294 } 295 } 296 var sumP = .0; 297 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; 298 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; 299 return p; 300 } 301 238 302 private void ComputeGaussianPerplexity(IReadOnlyList<T> x, int n, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) { 239 303 if (perplexity > k) throw new ArgumentException("Perplexity should be lower than K!"); -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs
r14414 r14512 20 20 #endregion 21 21 22 using System; 22 using System.Collections.Generic; 23 using System.Drawing; 23 24 using System.Linq; 24 25 using HeuristicLab.Analysis; … … 27 28 using HeuristicLab.Data; 28 29 using HeuristicLab.Encodings.RealVectorEncoding; 29 using HeuristicLab.Optimization;30 30 using HeuristicLab.Parameters; 31 31 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; … … 60 60 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 61 61 private const string SeedParameterName = "Seed"; 62 private const string ClassesParameterName = "ClassNames"; 62 63 #endregion 63 64 … … 67 68 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; } 68 69 } 69 public IFixedValueParameter<DoubleValue> ThetaParameter70 { 71 get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }70 public OptionalValueParameter<DoubleValue> ThetaParameter 71 { 72 get { return Parameters[ThetaParameterName] as OptionalValueParameter<DoubleValue>; } 72 73 } 73 74 public IFixedValueParameter<IntValue> NewDimensionsParameter … … 110 111 { 111 112 get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; } 113 } 114 public IFixedValueParameter<StringValue> ClassesParameter 115 { 116 get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; } 112 117 } 113 118 #endregion … … 124 129 public double Theta 125 130 { 126 get { return ThetaParameter.Value .Value; }131 get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; } 127 132 } 128 133 public int NewDimensions … … 152 157 public double Eta 153 158 { 154 get { return EtaParameter.Value.Value; } 159 get 160 { 161 return EtaParameter.Value == null ? 
0 : EtaParameter.Value.Value; 162 } 155 163 } 156 164 public bool SetSeedRandomly … … 162 170 get { return (uint)SeedParameter.Value.Value; } 163 171 } 172 public string Classes 173 { 174 get { return ClassesParameter.Value.Value; } 175 } 176 177 [Storable] 178 public TSNE<RealVector> tsne; 164 179 #endregion 165 180 … … 172 187 Problem = new RegressionProblem(); 173 188 Parameters.Add(new ValueParameter<IDistance<RealVector>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclidianDistance())); 174 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm ", new DoubleValue(25)));175 Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation", new DoubleValue(0.1)));189 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended Value is Floor(number of points /3) or lower", new DoubleValue(25))); 190 Parameters.Add(new OptionalValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. 
Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0.1))); 176 191 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2))); 177 192 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000))); … … 183 198 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true))); 184 199 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0))); 200 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. 
\n if the lable column can not be found Training/Test is used as labels", new StringValue("none"))); 185 201 } 186 202 #endregion 187 203 188 204 protected override void Run() { 189 var lowDimData = new DoubleMatrix(GetProjectedData(Problem.ProblemData)); 190 Results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", CreateScatterPlot(lowDimData, Problem.ProblemData))); 191 Results.Add(new Result(DataResultName, "Projected Data", lowDimData)); 192 } 193 194 private ScatterPlot CreateScatterPlot(DoubleMatrix lowDimData, IDataAnalysisProblemData problemData) { 195 var plot = new ScatterPlot(DataResultName, ""); 196 Normalize(lowDimData); 197 plot.Rows.Add(new ScatterPlotDataRow("Training", "Points of the training set", problemData.TrainingIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])))); 198 plot.Rows.Add(new ScatterPlotDataRow("Test", "Points of the test set", problemData.TestIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])))); 199 return plot; 200 } 201 202 private double[,] GetProjectedData(IDataAnalysisProblemData problemData) { 205 var data = CalculateProjectedData(Problem.ProblemData); 206 var lowDimData = new DoubleMatrix(data); 207 } 208 209 public override void Stop() { 210 base.Stop(); 211 if (tsne != null) tsne.Running = false; 212 } 213 214 private double[,] CalculateProjectedData(IDataAnalysisProblemData problemData) { 215 var DataRowNames = new Dictionary<string, List<int>>(); 216 var rows = new Dictionary<string, ScatterPlotDataRow>(); 217 218 if (problemData.Dataset.VariableNames.Contains(Classes)) { 219 if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) { 220 var classes = problemData.Dataset.GetStringValues(Classes).ToArray(); 221 for (int i = 0; i < classes.Length; i++) { 222 if (!DataRowNames.ContainsKey(classes[i])) DataRowNames.Add(classes[i], new List<int>()); 223 DataRowNames[classes[i]].Add(i); //always succeeds 224 } 225 } else if 
((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) { 226 var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray(); 227 var max = classValues.Max() + 0.1; 228 var min = classValues.Min() - 0.1; 229 var contours = 8; 230 for (int i = 0; i < contours; i++) { 231 var name = GetContourName(i, min, max, contours); 232 DataRowNames.Add(name, new List<int>()); 233 rows.Add(name, new ScatterPlotDataRow(name, "", new List<Point2D<double>>())); 234 rows[name].VisualProperties.Color = GetHeatMapColor(i, contours); 235 rows[name].VisualProperties.PointSize = i+3; 236 } 237 for (int i = 0; i < classValues.Length; i++) { 238 DataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); //always succeeds 239 } 240 241 } 242 243 244 } else { 245 DataRowNames.Add("Training", problemData.TrainingIndices.ToList()); 246 DataRowNames.Add("Test", problemData.TestIndices.ToList()); 247 } 248 203 249 var random = SetSeedRandomly ? new MersenneTwister() : new MersenneTwister(Seed); 204 var tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);250 tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, DataRowNames, rows); 205 251 var dataset = problemData.Dataset; 206 252 var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); … … 210 256 } 211 257 212 private static void Normalize(DoubleMatrix data) { 213 var max = new double[data.Columns]; 214 var min = new double[data.Columns]; 215 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i]; 216 for (var i = 0; i < data.Rows; i++) 217 for (var j = 0; j < data.Columns; j++) { 218 var v = data[i, j]; 219 max[j] = Math.Max(max[j], v); 220 min[j] = Math.Min(min[j], v); 221 } 222 for (var i = 0; i < data.Rows; i++) { 223 for (var j = 0; j < data.Columns; j++) { 224 data[i, j] = (data[i, 
j] - (max[j] + min[j]) / 2) / (max[j] - min[j]); 225 } 226 } 227 228 } 258 private static Color GetHeatMapColor(int contourNr, int noContours) { 259 var q = (double)contourNr / noContours; // q in [0,1] 260 var c = q < 0.5 ? Color.FromArgb((int)(q * 2 * 255), 255, 0) : Color.FromArgb(255, (int)((1 - q) * 2 * 255), 0); 261 return c; 262 } 263 private static string GetContourName(double value, double min, double max, int noContours) { 264 var size = (max - min) / noContours; 265 var contourNr = (int)((value - min) / size); 266 return GetContourName(contourNr, min, max, noContours); 267 } 268 private static string GetContourName(int i, double min, double max, int noContours) { 269 var size = (max - min) / noContours; 270 return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")"; 271 } 272 229 273 } 230 274 }
Note: See TracChangeset
for help on using the changeset viewer.