Changeset 14859
- Timestamp:
- 04/13/17 13:59:42 (8 years ago)
- Location:
- branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r14836 r14859 54 54 <DebugType>pdbonly</DebugType> 55 55 <Optimize>true</Optimize> 56 <OutputPath> $(SolutionDir)\bin\</OutputPath>56 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 57 57 <DefineConstants>TRACE</DefineConstants> 58 58 <ErrorReport>prompt</ErrorReport> … … 65 65 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' "> 66 66 <DebugSymbols>true</DebugSymbols> 67 <OutputPath> $(SolutionDir)\bin\</OutputPath>67 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 68 68 <DefineConstants>DEBUG;TRACE</DefineConstants> 69 69 <DebugType>full</DebugType> … … 74 74 </PropertyGroup> 75 75 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' "> 76 <OutputPath> $(SolutionDir)\bin\</OutputPath>76 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 77 77 <DefineConstants>TRACE</DefineConstants> 78 78 <DocumentationFile> … … 87 87 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' "> 88 88 <DebugSymbols>true</DebugSymbols> 89 <OutputPath> $(SolutionDir)\bin\</OutputPath>89 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 90 90 <DefineConstants>DEBUG;TRACE</DefineConstants> 91 91 <DebugType>full</DebugType> … … 96 96 </PropertyGroup> 97 97 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' "> 98 <OutputPath> $(SolutionDir)\bin\</OutputPath>98 <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath> 99 99 <DefineConstants>TRACE</DefineConstants> 100 100 <DocumentationFile> -
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r14855 r14859 71 71 private const string ClassesParameterName = "ClassNames"; 72 72 private const string NormalizationParameterName = "Normalization"; 73 private const string UpdateIntervalParameterName = "UpdateInterval"; 73 74 #endregion 74 75 … … 124 125 get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; } 125 126 } 127 public IFixedValueParameter<IntValue> UpdateIntervalParameter { 128 get { return Parameters[UpdateIntervalParameterName] as IFixedValueParameter<IntValue>; } 129 } 126 130 #endregion 127 131 … … 182 186 set { NormalizationParameter.Value.Value = value; } 183 187 } 188 189 public int UpdateInterval { 190 get { return UpdateIntervalParameter.Value.Value; } 191 set { UpdateIntervalParameter.Value.Value = value; } 192 } 184 193 #endregion 185 194 … … 191 200 this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames); 192 201 this.dataRows = original.dataRows.ToDictionary(kvp => kvp.Key, kvp => cloner.Clone(kvp.Value)); 193 if (original.state != null)202 if (original.state != null) 194 203 this.state = cloner.Clone(original.state); 195 204 this.iter = original.iter; … … 217 226 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. If the label column can not be found training/test is used as labels.", new StringValue("none"))); 218 227 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true))); 228 Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(50))); 229 Parameters[UpdateIntervalParameterName].Hidden = true; 219 230 220 231 MomentumSwitchIterationParameter.Hidden = true; … … 245 256 var problemData = Problem.ProblemData; 246 257 // set up and initialized everything if necessary 247 if (state == null) {248 if (SetSeedRandomly) Seed = new System.Random().Next();258 if (state == null) { 259 if (SetSeedRandomly) Seed = new System.Random().Next(); 249 260 var random = new MersenneTwister((uint)Seed); 250 261 var dataset = problemData.Dataset; 251 262 var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); 252 263 var data = new double[dataset.Rows][]; 253 for (var row = 0; row < dataset.Rows; row++)264 for (var row = 0; row < dataset.Rows; row++) 254 265 data[row] = allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray(); 255 266 256 if (Normalization) data = NormalizeData(data);267 if (Normalization) data = NormalizeData(data); 257 268 258 269 state = TSNEStatic<double[]>.CreateState(data, Distance, random, NewDimensions, Perplexity, Theta, … … 262 273 iter = 0; 263 274 } 264 for(; iter < MaxIterations && !cancellationToken.IsCancellationRequested; iter++) { 275 for (; iter < MaxIterations && !cancellationToken.IsCancellationRequested; iter++) { 276 if (iter % UpdateInterval == 0) 277 Analyze(state); 265 278 TSNEStatic<double[]>.Iterate(state); 266 Analyze(state);267 }279 } 280 Analyze(state); 268 281 } 269 282 270 283 private void SetUpResults(IReadOnlyCollection<double[]> data) { 271 if (Results == null) return;284 if (Results == null) return; 272 285 var results = Results; 273 286 dataRowNames = new Dictionary<string, List<int>>(); … … 276 289 277 290 //color datapoints acording to classes variable (be it double or string) 278 if (problemData.Dataset.VariableNames.Contains(Classes)) {279 if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {291 if (problemData.Dataset.VariableNames.Contains(Classes)) { 292 if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) { 280 293 var classes = problemData.Dataset.GetStringValues(Classes).ToArray(); 281 for (var i = 0; i < classes.Length; i++) {282 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());294 for (var i = 0; i < classes.Length; i++) { 295 if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>()); 283 296 dataRowNames[classes[i]].Add(i); 284 297 } 285 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {298 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) { 286 299 var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray(); 287 var max = classValues.Max() + 0.1; 300 var max = classValues.Max() + 0.1; 288 301 var min = classValues.Min() - 0.1; 289 302 const int contours = 8; 290 for (var i = 0; i < contours; i++) {303 for (var i = 0; i < contours; i++) { 291 304 var contourname = GetContourName(i, min, max, contours); 292 305 dataRowNames.Add(contourname, new List<int>()); … … 295 308 dataRows[contourname].VisualProperties.PointSize = i + 3; 296 309 } 297 for (var i = 0; i < classValues.Length; i++) {310 for (var i = 0; i < classValues.Length; i++) { 298 311 dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); 299 312 } … … 304 317 } 305 318 306 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0)));319 if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0))); 307 320 else ((IntValue)results[IterationResultName].Value).Value = 0; 308 321 309 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0)));322 if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0))); 310 323 else ((DoubleValue)results[ErrorResultName].Value).Value = 0; 311 324 312 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent")));325 if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"))); 313 326 else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during gradient descent"); 314 327 315 328 var plot = results[ErrorPlotResultName].Value as DataTable; 316 if (plot == null) throw new ArgumentException("could not create/access error data table in results collection");317 318 if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors"));329 if (plot == null) throw new ArgumentException("could not create/access error data table in results collection"); 330 331 if (!plot.Rows.ContainsKey("errors")) plot.Rows.Add(new DataRow("errors")); 319 332 plot.Rows["errors"].Values.Clear(); 333 plot.Rows["errors"].VisualProperties.StartIndexZero = true; 320 334 321 335 results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, ""))); … … 324 338 325 339 private void Analyze(TSNEStatic<double[]>.TSNEState tsneState) { 326 if (Results == null) return;340 if (Results == null) return; 327 341 var results = Results; 328 342 var plot = results[ErrorPlotResultName].Value as DataTable; 329 if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection.");343 if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection."); 330 344 var errors = plot.Rows["errors"].Values; 331 345 var c = tsneState.EvaluateError(); … … 341 355 342 356 private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) { 343 foreach (var rowName in dataRowNames.Keys) {344 if (!plot.Rows.ContainsKey(rowName))357 foreach (var rowName in dataRowNames.Keys) { 358 if (!plot.Rows.ContainsKey(rowName)) 345 359 plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List<Point2D<double>>())); 346 360 plot.Rows[rowName].Points.Replace(dataRowNames[rowName].Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))); … … 352 366 var min = new double[data.GetLength(1)]; 353 367 var res = new double[data.GetLength(0), data.GetLength(1)]; 354 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];355 for (var i = 0; i < data.GetLength(0); i++)356 for (var j = 0; j < data.GetLength(1); j++) {368 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i]; 369 for (var i = 0; i < data.GetLength(0); i++) 370 for (var j = 0; j < data.GetLength(1); j++) { 357 371 var v = data[i, j]; 358 372 max[j] = Math.Max(max[j], v); 359 373 min[j] = Math.Min(min[j], v); 360 374 } 361 for (var i = 0; i < data.GetLength(0); i++) {362 for (var j = 0; j < data.GetLength(1); j++) {375 for (var i = 0; i < data.GetLength(0); i++) { 376 for (var j = 0; j < data.GetLength(1); j++) { 363 377 res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]); 364 378 } … … 368 382 369 383 private static double[][] NormalizeData(IReadOnlyList<double[]> data) { 384 // as in tSNE implementation by van der Maaten 370 385 var n = data[0].Length; 371 386 var mean = new double[n]; 372 var sd= new double[n];387 var max = new double[n]; 373 388 var nData = new double[data.Count][]; 374 for(var i = 0; i < n; i++) { 375 var i1 = i; 376 sd[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).StandardDeviation(); 377 mean[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).Average(); 378 } 379 for(var i = 0; i < data.Count; i++) { 389 for (var i = 0; i < n; i++) { 390 mean[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i]).Average(); 391 max[i] = Enumerable.Range(0, data.Count).Max(x => Math.Abs(data[x][i])); 392 } 393 for (var i = 0; i < data.Count; i++) { 380 394 nData[i] = new double[n]; 381 for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j];395 for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / max[j]; 382 396 } 383 397 return nData;
Note: See TracChangeset
for help on using the changeset viewer.