- Timestamp:
- 12/20/16 15:50:11 (7 years ago)
- Location:
- branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:ignore
-
old new 1 *.user 2 *.vs10x 3 .vs 4 HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user 5 HeuristicLabAlgorithmsDataAnalysisPlugin.cs 6 Plugin.cs 1 7 bin 2 8 obj 3 HeuristicLabAlgorithmsDataAnalysisPlugin.cs4 HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user5 *.vs10x6 Plugin.cs7 *.user8 .vs
-
- Property svn:ignore
-
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs
r14414 r14512 20 20 #endregion 21 21 22 using System; 22 using System.Collections.Generic; 23 using System.Drawing; 23 24 using System.Linq; 24 25 using HeuristicLab.Analysis; … … 27 28 using HeuristicLab.Data; 28 29 using HeuristicLab.Encodings.RealVectorEncoding; 29 using HeuristicLab.Optimization;30 30 using HeuristicLab.Parameters; 31 31 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; … … 60 60 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 61 61 private const string SeedParameterName = "Seed"; 62 private const string ClassesParameterName = "ClassNames"; 62 63 #endregion 63 64 … … 67 68 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; } 68 69 } 69 public IFixedValueParameter<DoubleValue> ThetaParameter70 { 71 get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }70 public OptionalValueParameter<DoubleValue> ThetaParameter 71 { 72 get { return Parameters[ThetaParameterName] as OptionalValueParameter<DoubleValue>; } 72 73 } 73 74 public IFixedValueParameter<IntValue> NewDimensionsParameter … … 110 111 { 111 112 get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; } 113 } 114 public IFixedValueParameter<StringValue> ClassesParameter 115 { 116 get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; } 112 117 } 113 118 #endregion … … 124 129 public double Theta 125 130 { 126 get { return ThetaParameter.Value .Value; }131 get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; } 127 132 } 128 133 public int NewDimensions … … 152 157 public double Eta 153 158 { 154 get { return EtaParameter.Value.Value; } 159 get 160 { 161 return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value; 162 } 155 163 } 156 164 public bool SetSeedRandomly … … 162 170 get { return (uint)SeedParameter.Value.Value; } 163 171 } 172 public string Classes 173 { 174 get { return ClassesParameter.Value.Value; } 175 } 176 177 [Storable] 178 public TSNE<RealVector> tsne; 164 179 #endregion 165 180 … … 172 187 Problem = new RegressionProblem(); 173 188 Parameters.Add(new ValueParameter<IDistance<RealVector>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclidianDistance())); 174 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm ", new DoubleValue(25)));175 Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation", new DoubleValue(0.1)));189 Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended Value is Floor(number of points /3) or lower", new DoubleValue(25))); 190 Parameters.Add(new OptionalValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0.1))); 176 191 Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2))); 177 192 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000))); … … 183 198 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true))); 184 199 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0))); 200 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found Training/Test is used as labels", new StringValue("none"))); 185 201 } 186 202 #endregion 187 203 188 204 protected override void Run() { 189 var lowDimData = new DoubleMatrix(GetProjectedData(Problem.ProblemData)); 190 Results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", CreateScatterPlot(lowDimData, Problem.ProblemData))); 191 Results.Add(new Result(DataResultName, "Projected Data", lowDimData)); 192 } 193 194 private ScatterPlot CreateScatterPlot(DoubleMatrix lowDimData, IDataAnalysisProblemData problemData) { 195 var plot = new ScatterPlot(DataResultName, ""); 196 Normalize(lowDimData); 197 plot.Rows.Add(new ScatterPlotDataRow("Training", "Points of the training set", problemData.TrainingIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])))); 198 plot.Rows.Add(new ScatterPlotDataRow("Test", "Points of the test set", problemData.TestIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1])))); 199 return plot; 200 } 201 202 private double[,] GetProjectedData(IDataAnalysisProblemData problemData) { 205 var data = CalculateProjectedData(Problem.ProblemData); 206 var lowDimData = new DoubleMatrix(data); 207 } 208 209 public override void Stop() { 210 base.Stop(); 211 if (tsne != null) tsne.Running = false; 212 } 213 214 private double[,] CalculateProjectedData(IDataAnalysisProblemData problemData) { 215 var DataRowNames = new Dictionary<string, List<int>>(); 216 var rows = new Dictionary<string, ScatterPlotDataRow>(); 217 218 if (problemData.Dataset.VariableNames.Contains(Classes)) { 219 if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) { 220 var classes = problemData.Dataset.GetStringValues(Classes).ToArray(); 221 for (int i = 0; i < classes.Length; i++) { 222 if (!DataRowNames.ContainsKey(classes[i])) DataRowNames.Add(classes[i], new List<int>()); 223 DataRowNames[classes[i]].Add(i); //always succeeds 224 } 225 } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) { 226 var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray(); 227 var max = classValues.Max() + 0.1; 228 var min = classValues.Min() - 0.1; 229 var contours = 8; 230 for (int i = 0; i < contours; i++) { 231 var name = GetContourName(i, min, max, contours); 232 DataRowNames.Add(name, new List<int>()); 233 rows.Add(name, new ScatterPlotDataRow(name, "", new List<Point2D<double>>())); 234 rows[name].VisualProperties.Color = GetHeatMapColor(i, contours); 235 rows[name].VisualProperties.PointSize = i+3; 236 } 237 for (int i = 0; i < classValues.Length; i++) { 238 DataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); //always succeeds 239 } 240 241 } 242 243 244 } else { 245 DataRowNames.Add("Training", problemData.TrainingIndices.ToList()); 246 DataRowNames.Add("Test", problemData.TestIndices.ToList()); 247 } 248 203 249 var random = SetSeedRandomly ? new MersenneTwister() : new MersenneTwister(Seed); 204 var tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);250 tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, DataRowNames, rows); 205 251 var dataset = problemData.Dataset; 206 252 var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); … … 210 256 } 211 257 212 private static void Normalize(DoubleMatrix data) { 213 var max = new double[data.Columns]; 214 var min = new double[data.Columns]; 215 for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i]; 216 for (var i = 0; i < data.Rows; i++) 217 for (var j = 0; j < data.Columns; j++) { 218 var v = data[i, j]; 219 max[j] = Math.Max(max[j], v); 220 min[j] = Math.Min(min[j], v); 221 } 222 for (var i = 0; i < data.Rows; i++) { 223 for (var j = 0; j < data.Columns; j++) { 224 data[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]); 225 } 226 } 227 228 } 258 private static Color GetHeatMapColor(int contourNr, int noContours) { 259 var q = (double)contourNr / noContours; // q in [0,1] 260 var c = q < 0.5 ? Color.FromArgb((int)(q * 2 * 255), 255, 0) : Color.FromArgb(255, (int)((1 - q) * 2 * 255), 0); 261 return c; 262 } 263 private static string GetContourName(double value, double min, double max, int noContours) { 264 var size = (max - min) / noContours; 265 var contourNr = (int)((value - min) / size); 266 return GetContourName(contourNr, min, max, noContours); 267 } 268 private static string GetContourName(int i, double min, double max, int noContours) { 269 var size = (max - min) / noContours; 270 return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")"; 271 } 272 229 273 } 230 274 }
Note: See TracChangeset
for help on using the changeset viewer.