Free cookie consent management tool by TermsFeed Policy Generator

source: branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs @ 14503

Last change on this file since 14503 was 14414, checked in by bwerth, 7 years ago

#2700 forgot to add files

File size: 10.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.RealVectorEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Problems.DataAnalysis;
33using HeuristicLab.Random;
34
35namespace HeuristicLab.Algorithms.DataAnalysis {
36  /// <summary>
37  /// Linear regression data analysis algorithm.
38  /// </summary>
39  [Item("TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low dimensional space to allow visual cluster identification")]
40  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)]
41  [StorableClass]
42  public sealed class TSNEAnalysis : FixedDataAnalysisAlgorithm<IRegressionProblem> {
43
44    #region Resultnames
45    private const string ScatterPlotResultName = "Scatterplot";
46    private const string DataResultName = "Projected Data";
47    #endregion
48
49    #region Parameternames
50    private const string DistanceParameterName = "DistanceFunction";
51    private const string PerplexityParameterName = "Perplexity";
52    private const string ThetaParameterName = "Theta";
53    private const string NewDimensionsParameterName = "Dimensions";
54    private const string MaxIterationsParameterName = "MaxIterations";
55    private const string StopLyingIterationParameterName = "StopLyingIteration";
56    private const string MomentumSwitchIterationParameterName = "MomentumSwitchIteration";
57    private const string InitialMomentumParameterName = "InitialMomentum";
58    private const string FinalMomentumParameterName = "FinalMomentum";
59    private const string EtaParameterName = "Eta";
60    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
61    private const string SeedParameterName = "Seed";
62    #endregion
63
64    #region Parameterproperties
65    public IFixedValueParameter<DoubleValue> PerplexityParameter
66    {
67      get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
68    }
69    public IFixedValueParameter<DoubleValue> ThetaParameter
70    {
71      get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }
72    }
73    public IFixedValueParameter<IntValue> NewDimensionsParameter
74    {
75      get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }
76    }
77    public IValueParameter<IDistance<RealVector>> DistanceParameter
78    {
79      get { return Parameters[DistanceParameterName] as IValueParameter<IDistance<RealVector>>; }
80    }
81    public IFixedValueParameter<IntValue> MaxIterationsParameter
82    {
83      get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter<IntValue>; }
84    }
85    public IFixedValueParameter<IntValue> StopLyingIterationParameter
86    {
87      get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter<IntValue>; }
88    }
89    public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter
90    {
91      get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter<IntValue>; }
92    }
93    public IFixedValueParameter<DoubleValue> InitialMomentumParameter
94    {
95      get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter<DoubleValue>; }
96    }
97    public IFixedValueParameter<DoubleValue> FinalMomentumParameter
98    {
99      get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter<DoubleValue>; }
100    }
101    public IFixedValueParameter<DoubleValue> EtaParameter
102    {
103      get { return Parameters[EtaParameterName] as IFixedValueParameter<DoubleValue>; }
104    }
105    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter
106    {
107      get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }
108    }
109    public IFixedValueParameter<IntValue> SeedParameter
110    {
111      get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
112    }
113    #endregion
114
115    #region  Properties
116    public IDistance<RealVector> Distance
117    {
118      get { return DistanceParameter.Value; }
119    }
120    public double Perplexity
121    {
122      get { return PerplexityParameter.Value.Value; }
123    }
124    public double Theta
125    {
126      get { return ThetaParameter.Value.Value; }
127    }
128    public int NewDimensions
129    {
130      get { return NewDimensionsParameter.Value.Value; }
131    }
132    public int MaxIterations
133    {
134      get { return MaxIterationsParameter.Value.Value; }
135    }
136    public int StopLyingIteration
137    {
138      get { return StopLyingIterationParameter.Value.Value; }
139    }
140    public int MomentumSwitchIteration
141    {
142      get { return MomentumSwitchIterationParameter.Value.Value; }
143    }
144    public double InitialMomentum
145    {
146      get { return InitialMomentumParameter.Value.Value; }
147    }
148    public double FinalMomentum
149    {
150      get { return FinalMomentumParameter.Value.Value; }
151    }
152    public double Eta
153    {
154      get { return EtaParameter.Value.Value; }
155    }
156    public bool SetSeedRandomly
157    {
158      get { return SetSeedRandomlyParameter.Value.Value; }
159    }
160    public uint Seed
161    {
162      get { return (uint)SeedParameter.Value.Value; }
163    }
164    #endregion
165
166    #region Constructors & Cloning
167    [StorableConstructor]
168    private TSNEAnalysis(bool deserializing) : base(deserializing) { }
169    private TSNEAnalysis(TSNEAnalysis original, Cloner cloner) : base(original, cloner) { }
170    public override IDeepCloneable Clone(Cloner cloner) { return new TSNEAnalysis(this, cloner); }
171    public TSNEAnalysis() {
172      Problem = new RegressionProblem();
173      Parameters.Add(new ValueParameter<IDistance<RealVector>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclidianDistance()));
174      Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm", new DoubleValue(25)));
175      Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation", new DoubleValue(0.1)));
176      Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2)));
177      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000)));
178      Parameters.Add(new FixedValueParameter<IntValue>(StopLyingIterationParameterName, "Number of iterations after which p is no longer approximated", new IntValue(250)));
179      Parameters.Add(new FixedValueParameter<IntValue>(MomentumSwitchIterationParameterName, "Number of iterations after which the momentum in the gradient descent is switched", new IntValue(250)));
180      Parameters.Add(new FixedValueParameter<DoubleValue>(InitialMomentumParameterName, "The initial momentum in the gradient descent", new DoubleValue(0.5)));
181      Parameters.Add(new FixedValueParameter<DoubleValue>(FinalMomentumParameterName, "The final momentum", new DoubleValue(0.8)));
182      Parameters.Add(new FixedValueParameter<DoubleValue>(EtaParameterName, "Gradient Descent learning rate", new DoubleValue(200)));
183      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true)));
184      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0)));
185    }
186    #endregion
187
188    protected override void Run() {
189      var lowDimData = new DoubleMatrix(GetProjectedData(Problem.ProblemData));
190      Results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", CreateScatterPlot(lowDimData, Problem.ProblemData)));
191      Results.Add(new Result(DataResultName, "Projected Data", lowDimData));
192    }
193
194    private ScatterPlot CreateScatterPlot(DoubleMatrix lowDimData, IDataAnalysisProblemData problemData) {
195      var plot = new ScatterPlot(DataResultName, "");
196      Normalize(lowDimData);
197      plot.Rows.Add(new ScatterPlotDataRow("Training", "Points of the training set", problemData.TrainingIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))));
198      plot.Rows.Add(new ScatterPlotDataRow("Test", "Points of the test set", problemData.TestIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))));
199      return plot;
200    }
201
202    private double[,] GetProjectedData(IDataAnalysisProblemData problemData) {
203      var random = SetSeedRandomly ? new MersenneTwister() : new MersenneTwister(Seed);
204      var tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);
205      var dataset = problemData.Dataset;
206      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
207      var data = new RealVector[dataset.Rows];
208      for (var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray());
209      return tsne.Run(data, NewDimensions, Perplexity, Theta);
210    }
211
212    private static void Normalize(DoubleMatrix data) {
213      var max = new double[data.Columns];
214      var min = new double[data.Columns];
215      for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
216      for (var i = 0; i < data.Rows; i++)
217        for (var j = 0; j < data.Columns; j++) {
218          var v = data[i, j];
219          max[j] = Math.Max(max[j], v);
220          min[j] = Math.Min(min[j], v);
221        }
222      for (var i = 0; i < data.Rows; i++) {
223        for (var j = 0; j < data.Columns; j++) {
224          data[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
225        }
226      }
227
228    }
229  }
230}
Note: See TracBrowser for help on using the repository browser.