Changeset 14518


Ignore:
Timestamp:
12/22/16 10:08:25 (3 years ago)
Author:
bwerth
Message:

#2700 TSNEAnalysis is now a BasicAlgorithm; hid some parameters; added optional data normalization to make TSNE scaling-invariant

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs

    r14185 r14518  
    4444  [StorableClass]
    4545  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 350)]
    46   public class GradientBoostingRegressionAlgorithm : BasicAlgorithm {
     46  public class GradientBoostingRegressionAlgorithm : BasicAlgorithm, IDataAnalysisAlgorithm<IRegressionProblem> {
    4747    public override Type ProblemType {
    4848      get { return typeof(IRegressionProblem); }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegression.cs

    r14185 r14518  
    3939  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 160)]
    4040  [StorableClass]
    41   public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent {
     41  public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent, IDataAnalysisAlgorithm<IRegressionProblem> {
    4242    public string Filename { get; set; }
    4343
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r14345 r14518  
    3838  [StorableClass]
    3939  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 125)]
    40   public class GradientBoostedTreesAlgorithm : BasicAlgorithm {
    41     public override Type ProblemType {
     40  public class GradientBoostedTreesAlgorithm : BasicAlgorithm, IDataAnalysisAlgorithm<IRegressionProblem> {
     41    public override Type ProblemType
     42    {
    4243      get { return typeof(IRegressionProblem); }
    4344    }
    44     public new IRegressionProblem Problem {
     45    public new IRegressionProblem Problem
     46    {
    4547      get { return (IRegressionProblem)base.Problem; }
    4648      set { base.Problem = value; }
     
    6163
    6264    #region ParameterProperties
    63     public IFixedValueParameter<IntValue> IterationsParameter {
     65    public IFixedValueParameter<IntValue> IterationsParameter
     66    {
    6467      get { return (IFixedValueParameter<IntValue>)Parameters[IterationsParameterName]; }
    6568    }
    66     public IFixedValueParameter<IntValue> MaxSizeParameter {
     69    public IFixedValueParameter<IntValue> MaxSizeParameter
     70    {
    6771      get { return (IFixedValueParameter<IntValue>)Parameters[MaxSizeParameterName]; }
    6872    }
    69     public IFixedValueParameter<DoubleValue> NuParameter {
     73    public IFixedValueParameter<DoubleValue> NuParameter
     74    {
    7075      get { return (IFixedValueParameter<DoubleValue>)Parameters[NuParameterName]; }
    7176    }
    72     public IFixedValueParameter<DoubleValue> RParameter {
     77    public IFixedValueParameter<DoubleValue> RParameter
     78    {
    7379      get { return (IFixedValueParameter<DoubleValue>)Parameters[RParameterName]; }
    7480    }
    75     public IFixedValueParameter<DoubleValue> MParameter {
     81    public IFixedValueParameter<DoubleValue> MParameter
     82    {
    7683      get { return (IFixedValueParameter<DoubleValue>)Parameters[MParameterName]; }
    7784    }
    78     public IFixedValueParameter<IntValue> SeedParameter {
     85    public IFixedValueParameter<IntValue> SeedParameter
     86    {
    7987      get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; }
    8088    }
    81     public FixedValueParameter<BoolValue> SetSeedRandomlyParameter {
     89    public FixedValueParameter<BoolValue> SetSeedRandomlyParameter
     90    {
    8291      get { return (FixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; }
    8392    }
    84     public IConstrainedValueParameter<ILossFunction> LossFunctionParameter {
     93    public IConstrainedValueParameter<ILossFunction> LossFunctionParameter
     94    {
    8595      get { return (IConstrainedValueParameter<ILossFunction>)Parameters[LossFunctionParameterName]; }
    8696    }
    87     public IFixedValueParameter<IntValue> UpdateIntervalParameter {
     97    public IFixedValueParameter<IntValue> UpdateIntervalParameter
     98    {
    8899      get { return (IFixedValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    89100    }
    90     public IFixedValueParameter<BoolValue> CreateSolutionParameter {
     101    public IFixedValueParameter<BoolValue> CreateSolutionParameter
     102    {
    91103      get { return (IFixedValueParameter<BoolValue>)Parameters[CreateSolutionParameterName]; }
    92104    }
     
    94106
    95107    #region Properties
    96     public int Iterations {
     108    public int Iterations
     109    {
    97110      get { return IterationsParameter.Value.Value; }
    98111      set { IterationsParameter.Value.Value = value; }
    99112    }
    100     public int Seed {
     113    public int Seed
     114    {
    101115      get { return SeedParameter.Value.Value; }
    102116      set { SeedParameter.Value.Value = value; }
    103117    }
    104     public bool SetSeedRandomly {
     118    public bool SetSeedRandomly
     119    {
    105120      get { return SetSeedRandomlyParameter.Value.Value; }
    106121      set { SetSeedRandomlyParameter.Value.Value = value; }
    107122    }
    108     public int MaxSize {
     123    public int MaxSize
     124    {
    109125      get { return MaxSizeParameter.Value.Value; }
    110126      set { MaxSizeParameter.Value.Value = value; }
    111127    }
    112     public double Nu {
     128    public double Nu
     129    {
    113130      get { return NuParameter.Value.Value; }
    114131      set { NuParameter.Value.Value = value; }
    115132    }
    116     public double R {
     133    public double R
     134    {
    117135      get { return RParameter.Value.Value; }
    118136      set { RParameter.Value.Value = value; }
    119137    }
    120     public double M {
     138    public double M
     139    {
    121140      get { return MParameter.Value.Value; }
    122141      set { MParameter.Value.Value = value; }
    123142    }
    124     public bool CreateSolution {
     143    public bool CreateSolution
     144    {
    125145      get { return CreateSolutionParameter.Value.Value; }
    126146      set { CreateSolutionParameter.Value.Value = value; }
     
    129149
    130150    #region ResultsProperties
    131     private double ResultsBestQuality {
     151    private double ResultsBestQuality
     152    {
    132153      get { return ((DoubleValue)Results["Best Quality"].Value).Value; }
    133154      set { ((DoubleValue)Results["Best Quality"].Value).Value = value; }
    134155    }
    135     private DataTable ResultsQualities {
     156    private DataTable ResultsQualities
     157    {
    136158      get { return ((DataTable)Results["Qualities"].Value); }
    137159    }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14512 r14518  
    338338      <SubType>Code</SubType>
    339339    </Compile>
    340     <Compile Include="KPCA\SelfOrganizingMap.cs" />
    341     <Compile Include="KPCA\KernelFunctions\CicularKernel.cs" />
    342     <Compile Include="KPCA\KernelFunctions\GaussianKernel.cs" />
    343     <Compile Include="KPCA\KernelFunctions\InverseMultiquadraticKernel .cs" />
    344     <Compile Include="KPCA\KernelFunctions\LaplacianKernel.cs" />
    345     <Compile Include="KPCA\KernelFunctions\MultiquadraticKernel.cs" />
    346     <Compile Include="KPCA\KernelFunctions\NoKernel.cs" />
    347     <Compile Include="KPCA\KernelFunctions\PolysplineKernel.cs" />
    348     <Compile Include="KPCA\KernelFunctions\RadialBasisKernelBase.cs" />
    349     <Compile Include="KPCA\KernelFunctions\ThinPlatePolysplineKernel.cs" />
    350     <Compile Include="KPCA\KernelFunctions\TricubicKernel.cs" />
    351     <Compile Include="KPCA\KernelPrincipleComponentAnalysis.cs" />
    352     <Compile Include="KPCA\Isomap.cs" />
    353     <Compile Include="KPCA\KPCA.cs" />
    354     <Compile Include="KPCA\MatrixUtilities.cs" />
    355340    <Compile Include="Linear\AlglibUtil.cs" />
    356341    <Compile Include="Linear\Scaling.cs" />
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Cell.cs

    r14414 r14518  
    5757using System.Linq;
    5858using HeuristicLab.Common;
    59 using HeuristicLab.Core;
    6059using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    6160
    6261namespace HeuristicLab.Algorithms.DataAnalysis {
    6362  [StorableClass]
    64   public class Cell : Item, ICell {
     63  public class Cell : DeepCloneable, ICell {
    6564    #region properties
    6665    [Storable]
     
    7473    #region HLConstructors & Cloning
    7574    [StorableConstructor]
    76     protected Cell(bool deserializing) : base(deserializing) { }
     75    protected Cell(bool deserializing) { }
    7776    protected Cell(Cell original, Cloner cloner) : base(original, cloner) {
    7877      dimension = original.dimension;
     
    112111      return true;
    113112    }
    114 
    115 
    116113  }
    117114}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/DataPoint.cs

    r14414 r14518  
    5555
    5656using HeuristicLab.Common;
    57 using HeuristicLab.Core;
    5857using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    5958
    6059namespace HeuristicLab.Algorithms.DataAnalysis {
    6160  [StorableClass]
    62   public class DataPoint<T> : Item, IDataPoint<T> where T : class, IDeepCloneable {
     61  public class DataPoint<T> : DeepCloneable, IDataPoint<T> where T : class, IDeepCloneable {
    6362    #region properties
    6463    [Storable]
     
    7574    #region HLConstructors & Cloning
    7675    [StorableConstructor]
    77     protected DataPoint(bool deserializing) : base(deserializing) { }
     76    protected DataPoint(bool deserializing) { }
    7877    protected DataPoint(DataPoint<T> original, Cloner cloner)
    7978      : base(original, cloner) {
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SPtree.cs

    r14414 r14518  
    5858using System.Linq;
    5959using HeuristicLab.Common;
    60 using HeuristicLab.Core;
    6160using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    6261
    6362namespace HeuristicLab.Algorithms.DataAnalysis {
    6463  [StorableClass]
    65   public class SPTree : Item, ISPTree {
     64  public class SPTree : DeepCloneable, ISPTree {
    6665    private const uint QT_NODE_CAPACITY = 1;
    6766
     
    10099    #region HLConstructors & Cloning
    101100    [StorableConstructor]
    102     protected SPTree(bool deserializing) : base(deserializing) { }
     101    protected SPTree(bool deserializing) { }
    103102    protected SPTree(SPTree original, Cloner cloner)
    104103      : base(original, cloner) {
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs

    r14512 r14518  
    6767namespace HeuristicLab.Algorithms.DataAnalysis {
    6868  [StorableClass]
    69   public class TSNE<T> : Item, ITSNE<T> where T : class, IDeepCloneable {
     69  public class TSNE<T> : DeepCloneable, ITSNE<T> where T : class, IDeepCloneable {
    7070
    7171    private const string IterationResultName = "Iteration";
     
    106106    #region HLConstructors & Cloning
    107107    [StorableConstructor]
    108     protected TSNE(bool deserializing) : base(deserializing) { }
     108    protected TSNE(bool deserializing) { }
    109109    protected TSNE(TSNE<T> original, Cloner cloner) : base(original, cloner) {
    110110      distance = cloner.Clone(original.distance);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs

    r14512 r14518  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Drawing;
    2425using System.Linq;
     26using System.Threading;
    2527using HeuristicLab.Analysis;
    2628using HeuristicLab.Common;
     
    2830using HeuristicLab.Data;
    2931using HeuristicLab.Encodings.RealVectorEncoding;
     32using HeuristicLab.Optimization;
    3033using HeuristicLab.Parameters;
    3134using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    4043  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)]
    4144  [StorableClass]
    42   public sealed class TSNEAnalysis : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    43 
     45  public sealed class TSNEAnalysis : BasicAlgorithm {
     46
     47    public override Type ProblemType
     48    {
     49      get { return typeof(IDataAnalysisProblem); }
     50    }
     51    public new IDataAnalysisProblem Problem
     52    {
     53      get { return (IDataAnalysisProblem)base.Problem; }
     54      set { base.Problem = value; }
     55    }
    4456    #region Resultnames
    4557    private const string ScatterPlotResultName = "Scatterplot";
     
    6173    private const string SeedParameterName = "Seed";
    6274    private const string ClassesParameterName = "ClassNames";
     75    private const string NormalizationParameterName = "Normalization";
    6376    #endregion
    6477
     
    115128    {
    116129      get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; }
     130    }
     131    public IFixedValueParameter<BoolValue> NormalizationParameter
     132    {
     133      get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; }
    117134    }
    118135    #endregion
     
    174191      get { return ClassesParameter.Value.Value; }
    175192    }
    176 
     193    public bool Normalization
     194    {
     195      get { return NormalizationParameter.Value.Value; }
     196    }
    177197    [Storable]
    178198    public TSNE<RealVector> tsne;
     
    191211      Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2)));
    192212      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000)));
    193       Parameters.Add(new FixedValueParameter<IntValue>(StopLyingIterationParameterName, "Number of iterations after which p is no longer approximated", new IntValue(250)));
    194       Parameters.Add(new FixedValueParameter<IntValue>(MomentumSwitchIterationParameterName, "Number of iterations after which the momentum in the gradient descent is switched", new IntValue(250)));
     213      Parameters.Add(new FixedValueParameter<IntValue>(StopLyingIterationParameterName, "Number of iterations after which p is no longer approximated", new IntValue(0)));
     214      Parameters.Add(new FixedValueParameter<IntValue>(MomentumSwitchIterationParameterName, "Number of iterations after which the momentum in the gradient descent is switched", new IntValue(0)));
    195215      Parameters.Add(new FixedValueParameter<DoubleValue>(InitialMomentumParameterName, "The initial momentum in the gradient descent", new DoubleValue(0.5)));
    196216      Parameters.Add(new FixedValueParameter<DoubleValue>(FinalMomentumParameterName, "The final momentum", new DoubleValue(0.8)));
     
    199219      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0)));
    200220      Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found Training/Test is used as labels", new StringValue("none")));
    201     }
    202     #endregion
    203 
    204     protected override void Run() {
    205       var data = CalculateProjectedData(Problem.ProblemData);
    206       var lowDimData = new DoubleMatrix(data);
    207     }
     221      Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Wether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored", new BoolValue(true)));
     222
     223      MomentumSwitchIterationParameter.Hidden = true;
     224      InitialMomentumParameter.Hidden = true;
     225      FinalMomentumParameter.Hidden = true;
     226      StopLyingIterationParameter.Hidden = true;
     227      EtaParameter.Hidden = true;
     228    }
     229    #endregion
    208230
    209231    public override void Stop() {
     
    212234    }
    213235
     236    protected override void Run(CancellationToken cancellationToken) {
     237      var data = CalculateProjectedData(Problem.ProblemData);
     238      var lowDimData = new DoubleMatrix(data);
     239    }
     240
    214241    private double[,] CalculateProjectedData(IDataAnalysisProblemData problemData) {
    215       var DataRowNames = new Dictionary<string, List<int>>();
     242      var dataRowNames = new Dictionary<string, List<int>>();
    216243      var rows = new Dictionary<string, ScatterPlotDataRow>();
    217244
     
    220247          var classes = problemData.Dataset.GetStringValues(Classes).ToArray();
    221248          for (int i = 0; i < classes.Length; i++) {
    222             if (!DataRowNames.ContainsKey(classes[i])) DataRowNames.Add(classes[i], new List<int>());
    223             DataRowNames[classes[i]].Add(i); //always succeeds
     249            if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List<int>());
     250            dataRowNames[classes[i]].Add(i); //always succeeds
    224251          }
    225252        } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
     
    228255          var min = classValues.Min() - 0.1;
    229256          var contours = 8;
    230           for (int i = 0; i < contours; i++) {
     257          for (var i = 0; i < contours; i++) {
    231258            var name = GetContourName(i, min, max, contours);
    232             DataRowNames.Add(name, new List<int>());
     259            dataRowNames.Add(name, new List<int>());
    233260            rows.Add(name, new ScatterPlotDataRow(name, "", new List<Point2D<double>>()));
    234261            rows[name].VisualProperties.Color = GetHeatMapColor(i, contours);
    235             rows[name].VisualProperties.PointSize = i+3;
     262            rows[name].VisualProperties.PointSize = i + 3;
    236263          }
    237264          for (int i = 0; i < classValues.Length; i++) {
    238             DataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); //always succeeds
     265            dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); //always succeeds
    239266          }
    240267
    241268        }
    242269
    243 
    244270      } else {
    245         DataRowNames.Add("Training", problemData.TrainingIndices.ToList());
    246         DataRowNames.Add("Test", problemData.TestIndices.ToList());
     271        dataRowNames.Add("Training", problemData.TrainingIndices.ToList());
     272        dataRowNames.Add("Test", problemData.TestIndices.ToList());
    247273      }
    248274
    249275      var random = SetSeedRandomly ? new MersenneTwister() : new MersenneTwister(Seed);
    250       tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, DataRowNames, rows);
     276      tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows);
    251277      var dataset = problemData.Dataset;
    252278      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
    253279      var data = new RealVector[dataset.Rows];
    254280      for (var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray());
     281
     282      if (Normalization) {
     283        data = NormalizeData(data);
     284      }
     285
    255286      return tsne.Run(data, NewDimensions, Perplexity, Theta);
     287    }
     288
     289    private RealVector[] NormalizeData(RealVector[] data) {
     290      var n = data[0].Length;
     291      var mean = new double[n];
     292      var sd = new double[n];
     293      var nData = new RealVector[data.Length];
     294      for (var i = 0; i < n; i++) {
     295        var i1 = i;
     296        sd[i] = Enumerable.Range(0, data.Length).Select(x => data[x][i1]).StandardDeviation();
     297        mean[i] = Enumerable.Range(0, data.Length).Select(x => data[x][i1]).Average();
     298      }
     299      for (int i = 0; i < data.Length; i++) {
     300        nData[i] = new RealVector(n);
     301        for (int j = 0; j < n; j++) {
     302          nData[i][j] = (data[i][j] - mean[j]) / sd[j];
     303        }
     304      }
     305      return nData;
     306
     307
    256308    }
    257309
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VPTree.cs

    r14414 r14518  
    6464namespace HeuristicLab.Algorithms.DataAnalysis {
    6565  [StorableClass]
    66   public class VPTree<T> : Item, IVPTree<T> where T : class, IDeepCloneable {
     66  public class VPTree<T> : DeepCloneable, IVPTree<T> where T : class, IDeepCloneable {
    6767    #region properties
    6868    [Storable]
     
    7878    #region HLConstructors & Cloning
    7979    [StorableConstructor]
    80     protected VPTree(bool deserializing) : base(deserializing) { }
     80    protected VPTree(bool deserializing) { }
    8181    protected VPTree(VPTree<T> original, Cloner cloner)
    8282      : base(original, cloner) {
Note: See TracChangeset for help on using the changeset viewer.