Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/20/16 15:50:11 (7 years ago)
Author:
bwerth
Message:

#2700: incorporated several review comments from mkommend, extended the analysis performed during the algorithm run, added more Distances, and made the algorithm stoppable

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4

    • Property svn:ignore
      •  

        old new  
            1   *.user
            2   *.vs10x
            3   .vs
            4   HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user
            5   HeuristicLabAlgorithmsDataAnalysisPlugin.cs
            6   Plugin.cs
        1   7   bin
        2   8   obj
        3       HeuristicLabAlgorithmsDataAnalysisPlugin.cs
        4       HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user
        5       *.vs10x
        6       Plugin.cs
        7       *.user
        8       .vs
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs

    r14414 r14512  
    2020#endregion
    2121
    22 using System;
     22using System.Collections.Generic;
     23using System.Drawing;
    2324using System.Linq;
    2425using HeuristicLab.Analysis;
     
    2728using HeuristicLab.Data;
    2829using HeuristicLab.Encodings.RealVectorEncoding;
    29 using HeuristicLab.Optimization;
    3030using HeuristicLab.Parameters;
    3131using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    6060    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
    6161    private const string SeedParameterName = "Seed";
     62    private const string ClassesParameterName = "ClassNames";
    6263    #endregion
    6364
     
    6768      get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
    6869    }
    69     public IFixedValueParameter<DoubleValue> ThetaParameter
    70     {
    71       get { return Parameters[ThetaParameterName] as IFixedValueParameter<DoubleValue>; }
     70    public OptionalValueParameter<DoubleValue> ThetaParameter
     71    {
     72      get { return Parameters[ThetaParameterName] as OptionalValueParameter<DoubleValue>; }
    7273    }
    7374    public IFixedValueParameter<IntValue> NewDimensionsParameter
     
    110111    {
    111112      get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
     113    }
     114    public IFixedValueParameter<StringValue> ClassesParameter
     115    {
     116      get { return Parameters[ClassesParameterName] as IFixedValueParameter<StringValue>; }
    112117    }
    113118    #endregion
     
    124129    public double Theta
    125130    {
    126       get { return ThetaParameter.Value.Value; }
     131      get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; }
    127132    }
    128133    public int NewDimensions
     
    152157    public double Eta
    153158    {
    154       get { return EtaParameter.Value.Value; }
     159      get
     160      {
     161        return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value;
     162      }
    155163    }
    156164    public bool SetSeedRandomly
     
    162170      get { return (uint)SeedParameter.Value.Value; }
    163171    }
     172    public string Classes
     173    {
     174      get { return ClassesParameter.Value.Value; }
     175    }
     176
     177    [Storable]
     178    public TSNE<RealVector> tsne;
    164179    #endregion
    165180
     
    172187      Problem = new RegressionProblem();
    173188      Parameters.Add(new ValueParameter<IDistance<RealVector>>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclidianDistance()));
    174       Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm", new DoubleValue(25)));
    175       Parameters.Add(new FixedValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation", new DoubleValue(0.1)));
     189      Parameters.Add(new FixedValueParameter<DoubleValue>(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended Value is Floor(number of points /3) or lower", new DoubleValue(25)));
     190      Parameters.Add(new OptionalValueParameter<DoubleValue>(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0.1)));
    176191      Parameters.Add(new FixedValueParameter<IntValue>(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2)));
    177192      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000)));
     
    183198      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true)));
    184199      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random", new IntValue(0)));
     200      Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found Training/Test is used as labels", new StringValue("none")));
    185201    }
    186202    #endregion
    187203
    188204    protected override void Run() {
    189       var lowDimData = new DoubleMatrix(GetProjectedData(Problem.ProblemData));
    190       Results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", CreateScatterPlot(lowDimData, Problem.ProblemData)));
    191       Results.Add(new Result(DataResultName, "Projected Data", lowDimData));
    192     }
    193 
    194     private ScatterPlot CreateScatterPlot(DoubleMatrix lowDimData, IDataAnalysisProblemData problemData) {
    195       var plot = new ScatterPlot(DataResultName, "");
    196       Normalize(lowDimData);
    197       plot.Rows.Add(new ScatterPlotDataRow("Training", "Points of the training set", problemData.TrainingIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))));
    198       plot.Rows.Add(new ScatterPlotDataRow("Test", "Points of the test set", problemData.TestIndices.Select(i => new Point2D<double>(lowDimData[i, 0], lowDimData[i, 1]))));
    199       return plot;
    200     }
    201 
    202     private double[,] GetProjectedData(IDataAnalysisProblemData problemData) {
     205      var data = CalculateProjectedData(Problem.ProblemData);
     206      var lowDimData = new DoubleMatrix(data);
     207    }
     208
     209    public override void Stop() {
     210      base.Stop();
     211      if (tsne != null) tsne.Running = false;
     212    }
     213
     214    private double[,] CalculateProjectedData(IDataAnalysisProblemData problemData) {
     215      var DataRowNames = new Dictionary<string, List<int>>();
     216      var rows = new Dictionary<string, ScatterPlotDataRow>();
     217
     218      if (problemData.Dataset.VariableNames.Contains(Classes)) {
     219        if ((problemData.Dataset as Dataset).VariableHasType<string>(Classes)) {
     220          var classes = problemData.Dataset.GetStringValues(Classes).ToArray();
     221          for (int i = 0; i < classes.Length; i++) {
     222            if (!DataRowNames.ContainsKey(classes[i])) DataRowNames.Add(classes[i], new List<int>());
     223            DataRowNames[classes[i]].Add(i); //always succeeds
     224          }
     225        } else if ((problemData.Dataset as Dataset).VariableHasType<double>(Classes)) {
     226          var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray();
     227          var max = classValues.Max() + 0.1;
     228          var min = classValues.Min() - 0.1;
     229          var contours = 8;
     230          for (int i = 0; i < contours; i++) {
     231            var name = GetContourName(i, min, max, contours);
     232            DataRowNames.Add(name, new List<int>());
     233            rows.Add(name, new ScatterPlotDataRow(name, "", new List<Point2D<double>>()));
     234            rows[name].VisualProperties.Color = GetHeatMapColor(i, contours);
     235            rows[name].VisualProperties.PointSize = i+3;
     236          }
     237          for (int i = 0; i < classValues.Length; i++) {
     238            DataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); //always succeeds
     239          }
     240
     241        }
     242
     243
     244      } else {
     245        DataRowNames.Add("Training", problemData.TrainingIndices.ToList());
     246        DataRowNames.Add("Test", problemData.TestIndices.ToList());
     247      }
     248
    203249      var random = SetSeedRandomly ? new MersenneTwister() : new MersenneTwister(Seed);
    204       var tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);
     250      tsne = new TSNE<RealVector>(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, DataRowNames, rows);
    205251      var dataset = problemData.Dataset;
    206252      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
     
    210256    }
    211257
    212     private static void Normalize(DoubleMatrix data) {
    213       var max = new double[data.Columns];
    214       var min = new double[data.Columns];
    215       for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
    216       for (var i = 0; i < data.Rows; i++)
    217         for (var j = 0; j < data.Columns; j++) {
    218           var v = data[i, j];
    219           max[j] = Math.Max(max[j], v);
    220           min[j] = Math.Min(min[j], v);
    221         }
    222       for (var i = 0; i < data.Rows; i++) {
    223         for (var j = 0; j < data.Columns; j++) {
    224           data[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
    225         }
    226       }
    227 
    228     }
     258    private static Color GetHeatMapColor(int contourNr, int noContours) {
     259      var q = (double)contourNr / noContours;  // q in [0,1]
     260      var c = q < 0.5 ? Color.FromArgb((int)(q * 2 * 255), 255, 0) : Color.FromArgb(255, (int)((1 - q) * 2 * 255), 0);
     261      return c;
     262    }
     263    private static string GetContourName(double value, double min, double max, int noContours) {
     264      var size = (max - min) / noContours;
     265      var contourNr = (int)((value - min) / size);
     266      return GetContourName(contourNr, min, max, noContours);
     267    }
     268    private static string GetContourName(int i, double min, double max, int noContours) {
     269      var size = (max - min) / noContours;
     270      return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")";
     271    }
     272
    229273  }
    230274}
Note: See TracChangeset for help on using the changeset viewer.