Free cookie consent management tool by TermsFeed Policy Generator

Changeset 10908


Ignore:
Timestamp:
05/28/14 15:09:26 (11 years ago)
Author:
mleitner
Message:

Add feature correlation matrix; add limit for distinct values in histogram classification.

Location:
branches/DataPreprocessing
Files:
12 edited
3 copied

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/DataPreprocessingView.cs

    r10905 r10908  
    2626using HeuristicLab.Core.Views;
    2727using HeuristicLab.MainForm;
     28using HeuristicLab.DataPreprocessing;
     29using HeuristicLab.Problems.DataAnalysis;
    2830
    2931namespace HeuristicLab.DataPreprocessing.Views {
     
    5052        var manipulationLogic = new ManipulationLogic(data, searchLogic, statisticsLogic, dataGridLogic);
    5153        var chartLogic = new ChartLogic(data);
     54        var correlationMatrixLogic = new ChartLogic(data);
    5255        var filterLogic = new FilterLogic(data);
     56        var creator = new ProblemDataCreator(Content);
     57        var problemData = (DataAnalysisProblemData)creator.CreateProblemData();
    5358        var dataCompletenessLogic = new ChartLogic(data);
    5459
     
    6166          new LineChartContent(chartLogic),
    6267          new HistogramContent(chartLogic),
    63           new ScatterPlotContent(chartLogic)
    64           //,
    65           //new DataCompletenessChartContent(dataCompletenessLogic)
     68          new ScatterPlotContent(chartLogic),
     69          new CorrelationMatrixContent(problemData)
    6670        };
    6771
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/HeuristicLab.DataPreprocessing.Views-3.3.csproj

    r10904 r10908  
    6464  </ItemGroup>
    6565  <ItemGroup>
     66    <Compile Include="PreprocessingFeatureCorrelationView.cs">
     67      <SubType>UserControl</SubType>
     68    </Compile>
     69    <Compile Include="PreprocessingFeatureCorrelationView.Designer.cs">
     70      <DependentUpon>PreprocessingFeatureCorrelationView.cs</DependentUpon>
     71    </Compile>
    6672    <Compile Include="CheckedFilterCollectionView.cs">
    6773      <SubType>UserControl</SubType>
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/HistogramView.Designer.cs

    r10867 r10908  
    5454      //
    5555      this.classifierComboBox.FormattingEnabled = true;
    56       this.classifierComboBox.Location = new System.Drawing.Point(6, 52);
     56      this.classifierComboBox.Location = new System.Drawing.Point(9, 52);
    5757      this.classifierComboBox.Name = "classifierComboBox";
    5858      this.classifierComboBox.Size = new System.Drawing.Size(121, 21);
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/HistogramView.cs

    r10871 r10908  
    99  [Content(typeof(HistogramContent), true)]
    1010  public partial class HistogramView : PreprocessingChartView {
    11 
    1211    private const string HISTOGRAM_CHART_TITLE = "Histogram";
    1312
     
    2625   
    2726        classifierComboBox.Items.Clear();
    28 
    2927        classifierComboBox.Items.Add("None");
    3028
    31         foreach(string var in logic.GetVariableNames()){
     29        foreach(string var in logic.GetVariableNamesForHistogramClassification()){
    3230          classifierComboBox.Items.Add(var);
    3331        }
    3432
     33     
    3534        if (classifierComboBox.SelectedItem == null) {
    36          // classifierComboBox.SelectedIndex = 0;
    37 
    3835          classifierComboBox.SelectedIndex = Content.ClassifierVariableIndex;
    3936        }
     
    4643    }
    4744
    48     private void classifierComboBox_SelectedIndexChanged(object sender, EventArgs e)
    49     {
     45    private void classifierComboBox_SelectedIndexChanged(object sender, EventArgs e) {
    5046      if (classifierComboBox.SelectedItem == null)
    5147        return;
    5248
    53 
    54       if (classifierComboBox.SelectedIndex != 0)
    55       {
    56         classification = logic.GetVariableValues(classifierComboBox.SelectedItem.ToString());
    57       }
    58       else {
    59         classification = null;
     49      if (classifierComboBox.SelectedIndex != 0) {
     50        Classification = logic.GetVariableValues(classifierComboBox.SelectedItem.ToString());
     51      } else {
     52        Classification = null;
    6053      }
    6154
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/PreprocessingChartView.cs

    r10867 r10908  
    4949    private const string DEFAULT_CHART_TITLE = "Chart";
    5050
    51     public List<double> classification { get; set; }
     51    public IEnumerable<double> Classification { get; set; }
    5252
    5353    public PreprocessingChartView() {
     
    8282      }
    8383
    84       //if (!Content.AllInOneMode)
    85         if (Content != null && !Content.AllInOneMode)
     84      if (Content != null && !Content.AllInOneMode)
    8685        GenerateChart();
    8786
     
    336335
    337336          PreprocessingDataTableView dataView = new PreprocessingDataTableView();
    338           dataView.classification = classification;
     337          dataView.Classification = Classification;
    339338          enumerator.MoveNext();
    340339          PreprocessingDataTable d = enumerator.Current;
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/PreprocessingDataTableView.cs

    r10877 r10908  
    4848    }
    4949
    50     public List<double> classification { get; set; }
     50    public IEnumerable<double> Classification { get; set; }
    5151
    5252    public PreprocessingDataTableView()
     
    115115    #endregion
    116116
    117     protected override void OnContentChanged()
    118     {
     117    protected override void OnContentChanged() {
    119118      base.OnContentChanged();
    120119      invisibleSeries.Clear();
     
    126125      {
    127126
    128         if (classification != null)
    129         {
     127        if (Classification != null)
    130128          chart.Titles[0].Text = Content.Name;
    131         }
    132129
    133130        AddDataRows(Content.Rows);
     
    166163        FillSeriesWithRowValues(series, row);
    167164
    168         if (classification == null)
    169         {
    170         chart.Series.Add(series);
    171       }
     165        if (Classification == null)
     166          chart.Series.Add(series);
    172167      }
    173168
     
    202197          FillSeriesWithRowValues(series, row);
    203198
    204           if (classification == null) {
    205           chart.Series.Add(series);
    206         }
     199          if (Classification == null)
     200            chart.Series.Add(series);
     201       
    207202      }
    208203    }
     
    735730    }
    736731
    737     protected virtual void CalculateHistogram(Series series, DataRow row)
    738     {
    739       if (classification != null)
    740       {
    741 
    742         var qry = row.Values.Select((i, index) => new { i, j = classification[index] })
    743                            .GroupBy((x) => x.j).ToDictionary(x => x.Key, x => x.Select(v => v.i).ToList());
     732    protected virtual void CalculateHistogram(Series series, DataRow row) {
     733      if (Classification != null) {
     734
     735        var valuesPerClass = row.Values.Select((i, index) => new { i, j = Classification.ToList()[index] })
     736                                       .GroupBy((x) => x.j)
     737                                       .ToDictionary(x => x.Key, x => x.Select(v => v.i)
     738                                       .ToList());
     739
    744740        chart.Titles.Add(row.Name);
    745         foreach (KeyValuePair<double, List<double>> entry in qry)
    746         {
     741
     742        foreach (KeyValuePair<double, List<double>> entry in valuesPerClass) {
    747743          var s = new Series(row.Name + entry.Key);
    748744
     
    754750          chart.Series.Add(s);
    755751        }
    756       }
    757       else
    758       {
    759       series.Points.Clear();
     752      } else {
     753        series.Points.Clear();
    760754        ConfigureSeries(series, row);
    761755        AddPointsToHistogramSeries(series, row, null);
     
    763757    }
    764758
    765     private void AddPointsToHistogramSeries(Series series, DataRow row, List<double> values)
    766     {
    767 
     759    private void AddPointsToHistogramSeries(Series series, DataRow row, List<double> values) {
    768760      if (!row.Values.Any()) return;
    769761      int bins = row.VisualProperties.Bins;
     
    819811
    820812      //  shift the chart to the left so the bars are placed on the intervals
    821       if (classification != null || valueFrequencies.First().Item1 < doubleRange.First()) {
     813      if (Classification != null || valueFrequencies.First().Item1 < doubleRange.First()) {
    822814        series.Points.Add(new DataPoint(min - intervalWidth, 0));
    823815        series.Points.Add(new DataPoint(max + intervalWidth, 0));
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/PreprocessingFeatureCorrelationView.Designer.cs

    r10870 r10908  
    2323
    2424namespace HeuristicLab.Problems.DataAnalysis.Views {
    25   partial class AbstractFeatureCorrelationView {
     25  partial class PreprocessingFeatureCorrelationView {
    2626    /// <summary>
    2727    /// Required designer variable.
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing.Views/3.3/PreprocessingFeatureCorrelationView.cs

    r10870 r10908  
    2828using HeuristicLab.MainForm.WindowsForms;
    2929using HeuristicLab.PluginInfrastructure;
     30using System;
     31using HeuristicLab.DataPreprocessing;
    3032
    3133namespace HeuristicLab.Problems.DataAnalysis.Views {
    32   [View("Feature Correlation View")]
    33   [Content(typeof(DataAnalysisProblemData), false)]
    34   public abstract partial class AbstractFeatureCorrelationView : AsynchronousContentView {
     34  [View("Preprocessing Feature Correlation View")]
     35  [Content(typeof(CorrelationMatrixContent), false)]
     36  public partial class PreprocessingFeatureCorrelationView : AsynchronousContentView {
    3537    public const string ALLSAMPLES = "All Samples";
    3638    public const string TRAININGSAMPLES = "Training Samples";
     
    4143    protected FeatureCorrelationCalculator fcc;
    4244
    43     public new DataAnalysisProblemData Content {
    44       get { return (DataAnalysisProblemData)base.Content; }
     45    public new CorrelationMatrixContent Content
     46    {
     47      get { return (CorrelationMatrixContent) base.Content; }
    4548      set { base.Content = value; }
    4649    }
    4750
    48     protected AbstractFeatureCorrelationView() {
     51    private FeatureCorrelationCache correlationCache;
     52
     53    public PreprocessingFeatureCorrelationView() {
     54
     55      correlationCache = new FeatureCorrelationCache();
    4956      InitializeComponent();
    5057      fcc = new FeatureCorrelationCalculator();
     
    7582      fcc.TryCancelCalculation();
    7683      if (Content != null) {
    77         fcc.ProblemData = Content;
     84        fcc.ProblemData = Content.ProblemData;
    7885        CalculateCorrelation();
    7986      } else {
     
    8693
    8794    protected virtual bool[] SetInitialVariableVisibility() {
    88       bool[] initialVisibility = new bool[Content.Dataset.DoubleVariables.Count()];
     95      bool[] initialVisibility = new bool[Content.ProblemData.Dataset.DoubleVariables.Count()];
    8996      int i = 0;
    90       foreach (var variable in Content.Dataset.DoubleVariables) {
    91         initialVisibility[i] = Content.AllowedInputVariables.Contains(variable);
     97      foreach (var variable in Content.ProblemData.Dataset.DoubleVariables) {
     98        initialVisibility[i] = Content.ProblemData.AllowedInputVariables.Contains(variable);
    9299        i++;
    93100      }
     
    102109    }
    103110
    104     protected abstract void CalculateCorrelation();
    105     protected abstract void Content_CorrelationCalculationFinished(object sender, FeatureCorrelationCalculator.CorrelationCalculationFinishedArgs e);
     111     protected void CalculateCorrelation() {
     112      if (correlationCalcComboBox.SelectedItem == null) return;
     113      if (partitionComboBox.SelectedItem == null) return;
     114
     115      IDependencyCalculator calc = (IDependencyCalculator)correlationCalcComboBox.SelectedValue;
     116      string partition = (string)partitionComboBox.SelectedValue;
     117      dataView.Enabled = false;
     118      double[,] corr = correlationCache.GetCorrelation(calc, partition);
     119      if (corr == null) {
     120        fcc.CalculateElements(calc, partition);
     121      } else {
     122        fcc.TryCancelCalculation();
     123        var correlation = new DoubleMatrix(corr, Content.ProblemData.Dataset.DoubleVariables, Content.ProblemData.Dataset.DoubleVariables);
     124        UpdateDataView(correlation);
     125      }
     126    }
     127
     128    protected void Content_CorrelationCalculationFinished(object sender, FeatureCorrelationCalculator.CorrelationCalculationFinishedArgs e) {
     129      if (InvokeRequired) {
     130        Invoke(new FeatureCorrelationCalculator.CorrelationCalculationFinishedHandler(Content_CorrelationCalculationFinished), sender, e);
     131        return;
     132      }
     133      correlationCache.SetCorrelation(e.Calculcator, e.Partition, e.Correlation);
     134      var correlation = new DoubleMatrix(e.Correlation, Content.ProblemData.Dataset.DoubleVariables, Content.ProblemData.Dataset.DoubleVariables);
     135      UpdateDataView(correlation);
     136    }
    106137
    107138    protected void UpdateDataView(DoubleMatrix correlation) {
     
    125156      progressBar.Value = e.ProgressPercentage;
    126157    }
     158
     159      [NonDiscoverableType]
     160      private class FeatureCorrelationCache : Object {
     161        private Dictionary<Tuple<IDependencyCalculator, string>, double[,]> correlationsCache;
     162
     163        public FeatureCorrelationCache()
     164          : base() {
     165          InitializeCaches();
     166        }
     167
     168        private void InitializeCaches() {
     169          correlationsCache = new Dictionary<Tuple<IDependencyCalculator, string>, double[,]>();
     170        }
     171
     172        public void Reset() {
     173          InitializeCaches();
     174        }
     175
     176        public double[,] GetCorrelation(IDependencyCalculator calc, string partition) {
     177          double[,] corr;
     178          var key = new Tuple<IDependencyCalculator, string>(calc, partition);
     179          correlationsCache.TryGetValue(key, out corr);
     180          return corr;
     181        }
     182
     183        public void SetCorrelation(IDependencyCalculator calc, string partition, double[,] correlation) {
     184          var key = new Tuple<IDependencyCalculator, string>(calc, partition);
     185          correlationsCache[key] = correlation;
     186        }
     187      }
    127188  }
    128189}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/HeuristicLab.DataPreprocessing-3.3.csproj

    r10882 r10908  
    8181    <Compile Include="Implementations\PreprocessingDataTable.cs" />
    8282    <Compile Include="Interfaces\IFilteredPreprocessingData.cs" />
     83    <Compile Include="Implementations\CorrelationMatrixContent.cs" />
    8384    <Compile Include="PreprocessingTransformator.cs" />
    8485    <Compile Include="Utils\DataPreprocessingChangedEvent.cs" />
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/ChartLogic.cs

    r10882 r10908  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    3232
    3333  public class ChartLogic : IChartLogic {
    34  
     34    private const int MAX_DISTINCT_VALUES_FOR_CLASSIFCATION = 20;
    3535    private ITransactionalPreprocessingData preprocessingData;
    3636
     
    6363    }
    6464
    65     public List<double> GetVariableValues(string variableName) {
    66       return preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName)).ToList();
     65    public IEnumerable<double> GetVariableValues(string variableName) {
     66      return preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName));
    6767    }
    6868
     
    7474        if (preprocessingData.IsType<double>(preprocessingData.GetColumnIndex(variableName)))
    7575          doubleVariableNames.Add(variableName);
     76      }
     77
     78      return doubleVariableNames;
     79    }
     80
     81    public IEnumerable<string> GetVariableNamesForHistogramClassification() {
     82      List<string> doubleVariableNames = new List<string>();
     83
     84      //only return variable names from type double
     85      foreach (string variableName in preprocessingData.VariableNames)
     86      {
     87        int columnIndex           = preprocessingData.GetColumnIndex(variableName);
     88        bool isDouble             = preprocessingData.IsType<double>(columnIndex);
     89        double distinctValueCount = preprocessingData.GetValues<double>(columnIndex).GroupBy(x => x).Count();
     90        bool distinctValuesOk     = distinctValueCount <= MAX_DISTINCT_VALUES_FOR_CLASSIFCATION;
     91
     92        if (isDouble && distinctValuesOk)
     93              doubleVariableNames.Add(variableName);
    7694      }
    7795
     
    146164     
    147165
    148       List<double> xValues = GetVariableValues(variableNameX);
    149       List<double> yValues = GetVariableValues(variableNameY);
     166      List<double> xValues = GetVariableValues(variableNameX).ToList();
     167      List<double> yValues = GetVariableValues(variableNameY).ToList();
    150168
    151169      List<Point2D<double>> points = new List<Point2D<double>>();
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/CorrelationMatrixContent.cs

    r10870 r10908  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using HeuristicLab.Common;
    2424using HeuristicLab.Core;
     25using HeuristicLab.Problems.DataAnalysis;
    2526
    2627namespace HeuristicLab.DataPreprocessing {
    2728
    28   [Item("LineChart", "Represents the line chart grid.")]
    29   public class LineChartContent : PreprocessingChartContent {
     29  [Item("Feature Correlation Matrix", "Represents the feature correlation matrix.")]
     30  public class CorrelationMatrixContent : Item, IViewShortcut
     31  {
     32    public DataAnalysisProblemData ProblemData { get; set; }
    3033
    31     public LineChartContent(IChartLogic chartlogic)
    32       : base(chartlogic) {
     34    public CorrelationMatrixContent(DataAnalysisProblemData data) {
     35      ProblemData = data;
    3336    }
    3437
    35     public LineChartContent(LineChartContent content, Cloner cloner)
    36       : base(content, cloner) {
    37 
     38    public CorrelationMatrixContent(CorrelationMatrixContent original, Cloner cloner)
     39      : base(original, cloner) {
     40 
    3841    }
    39 
     42 
    4043    public static new Image StaticItemImage {
    41       get { return HeuristicLab.Common.Resources.VSImageLibrary.Performance; }
     44      get { return HeuristicLab.Common.Resources.VSImageLibrary.Gradient; }
    4245    }
    4346
    4447    public override IDeepCloneable Clone(Cloner cloner) {
    45       return new LineChartContent(this, cloner);
     48      return new CorrelationMatrixContent(this, cloner);
    4649    }
    47 
    4850  }
    4951}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/FilterLogic.cs

    r10900 r10908  
    2727namespace HeuristicLab.DataPreprocessing {
    2828  public class FilterLogic : IFilterLogic {
    29 
    3029    private IFilteredPreprocessingData preprocessingData;
    3130
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/HistogramContent.cs

    r10871 r10908  
    3636    public HistogramContent(HistogramContent content, Cloner cloner)
    3737      : base(content, cloner) {
    38 
    3938    }
    4039
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IChartLogic.cs

    r10882 r10908  
    5050
    5151    IEnumerable<string> GetVariableNames();
    52     List<double> GetVariableValues(string variableName);
     52    IEnumerable<string> GetVariableNamesForHistogramClassification();
     53
     54    IEnumerable<double> GetVariableValues(string variableName);
    5355  }
    5456}
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/ProblemDataCreator.cs

    r10772 r10908  
    2626
    2727namespace HeuristicLab.DataPreprocessing {
    28   internal class ProblemDataCreator {
     28  public class ProblemDataCreator {
    2929
    3030    private readonly IPreprocessingContext context;
Note: See TracChangeset for help on using the changeset viewer.