Changeset 14724


Ignore:
Timestamp:
03/07/17 15:32:44 (7 months ago)
Author:
mkommend
Message:

#2709: Adapted data preprocessing scatter plot to allow grouping of string variables.

Location:
branches/DataPreprocessing Enhancements
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Enhancements/HeuristicLab.DataPreprocessing.Views/3.4/ScatterPlotSingleView.Designer.cs

    r14525 r14724  
    144144      // comboBoxGroup
    145145      //
    146       this.comboBoxGroup.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
     146      this.comboBoxGroup.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
    147147            | System.Windows.Forms.AnchorStyles.Right)));
    148148      this.comboBoxGroup.AutoCompleteMode = System.Windows.Forms.AutoCompleteMode.Suggest;
    149149      this.comboBoxGroup.AutoCompleteSource = System.Windows.Forms.AutoCompleteSource.ListItems;
     150      this.comboBoxGroup.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
    150151      this.comboBoxGroup.FormattingEnabled = true;
    151152      this.comboBoxGroup.Location = new System.Drawing.Point(51, 94);
     
    158159      // comboBoxYVariable
    159160      //
    160       this.comboBoxYVariable.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
     161      this.comboBoxYVariable.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
    161162            | System.Windows.Forms.AnchorStyles.Right)));
    162163      this.comboBoxYVariable.AutoCompleteMode = System.Windows.Forms.AutoCompleteMode.Suggest;
     
    172173      // comboBoxXVariable
    173174      //
    174       this.comboBoxXVariable.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
     175      this.comboBoxXVariable.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
    175176            | System.Windows.Forms.AnchorStyles.Right)));
    176177      this.comboBoxXVariable.AutoCompleteMode = System.Windows.Forms.AutoCompleteMode.Suggest;
     
    200201      // regressionTypeComboBox
    201202      //
    202       this.regressionTypeComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
     203      this.regressionTypeComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
    203204            | System.Windows.Forms.AnchorStyles.Right)));
    204205      this.regressionTypeComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
     
    212213      // polynomialRegressionOrderNumericUpDown
    213214      //
    214       this.polynomialRegressionOrderNumericUpDown.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
     215      this.polynomialRegressionOrderNumericUpDown.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
    215216            | System.Windows.Forms.AnchorStyles.Right)));
    216217      this.polynomialRegressionOrderNumericUpDown.Location = new System.Drawing.Point(51, 47);
     
    287288      // gradientPictureBox
    288289      //
    289       this.gradientPictureBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
     290      this.gradientPictureBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 
    290291            | System.Windows.Forms.AnchorStyles.Left)));
    291292      this.gradientPictureBox.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle;
  • branches/DataPreprocessing Enhancements/HeuristicLab.DataPreprocessing.Views/3.4/ScatterPlotSingleView.cs

    r14525 r14724  
    3535  [View("Scatter Plot Single View")]
    3636  [Content(typeof(SingleScatterPlotContent), true)]
    37   public partial class ScatterPlotSingleView : ItemView {
    38 
    39     private readonly string NoGroupItem = "-";
     37  public sealed partial class ScatterPlotSingleView : ItemView {
     38    private const int MAX_DISTINCT_VALUES_FOR_GROUPING = 20;
     39    private readonly string NoGroupItem = "";
    4040
    4141    public new SingleScatterPlotContent Content {
     
    5151    }
    5252
    53     public void InitData() {
     53    protected override void SetEnabledStateOfControls() {
     54      base.SetEnabledStateOfControls();
     55      useGradientCheckBox.Enabled = (string)comboBoxGroup.SelectedItem != NoGroupItem;
     56      gradientPanel.Visible = useGradientCheckBox.Enabled && useGradientCheckBox.Checked; ;
     57    }
     58
     59    protected override void OnContentChanged() {
     60      base.OnContentChanged();
     61      if (Content != null) {
     62        InitData();
     63      }
     64    }
     65
     66    private void InitData() {
    5467      IEnumerable<string> variables = Content.PreprocessingData.GetDoubleVariableNames();
    5568
    56       // add variables to combo boxes
    5769      comboBoxXVariable.Items.Clear();
    5870      comboBoxYVariable.Items.Clear();
    5971      comboBoxGroup.Items.Clear();
     72
    6073      comboBoxXVariable.Items.AddRange(variables.ToArray());
    6174      comboBoxYVariable.Items.AddRange(variables.ToArray());
    6275      comboBoxGroup.Items.Add(NoGroupItem);
    63       for (int i = 0; i < Content.PreprocessingData.Columns; ++i) {
    64         if (Content.PreprocessingData.VariableHasType<double>(i)) {
    65           //double distinctValueCount = Content.PreprocessingData.GetValues<double>(i).GroupBy(x => x).Count();
    66           //if (distinctValueCount <= 20)
    67           comboBoxGroup.Items.Add(Content.PreprocessingData.GetVariableName(i));
    68         }
    69       }
     76      foreach (string var in GetVariableNamesForGrouping(Content.PreprocessingData)) {
     77        comboBoxGroup.Items.Add(var);
     78      }
     79      comboBoxGroup.SelectedItem = Content.SelectedGroupVariable;
    7080
    7181      // use x and y variable from content
     
    8494    }
    8595
    86     protected override void OnContentChanged() {
    87       base.OnContentChanged();
    88       if (Content != null) {
    89         InitData();
    90       }
    91     }
    92 
    93     protected override void SetEnabledStateOfControls() {
    94       base.SetEnabledStateOfControls();
    95       useGradientCheckBox.Enabled = (string)comboBoxGroup.SelectedItem != NoGroupItem;
    96       gradientPanel.Visible = useGradientCheckBox.Enabled && useGradientCheckBox.Checked; ;
    97     }
    98 
    9996    private void UpdateScatterPlot() {
    10097      if (comboBoxXVariable.SelectedItem != null && comboBoxYVariable.SelectedItem != null && comboBoxGroup.SelectedItem != null) {
     
    103100        var groupVariable = (string)comboBoxGroup.SelectedItem;
    104101
    105         bool isGroupingActive = groupVariable != NoGroupItem;
    106         double min = 0, max = 1;
    107         if (isGroupingActive) {
    108           var groupValues = Content.PreprocessingData.GetValues<double>(Content.PreprocessingData.GetColumnIndex(groupVariable))
    109             .Distinct().OrderBy(x => x).ToList();
    110           min = groupValues.FirstOrDefault(x => !double.IsNaN(x));
    111           max = groupValues.LastOrDefault(x => !double.IsNaN(x));
    112         }
    113102        ScatterPlot scatterPlot = Content.CreateScatterPlot(xVariable, yVariable, groupVariable);
     103        //rows are saved and removed to avoid firing of visual property changed events
    114104        var rows = scatterPlot.Rows.ToList();
    115105        scatterPlot.Rows.Clear();
     
    122112          row.VisualProperties.PolynomialRegressionOrder = order;
    123113          row.VisualProperties.IsVisibleInLegend = !useGradientCheckBox.Checked;
    124           if (isGroupingActive && useGradientCheckBox.Checked)
    125             row.VisualProperties.Color = GetColor(double.Parse(row.Name), min, max);
    126114        }
    127115        scatterPlot.Rows.AddRange(rows);
     
    133121        scatterPlotControl.Content = scatterPlot;
    134122
    135         if (isGroupingActive) {
    136           gradientMinimumLabel.Text = min.ToString("G5");
    137           gradientMaximumLabel.Text = max.ToString("G5");
    138         }
    139 
    140123        //save selected x and y variable in content
    141124        this.Content.SelectedXVariable = (string)comboBoxXVariable.SelectedItem;
     
    149132      UpdateScatterPlot();
    150133      var newPlot = scatterPlotControl.Content;
    151 
    152134
    153135      if (oldPlot == null || newPlot == null) return;
     
    183165
    184166    private void comboBoxGroup_SelectedIndexChanged(object sender, EventArgs e) {
    185       useGradientCheckBox.Enabled = (string)comboBoxGroup.SelectedItem != NoGroupItem;
     167      useGradientCheckBox.Enabled = (string)comboBoxGroup.SelectedItem != NoGroupItem && Content.PreprocessingData.GetDoubleVariableNames().Contains((string)comboBoxGroup.SelectedItem);
    186168      gradientPanel.Visible = useGradientCheckBox.Enabled && useGradientCheckBox.Checked;
    187169      UpdateScatterPlot();
     
    201183
    202184    private void UpdateRegressionLine() {
     185      if (Content == null) return;
     186      scatterPlotControl.Content.Rows.Clear();
     187
    203188      var regressionType = (RegressionType)regressionTypeComboBox.SelectedValue;
    204189      int order = (int)polynomialRegressionOrderNumericUpDown.Value;
    205190
    206191      var rows = scatterPlotControl.Content.Rows.ToList();
    207       scatterPlotControl.Content.Rows.Clear();
    208192      foreach (var row in rows) {
    209193        row.VisualProperties.IsRegressionVisibleInLegend = false;
     
    258242    }
    259243
    260     private void BatchRowUpdate(Action<ScatterPlotDataRow> rowAction) {
    261       var scatterPlot = scatterPlotControl.Content;
    262       var rows = scatterPlot.Rows.ToList();
    263       // remove rows and re-add them later to avoid firing visual property changed events
    264       scatterPlot.Rows.Clear();
    265       foreach (var row in rows) {
    266         rowAction(row);
    267       }
    268       scatterPlot.Rows.AddRange(rows);
     244    private static IEnumerable<string> GetVariableNamesForGrouping(IPreprocessingData preprocessingData) {
     245      var variableNames = new List<string>();
     246
     247      for (int i = 0; i < preprocessingData.Columns; ++i) {
     248        int distinctValues = Int32.MaxValue;
     249        if (preprocessingData.VariableHasType<double>(i))
     250          distinctValues = preprocessingData.GetValues<double>(i).GroupBy(x => x).Count();
     251        else if (preprocessingData.VariableHasType<string>(i))
     252          distinctValues = preprocessingData.GetValues<string>(i).GroupBy(x => x).Count();
     253        else if (preprocessingData.VariableHasType<DateTime>(i))
     254          distinctValues = preprocessingData.GetValues<DateTime>(i).GroupBy(x => x).Count();
     255
     256        if (distinctValues <= MAX_DISTINCT_VALUES_FOR_GROUPING)
     257          variableNames.Add(preprocessingData.GetVariableName(i));
     258      }
     259      return variableNames;
    269260    }
    270261  }
  • branches/DataPreprocessing Enhancements/HeuristicLab.DataPreprocessing/3.4/Content/ScatterPlotContent.cs

    r14525 r14724  
    2525using HeuristicLab.Analysis;
    2626using HeuristicLab.Common;
    27 using HeuristicLab.Problems.DataAnalysis;
    2827using HeuristicLab.Visualization.ChartControlsExtensions;
    2928
     
    5554          scatterPlot.VisualProperties.XAxisMinimumFixedValue = axisMin;
    5655          scatterPlot.VisualProperties.XAxisMaximumFixedValue = axisMax;
    57         } catch (ArgumentOutOfRangeException) { } // error during CalculateOptimalAxisInterval
     56        }
     57        catch (ArgumentOutOfRangeException) { } // error during CalculateOptimalAxisInterval
    5858        try {
    5959          double axisMin, axisMax, axisInterval;
     
    6363          scatterPlot.VisualProperties.YAxisMinimumFixedValue = axisMin;
    6464          scatterPlot.VisualProperties.YAxisMaximumFixedValue = axisMax;
    65         } catch (ArgumentOutOfRangeException) { } // error during CalculateOptimalAxisInterval
     65        }
     66        catch (ArgumentOutOfRangeException) { } // error during CalculateOptimalAxisInterval
    6667      }
    6768
    68       if (variableNameGroup == null || variableNameGroup == "-") {
     69
     70      //No Grouping
     71      if (string.IsNullOrEmpty(variableNameGroup) || variableNameGroup == "-") {
    6972        ScatterPlotDataRow scdr = new ScatterPlotDataRow(variableNameX + " - " + variableNameY, "", validPoints);
    7073        scdr.VisualProperties.IsVisibleInLegend = false;
    7174        scatterPlot.Rows.Add(scdr);
    72       } else {
    73         var groupValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameGroup));
    74         var data = points.Zip(groupValues, (p, g) => new { p, g })
    75           .Where(x => !double.IsNaN(x.p.X) && !double.IsNaN(x.p.Y) && !double.IsInfinity(x.p.X) && !double.IsInfinity(x.p.Y))
    76           .ToList();
     75        return scatterPlot;
     76      }
    7777
    78         foreach (var groupValue in groupValues.Distinct().OrderBy(g => g)) {
    79           var values = data.Where(x => x.g == groupValue || (double.IsNaN(x.g) && double.IsNaN(groupValue))).Select(v => v.p);
    80           var row = new ScatterPlotDataRow(string.Format("{0} ({1})", variableNameGroup, groupValue), "", values) {
    81             Name = groupValue.ToString("R"),
    82             VisualProperties = { PointSize = 6 }
    83           };
    84           scatterPlot.Rows.Add(row);
    85         }
     78      //Grouping
     79      int groupVariableIndex = PreprocessingData.GetColumnIndex(variableNameGroup);
     80      var groupingValues = Enumerable.Empty<string>();
     81
     82      if (PreprocessingData.VariableHasType<double>(groupVariableIndex)) {
     83        groupingValues = PreprocessingData.GetValues<double>(groupVariableIndex).Select(x => x.ToString());
     84      } else if (PreprocessingData.VariableHasType<string>(groupVariableIndex)) {
     85        groupingValues = PreprocessingData.GetValues<string>(groupVariableIndex);
     86      } else if (PreprocessingData.VariableHasType<DateTime>(groupVariableIndex)) {
     87        groupingValues = PreprocessingData.GetValues<DateTime>(groupVariableIndex).Select(x => x.ToString());
     88      }
     89      var groups = groupingValues.Zip(validPoints, Tuple.Create).GroupBy(t => t.Item1, t => t.Item2);
     90
     91      foreach (var group in groups) {
     92        var scdr = new ScatterPlotDataRow();
     93        scdr.Name = group.Key;
     94        scdr.VisualProperties.IsVisibleInLegend = true;
     95        scdr.VisualProperties.PointSize = 6;
     96        scdr.Points.AddRange(group);
     97        scatterPlot.Rows.Add(scdr);
    8698      }
    8799      return scatterPlot;
    88100    }
    89101
    90     public DataRow GetCorrelationRow(string variableNameX, string variableNameY) {
    91       var xValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameX));
    92       var yValues = PreprocessingData.GetValues<double>(PreprocessingData.GetColumnIndex(variableNameY));
    93 
    94       double k, d;
    95       OnlineCalculatorError err;
    96       OnlineLinearScalingParameterCalculator.Calculate(xValues, yValues, out k, out d, out err);
    97       double p = OnlinePearsonsRCalculator.Calculate(xValues, yValues, out err);
    98 
    99       var data = new double[xValues.Count];
    100       for (int i = 0; i < xValues.Count; i++) {
    101         data[i]= k * i + d;
    102       }
    103 
    104       return new DataRow(string.Format("Correlation (R²={0})", p*p), "", data);
    105     }
    106102  }
    107103}
Note: See TracChangeset for help on using the changeset viewer.