source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionPartialDependencePlotView.cs @ 15583

Last change on this file since 15583 was 15583, checked in by swagner, 4 years ago

#2640: Updated year of copyrights in license headers

File size: 22.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Drawing;
26using System.Globalization;
27using System.Linq;
28using System.Threading.Tasks;
29using System.Windows.Forms;
30using HeuristicLab.Common;
31using HeuristicLab.MainForm;
32using HeuristicLab.Visualization.ChartControlsExtensions;
33
34namespace HeuristicLab.Problems.DataAnalysis.Views {
35  [View("Partial Dependence Plots")]
36  [Content(typeof(IRegressionSolution))]
37  public partial class RegressionSolutionPartialDependencePlotView : DataAnalysisSolutionEvaluationView {
38    private readonly Dictionary<string, IPartialDependencePlot> partialDependencePlots;
39    private readonly Dictionary<string, DensityChart> densityCharts;
40    private readonly Dictionary<string, Panel> groupingPanels;
41    private ModifiableDataset sharedFixedVariables;
42
43    private const int Points = 200;
44    private int MaxColumns = 4;
45
46    private IEnumerable<string> VisibleVariables {
47      get {
48        foreach (ListViewItem item in variableListView.CheckedItems)
49          yield return item.Text;
50      }
51    }
52    private IEnumerable<IPartialDependencePlot> VisiblePartialDependencePlots {
53      get { return VisibleVariables.Select(v => partialDependencePlots[v]); }
54    }
55    private IEnumerable<DensityChart> VisibleDensityCharts {
56      get { return VisibleVariables.Select(v => densityCharts[v]); }
57    }
58    private IEnumerable<Panel> VisibleChartsPanels {
59      get { return VisibleVariables.Select(v => groupingPanels[v]); }
60    }
61
62    public RegressionSolutionPartialDependencePlotView() {
63      InitializeComponent();
64      partialDependencePlots = new Dictionary<string, IPartialDependencePlot>();
65      densityCharts = new Dictionary<string, DensityChart>();
66      groupingPanels = new Dictionary<string, Panel>();
67
68      limitView.Content = new DoubleLimit(0, 1);
69      limitView.Content.ValueChanged += limit_ValueChanged;
70
71      densityComboBox.SelectedIndex = 1; // select Training
72
73      // Avoid additional horizontal scrollbar
74      var vertScrollWidth = SystemInformation.VerticalScrollBarWidth;
75      scrollPanel.Padding = new Padding(0, 0, vertScrollWidth, 0);
76      scrollPanel.AutoScroll = true;
77    }
78
79    public new IRegressionSolution Content {
80      get { return (IRegressionSolution)base.Content; }
81      set { base.Content = value; }
82    }
83
84    protected override void RegisterContentEvents() {
85      base.RegisterContentEvents();
86      Content.ModelChanged += solution_ModelChanged;
87    }
88
89    protected override void DeregisterContentEvents() {
90      Content.ModelChanged -= solution_ModelChanged;
91      base.DeregisterContentEvents();
92    }
93
94    protected override void OnContentChanged() {
95      base.OnContentChanged();
96      if (Content == null) return;
97      var problemData = Content.ProblemData;
98
99      // Init Y-axis range
100      double min = double.MaxValue, max = double.MinValue;
101      var trainingTarget = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
102      foreach (var t in trainingTarget) {
103        if (t < min) min = t;
104        if (t > max) max = t;
105      }
106      double range = max - min;
107      const double scale = 1.0 / 3.0;
108      double axisMin, axisMax, axisInterval;
109      ChartUtil.CalculateAxisInterval(min - scale * range, max + scale * range, 5, out axisMin, out axisMax, out axisInterval);
110      automaticYAxisCheckBox.Checked = false;
111      limitView.ReadOnly = false;
112      limitView.Content.Lower = axisMin;
113      limitView.Content.Upper = axisMax;
114
115      // create dataset of problemData input variables and model input variables
116      // necessary workaround to have the variables in the occuring order
117      var inputvariables =
118        new HashSet<string>(Content.ProblemData.AllowedInputVariables.Union(Content.Model.VariablesUsedForPrediction));
119      var allowedInputVariables =
120        Content.ProblemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
121
122
123      var doubleVariables = allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
124      var doubleVariableValues = (IEnumerable<IList>)doubleVariables.Select(x => new List<double> { problemData.Dataset.GetDoubleValues(x, problemData.TrainingIndices).Median() });
125
126      var factorVariables = allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
127      var factorVariableValues = (IEnumerable<IList>)factorVariables.Select(x => new List<string> {
128        problemData.Dataset.GetStringValues(x, problemData.TrainingIndices)
129        .GroupBy(val => val).OrderByDescending(g => g.Count()).First().Key // most frequent value
130      });
131
132      if (sharedFixedVariables != null)
133        sharedFixedVariables.ItemChanged -= SharedFixedVariables_ItemChanged;
134
135      sharedFixedVariables = new ModifiableDataset(doubleVariables.Concat(factorVariables), doubleVariableValues.Concat(factorVariableValues));
136
137
138      // create controls
139      partialDependencePlots.Clear();
140      densityCharts.Clear();
141      groupingPanels.Clear();
142      foreach (var variableName in doubleVariables) {
143        var plot = CreatePartialDependencePlot(variableName, sharedFixedVariables);
144        partialDependencePlots.Add(variableName, plot);
145
146        var densityChart = new DensityChart() {
147          Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right,
148          Margin = Padding.Empty,
149          Height = 12,
150          Visible = false,
151          Top = (int)(plot.Height * 0.1),
152        };
153        densityCharts.Add(variableName, densityChart);
154
155        plot.ZoomChanged += (o, e) => {
156          var pdp = (PartialDependencePlot)o;
157          var density = densityCharts[pdp.FreeVariable];
158          density.Visible = densityComboBox.SelectedIndex != 0 && !pdp.IsZoomed;
159          if (density.Visible)
160            UpdateDensityChart(density, pdp.FreeVariable);
161        };
162        plot.SizeChanged += (o, e) => {
163          var pdp = (PartialDependencePlot)o;
164          var density = densityCharts[pdp.FreeVariable];
165          density.Top = (int)(pdp.Height * 0.1);
166        };
167
168        // Initially, the inner plot areas are not initialized for hidden charts (scollpanel, ...)
169        // This event handler listens for the paint event once (where everything is already initialized) to do some manual layouting.
170        plot.ChartPostPaint += OnPartialDependencePlotPostPaint;
171
172        var panel = new Panel() {
173          Dock = DockStyle.Fill,
174          Margin = Padding.Empty,
175          BackColor = Color.White
176        };
177
178        panel.Controls.Add(densityChart);
179        panel.Controls.Add(plot);
180        groupingPanels.Add(variableName, panel);
181      }
182      foreach (var variableName in factorVariables) {
183        var plot = CreateFactorPartialDependencePlot(variableName, sharedFixedVariables);
184        partialDependencePlots.Add(variableName, plot);
185
186        var densityChart = new DensityChart() {
187          Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right,
188          Margin = Padding.Empty,
189          Height = 12,
190          Visible = false,
191          Top = (int)(plot.Height * 0.1),
192        };
193        densityCharts.Add(variableName, densityChart);
194        plot.ZoomChanged += (o, e) => {
195          var pdp = (FactorPartialDependencePlot)o;
196          var density = densityCharts[pdp.FreeVariable];
197          density.Visible = densityComboBox.SelectedIndex != 0 && !pdp.IsZoomed;
198          if (density.Visible)
199            UpdateDensityChart(density, pdp.FreeVariable);
200        };
201        plot.SizeChanged += (o, e) => {
202          var pdp = (FactorPartialDependencePlot)o;
203          var density = densityCharts[pdp.FreeVariable];
204          density.Top = (int)(pdp.Height * 0.1);
205        };
206
207        // Initially, the inner plot areas are not initialized for hidden charts (scollpanel, ...)
208        // This event handler listens for the paint event once (where everything is already initialized) to do some manual layouting.
209        plot.ChartPostPaint += OnFactorPartialDependencePlotPostPaint;
210
211        var panel = new Panel() {
212          Dock = DockStyle.Fill,
213          Margin = Padding.Empty,
214          BackColor = Color.White
215        };
216
217        panel.Controls.Add(densityChart);
218        panel.Controls.Add(plot);
219        groupingPanels.Add(variableName, panel);
220      }
221      // update variable list
222      variableListView.ItemChecked -= variableListView_ItemChecked;
223      variableListView.Items.Clear();
224      foreach (var variable in allowedInputVariables)
225        variableListView.Items.Add(key: variable, text: variable, imageIndex: 0);
226
227      foreach (var variable in Content.Model.VariablesUsedForPrediction)
228        variableListView.Items[variable].Checked = true;
229      variableListView.ItemChecked += variableListView_ItemChecked;
230
231      sharedFixedVariables.ItemChanged += SharedFixedVariables_ItemChanged;
232
233      RecalculateAndRelayoutCharts();
234    }
235
236    private void SharedFixedVariables_ItemChanged(object sender, EventArgs<int, int> e) {
237      double yValue = Content.Model.GetEstimatedValues(sharedFixedVariables, new[] { 0 }).Single();
238      string title = Content.ProblemData.TargetVariable + ": " + yValue.ToString("G5", CultureInfo.CurrentCulture);
239      foreach (var chart in partialDependencePlots.Values) {
240        if (!string.IsNullOrEmpty(chart.YAxisTitle)) { // only show title for first column in grid
241          chart.YAxisTitle = title;
242        }
243      }
244    }
245
246
247    private void OnPartialDependencePlotPostPaint(object o, EventArgs e) {
248      var plot = (PartialDependencePlot)o;
249      var density = densityCharts[plot.FreeVariable];
250
251      density.Width = plot.Width;
252
253      var gcPlotPosition = plot.InnerPlotPosition;
254      density.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width);
255      density.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width);
256      plot.UpdateTitlePosition();
257
258      // removed after succesful layouting due to performance reasons
259      if (gcPlotPosition.Width != 0)
260        plot.ChartPostPaint -= OnPartialDependencePlotPostPaint;
261    }
262
263    private void OnFactorPartialDependencePlotPostPaint(object o, EventArgs e) {
264      var plot = (FactorPartialDependencePlot)o;
265      var density = densityCharts[plot.FreeVariable];
266
267      density.Width = plot.Width;
268
269      var gcPlotPosition = plot.InnerPlotPosition;
270      density.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width);
271      density.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width);
272      plot.UpdateTitlePosition();
273
274      // removed after succesful layouting due to performance reasons
275      if (gcPlotPosition.Width != 0)
276        plot.ChartPostPaint -= OnFactorPartialDependencePlotPostPaint;
277    }
278
279    private async void RecalculateAndRelayoutCharts() {
280      foreach (var variable in VisibleVariables) {
281        var plot = partialDependencePlots[variable];
282        await plot.RecalculateAsync(false, false);
283      }
284      partialDependencePlotTableLayout.SuspendLayout();
285      SetupYAxis();
286      ReOrderControls();
287      SetStyles();
288      partialDependencePlotTableLayout.ResumeLayout();
289      partialDependencePlotTableLayout.Refresh();
290      foreach (var variable in VisibleVariables) {
291        DensityChart densityChart;
292        if (densityCharts.TryGetValue(variable, out densityChart)) {
293          UpdateDensityChart(densityChart, variable);
294        }
295      }
296    }
297    private PartialDependencePlot CreatePartialDependencePlot(string variableName, ModifiableDataset sharedFixedVariables) {
298      var plot = new PartialDependencePlot {
299        Dock = DockStyle.Fill,
300        Margin = Padding.Empty,
301        ShowLegend = false,
302        ShowCursor = true,
303        ShowConfigButton = false,
304        YAxisTicks = 5,
305      };
306      plot.VariableValueChanged += async (o, e) => {
307        var recalculations = VisiblePartialDependencePlots
308          .Except(new[] { (IPartialDependencePlot)o })
309          .Select(async chart => {
310            await chart.RecalculateAsync(updateOnFinish: false, resetYAxis: false);
311          }).ToList();
312        await Task.WhenAll(recalculations);
313
314        if (recalculations.All(t => t.IsCompleted))
315          SetupYAxis();
316      };
317      plot.Configure(new[] { Content }, sharedFixedVariables, variableName, Points);
318      plot.SolutionAdded += partialDependencePlot_SolutionAdded;
319      plot.SolutionRemoved += partialDependencePlot_SolutionRemoved;
320      return plot;
321    }
322    private FactorPartialDependencePlot CreateFactorPartialDependencePlot(string variableName, ModifiableDataset sharedFixedVariables) {
323      var plot = new FactorPartialDependencePlot {
324        Dock = DockStyle.Fill,
325        Margin = Padding.Empty,
326        ShowLegend = false,
327        ShowCursor = true,
328        YAxisTicks = 5,
329      };
330      plot.VariableValueChanged += async (o, e) => {
331        var recalculations = VisiblePartialDependencePlots
332          .Except(new[] { (FactorPartialDependencePlot)o })
333          .Select(async chart => {
334            await chart.RecalculateAsync(updateOnFinish: false, resetYAxis: false);
335          }).ToList();
336        await Task.WhenAll(recalculations);
337
338        if (recalculations.All(t => t.IsCompleted))
339          SetupYAxis();
340      };
341      var variableValues = Content.ProblemData.Dataset.GetStringValues(variableName).Distinct().OrderBy(n => n).ToList();
342      plot.Configure(new[] { Content }, sharedFixedVariables, variableName, variableValues);
343      plot.SolutionAdded += partialDependencePlot_SolutionAdded;
344      plot.SolutionRemoved += partialDependencePlot_SolutionRemoved;
345      return plot;
346    }
347    private void SetupYAxis() {
348      double axisMin, axisMax;
349      if (automaticYAxisCheckBox.Checked) {
350        double min = double.MaxValue, max = double.MinValue;
351        foreach (var chart in VisiblePartialDependencePlots) {
352          if (chart.YMin < min) min = chart.YMin;
353          if (chart.YMax > max) max = chart.YMax;
354        }
355
356        double axisInterval;
357        ChartUtil.CalculateAxisInterval(min, max, 5, out axisMin, out axisMax, out axisInterval);
358      } else {
359        axisMin = limitView.Content.Lower;
360        axisMax = limitView.Content.Upper;
361      }
362
363      foreach (var chart in VisiblePartialDependencePlots) {
364        chart.FixedYAxisMin = axisMin;
365        chart.FixedYAxisMax = axisMax;
366      }
367    }
368
369    // reorder chart controls so that they always appear in the same order as in the list view
370    // the table layout containing the controls should be suspended before calling this method
371    private void ReOrderControls() {
372      var tl = partialDependencePlotTableLayout;
373      tl.Controls.Clear();
374      int row = 0, column = 0;
375      double yValue = Content.Model.GetEstimatedValues(sharedFixedVariables, new[] { 0 }).Single();
376      string title = Content.ProblemData.TargetVariable + ": " + yValue.ToString("G5", CultureInfo.CurrentCulture);
377
378      foreach (var v in VisibleVariables) {
379        var chartsPanel = groupingPanels[v];
380        tl.Controls.Add(chartsPanel, column, row);
381
382        var chart = partialDependencePlots[v];
383        chart.YAxisTitle = column == 0 ? title : string.Empty;
384        column++;
385
386        if (column == MaxColumns) {
387          row++;
388          column = 0;
389        }
390      }
391    }
392
393    private void SetStyles() {
394      var tl = partialDependencePlotTableLayout;
395      tl.RowStyles.Clear();
396      tl.ColumnStyles.Clear();
397      int numVariables = VisibleVariables.Count();
398      if (numVariables == 0)
399        return;
400
401      // set column styles
402      tl.ColumnCount = Math.Min(numVariables, MaxColumns);
403      for (int c = 0; c < tl.ColumnCount; c++)
404        tl.ColumnStyles.Add(new ColumnStyle(SizeType.Percent, 100.0f / tl.ColumnCount));
405
406      // set row styles
407      tl.RowCount = (int)Math.Ceiling((double)numVariables / tl.ColumnCount);
408      var columnWidth = tl.Width / tl.ColumnCount; // assume all columns have the same width
409      var rowHeight = (int)(0.8 * columnWidth);
410      for (int r = 0; r < tl.RowCount; r++)
411        tl.RowStyles.Add(new RowStyle(SizeType.Absolute, rowHeight));
412    }
413
414    private async void partialDependencePlot_SolutionAdded(object sender, EventArgs<IRegressionSolution> e) {
415      var solution = e.Value;
416      foreach (var chart in partialDependencePlots.Values) {
417        if (sender == chart) continue;
418        await chart.AddSolutionAsync(solution);
419      }
420    }
421
422    private async void partialDependencePlot_SolutionRemoved(object sender, EventArgs<IRegressionSolution> e) {
423      var solution = e.Value;
424      foreach (var chart in partialDependencePlots.Values) {
425        if (sender == chart) continue;
426        await chart.RemoveSolutionAsync(solution);
427      }
428    }
429
430    private async void variableListView_ItemChecked(object sender, ItemCheckedEventArgs e) {
431      var item = e.Item;
432      var variable = item.Text;
433      var plot = partialDependencePlots[variable];
434      var chartsPanel = groupingPanels[variable];
435      var tl = partialDependencePlotTableLayout;
436
437      tl.SuspendLayout();
438      if (item.Checked) {
439        tl.Controls.Add(chartsPanel);
440        await plot.RecalculateAsync(false, false);
441      } else {
442        tl.Controls.Remove(chartsPanel);
443      }
444
445      if (tl.Controls.Count > 0) {
446        SetupYAxis();
447        ReOrderControls();
448        SetStyles();
449      }
450      tl.ResumeLayout();
451      tl.Refresh();
452      densityComboBox_SelectedIndexChanged(this, EventArgs.Empty);
453    }
454
455    private void automaticYAxisCheckBox_CheckedChanged(object sender, EventArgs e) {
456      limitView.ReadOnly = automaticYAxisCheckBox.Checked;
457      SetupYAxis();
458      partialDependencePlotTableLayout.Refresh();
459      densityComboBox_SelectedIndexChanged(this, EventArgs.Empty); // necessary to realign the density plots
460    }
461
462    private void limit_ValueChanged(object sender, EventArgs e) {
463      if (automaticYAxisCheckBox.Checked)
464        return;
465      SetupYAxis();
466      partialDependencePlotTableLayout.Refresh();
467      densityComboBox_SelectedIndexChanged(this, EventArgs.Empty); // necessary to realign the density plots
468    }
469
470    private void densityComboBox_SelectedIndexChanged(object sender, EventArgs e) {
471      if (Content == null)
472        return;
473
474      int si = densityComboBox.SelectedIndex;
475      if (si == 0) {
476        foreach (var densityChart in densityCharts.Values)
477          densityChart.Visible = false;
478      } else {
479        var indices = GetDensityIndices(si).ToList();
480
481        foreach (var entry in densityCharts) {
482          var variableName = entry.Key;
483          var densityChart = entry.Value;
484          if (!VisibleVariables.Contains(variableName) || partialDependencePlots[variableName].IsZoomed)
485            continue;
486
487          UpdateDensityChart(densityChart, variableName, indices);
488        }
489      }
490    }
491    private IEnumerable<int> GetDensityIndices(int selectedIndex) {
492      var problemData = Content.ProblemData;
493      return
494        selectedIndex == 1 ? problemData.TrainingIndices :
495        selectedIndex == 2 ? problemData.TestIndices :
496        problemData.AllIndices;
497    }
498    private void UpdateDensityChart(DensityChart densityChart, string variable, IList<int> indices = null) {
499      if (densityComboBox.SelectedIndex == 0)
500        return;
501      if (indices == null) {
502        indices = GetDensityIndices(densityComboBox.SelectedIndex).ToList();
503      }
504      if (Content.ProblemData.Dataset.VariableHasType<double>(variable)) {
505        var data = Content.ProblemData.Dataset.GetDoubleValues(variable, indices).ToList();
506        var plot = partialDependencePlots[variable] as PartialDependencePlot;
507        if (plot != null) {
508          var min = plot.FixedXAxisMin;
509          var max = plot.FixedXAxisMax;
510          var buckets = plot.DrawingSteps;
511          if (min.HasValue && max.HasValue) {
512            densityChart.UpdateChart(data, min.Value, max.Value, buckets);
513            densityChart.Width = plot.Width;
514
515            var gcPlotPosition = plot.InnerPlotPosition;
516            densityChart.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width);
517            densityChart.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width);
518
519            densityChart.Visible = true;
520          }
521          plot.UpdateTitlePosition();
522        }
523      } else if (Content.ProblemData.Dataset.VariableHasType<string>(variable)) {
524        var data = Content.ProblemData.Dataset.GetStringValues(variable).ToList();
525        var plot = partialDependencePlots[variable] as FactorPartialDependencePlot;
526        if (plot != null) {
527          densityChart.UpdateChart(data);
528          densityChart.Width = plot.Width;
529
530          var gcPlotPosition = plot.InnerPlotPosition;
531          densityChart.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width);
532          densityChart.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width);
533
534          densityChart.Visible = true;
535
536          plot.UpdateTitlePosition();
537        }
538      }
539    }
540
541    private void columnsNumericUpDown_ValueChanged(object sender, EventArgs e) {
542      MaxColumns = (int)columnsNumericUpDown.Value;
543      int columns = Math.Min(VisibleVariables.Count(), MaxColumns);
544      if (columns > 0) {
545        var tl = partialDependencePlotTableLayout;
546        MaxColumns = columns;
547        tl.SuspendLayout();
548        ReOrderControls();
549        SetStyles();
550        tl.ResumeLayout();
551        tl.Refresh();
552        densityComboBox_SelectedIndexChanged(this, EventArgs.Empty);
553      }
554    }
555
556    private async void solution_ModelChanged(object sender, EventArgs e) {
557      foreach (var variable in VisibleVariables) {
558        var pdp = partialDependencePlots[variable];
559        var densityChart = densityCharts[variable];
560        // recalculate and refresh
561        await pdp.RecalculateAsync(false, false);
562        pdp.Refresh();
563        UpdateDensityChart(densityChart, variable);
564      }
565    }
566  }
567}
Note: See TracBrowser for help on using the repository browser.