#region License Information /* HeuristicLab * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections; using System.Collections.Generic; using System.Drawing; using System.Globalization; using System.Linq; using System.Threading.Tasks; using System.Windows.Forms; using HeuristicLab.Common; using HeuristicLab.MainForm; using HeuristicLab.Visualization.ChartControlsExtensions; namespace HeuristicLab.Problems.DataAnalysis.Views { [View("Partial Dependence Plots")] [Content(typeof(IRegressionSolution))] public partial class RegressionSolutionPartialDependencePlotView : DataAnalysisSolutionEvaluationView { private readonly Dictionary partialDependencePlots; private readonly Dictionary densityCharts; private readonly Dictionary groupingPanels; private ModifiableDataset sharedFixedVariables; private const int Points = 200; private int MaxColumns = 4; private IEnumerable VisibleVariables { get { foreach (ListViewItem item in variableListView.CheckedItems) yield return item.Text; } } private IEnumerable VisiblePartialDependencePlots { get { return VisibleVariables.Select(v => partialDependencePlots[v]); } } private IEnumerable VisibleDensityCharts { get { return VisibleVariables.Select(v => densityCharts[v]); } } private IEnumerable VisibleChartsPanels { get { return VisibleVariables.Select(v => groupingPanels[v]); } } public RegressionSolutionPartialDependencePlotView() { InitializeComponent(); partialDependencePlots = new Dictionary(); densityCharts = new Dictionary(); groupingPanels = new Dictionary(); limitView.Content = new DoubleLimit(0, 1); limitView.Content.ValueChanged += limit_ValueChanged; densityComboBox.SelectedIndex = 1; // select Training // Avoid additional horizontal scrollbar var vertScrollWidth = SystemInformation.VerticalScrollBarWidth; scrollPanel.Padding = new Padding(0, 0, vertScrollWidth, 0); scrollPanel.AutoScroll = true; } public new IRegressionSolution Content { get { return (IRegressionSolution)base.Content; } set { base.Content = value; } } protected override void RegisterContentEvents() { base.RegisterContentEvents(); Content.ModelChanged += solution_ModelChanged; } protected override void DeregisterContentEvents() { Content.ModelChanged -= solution_ModelChanged; base.DeregisterContentEvents(); } protected override void OnContentChanged() { base.OnContentChanged(); if (Content == null) return; var problemData = Content.ProblemData; if (sharedFixedVariables != null) { sharedFixedVariables.ItemChanged -= SharedFixedVariables_ItemChanged; sharedFixedVariables.Reset -= SharedFixedVariables_Reset; } // Init Y-axis range double min = double.MaxValue, max = double.MinValue; var trainingTarget = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices); foreach (var t in trainingTarget) { if (t < min) min = t; if (t > max) max = t; } double range = max - min; const double scale = 1.0 / 3.0; double axisMin, axisMax, axisInterval; ChartUtil.CalculateAxisInterval(min - scale * range, max + scale * range, 5, out axisMin, out axisMax, out axisInterval); automaticYAxisCheckBox.Checked = false; limitView.ReadOnly = false; limitView.Content.Lower = axisMin; limitView.Content.Upper = axisMax; // create dataset of problemData input variables and model input variables // necessary workaround to have the variables in the occurring order var inputvariables = new HashSet(Content.ProblemData.AllowedInputVariables.Union(Content.Model.VariablesUsedForPrediction)); var allowedInputVariables = Content.ProblemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList(); var doubleVariables = allowedInputVariables.Where(problemData.Dataset.VariableHasType); var doubleVariableValues = (IEnumerable)doubleVariables.Select(x => new List { problemData.Dataset.GetDoubleValue(x, 0) }); var factorVariables = allowedInputVariables.Where(problemData.Dataset.VariableHasType); var factorVariableValues = (IEnumerable)factorVariables.Select(x => new List { problemData.Dataset.GetStringValue(x, 0) }); sharedFixedVariables = new ModifiableDataset(doubleVariables.Concat(factorVariables), doubleVariableValues.Concat(factorVariableValues)); variableValuesModeComboBox.SelectedItem = "Median"; // triggers UpdateVariableValue and changes shardFixedVariables // create controls partialDependencePlots.Clear(); densityCharts.Clear(); groupingPanels.Clear(); foreach (var variableName in doubleVariables) { var plot = CreatePartialDependencePlot(variableName, sharedFixedVariables); partialDependencePlots.Add(variableName, plot); var densityChart = new DensityChart() { Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right, Margin = Padding.Empty, Height = 12, Visible = false, Top = (int)(plot.Height * 0.1), }; densityCharts.Add(variableName, densityChart); plot.ZoomChanged += (o, e) => { var pdp = (PartialDependencePlot)o; var density = densityCharts[pdp.FreeVariable]; density.Visible = densityComboBox.SelectedIndex != 0 && !pdp.IsZoomed; if (density.Visible) UpdateDensityChart(density, pdp.FreeVariable); }; plot.SizeChanged += (o, e) => { var pdp = (PartialDependencePlot)o; var density = densityCharts[pdp.FreeVariable]; density.Top = (int)(pdp.Height * 0.1); }; // Initially, the inner plot areas are not initialized for hidden charts (scrollpanel, ...) // This event handler listens for the paint event once (where everything is already initialized) to do some manual layouting. plot.ChartPostPaint += OnPartialDependencePlotPostPaint; var panel = new Panel() { Dock = DockStyle.Fill, Margin = Padding.Empty, BackColor = Color.White }; panel.Controls.Add(densityChart); panel.Controls.Add(plot); groupingPanels.Add(variableName, panel); } foreach (var variableName in factorVariables) { var plot = CreateFactorPartialDependencePlot(variableName, sharedFixedVariables); partialDependencePlots.Add(variableName, plot); var densityChart = new DensityChart() { Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right, Margin = Padding.Empty, Height = 12, Visible = false, Top = (int)(plot.Height * 0.1), }; densityCharts.Add(variableName, densityChart); plot.ZoomChanged += (o, e) => { var pdp = (FactorPartialDependencePlot)o; var density = densityCharts[pdp.FreeVariable]; density.Visible = densityComboBox.SelectedIndex != 0 && !pdp.IsZoomed; if (density.Visible) UpdateDensityChart(density, pdp.FreeVariable); }; plot.SizeChanged += (o, e) => { var pdp = (FactorPartialDependencePlot)o; var density = densityCharts[pdp.FreeVariable]; density.Top = (int)(pdp.Height * 0.1); }; // Initially, the inner plot areas are not initialized for hidden charts (scrollpanel, ...) // This event handler listens for the paint event once (where everything is already initialized) to do some manual layouting. plot.ChartPostPaint += OnFactorPartialDependencePlotPostPaint; var panel = new Panel() { Dock = DockStyle.Fill, Margin = Padding.Empty, BackColor = Color.White }; panel.Controls.Add(densityChart); panel.Controls.Add(plot); groupingPanels.Add(variableName, panel); } // update variable list variableListView.ItemChecked -= variableListView_ItemChecked; variableListView.Items.Clear(); foreach (var variable in allowedInputVariables) variableListView.Items.Add(key: variable, text: variable, imageIndex: 0); foreach (var variable in Content.Model.VariablesUsedForPrediction) variableListView.Items[variable].Checked = true; variableListView.ItemChecked += variableListView_ItemChecked; sharedFixedVariables.ItemChanged += SharedFixedVariables_ItemChanged; sharedFixedVariables.Reset += SharedFixedVariables_Reset; rowNrNumericUpDown.Maximum = Content.ProblemData.Dataset.Rows - 1; RecalculateAndRelayoutCharts(); } public async Task AddSolutionAsync(IRegressionSolution solution) { foreach (var chart in partialDependencePlots.Values) { await chart.AddSolutionAsync(solution); } } private void SharedFixedVariables_ItemChanged(object sender, EventArgs e) { SharedFixedVariablesChanged(); } private void SharedFixedVariables_Reset(object sender, EventArgs e) { SharedFixedVariablesChanged(); } private void SharedFixedVariablesChanged() { if (!setVariableValues) // set mode to "nothing" if change was not initiated from a "mode change" variableValuesModeComboBox.SelectedIndex = -1; double yValue = Content.Model.GetEstimatedValues(sharedFixedVariables, new[] { 0 }).Single(); string title = Content.ProblemData.TargetVariable + ": " + yValue.ToString("G5", CultureInfo.CurrentCulture); foreach (var chart in partialDependencePlots.Values) { if (!string.IsNullOrEmpty(chart.YAxisTitle)) { // only show title for first column in grid chart.YAxisTitle = title; } } } private void OnPartialDependencePlotPostPaint(object o, EventArgs e) { var plot = (PartialDependencePlot)o; var density = densityCharts[plot.FreeVariable]; density.Width = plot.Width; var gcPlotPosition = plot.InnerPlotPosition; density.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width); density.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width); plot.UpdateTitlePosition(); // removed after succesful layouting due to performance reasons if (gcPlotPosition.Width != 0) plot.ChartPostPaint -= OnPartialDependencePlotPostPaint; } private void OnFactorPartialDependencePlotPostPaint(object o, EventArgs e) { var plot = (FactorPartialDependencePlot)o; var density = densityCharts[plot.FreeVariable]; density.Width = plot.Width; var gcPlotPosition = plot.InnerPlotPosition; density.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width); density.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width); plot.UpdateTitlePosition(); // removed after succesful layouting due to performance reasons if (gcPlotPosition.Width != 0) plot.ChartPostPaint -= OnFactorPartialDependencePlotPostPaint; } private async void RecalculateAndRelayoutCharts() { foreach (var variable in VisibleVariables) { var plot = partialDependencePlots[variable]; await plot.RecalculateAsync(false, false); } partialDependencePlotTableLayout.SuspendLayout(); SetupYAxis(); ReOrderControls(); SetStyles(); partialDependencePlotTableLayout.ResumeLayout(); partialDependencePlotTableLayout.Refresh(); foreach (var variable in VisibleVariables) { DensityChart densityChart; if (densityCharts.TryGetValue(variable, out densityChart)) { UpdateDensityChart(densityChart, variable); } } } private PartialDependencePlot CreatePartialDependencePlot(string variableName, ModifiableDataset sharedFixedVariables) { var plot = new PartialDependencePlot { Dock = DockStyle.Fill, Margin = Padding.Empty, ShowLegend = false, ShowCursor = true, ShowConfigButton = false, YAxisTicks = 5, }; plot.VariableValueChanged += async (o, e) => { var recalculations = VisiblePartialDependencePlots .Except(new[] { (IPartialDependencePlot)o }) .Select(async chart => { await chart.RecalculateAsync(updateOnFinish: false, resetYAxis: false); }).ToList(); await Task.WhenAll(recalculations); if (recalculations.All(t => t.IsCompleted)) SetupYAxis(); }; plot.Configure(new[] { Content }, sharedFixedVariables, variableName, Points); plot.SolutionAdded += partialDependencePlot_SolutionAdded; plot.SolutionRemoved += partialDependencePlot_SolutionRemoved; return plot; } private FactorPartialDependencePlot CreateFactorPartialDependencePlot(string variableName, ModifiableDataset sharedFixedVariables) { var plot = new FactorPartialDependencePlot { Dock = DockStyle.Fill, Margin = Padding.Empty, ShowLegend = false, ShowCursor = true, YAxisTicks = 5, }; plot.VariableValueChanged += async (o, e) => { var recalculations = VisiblePartialDependencePlots .Except(new[] { (FactorPartialDependencePlot)o }) .Select(async chart => { await chart.RecalculateAsync(updateOnFinish: false, resetYAxis: false); }).ToList(); await Task.WhenAll(recalculations); if (recalculations.All(t => t.IsCompleted)) SetupYAxis(); }; var variableValues = Content.ProblemData.Dataset.GetStringValues(variableName).Distinct().OrderBy(n => n).ToList(); plot.Configure(new[] { Content }, sharedFixedVariables, variableName, variableValues); plot.SolutionAdded += partialDependencePlot_SolutionAdded; plot.SolutionRemoved += partialDependencePlot_SolutionRemoved; return plot; } private void SetupYAxis() { double axisMin, axisMax; if (automaticYAxisCheckBox.Checked) { double min = double.MaxValue, max = double.MinValue; foreach (var chart in VisiblePartialDependencePlots) { if (chart.YMin < min) min = chart.YMin; if (chart.YMax > max) max = chart.YMax; } double axisInterval; ChartUtil.CalculateAxisInterval(min, max, 5, out axisMin, out axisMax, out axisInterval); } else { axisMin = limitView.Content.Lower; axisMax = limitView.Content.Upper; } foreach (var chart in VisiblePartialDependencePlots) { chart.FixedYAxisMin = axisMin; chart.FixedYAxisMax = axisMax; } } // reorder chart controls so that they always appear in the same order as in the list view // the table layout containing the controls should be suspended before calling this method private void ReOrderControls() { var tl = partialDependencePlotTableLayout; tl.Controls.Clear(); int row = 0, column = 0; double yValue = Content.Model.GetEstimatedValues(sharedFixedVariables, new[] { 0 }).Single(); string title = Content.ProblemData.TargetVariable + ": " + yValue.ToString("G5", CultureInfo.CurrentCulture); foreach (var v in VisibleVariables) { var chartsPanel = groupingPanels[v]; tl.Controls.Add(chartsPanel, column, row); var chart = partialDependencePlots[v]; chart.YAxisTitle = column == 0 ? title : string.Empty; column++; if (column == MaxColumns) { row++; column = 0; } } } private void SetStyles() { var tl = partialDependencePlotTableLayout; tl.RowStyles.Clear(); tl.ColumnStyles.Clear(); int numVariables = VisibleVariables.Count(); if (numVariables == 0) return; // set column styles tl.ColumnCount = Math.Min(numVariables, MaxColumns); for (int c = 0; c < tl.ColumnCount; c++) tl.ColumnStyles.Add(new ColumnStyle(SizeType.Percent, 100.0f / tl.ColumnCount)); // set row styles tl.RowCount = (int)Math.Ceiling((double)numVariables / tl.ColumnCount); var columnWidth = tl.Width / tl.ColumnCount; // assume all columns have the same width var rowHeight = (int)(0.8 * columnWidth); for (int r = 0; r < tl.RowCount; r++) tl.RowStyles.Add(new RowStyle(SizeType.Absolute, rowHeight)); } private async void partialDependencePlot_SolutionAdded(object sender, EventArgs e) { var solution = e.Value; foreach (var chart in partialDependencePlots.Values) { if (sender == chart) continue; await chart.AddSolutionAsync(solution); } } private async void partialDependencePlot_SolutionRemoved(object sender, EventArgs e) { var solution = e.Value; foreach (var chart in partialDependencePlots.Values) { if (sender == chart) continue; await chart.RemoveSolutionAsync(solution); } } private async void variableListView_ItemChecked(object sender, ItemCheckedEventArgs e) { var item = e.Item; var variable = item.Text; var plot = partialDependencePlots[variable]; var chartsPanel = groupingPanels[variable]; var tl = partialDependencePlotTableLayout; tl.SuspendLayout(); if (item.Checked) { tl.Controls.Add(chartsPanel); await plot.RecalculateAsync(false, false); } else { tl.Controls.Remove(chartsPanel); } if (tl.Controls.Count > 0) { SetupYAxis(); ReOrderControls(); SetStyles(); } tl.ResumeLayout(); tl.Refresh(); densityComboBox_SelectedIndexChanged(this, EventArgs.Empty); } private void automaticYAxisCheckBox_CheckedChanged(object sender, EventArgs e) { limitView.ReadOnly = automaticYAxisCheckBox.Checked; SetupYAxis(); partialDependencePlotTableLayout.Refresh(); densityComboBox_SelectedIndexChanged(this, EventArgs.Empty); // necessary to realign the density plots } private void limit_ValueChanged(object sender, EventArgs e) { if (automaticYAxisCheckBox.Checked) return; SetupYAxis(); partialDependencePlotTableLayout.Refresh(); densityComboBox_SelectedIndexChanged(this, EventArgs.Empty); // necessary to realign the density plots } private void densityComboBox_SelectedIndexChanged(object sender, EventArgs e) { if (Content == null) return; int si = densityComboBox.SelectedIndex; if (si == 0) { foreach (var densityChart in densityCharts.Values) densityChart.Visible = false; } else { var indices = GetDensityIndices(si).ToList(); foreach (var entry in densityCharts) { var variableName = entry.Key; var densityChart = entry.Value; if (!VisibleVariables.Contains(variableName) || partialDependencePlots[variableName].IsZoomed) continue; UpdateDensityChart(densityChart, variableName, indices); } } } private IEnumerable GetDensityIndices(int selectedIndex) { var problemData = Content.ProblemData; return selectedIndex == 1 ? problemData.TrainingIndices : selectedIndex == 2 ? problemData.TestIndices : problemData.AllIndices; } private void UpdateDensityChart(DensityChart densityChart, string variable, IList indices = null) { if (densityComboBox.SelectedIndex == 0) return; if (indices == null) { indices = GetDensityIndices(densityComboBox.SelectedIndex).ToList(); } if (Content.ProblemData.Dataset.VariableHasType(variable)) { var data = Content.ProblemData.Dataset.GetDoubleValues(variable, indices).ToList(); var plot = partialDependencePlots[variable] as PartialDependencePlot; if (plot != null) { var min = plot.FixedXAxisMin; var max = plot.FixedXAxisMax; var buckets = plot.DrawingSteps; if (min.HasValue && max.HasValue) { densityChart.UpdateChart(data, min.Value, max.Value, buckets); densityChart.Width = plot.Width; var gcPlotPosition = plot.InnerPlotPosition; densityChart.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width); densityChart.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width); densityChart.Visible = true; } plot.UpdateTitlePosition(); } } else if (Content.ProblemData.Dataset.VariableHasType(variable)) { var data = Content.ProblemData.Dataset.GetStringValues(variable).ToList(); var plot = partialDependencePlots[variable] as FactorPartialDependencePlot; if (plot != null) { densityChart.UpdateChart(data); densityChart.Width = plot.Width; var gcPlotPosition = plot.InnerPlotPosition; densityChart.Left = (int)(gcPlotPosition.X / 100.0 * plot.Width); densityChart.Width = (int)(gcPlotPosition.Width / 100.0 * plot.Width); densityChart.Visible = true; plot.UpdateTitlePosition(); } } } private void columnsNumericUpDown_ValueChanged(object sender, EventArgs e) { MaxColumns = (int)columnsNumericUpDown.Value; int columns = Math.Min(VisibleVariables.Count(), MaxColumns); if (columns > 0) { var tl = partialDependencePlotTableLayout; MaxColumns = columns; tl.SuspendLayout(); ReOrderControls(); SetStyles(); tl.ResumeLayout(); tl.Refresh(); densityComboBox_SelectedIndexChanged(this, EventArgs.Empty); } } private async void solution_ModelChanged(object sender, EventArgs e) { foreach (var variable in VisibleVariables) { var pdp = partialDependencePlots[variable]; var densityChart = densityCharts[variable]; // recalculate and refresh await pdp.RecalculateAsync(false, false); pdp.Refresh(); UpdateDensityChart(densityChart, variable); } } // flag that the current change is not triggered by a manual change from within a single plot private bool setVariableValues = false; private void variableValuesComboBox_SelectedValueChanged(object sender, EventArgs e) { if (variableValuesModeComboBox.SelectedIndex == -1) return; // changed to "manual" due to manual change of a variable setVariableValues = true; UpdateVariableValues(); setVariableValues = false; } private void rowNrNumericUpDown_ValueChanged(object sender, EventArgs e) { if ((string)variableValuesModeComboBox.SelectedItem != "Row") { variableValuesModeComboBox.SelectedItem = "Row"; // triggers UpdateVariableValues } else { setVariableValues = true; UpdateVariableValues(); setVariableValues = false; } } private void UpdateVariableValues() { string mode = (string)variableValuesModeComboBox.SelectedItem; var dataset = Content.ProblemData.Dataset; object[] newRow; if (mode == "Row") { int rowNumber = (int)rowNrNumericUpDown.Value; newRow = sharedFixedVariables.VariableNames .Select(variableName => { if (dataset.DoubleVariables.Contains(variableName)) { return dataset.GetDoubleValue(variableName, rowNumber); } else if (dataset.StringVariables.Contains(variableName)) { return dataset.GetStringValue(variableName, rowNumber); } else { throw new NotSupportedException("Only double and string(factor) columns are currently supported."); } }).ToArray(); } else { newRow = sharedFixedVariables.VariableNames .Select(variableName => { if (dataset.DoubleVariables.Contains(variableName)) { var values = dataset.GetDoubleValues(variableName); return mode == "Mean" ? values.Average() : mode == "Median" ? values.Median() : mode == "Most Common" ? MostCommon(values) : throw new NotSupportedException(); } else if (dataset.StringVariables.Contains(variableName)) { var values = dataset.GetStringValues(variableName); return mode == "Mean" ? MostCommon(values) : mode == "Median" ? MostCommon(values) : mode == "Most Common" ? MostCommon(values) : throw new NotSupportedException(); } else { throw new NotSupportedException("Only double and string(factor) columns are currently supported."); } }).ToArray(); } sharedFixedVariables.ReplaceRow(0, newRow); } private static T MostCommon(IEnumerable values) { return values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); } // ToolTips cannot be shown longer than 5000ms, only by using ToolTip.Show manually // See: https://stackoverflow.com/questions/8225807/c-sharp-tooltip-doesnt-display-long-enough private void variableValuesModeComboBox_MouseHover(object sender, EventArgs e) { string tooltipText = @"Sets each variable to a specific value: Row - Selects the value based on a specified row of the dataset. Mean - Sets the value to the arithmetic mean of the variable. Median - Sets the value to the median of the variable. Most Common - Sets the value to the most common value of the variable (first if multiple). Note: For categorical values, the most common value is used when selecting Mean, Median or Most Common."; toolTip.Show(tooltipText, variableValuesModeComboBox, 30000); toolTip.Active = true; } private void variableValuesModeComboBox_MouseLeave(object sender, EventArgs e) { toolTip.Active = false; } } }