Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Views/3.4/Controls/FactorPartialDependencePlot.cs @ 18079

Last change on this file since 18079 was 17939, checked in by mkommend, 4 years ago

#3115: Simplified the source code for checking the compatibility of solutions by using HashSet and the IsSubsetOf method in the PDP controls.

File size: 21.9 KB
RevLine 
[14248]1#region License Information
2/* HeuristicLab
[17180]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[14248]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Drawing;
26using System.Linq;
27using System.Threading;
28using System.Threading.Tasks;
29using System.Windows.Forms;
30using System.Windows.Forms.DataVisualization.Charting;
31using HeuristicLab.Common;
32using HeuristicLab.MainForm.WindowsForms;
33using HeuristicLab.Visualization.ChartControlsExtensions;
34
35namespace HeuristicLab.Problems.DataAnalysis.Views {
[14852]36  public partial class FactorPartialDependencePlot : UserControl, IPartialDependencePlot {
37    private ModifiableDataset sharedFixedVariables; // used for synchronising variable values between charts
[14248]38    private ModifiableDataset internalDataset; // holds the x values for each point drawn
39
40    private CancellationTokenSource cancelCurrentRecalculateSource;
41
42    private readonly List<IRegressionSolution> solutions;
43    private readonly Dictionary<IRegressionSolution, Series> seriesCache;
44    private readonly Dictionary<IRegressionSolution, Series> ciSeriesCache;
45
46    #region Properties
47    public string XAxisTitle {
48      get { return chart.ChartAreas[0].AxisX.Title; }
49      set { chart.ChartAreas[0].AxisX.Title = value; }
50    }
51
52    public string YAxisTitle {
53      get { return chart.ChartAreas[0].AxisY.Title; }
54      set { chart.ChartAreas[0].AxisY.Title = value; }
55    }
56
57    public bool ShowLegend {
58      get { return chart.Legends[0].Enabled; }
59      set { chart.Legends[0].Enabled = value; }
60    }
61    public bool ShowCursor {
62      get { return chart.Annotations[0].Visible; }
63      set {
64        chart.Annotations[0].Visible = value;
65        if (!value) chart.Titles[0].Text = string.Empty;
66      }
67    }
68
69    private int yAxisTicks = 5;
70    public int YAxisTicks {
71      get { return yAxisTicks; }
72      set {
73        if (value != yAxisTicks) {
74          yAxisTicks = value;
[15211]75          SetupAxis(chart, chart.ChartAreas[0].AxisY, yMin, yMax, YAxisTicks, FixedYAxisMin, FixedYAxisMax);
[14248]76          RecalculateInternalDataset();
77        }
78      }
79    }
80    private double? fixedYAxisMin;
81    public double? FixedYAxisMin {
82      get { return fixedYAxisMin; }
83      set {
84        if ((value.HasValue && fixedYAxisMin.HasValue && !value.Value.IsAlmost(fixedYAxisMin.Value)) || (value.HasValue != fixedYAxisMin.HasValue)) {
85          fixedYAxisMin = value;
[15211]86          SetupAxis(chart, chart.ChartAreas[0].AxisY, yMin, yMax, YAxisTicks, FixedYAxisMin, FixedYAxisMax);
[14248]87        }
88      }
89    }
90    private double? fixedYAxisMax;
91    public double? FixedYAxisMax {
92      get { return fixedYAxisMax; }
93      set {
94        if ((value.HasValue && fixedYAxisMax.HasValue && !value.Value.IsAlmost(fixedYAxisMax.Value)) || (value.HasValue != fixedYAxisMax.HasValue)) {
95          fixedYAxisMax = value;
[15211]96          SetupAxis(chart, chart.ChartAreas[0].AxisY, yMin, yMax, YAxisTicks, FixedYAxisMin, FixedYAxisMax);
[14248]97        }
98      }
99    }
100
101    private string freeVariable;
102    public string FreeVariable {
103      get { return freeVariable; }
104      set {
105        if (value == freeVariable) return;
106        if (solutions.Any(s => !s.ProblemData.Dataset.StringVariables.Contains(value))) {
107          throw new ArgumentException("Variable does not exist in the ProblemData of the Solutions.");
108        }
109        freeVariable = value;
110        RecalculateInternalDataset();
111      }
112    }
113
114    private double yMin;
115    public double YMin {
116      get { return yMin; }
117    }
118    private double yMax;
119    public double YMax {
120      get { return yMax; }
121    }
122
123    public bool IsZoomed {
124      get { return chart.ChartAreas[0].AxisX.ScaleView.IsZoomed; }
125    }
126
127    internal ElementPosition InnerPlotPosition {
128      get { return chart.ChartAreas[0].InnerPlotPosition; }
129    }
130    #endregion
131
132    private List<string> variableValues;
133
134    public event EventHandler ChartPostPaint;
135
[14852]136    public FactorPartialDependencePlot() {
[14248]137      InitializeComponent();
138
139      solutions = new List<IRegressionSolution>();
140      seriesCache = new Dictionary<IRegressionSolution, Series>();
141      ciSeriesCache = new Dictionary<IRegressionSolution, Series>();
142
143      // Configure axis
144      chart.CustomizeAllChartAreas();
[15845]145      chart.ChartAreas[0].CursorX.IsUserSelectionEnabled = false;
146      chart.ChartAreas[0].CursorY.IsUserSelectionEnabled = false;
[14248]147
[15839]148      chart.ChartAreas[0].Axes.ToList().ForEach(x => { x.ScaleView.Zoomable = false; });
149
[14852]150      Disposed += Control_Disposed;
[14248]151    }
152
[14852]153    private void Control_Disposed(object sender, EventArgs e) {
[14248]154      if (cancelCurrentRecalculateSource != null)
155        cancelCurrentRecalculateSource.Cancel();
156    }
157
158    public void Configure(IEnumerable<IRegressionSolution> solutions, ModifiableDataset sharedFixedVariables, string freeVariable, IList<string> variableValues, bool initializeAxisRanges = true) {
159      if (!SolutionsCompatible(solutions))
160        throw new ArgumentException("Solutions are not compatible with the problem data.");
161      this.freeVariable = freeVariable;
162      this.variableValues = new List<string>(variableValues);
163
164      this.solutions.Clear();
165      this.solutions.AddRange(solutions);
166
167      // add an event such that whenever a value is changed in the shared dataset,
168      // this change is reflected in the internal dataset (where the value becomes a whole column)
[16519]169      if (this.sharedFixedVariables != null) {
[14248]170        this.sharedFixedVariables.ItemChanged -= sharedFixedVariables_ItemChanged;
[16519]171        this.sharedFixedVariables.Reset -= sharedFixedVariables_Reset;
172      }
173
[14248]174      this.sharedFixedVariables = sharedFixedVariables;
175      this.sharedFixedVariables.ItemChanged += sharedFixedVariables_ItemChanged;
[16519]176      this.sharedFixedVariables.Reset += sharedFixedVariables_Reset;
[14248]177
178      RecalculateInternalDataset();
179
180      chart.Series.Clear();
181      seriesCache.Clear();
182      ciSeriesCache.Clear();
183      foreach (var solution in this.solutions) {
184        var series = CreateSeries(solution);
185        seriesCache.Add(solution, series.Item1);
186        if (series.Item2 != null)
187          ciSeriesCache.Add(solution, series.Item2);
188      }
189
190      InitSeriesData();
191      OrderAndColorSeries();
192
193    }
194
195    public async Task RecalculateAsync(bool updateOnFinish = true, bool resetYAxis = true) {
196      if (IsDisposed
197        || sharedFixedVariables == null || !solutions.Any() || string.IsNullOrEmpty(freeVariable)
198        || !variableValues.Any())
199        return;
200
201      calculationPendingTimer.Start();
202
203      // cancel previous recalculate call
204      if (cancelCurrentRecalculateSource != null)
205        cancelCurrentRecalculateSource.Cancel();
206      cancelCurrentRecalculateSource = new CancellationTokenSource();
207      var cancellationToken = cancelCurrentRecalculateSource.Token;
208
209      // Update series
210      try {
211        var limits = await UpdateAllSeriesDataAsync(cancellationToken);
[15211]212        chart.Invalidate();
[14248]213
214        yMin = limits.Lower;
215        yMax = limits.Upper;
216        // Set y-axis
217        if (resetYAxis)
[15211]218          SetupAxis(chart, chart.ChartAreas[0].AxisY, yMin, yMax, YAxisTicks, FixedYAxisMin, FixedYAxisMax);
[14248]219
220        calculationPendingTimer.Stop();
221        calculationPendingLabel.Visible = false;
222        if (updateOnFinish)
223          Update();
[17939]224      } catch (OperationCanceledException) {
[16519]225      } catch (AggregateException ae) {
[14248]226        if (!ae.InnerExceptions.Any(e => e is OperationCanceledException))
227          throw;
228      }
229    }
230
231    public void UpdateTitlePosition() {
232      var title = chart.Titles[0];
233      var plotArea = InnerPlotPosition;
234
235      title.Visible = plotArea.Width != 0;
236
237      title.Position.X = plotArea.X + (plotArea.Width / 2);
238    }
239
[15211]240    private static void SetupAxis(EnhancedChart chart, Axis axis, double minValue, double maxValue, int ticks, double? fixedAxisMin, double? fixedAxisMax) {
241      //guard if only one distinct value is present
242      if (minValue.IsAlmost(maxValue)) {
243        minValue = minValue - 0.5;
244        maxValue = minValue + 0.5;
[14248]245      }
246
[15211]247      double axisMin, axisMax, axisInterval;
248      ChartUtil.CalculateAxisInterval(minValue, maxValue, ticks, out axisMin, out axisMax, out axisInterval);
249      axis.Minimum = fixedAxisMin ?? axisMin;
250      axis.Maximum = fixedAxisMax ?? axisMax;
251      axis.Interval = (axis.Maximum - axis.Minimum) / ticks;
252
[15222]253      chart.ChartAreas[0].RecalculateAxesScale();
[14248]254    }
255
256
257    private void RecalculateInternalDataset() {
258      if (sharedFixedVariables == null)
259        return;
260
261      var factorValues = new List<string>(variableValues);
262
263      var variables = sharedFixedVariables.VariableNames.ToList();
264      var values = new List<IList>();
265      foreach (var varName in variables) {
266        if (varName == FreeVariable) {
267          values.Add(factorValues);
268        } else if (sharedFixedVariables.VariableHasType<double>(varName)) {
269          values.Add(Enumerable.Repeat(sharedFixedVariables.GetDoubleValue(varName, 0), factorValues.Count).ToList());
270        } else if (sharedFixedVariables.VariableHasType<string>(varName)) {
271          values.Add(Enumerable.Repeat(sharedFixedVariables.GetStringValue(varName, 0), factorValues.Count).ToList());
272        }
273      }
274
275      internalDataset = new ModifiableDataset(variables, values);
276    }
277
278    private Tuple<Series, Series> CreateSeries(IRegressionSolution solution) {
279      var series = new Series {
280        ChartType = SeriesChartType.Column,
281        Name = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution),
282        XValueType = System.Windows.Forms.DataVisualization.Charting.ChartValueType.String
283      };
284      series.LegendText = series.Name;
285
286      Series confidenceIntervalSeries = null;
287      confidenceIntervalSeries = new Series {
288        ChartType = SeriesChartType.BoxPlot,
289        XValueType = System.Windows.Forms.DataVisualization.Charting.ChartValueType.String,
290        Color = Color.Black,
291        YValuesPerPoint = 5,
292        Name = "95% Conf. Interval " + series.Name,
293        IsVisibleInLegend = false
294      };
295      return Tuple.Create(series, confidenceIntervalSeries);
296    }
297
298    private void OrderAndColorSeries() {
299      chart.SuspendRepaint();
300
301      chart.Series.Clear();
302      // Add mean series for applying palette colors
303      foreach (var solution in solutions) {
304        chart.Series.Add(seriesCache[solution]);
305      }
306
307      chart.Palette = ChartColorPalette.BrightPastel;
308      chart.ApplyPaletteColors();
309      chart.Palette = ChartColorPalette.None;
310
311      // Add confidence interval series after its coresponding series for correct z index
312      foreach (var solution in solutions) {
313        Series ciSeries;
314        if (ciSeriesCache.TryGetValue(solution, out ciSeries)) {
315          int idx = chart.Series.IndexOf(seriesCache[solution]);
316          chart.Series.Insert(idx + 1, ciSeries);
317        }
318      }
319
320      chart.ResumeRepaint(true);
321    }
322
323    private async Task<DoubleLimit> UpdateAllSeriesDataAsync(CancellationToken cancellationToken) {
324      var updateTasks = solutions.Select(solution => UpdateSeriesDataAsync(solution, cancellationToken));
325
326      double min = double.MaxValue, max = double.MinValue;
327      foreach (var update in updateTasks) {
328        var limit = await update;
329        if (limit.Lower < min) min = limit.Lower;
330        if (limit.Upper > max) max = limit.Upper;
331      }
332
333      return new DoubleLimit(min, max);
334    }
335
336    private Task<DoubleLimit> UpdateSeriesDataAsync(IRegressionSolution solution, CancellationToken cancellationToken) {
337      return Task.Run(() => {
338        var yvalues = solution.Model.GetEstimatedValues(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();
339
340        double min = double.MaxValue, max = double.MinValue;
341
342        var series = seriesCache[solution];
343        for (int i = 0; i < variableValues.Count; i++) {
344          series.Points[i].SetValueXY(variableValues[i], yvalues[i]);
345          if (yvalues[i] < min) min = yvalues[i];
346          if (yvalues[i] > max) max = yvalues[i];
347        }
348
349        cancellationToken.ThrowIfCancellationRequested();
350
351        var confidenceBoundSolution = solution as IConfidenceRegressionSolution;
352        if (confidenceBoundSolution != null) {
353          var confidenceIntervalSeries = ciSeriesCache[solution];
354          var variances = confidenceBoundSolution.Model.GetEstimatedVariances(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();
355          for (int i = 0; i < variableValues.Count; i++) {
356            var lower = yvalues[i] - 1.96 * Math.Sqrt(variances[i]);
357            var upper = yvalues[i] + 1.96 * Math.Sqrt(variances[i]);
358            confidenceIntervalSeries.Points[i].SetValueXY(variableValues[i], lower, upper, yvalues[i], yvalues[i], yvalues[i]);
359            if (lower < min) min = lower;
360            if (upper > max) max = upper;
361          }
362        }
363
364        cancellationToken.ThrowIfCancellationRequested();
365        return new DoubleLimit(min, max);
366      }, cancellationToken);
367    }
368
369    private void InitSeriesData() {
370      if (internalDataset == null)
371        return;
372
373      foreach (var solution in solutions)
374        InitSeriesData(solution, variableValues);
375    }
376
377    private void InitSeriesData(IRegressionSolution solution, IList<string> values) {
378
379      var series = seriesCache[solution];
380      series.Points.SuspendUpdates();
381      series.Points.Clear();
382      for (int i = 0; i < values.Count; i++) {
383        series.Points.AddXY(values[i], 0.0);
384        series.Points.Last().ToolTip = values[i];
385      }
386
387      UpdateAllSeriesStyles(variableValues.IndexOf(sharedFixedVariables.GetStringValue(FreeVariable, 0)));
388      series.Points.ResumeUpdates();
389
390      Series confidenceIntervalSeries;
391      if (ciSeriesCache.TryGetValue(solution, out confidenceIntervalSeries)) {
392        confidenceIntervalSeries.Points.SuspendUpdates();
393        confidenceIntervalSeries.Points.Clear();
394        for (int i = 0; i < values.Count; i++)
395          confidenceIntervalSeries.Points.AddXY(values[i], 0.0, 0.0, 0.0, 0.0, 0.0);
396        confidenceIntervalSeries.Points.ResumeUpdates();
397      }
398    }
399
400    public async Task AddSolutionAsync(IRegressionSolution solution) {
401      if (!SolutionsCompatible(solutions.Concat(new[] { solution })))
402        throw new ArgumentException("The solution is not compatible with the problem data.");
403      if (solutions.Contains(solution))
404        return;
405
406      solutions.Add(solution);
407
408      var series = CreateSeries(solution);
409      seriesCache.Add(solution, series.Item1);
410      if (series.Item2 != null)
411        ciSeriesCache.Add(solution, series.Item2);
412
413      InitSeriesData(solution, variableValues);
414      OrderAndColorSeries();
415
416      await RecalculateAsync();
417      var args = new EventArgs<IRegressionSolution>(solution);
418      OnSolutionAdded(this, args);
419    }
420
421    public async Task RemoveSolutionAsync(IRegressionSolution solution) {
[15054]422      if (!solutions.Remove(solution))
423        return;
424
425      seriesCache.Remove(solution);
426      ciSeriesCache.Remove(solution);
427
428      await RecalculateAsync();
429      var args = new EventArgs<IRegressionSolution>(solution);
430      OnSolutionRemoved(this, args);
[14248]431    }
432
433    private static bool SolutionsCompatible(IEnumerable<IRegressionSolution> solutions) {
434      var refSolution = solutions.First();
435      var refSolVars = refSolution.ProblemData.Dataset.VariableNames;
[17939]436      var refFactorVars = refSolution.ProblemData.Dataset.StringVariables;
437      var distinctVals = refFactorVars.ToDictionary(fv => fv, fv => refSolution.ProblemData.Dataset.GetStringValues(fv).Distinct().ToArray());
438
[14248]439      foreach (var solution in solutions.Skip(1)) {
[17939]440        var variables1 = new HashSet<string>(solution.ProblemData.Dataset.VariableNames);
441        if (!variables1.IsSubsetOf(refSolVars))
[14248]442          return false;
443
[17939]444        foreach (var factorVar in solution.ProblemData.Dataset.StringVariables) {
445          var refValues = distinctVals[factorVar];
446          var values = new HashSet<string>(solution.ProblemData.Dataset.GetStringValues(factorVar));
447
448          if (!values.IsSubsetOf(refValues))
449            return false;
[14248]450        }
451      }
452      return true;
453    }
454
455    #region Events
456    public event EventHandler<EventArgs<IRegressionSolution>> SolutionAdded;
457    public void OnSolutionAdded(object sender, EventArgs<IRegressionSolution> args) {
458      var added = SolutionAdded;
459      if (added == null) return;
460      added(sender, args);
461    }
462
463    public event EventHandler<EventArgs<IRegressionSolution>> SolutionRemoved;
464    public void OnSolutionRemoved(object sender, EventArgs<IRegressionSolution> args) {
465      var removed = SolutionRemoved;
466      if (removed == null) return;
467      removed(sender, args);
468    }
469
470    public event EventHandler VariableValueChanged;
471    public void OnVariableValueChanged(object sender, EventArgs args) {
472      var changed = VariableValueChanged;
473      if (changed == null) return;
474      changed(sender, args);
475    }
476
477    public event EventHandler ZoomChanged;
478    public void OnZoomChanged(object sender, EventArgs args) {
479      var changed = ZoomChanged;
480      if (changed == null) return;
481      changed(sender, args);
482    }
483
484    private void sharedFixedVariables_ItemChanged(object o, EventArgs<int, int> e) {
485      if (o != sharedFixedVariables) return;
486      var variables = sharedFixedVariables.VariableNames.ToList();
487      var rowIndex = e.Value;
488      var columnIndex = e.Value2;
489
490      var variableName = variables[columnIndex];
491      if (variableName == FreeVariable) return;
492      if (internalDataset.VariableHasType<double>(variableName)) {
493        var v = sharedFixedVariables.GetDoubleValue(variableName, rowIndex);
494        var values = new List<double>(Enumerable.Repeat(v, internalDataset.Rows));
495        internalDataset.ReplaceVariable(variableName, values);
496      } else if (internalDataset.VariableHasType<string>(variableName)) {
497        var v = sharedFixedVariables.GetStringValue(variableName, rowIndex);
498        var values = new List<String>(Enumerable.Repeat(v, internalDataset.Rows));
499        internalDataset.ReplaceVariable(variableName, values);
500      } else {
501        // unsupported type
502        throw new NotSupportedException();
503      }
504    }
505
[16519]506    private void sharedFixedVariables_Reset(object sender, EventArgs e) {
507      var newValue = sharedFixedVariables.GetStringValue(FreeVariable, 0);
508      UpdateSelectedValue(newValue);
509
510      int idx = variableValues.IndexOf(newValue);
511      UpdateAllSeriesStyles(idx);
512    }
513
[14248]514    private async void chart_DragDrop(object sender, DragEventArgs e) {
515      var data = e.Data.GetData(HeuristicLab.Common.Constants.DragDropDataFormat);
516      if (data != null) {
517        var solution = data as IRegressionSolution;
518        if (!solutions.Contains(solution))
519          await AddSolutionAsync(solution);
520      }
521    }
522    private void chart_DragEnter(object sender, DragEventArgs e) {
523      if (!e.Data.GetDataPresent(HeuristicLab.Common.Constants.DragDropDataFormat)) return;
524      e.Effect = DragDropEffects.None;
525
526      var data = e.Data.GetData(HeuristicLab.Common.Constants.DragDropDataFormat);
527      var regressionSolution = data as IRegressionSolution;
528      if (regressionSolution != null) {
529        e.Effect = DragDropEffects.Copy;
530      }
531    }
532
533    private void calculationPendingTimer_Tick(object sender, EventArgs e) {
534      calculationPendingLabel.Visible = true;
535      Update();
536    }
537
538    private void chart_SelectionRangeChanged(object sender, CursorEventArgs e) {
539      OnZoomChanged(this, EventArgs.Empty);
540    }
541
542    private void chart_Resize(object sender, EventArgs e) {
543      UpdateTitlePosition();
544    }
545
546    private void chart_PostPaint(object sender, ChartPaintEventArgs e) {
547      if (ChartPostPaint != null)
548        ChartPostPaint(this, EventArgs.Empty);
549    }
550    #endregion
551
552    private void chart_MouseClick(object sender, MouseEventArgs e) {
553      var hitTestResult = chart.HitTest(e.X, e.Y, ChartElementType.DataPoint);
554      if (hitTestResult != null && hitTestResult.ChartElementType == ChartElementType.DataPoint) {
555        var series = hitTestResult.Series;
556        var dataPoint = (DataPoint)hitTestResult.Object;
557        var idx = series.Points.IndexOf(dataPoint);
558        UpdateSelectedValue(variableValues[idx]);
559
560        UpdateAllSeriesStyles(idx);
561      }
562    }
563
564    private void UpdateAllSeriesStyles(int selectedValueIndex) {
565      if (ShowCursor) {
566        chart.Titles[0].Text = FreeVariable + " : " + variableValues[selectedValueIndex];
567        chart.Update();
568      }
569      foreach (var s in seriesCache.Values) {
570        if (s.ChartType == SeriesChartType.Column)
571          for (int i = 0; i < s.Points.Count; i++) {
572            if (i != selectedValueIndex) {
573              s.Points[i].BorderDashStyle = ChartDashStyle.NotSet;
574            } else {
575              s.Points[i].BorderDashStyle = ChartDashStyle.Dash;
576              s.Points[i].BorderColor = Color.Red;
577            }
578          }
579      }
580    }
581
582    private void UpdateSelectedValue(string variableValue) {
583      sharedFixedVariables.SetVariableValue(variableValue, FreeVariable, 0);
584      OnVariableValueChanged(this, EventArgs.Empty);
585    }
586  }
587}
588
Note: See TracBrowser for help on using the repository browser.