Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.cs @ 15799

Last change on this file since 15799 was 15789, checked in by fholzing, 6 years ago

#2383: Added additional filter for Error Characteristics Curve

File size: 14.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Windows.Forms;
26using System.Windows.Forms.DataVisualization.Charting;
27using HeuristicLab.Algorithms.DataAnalysis;
28using HeuristicLab.Common;
29using HeuristicLab.MainForm;
30using HeuristicLab.Optimization;
31
32namespace HeuristicLab.Problems.DataAnalysis.Views {
33  [View("Error Characteristics Curve")]
34  [Content(typeof(IRegressionSolution))]
35  public partial class RegressionSolutionErrorCharacteristicsCurveView : DataAnalysisSolutionEvaluationView {
/// <summary>Entry of the samples combo box that selects the training partition.</summary>
protected const string TrainingSamples = "Training";
/// <summary>Entry of the samples combo box that selects the test partition.</summary>
protected const string TestSamples = "Test";
/// <summary>Entry of the samples combo box that selects all samples.</summary>
protected const string AllSamples = "All Samples";

/// <summary>
/// Creates the view, fills the samples combo box, pre-selects the first entry of both
/// combo boxes and applies the fixed axis configuration of the chart.
/// </summary>
public RegressionSolutionErrorCharacteristicsCurveView()
  : base() {
  InitializeComponent();

  // offer the three sample partitions in a fixed order
  foreach (var partition in new[] { TrainingSamples, TestSamples, AllSamples }) {
    cmbSamples.Items.Add(partition);
  }

  cmbSamples.SelectedIndex = 0;
  residualComboBox.SelectedIndex = 0;

  chart.CustomizeAllChartAreas();

  var area = chart.ChartAreas[0];

  // x axis: the selected residual measure
  var xAxis = area.AxisX;
  xAxis.Title = residualComboBox.SelectedItem.ToString();
  xAxis.Minimum = 0.0;
  xAxis.Maximum = 0.0;
  xAxis.IntervalAutoMode = IntervalAutoMode.VariableCount;
  area.CursorX.Interval = 0.01;

  // y axis: ratio of residuals, fixed to [0, 1]
  var yAxis = area.AxisY;
  yAxis.Title = "Ratio of Residuals";
  yAxis.Minimum = 0.0;
  yAxis.Maximum = 1.0;
  yAxis.MajorGrid.Interval = 0.2;
  area.CursorY.Interval = 0.01;
}
65
// The view holds one regression solution as its content but also keeps several other regression solutions for comparison.
// The following invariant must hold:
// (Solutions.IsEmpty && Content == null) ||
// (Solutions[0] == Content && Solutions.All(s => s.ProblemData.TargetVariable == Content.ProblemData.TargetVariable))
70
/// <summary>
/// The regression solution shown by this view. Hides the base class property and
/// casts its value to <see cref="IRegressionSolution"/>.
/// </summary>
public new IRegressionSolution Content {
  get {
    var content = base.Content;
    return (IRegressionSolution)content;
  }
  set {
    base.Content = value;
  }
}
75
// backing list of all solutions that are drawn in the chart
private readonly IList<IRegressionSolution> solutions = new List<IRegressionSolution>();

/// <summary>
/// All solutions currently held by the view, exposed as a plain sequence.
/// </summary>
public IEnumerable<IRegressionSolution> Solutions {
  get {
    // the list itself is handed out, only typed as IEnumerable
    IEnumerable<IRegressionSolution> sequence = solutions;
    return sequence;
  }
}
80
/// <summary>
/// The problem data of the current content, or null when the view has no content.
/// </summary>
public IRegressionProblemData ProblemData {
  get { return Content == null ? null : Content.ProblemData; }
}
87
/// <summary>
/// Subscribes to the model-changed and problem-data-changed events of the content
/// after the base class has registered its own handlers.
/// </summary>
protected override void RegisterContentEvents() {
  base.RegisterContentEvents();
  Content.ModelChanged += Content_ModelChanged;
  Content.ProblemDataChanged += Content_ProblemDataChanged;
}
/// <summary>
/// Removes the handlers that were attached to the content in RegisterContentEvents
/// after the base class has removed its own handlers.
/// </summary>
protected override void DeregisterContentEvents() {
  base.DeregisterContentEvents();
  Content.ModelChanged -= Content_ModelChanged;
  Content.ProblemDataChanged -= Content_ProblemDataChanged;
}
98
/// <summary>
/// Handles a changed model of the content: the solution list is rebuilt and the chart redrawn
/// (for symbolic regression models the features used in the model might have changed).
/// The call is marshalled through Invoke when InvokeRequired is set.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The event arguments.</param>
protected virtual void Content_ModelChanged(object sender, EventArgs e) {
  if (InvokeRequired) Invoke((Action<object, EventArgs>)Content_ModelChanged, sender, e);
  else RebuildSolutionsAndUpdateChart();
}

/// <summary>
/// Handles changed problem data of the content: the solution list is rebuilt and the chart redrawn.
/// The call is marshalled through Invoke when InvokeRequired is set.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The event arguments.</param>
protected virtual void Content_ProblemDataChanged(object sender, EventArgs e) {
  if (InvokeRequired) Invoke((Action<object, EventArgs>)Content_ProblemDataChanged, sender, e);
  else RebuildSolutionsAndUpdateChart();
}

/// <summary>
/// Clears the solution list, re-adds the content as the first element, recalculates the
/// baseline solutions and redraws the chart.
/// </summary>
private void RebuildSolutionsAndUpdateChart() {
  solutions.Clear(); // remove all
  if (Content != null) {
    solutions.Add(Content); // re-add the first solution
    // The baselines are fitted on the training rows; without training rows the average used
    // for the constant baseline would throw on an empty sequence, so they are skipped
    // (same guard as in OnContentChanged).
    if (ProblemData.TrainingIndices.Any()) {
      foreach (var sol in CreateBaselineSolutions()) {
        solutions.Add(sol);
      }
    }
  }
  UpdateChart();
}
/// <summary>
/// Rebuilds the solution list for the new content and redraws the chart.
/// The view becomes read-only when there is no content.
/// </summary>
protected override void OnContentChanged() {
  base.OnContentChanged();

  // start from scratch; the content object is always stored as the first element
  solutions.Clear();

  bool hasContent = Content != null;
  ReadOnly = !hasContent;

  if (hasContent) {
    solutions.Add(Content);

    // baselines are only created when training rows exist;
    // further solutions can be added later by drag&drop
    bool hasTrainingRows = ProblemData.TrainingIndices.Any();
    if (hasTrainingRows) {
      foreach (var baseline in CreateBaselineSolutions()) {
        solutions.Add(baseline);
      }
    }
  }

  UpdateChart();
}
141
/// <summary>
/// Redraws the chart: removes all series and annotations, resets the x axis and adds one
/// series per held solution. Nothing is drawn when there is no content or when the selected
/// partition (training or test) contains no rows.
/// </summary>
protected virtual void UpdateChart() {
  chart.Series.Clear();
  chart.Annotations.Clear();

  var area = chart.ChartAreas[0];
  area.AxisX.Maximum = 0.0;
  area.CursorX.Interval = 0.01;

  if (Content == null) return;

  string partition = cmbSamples.SelectedItem.ToString();
  bool trainingSelectedButEmpty = partition == TrainingSamples && !ProblemData.TrainingIndices.Any();
  if (trainingSelectedButEmpty) return;
  bool testSelectedButEmpty = partition == TestSamples && !ProblemData.TestIndices.Any();
  if (testSelectedButEmpty) return;

  foreach (var solution in Solutions) {
    AddSeries(solution);
  }

  // the axis title names the residual measure and the target variable
  var targetVariable = Content.ProblemData.TargetVariable;
  area.AxisX.Title = string.Format("{0} ({1})", residualComboBox.SelectedItem, targetVariable);
}
158
/// <summary>
/// Adds a series with the error characteristics curve of the given solution to the chart.
/// A solution whose name is already used by a series in the chart is ignored.
/// </summary>
/// <param name="solution">The solution whose residuals are plotted; it is stored in the Tag of the series.</param>
protected void AddSeries(IRegressionSolution solution) {
  if (chart.Series.Any(s => s.Name == solution.Name)) return;

  Series solutionSeries = new Series(solution.Name);
  solutionSeries.Tag = solution;
  solutionSeries.ChartType = SeriesChartType.FastLine;

  var residuals = GetResiduals(GetOriginalValues(), GetEstimatedValues(solution));
  // List.Remove(value) drops only the first matching element; every non-finite residual
  // must go, otherwise Max() and the axis scaling below are corrupted.
  residuals.RemoveAll(r => double.IsNaN(r) || double.IsInfinity(r));

  // Max()/Min() throw on an empty list (e.g. when all residuals were filtered out),
  // so the axis is only rescaled when there is something to measure.
  if (residuals.Count > 0) {
    var area = chart.ChartAreas[0];
    var maxValue = residuals.Max();
    // maxValue == 0 would lead to Log10(0) = -Infinity and a NaN-based scale; keep the current axis then
    if (maxValue > 0 && maxValue >= area.AxisX.Maximum) {
      // round the axis maximum up to the next multiple of the leading power of ten
      double scale = Math.Pow(10, Math.Floor(Math.Log10(maxValue)));
      var maximum = scale * (1 + (int)(maxValue / scale));
      area.AxisX.Maximum = maximum;
      area.CursorX.Interval = residuals.Min() / 100;
    }
  }

  UpdateSeries(residuals, solutionSeries);

  solutionSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(solutionSeries);
  solutionSeries.LegendToolTip = "Double-click to open model";
  chart.Series.Add(solutionSeries);
}
184
/// <summary>
/// Replaces the points of the series with the cumulative curve of the given residuals.
/// The residual list is sorted in place. The curve starts at (0, 0), is cut off at the
/// current x axis maximum and is extended to that maximum with a ratio of 1 if it ends earlier.
/// </summary>
/// <param name="residuals">The residuals to plot; sorted by this method.</param>
/// <param name="series">The series whose points are replaced.</param>
protected void UpdateSeries(List<double> residuals, Series series) {
  series.Points.Clear();
  residuals.Sort();

  bool nothingToPlot = residuals.Count == 0 || residuals.All(double.IsNaN);
  if (nothingToPlot) return;

  double axisMaximum = chart.ChartAreas[0].AxisX.Maximum;

  // builds a data point at (error, ratio) including its tooltip
  Func<double, double, DataPoint> createPoint = (error, ratio) => {
    var p = new DataPoint();
    p.XValue = error;
    p.YValues[0] = ratio;
    p.ToolTip = "Error: " + p.XValue + "\n" + "Samples: " + p.YValues[0];
    return p;
  };

  series.Points.AddXY(0, 0);

  int processed = 0;
  foreach (double residual in residuals) {
    if (residual > axisMaximum) {
      // first residual beyond the visible range: close the curve at the axis maximum
      // with the ratio of the residuals seen so far and stop
      series.Points.Add(createPoint(axisMaximum, ((double)processed) / residuals.Count));
      break;
    }

    processed++;
    series.Points.Add(createPoint(residual, ((double)processed) / residuals.Count));
  }

  // extend the curve to the right border when it ends before the axis maximum
  var lastPoint = series.Points[series.Points.Count - 1];
  if (lastPoint.XValue < axisMaximum) {
    series.Points.Add(createPoint(axisMaximum, 1));
  }
}
215
/// <summary>
/// Returns the target variable values of the partition selected in the samples combo box.
/// </summary>
/// <returns>The target values for the training rows, the test rows or the whole dataset.</returns>
/// <exception cref="NotSupportedException">The selected entry is none of the known partitions.</exception>
protected IEnumerable<double> GetOriginalValues() {
  string partition = cmbSamples.SelectedItem.ToString();

  if (partition == TrainingSamples) {
    return ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
  }
  if (partition == TestSamples) {
    return ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices);
  }
  if (partition == AllSamples) {
    return ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable);
  }

  throw new NotSupportedException();
}
233
/// <summary>
/// Returns the estimated values of the given solution for the partition selected in the samples combo box.
/// </summary>
/// <param name="solution">The solution that provides the estimates.</param>
/// <returns>The training, test or overall estimated values of the solution.</returns>
/// <exception cref="NotSupportedException">The selected entry is none of the known partitions.</exception>
protected IEnumerable<double> GetEstimatedValues(IRegressionSolution solution) {
  string partition = cmbSamples.SelectedItem.ToString();

  if (partition == TrainingSamples) {
    return solution.EstimatedTrainingValues;
  }
  if (partition == TestSamples) {
    return solution.EstimatedTestValues;
  }
  if (partition == AllSamples) {
    return solution.EstimatedValues;
  }

  throw new NotSupportedException();
}
251
/// <summary>
/// Calculates the residuals between original and estimated values using the measure
/// selected in the residual combo box (absolute, squared or relative error).
/// </summary>
/// <param name="originalValues">The target values.</param>
/// <param name="estimatedValues">The estimates, paired with the target values by position.</param>
/// <returns>
/// A new list of residuals. For the relative error, pairs whose original value is (almost) zero
/// are left out, so the list can be shorter than the inputs.
/// </returns>
/// <exception cref="NotSupportedException">The selected entry is none of the known error measures.</exception>
protected virtual List<double> GetResiduals(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues) {
  switch (residualComboBox.SelectedItem.ToString()) {
    case "Absolute error":
      return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y)).ToList();
    case "Squared error":
      return originalValues.Zip(estimatedValues, (x, y) => (x - y) * (x - y)).ToList();
    case "Relative error":
      return originalValues
        // -1 marks pairs whose original value is (almost) zero, where the relative error is undefined
        .Zip(estimatedValues, (x, y) => x.IsAlmost(0.0) ? -1 : Math.Abs((x - y) / x))
        // Drop only the marked pairs (and NaN). The comparison must be >= 0: a strict > 0
        // would also discard exact predictions, whose relative error is 0.
        .Where(r => r >= 0)
        .ToList();
    default:
      throw new NotSupportedException();
  }
}
263
/// <summary>
/// Sums the trapezoids between consecutive points of the series, using 1 - y as height,
/// i.e. the area between the curve and the line y = 1.
/// </summary>
/// <param name="series">The series whose points are evaluated.</param>
/// <returns>The accumulated area; 0 for a series without points.</returns>
private double CalculateAreaOverCurve(Series series) {
  var points = series.Points;
  if (points.Count < 1) return 0;

  double area = 0.0;
  var previous = points[0];
  foreach (var current in points.Skip(1)) {
    double width = current.XValue - previous.XValue;
    double leftHeight = 1 - previous.YValues[0];
    double rightHeight = 1 - current.YValues[0];

    area += (leftHeight + rightHeight) * width / 2;
    previous = current;
  }

  return area;
}
278
/// <summary>
/// Redraws the chart when another sample partition is selected.
/// The call is marshalled through Invoke when InvokeRequired is set.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The event arguments.</param>
protected void cmbSamples_SelectedIndexChanged(object sender, EventArgs e) {
  if (InvokeRequired) {
    Invoke(new Action<object, EventArgs>(cmbSamples_SelectedIndexChanged), sender, e);
    return;
  }

  UpdateChart();
}
283
/// <summary>
/// Opens the solution stored in the Tag of a series when its legend item is double-clicked.
/// Double-clicks on any other chart element are ignored.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The mouse event arguments providing the click position.</param>
private void Chart_MouseDoubleClick(object sender, MouseEventArgs e) {
  var hit = chart.HitTest(e.X, e.Y);
  if (hit.ChartElementType == ChartElementType.LegendItem) {
    var mainForm = MainFormManager.MainForm;
    mainForm.ShowContent((IRegressionSolution)hit.Series.Tag);
  }
}
290
/// <summary>
/// Yields the baseline solutions the content is compared against: first the constant
/// baseline, then the linear baseline. Each one is created only when the sequence is
/// advanced to it.
/// </summary>
/// <returns>A lazily evaluated sequence of the two baseline solutions.</returns>
protected virtual IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
  IRegressionSolution constantBaseline = CreateConstantSolution();
  yield return constantBaseline;

  IRegressionSolution linearBaseline = CreateLinearSolution();
  yield return linearBaseline;
}
295
/// <summary>
/// Creates the constant baseline: a solution of a ConstantModel that is built from the
/// average of the target variable over the training rows.
/// </summary>
/// <returns>The solution, named "Baseline (constant)".</returns>
private IRegressionSolution CreateConstantSolution() {
  var trainingTargets = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
  double meanTarget = trainingTargets.Average();

  var constantModel = new ConstantModel(meanTarget, ProblemData.TargetVariable);
  var baseline = constantModel.CreateRegressionSolution(ProblemData);
  baseline.Name = "Baseline (constant)";
  return baseline;
}
/// <summary>
/// Creates the linear baseline by calling LinearRegression.CreateLinearRegressionSolution
/// on a clone of the problem data. The two error values reported through the out
/// parameters are not used.
/// </summary>
/// <returns>The solution, named "Baseline (linear)".</returns>
private IRegressionSolution CreateLinearSolution() {
  double unusedRmsError;
  double unusedCvRmsError;
  var clonedProblemData = (IRegressionProblemData)ProblemData.Clone();

  var baseline = LinearRegression.CreateLinearRegressionSolution(clonedProblemData, out unusedRmsError, out unusedCvRmsError);
  baseline.Name = "Baseline (linear)";
  return baseline;
}
309
/// <summary>
/// Shows the hand cursor while the mouse is over a legend item and the default cursor otherwise.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The mouse event arguments providing the pointer position.</param>
private void chart_MouseMove(object sender, MouseEventArgs e) {
  var hit = chart.HitTest(e.X, e.Y);
  bool overLegendItem = hit.ChartElementType == ChartElementType.LegendItem;
  Cursor = overLegendItem ? Cursors.Hand : Cursors.Default;
}
318
/// <summary>
/// Accepts a dropped regression solution, or a result whose value is a regression solution,
/// adds a clone of it to the held solutions and redraws the chart.
/// Drops without data in the HeuristicLab drag&amp;drop format are ignored.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The drag event arguments carrying the dropped data.</param>
private void chart_DragDrop(object sender, DragEventArgs e) {
  var format = HeuristicLab.Common.Constants.DragDropDataFormat;
  if (!e.Data.GetDataPresent(format)) return;

  var data = e.Data.GetData(format);

  // the dropped object is either the solution itself or a result wrapping one
  var dropped = data as IRegressionSolution;
  if (dropped == null) {
    var result = data as IResult;
    if (result != null) {
      dropped = result.Value as IRegressionSolution;
    }
  }

  if (dropped != null) {
    solutions.Add((IRegressionSolution)dropped.Clone());
  }

  // the chart is redrawn even when nothing was added
  UpdateChart();
}
335
/// <summary>
/// Decides whether dragged data may be dropped onto the chart. The copy effect is offered
/// only when the view is not read-only and the data is a regression solution or a result
/// whose value is a regression solution; otherwise the effect stays None.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The drag event arguments whose Effect is set.</param>
private void chart_DragEnter(object sender, DragEventArgs e) {
  e.Effect = DragDropEffects.None;

  var format = HeuristicLab.Common.Constants.DragDropDataFormat;
  if (!e.Data.GetDataPresent(format)) return;

  var data = e.Data.GetData(format);
  if (ReadOnly) return;

  bool carriesSolution = data is IRegressionSolution;
  if (!carriesSolution) {
    var result = data as IResult;
    carriesSolution = result != null && result.Value is IRegressionSolution;
  }

  if (carriesSolution) {
    e.Effect = DragDropEffects.Copy;
  }
}
349
/// <summary>
/// Redraws the chart when another entry of the residual combo box is selected.
/// </summary>
/// <param name="sender">The event source.</param>
/// <param name="e">The event arguments.</param>
private void residualComboBox_SelectedIndexChanged(object sender, EventArgs e) {
  // the residuals of every series depend on the selected measure, so all of them are rebuilt
  this.UpdateChart();
}
353  }
354}
Note: See TracBrowser for help on using the repository browser.