Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2695_dataset-ids/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.cs @ 16189

Last change on this file since 16189 was 14255, checked in by pfleck, 8 years ago

#2632

  • Added the name of the target variable in plots and charts (scatter, line, ...).
  • Renamed MathSymbolicDataAnalysisModelView and added two subclasses for regression and classification that show the name of the target variable in the equation. (Added a new Format method to the LatexFormatter, which uses the actual target name when it encounters the StartSymbol.)
File size: 13.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Windows.Forms;
26using System.Windows.Forms.DataVisualization.Charting;
27using HeuristicLab.Algorithms.DataAnalysis;
28using HeuristicLab.Common;
29using HeuristicLab.MainForm;
30using HeuristicLab.Optimization;
31
32namespace HeuristicLab.Problems.DataAnalysis.Views {
33  [View("Error Characteristics Curve")]
34  [Content(typeof(IRegressionSolution))]
35  public partial class RegressionSolutionErrorCharacteristicsCurveView : DataAnalysisSolutionEvaluationView {
36    protected const string TrainingSamples = "Training";
37    protected const string TestSamples = "Test";
38    protected const string AllSamples = "All Samples";
39
40    public RegressionSolutionErrorCharacteristicsCurveView()
41      : base() {
42      InitializeComponent();
43
44      cmbSamples.Items.Add(TrainingSamples);
45      cmbSamples.Items.Add(TestSamples);
46      cmbSamples.Items.Add(AllSamples);
47
48      cmbSamples.SelectedIndex = 0;
49
50      residualComboBox.SelectedIndex = 0;
51
52      chart.CustomizeAllChartAreas();
53      chart.ChartAreas[0].AxisX.Title = residualComboBox.SelectedItem.ToString();
54      chart.ChartAreas[0].AxisX.Minimum = 0.0;
55      chart.ChartAreas[0].AxisX.Maximum = 0.0;
56      chart.ChartAreas[0].AxisX.IntervalAutoMode = IntervalAutoMode.VariableCount;
57      chart.ChartAreas[0].CursorX.Interval = 0.01;
58
59      chart.ChartAreas[0].AxisY.Title = "Ratio of Residuals";
60      chart.ChartAreas[0].AxisY.Minimum = 0.0;
61      chart.ChartAreas[0].AxisY.Maximum = 1.0;
62      chart.ChartAreas[0].AxisY.MajorGrid.Interval = 0.2;
63      chart.ChartAreas[0].CursorY.Interval = 0.01;
64    }
65
66    // The view holds one regression solution as its content but also keeps several other regression solutions for comparison.
67    // The following invariant must hold:
68    // (Solutions.IsEmpty && Content == null) ||
69    // (Solutions[0] == Content && Solutions.All(s => s.ProblemData.TargetVariable == Content.ProblemData.TargetVariable))
70
71    public new IRegressionSolution Content {
72      get { return (IRegressionSolution)base.Content; }
73      set { base.Content = value; }
74    }
75
76    private readonly IList<IRegressionSolution> solutions = new List<IRegressionSolution>();
77    public IEnumerable<IRegressionSolution> Solutions {
78      get { return solutions.AsEnumerable(); }
79    }
80
81    public IRegressionProblemData ProblemData {
82      get {
83        if (Content == null) return null;
84        return Content.ProblemData;
85      }
86    }
87
88    protected override void RegisterContentEvents() {
89      base.RegisterContentEvents();
90      Content.ModelChanged += new EventHandler(Content_ModelChanged);
91      Content.ProblemDataChanged += new EventHandler(Content_ProblemDataChanged);
92    }
93    protected override void DeregisterContentEvents() {
94      base.DeregisterContentEvents();
95      Content.ModelChanged -= new EventHandler(Content_ModelChanged);
96      Content.ProblemDataChanged -= new EventHandler(Content_ProblemDataChanged);
97    }
98
99    protected virtual void Content_ModelChanged(object sender, EventArgs e) {
100      if (InvokeRequired) Invoke((Action<object, EventArgs>)Content_ModelChanged, sender, e);
101      else {
102        // recalculate baseline solutions (for symbolic regression models the features used in the model might have changed)
103        solutions.Clear(); // remove all
104        solutions.Add(Content); // re-add the first solution
105        // and recalculate all other solutions
106        foreach (var sol in CreateBaselineSolutions()) {
107          solutions.Add(sol);
108        }
109        UpdateChart();
110      }
111    }
112    protected virtual void Content_ProblemDataChanged(object sender, EventArgs e) {
113      if (InvokeRequired) Invoke((Action<object, EventArgs>)Content_ProblemDataChanged, sender, e);
114      else {
115        // recalculate baseline solutions
116        solutions.Clear(); // remove all
117        solutions.Add(Content); // re-add the first solution
118        // and recalculate all other solutions
119        foreach (var sol in CreateBaselineSolutions()) {
120          solutions.Add(sol);
121        }
122        UpdateChart();
123      }
124    }
125    protected override void OnContentChanged() {
126      base.OnContentChanged();
127      // the content object is always stored as the first element in solutions
128      solutions.Clear();
129      ReadOnly = Content == null;
130      if (Content != null) {
131        // recalculate all solutions
132        solutions.Add(Content);
133        if (ProblemData.TrainingIndices.Any()) {
134          foreach (var sol in CreateBaselineSolutions())
135            solutions.Add(sol);
136          // more solutions can be added by drag&drop
137        }
138      }
139      UpdateChart();
140    }
141
142    protected virtual void UpdateChart() {
143      chart.Series.Clear();
144      chart.Annotations.Clear();
145      chart.ChartAreas[0].AxisX.Maximum = 0.0;
146      chart.ChartAreas[0].CursorX.Interval = 0.01;
147
148      if (Content == null) return;
149      if (cmbSamples.SelectedItem.ToString() == TrainingSamples && !ProblemData.TrainingIndices.Any()) return;
150      if (cmbSamples.SelectedItem.ToString() == TestSamples && !ProblemData.TestIndices.Any()) return;
151
152      foreach (var sol in Solutions) {
153        AddSeries(sol);
154      }
155
156      chart.ChartAreas[0].AxisX.Title = string.Format("{0} ({1})", residualComboBox.SelectedItem, Content.ProblemData.TargetVariable);
157    }
158
159    protected void AddSeries(IRegressionSolution solution) {
160      if (chart.Series.Any(s => s.Name == solution.Name)) return;
161
162      Series solutionSeries = new Series(solution.Name);
163      solutionSeries.Tag = solution;
164      solutionSeries.ChartType = SeriesChartType.FastLine;
165      var residuals = GetResiduals(GetOriginalValues(), GetEstimatedValues(solution));
166
167      var maxValue = residuals.Max();
168      if (maxValue >= chart.ChartAreas[0].AxisX.Maximum) {
169        double scale = Math.Pow(10, Math.Floor(Math.Log10(maxValue)));
170        var maximum = scale * (1 + (int)(maxValue / scale));
171        chart.ChartAreas[0].AxisX.Maximum = maximum;
172        chart.ChartAreas[0].CursorX.Interval = residuals.Min() / 100;
173      }
174
175      UpdateSeries(residuals, solutionSeries);
176
177      solutionSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(solutionSeries);
178      solutionSeries.LegendToolTip = "Double-click to open model";
179      chart.Series.Add(solutionSeries);
180    }
181
182    protected void UpdateSeries(List<double> residuals, Series series) {
183      series.Points.Clear();
184      residuals.Sort();
185      if (!residuals.Any() || residuals.All(double.IsNaN)) return;
186
187      series.Points.AddXY(0, 0);
188      for (int i = 0; i < residuals.Count; i++) {
189        var point = new DataPoint();
190        if (residuals[i] > chart.ChartAreas[0].AxisX.Maximum) {
191          point.XValue = chart.ChartAreas[0].AxisX.Maximum;
192          point.YValues[0] = ((double)i) / residuals.Count;
193          point.ToolTip = "Error: " + point.XValue + "\n" + "Samples: " + point.YValues[0];
194          series.Points.Add(point);
195          break;
196        }
197
198        point.XValue = residuals[i];
199        point.YValues[0] = ((double)i + 1) / residuals.Count;
200        point.ToolTip = "Error: " + point.XValue + "\n" + "Samples: " + point.YValues[0];
201        series.Points.Add(point);
202      }
203
204      if (series.Points.Last().XValue < chart.ChartAreas[0].AxisX.Maximum) {
205        var point = new DataPoint();
206        point.XValue = chart.ChartAreas[0].AxisX.Maximum;
207        point.YValues[0] = 1;
208        point.ToolTip = "Error: " + point.XValue + "\n" + "Samples: " + point.YValues[0];
209        series.Points.Add(point);
210      }
211    }
212
213    protected IEnumerable<double> GetOriginalValues() {
214      IEnumerable<double> originalValues;
215      switch (cmbSamples.SelectedItem.ToString()) {
216        case TrainingSamples:
217          originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
218          break;
219        case TestSamples:
220          originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices);
221          break;
222        case AllSamples:
223          originalValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable);
224          break;
225        default:
226          throw new NotSupportedException();
227      }
228      return originalValues;
229    }
230
231    protected IEnumerable<double> GetEstimatedValues(IRegressionSolution solution) {
232      IEnumerable<double> estimatedValues;
233      switch (cmbSamples.SelectedItem.ToString()) {
234        case TrainingSamples:
235          estimatedValues = solution.EstimatedTrainingValues;
236          break;
237        case TestSamples:
238          estimatedValues = solution.EstimatedTestValues;
239          break;
240        case AllSamples:
241          estimatedValues = solution.EstimatedValues;
242          break;
243        default:
244          throw new NotSupportedException();
245      }
246      return estimatedValues;
247    }
248
249    protected virtual List<double> GetResiduals(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues) {
250      switch (residualComboBox.SelectedItem.ToString()) {
251        case "Absolute error": return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y)).ToList();
252        case "Squared error": return originalValues.Zip(estimatedValues, (x, y) => (x - y) * (x - y)).ToList();
253        case "Relative error": return originalValues.Zip(estimatedValues, (x, y) => x.IsAlmost(0.0) ? -1 : Math.Abs((x - y) / x))
254          .Where(x => x > 0) // remove entries where the original value is 0
255          .ToList();
256        default: throw new NotSupportedException();
257      }
258    }
259
260    private double CalculateAreaOverCurve(Series series) {
261      if (series.Points.Count < 1) return 0;
262
263      double auc = 0.0;
264      for (int i = 1; i < series.Points.Count; i++) {
265        double width = series.Points[i].XValue - series.Points[i - 1].XValue;
266        double y1 = 1 - series.Points[i - 1].YValues[0];
267        double y2 = 1 - series.Points[i].YValues[0];
268
269        auc += (y1 + y2) * width / 2;
270      }
271
272      return auc;
273    }
274
275    protected void cmbSamples_SelectedIndexChanged(object sender, EventArgs e) {
276      if (InvokeRequired) Invoke((Action<object, EventArgs>)cmbSamples_SelectedIndexChanged, sender, e);
277      else UpdateChart();
278    }
279
280    private void Chart_MouseDoubleClick(object sender, MouseEventArgs e) {
281      HitTestResult result = chart.HitTest(e.X, e.Y);
282      if (result.ChartElementType != ChartElementType.LegendItem) return;
283
284      MainFormManager.MainForm.ShowContent((IRegressionSolution)result.Series.Tag);
285    }
286
287    protected virtual IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
288      yield return CreateConstantSolution();
289      yield return CreateLinearSolution();
290    }
291
292    private IRegressionSolution CreateConstantSolution() {
293      double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).Average();
294      var model = new ConstantModel(averageTrainingTarget, ProblemData.TargetVariable);
295      var solution = model.CreateRegressionSolution(ProblemData);
296      solution.Name = "Baseline (constant)";
297      return solution;
298    }
299    private IRegressionSolution CreateLinearSolution() {
300      double rmsError, cvRmsError;
301      var solution = LinearRegression.CreateLinearRegressionSolution((IRegressionProblemData)ProblemData.Clone(), out rmsError, out cvRmsError);
302      solution.Name = "Baseline (linear)";
303      return solution;
304    }
305
306    private void chart_MouseMove(object sender, MouseEventArgs e) {
307      HitTestResult result = chart.HitTest(e.X, e.Y);
308      if (result.ChartElementType == ChartElementType.LegendItem) {
309        Cursor = Cursors.Hand;
310      } else {
311        Cursor = Cursors.Default;
312      }
313    }
314
315    private void chart_DragDrop(object sender, DragEventArgs e) {
316      if (e.Data.GetDataPresent(HeuristicLab.Common.Constants.DragDropDataFormat)) {
317
318        var data = e.Data.GetData(HeuristicLab.Common.Constants.DragDropDataFormat);
319        var dataAsRegressionSolution = data as IRegressionSolution;
320        var dataAsResult = data as IResult;
321
322        if (dataAsRegressionSolution != null) {
323          solutions.Add((IRegressionSolution)dataAsRegressionSolution.Clone());
324        } else if (dataAsResult != null && dataAsResult.Value is IRegressionSolution) {
325          solutions.Add((IRegressionSolution)dataAsResult.Value.Clone());
326        }
327
328        UpdateChart();
329      }
330    }
331
332    private void chart_DragEnter(object sender, DragEventArgs e) {
333      e.Effect = DragDropEffects.None;
334      if (!e.Data.GetDataPresent(HeuristicLab.Common.Constants.DragDropDataFormat)) return;
335
336      var data = e.Data.GetData(HeuristicLab.Common.Constants.DragDropDataFormat);
337      var dataAsRegressionSolution = data as IRegressionSolution;
338      var dataAsResult = data as IResult;
339
340      if (!ReadOnly &&
341        (dataAsRegressionSolution != null || (dataAsResult != null && dataAsResult.Value is IRegressionSolution))) {
342        e.Effect = DragDropEffects.Copy;
343      }
344    }
345
346    private void residualComboBox_SelectedIndexChanged(object sender, EventArgs e) {
347      UpdateChart();
348    }
349  }
350}
Note: See TracBrowser for help on using the repository browser.