source: stable/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionVariableImpactsView.cs @ 17500

Last change on this file since 17500 was 17500, checked in by mkommend, 8 months ago

#2973: Merged r17276, r17426, r17430, r17488 into stable.

File size: 10.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using System.Threading.Tasks;
27using System.Windows.Forms;
28using HeuristicLab.Common;
29using HeuristicLab.Data;
30using HeuristicLab.MainForm;
31
namespace HeuristicLab.Problems.DataAnalysis.Views {
  /// <summary>
  /// View that calculates the impact of every input variable of an
  /// <see cref="IClassificationSolution"/> on a background task and shows the result
  /// in <c>variableImpactsArrayView</c>. The displayed order is controlled by
  /// <c>sortByComboBox</c> and <c>ascendingCheckBox</c>.
  /// </summary>
  [View("Variable Impacts")]
  [Content(typeof(IClassificationSolution))]
  public partial class ClassificationSolutionVariableImpactsView : DataAnalysisSolutionEvaluationView {
    private enum SortingCriteria {
      ImpactValue,
      Occurrence,
      VariableName
    }

    //Token source of the most recently started impact calculation (cancelled when the view is hidden/closed or the calculation is superseded)
    private CancellationTokenSource cancellationToken = new CancellationTokenSource();
    //Impacts in the order in which they were calculated (the "Occurrence" ordering); only touched on the UI thread
    private readonly List<Tuple<string, double>> rawVariableImpacts = new List<Tuple<string, double>>();

    public new IClassificationSolution Content {
      get { return (IClassificationSolution)base.Content; }
      set {
        base.Content = value;
      }
    }

    public ClassificationSolutionVariableImpactsView()
      : base() {
      InitializeComponent();

      //Set the default values
      this.dataPartitionComboBox.SelectedIndex = 0;
      this.replacementComboBox.SelectedIndex = 3;
      this.factorVarReplComboBox.SelectedIndex = 0;
      this.sortByComboBox.SelectedItem = SortingCriteria.ImpactValue;
    }

    protected override void RegisterContentEvents() {
      base.RegisterContentEvents();
      Content.ModelChanged += new EventHandler(Content_ModelChanged);
      Content.ProblemDataChanged += new EventHandler(Content_ProblemDataChanged);
    }
    protected override void DeregisterContentEvents() {
      base.DeregisterContentEvents();
      Content.ModelChanged -= new EventHandler(Content_ModelChanged);
      Content.ProblemDataChanged -= new EventHandler(Content_ProblemDataChanged);
    }

    protected virtual void Content_ProblemDataChanged(object sender, EventArgs e) {
      OnContentChanged();
    }
    protected virtual void Content_ModelChanged(object sender, EventArgs e) {
      OnContentChanged();
    }

    /// <summary>
    /// Discards the cached impacts and either clears the view (no content) or starts a new calculation.
    /// </summary>
    protected override void OnContentChanged() {
      base.OnContentChanged();
      rawVariableImpacts.Clear();

      if (Content == null) {
        //A calculation for the previous content may still be running; its result must not be displayed anymore
        cancellationToken.Cancel();
        variableImpactsArrayView.Content = null;
      } else {
        UpdateVariableImpact();
      }
    }
    protected override void OnVisibleChanged(EventArgs e) {
      base.OnVisibleChanged(e);
      if (!this.Visible) {
        cancellationToken.Cancel();
      }
    }

    protected override void OnClosed(FormClosedEventArgs e) {
      base.OnClosed(e);
      cancellationToken.Cancel();
    }

    private void dataPartitionComboBox_SelectedIndexChanged(object sender, EventArgs e) {
      rawVariableImpacts.Clear();
      UpdateVariableImpact();
    }
    private void replacementComboBox_SelectedIndexChanged(object sender, EventArgs e) {
      rawVariableImpacts.Clear();
      UpdateVariableImpact();
    }
    private void sortByComboBox_SelectedIndexChanged(object sender, EventArgs e) {
      //Nothing to sort by if the selection was cleared (unboxing a null SelectedItem would throw)
      if (sortByComboBox.SelectedItem == null) { return; }

      //Update the default ordering (asc,desc), but remove the eventHandler beforehand (otherwise the data would be ordered twice)
      ascendingCheckBox.CheckedChanged -= ascendingCheckBox_CheckedChanged;
      ascendingCheckBox.Checked = (SortingCriteria)sortByComboBox.SelectedItem != SortingCriteria.ImpactValue;
      ascendingCheckBox.CheckedChanged += ascendingCheckBox_CheckedChanged;

      UpdateOrdering();
    }
    private void ascendingCheckBox_CheckedChanged(object sender, EventArgs e) {
      UpdateOrdering();
    }

    /// <summary>
    /// Starts the impact calculation for the current content and combo box selection on a background task
    /// and displays the result once it is available. A calculation that is still running is cancelled first.
    /// </summary>
    /// <remarks>
    /// Intentionally <c>async void</c>: the method is used fire-and-forget from event handlers.
    /// Only <see cref="OperationCanceledException"/> is handled here.
    /// </remarks>
    private async void UpdateVariableImpact() {
      //Snapshot the content on the UI thread; the background task must not read the Content property
      var solution = Content;

      //Check if the selection is valid
      if (solution == null) { return; }
      if (replacementComboBox.SelectedIndex < 0) { return; }
      if (dataPartitionComboBox.SelectedIndex < 0) { return; }
      if (factorVarReplComboBox.SelectedIndex < 0) { return; }

      //Prepare arguments
      var replMethod = (ClassificationSolutionVariableImpactsCalculator.ReplacementMethodEnum)replacementComboBox.Items[replacementComboBox.SelectedIndex];
      var factorReplMethod = (ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum)factorVarReplComboBox.Items[factorVarReplComboBox.SelectedIndex];
      var dataPartition = (ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum)dataPartitionComboBox.SelectedItem;

      variableImpactsArrayView.Caption = solution.Name + " Variable Impacts";
      IProgress progress = Progress.Show(this, "Calculating variable impacts for " + solution.Name);

      //Supersede a calculation that may still be running and keep a local reference to the new source,
      //so that this run always observes its own token even if the field is replaced by a later call
      cancellationToken.Cancel();
      var tokenSource = new CancellationTokenSource();
      cancellationToken = tokenSource;

      try {
        var problemData = solution.ProblemData;
        var model = solution.Model;
        var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(model.VariablesUsedForPrediction));
        //Remember the original ordering of the variables
        var originalVariableOrdering = problemData.Dataset.VariableNames
          .Where(v => inputvariables.Contains(v))
          .Where(v => problemData.Dataset.VariableHasType<double>(v) || problemData.Dataset.VariableHasType<string>(v))
          .ToList();

        var impacts = await Task.Run(() => CalculateVariableImpacts(originalVariableOrdering, model, problemData, solution, dataPartition, replMethod, factorReplMethod, tokenSource.Token, progress));

        //The run was cancelled or superseded right after its last cancellation check: drop the stale result
        if (tokenSource.IsCancellationRequested) { return; }

        //Replace (not append) so that the list never contains the same variable twice
        rawVariableImpacts.Clear();
        rawVariableImpacts.AddRange(impacts);
        UpdateOrdering();
      } catch (OperationCanceledException) {
        //Cancellation is an expected outcome, nothing to display
      } finally {
        Progress.Hide(this);
      }
    }

    /// <summary>
    /// Calculates the impact of every variable in <paramref name="originalVariableOrdering"/>.
    /// Runs on a background thread and therefore only works on the objects passed in.
    /// </summary>
    /// <param name="originalVariableOrdering">Variables to evaluate; the result keeps this order.</param>
    /// <param name="model">Model whose clone is evaluated by the calculator.</param>
    /// <param name="problemData">Problem data providing the dataset, target variable and partitions.</param>
    /// <param name="solution">Solution used to obtain the estimated class values of the selected rows.</param>
    /// <param name="dataPartition">Partition whose rows are used for the calculation.</param>
    /// <param name="replMethod">Replacement method passed through to the calculator.</param>
    /// <param name="factorReplMethod">Factor replacement method passed through to the calculator.</param>
    /// <param name="token">Checked once per variable.</param>
    /// <param name="progress">Receives the progress value and message for every variable.</param>
    /// <returns>One (variable name, impact) tuple per entry of <paramref name="originalVariableOrdering"/>.</returns>
    /// <exception cref="OperationCanceledException">If <paramref name="token"/> is cancelled.</exception>
    private static List<Tuple<string, double>> CalculateVariableImpacts(List<string> originalVariableOrdering,
      IClassificationModel model,
      IClassificationProblemData problemData,
      IClassificationSolution solution,
      ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum dataPartition,
      ClassificationSolutionVariableImpactsCalculator.ReplacementMethodEnum replMethod,
      ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum factorReplMethod,
      CancellationToken token,
      IProgress progress) {
      int count = originalVariableOrdering.Count;
      var impacts = new List<Tuple<string, double>>(count);
      int i = 0;
      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();
      IEnumerable<int> rows = ClassificationSolutionVariableImpactsCalculator.GetPartitionRows(dataPartition, problemData);

      //Calculate the quality of the unmodified model on the selected rows (the measure is defined by the calculator's CalculateQuality)
      IEnumerable<double> targetValuesPartition = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      IEnumerable<double> estimatedClassValuesPartition = solution.GetEstimatedClassValues(rows);

      var originalCalculatorValue = ClassificationSolutionVariableImpactsCalculator.CalculateQuality(targetValuesPartition, estimatedClassValuesPartition);
      var clonedModel = (IClassificationModel)model.Clone();
      //Looked up once per variable below, so enumerate the model's variables only once
      var variablesUsedForPrediction = new HashSet<string>(model.VariablesUsedForPrediction);

      foreach (var variableName in originalVariableOrdering) {
        token.ThrowIfCancellationRequested();
        progress.ProgressValue = (double)++i / count;
        progress.Message = string.Format("Calculating impact for variable {0} ({1} of {2})", variableName, i, count);

        double impact = 0;
        //If the variable isn't used for prediction, it has zero impact.
        if (variablesUsedForPrediction.Contains(variableName)) {
          impact = ClassificationSolutionVariableImpactsCalculator.CalculateImpact(variableName, clonedModel, problemData, modifiableDataset, rows, replMethod, factorReplMethod, targetValuesPartition, originalCalculatorValue);
        }
        impacts.Add(new Tuple<string, double>(variableName, impact));
      }

      return impacts;
    }

    /// <summary>
    /// Updates the <see cref="variableImpactsArrayView"/> according to the selected ordering <see cref="ascendingCheckBox"/> of the selected column <see cref="sortByComboBox"/>.
    /// Does nothing if no sorting criterion is selected or no impacts have been calculated yet.
    /// </summary>
    private void UpdateOrdering() {
      //Check if valid sortingCriteria is selected and data exists
      if (sortByComboBox.SelectedIndex == -1) { return; }
      if (rawVariableImpacts.Count == 0) { return; }

      var selectedItem = (SortingCriteria)sortByComboBox.SelectedItem;
      bool ascending = ascendingCheckBox.Checked;

      IEnumerable<Tuple<string, double>> ordering;

      //Sort accordingly
      switch (selectedItem) {
        case SortingCriteria.ImpactValue:
          ordering = rawVariableImpacts.OrderBy(v => v.Item2);
          break;
        case SortingCriteria.Occurrence:
          ordering = rawVariableImpacts;
          break;
        case SortingCriteria.VariableName:
          ordering = rawVariableImpacts.OrderBy(v => v.Item1, new NaturalStringComparer());
          break;
        default:
          throw new NotImplementedException("Ordering for selected SortingCriteria not implemented");
      }

      if (!ascending) { ordering = ordering.Reverse(); }

      //Sort only once; values and names are both taken from the same materialized sequence
      var orderedEntries = ordering.ToList();

      //Write the data back
      var impactArray = new DoubleArray(orderedEntries.Select(i => i.Item2).ToArray()) {
        ElementNames = orderedEntries.Select(i => i.Item1).ToList()
      };

      //Could be, if the View was closed
      if (!variableImpactsArrayView.IsDisposed) {
        variableImpactsArrayView.Content = (DoubleArray)impactArray.AsReadOnly();
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.