Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionVariableImpactsView.cs @ 16397

Last change on this file since 16397 was 16397, checked in by mkommend, 5 years ago

#2904: Updated branch with trunk changes.

File size: 10.5 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using System.Threading.Tasks;
27using HeuristicLab.Common;
28using HeuristicLab.Data;
29using HeuristicLab.MainForm;
30
namespace HeuristicLab.Problems.DataAnalysis.Views {
  /// <summary>
  /// View for an <see cref="IClassificationSolution"/> that calculates one impact value per input
  /// variable on a background task and displays the (sortable) result in the variable impacts array view.
  /// </summary>
  [View("Variable Impacts")]
  [Content(typeof(IClassificationSolution))]
  public partial class ClassificationSolutionVariableImpactsView : DataAnalysisSolutionEvaluationView {
    /// <summary>
    /// Columns the result can be ordered by. <c>Occurrence</c> keeps the order in which the
    /// variables appear in the dataset.
    /// </summary>
    private enum SortingCriteria {
      ImpactValue,
      Occurrence,
      VariableName
    }

    //Source of the token observed by the most recently started calculation
    private CancellationTokenSource cancellationToken = new CancellationTokenSource();
    //Unsorted (variable name, impact) pairs of the last finished calculation, in dataset order
    private List<Tuple<string, double>> rawVariableImpacts = new List<Tuple<string, double>>();

    /// <summary>
    /// The displayed solution, typed as <see cref="IClassificationSolution"/>.
    /// </summary>
    public new IClassificationSolution Content {
      get { return (IClassificationSolution)base.Content; }
      set {
        base.Content = value;
      }
    }

    /// <summary>
    /// Creates the view, fills the sorting criteria and selects the default entry of every combo box.
    /// </summary>
    public ClassificationSolutionVariableImpactsView()
      : base() {
      InitializeComponent();

      this.sortByComboBox.Items.AddRange(Enum.GetValues(typeof(SortingCriteria)).Cast<object>().ToArray());

      //Set the default values
      //NOTE(review): index 3 of replacementComboBox depends on the item order defined outside this file - confirm
      this.dataPartitionComboBox.SelectedIndex = 0;
      this.replacementComboBox.SelectedIndex = 3;
      this.factorVarReplComboBox.SelectedIndex = 0;
      this.sortByComboBox.SelectedItem = SortingCriteria.ImpactValue;
    }

    /// <summary>
    /// Subscribes to the model and problem data change notifications of the content.
    /// </summary>
    protected override void RegisterContentEvents() {
      base.RegisterContentEvents();
      Content.ModelChanged += Content_ModelChanged;
      Content.ProblemDataChanged += Content_ProblemDataChanged;
    }

    /// <summary>
    /// Removes the subscriptions added in <see cref="RegisterContentEvents"/>.
    /// </summary>
    protected override void DeregisterContentEvents() {
      base.DeregisterContentEvents();
      Content.ModelChanged -= Content_ModelChanged;
      Content.ProblemDataChanged -= Content_ProblemDataChanged;
    }

    /// <summary>
    /// Treats changed problem data like a changed content (triggers a recalculation).
    /// </summary>
    protected virtual void Content_ProblemDataChanged(object sender, EventArgs e) {
      OnContentChanged();
    }

    /// <summary>
    /// Treats a changed model like a changed content (triggers a recalculation).
    /// </summary>
    protected virtual void Content_ModelChanged(object sender, EventArgs e) {
      OnContentChanged();
    }

    /// <summary>
    /// Clears the displayed impacts when there is no content, otherwise starts a new calculation.
    /// </summary>
    protected override void OnContentChanged() {
      base.OnContentChanged();
      if (Content == null) {
        variableImpactsArrayView.Content = null;
      } else {
        UpdateVariableImpact();
      }
    }

    /// <summary>
    /// Requests cancellation of the current calculation whenever this handler is raised
    /// (presumably wired to the view's VisibleChanged event in the designer part).
    /// </summary>
    private void ClassificationSolutionVariableImpactsView_VisibleChanged(object sender, EventArgs e) {
      cancellationToken.Cancel();
    }

    private void dataPartitionComboBox_SelectedIndexChanged(object sender, EventArgs e) {
      UpdateVariableImpact();
    }

    private void replacementComboBox_SelectedIndexChanged(object sender, EventArgs e) {
      UpdateVariableImpact();
    }

    /// <summary>
    /// Applies the default sort direction of the newly selected criterion (descending for
    /// <c>ImpactValue</c>, ascending otherwise) and reorders the displayed impacts.
    /// </summary>
    private void sortByComboBox_SelectedIndexChanged(object sender, EventArgs e) {
      //Without a selection there is nothing to unbox and nothing to order
      if (sortByComboBox.SelectedItem == null) { return; }

      //Update the default ordering (asc,desc), but remove the eventHandler beforehand (otherwise the data would be ordered twice)
      ascendingCheckBox.CheckedChanged -= ascendingCheckBox_CheckedChanged;
      ascendingCheckBox.Checked = (SortingCriteria)sortByComboBox.SelectedItem != SortingCriteria.ImpactValue;
      ascendingCheckBox.CheckedChanged += ascendingCheckBox_CheckedChanged;

      UpdateOrdering();
    }

    private void ascendingCheckBox_CheckedChanged(object sender, EventArgs e) {
      UpdateOrdering();
    }

    /// <summary>
    /// Recalculates the variable impacts for the current content and combo box selection on a
    /// background task while a progress is shown on this view, then displays the ordered result.
    /// Does nothing if there is no content or one of the combo boxes has no selection.
    /// </summary>
    /// <remarks>
    /// The method is <c>async void</c>; an exception thrown by the calculation is not caught here.
    /// </remarks>
    private async void UpdateVariableImpact() {
      //Check if the selection is valid
      if (Content == null) { return; }
      if (replacementComboBox.SelectedIndex < 0) { return; }
      if (dataPartitionComboBox.SelectedIndex < 0) { return; }
      if (factorVarReplComboBox.SelectedIndex < 0) { return; }

      //Prepare arguments; everything the worker needs is read here so that the background
      //task never touches this view's Content property or its mutable fields
      var solution = Content;
      var mainForm = (MainForm.WindowsForms.MainForm)MainFormManager.MainForm;
      var replMethod = (ClassificationSolutionVariableImpactsCalculator.ReplacementMethodEnum)replacementComboBox.SelectedItem;
      var factorReplMethod = (ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum)factorVarReplComboBox.SelectedItem;
      var dataPartition = (ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum)dataPartitionComboBox.SelectedItem;

      variableImpactsArrayView.Caption = solution.Name + " Variable Impacts";
      IProgress progress = mainForm.AddOperationProgressToView(this, "Calculating variable impacts for " + solution.Name);

      //Stop a calculation that was started for an earlier selection before handing out a new token;
      //each run only observes the token it was started with
      cancellationToken.Cancel();
      cancellationToken = new CancellationTokenSource();
      var token = cancellationToken.Token;

      try {
        progress.ProgressValue = 0;

        var model = solution.Model;
        var problemData = solution.ProblemData;
        var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(model.VariablesUsedForPrediction));
        //Remember the original ordering of the variables
        var originalVariableOrdering = problemData.Dataset.VariableNames
          .Where(v => inputvariables.Contains(v))
          .Where(v => problemData.Dataset.VariableHasType<double>(v) || problemData.Dataset.VariableHasType<string>(v))
          .ToList();

        var impacts = await Task.Run(() => CalculateVariableImpacts(originalVariableOrdering, model, problemData, solution, dataPartition, replMethod, factorReplMethod, token, progress));
        //null signals that the calculation was cancelled; keep what is currently displayed
        if (impacts == null) { return; }

        rawVariableImpacts.Clear();
        rawVariableImpacts.AddRange(impacts);
        UpdateOrdering();
      }
      finally {
        mainForm.RemoveOperationProgressFromView(this);
      }
    }

    /// <summary>
    /// Calculates the impact of every variable in <paramref name="originalVariableOrdering"/>.
    /// Intended to run on a background thread; it reports its advance through <paramref name="progress"/>.
    /// </summary>
    /// <param name="originalVariableOrdering">Variables to evaluate; the result keeps this order.</param>
    /// <param name="model">Model of the solution; a clone of it is passed to the impact calculator.</param>
    /// <param name="problemData">Problem data providing the dataset, target variable and partitions.</param>
    /// <param name="solution">Solution that supplies the estimated class values of the unmodified model.</param>
    /// <param name="dataPartition">Partition whose rows are used for the calculation.</param>
    /// <param name="replMethod">Replacement method forwarded to the impact calculator.</param>
    /// <param name="factorReplMethod">Factor replacement method forwarded to the impact calculator.</param>
    /// <param name="token">Token that is checked before each variable is processed.</param>
    /// <param name="progress">Progress object that receives the progress value and status text.</param>
    /// <returns>
    /// One (variable name, impact) tuple per variable, or <c>null</c> if cancellation was requested.
    /// </returns>
    private List<Tuple<string, double>> CalculateVariableImpacts(List<string> originalVariableOrdering,
      IClassificationModel model,
      IClassificationProblemData problemData,
      IClassificationSolution solution,
      ClassificationSolutionVariableImpactsCalculator.DataPartitionEnum dataPartition,
      ClassificationSolutionVariableImpactsCalculator.ReplacementMethodEnum replMethod,
      ClassificationSolutionVariableImpactsCalculator.FactorReplacementMethodEnum factorReplMethod,
      CancellationToken token,
      IProgress progress) {
      int count = originalVariableOrdering.Count;
      var impacts = new List<Tuple<string, double>>(count);
      int i = 0;
      var modifiableDataset = ((Dataset)(problemData.Dataset).Clone()).ToModifiable();

      //Materialized once because both sequences are consumed again for every variable
      IEnumerable<int> rows = ClassificationSolutionVariableImpactsCalculator.GetPartitionRows(dataPartition, problemData).ToList();
      IEnumerable<double> targetValuesPartition = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();

      //Quality of the unmodified model on the selected partition; passed to every impact calculation
      IEnumerable<double> estimatedClassValuesPartition = solution.GetEstimatedClassValues(rows);
      var originalCalculatorValue = ClassificationSolutionVariableImpactsCalculator.CalculateQuality(targetValuesPartition, estimatedClassValuesPartition);

      var clonedModel = (IClassificationModel)model.Clone();
      var usedVariables = new HashSet<string>(model.VariablesUsedForPrediction);

      foreach (var variableName in originalVariableOrdering) {
        if (token.IsCancellationRequested) { return null; }
        progress.ProgressValue = (double)++i / count;
        progress.Status = string.Format("Calculating impact for variable {0} ({1} of {2})", variableName, i, count);

        double impact = 0;
        //If the variable isn't used for prediction, it has zero impact.
        if (usedVariables.Contains(variableName)) {
          impact = ClassificationSolutionVariableImpactsCalculator.CalculateImpact(variableName, clonedModel, problemData, modifiableDataset, rows, replMethod, factorReplMethod, targetValuesPartition, originalCalculatorValue);
        }
        impacts.Add(new Tuple<string, double>(variableName, impact));
      }

      return impacts;
    }

    /// <summary>
    /// Updates the <see cref="variableImpactsArrayView"/> according to the selected ordering <see cref="ascendingCheckBox"/> of the selected Column <see cref="sortByComboBox"/>
    /// The default is "Descending" by "VariableImpact" (as in previous versions)
    /// </summary>
    private void UpdateOrdering() {
      //Check if valid sortingCriteria is selected and data exists
      if (sortByComboBox.SelectedIndex == -1) { return; }
      if (rawVariableImpacts == null) { return; }
      if (!rawVariableImpacts.Any()) { return; }

      var selectedItem = (SortingCriteria)sortByComboBox.SelectedItem;
      bool ascending = ascendingCheckBox.Checked;

      IEnumerable<Tuple<string, double>> orderedEntries = null;

      //Sort accordingly
      switch (selectedItem) {
        case SortingCriteria.ImpactValue:
          orderedEntries = rawVariableImpacts.OrderBy(v => v.Item2);
          break;
        case SortingCriteria.Occurrence:
          orderedEntries = rawVariableImpacts;
          break;
        case SortingCriteria.VariableName:
          orderedEntries = rawVariableImpacts.OrderBy(v => v.Item1, new NaturalStringComparer());
          break;
        default:
          throw new NotImplementedException("Ordering for selected SortingCriteria not implemented");
      }

      if (!ascending) { orderedEntries = orderedEntries.Reverse(); }

      //Evaluate the deferred query once; values and names are both read from the same snapshot
      var entries = orderedEntries.ToList();

      //Write the data back
      var impactArray = new DoubleArray(entries.Select(e => e.Item2).ToArray()) {
        ElementNames = entries.Select(e => e.Item1)
      };

      //Could be, if the View was closed
      if (!variableImpactsArrayView.IsDisposed) {
        variableImpactsArrayView.Content = (DoubleArray)impactArray.AsReadOnly();
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.