Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs @ 6932

Last change on this file since 6932 was 6783, checked in by gkronber, 13 years ago

#1635 fixed a bug leading to incorrectly sorted row names in the variable impact view.

File size: 13.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Windows.Forms;
26using HeuristicLab.Common;
27using HeuristicLab.Data;
28using HeuristicLab.MainForm;
29using HeuristicLab.MainForm.WindowsForms;
30using HeuristicLab.Optimization;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Views {
/// <summary>
/// View for a <see cref="RunCollection"/> that shows the variable impacts of its visible runs as a matrix:
/// one row per variable, one column per run, followed by the statistic columns
/// "Median Rank", "Mean", "StdDev" and "pValue".
/// For cross-validation runs (runs that have a "Folds" parameter) a combo box selects whether the impacts of
/// all folds ("Overall") or of one specific fold are shown.
/// </summary>
[Content(typeof(RunCollection), false)]
[View("Variable Impacts")]
public sealed partial class RunCollectionVariableImpactView : AsynchronousContentView {
  // name of the run result that holds the variable impacts (read as DoubleMatrix, impacts are taken from column 0)
  private const string variableImpactResultName = "Variable impacts";
  // name of the run result that holds the fold runs of a cross-validation run (read as RunCollection)
  private const string crossValidationFoldsResultName = "CrossValidation Folds";
  // name of the run parameter (read as IntValue) with the number of folds; runs that have it are treated as cross-validation runs
  private const string numberOfFoldsParameterName = "Folds";

  /// <summary>Creates the view and initializes its designer-generated controls.</summary>
  public RunCollectionVariableImpactView() {
    InitializeComponent();
  }

  /// <summary>The displayed run collection (typed accessor for the base class content).</summary>
  public new RunCollection Content {
    get { return (RunCollection)base.Content; }
    set { base.Content = value; }
  }

  #region events
  /// <summary>Subscribes to the collection events of <see cref="Content"/> and to the Changed event of every run in it.</summary>
  protected override void RegisterContentEvents() {
    base.RegisterContentEvents();
    Content.UpdateOfRunsInProgressChanged += new EventHandler(Content_UpdateOfRunsInProgressChanged);
    Content.ItemsAdded += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
    Content.ItemsRemoved += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
    Content.CollectionReset += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
    RegisterRunEvents(Content);
  }

  /// <summary>Removes every subscription made by <see cref="RegisterContentEvents"/>.</summary>
  protected override void DeregisterContentEvents() {
    // must call the base *de*registration; calling base.RegisterContentEvents() here would subscribe the
    // base class handlers a second time and never remove them
    base.DeregisterContentEvents();
    Content.UpdateOfRunsInProgressChanged -= new EventHandler(Content_UpdateOfRunsInProgressChanged);
    Content.ItemsAdded -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
    Content.ItemsRemoved -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
    Content.CollectionReset -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
    DeregisterRunEvents(Content);
  }

  /// <summary>Subscribes <see cref="Run_Changed"/> to the Changed event of each given run.</summary>
  private void RegisterRunEvents(IEnumerable<IRun> runs) {
    foreach (IRun run in runs)
      run.Changed += new EventHandler(Run_Changed);
  }

  /// <summary>Unsubscribes <see cref="Run_Changed"/> from the Changed event of each given run.</summary>
  private void DeregisterRunEvents(IEnumerable<IRun> runs) {
    foreach (IRun run in runs)
      run.Changed -= new EventHandler(Run_Changed);
  }

  // runs were added: observe them and rebuild the view
  private void Content_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
    RegisterRunEvents(e.Items);
    UpdateData();
  }

  // runs were removed: stop observing them and rebuild the view
  private void Content_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
    DeregisterRunEvents(e.Items);
    UpdateData();
  }

  // collection was replaced: move the subscriptions from the old to the new runs and rebuild the view
  private void Content_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
    DeregisterRunEvents(e.OldItems);
    RegisterRunEvents(e.Items);
    UpdateData();
  }

  // rebuild the view only once the collection reports that no update of runs is in progress
  private void Content_UpdateOfRunsInProgressChanged(object sender, EventArgs e) {
    if (!Content.UpdateOfRunsInProgress) UpdateData();
  }

  // a single run changed: rebuild the view unless an update of runs is in progress
  private void Run_Changed(object sender, EventArgs e) {
    if (!Content.UpdateOfRunsInProgress) UpdateData();
  }
  #endregion

  /// <summary>Rebuilds the displayed data after the content of the view has been replaced.</summary>
  protected override void OnContentChanged() {
    base.OnContentChanged();
    this.UpdateData();
  }

  /// <summary>
  /// Shows the variable impact matrix for the selected combo box entry: index 0 ("Overall") uses all folds
  /// of all visible cross-validation runs, index k &gt; 0 uses fold k-1 of every visible cross-validation run.
  /// </summary>
  private void comboBox_SelectedValueChanged(object sender, EventArgs e) {
    // UpdateData leaves the combo box untouched when Content is null, so a selection can outlive the content
    if (comboBox.SelectedItem == null || Content == null) return;

    var cvRuns = from r in Content
                 where r.Visible
                 where r.Parameters.ContainsKey(numberOfFoldsParameterName)
                 select r;
    if (comboBox.SelectedIndex == 0) {
      var selectedFolds = cvRuns
        .SelectMany(r => (RunCollection)r.Results[crossValidationFoldsResultName])
        .Where(r => r.Results.ContainsKey(variableImpactResultName));
      matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray());
    } else {
      int foldIndex = comboBox.SelectedIndex - 1;
      // materialize once: the result is projected twice below (runs and column names)
      var selectedFolds = (from r in cvRuns
                           let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName]
                           let run = foldCollection.ElementAt(foldIndex)
                           where run.Results.ContainsKey(variableImpactResultName)
                           // columns are labeled with the name of the cross-validation run, not of the fold
                           select new { run, r.Name })
                          .ToList();
      matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.Select(x => x.run).ToArray(), selectedFolds.Select(x => x.Name).ToArray());
    }
  }

  /// <summary>
  /// Rebuilds combo box and matrix from the visible runs of <see cref="Content"/>. Does nothing when Content is null.
  /// If there are visible cross-validation runs, the combo box is filled (only when all of them use the same
  /// number of folds, otherwise the matrix is cleared); if there are none, the matrix is calculated
  /// directly from the visible runs that have a variable impact result.
  /// </summary>
  private void UpdateData() {
    if (Content == null) return;

    comboBox.Items.Clear();
    comboBox.Enabled = false;
    var visibleRuns = Content.Where(r => r.Visible).ToArray();
    var cvRuns = visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName)).ToArray();
    if (cvRuns.Length > 0) {
      // make sure all runs have the same number of folds (the first cross-validation run is the reference)
      int nFolds = ((IntValue)cvRuns[0].Parameters[numberOfFoldsParameterName]).Value;
      if (cvRuns.All(r => ((IntValue)r.Parameters[numberOfFoldsParameterName]).Value == nFolds)) {
        // populate combobox
        comboBox.Items.Add("Overall");
        for (int foldIndex = 0; foldIndex < nFolds; foldIndex++) {
          comboBox.Items.Add("Fold " + foldIndex);
        }
        // NOTE(review): the matrix itself is presumably filled by comboBox_SelectedValueChanged, which is
        // wired up in the designer part of this class (not visible here) - confirm
        comboBox.SelectedIndex = 0;
        comboBox.Enabled = true;
      } else {
        matrixView.Content = null;
      }
    } else {
      var runsWithVariables = visibleRuns.Where(r => r.Results.ContainsKey(variableImpactResultName)).ToArray();
      matrixView.Content = CalculateVariableImpactMatrix(runsWithVariables);
    }
  }

  /// <summary>Calculates the variable impact matrix for the given runs, using the run names as column names.</summary>
  private IStringConvertibleMatrix CalculateVariableImpactMatrix(IRun[] runs) {
    return CalculateVariableImpactMatrix(runs, runs.Select(r => r.Name).ToArray());
  }

  /// <summary>
  /// Builds the matrix of variable impacts (rounded to 3 digits) for the given runs. Variables whose impact
  /// is (almost) zero in every run are left out. Four statistic columns are appended after the run columns:
  /// median rank, mean and standard deviation of the impacts, and the two-tailed p-value of a Wilcoxon
  /// signed rank test against the variable with the lowest median impact. Rows are sorted by median rank.
  /// </summary>
  /// <param name="runs">Runs to show; each must have a "Variable impacts" result that is a DoubleMatrix.</param>
  /// <param name="runNames">Column names for the runs, in the same order as <paramref name="runs"/>.</param>
  /// <returns>The sorted matrix; cells of variables that do not occur in a run keep the default value.</returns>
  private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) {
    // materialize once: the impact matrices are enumerated for every variable and every statistic below
    List<DoubleMatrix> allVariableImpacts = (from run in runs
                                             select run.Results[variableImpactResultName])
                                            .Cast<DoubleMatrix>()
                                            .ToList();
    IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
                                         from variableName in variableImpact.RowNames
                                         select variableName)
                                        .Distinct();
    // filter variableNames: only include names that have at least one non-zero value in a run
    List<string> variableNamesList = (from variableName in variableNames
                                      where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
                                      select variableName)
                                     .ToList();

    List<string> statistics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
    List<string> columnNames = new List<string>(runNames);
    columnNames.AddRange(statistics);
    int numberOfRuns = runs.Length;

    DoubleMatrix matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + statistics.Count);
    matrix.SortableView = true;
    matrix.ColumnNames = columnNames;

    // calculate statistics
    List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
                                                  select GetVariableImpacts(variableName, allVariableImpacts).ToList())
                                                 .ToList();
    List<List<double>> variableRanks = (from variableName in variableNamesList
                                        select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
                                       .ToList();
    if (variableImpactsOverRuns.Count > 0) {
      // the variable with the worst median impact value is chosen as the reference variable
      // this is problematic if all variables are relevant, however works often in practice
      // (OrderBy is stable, so the first variable wins among equal medians)
      int referenceRow = Enumerable.Range(0, variableImpactsOverRuns.Count)
                                   .OrderBy(index => variableImpactsOverRuns[index].Median())
                                   .First();
      string referenceVariableName = variableNamesList[referenceRow];
      // for all variables
      for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
        // median rank
        matrix[row, numberOfRuns] = variableRanks[row].Median();
        // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
        matrix[row, numberOfRuns + 1] = Math.Round(variableImpactsOverRuns[row].Average(), 3);
        matrix[row, numberOfRuns + 2] = Math.Round(variableImpactsOverRuns[row].StandardDeviation(), 3);

        // calc differences of impacts for current variable and reference variable; the differences are paired
        // per run because a variable does not have to occur in every run (indexing both impact lists with the
        // same position would run out of range or pair values of different runs)
        double[] z = GetPairedImpactDifferences(variableNamesList[row], referenceVariableName, allVariableImpacts);
        if (z.Length > 0) {
          double leftTail = 0; double rightTail = 0; double bothTails = 0;
          // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
          alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
          matrix[row, numberOfRuns + 3] = Math.Round(bothTails, 4);
        } else {
          // no run contains both the variable and the reference variable: the test cannot be calculated
          matrix[row, numberOfRuns + 3] = double.NaN;
        }
      }
    }

    // fill matrix with impacts from runs
    for (int i = 0; i < runs.Length; i++) {
      DoubleMatrix runVariableImpacts = allVariableImpacts[i];
      string[] rowNames = runVariableImpacts.RowNames.ToArray();
      for (int j = 0; j < runVariableImpacts.Rows; j++) {
        // variables that were filtered out above have no row in the matrix
        int rowIndex = variableNamesList.IndexOf(rowNames[j]);
        if (rowIndex > -1) {
          matrix[rowIndex, i] = Math.Round(runVariableImpacts[j, 0], 3);
        }
      }
    }

    // sort by median rank; the order is materialized because it is needed for the values and for the row names
    var sortedMatrix = (DoubleMatrix)matrix.Clone();
    int[] sortedIndexes = Enumerable.Range(0, matrix.Rows)
                                    .OrderBy(index => matrix[index, numberOfRuns])
                                    .ToArray();
    for (int targetIndex = 0; targetIndex < sortedIndexes.Length; targetIndex++) {
      int sourceIndex = sortedIndexes[targetIndex];
      for (int c = 0; c < matrix.Columns; c++)
        sortedMatrix[targetIndex, c] = matrix[sourceIndex, c];
    }
    sortedMatrix.RowNames = sortedIndexes.Select(index => variableNamesList[index]).ToList();

    return sortedMatrix;
  }

  /// <summary>
  /// Returns, for every run that contains both variables, the impact of <paramref name="variableName"/> minus
  /// the impact of <paramref name="referenceVariableName"/> (in run order). Runs that lack one of the two
  /// variables are skipped. If a row name occurs more than once in a run, its first occurrence is used.
  /// </summary>
  private static double[] GetPairedImpactDifferences(string variableName, string referenceVariableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
    List<double> differences = new List<double>();
    foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
      double impact;
      double referenceImpact;
      if (TryGetVariableImpact(runVariableImpacts, variableName, out impact)
          && TryGetVariableImpact(runVariableImpacts, referenceVariableName, out referenceImpact)) {
        differences.Add(impact - referenceImpact);
      }
    }
    return differences.ToArray();
  }

  /// <summary>
  /// Looks up the impact (column 0) of the first row named <paramref name="variableName"/>.
  /// Returns false (and NaN as impact) if the matrix has no such row.
  /// </summary>
  private static bool TryGetVariableImpact(DoubleMatrix runVariableImpacts, string variableName, out double impact) {
    int row = 0;
    foreach (string rowName in runVariableImpacts.RowNames) {
      if (rowName == variableName) {
        impact = runVariableImpacts[row, 0];
        return true;
      }
      row++;
    }
    impact = double.NaN;
    return false;
  }

  /// <summary>
  /// Lazily yields the rank of <paramref name="variableName"/> in every run that contains it. Within a run the
  /// variable with the largest impact gets rank 1; impacts that are almost equal to the first value of a
  /// group of ties share the rank of that first value.
  /// </summary>
  private IEnumerable<double> GetVariableImpactRanks(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
    foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
      // certainly not yet very efficient because ranks are computed multiple times for the same run
      string[] variableNames = runVariableImpacts.RowNames.ToArray();
      // impacts are negated so that the ascending sort below puts the largest impact first
      double[] values = (from row in Enumerable.Range(0, runVariableImpacts.Rows)
                         select runVariableImpacts[row, 0] * -1)
                        .ToArray();
      // sorts the values and reorders the names accordingly
      Array.Sort(values, variableNames);
      // calculate ranks
      double[] ranks = new double[values.Length];
      // check for tied ranks
      int i = 0;
      while (i < values.Length) {
        ranks[i] = i + 1;
        int j = i + 1;
        while (j < values.Length && values[i].IsAlmost(values[j])) {
          ranks[j] = ranks[i];
          j++;
        }
        i = j;
      }
      int rankIndex = 0;
      foreach (string rowVariableName in variableNames) {
        if (rowVariableName == variableName)
          yield return ranks[rankIndex];
        rankIndex++;
      }
    }
  }

  /// <summary>
  /// Lazily yields the impact (column 0) of <paramref name="variableName"/> for every row with that name,
  /// run by run. Runs that do not contain the variable contribute no value.
  /// </summary>
  private IEnumerable<double> GetVariableImpacts(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
    foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
      int row = 0;
      foreach (string rowName in runVariableImpacts.RowNames) {
        if (rowName == variableName)
          yield return runVariableImpacts[row, 0];
        row++;
      }
    }
  }

}
283}
Note: See TracBrowser for help on using the repository browser.