Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs @ 17534

Last change on this file since 17534 was 6760, checked in by epitzer, 13 years ago

#1530 integrate changes from trunk

File size: 13.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Windows.Forms;
26using HeuristicLab.Common;
27using HeuristicLab.Data;
28using HeuristicLab.MainForm;
29using HeuristicLab.MainForm.WindowsForms;
30using HeuristicLab.Optimization;
31
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Views {
  /// <summary>
  /// Shows the variable impacts of all visible runs of a <see cref="RunCollection"/> as a matrix
  /// (one row per variable, one column per run) followed by summary statistics over the runs:
  /// median rank, mean, standard deviation and the p-value of a Wilcoxon signed rank test
  /// against a reference variable. For cross-validation runs the impacts of one selectable fold are shown.
  /// </summary>
  [Content(typeof(RunCollection), false)]
  [View("Variable Impacts")]
  public sealed partial class RunCollectionVariableImpactView : AsynchronousContentView {
    // name of the run result that holds the variable impacts (a DoubleMatrix with one row per variable)
    private const string variableImpactResultName = "Variable impacts";
    // name of the run result that holds the fold runs of a cross-validation run (a RunCollection)
    private const string crossValidationFoldsResultName = "CrossValidation Folds";
    // name of the run parameter whose presence marks a run as a cross-validation run
    private const string numberOfFoldsParameterName = "Folds";

    public RunCollectionVariableImpactView() {
      InitializeComponent();
    }

    /// <summary>
    /// The displayed run collection (typed accessor for the content of the base view).
    /// </summary>
    public new RunCollection Content {
      get { return (RunCollection)base.Content; }
      set { base.Content = value; }
    }

    #region events
    /// <summary>
    /// Subscribes to the events of the run collection and of every contained run.
    /// </summary>
    protected override void RegisterContentEvents() {
      base.RegisterContentEvents();
      Content.UpdateOfRunsInProgressChanged += Content_UpdateOfRunsInProgressChanged;
      Content.ItemsAdded += Content_ItemsAdded;
      Content.ItemsRemoved += Content_ItemsRemoved;
      Content.CollectionReset += Content_CollectionReset;
      RegisterRunEvents(Content);
    }

    /// <summary>
    /// Removes all subscriptions made in <see cref="RegisterContentEvents"/>.
    /// </summary>
    protected override void DeregisterContentEvents() {
      // must forward to the base *de*registration; calling base.RegisterContentEvents() here
      // would register the base handlers a second time and never remove them
      base.DeregisterContentEvents();
      Content.UpdateOfRunsInProgressChanged -= Content_UpdateOfRunsInProgressChanged;
      Content.ItemsAdded -= Content_ItemsAdded;
      Content.ItemsRemoved -= Content_ItemsRemoved;
      Content.CollectionReset -= Content_CollectionReset;
      DeregisterRunEvents(Content);
    }

    private void RegisterRunEvents(IEnumerable<IRun> runs) {
      foreach (IRun run in runs) {
        run.Changed += Run_Changed;
      }
    }

    private void DeregisterRunEvents(IEnumerable<IRun> runs) {
      foreach (IRun run in runs) {
        run.Changed -= Run_Changed;
      }
    }

    private void Content_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
      RegisterRunEvents(e.Items);
      UpdateData();
    }

    private void Content_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
      DeregisterRunEvents(e.Items);
      UpdateData();
    }

    private void Content_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
      DeregisterRunEvents(e.OldItems);
      RegisterRunEvents(e.Items);
      UpdateData();
    }

    private void Content_UpdateOfRunsInProgressChanged(object sender, EventArgs e) {
      // refresh only once, after a batch update of the runs has finished
      if (!Content.UpdateOfRunsInProgress) {
        UpdateData();
      }
    }

    private void Run_Changed(object sender, EventArgs e) {
      if (!Content.UpdateOfRunsInProgress) {
        UpdateData();
      }
    }
    #endregion

    protected override void OnContentChanged() {
      base.OnContentChanged();
      UpdateData();
    }

    /// <summary>
    /// Shows the variable impacts of the fold that was selected in the combo box.
    /// </summary>
    private void comboBox_SelectedValueChanged(object sender, EventArgs e) {
      // nothing to show without content or without a selected fold
      if (Content == null || comboBox.SelectedItem == null) {
        return;
      }
      IRun[] cvRuns = Content
        .Where(r => r.Visible && r.Parameters.ContainsKey(numberOfFoldsParameterName))
        .ToArray();
      ShowVariableImpactsOfFold(cvRuns, (int)comboBox.SelectedItem);
    }

    /// <summary>
    /// Rebuilds the fold selection and the displayed matrix from the visible runs of the content.
    /// If at least one visible run is a cross-validation run, the impacts of the first fold of all
    /// cross-validation runs are shown (or nothing if the runs disagree on the number of folds);
    /// otherwise the impacts of all visible runs that have a variable impact result are shown.
    /// </summary>
    private void UpdateData() {
      comboBox.Items.Clear();
      comboBox.Enabled = false;
      if (Content == null) {
        // do not keep showing data of the previous content
        matrixView.Content = null;
        return;
      }

      IRun[] visibleRuns = Content.Where(r => r.Visible).ToArray();
      IRun[] cvRuns = visibleRuns
        .Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName))
        .ToArray();

      if (cvRuns.Length == 0) {
        IRun[] runsWithVariables = visibleRuns
          .Where(r => r.Results.ContainsKey(variableImpactResultName))
          .ToArray();
        matrixView.Content = CalculateVariableImpactMatrix(runsWithVariables);
        return;
      }

      // make sure all runs have the same number of folds
      int nFolds = ((IntValue)cvRuns[0].Parameters[numberOfFoldsParameterName]).Value;
      if (cvRuns.Any(r => ((IntValue)r.Parameters[numberOfFoldsParameterName]).Value != nFolds)) {
        matrixView.Content = null;
        return;
      }

      // populate combobox
      for (int foldIndex = 0; foldIndex < nFolds; foldIndex++) {
        comboBox.Items.Add(foldIndex);
      }
      comboBox.Enabled = true;
      ShowVariableImpactsOfFold(cvRuns, 0);
    }

    /// <summary>
    /// Displays the variable impacts of the fold with the given index for all given cross-validation runs.
    /// Runs that do not provide a fold with variable impacts at that index are left out, in the same way
    /// as runs without a variable impact result are left out for ordinary runs.
    /// </summary>
    /// <param name="cvRuns">The cross-validation runs; the columns are labeled with their names.</param>
    /// <param name="foldIndex">The zero-based index of the fold within each run's fold collection.</param>
    private void ShowVariableImpactsOfFold(IEnumerable<IRun> cvRuns, int foldIndex) {
      List<IRun> foldRuns = new List<IRun>();
      List<string> runNames = new List<string>();
      foreach (IRun cvRun in cvRuns) {
        if (!cvRun.Results.ContainsKey(crossValidationFoldsResultName)) {
          continue;
        }
        RunCollection foldCollection = cvRun.Results[crossValidationFoldsResultName] as RunCollection;
        if (foldCollection == null) {
          continue;
        }
        IRun foldRun = foldCollection.ElementAtOrDefault(foldIndex);
        if (foldRun == null || !foldRun.Results.ContainsKey(variableImpactResultName)) {
          continue;
        }
        // the fold run is only read, so no copy of it is needed
        foldRuns.Add(foldRun);
        runNames.Add(cvRun.Name);
      }
      matrixView.Content = CalculateVariableImpactMatrix(foldRuns.ToArray(), runNames.ToArray());
    }

    /// <summary>
    /// Calculates the variable impact matrix for the given runs using the run names as column names.
    /// </summary>
    private IStringConvertibleMatrix CalculateVariableImpactMatrix(IRun[] runs) {
      return CalculateVariableImpactMatrix(runs, runs.Select(r => r.Name).ToArray());
    }

    /// <summary>
    /// Builds a matrix with one row per variable and one column per run, followed by the columns
    /// "Median Rank", "Mean", "StdDev" and "pValue". Only variables with at least one non-zero impact
    /// are included. The rows (and their names) are ordered by ascending median rank.
    /// </summary>
    /// <param name="runs">The runs; each must contain a variable impact result of type DoubleMatrix.</param>
    /// <param name="runNames">The column names of the runs; must have the same length as <paramref name="runs"/>.</param>
    /// <returns>A new matrix; it has no rows if no variable qualifies.</returns>
    private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) {
      // materialized once because the impacts are enumerated repeatedly below
      DoubleMatrix[] allVariableImpacts = runs
        .Select(run => (DoubleMatrix)run.Results[variableImpactResultName])
        .ToArray();

      // filter variable names: only include names that have at least one non-zero value in a run
      List<string> variableNamesList = allVariableImpacts
        .SelectMany(variableImpacts => variableImpacts.RowNames)
        .Distinct()
        .Where(variableName => GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0)))
        .ToList();

      List<string> statistics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
      List<string> columnNames = new List<string>(runNames);
      columnNames.AddRange(statistics);
      int numberOfRuns = runs.Length;

      DoubleMatrix matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + statistics.Count);
      matrix.SortableView = true;
      matrix.RowNames = variableNamesList;
      matrix.ColumnNames = columnNames;

      // calculate statistics
      List<List<double>> variableImpactsOverRuns = variableNamesList
        .Select(variableName => GetVariableImpacts(variableName, allVariableImpacts).ToList())
        .ToList();
      List<List<double>> variableRanks = variableNamesList
        .Select(variableName => GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
        .ToList();
      if (variableImpactsOverRuns.Count > 0) {
        // the variable with the worst median impact value is chosen as the reference variable
        // this is problematic if all variables are relevant, however works often in practice
        // (OrderBy is stable, so the first variable wins among equal medians)
        int referenceRow = Enumerable.Range(0, variableImpactsOverRuns.Count)
          .OrderBy(index => variableImpactsOverRuns[index].Median())
          .First();
        string referenceVariableName = variableNamesList[referenceRow];

        // for all variables
        for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
          // median rank
          matrix[row, numberOfRuns] = variableRanks[row].Median();
          // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
          matrix[row, numberOfRuns + 1] = Math.Round(variableImpactsOverRuns[row].Average(), 3);
          matrix[row, numberOfRuns + 2] = Math.Round(variableImpactsOverRuns[row].StandardDeviation(), 3);

          double leftTail = 0; double rightTail = 0; double bothTails = 0;
          // differences of impacts for current variable and reference variable, paired per run
          double[] z = GetPairedImpactDifferences(variableNamesList[row], referenceVariableName, allVariableImpacts);
          // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
          alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
          matrix[row, numberOfRuns + 3] = Math.Round(bothTails, 4);
        }
      }

      // fill matrix with impacts from runs
      for (int runIndex = 0; runIndex < allVariableImpacts.Length; runIndex++) {
        DoubleMatrix runVariableImpacts = allVariableImpacts[runIndex];
        int sourceRow = 0;
        foreach (string rowName in runVariableImpacts.RowNames) {
          int targetRow = variableNamesList.IndexOf(rowName);
          if (targetRow > -1) {
            matrix[targetRow, runIndex] = Math.Round(runVariableImpacts[sourceRow, 0], 3);
          }
          sourceRow++;
        }
      }

      // sort by median rank
      int[] sortedIndexes = Enumerable.Range(0, matrix.Rows)
        .OrderBy(index => matrix[index, numberOfRuns])
        .ToArray();
      DoubleMatrix sortedMatrix = (DoubleMatrix)matrix.Clone();
      for (int targetIndex = 0; targetIndex < sortedIndexes.Length; targetIndex++) {
        int sourceIndex = sortedIndexes[targetIndex];
        for (int c = 0; c < matrix.Columns; c++) {
          sortedMatrix[targetIndex, c] = matrix[sourceIndex, c];
        }
      }
      // the row names must be moved together with the row values,
      // otherwise the variable names no longer match the displayed numbers
      sortedMatrix.RowNames = sortedIndexes.Select(index => variableNamesList[index]).ToList();
      return sortedMatrix;
    }

    /// <summary>
    /// Returns, for every run that contains both variables, the impact of the variable minus the
    /// impact of the reference variable. Runs in which one of the two variables is missing are skipped,
    /// so that only values of the same run are ever paired.
    /// </summary>
    private static double[] GetPairedImpactDifferences(string variableName, string referenceVariableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
      List<double> differences = new List<double>();
      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
        double? impact = null;
        double? referenceImpact = null;
        int row = 0;
        foreach (string rowName in runVariableImpacts.RowNames) {
          if (!impact.HasValue && rowName == variableName) {
            impact = runVariableImpacts[row, 0];
          }
          if (!referenceImpact.HasValue && rowName == referenceVariableName) {
            referenceImpact = runVariableImpacts[row, 0];
          }
          row++;
        }
        if (impact.HasValue && referenceImpact.HasValue) {
          differences.Add(impact.Value - referenceImpact.Value);
        }
      }
      return differences.ToArray();
    }

    /// <summary>
    /// Yields the rank of the given variable in every run that contains it. Within a run the variable
    /// with the largest impact gets rank 1; variables with almost equal impacts share the lowest rank
    /// of their group.
    /// </summary>
    private IEnumerable<double> GetVariableImpactRanks(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
        // certainly not yet very efficient because ranks are computed multiple times for the same run
        string[] variableNames = runVariableImpacts.RowNames.ToArray();
        // impacts are negated so that the ascending sort puts the largest impact first
        double[] values = Enumerable.Range(0, runVariableImpacts.Rows)
          .Select(row => runVariableImpacts[row, 0] * -1)
          .ToArray();
        Array.Sort(values, variableNames);
        // calculate ranks
        double[] ranks = new double[values.Length];
        // check for tied ranks
        int i = 0;
        while (i < values.Length) {
          ranks[i] = i + 1;
          int j = i + 1;
          while (j < values.Length && values[i].IsAlmost(values[j])) {
            ranks[j] = ranks[i];
            j++;
          }
          i = j;
        }
        for (int rankIndex = 0; rankIndex < variableNames.Length; rankIndex++) {
          if (variableNames[rankIndex] == variableName) {
            yield return ranks[rankIndex];
          }
        }
      }
    }

    /// <summary>
    /// Yields the impact (first column) of the given variable in every run that contains it.
    /// </summary>
    private IEnumerable<double> GetVariableImpacts(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
        int row = 0;
        foreach (string rowName in runVariableImpacts.RowNames) {
          if (rowName == variableName) {
            yield return runVariableImpacts[row, 0];
          }
          row++;
        }
      }
    }

  }
}
Note: See TracBrowser for help on using the repository browser.