Free cookie consent management tool by TermsFeed Policy Generator

source: branches/GP.Grammar.Editor/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/RunCollectionVariableImpactView.cs @ 6784

Last change on this file since 6784 was 6784, checked in by mkommend, 13 years ago

#1479: Integrated trunk changes.

File size: 13.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Windows.Forms;
26using HeuristicLab.Common;
27using HeuristicLab.Data;
28using HeuristicLab.MainForm;
29using HeuristicLab.MainForm.WindowsForms;
30using HeuristicLab.Optimization;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Views {
33  [Content(typeof(RunCollection), false)]
34  [View("Variable Impacts")]
35  public sealed partial class RunCollectionVariableImpactView : AsynchronousContentView {
36    private const string variableImpactResultName = "Variable impacts";
37    private const string crossValidationFoldsResultName = "CrossValidation Folds";
38    private const string numberOfFoldsParameterName = "Folds";
39    public RunCollectionVariableImpactView() {
40      InitializeComponent();
41    }
42
43    public new RunCollection Content {
44      get { return (RunCollection)base.Content; }
45      set { base.Content = value; }
46    }
47
48    #region events
49    protected override void RegisterContentEvents() {
50      base.RegisterContentEvents();
51      Content.UpdateOfRunsInProgressChanged += new EventHandler(Content_UpdateOfRunsInProgressChanged);
52      Content.ItemsAdded += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
53      Content.ItemsRemoved += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
54      Content.CollectionReset += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
55      RegisterRunEvents(Content);
56    }
57    protected override void DeregisterContentEvents() {
58      base.RegisterContentEvents();
59      Content.UpdateOfRunsInProgressChanged -= new EventHandler(Content_UpdateOfRunsInProgressChanged);
60      Content.ItemsAdded -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
61      Content.ItemsRemoved -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
62      Content.CollectionReset -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
63      DeregisterRunEvents(Content);
64    }
65    private void RegisterRunEvents(IEnumerable<IRun> runs) {
66      foreach (IRun run in runs)
67        run.Changed += new EventHandler(Run_Changed);
68    }
69    private void DeregisterRunEvents(IEnumerable<IRun> runs) {
70      foreach (IRun run in runs)
71        run.Changed -= new EventHandler(Run_Changed);
72    }
73    private void Content_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
74      RegisterRunEvents(e.Items);
75      UpdateData();
76    }
77    private void Content_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
78      DeregisterRunEvents(e.Items);
79      UpdateData();
80    }
81    private void Content_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
82      DeregisterRunEvents(e.OldItems);
83      RegisterRunEvents(e.Items);
84      UpdateData();
85    }
86    private void Content_UpdateOfRunsInProgressChanged(object sender, EventArgs e) {
87      if (!Content.UpdateOfRunsInProgress) UpdateData();
88    }
89    private void Run_Changed(object sender, EventArgs e) {
90      if (!Content.UpdateOfRunsInProgress) UpdateData();
91    }
92    #endregion
93
94    protected override void OnContentChanged() {
95      base.OnContentChanged();
96      this.UpdateData();
97    }
98
99    private void comboBox_SelectedValueChanged(object sender, EventArgs e) {
100      if (comboBox.SelectedItem != null) {
101        var cvRuns = from r in Content
102                     where r.Visible
103                     where r.Parameters.ContainsKey(numberOfFoldsParameterName)
104                     select r;
105        if (comboBox.SelectedIndex == 0) {
106          var selectedFolds = cvRuns
107            .SelectMany(r => (RunCollection)r.Results[crossValidationFoldsResultName])
108            .Where(r => r.Results.ContainsKey(variableImpactResultName));
109          matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.ToArray());
110        } else {
111          var selectedFolds = from r in cvRuns
112                              let foldCollection = (RunCollection)r.Results[crossValidationFoldsResultName]
113                              let run = foldCollection.ElementAt(comboBox.SelectedIndex - 1)
114                              where run.Results.ContainsKey(variableImpactResultName)
115                              select new { run, r.Name };
116          matrixView.Content = CalculateVariableImpactMatrix(selectedFolds.Select(x => x.run).ToArray(), selectedFolds.Select(x => x.Name).ToArray());
117        }
118      }
119    }
120
121
122    private void UpdateData() {
123      if (Content != null) {
124        comboBox.Items.Clear();
125        comboBox.Enabled = false;
126        var visibleRuns = Content.Where(r => r.Visible).ToArray();
127        var representativeCvRun =
128          visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName)).FirstOrDefault();
129        if (representativeCvRun != null) {
130          // make sure all runs have the same number of folds
131          int nFolds = ((IntValue)representativeCvRun.Parameters[numberOfFoldsParameterName]).Value;
132          var cvRuns = visibleRuns.Where(r => r.Parameters.ContainsKey(numberOfFoldsParameterName));
133          if (cvRuns.All(r => ((IntValue)r.Parameters[numberOfFoldsParameterName]).Value == nFolds)) {
134            // populate combobox
135            comboBox.Items.Add("Overall");
136            for (int foldIndex = 0; foldIndex < nFolds; foldIndex++) {
137              comboBox.Items.Add("Fold " + foldIndex);
138            }
139            comboBox.SelectedIndex = 0;
140            comboBox.Enabled = true;
141          } else {
142            matrixView.Content = null;
143          }
144        } else {
145          var runsWithVariables = visibleRuns.Where(r => r.Results.ContainsKey(variableImpactResultName)).ToArray();
146          matrixView.Content = CalculateVariableImpactMatrix(runsWithVariables);
147        }
148      }
149    }
150
151    private IStringConvertibleMatrix CalculateVariableImpactMatrix(IRun[] runs) {
152      return CalculateVariableImpactMatrix(runs, runs.Select(r => r.Name).ToArray());
153    }
154
155    private DoubleMatrix CalculateVariableImpactMatrix(IRun[] runs, string[] runNames) {
156      DoubleMatrix matrix = null;
157      IEnumerable<DoubleMatrix> allVariableImpacts = (from run in runs
158                                                      select run.Results[variableImpactResultName]).Cast<DoubleMatrix>();
159      IEnumerable<string> variableNames = (from variableImpact in allVariableImpacts
160                                           from variableName in variableImpact.RowNames
161                                           select variableName)
162                                          .Distinct();
163      // filter variableNames: only include names that have at least one non-zero value in a run
164      List<string> variableNamesList = (from variableName in variableNames
165                                        where GetVariableImpacts(variableName, allVariableImpacts).Any(x => !x.IsAlmost(0.0))
166                                        select variableName)
167                                       .ToList();
168
169      List<string> statictics = new List<string> { "Median Rank", "Mean", "StdDev", "pValue" };
170      List<string> columnNames = new List<string>(runNames);
171      columnNames.AddRange(statictics);
172      int numberOfRuns = runs.Length;
173
174      matrix = new DoubleMatrix(variableNamesList.Count, numberOfRuns + statictics.Count);
175      matrix.SortableView = true;
176      matrix.RowNames = variableNamesList;
177      matrix.ColumnNames = columnNames;
178
179      // calculate statistics
180      List<List<double>> variableImpactsOverRuns = (from variableName in variableNamesList
181                                                    select GetVariableImpacts(variableName, allVariableImpacts).ToList())
182                                             .ToList();
183      List<List<double>> variableRanks = (from variableName in variableNamesList
184                                          select GetVariableImpactRanks(variableName, allVariableImpacts).ToList())
185                                      .ToList();
186      if (variableImpactsOverRuns.Count() > 0) {
187        // the variable with the worst median impact value is chosen as the reference variable
188        // this is problematic if all variables are relevant, however works often in practice
189        List<double> referenceImpacts = (from impacts in variableImpactsOverRuns
190                                         let avg = impacts.Median()
191                                         orderby avg
192                                         select impacts)
193                                         .First();
194        // for all variables
195        for (int row = 0; row < variableImpactsOverRuns.Count; row++) {
196          // median rank
197          matrix[row, numberOfRuns] = variableRanks[row].Median();
198          // also show mean and std.dev. of relative variable impacts to indicate the relative difference in impacts of variables
199          matrix[row, numberOfRuns + 1] = Math.Round(variableImpactsOverRuns[row].Average(), 3);
200          matrix[row, numberOfRuns + 2] = Math.Round(variableImpactsOverRuns[row].StandardDeviation(), 3);
201
202          double leftTail = 0; double rightTail = 0; double bothTails = 0;
203          // calc differences of impacts for current variable and reference variable
204          double[] z = new double[referenceImpacts.Count];
205          for (int i = 0; i < z.Length; i++) {
206            z[i] = variableImpactsOverRuns[row][i] - referenceImpacts[i];
207          }
208          // wilcoxon signed rank test is used because the impact values of two variables in a single run are not independent
209          alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0, ref bothTails, ref leftTail, ref rightTail);
210          matrix[row, numberOfRuns + 3] = Math.Round(bothTails, 4);
211        }
212      }
213
214      // fill matrix with impacts from runs
215      for (int i = 0; i < runs.Length; i++) {
216        IRun run = runs[i];
217        DoubleMatrix runVariableImpacts = (DoubleMatrix)run.Results[variableImpactResultName];
218        for (int j = 0; j < runVariableImpacts.Rows; j++) {
219          int rowIndex = variableNamesList.FindIndex(s => s == runVariableImpacts.RowNames.ElementAt(j));
220          if (rowIndex > -1) {
221            matrix[rowIndex, i] = Math.Round(runVariableImpacts[j, 0], 3);
222          }
223        }
224      }
225      // sort by median
226      var sortedMatrix = (DoubleMatrix)matrix.Clone();
227      var sortedIndexes = from i in Enumerable.Range(0, sortedMatrix.Rows)
228                          orderby matrix[i, numberOfRuns]
229                          select i;
230
231      int targetIndex = 0;
232      foreach (var sourceIndex in sortedIndexes) {
233        for (int c = 0; c < matrix.Columns; c++)
234          sortedMatrix[targetIndex, c] = matrix[sourceIndex, c];
235        targetIndex++;
236      }
237      return sortedMatrix;
238    }
239
240    private IEnumerable<double> GetVariableImpactRanks(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
241      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
242        // certainly not yet very efficient because ranks are computed multiple times for the same run
243        string[] variableNames = runVariableImpacts.RowNames.ToArray();
244        double[] values = (from row in Enumerable.Range(0, runVariableImpacts.Rows)
245                           select runVariableImpacts[row, 0] * -1)
246                          .ToArray();
247        Array.Sort(values, variableNames);
248        // calculate ranks
249        double[] ranks = new double[values.Length];
250        // check for tied ranks
251        int i = 0;
252        while (i < values.Length) {
253          ranks[i] = i + 1;
254          int j = i + 1;
255          while (j < values.Length && values[i].IsAlmost(values[j])) {
256            ranks[j] = ranks[i];
257            j++;
258          }
259          i = j;
260        }
261        int rankIndex = 0;
262        foreach (string rowVariableName in variableNames) {
263          if (rowVariableName == variableName)
264            yield return ranks[rankIndex];
265          rankIndex++;
266        }
267      }
268    }
269
270    private IEnumerable<double> GetVariableImpacts(string variableName, IEnumerable<DoubleMatrix> allVariableImpacts) {
271      foreach (DoubleMatrix runVariableImpacts in allVariableImpacts) {
272        int row = 0;
273        foreach (string rowName in runVariableImpacts.RowNames) {
274          if (rowName == variableName)
275            yield return runVariableImpacts[row, 0];
276          row++;
277        }
278      }
279    }
280
281  }
282}
Note: See TracBrowser for help on using the repository browser.