source: branches/StatisticalTesting/HeuristicLab.Analysis.Statistics/3.3/StatisticalTestingView.cs @ 11692

Last change on this file since 11692 was 11692, checked in by ascheibe, 8 years ago

#2031

  • fixed a bug in Cohens d / Hedges g calculation
  • fixed calculation of pairwise tests (no more columns with only zeroes)
  • some refactoring
File size: 17.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading.Tasks;
26using System.Windows.Forms;
27using HeuristicLab.Collections;
28using HeuristicLab.Core.Views;
29using HeuristicLab.Data;
30using HeuristicLab.MainForm;
31using HeuristicLab.Optimization;
32using HeuristicLab.Optimization.Views;
33
namespace HeuristicLab.Analysis.Statistics {
  /// <summary>
  /// View for a <see cref="RunCollection"/> that groups the visible runs by the values of a
  /// selected parameter and runs statistical tests on a selected numeric result:
  /// <c>KruskalWallisTest</c> over all groups, <c>alglib.jarqueberatest</c> per group and
  /// pairwise <c>PairwiseTest</c> comparisons (plus effect sizes) against one chosen group.
  /// </summary>
  /// <remarks>
  /// The tests are started with <c>Task.Factory.StartNew</c>, i.e. they do not run on the UI thread.
  /// Every access to a control from the calculation methods therefore goes through
  /// <c>Invoke</c>.
  /// </remarks>
  [View("Statistical Tests", "HeuristicLab.Analysis.Statistics.InfoResources.StatisticalTestsInfo.rtf")]
  [Content(typeof(RunCollection), false)]
  public sealed partial class StatisticalTestingView : ItemView {
    private const double significanceLevel = 0.05;
    private const int requiredSampleSize = 5;

    // One array of result values per group (= column of the data matrix).
    // Rebuilt by RebuildDataTable and read by all test calculations.
    private double[][] data;

    public StatisticalTestingView() {
      InitializeComponent();
    }

    public new RunCollection Content {
      get { return (RunCollection)base.Content; }
      set { base.Content = value; }
    }

    public override bool ReadOnly {
      get { return true; }
      set { /*not needed because results are always readonly */}
    }

    /// <summary>
    /// Refills the combo boxes and the data table for the new content and updates the caption.
    /// </summary>
    protected override void OnContentChanged() {
      base.OnContentChanged();

      if (Content != null) {
        UpdateResultComboBox();
        UpdateGroupsComboBox();
        FillCompComboBox();
        RebuildDataTable();
      }
      UpdateCaption();
    }

    private void UpdateCaption() {
      Caption = Content != null ? Content.OptimizerName + " Statistical Tests" : ViewAttribute.GetViewName(GetType());
    }

    #region events
    protected override void RegisterContentEvents() {
      base.RegisterContentEvents();
      Content.ItemsAdded += Content_ItemsAdded;
      Content.ItemsRemoved += Content_ItemsRemoved;
      Content.CollectionReset += Content_CollectionReset;
      Content.UpdateOfRunsInProgressChanged += Content_UpdateOfRunsInProgressChanged;
    }

    protected override void DeregisterContentEvents() {
      base.DeregisterContentEvents();
      Content.ItemsAdded -= Content_ItemsAdded;
      Content.ItemsRemoved -= Content_ItemsRemoved;
      Content.CollectionReset -= Content_CollectionReset;
      Content.UpdateOfRunsInProgressChanged -= Content_UpdateOfRunsInProgressChanged;
    }

    private void Content_CollectionReset(object sender, CollectionItemsChangedEventArgs<IRun> e) {
      RebuildDataTable();
    }

    private void Content_ItemsRemoved(object sender, CollectionItemsChangedEventArgs<IRun> e) {
      RebuildDataTable();
    }

    private void Content_ItemsAdded(object sender, CollectionItemsChangedEventArgs<IRun> e) {
      RebuildDataTable();
    }

    // Rebuilds the table only once the run collection reports that its update has finished.
    private void Content_UpdateOfRunsInProgressChanged(object sender, EventArgs e) {
      if (!Content.UpdateOfRunsInProgress) {
        RebuildDataTable();
      }
    }
    #endregion

    /// <summary>
    /// Fills the group combo box with all parameters of visible runs that have an int, double,
    /// string or bool value and take more than one distinct value, then preselects an entry.
    /// </summary>
    private void UpdateGroupsComboBox() {
      groupComboBox.Items.Clear();

      var parameters = (from run in Content
                        where run.Visible
                        from param in run.Parameters
                        select param.Key).Distinct().ToArray();

      foreach (var p in parameters) {
        var variations = (from run in Content
                          where run.Visible && run.Parameters.ContainsKey(p) &&
                          (run.Parameters[p] is IntValue || run.Parameters[p] is DoubleValue ||
                          run.Parameters[p] is StringValue || run.Parameters[p] is BoolValue)
                          select ((dynamic)run.Parameters[p]).Value).Distinct();

        if (variations.Count() > 1) {
          groupComboBox.Items.Add(p);
        }
      }

      if (groupComboBox.Items.Count > 0) {
        //try to select something different than "Seed" or "Algorithm Name" as this makes no sense
        //and takes a long time to group
        object preferred = groupComboBox.Items.Cast<object>()
          .FirstOrDefault(x => x.ToString() != "Seed" && x.ToString() != "Algorithm Name");
        groupComboBox.SelectedItem = preferred ?? groupComboBox.Items[0];
      }
    }

    /// <summary>
    /// Returns the distinct values (as strings) that the selected grouping parameter takes in
    /// <paramref name="runs"/>. Runs that do not have the parameter are ignored.
    /// </summary>
    private string[] GetColumnNames(IEnumerable<IRun> runs) {
      string parameterName = (string)groupComboBox.SelectedItem;
      var r = runs.Where(x => x.Parameters.ContainsKey(parameterName));
      return r.Select(x => ((dynamic)x.Parameters[parameterName]).Value).Distinct().Select(x => (string)x.ToString()).ToArray();
    }

    /// <summary>
    /// Fills the result combo box with the names of all int/double results of visible runs
    /// and selects the first one.
    /// </summary>
    private void UpdateResultComboBox() {
      resultComboBox.Items.Clear();
      var results = (from run in Content
                     where run.Visible
                     from result in run.Results
                     where result.Value is IntValue || result.Value is DoubleValue
                     select result.Key).Distinct().ToArray();

      resultComboBox.Items.AddRange(results);
      if (resultComboBox.Items.Count > 0) resultComboBox.SelectedItem = resultComboBox.Items[0];
    }

    /// <summary>
    /// Fills the combo box used to pick the reference group of the pairwise tests with the
    /// current group names. Does nothing while no parameter or no result is selected.
    /// </summary>
    private void FillCompComboBox() {
      string parameterName = (string)groupComboBox.SelectedItem;
      string resultName = (string)resultComboBox.SelectedItem;
      if (Content == null || parameterName == null || resultName == null) {
        return;
      }

      var runs = Content.Where(x => x.Results.ContainsKey(resultName) && x.Visible).ToList();
      string[] columnNames = GetColumnNames(runs);
      groupCompComboBox.Items.Clear();
      groupCompComboBox.Items.AddRange(columnNames);
      if (groupCompComboBox.Items.Count > 0) groupCompComboBox.SelectedItem = groupCompComboBox.Items[0];
    }

    /// <summary>
    /// Rebuilds <c>data</c>, the matrix shown in the matrix view and the histogram from the
    /// visible runs that contain the selected result, grouped by the selected parameter.
    /// </summary>
    private void RebuildDataTable() {
      string parameterName = (string)groupComboBox.SelectedItem;
      if (Content == null || parameterName == null) {
        return;
      }

      string resultName = (string)resultComboBox.SelectedItem;
      if (resultName == null) {
        // no numeric result to analyse; make sure no test runs on outdated values
        data = new double[0][];
        return;
      }

      var runs = Content.Where(x => x.Results.ContainsKey(resultName) && x.Visible).ToList();
      string[] columnNames = GetColumnNames(runs);
      if (columnNames.Length == 0) {
        // nothing to group (Max() below would throw on an empty sequence)
        data = new double[0][];
        return;
      }

      var groups = GetGroups(columnNames, runs);
      double[][] newData = new double[columnNames.Length][];

      // groups may differ in size; the matrix gets as many rows as the largest group
      DoubleMatrix dt = new DoubleMatrix(groups.Max(x => x.Count()), columnNames.Length);
      dt.ColumnNames = columnNames;
      DataTable histogramDataTable = new DataTable(resultName);

      for (int i = 0; i < columnNames.Length; i++) {
        newData[i] = new double[groups[i].Count()];
        DataRow row = new DataRow(columnNames[i]);
        row.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Histogram;
        histogramDataTable.Rows.Add(row);

        int j = 0;
        foreach (IRun run in groups[i]) {
          double value = (double)((dynamic)run.Results[resultName]).Value;
          dt[j, i] = value;
          newData[i][j] = value;
          row.Values.Add(value);
          j++;
        }
      }

      // publish the completely filled array in one step
      data = newData;

      GenerateChart(histogramDataTable);
      stringConvertibleMatrixView.Content = dt;
    }

    // Hands the values of every row of the table to the histogram control.
    private void GenerateChart(DataTable histogramTable) {
      foreach (var row in histogramTable.Rows) {
        histogramControl.AddPoints(row.Name, row.Values, true);
      }
    }

    /// <summary>
    /// Splits <paramref name="runs"/> into one group per column name. A run belongs to a group
    /// when the string representation of its value for the selected parameter equals the column name.
    /// </summary>
    /// <returns>The groups in the order of <paramref name="columnNames"/>.</returns>
    private List<IEnumerable<IRun>> GetGroups(string[] columnNames, IEnumerable<IRun> runs) {
      List<IEnumerable<IRun>> runCols = new List<IEnumerable<IRun>>();
      string parameterName = (string)groupComboBox.SelectedItem;

      foreach (string cn in columnNames) {
        // Runs without the parameter belong to no group (indexing them would throw).
        // Materialized so that the dynamic comparison is evaluated only once per group.
        var tmpRuns = runs.Where(x => x.Parameters.ContainsKey(parameterName)
          && ((string)((dynamic)x.Parameters[parameterName]).Value.ToString()) == cn).ToList();
        runCols.Add(tmpRuns);
      }

      return runCols;
    }

    // Clears all result indicators and text boxes.
    private void ResetUI() {
      normalityLabel.Image = null;
      groupCompLabel.Image = null;
      pairwiseLabel.Image = null;
      pValTextBox.Text = string.Empty;
      equalDistsTextBox.Text = string.Empty;
    }

    private void resultComboBox_SelectedValueChanged(object sender, EventArgs e) {
      RebuildDataTable();
      ResetUI();
      CalculateValues();
    }

    private void groupComboBox_SelectedValueChanged(object sender, EventArgs e) {
      FillCompComboBox();
      RebuildDataTable();
      ResetUI();
      CalculateValues();
    }

    /// <summary>
    /// Checks that there is data and that every group has more than
    /// <c>requiredSampleSize</c> values.
    /// </summary>
    /// <param name="showMessage">Whether to show an error box when a group is too small.</param>
    /// <returns><c>true</c> if the tests can be calculated.</returns>
    private bool VerifyDataLength(bool showMessage) {
      if (data == null || data.Length == 0)
        return false;

      //alglib needs at least 5 samples for computation
      //NOTE(review): the check below rejects groups of exactly 5 samples, matching the message text
      if (data.Any(x => x.Length <= requiredSampleSize)) {
        if (showMessage)
          MessageBox.Show(this, "You need to choose samples with a size greater 5.", "HeuristicLab", MessageBoxButtons.OK,
            MessageBoxIcon.Error);
        return false;
      }
      return true;
    }

    /// <summary>
    /// Starts the calculation of all tests on a background task if the data is sufficient.
    /// </summary>
    private void CalculateValues() {
      if (!VerifyDataLength(true))
        return;

      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>()
        .AddOperationProgressToView(this, "Calculating...");

      string curItem = (string)groupCompComboBox.SelectedItem;
      Task.Factory.StartNew(() => CalculateValuesAsync(curItem));
    }

    // Runs all tests; executed on a background task.
    private void CalculateValuesAsync(string groupName) {
      try {
        TestAllGroups();
        CalculateNormality();
        CalculateNormalityDetails();
        CalculatePairwiseTest(groupName);
        CalculatePairwiseTestDetails(groupName);
      } finally {
        // always remove the progress again, even if one of the tests throws
        MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().RemoveOperationProgressFromView(this);
      }
    }

    /// <summary>
    /// Starts the calculation of the pairwise tests against <paramref name="groupName"/> on a
    /// background task. Does nothing if no group is given or the data is insufficient.
    /// </summary>
    private void CalculatePairwise(string groupName) {
      if (groupName == null || !VerifyDataLength(false))
        return;

      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().AddOperationProgressToView(pairwiseTestGroupBox, "Calculating...");
      Task.Factory.StartNew(() => CalculatePairwiseAsync(groupName));
    }

    // Runs the pairwise tests; executed on a background task.
    private void CalculatePairwiseAsync(string groupName) {
      try {
        CalculatePairwiseTest(groupName);
        CalculatePairwiseTestDetails(groupName);
      } finally {
        // always remove the progress again, even if one of the tests throws
        MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().RemoveOperationProgressFromView(pairwiseTestGroupBox);
      }
    }

    /// <summary>
    /// Runs <c>KruskalWallisTest.Test</c> on all groups and shows the p-value together with an
    /// icon that depends on whether the p-value is below the significance level.
    /// </summary>
    private void TestAllGroups() {
      double pval = KruskalWallisTest.Test(data);

      // called from a background task: controls may only be touched via Invoke
      this.Invoke(new Action(() => {
        pValTextBox.Text = pval.ToString();
        if (pval < significanceLevel) {
          groupCompLabel.Image = HeuristicLab.Analysis.Statistics.Resources.Default;
        } else {
          groupCompLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning;
        }
      }));
    }

    // Calculates the p-value of alglib.jarqueberatest for every group.
    private double[] ComputeNormalityPValues() {
      double[] pValues = new double[data.Length];
      for (int i = 0; i < data.Length; i++) {
        double val;
        alglib.jarqueberatest(data[i], data[i].Length, out val);
        pValues[i] = val;
      }
      return pValues;
    }

    /// <summary>
    /// Shows a warning icon if the normality test rejects normality for at least one group.
    /// </summary>
    private void CalculateNormality() {
      double[] res = ComputeNormalityPValues();

      // p-value is below significance level and thus the null hypothesis (data is normally distributed) is rejected.
      bool rejected = res.Any(x => x < significanceLevel);
      this.Invoke(new Action(() => {
        if (rejected) {
          normalityLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning;
        } else {
          normalityLabel.Image = HeuristicLab.Analysis.Statistics.Resources.Default;
        }
      }));
    }

    /// <summary>
    /// Shows the p-value of the normality test of every group in the normality matrix view.
    /// </summary>
    private void CalculateNormalityDetails() {
      string[] columnNames = GetMatrixColumnNames();
      if (columnNames == null) {
        return;
      }

      double[] pValues = ComputeNormalityPValues();
      DoubleMatrix pValsMatrix = new DoubleMatrix(1, pValues.Length);
      pValsMatrix.ColumnNames = columnNames;
      pValsMatrix.RowNames = new string[] { "p-Value" };
      for (int i = 0; i < pValues.Length; i++) {
        pValsMatrix[0, i] = pValues[i];
      }

      this.Invoke(new Action(() => {
        normalityStringConvertibleMatrixView.Content = pValsMatrix;
        normalityStringConvertibleMatrixView.DataGridView.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.AllCells);
      }));
    }

    // Reads the column names of the displayed data matrix on the UI thread.
    // Returns null if the matrix view has no content.
    private string[] GetMatrixColumnNames() {
      string[] names = null;
      this.Invoke(new Action(() => {
        var matrix = stringConvertibleMatrixView.Content;
        if (matrix != null) {
          names = matrix.ColumnNames.ToArray();
        }
      }));
      return names;
    }

    // Returns the index of the group called groupName, or -1 if there is no such group.
    private int FindGroupIndex(string[] columnNames, string groupName) {
      if (columnNames == null || groupName == null) {
        return -1;
      }
      int index = Array.IndexOf(columnNames, groupName);
      return index < data.Length ? index : -1;
    }

    /// <summary>
    /// Compares the group <paramref name="groupName"/> with every other group using
    /// <c>PairwiseTest.MannWhitneyUTest</c> and shows the percentage of comparisons whose
    /// p-value is above the significance level.
    /// </summary>
    private void CalculatePairwiseTest(string groupName) {
      int colIndex = FindGroupIndex(GetMatrixColumnNames(), groupName);
      if (colIndex < 0) {
        return;
      }

      double[][] newData = FilterDataForPairwiseTest(colIndex);
      int cnt = 0;
      for (int i = 0; i < newData.Length; i++) {
        double mwuBothtails = PairwiseTest.MannWhitneyUTest(data[colIndex], newData[i]);
        if (mwuBothtails > significanceLevel) {
          cnt++;
        }
      }

      // with a single group there is nothing to compare; avoid 0 / 0
      double ratio = newData.Length > 0 ? ((double)cnt) / newData.Length * 100.0 : 0.0;

      // called from a background task: controls may only be touched via Invoke
      this.Invoke(new Action(() => {
        equalDistsTextBox.Text = ratio.ToString() + " %";
        if (cnt == 0) {
          pairwiseLabel.Image = HeuristicLab.Analysis.Statistics.Resources.Default;
        } else {
          pairwiseLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning;
        }
      }));
    }

    /// <summary>
    /// Returns copies of all groups except the one at index <paramref name="columnToRemove"/>.
    /// The remaining groups keep their order and all of their values.
    /// </summary>
    private double[][] FilterDataForPairwiseTest(int columnToRemove) {
      return data.Where((group, index) => index != columnToRemove)
                 .Select(group => (double[])group.Clone())
                 .ToArray();
    }

    /// <summary>
    /// Fills the pairwise matrix view with one column per group other than
    /// <paramref name="groupName"/>: the p-values of the Mann-Whitney U test and the t-test,
    /// their <c>BonferroniHolm</c> adjusted values, Cohen's d and Hedges' g.
    /// </summary>
    private void CalculatePairwiseTestDetails(string groupName) {
      string[] allColumnNames = GetMatrixColumnNames();
      int colIndex = FindGroupIndex(allColumnNames, groupName);
      if (colIndex < 0) {
        return;
      }

      double[][] newData = FilterDataForPairwiseTest(colIndex);

      // remove the reference group by index so that names and newData always line up
      string[] columnNames = allColumnNames.Where((name, index) => index != colIndex).ToArray();

      var rowNames = new string[] { "p-Value of Mann-Whitney U", "Adjusted p-Value of Mann-Whitney U",
            "p-Value of T-Test", "Adjusted p-Value of T-Test", "Cohen's d", "Hedges' g" };

      DoubleMatrix pValsMatrix = new DoubleMatrix(rowNames.Length, columnNames.Length);
      pValsMatrix.ColumnNames = columnNames;
      pValsMatrix.RowNames = rowNames;

      double[] mwuPValues = new double[newData.Length];
      double[] tTestPValues = new double[newData.Length];
      for (int i = 0; i < newData.Length; i++) {
        mwuPValues[i] = PairwiseTest.MannWhitneyUTest(data[colIndex], newData[i]);
        tTestPValues[i] = PairwiseTest.TTest(data[colIndex], newData[i]);
      }

      bool[] decision;
      double[] adjustedMwuPValues = BonferroniHolm.Calculate(significanceLevel, mwuPValues, out decision);
      double[] adjustedTtestPValues = BonferroniHolm.Calculate(significanceLevel, tTestPValues, out decision);

      for (int i = 0; i < newData.Length; i++) {
        pValsMatrix[0, i] = mwuPValues[i];
        pValsMatrix[1, i] = adjustedMwuPValues[i];
        pValsMatrix[2, i] = tTestPValues[i];
        pValsMatrix[3, i] = adjustedTtestPValues[i];
        pValsMatrix[4, i] = SampleSizeDetermination.CalculateCohensD(data[colIndex], newData[i]);
        pValsMatrix[5, i] = SampleSizeDetermination.CalculateHedgesG(data[colIndex], newData[i]);
      }

      this.Invoke(new Action(() => {
        pairwiseStringConvertibleMatrixView.Content = pValsMatrix;
        pairwiseStringConvertibleMatrixView.DataGridView.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.AllCells);
      }));
    }

    // Opens a box plot view for the same run collection.
    private void openBoxPlotToolStripMenuItem_Click(object sender, EventArgs e) {
      RunCollectionBoxPlotView boxplotView = new RunCollectionBoxPlotView();
      boxplotView.Content = Content;
      // TODO: enable as soon as we move to HeuristicLab.Optimization.Views
      // boxplotView.xAxisComboBox.SelectedItem = xAxisComboBox.SelectedItem;
      // boxplotView.yAxisComboBox.SelectedItem = yAxisComboBox.SelectedItem;
      boxplotView.Show();
    }

    private void groupCompComboBox_SelectedValueChanged(object sender, EventArgs e) {
      string curItem = (string)groupCompComboBox.SelectedItem;
      CalculatePairwise(curItem);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.