Free cookie consent management tool by TermsFeed Policy Generator

source: branches/StatisticalTesting/HeuristicLab.Analysis.Statistics/3.3/StatisticalTestingView.cs @ 11601

Last change on this file since 11601 was 11601, checked in by ascheibe, 9 years ago

#2031

  • fixed column width of p-values
  • started working on drawing a normal distribution over the histogram
File size: 18.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading.Tasks;
26using System.Windows.Forms;
27using HeuristicLab.Collections;
28using HeuristicLab.Core.Views;
29using HeuristicLab.Data;
30using HeuristicLab.MainForm;
31using HeuristicLab.Optimization;
32using HeuristicLab.Optimization.Views;
33
34namespace HeuristicLab.Analysis.Statistics {
35  [View("Statistical Tests", "HeuristicLab.Analysis.Statistics.InfoResources.StatisticalTestsInfo.rtf")]
36  [Content(typeof(RunCollection), false)]
37  public sealed partial class StatisticalTestingView : ItemView {
38    private const double significanceLevel = 0.05;
39    private double[][] data;
40
41    public StatisticalTestingView() {
42      InitializeComponent();
43    }
44
45    public new RunCollection Content {
46      get { return (RunCollection)base.Content; }
47      set { base.Content = value; }
48    }
49
50    public override bool ReadOnly {
51      get { return true; }
52      set { /*not needed because results are always readonly */}
53    }
54
55    protected override void OnContentChanged() {
56      base.OnContentChanged();
57
58      if (Content != null) {
59        UpdateResultComboBox();
60        UpdateGroupsComboBox();
61        FillCompComboBox();
62        RebuildDataTable();
63      }
64      UpdateCaption();
65    }
66
67    private void UpdateCaption() {
68      Caption = Content != null ? Content.OptimizerName + " Statistical Tests" : ViewAttribute.GetViewName(GetType());
69    }
70
71    #region events
72    protected override void RegisterContentEvents() {
73      base.RegisterContentEvents();
74      Content.ItemsAdded += new CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
75      Content.ItemsRemoved += new CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
76      Content.CollectionReset += new CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
77      Content.UpdateOfRunsInProgressChanged += Content_UpdateOfRunsInProgressChanged;
78    }
79
80    protected override void DeregisterContentEvents() {
81      base.DeregisterContentEvents();
82      Content.ItemsAdded -= new CollectionItemsChangedEventHandler<IRun>(Content_ItemsAdded);
83      Content.ItemsRemoved -= new CollectionItemsChangedEventHandler<IRun>(Content_ItemsRemoved);
84      Content.CollectionReset -= new CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
85      Content.UpdateOfRunsInProgressChanged -= Content_UpdateOfRunsInProgressChanged;
86    }
87
88    private void Content_CollectionReset(object sender, CollectionItemsChangedEventArgs<IRun> e) {
89      RebuildDataTable();
90    }
91
92    private void Content_ItemsRemoved(object sender, CollectionItemsChangedEventArgs<IRun> e) {
93      RebuildDataTable();
94    }
95
96    private void Content_ItemsAdded(object sender, CollectionItemsChangedEventArgs<IRun> e) {
97      RebuildDataTable();
98    }
99
100    void Content_UpdateOfRunsInProgressChanged(object sender, EventArgs e) {
101      if (!Content.UpdateOfRunsInProgress) {
102        RebuildDataTable();
103      }
104    }
105    #endregion
106
107    private void UpdateGroupsComboBox() {
108      groupComboBox.Items.Clear();
109
110      var parameters = (from run in Content
111                        where run.Visible
112                        from param in run.Parameters
113                        select param.Key).Distinct().ToArray();
114
115      foreach (var p in parameters) {
116        var variations = (from run in Content
117                          where run.Visible && run.Parameters.ContainsKey(p) &&
118                          (run.Parameters[p] is IntValue || run.Parameters[p] is DoubleValue ||
119                          run.Parameters[p] is StringValue || run.Parameters[p] is BoolValue)
120                          select ((dynamic)run.Parameters[p]).Value).Distinct();
121
122        if (variations.Count() > 1) {
123          groupComboBox.Items.Add(p);
124        }
125      }
126
127      if (groupComboBox.Items.Count > 0) {
128        //try to select something different than "Seed" or "Algorithm Name" as this makes no sense
129        //and takes a long time to group
130        List<int> possibleIndizes = new List<int>();
131        for (int i = 0; i < groupComboBox.Items.Count; i++) {
132          if (groupComboBox.Items[i].ToString() != "Seed"
133            && groupComboBox.Items[i].ToString() != "Algorithm Name") {
134            possibleIndizes.Add(i);
135          }
136        }
137
138        if (possibleIndizes.Count > 0) {
139          groupComboBox.SelectedItem = groupComboBox.Items[possibleIndizes.First()];
140        } else {
141          groupComboBox.SelectedItem = groupComboBox.Items[0];
142        }
143      }
144    }
145
146    private string[] GetColumnNames(IEnumerable<IRun> runs) {
147      string parameterName = (string)groupComboBox.SelectedItem;
148      var r = runs.Where(x => x.Parameters.ContainsKey(parameterName));
149      return r.Select(x => ((dynamic)x.Parameters[parameterName]).Value).Distinct().Select(x => (string)x.ToString()).ToArray();
150    }
151
152    private void UpdateResultComboBox() {
153      resultComboBox.Items.Clear();
154      var results = (from run in Content
155                     where run.Visible
156                     from result in run.Results
157                     where result.Value is IntValue || result.Value is DoubleValue
158                     select result.Key).Distinct().ToArray();
159
160      resultComboBox.Items.AddRange(results);
161      if (resultComboBox.Items.Count > 0) resultComboBox.SelectedItem = resultComboBox.Items[0];
162    }
163
164    private void FillCompComboBox() {
165      string parameterName = (string)groupComboBox.SelectedItem;
166      if (parameterName != null) {
167        string resultName = (string)resultComboBox.SelectedItem;
168        if (resultName != null) {
169          var runs = Content.Where(x => x.Results.ContainsKey(resultName) && x.Visible);
170          var columnNames = GetColumnNames(runs).ToList();
171          groupCompComboBox.Items.Clear();
172          columnNames.ForEach(x => groupCompComboBox.Items.Add(x));
173          if (groupCompComboBox.Items.Count > 0) groupCompComboBox.SelectedItem = groupCompComboBox.Items[0];
174        }
175      }
176    }
177
178    private void RebuildDataTable() {
179      string parameterName = (string)groupComboBox.SelectedItem;
180      if (parameterName != null) {
181        string resultName = (string)resultComboBox.SelectedItem;
182
183        var runs = Content.Where(x => x.Results.ContainsKey(resultName) && x.Visible);
184        var columnNames = GetColumnNames(runs);
185        var groups = GetGroups(columnNames, runs);
186        data = new double[columnNames.Count()][];
187
188        DoubleMatrix dt = new DoubleMatrix(groups.Select(x => x.Count()).Max(), columnNames.Count());
189        dt.ColumnNames = columnNames;
190        DataTable histogramDataTable = new DataTable(resultName);
191
192        for (int i = 0; i < columnNames.Count(); i++) {
193          int j = 0;
194          data[i] = new double[groups[i].Count()];
195          DataRow row = new DataRow(columnNames[i]);
196          row.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Histogram;
197          histogramDataTable.Rows.Add(row);
198
199          foreach (IRun run in groups[i]) {
200            dt[j, i] = (double)((dynamic)run.Results[resultName]).Value;
201            data[i][j] = dt[j, i];
202            row.Values.Add(dt[j, i]);
203            j++;
204          }
205        }
206
207        dataTableView.Content = histogramDataTable;
208        stringConvertibleMatrixView.Content = dt;
209      }
210    }
211
212    private List<IEnumerable<IRun>> GetGroups(string[] columnNames, IEnumerable<IRun> runs) {
213      List<IEnumerable<IRun>> runCols = new List<IEnumerable<IRun>>();
214      string parameterName = (string)groupComboBox.SelectedItem;
215
216      foreach (string cn in columnNames) {
217        var tmpRuns = runs.Where(x => ((string)((dynamic)x.Parameters[parameterName]).Value.ToString()) == cn);
218        runCols.Add(tmpRuns);
219      }
220
221      return runCols;
222    }
223
224    private void ResetUI() {
225      normalityLabel.Image = null;
226      groupCompLabel.Image = null;
227      pairwiseLabel.Image = null;
228      pValTextBox.Text = string.Empty;
229      equalDistsTextBox.Text = string.Empty;
230    }
231
232    private void resultComboBox_SelectedValueChanged(object sender, EventArgs e) {
233      RebuildDataTable();
234      ResetUI();
235      CalculateValues();
236    }
237
238    private void groupComboBox_SelectedValueChanged(object sender, EventArgs e) {
239      FillCompComboBox();
240      RebuildDataTable();
241      ResetUI();
242      CalculateValues();
243    }
244
245    private bool VerifyDataLength(bool showMessage) {
246      if (data == null || data.Length == 0)
247        return false;
248
249      //alglib needs at least 5 samples for computation
250      if (data.Any(x => x.Length <= 5)) {
251        if (showMessage)
252          MessageBox.Show(this, "You need to choose samples with a size greater 5.", "HeuristicLab", MessageBoxButtons.OK,
253            MessageBoxIcon.Error);
254        return false;
255      }
256      return true;
257    }
258
259    private void CalculateValues() {
260      if (!VerifyDataLength(true))
261        return;
262
263      if (data != null) {
264        MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>()
265          .AddOperationProgressToView(this, "Calculating...");
266
267        string curItem = (string)groupCompComboBox.SelectedItem;
268        Task.Factory.StartNew(() => CalculateValuesAsync(curItem));
269      }
270    }
271
272    private void CalculateValuesAsync(string groupName) {
273      TestAllGroups();
274      CalculateNormality();
275      CalculateNormalityDetails();
276      CalculatePairwiseTest(groupName);
277      CalculatePairwiseTestDetails(groupName);
278
279      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().RemoveOperationProgressFromView(this);
280    }
281
282    private void CalculatePairwise(string groupName) {
283      if (!VerifyDataLength(false))
284        return;
285
286      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().AddOperationProgressToView(this, "Calculating...");
287      Task.Factory.StartNew(() => CalculatePairwiseAsync(groupName));
288    }
289
290    private void CalculatePairwiseAsync(string groupName) {
291      CalculatePairwiseTest(groupName);
292      CalculatePairwiseTestDetails(groupName);
293
294      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().RemoveOperationProgressFromView(this);
295    }
296
297    private void TestAllGroups() {
298      double pval = KruskalWallis.Test(data);
299      pValTextBox.Text = pval.ToString();
300      if (pval < significanceLevel) {
301        this.Invoke(new Action(() => { groupCompLabel.Image = HeuristicLab.Analysis.Statistics.Resources.Default; }));
302      } else {
303        this.Invoke(new Action(() => { groupCompLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning; }));
304      }
305    }
306
307    private void CalculateNormality() {
308      double val;
309      List<double> res = new List<double>();
310
311      for (int i = 0; i < data.Length; i++) {
312        alglib.jarqueberatest(data[i], data[i].Length, out val);
313        res.Add(val);
314      }
315
316      // p-value is below significance level and thus the null hypothesis (data is normally distributed) is rejected.
317      if (res.Any(x => x < significanceLevel)) {
318        this.Invoke(new Action(() => { normalityLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning; }));
319      } else {
320        this.Invoke(new Action(() => { normalityLabel.Image = HeuristicLab.Analysis.Statistics.Resources.Default; }));
321      }
322    }
323
324    private void CalculateNormalityDetails() {
325      DoubleMatrix pValsMatrix = new DoubleMatrix(1, stringConvertibleMatrixView.Content.Columns);
326      pValsMatrix.ColumnNames = stringConvertibleMatrixView.Content.ColumnNames;
327      pValsMatrix.RowNames = new string[] { "p-Value" };
328
329      double val;
330      for (int i = 0; i < data.Length; i++) {
331        alglib.jarqueberatest(data[i], data[i].Length, out val);
332        pValsMatrix[0, i] = val;
333      }
334
335      this.Invoke(new Action(() => {
336        normalityStringConvertibleMatrixView.Content = pValsMatrix;
337        normalityStringConvertibleMatrixView.DataGridView.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.AllCells);
338      }));
339    }
340
341    private void CalculatePairwiseTest(string groupName) {
342      int colIndex = 0;
343      IEnumerable<string> columnNames = null;
344      this.Invoke(new Action(() => { columnNames = stringConvertibleMatrixView.Content.ColumnNames; }));
345
346      foreach (string col in columnNames) {
347        if (col == groupName) {
348          break;
349        }
350        colIndex++;
351      }
352
353      double[][] newData = FilterDataForPairwiseTest(colIndex);
354
355      double mwuBothtails;
356      double mwuLefttail;
357      double mwuRighttail;
358      int cnt = 0;
359
360      for (int i = 0; i < newData.Length; i++) {
361        alglib.mannwhitneyutest(data[colIndex], data[colIndex].Length, newData[i], newData[i].Length, out mwuBothtails, out mwuLefttail, out mwuRighttail);
362        if (mwuBothtails > significanceLevel) {
363          cnt++;
364        }
365      }
366
367      double ratio = ((double)cnt) / (data.Length - 1) * 100.0;
368      equalDistsTextBox.Text = ratio.ToString() + " %";
369
370      if (cnt == 0) {
371        this.Invoke(new Action(() => { pairwiseLabel.Image = HeuristicLab.Analysis.Statistics.Resources.Default; }));
372      } else {
373        this.Invoke(new Action(() => { pairwiseLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning; }));
374      }
375    }
376
377    private double[][] FilterDataForPairwiseTest(int columnToRemove) {
378      double[][] newData = new double[data.Length - 1][];
379
380      int i = 0;
381      int l = 0;
382      while (i < data.Length) {
383        if (i != columnToRemove) {
384          double[] row = new double[data[i].Length - 1];
385          newData[l] = row;
386
387          int j = 0, k = 0;
388          while (j < row.Length) {
389            if (i != columnToRemove) {
390              newData[l][j] = data[i][k];
391              j++;
392              k++;
393            } else {
394              k++;
395            }
396          }
397          i++;
398          l++;
399        } else {
400          i++;
401        }
402      }
403      return newData;
404    }
405
406    private void CalculatePairwiseTestDetails(string groupName) {
407      int colIndex = 0;
408      IEnumerable<string> columnNames = null;
409      this.Invoke(new Action(() => { columnNames = stringConvertibleMatrixView.Content.ColumnNames; }));
410
411      foreach (string col in columnNames) {
412        if (col == groupName) {
413          break;
414        }
415        colIndex++;
416      }
417
418      double[][] newData = FilterDataForPairwiseTest(colIndex);
419
420      columnNames = columnNames.Where(x => x != groupName).ToList();
421
422      var rowNames = new string[] { "p-Value of Mann-Whitney U", "Adjusted p-Value of Mann-Whitney U",
423            "p-Value of T-Test", "Adjusted p-Value of T-Test", "Necessary Sample Size for T-Test", "Cohen's d", "Hedges' g" };
424
425      DoubleMatrix pValsMatrix = new DoubleMatrix(rowNames.Length, columnNames.Count());
426      pValsMatrix.ColumnNames = columnNames;
427      pValsMatrix.RowNames = rowNames;
428
429      double mwuBothtails;
430      double mwuLefttail;
431      double mwuRighttail;
432      double tTestLefttail;
433      double[] mwuPValues = new double[newData.Length];
434      double[] tTestPValues = new double[newData.Length];
435      bool[] decision = null;
436      double[] adjustedMwuPValues = null;
437      double[] adjustedTtestPValues = null;
438
439      for (int i = 0; i < newData.Length; i++) {
440        if (i != colIndex) {
441          alglib.mannwhitneyutest(data[colIndex], data[colIndex].Length, newData[i], newData[i].Length, out mwuBothtails,
442            out mwuLefttail, out mwuRighttail);
443          tTestLefttail = TTest.Test(data[colIndex], newData[i]);
444          mwuPValues[i] = mwuBothtails;
445          tTestPValues[i] = tTestLefttail;
446        }
447      }
448
449      adjustedMwuPValues = BonferroniHolm.Calculate(significanceLevel, mwuPValues, out decision);
450      adjustedTtestPValues = BonferroniHolm.Calculate(significanceLevel, tTestPValues, out decision);
451
452      for (int i = 0; i < newData.Length; i++) {
453        if (i != colIndex) {
454          pValsMatrix[0, i] = mwuPValues[i];
455          pValsMatrix[1, i] = adjustedMwuPValues[i];
456          pValsMatrix[2, i] = tTestPValues[i];
457          pValsMatrix[3, i] = adjustedTtestPValues[i];
458          pValsMatrix[4, i] = TTest.GetOptimalSampleSize(data[colIndex], newData[i]);
459          pValsMatrix[5, i] = SampleSizeDetermination.CalculateCohensD(data[colIndex], newData[i]);
460          pValsMatrix[6, i] = SampleSizeDetermination.CalculateHedgesG(data[colIndex], newData[i]);
461        }
462      }
463
464      this.Invoke(new Action(() => {
465        pairwiseStringConvertibleMatrixView.Content = pValsMatrix;
466        pairwiseStringConvertibleMatrixView.DataGridView.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.AllCells);
467      }));
468    }
469
470    private void openBoxPlotToolStripMenuItem_Click(object sender, EventArgs e) {
471      RunCollectionBoxPlotView boxplotView = new RunCollectionBoxPlotView();
472      boxplotView.Content = Content;
473      // TODO: enable as soon as we move to HeuristicLab.Optimization.Views
474      // boxplotView.xAxisComboBox.SelectedItem = xAxisComboBox.SelectedItem;
475      // boxplotView.yAxisComboBox.SelectedItem = yAxisComboBox.SelectedItem;
476      boxplotView.Show();
477    }
478
479    private void groupCompComboBox_SelectedValueChanged(object sender, EventArgs e) {
480      string curItem = (string)groupCompComboBox.SelectedItem;
481      CalculatePairwise(curItem);
482    }
483  }
484}
Note: See TracBrowser for help on using the repository browser.