Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Analysis.Statistics.Views/3.3/StatisticalTestsView.cs @ 12416

Last change on this file since 12416 was 12131, checked in by ascheibe, 10 years ago

#2348 fixed condition that checks if there are enough samples for testing

File size: 18.0 KB
RevLine 
[9353]1#region License Information
2/* HeuristicLab
[12012]3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[9353]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
[9913]25using System.Threading.Tasks;
[9353]26using System.Windows.Forms;
[9937]27using HeuristicLab.Collections;
[11693]28using HeuristicLab.Common;
[9353]29using HeuristicLab.Core.Views;
30using HeuristicLab.Data;
31using HeuristicLab.MainForm;
32using HeuristicLab.Optimization;
33using HeuristicLab.Optimization.Views;
34
[11705]35namespace HeuristicLab.Analysis.Statistics.Views {
36  [View("Statistical Tests", "HeuristicLab.Analysis.Statistics.Views.InfoResources.StatisticalTestsInfo.rtf")]
[9353]37  [Content(typeof(RunCollection), false)]
[11693]38  public sealed partial class StatisticalTestsView : ItemView, IConfigureableView {
    // Significance level that computed p-values are compared against (exposed via SignificanceLevel).
39    private double significanceLevel = 0.05;
    // Minimum number of samples each group must contain before tests are run (checked in VerifyDataLength).
[11612]40    private const int requiredSampleSize = 5;
    // Result values per group: data[i] holds the samples of column/group i; (re)built by RebuildDataTable.
[9353]41    private double[][] data;
42
[11693]43    public double SignificanceLevel {
44      get { return significanceLevel; }
45      set {
46        if (!significanceLevel.IsAlmost(value)) {
47          significanceLevel = value;
48          ResetUI();
49          CalculateValues();
50        }
51      }
[9353]52    }
53
54    public new RunCollection Content {
55      get { return (RunCollection)base.Content; }
56      set { base.Content = value; }
57    }
58
59    public override bool ReadOnly {
60      get { return true; }
61      set { /*not needed because results are always readonly */}
62    }
63
[11693]64    public StatisticalTestsView() {
65      InitializeComponent();
66    }
67
68    public void ShowConfiguration() {
69      using (StatisticalTestsConfigurationDialog dlg = new StatisticalTestsConfigurationDialog(this)) {
70        dlg.ShowDialog(this);
71      }
72    }
73
[9353]74    protected override void OnContentChanged() {
75      base.OnContentChanged();
76
77      if (Content != null) {
78        UpdateResultComboBox();
79        UpdateGroupsComboBox();
80        RebuildDataTable();
81      }
[9911]82      UpdateCaption();
[9353]83    }
84
[9911]85    private void UpdateCaption() {
[9913]86      Caption = Content != null ? Content.OptimizerName + " Statistical Tests" : ViewAttribute.GetViewName(GetType());
[9911]87    }
88
[9353]89    #region events
90    protected override void RegisterContentEvents() {
91      base.RegisterContentEvents();
[11696]92      Content.ColumnsChanged += Content_ColumnsChanged;
93      Content.RowsChanged += Content_RowsChanged;
[9937]94      Content.CollectionReset += new CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
[9911]95      Content.UpdateOfRunsInProgressChanged += Content_UpdateOfRunsInProgressChanged;
[9353]96    }
97
98    protected override void DeregisterContentEvents() {
99      base.DeregisterContentEvents();
[11696]100      Content.ColumnsChanged -= Content_ColumnsChanged;
101      Content.RowsChanged -= Content_RowsChanged;
[9937]102      Content.CollectionReset -= new CollectionItemsChangedEventHandler<IRun>(Content_CollectionReset);
[9911]103      Content.UpdateOfRunsInProgressChanged -= Content_UpdateOfRunsInProgressChanged;
[9353]104    }
[9911]105
[11696]106    void Content_RowsChanged(object sender, EventArgs e) {
[9911]107      RebuildDataTable();
108    }
109
[11696]110    void Content_ColumnsChanged(object sender, EventArgs e) {
[12116]111      if (!Content.UpdateOfRunsInProgress) {
112        RebuildDataTable();
113      }
[9911]114    }
115
[11696]116    private void Content_CollectionReset(object sender, CollectionItemsChangedEventArgs<IRun> e) {
[9911]117      RebuildDataTable();
118    }
119
120    void Content_UpdateOfRunsInProgressChanged(object sender, EventArgs e) {
121      if (!Content.UpdateOfRunsInProgress) {
122        RebuildDataTable();
123      }
124    }
[9353]125    #endregion
126
127    private void UpdateGroupsComboBox() {
128      groupComboBox.Items.Clear();
129
130      var parameters = (from run in Content
131                        where run.Visible
132                        from param in run.Parameters
133                        select param.Key).Distinct().ToArray();
134
135      foreach (var p in parameters) {
136        var variations = (from run in Content
137                          where run.Visible && run.Parameters.ContainsKey(p) &&
138                          (run.Parameters[p] is IntValue || run.Parameters[p] is DoubleValue ||
139                          run.Parameters[p] is StringValue || run.Parameters[p] is BoolValue)
140                          select ((dynamic)run.Parameters[p]).Value).Distinct();
141
142        if (variations.Count() > 1) {
143          groupComboBox.Items.Add(p);
144        }
145      }
146
147      if (groupComboBox.Items.Count > 0) {
148        //try to select something different than "Seed" or "Algorithm Name" as this makes no sense
149        //and takes a long time to group
150        List<int> possibleIndizes = new List<int>();
151        for (int i = 0; i < groupComboBox.Items.Count; i++) {
152          if (groupComboBox.Items[i].ToString() != "Seed"
153            && groupComboBox.Items[i].ToString() != "Algorithm Name") {
154            possibleIndizes.Add(i);
155          }
156        }
157
158        if (possibleIndizes.Count > 0) {
159          groupComboBox.SelectedItem = groupComboBox.Items[possibleIndizes.First()];
160        } else {
161          groupComboBox.SelectedItem = groupComboBox.Items[0];
162        }
163      }
164    }
165
166    private string[] GetColumnNames(IEnumerable<IRun> runs) {
167      string parameterName = (string)groupComboBox.SelectedItem;
168      var r = runs.Where(x => x.Parameters.ContainsKey(parameterName));
169      return r.Select(x => ((dynamic)x.Parameters[parameterName]).Value).Distinct().Select(x => (string)x.ToString()).ToArray();
170    }
171
172    private void UpdateResultComboBox() {
173      resultComboBox.Items.Clear();
174      var results = (from run in Content
175                     where run.Visible
176                     from result in run.Results
177                     where result.Value is IntValue || result.Value is DoubleValue
178                     select result.Key).Distinct().ToArray();
179
180      resultComboBox.Items.AddRange(results);
181      if (resultComboBox.Items.Count > 0) resultComboBox.SelectedItem = resultComboBox.Items[0];
182    }
183
[9389]184    private void FillCompComboBox() {
185      string parameterName = (string)groupComboBox.SelectedItem;
186      if (parameterName != null) {
187        string resultName = (string)resultComboBox.SelectedItem;
188        if (resultName != null) {
189          var runs = Content.Where(x => x.Results.ContainsKey(resultName) && x.Visible);
190          var columnNames = GetColumnNames(runs).ToList();
191          groupCompComboBox.Items.Clear();
192          columnNames.ForEach(x => groupCompComboBox.Items.Add(x));
193          if (groupCompComboBox.Items.Count > 0) groupCompComboBox.SelectedItem = groupCompComboBox.Items[0];
194        }
195      }
196    }
197
[9353]198    private void RebuildDataTable() {
199      string parameterName = (string)groupComboBox.SelectedItem;
200      if (parameterName != null) {
201        string resultName = (string)resultComboBox.SelectedItem;
202
203        var runs = Content.Where(x => x.Results.ContainsKey(resultName) && x.Visible);
204        var columnNames = GetColumnNames(runs);
205        var groups = GetGroups(columnNames, runs);
206        data = new double[columnNames.Count()][];
207
208        DoubleMatrix dt = new DoubleMatrix(groups.Select(x => x.Count()).Max(), columnNames.Count());
209        dt.ColumnNames = columnNames;
[9937]210        DataTable histogramDataTable = new DataTable(resultName);
[9353]211
[9937]212        for (int i = 0; i < columnNames.Count(); i++) {
213          int j = 0;
[9353]214          data[i] = new double[groups[i].Count()];
[9937]215          DataRow row = new DataRow(columnNames[i]);
216          row.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Histogram;
217          histogramDataTable.Rows.Add(row);
218
[9353]219          foreach (IRun run in groups[i]) {
220            dt[j, i] = (double)((dynamic)run.Results[resultName]).Value;
221            data[i][j] = dt[j, i];
[9937]222            row.Values.Add(dt[j, i]);
[9353]223            j++;
224          }
225        }
226
[11611]227        GenerateChart(histogramDataTable);
[9353]228        stringConvertibleMatrixView.Content = dt;
229      }
230    }
231
[11611]232    private void GenerateChart(DataTable histogramTable) {
[11837]233      histogramControl.ClearPoints();
[11611]234      foreach (var row in histogramTable.Rows) {
[11612]235        histogramControl.AddPoints(row.Name, row.Values, true);
[11611]236      }
237    }
238
[9353]239    private List<IEnumerable<IRun>> GetGroups(string[] columnNames, IEnumerable<IRun> runs) {
240      List<IEnumerable<IRun>> runCols = new List<IEnumerable<IRun>>();
241      string parameterName = (string)groupComboBox.SelectedItem;
242
243      foreach (string cn in columnNames) {
244        var tmpRuns = runs.Where(x => ((string)((dynamic)x.Parameters[parameterName]).Value.ToString()) == cn);
245        runCols.Add(tmpRuns);
246      }
247
248      return runCols;
249    }
250
[9389]251    private void ResetUI() {
252      normalityLabel.Image = null;
[11695]253      normalityTextLabel.Text = string.Empty;
[9389]254      groupCompLabel.Image = null;
[11695]255      groupComTextLabel.Text = string.Empty;
[9749]256      pairwiseLabel.Image = null;
[11695]257      pairwiseTextLabel.Text = string.Empty;
258
[9389]259      pValTextBox.Text = string.Empty;
260      equalDistsTextBox.Text = string.Empty;
261    }
262
[9913]263    private void resultComboBox_SelectedValueChanged(object sender, EventArgs e) {
264      RebuildDataTable();
265      ResetUI();
266      CalculateValues();
267    }
268
269    private void groupComboBox_SelectedValueChanged(object sender, EventArgs e) {
[11837]270      RebuildDataTable();
[9913]271      FillCompComboBox();
272      ResetUI();
273      CalculateValues();
274    }
275
[9937]276    private bool VerifyDataLength(bool showMessage) {
277      if (data == null || data.Length == 0)
278        return false;
279
280      //alglib needs at least 5 samples for computation
[12131]281      if (data.Any(x => x.Length < requiredSampleSize)) {
[9937]282        if (showMessage)
[11695]283          MessageBox.Show(this, "You need at least " + requiredSampleSize
284            + " samples per group for computing hypothesis tests.", "HeuristicLab", MessageBoxButtons.OK,
[9937]285            MessageBoxIcon.Error);
286        return false;
287      }
288      return true;
289    }
290
[9913]291    private void CalculateValues() {
[9937]292      if (!VerifyDataLength(true))
293        return;
294
[12116]295      if (data != null && data.All(x => x != null)) {
[9922]296        MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>()
297          .AddOperationProgressToView(this, "Calculating...");
[9923]298
299        string curItem = (string)groupCompComboBox.SelectedItem;
300        Task.Factory.StartNew(() => CalculateValuesAsync(curItem));
[9922]301      }
[9913]302    }
303
[9923]304    private void CalculateValuesAsync(string groupName) {
[11696]305      CalculateAllGroupsTest();
306      CalculateNormalityTest();
[9923]307      CalculatePairwiseTest(groupName);
[9913]308
309      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().RemoveOperationProgressFromView(this);
310    }
311
[9923]312    private void CalculatePairwise(string groupName) {
[9937]313      if (!VerifyDataLength(false))
314        return;
315
[11612]316      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().AddOperationProgressToView(pairwiseTestGroupBox, "Calculating...");
[9923]317      Task.Factory.StartNew(() => CalculatePairwiseAsync(groupName));
[9913]318    }
319
[9923]320    private void CalculatePairwiseAsync(string groupName) {
321      CalculatePairwiseTest(groupName);
[9913]322
[11612]323      MainFormManager.GetMainForm<HeuristicLab.MainForm.WindowsForms.MainForm>().RemoveOperationProgressFromView(pairwiseTestGroupBox);
[9913]324    }
325
[11696]326    private void CalculateAllGroupsTest() {
[11692]327      double pval = KruskalWallisTest.Test(data);
[9353]328      pValTextBox.Text = pval.ToString();
[9950]329      if (pval < significanceLevel) {
[11695]330        this.Invoke(new Action(() => {
[11705]331          groupCompLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Default;
[11695]332          groupComTextLabel.Text = "There are groups with different distributions";
333        }));
[9389]334      } else {
[11695]335        this.Invoke(new Action(() => {
336          groupCompLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning;
337          groupComTextLabel.Text = "Groups have an equal distribution";
338        }));
[9389]339      }
[9353]340    }
341
[11696]342    private void CalculateNormalityTest() {
[9353]343      double val;
344      List<double> res = new List<double>();
[11696]345      DoubleMatrix pValsMatrix = new DoubleMatrix(1, stringConvertibleMatrixView.Content.Columns);
346      pValsMatrix.ColumnNames = stringConvertibleMatrixView.Content.ColumnNames;
347      pValsMatrix.RowNames = new string[] { "p-Value" };
[9353]348
349      for (int i = 0; i < data.Length; i++) {
350        alglib.jarqueberatest(data[i], data[i].Length, out val);
351        res.Add(val);
[11696]352        pValsMatrix[0, i] = val;
[9353]353      }
354
[11696]355      // p-value is below significance level and thus the null hypothesis (data is normally distributed) is rejected
[9950]356      if (res.Any(x => x < significanceLevel)) {
[11695]357        this.Invoke(new Action(() => {
358          normalityLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning;
359          normalityTextLabel.Text = "Some groups may not be normally distributed";
360        }));
[9936]361      } else {
[11695]362        this.Invoke(new Action(() => {
[11705]363          normalityLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Default;
[11695]364          normalityTextLabel.Text = "All sample data is normally distributed";
365        }));
[9353]366      }
367
[11601]368      this.Invoke(new Action(() => {
369        normalityStringConvertibleMatrixView.Content = pValsMatrix;
370        normalityStringConvertibleMatrixView.DataGridView.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.AllCells);
371      }));
[9353]372    }
373
[11696]374    private void ShowPairwiseResult(int nrOfEqualDistributions) {
375      double ratio = ((double)nrOfEqualDistributions) / (data.Length - 1) * 100.0;
[9913]376      equalDistsTextBox.Text = ratio.ToString() + " %";
377
[11696]378      if (nrOfEqualDistributions == 0) {
[11695]379        this.Invoke(new Action(() => {
[11705]380          pairwiseLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Default;
[11695]381          pairwiseTextLabel.Text = "All groups have different distributions";
382        }));
[9913]383      } else {
[11695]384        this.Invoke(new Action(() => {
385          pairwiseLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Warning;
386          pairwiseTextLabel.Text = "Some groups have equal distributions";
387        }));
[9913]388      }
389    }
390
[11696]391    private void CalculatePairwiseTest(string groupName) {
392      var columnNames = stringConvertibleMatrixView.Content.ColumnNames.ToList();
393      int colIndex = columnNames.IndexOf(groupName);
394      columnNames = columnNames.Where(x => x != groupName).ToList();
[9957]395
396      double[][] newData = FilterDataForPairwiseTest(colIndex);
397
[9950]398      var rowNames = new string[] { "p-Value of Mann-Whitney U", "Adjusted p-Value of Mann-Whitney U",
[11691]399            "p-Value of T-Test", "Adjusted p-Value of T-Test", "Cohen's d", "Hedges' g" };
[9950]400
[9957]401      DoubleMatrix pValsMatrix = new DoubleMatrix(rowNames.Length, columnNames.Count());
402      pValsMatrix.ColumnNames = columnNames;
[9950]403      pValsMatrix.RowNames = rowNames;
[9353]404
[11696]405      double mwuBothTails;
[11691]406      double tTestBothTails;
[9957]407      double[] mwuPValues = new double[newData.Length];
408      double[] tTestPValues = new double[newData.Length];
[9950]409      bool[] decision = null;
410      double[] adjustedMwuPValues = null;
411      double[] adjustedTtestPValues = null;
[11696]412      int cnt = 0;
[9950]413
[9957]414      for (int i = 0; i < newData.Length; i++) {
[11696]415        mwuBothTails = PairwiseTest.MannWhitneyUTest(data[colIndex], newData[i]);
[11692]416        tTestBothTails = PairwiseTest.TTest(data[colIndex], newData[i]);
[11696]417        mwuPValues[i] = mwuBothTails;
[11692]418        tTestPValues[i] = tTestBothTails;
[11696]419
420        if (mwuBothTails > significanceLevel) {
421          cnt++;
422        }
[9353]423      }
424
[9950]425      adjustedMwuPValues = BonferroniHolm.Calculate(significanceLevel, mwuPValues, out decision);
426      adjustedTtestPValues = BonferroniHolm.Calculate(significanceLevel, tTestPValues, out decision);
427
[9957]428      for (int i = 0; i < newData.Length; i++) {
[11692]429        pValsMatrix[0, i] = mwuPValues[i];
430        pValsMatrix[1, i] = adjustedMwuPValues[i];
431        pValsMatrix[2, i] = tTestPValues[i];
432        pValsMatrix[3, i] = adjustedTtestPValues[i];
433        pValsMatrix[4, i] = SampleSizeDetermination.CalculateCohensD(data[colIndex], newData[i]);
434        pValsMatrix[5, i] = SampleSizeDetermination.CalculateHedgesG(data[colIndex], newData[i]);
[9950]435      }
436
[11601]437      this.Invoke(new Action(() => {
438        pairwiseStringConvertibleMatrixView.Content = pValsMatrix;
439        pairwiseStringConvertibleMatrixView.DataGridView.AutoResizeColumns(DataGridViewAutoSizeColumnsMode.AllCells);
440      }));
[11696]441
442      ShowPairwiseResult(cnt);
[9353]443    }
444
[11696]445    private double[][] FilterDataForPairwiseTest(int columnToRemove) {
446      double[][] newData = new double[data.Length - 1][];
447
448      int i = 0;
449      int l = 0;
450      while (i < data.Length) {
451        if (i != columnToRemove) {
452          double[] row = new double[data[i].Length - 1];
453          newData[l] = row;
454
455          int j = 0, k = 0;
456          while (j < row.Length) {
457            if (i != columnToRemove) {
458              newData[l][j] = data[i][k];
459              j++;
460              k++;
461            } else {
462              k++;
463            }
464          }
465          i++;
466          l++;
467        } else {
468          i++;
469        }
470      }
471      return newData;
472    }
473
[9353]474    private void openBoxPlotToolStripMenuItem_Click(object sender, EventArgs e) {
475      RunCollectionBoxPlotView boxplotView = new RunCollectionBoxPlotView();
476      boxplotView.Content = Content;
[11715]477      boxplotView.SetXAxis(groupComboBox.SelectedItem.ToString());
478      boxplotView.SetYAxis(resultComboBox.SelectedItem.ToString());
479
[9353]480      boxplotView.Show();
481    }
[9389]482
[9913]483    private void groupCompComboBox_SelectedValueChanged(object sender, EventArgs e) {
[9923]484      string curItem = (string)groupCompComboBox.SelectedItem;
485      CalculatePairwise(curItem);
[9389]486    }
[9353]487  }
488}
Note: See TracBrowser for help on using the repository browser.