Free cookie consent management tool by TermsFeed Policy Generator

source: branches/StatisticalTesting/HeuristicLab.Analysis.Statistics/3.3/KruskalWallisTest.cs @ 11699

Last change on this file since 11699 was 11699, checked in by ascheibe, 9 years ago

#2031

  • fixed confidence intervals calculation
  • added more unit tests
  • some cosmetic changes
File size: 4.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25
26namespace HeuristicLab.Analysis.Statistics {
27  public static class KruskalWallisTest {
28    /// <summary>
29    /// Performs the Kruskal-Wallis test and returns the p-Value.
30    /// (source based on R's kruskal.test(), GNU GPL)
31    /// </summary>
32    public static double Test(double[][] data) {
33      double[] g;
34      double[] x = FlattenArray(data, out g);
35      int n = x.Length;
36      int parameter = data.Length - 1;
37
38      int[] r = Rank(x);
39      double[][] ties = CountDuplicates(x);
40      double statistic = CalculateStatistic(r, g);
41      double tiesCorrection = CalculateTiesCorrection(ties);
42      statistic = ((12 * statistic / (n * (n + 1)) - 3 * (n + 1)) /
43                  (1 - tiesCorrection / (Math.Pow(n, 3) - n)));
44
45      return alglib.chisquarecdistribution(parameter, statistic);
46    }
47
48    private static double CalculateStatistic(int[] r, double[] g) {
49      double result = 0.0;
50      double lastG = g[0];
51      double curSum = 0.0;
52      int cnt = 0;
53      for (int i = 0; i < r.Length; i++) {
54        if (lastG.IsAlmost(g[i])) {
55          curSum += r[i];
56          cnt++;
57        } else {
58          double sum = Math.Pow(curSum, 2);
59          sum /= cnt;
60          result += sum;
61
62          lastG = g[i];
63          curSum = r[i];
64          cnt = 1;
65        }
66      }
67
68      double lastSum = Math.Pow(curSum, 2);
69      lastSum /= cnt;
70      result += lastSum;
71
72      return result;
73    }
74
75    private static double CalculateTiesCorrection(double[][] ties) {
76      double sum = 0.0;
77      for (int i = 0; i < ties[1].Length; i++) {
78        double tic = Math.Pow(ties[1][i], 3) - ties[1][i];
79        sum += tic;
80      }
81      return sum;
82    }
83
84    private static double[] FlattenArray(double[][] x, out double[] indizes) {
85      int compLenght = 0;
86      for (int i = 0; i < x.Length; i++) {
87        compLenght += x[i].Length;
88      }
89
90      double[] result = new double[compLenght];
91      indizes = new double[compLenght];
92
93      int resultPos = 0;
94      for (int i = 0; i < x.Length; i++) {
95        Array.Copy(x[i], 0, result, resultPos, x[i].Length);
96
97        for (int j = resultPos; j < resultPos + x[i].Length; j++) {
98          indizes[j] = i;
99        }
100        resultPos += x[i].Length;
101      }
102
103      return result;
104    }
105
106    private static double[][] CountDuplicates(double[] x) {
107      List<double> number = new List<double>();
108      List<double> cnt = new List<double>();
109      double[] sortedX = new double[x.Length];
110
111      Array.Copy(x, sortedX, x.Length);
112      Array.Sort(sortedX);
113
114      double last = x[0];
115      number.Add(x[0]);
116      cnt.Add(1);
117
118      for (int i = 1; i < x.Length; i++) {
119        if (!last.IsAlmost(x[i])) {
120          number.Add(x[i]);
121          last = x[i];
122          cnt.Add(1);
123        } else {
124          cnt[cnt.Count - 1] += 1;
125        }
126      }
127
128      double[][] result = new double[2][];
129      result[0] = number.ToArray();
130      result[1] = cnt.ToArray();
131
132      return result;
133    }
134
135    private static int[] Rank(double[] x) {
136      double[] keys = new double[x.Length];
137      int[] items = new int[x.Length];
138      int[] ranks = new int[x.Length];
139
140      Array.Copy(x, keys, x.Length);
141      for (int i = 0; i < x.Length; i++) items[i] = i;
142
143      Array.Sort(keys, items);
144
145      for (int i = 0; i < x.Length; i++) {
146        ranks[items[i]] = i + 1;
147      }
148      return ranks;
149    }
150  }
151}
Note: See TracBrowser for help on using the repository browser.