Free cookie consent management tool by TermsFeed Policy Generator

source: branches/StatisticalTesting/HeuristicLab.Analysis.Statistics/3.3/KruskalWallis.cs @ 10017

Last change on this file since 10017 was 10017, checked in by ascheibe, 11 years ago

#2031 improved sample size estimation and some minor improvements

File size: 4.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24
25namespace HeuristicLab.Analysis.Statistics {
26  public class KruskalWallis {
27    /// <summary>
28    /// Performs the Kruskal-Wallis test and returns the p-Value.
29    /// (source based on R's kruskal.test(), GNU GPL)
30    /// </summary>
31    public static double Test(double[][] data) {
32      double[] g;
33      double[] x = FlattenArray(data, out g);
34      int n = x.Length;
35      int parameter = data.Length - 1;
36
37      int[] r = Rank(x);
38      double[][] ties = CountDuplicates(x);
39      double statistic = CalculateStatistic(r, g);
40      double tiesCorrection = CalculateTiesCorrection(ties);
41      statistic = ((12 * statistic / (n * (n + 1)) - 3 * (n + 1)) /
42                  (1 - tiesCorrection / (Math.Pow(n, 3) - n)));
43
44      return alglib.chisquarecdistribution(parameter, statistic);
45    }
46
47    private static double CalculateStatistic(int[] r, double[] g) {
48      double result = 0.0;
49      double lastG = g[0];
50      double curSum = 0.0;
51      int cnt = 0;
52      for (int i = 0; i < r.Length; i++) {
53        if (lastG == g[i]) {
54          curSum += r[i];
55          cnt++;
56        } else {
57          double sum = Math.Pow(curSum, 2);
58          sum /= cnt;
59          result += sum;
60
61          lastG = g[i];
62          curSum = r[i];
63          cnt = 1;
64        }
65      }
66
67      double lastSum = Math.Pow(curSum, 2);
68      lastSum /= cnt;
69      result += lastSum;
70
71      return result;
72    }
73
74    private static double CalculateTiesCorrection(double[][] ties) {
75      double sum = 0.0;
76      for (int i = 0; i < ties[1].Length; i++) {
77        double tic = Math.Pow(ties[1][i], 3) - ties[1][i];
78        sum += tic;
79      }
80      return sum;
81    }
82
83    private static double[] FlattenArray(double[][] x, out double[] indizes) {
84      int compLenght = 0;
85      for (int i = 0; i < x.Length; i++) {
86        compLenght += x[i].Length;
87      }
88
89      double[] result = new double[compLenght];
90      indizes = new double[compLenght];
91
92      int resultPos = 0;
93      for (int i = 0; i < x.Length; i++) {
94        Array.Copy(x[i], 0, result, resultPos, x[i].Length);
95
96        for (int j = resultPos; j < resultPos + x[i].Length; j++) {
97          indizes[j] = i;
98        }
99        resultPos += x[i].Length;
100      }
101
102      return result;
103    }
104
105    private static double[][] CountDuplicates(double[] x) {
106      List<double> number = new List<double>();
107      List<double> cnt = new List<double>();
108      double[] sortedX = new double[x.Length];
109
110      Array.Copy(x, sortedX, x.Length);
111      Array.Sort(sortedX);
112
113      double last = x[0];
114      number.Add(x[0]);
115      cnt.Add(1);
116
117      for (int i = 1; i < x.Length; i++) {
118        if (last != x[i]) {
119          number.Add(x[i]);
120          last = x[i];
121          cnt.Add(1);
122        } else {
123          cnt[cnt.Count - 1] += 1;
124        }
125      }
126
127      double[][] result = new double[2][];
128      result[0] = number.ToArray();
129      result[1] = cnt.ToArray();
130
131      return result;
132    }
133
134    private static int[] Rank(double[] x) {
135      double[] keys = new double[x.Length];
136      int[] items = new int[x.Length];
137      int[] ranks = new int[x.Length];
138
139      Array.Copy(x, keys, x.Length);
140      for (int i = 0; i < x.Length; i++) items[i] = i;
141
142      Array.Sort(keys, items);
143
144      for (int i = 0; i < x.Length; i++) {
145        ranks[items[i]] = i + 1;
146      }
147      return ranks;
148    }
149  }
150}
Note: See TracBrowser for help on using the repository browser.