#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion


21 


22  using System;


23  using System.Linq;


24  using HeuristicLab.Common;


25 


namespace HeuristicLab.Analysis.Statistics {
  /// <summary>
  /// Helpers that estimate a required sample size from a pilot sample and that
  /// compute standardized effect sizes (Cohen's d, Hedges' g) for two samples.
  /// </summary>
  public static class SampleSizeDetermination {
    // Confidence level passed to ConfidenceIntervals() when deriving the margin of
    // error from the pilot sample.
    // NOTE(review): this is fixed at 0.95 and does not follow the caller's conf
    // argument (conf only influences the normal quantile z) - confirm this is intended.
    private const double PilotConfidenceLevel = 0.95;

    /// <summary>
    /// Determines for a given sample the required sample size as described in
    /// Göran Kauermann, Helmut Küchenhoff: Stichproben: Methoden und praktische Umsetzung mit R, section 2.27.
    /// </summary>
    /// <remarks>
    /// Computes ceil(s^2 / (e^2 / z^2 + s^2 / n)), where s is the estimated standard
    /// deviation of the pilot sample, e is half the width of the pilot sample's
    /// confidence interval, z is the inverse normal distribution evaluated at
    /// (conf + 1) / 2, and n is the number of pilot samples.
    /// </remarks>
    /// <param name="samples">The pilot sample.</param>
    /// <param name="conf">Confidence level, between zero and one (inclusive).</param>
    /// <returns>
    /// Number of required samples for the given confidence level, capped at
    /// <see cref="int.MaxValue"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="conf"/> is NaN or outside [0, 1].</exception>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public static int DetermineSampleSizeByEstimatingMean(double[] samples, double conf = 0.95) {
      ValidateConfidence(conf);
      if (samples == null) throw new ArgumentNullException("samples");

      double e = PilotMarginOfError(samples);
      double s = samples.EstimatedStandardDeviation();
      double z = alglib.invnormaldistribution((conf + 1) / 2);
      double n = samples.Length;

      double sSquared = Math.Pow(s, 2);
      double result = sSquared / ((Math.Pow(e, 2) / Math.Pow(z, 2)) + (sSquared / n));

      return CeilingToInt(result);
    }

    /// <summary>
    /// Determines the required sample size from a pilot sample using the simplified
    /// formula ceil(z^2 * s^2 / e^2), i.e. without the s^2 / n term that
    /// <see cref="DetermineSampleSizeByEstimatingMean"/> includes.
    /// </summary>
    /// <remarks>
    /// s is the estimated standard deviation of the pilot sample, e is half the width
    /// of the pilot sample's confidence interval and z is the inverse normal
    /// distribution evaluated at (conf + 1) / 2.
    /// </remarks>
    /// <param name="samples">The pilot sample.</param>
    /// <param name="conf">Confidence level, between zero and one (inclusive).</param>
    /// <returns>
    /// Number of required samples for the given confidence level, capped at
    /// <see cref="int.MaxValue"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="conf"/> is NaN or outside [0, 1].</exception>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public static int DetermineSampleSizeByEstimatingMeanForLargeSampleSizes(double[] samples, double conf = 0.95) {
      ValidateConfidence(conf);
      if (samples == null) throw new ArgumentNullException("samples");

      double e = PilotMarginOfError(samples);
      double s = samples.EstimatedStandardDeviation();
      double z = alglib.invnormaldistribution((conf + 1) / 2);

      double result = Math.Pow(z, 2) * (Math.Pow(s, 2) / Math.Pow(e, 2));

      return CeilingToInt(result);
    }

    /// <summary>
    /// Calculates Cohen's d as (mean(d1) - mean(d2)) / sqrt((var(d1) + var(d2)) / 2).
    /// </summary>
    /// <param name="d1">First sample.</param>
    /// <param name="d2">Second sample.</param>
    /// <returns>Cohen's d.
    /// d = 0.2 means small effect
    /// d = 0.5 means medium effect
    /// d = 0.8 means big effect
    /// According to Wikipedia this means: "A lower Cohen's d indicates a necessity of larger sample sizes, and vice versa."
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="d1"/> or <paramref name="d2"/> is null.</exception>
    public static double CalculateCohensD(double[] d1, double[] d2) {
      if (d1 == null) throw new ArgumentNullException("d1");
      if (d2 == null) throw new ArgumentNullException("d2");

      double x1 = d1.Average();
      double x2 = d2.Average();
      // s1 and s2 hold the values returned by Variance(), not standard deviations.
      double s1 = d1.Variance();
      double s2 = d2.Variance();

      return (x1 - x2) / Math.Sqrt((s1 + s2) / 2);
    }

    /// <summary>
    /// Calculates Hedges' g.
    /// Hedges' g works like Cohen's d but corrects for bias.
    /// </summary>
    /// <remarks>
    /// The mean difference is divided by the pooled standard deviation
    /// sqrt(((n1 - 1) * var(d1) + (n2 - 1) * var(d2)) / (n1 + n2 - 2)) and the result is
    /// multiplied by the correction factor 1 - 3 / (4 * (n1 + n2) - 9).
    /// When n1 + n2 equals 2 the pooled term divides by zero, so the result is not a
    /// finite effect size.
    /// </remarks>
    /// <param name="d1">First sample.</param>
    /// <param name="d2">Second sample.</param>
    /// <returns>Hedges' g</returns>
    /// <exception cref="ArgumentNullException"><paramref name="d1"/> or <paramref name="d2"/> is null.</exception>
    public static double CalculateHedgesG(double[] d1, double[] d2) {
      if (d1 == null) throw new ArgumentNullException("d1");
      if (d2 == null) throw new ArgumentNullException("d2");

      double x1 = d1.Average();
      double x2 = d2.Average();
      double s1 = d1.Variance();
      double s2 = d2.Variance();
      // Counts are kept as double so that all divisions below are floating-point divisions.
      double n1 = d1.Length;
      double n2 = d2.Length;

      double s = Math.Sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2));
      double g = (x1 - x2) / s;

      return (1 - (3 / (4 * (n1 + n2) - 9))) * g;
    }

    // Rejects confidence levels outside [0, 1]. The check is written as a negated
    // range test so that double.NaN (for which every comparison is false) is rejected too.
    private static void ValidateConfidence(double conf) {
      if (!(conf >= 0.0 && conf <= 1.0)) throw new ArgumentException("The confidence interval must be between zero and one.");
    }

    // Returns half the distance between the two values reported by ConfidenceIntervals()
    // for the pilot sample (presumably the lower and upper interval bounds).
    private static double PilotMarginOfError(double[] samples) {
      var confInterval = samples.ConfidenceIntervals(PilotConfidenceLevel);
      return (confInterval.Item2 - confInterval.Item1) / 2;
    }

    // Rounds up to the next whole number and caps the result at int.MaxValue.
    // NOTE(review): a NaN input (possible when an intermediate term is 0/0) is not
    // greater than int.MaxValue and therefore reaches the cast, whose result the C#
    // specification leaves unspecified in an unchecked context - decide what such
    // degenerate pilot samples should yield.
    private static int CeilingToInt(double value) {
      double rounded = Math.Ceiling(value);
      if (rounded > int.MaxValue)
        return int.MaxValue;
      else
        return (int)rounded;
    }
  }
}

