Free cookie consent management tool by TermsFeed Policy Generator

source: branches/crossvalidation-2434/HeuristicLab.Tests/HeuristicLab.Common-3.3/EnumerableStatisticExtensions.cs @ 14648

Last change on this file since 14648 was 13033, checked in by gkronber, 9 years ago

#2418: implemented O(n) algorithm for median (and general quantiles) determination.

File size: 5.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Diagnostics.Contracts;
26using System.Linq;
27using HeuristicLab.Common;
28using HeuristicLab.Random;
29using Microsoft.VisualStudio.TestTools.UnitTesting;
30
31namespace HeuristicLab.Tests {
32  [TestClass]
33  public class EnumerableStatisticExtensionsTest {
34    [TestMethod]
35    [TestCategory("General")]
36    [TestProperty("Time", "short")]
37    public void QuantileTest() {
38      {
39        Assert.AreEqual(2.0, new double[] { 2.0 }.Quantile(0.5));
40        Assert.AreEqual(2.0, new double[] { 2.0 }.Quantile(0.01));
41        Assert.AreEqual(2.0, new double[] { 2.0 }.Quantile(0.99));
42      }
43
44      {
45        Assert.AreEqual(1.5, new double[] { 1.0, 2.0 }.Median());
46        Assert.AreEqual(2.0, new double[] { 1.0, 2.0 }.Quantile(0.99));
47        Assert.AreEqual(1.0, new double[] { 1.0, 2.0 }.Quantile(0.01));
48      }
49      {
50        Assert.AreEqual(2.0, new double[] { 3.0, 1.0, 2.0 }.Median());
51        Assert.AreEqual(3.0, new double[] { 3.0, 1.0, 2.0 }.Quantile(0.99));
52        Assert.AreEqual(1.0, new double[] { 3.0, 1.0, 2.0 }.Quantile(0.01));
53      }
54
55
56      var xs = new double[] { 1, 1, 1, 3, 4, 7, 9, 11, 13, 13 };
57      {
58        var q0 = Quantile(xs, 0.3); // naive implementation using sorting
59        Assert.AreEqual(q0, 2.0, 1E-6);
60
61        var q1 = xs.Quantile(0.3); // using select
62        Assert.AreEqual(q1, 2.0, 1E-6);
63      }
64      {
65        var q0 = Quantile(xs, 0.75); // naive implementation using sorting
66        Assert.AreEqual(q0, 11.0, 1E-6);
67
68        var q1 = xs.Quantile(0.75); // using select
69        Assert.AreEqual(q1, 11.0, 1E-6);
70      }
71      // quantile = 0.5 is equivalent to median
72      {
73        // even number of elements
74        var expected = Median(xs);
75        Assert.AreEqual(expected, Quantile(xs, 0.5), 1E-6); // using sorting
76        Assert.AreEqual(expected, xs.Quantile(0.5), 1E-6); // using select
77      }
78      {
79        // odd number of elements
80        var expected = Median(xs.Take(9));
81        Assert.AreEqual(expected, Quantile(xs.Take(9), 0.5), 1E-6); // using sorting
82        Assert.AreEqual(expected, xs.Take(9).Quantile(0.5), 1E-6); // using select
83      }
84
85      // edge cases
86      {
87        try {
88          new double[] { }.Quantile(0.5); // empty
89          Assert.Fail("expected exception");
90        }
91        catch (Exception) {
92        }
93      }
94      {
95        try {
96          Enumerable.Repeat(0.0, 10).Quantile(1.0); // alpha < 1
97          Assert.Fail("expected exception");
98        }
99        catch (Exception) {
100        }
101      }
102    }
103
104    [TestMethod]
105    [TestCategory("General")]
106    [TestProperty("Time", "medium")]
107    public void QuantilePerformanceTest() {
108      int n = 10;
109      var sw0 = new Stopwatch();
110      var sw1 = new Stopwatch();
111      const int reps = 1000;
112      while (n <= 1000000) {
113        for (int i = 0; i < reps; i++) {
114          var xs = RandomEnumerable.SampleRandomNumbers(0, 10000, n + 1).Select(x => (double)x).ToArray();
115          sw0.Start();
116          var q0 = Median(xs); // sorting
117          sw0.Stop();
118
119
120          sw1.Start();
121          var q1 = xs.Median(); // selection
122          sw1.Stop();
123          Assert.AreEqual(q0, q1, 1E-9);
124        }
125        Console.WriteLine("{0,-10} {1,-10} {2,-10}", n, sw0.ElapsedMilliseconds, sw1.ElapsedMilliseconds);
126
127        n = n * 10;
128      }
129    }
130
131
132    // straight forward implementation of median function (using sorting)
133    private static double Median(IEnumerable<double> values) {
134      // iterate only once
135      double[] valuesArr = values.ToArray();
136      int n = valuesArr.Length;
137      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
138
139      Array.Sort(valuesArr);
140
141      // return the middle element (if n is uneven) or the average of the two middle elements if n is even.
142      if (n % 2 == 1) {
143        return valuesArr[n / 2];
144      } else {
145        return (valuesArr[(n / 2) - 1] + valuesArr[n / 2]) / 2.0;
146      }
147    }
148
149    // straight forward implementation of quantile function (using sorting)
150    private static double Quantile(IEnumerable<double> values, double alpha) {
151      Contract.Assert(alpha > 0 && alpha < 1);
152      // iterate only once
153      double[] valuesArr = values.ToArray();
154      int n = valuesArr.Length;
155      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
156
157      Array.Sort(valuesArr);
158      //  starts at 0
159
160      // return the element at Math.Ceiling (if n*alpha is fractional) or the average of two elements if n*alpha is integer.
161      var pos = n * alpha;
162      Contract.Assert(pos >= 0);
163      Contract.Assert(pos < n);
164      bool isInteger = Math.Round(pos).IsAlmost(pos);
165      if (isInteger) {
166        return 0.5 * (valuesArr[(int)pos - 1] + valuesArr[(int)pos]);
167      } else {
168        return valuesArr[(int)Math.Ceiling(pos) - 1];
169      }
170    }
171  }
172}
Note: See TracBrowser for help on using the repository browser.