Changeset 13033
 Timestamp:
 10/19/15 16:13:14 (8 years ago)
 Location:
 trunk/sources
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

trunk/sources/HeuristicLab.Common/3.3/EnumerableStatisticExtensions.cs
r13025 r13033 33 33 /// <returns></returns> 34 34 public static double Median(this IEnumerable<double> values) { 35 // iterate only once 35 // See unit tests for comparison with naive implementation 36 return Quantile(values, 0.5); 37 } 38 39 /// <summary> 40 /// Calculates the alphaquantile element of the enumeration. 41 /// </summary> 42 /// <param name="values"></param> 43 /// <returns></returns> 44 public static double Quantile(this IEnumerable<double> values, double alpha) { 45 // See unit tests for comparison with naive implementation 36 46 double[] valuesArr = values.ToArray(); 37 47 int n = valuesArr.Length; 38 48 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 39 49 40 Array.Sort(valuesArr); 41 42 // return the middle element (if n is uneven) or the average of the two middle elements if n is even. 43 if (n % 2 == 1) { 44 return valuesArr[n / 2]; 45 } else { 46 return (valuesArr[(n / 2)  1] + valuesArr[n / 2]) / 2.0; 47 } 48 } 49 50 /// <summary> 51 /// Calculates the alphaquantile element of the enumeration. 52 /// </summary> 53 /// <param name="values"></param> 54 /// <returns></returns> 55 public static double Quantile(this IEnumerable<double> values, double alpha) { 56 Contract.Assert(alpha > 0 && alpha < 1); 57 // iterate only once 58 double[] valuesArr = values.ToArray(); 59 int n = valuesArr.Length; 60 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 61 62 Array.Sort(valuesArr); 63 // starts at 0 50 // "When N is even, statistics books define the median as the arithmetic mean of the elements k = N/2 51 // and k = N/2 + 1 (that is, N/2 from the bottom and N/2 from the top). 52 // If you accept such pedantry, you must perform two separate selections to find these elements." 64 53 65 54 // return the element at Math.Ceiling (if n*alpha is fractional) or the average of two elements if n*alpha is integer. … … 69 58 bool isInteger = Math.Round(pos).IsAlmost(pos); 70 59 if (isInteger) { 71 return 0.5 * ( valuesArr[(int)pos  1] + valuesArr[(int)pos]);60 return 0.5 * (Select((int)pos  1, valuesArr) + Select((int)pos, valuesArr)); 72 61 } else { 73 return valuesArr[(int)Math.Ceiling(pos)  1]; 62 return Select((int)Math.Ceiling(pos)  1, valuesArr); 63 } 64 } 65 66 // Numerical Recipes in C++, §8.5 Selecting the Mth Largest, O(n) 67 // Giben k in [0..n1] returns an array value from array arr[0..n1] such that k array values are 68 // lee than or equal to the one returned. The input array will be rearranged to hav this value in 69 // location arr[k], with all smaller elements moved to arr[0..k1] (in arbitrary order) and all 70 // larger elements in arr[k+1..n1] (also in arbitrary order). 71 private static double Select(int k, double[] arr) { 72 Contract.Assert(arr.GetLowerBound(0) == 0); 73 Contract.Assert(k >= 0 && k < arr.Length); 74 int i, ir, j, l, mid, n = arr.Length; 75 double a; 76 l = 0; 77 ir = n  1; 78 for (; ; ) { 79 if (ir <= l + 1) { 80 // Active partition contains 1 or 2 elements. 81 if (ir == l + 1 && arr[ir] < arr[l]) { 82 // if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) { 83 // Case of 2 elements. 84 // SWAP(arr[l], arr[ir]); 85 double temp = arr[l]; 86 arr[l] = arr[ir]; 87 arr[ir] = temp; 88 } 89 return arr[k]; 90 } else { 91 mid = (l + ir) >> 1; // Choose median of left, center, and right elements 92 { 93 // SWAP(arr[mid], arr[l + 1]); // as partitioning element a. Also 94 double temp = arr[mid]; 95 arr[mid] = arr[l + 1]; 96 arr[l + 1] = temp; 97 } 98 99 if (arr[l] > arr[ir]) { 100 // if (arr[l].CompareTo(arr[ir]) > 0) { // rearrange so that arr[l] arr[ir] <= arr[l+1], 101 // SWAP(arr[l], arr[ir]); . arr[ir] >= arr[l+1] 102 double temp = arr[l]; 103 arr[l] = arr[ir]; 104 arr[ir] = temp; 105 } 106 107 if (arr[l + 1] > arr[ir]) { 108 // if (arr[l + 1].CompareTo(arr[ir]) > 0) { 109 // SWAP(arr[l + 1], arr[ir]); 110 double temp = arr[l + 1]; 111 arr[l + 1] = arr[ir]; 112 arr[ir] = temp; 113 } 114 if (arr[l] > arr[l + 1]) { 115 //if (arr[l].CompareTo(arr[l + 1]) > 0) { 116 // SWAP(arr[l], arr[l + 1]); 117 double temp = arr[l]; 118 arr[l] = arr[l + 1]; 119 arr[l + 1] = temp; 120 121 } 122 i = l + 1; // Initialize pointers for partitioning. 123 j = ir; 124 a = arr[l + 1]; // Partitioning element. 125 for (; ; ) { // Beginning of innermost loop. 126 do i++; while (arr[i] < a /* arr[i].CompareTo(a) < 0 */); // Scan up to find element > a. 127 do j; while (arr[j] > a /* arr[j].CompareTo(a) > 0 */); // Scan down to find element < a. 128 if (j < i) break; // Pointers crossed. Partitioning complete. 129 { 130 // SWAP(arr[i], arr[j]); 131 double temp = arr[i]; 132 arr[i] = arr[j]; 133 arr[j] = temp; 134 } 135 } // End of innermost loop. 136 arr[l + 1] = arr[j]; // Insert partitioning element. 137 arr[j] = a; 138 if (j >= k) ir = j  1; // Keep active the partition that contains the 139 if (j <= k) l = i; // kth element. 140 } 74 141 } 75 142 } … … 133 200 /// <summary> 134 201 /// Calculates the pth percentile of the values. 135 /// </summary 202 /// </summary> 136 203 public static double Percentile(this IEnumerable<double> values, double p) { 137 204 // iterate only once 
trunk/sources/HeuristicLab.Tests/HeuristicLab.Common3.3/EnumerableStatisticExtensions.cs
r13025 r13033 20 20 #endregion 21 21 22 using System; 23 using System.Collections.Generic; 24 using System.Diagnostics; 25 using System.Diagnostics.Contracts; 22 26 using System.Linq; 23 27 using HeuristicLab.Common; 28 using HeuristicLab.Random; 24 29 using Microsoft.VisualStudio.TestTools.UnitTesting; 25 30 … … 31 36 [TestProperty("Time", "short")] 32 37 public void QuantileTest() { 38 { 39 Assert.AreEqual(2.0, new double[] { 2.0 }.Quantile(0.5)); 40 Assert.AreEqual(2.0, new double[] { 2.0 }.Quantile(0.01)); 41 Assert.AreEqual(2.0, new double[] { 2.0 }.Quantile(0.99)); 42 } 43 44 { 45 Assert.AreEqual(1.5, new double[] { 1.0, 2.0 }.Median()); 46 Assert.AreEqual(2.0, new double[] { 1.0, 2.0 }.Quantile(0.99)); 47 Assert.AreEqual(1.0, new double[] { 1.0, 2.0 }.Quantile(0.01)); 48 } 49 { 50 Assert.AreEqual(2.0, new double[] { 3.0, 1.0, 2.0 }.Median()); 51 Assert.AreEqual(3.0, new double[] { 3.0, 1.0, 2.0 }.Quantile(0.99)); 52 Assert.AreEqual(1.0, new double[] { 3.0, 1.0, 2.0 }.Quantile(0.01)); 53 } 54 55 33 56 var xs = new double[] { 1, 1, 1, 3, 4, 7, 9, 11, 13, 13 }; 34 57 { 35 var q = xs.Quantile(0.3); 36 Assert.AreEqual(q, 2.0, 1E6); 58 var q0 = Quantile(xs, 0.3); // naive implementation using sorting 59 Assert.AreEqual(q0, 2.0, 1E6); 60 61 var q1 = xs.Quantile(0.3); // using select 62 Assert.AreEqual(q1, 2.0, 1E6); 37 63 } 38 64 { 39 var q = xs.Quantile(0.75); 40 Assert.AreEqual(q, 11.0, 1E6); 65 var q0 = Quantile(xs, 0.75); // naive implementation using sorting 66 Assert.AreEqual(q0, 11.0, 1E6); 67 68 var q1 = xs.Quantile(0.75); // using select 69 Assert.AreEqual(q1, 11.0, 1E6); 41 70 } 42 71 // quantile = 0.5 is equivalent to median 43 72 { 44 73 // even number of elements 45 Assert.AreEqual(xs.Quantile(0.5), xs.Median(), 1E6); 74 var expected = Median(xs); 75 Assert.AreEqual(expected, Quantile(xs, 0.5), 1E6); // using sorting 76 Assert.AreEqual(expected, xs.Quantile(0.5), 1E6); // using select 46 77 } 47 78 { 48 79 // odd number of elements 49 Assert.AreEqual(xs.Take(9).Quantile(0.5), xs.Take(9).Median(), 1E6); 80 var expected = Median(xs.Take(9)); 81 Assert.AreEqual(expected, Quantile(xs.Take(9), 0.5), 1E6); // using sorting 82 Assert.AreEqual(expected, xs.Take(9).Quantile(0.5), 1E6); // using select 83 } 84 85 // edge cases 86 { 87 try { 88 new double[] { }.Quantile(0.5); // empty 89 Assert.Fail("expected exception"); 90 } 91 catch (Exception) { 92 } 93 } 94 { 95 try { 96 Enumerable.Repeat(0.0, 10).Quantile(1.0); // alpha < 1 97 Assert.Fail("expected exception"); 98 } 99 catch (Exception) { 100 } 101 } 102 } 103 104 [TestMethod] 105 [TestCategory("General")] 106 [TestProperty("Time", "medium")] 107 public void QuantilePerformanceTest() { 108 int n = 10; 109 var sw0 = new Stopwatch(); 110 var sw1 = new Stopwatch(); 111 const int reps = 1000; 112 while (n <= 1000000) { 113 for (int i = 0; i < reps; i++) { 114 var xs = RandomEnumerable.SampleRandomNumbers(0, 10000, n + 1).Select(x => (double)x).ToArray(); 115 sw0.Start(); 116 var q0 = Median(xs); // sorting 117 sw0.Stop(); 118 119 120 sw1.Start(); 121 var q1 = xs.Median(); // selection 122 sw1.Stop(); 123 Assert.AreEqual(q0, q1, 1E9); 124 } 125 Console.WriteLine("{0,10} {1,10} {2,10}", n, sw0.ElapsedMilliseconds, sw1.ElapsedMilliseconds); 126 127 n = n * 10; 128 } 129 } 130 131 132 // straight forward implementation of median function (using sorting) 133 private static double Median(IEnumerable<double> values) { 134 // iterate only once 135 double[] valuesArr = values.ToArray(); 136 int n = valuesArr.Length; 137 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 138 139 Array.Sort(valuesArr); 140 141 // return the middle element (if n is uneven) or the average of the two middle elements if n is even. 142 if (n % 2 == 1) { 143 return valuesArr[n / 2]; 144 } else { 145 return (valuesArr[(n / 2)  1] + valuesArr[n / 2]) / 2.0; 146 } 147 } 148 149 // straight forward implementation of quantile function (using sorting) 150 private static double Quantile(IEnumerable<double> values, double alpha) { 151 Contract.Assert(alpha > 0 && alpha < 1); 152 // iterate only once 153 double[] valuesArr = values.ToArray(); 154 int n = valuesArr.Length; 155 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 156 157 Array.Sort(valuesArr); 158 // starts at 0 159 160 // return the element at Math.Ceiling (if n*alpha is fractional) or the average of two elements if n*alpha is integer. 161 var pos = n * alpha; 162 Contract.Assert(pos >= 0); 163 Contract.Assert(pos < n); 164 bool isInteger = Math.Round(pos).IsAlmost(pos); 165 if (isInteger) { 166 return 0.5 * (valuesArr[(int)pos  1] + valuesArr[(int)pos]); 167 } else { 168 return valuesArr[(int)Math.Ceiling(pos)  1]; 50 169 } 51 170 }
Note: See TracChangeset
for help on using the changeset viewer.