Changeset 14029 for branches/crossvalidation-2434/HeuristicLab.Common/3.3/EnumerableStatisticExtensions.cs
- Timestamp: 07/08/16 14:40:02 (8 years ago)
- Location: branches/crossvalidation-2434
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/crossvalidation-2434
- Property svn:mergeinfo changed
-
branches/crossvalidation-2434/HeuristicLab.Common/3.3/EnumerableStatisticExtensions.cs
r12012 r14029 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Diagnostics.Contracts; 24 25 using System.Linq; 25 26 … … 32 33 /// <returns></returns> 33 34 public static double Median(this IEnumerable<double> values) { 34 // iterate only once 35 // See unit tests for comparison with naive implementation 36 return Quantile(values, 0.5); 37 } 38 39 /// <summary> 40 /// Calculates the alpha-quantile element of the enumeration. 41 /// </summary> 42 /// <param name="values"></param> 43 /// <returns></returns> 44 public static double Quantile(this IEnumerable<double> values, double alpha) { 45 // See unit tests for comparison with naive implementation 35 46 double[] valuesArr = values.ToArray(); 36 47 int n = valuesArr.Length; 37 48 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 38 49 39 Array.Sort(valuesArr); 40 41 // return the middle element (if n is uneven) or the average of the two middle elements if n is even. 42 if (n % 2 == 1) { 43 return valuesArr[n / 2]; 50 // "When N is even, statistics books define the median as the arithmetic mean of the elements k = N/2 51 // and k = N/2 + 1 (that is, N/2 from the bottom and N/2 from the top). 52 // If you accept such pedantry, you must perform two separate selections to find these elements." 53 54 // return the element at Math.Ceiling (if n*alpha is fractional) or the average of two elements if n*alpha is integer. 
55 var pos = n * alpha; 56 Contract.Assert(pos >= 0); 57 Contract.Assert(pos < n); 58 bool isInteger = Math.Round(pos).IsAlmost(pos); 59 if (isInteger) { 60 return 0.5 * (Select((int)pos - 1, valuesArr) + Select((int)pos, valuesArr)); 44 61 } else { 45 return (valuesArr[(n / 2) - 1] + valuesArr[n / 2]) / 2.0; 62 return Select((int)Math.Ceiling(pos) - 1, valuesArr); 63 } 64 } 65 66 // Numerical Recipes in C++, §8.5 Selecting the Mth Largest, O(n) 67 // Given k in [0..n-1] returns an array value from array arr[0..n-1] such that k array values are 68 // less than or equal to the one returned. The input array will be rearranged to have this value in 69 // location arr[k], with all smaller elements moved to arr[0..k-1] (in arbitrary order) and all 70 // larger elements in arr[k+1..n-1] (also in arbitrary order). 71 // 72 // Could be changed to Select<T> where T is IComparable but in this case is significantly slower for double values 73 private static double Select(int k, double[] arr) { 74 Contract.Assert(arr.GetLowerBound(0) == 0); 75 Contract.Assert(k >= 0 && k < arr.Length); 76 int i, ir, j, l, mid, n = arr.Length; 77 double a; 78 l = 0; 79 ir = n - 1; 80 for (; ; ) { 81 if (ir <= l + 1) { 82 // Active partition contains 1 or 2 elements. 83 if (ir == l + 1 && arr[ir] < arr[l]) { 84 // if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) { 85 // Case of 2 elements. 86 // SWAP(arr[l], arr[ir]); 87 double temp = arr[l]; 88 arr[l] = arr[ir]; 89 arr[ir] = temp; 90 } 91 return arr[k]; 92 } else { 93 mid = (l + ir) >> 1; // Choose median of left, center, and right elements 94 { 95 // SWAP(arr[mid], arr[l + 1]); // as partitioning element a. Also 96 double temp = arr[mid]; 97 arr[mid] = arr[l + 1]; 98 arr[l + 1] = temp; 99 } 100 101 if (arr[l] > arr[ir]) { 102 // if (arr[l].CompareTo(arr[ir]) > 0) { // rearrange so that arr[l] arr[ir] <= arr[l+1], 103 // SWAP(arr[l], arr[ir]); . 
arr[ir] >= arr[l+1] 104 double temp = arr[l]; 105 arr[l] = arr[ir]; 106 arr[ir] = temp; 107 } 108 109 if (arr[l + 1] > arr[ir]) { 110 // if (arr[l + 1].CompareTo(arr[ir]) > 0) { 111 // SWAP(arr[l + 1], arr[ir]); 112 double temp = arr[l + 1]; 113 arr[l + 1] = arr[ir]; 114 arr[ir] = temp; 115 } 116 if (arr[l] > arr[l + 1]) { 117 //if (arr[l].CompareTo(arr[l + 1]) > 0) { 118 // SWAP(arr[l], arr[l + 1]); 119 double temp = arr[l]; 120 arr[l] = arr[l + 1]; 121 arr[l + 1] = temp; 122 123 } 124 i = l + 1; // Initialize pointers for partitioning. 125 j = ir; 126 a = arr[l + 1]; // Partitioning element. 127 for (; ; ) { // Beginning of innermost loop. 128 do i++; while (arr[i] < a /* arr[i].CompareTo(a) < 0 */); // Scan up to find element > a. 129 do j--; while (arr[j] > a /* arr[j].CompareTo(a) > 0 */); // Scan down to find element < a. 130 if (j < i) break; // Pointers crossed. Partitioning complete. 131 { 132 // SWAP(arr[i], arr[j]); 133 double temp = arr[i]; 134 arr[i] = arr[j]; 135 arr[j] = temp; 136 } 137 } // End of innermost loop. 138 arr[l + 1] = arr[j]; // Insert partitioning element. 139 arr[j] = a; 140 if (j >= k) ir = j - 1; // Keep active the partition that contains the 141 if (j <= k) l = i; // kth element. 142 } 46 143 } 47 144 } … … 67 164 68 165 /// <summary> 69 /// Calculates the s tandard deviation of values.166 /// Calculates the sample standard deviation of values. 70 167 /// </summary> 71 168 /// <param name="values"></param> … … 76 173 77 174 /// <summary> 78 /// Calculates the variance of values. (sum (x - x_mean)² / n) 175 /// Calculates the population standard deviation of values. 176 /// </summary> 177 /// <param name="values"></param> 178 /// <returns></returns> 179 public static double StandardDeviationPop(this IEnumerable<double> values) { 180 return Math.Sqrt(VariancePop(values)); 181 } 182 183 /// <summary> 184 /// Calculates the sample variance of values. 
(sum (x - x_mean)² / (n-1)) 79 185 /// </summary> 80 186 /// <param name="values"></param> 81 187 /// <returns></returns> 82 188 public static double Variance(this IEnumerable<double> values) { 189 return Variance(values, true); 190 } 191 192 /// <summary> 193 /// Calculates the population variance of values. (sum (x - x_mean)² / n) 194 /// </summary> 195 /// <param name="values"></param> 196 /// <returns></returns> 197 public static double VariancePop(this IEnumerable<double> values) { 198 return Variance(values, false); 199 } 200 201 private static double Variance(IEnumerable<double> values, bool sampleVariance) { 83 202 int m_n = 0; 84 203 double m_oldM = 0.0; … … 100 219 } 101 220 } 102 return ((m_n > 1) ? m_newS / (m_n - 1) : 0.0); 103 } 104 105 /// <summary> 106 /// Calculates the pth percentile of the values. 107 /// </summary 108 public static double Percentile(this IEnumerable<double> values, double p) { 109 // iterate only once 110 double[] valuesArr = values.ToArray(); 111 int n = valuesArr.Length; 112 if (n == 0) throw new InvalidOperationException("Enumeration contains no elements."); 113 if (n == 1) return values.ElementAt(0); 114 115 if (p.IsAlmost(0.0)) return valuesArr[0]; 116 if (p.IsAlmost(1.0)) return valuesArr[n - 1]; 117 118 double t = p * (n - 1); 119 int index = (int)Math.Floor(t); 120 double percentage = t - index; 121 return valuesArr[index] * (1 - percentage) + valuesArr[index + 1] * percentage; 221 222 if (m_n == 0) return double.NaN; 223 if (m_n == 1) return 0.0; 224 225 if (sampleVariance) return m_newS / (m_n - 1); 226 else return m_newS / m_n; 122 227 } 123 228
Note: See TracChangeset for help on using the changeset viewer.