Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/08/16 14:40:02 (8 years ago)
Author:
gkronber
Message:

#2434: merged trunk changes r12934:14026 from trunk to branch

Location:
branches/crossvalidation-2434
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/crossvalidation-2434

  • branches/crossvalidation-2434/HeuristicLab.Common/3.3/EnumerableStatisticExtensions.cs

    r12012 r14029  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Diagnostics.Contracts;
    2425using System.Linq;
    2526
     
    3233    /// <returns></returns>
    3334    public static double Median(this IEnumerable<double> values) {
    34       // iterate only once
     35      // See unit tests for comparison with naive implementation
     36      return Quantile(values, 0.5);
     37    }
     38
     39    /// <summary>
     40    /// Calculates the alpha-quantile element of the enumeration.
     41    /// </summary>
     42    /// <param name="values"></param>
     43    /// <returns></returns>
     44    public static double Quantile(this IEnumerable<double> values, double alpha) {
     45      // See unit tests for comparison with naive implementation
    3546      double[] valuesArr = values.ToArray();
    3647      int n = valuesArr.Length;
    3748      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
    3849
    39       Array.Sort(valuesArr);
    40 
    41       // return the middle element (if n is uneven) or the average of the two middle elements if n is even.
    42       if (n % 2 == 1) {
    43         return valuesArr[n / 2];
     50      // "When N is even, statistics books define the median as the arithmetic mean of the elements k = N/2
     51      // and k = N/2 + 1 (that is, N/2 from the bottom and N/2 from the top).
     52      // If you accept such pedantry, you must perform two separate selections to find these elements."
     53
     54      // return the element at zero-based index Math.Ceiling(n*alpha) - 1 if n*alpha is fractional, or the average of the elements at indices n*alpha - 1 and n*alpha if n*alpha is an integer.
     55      var pos = n * alpha;
     56      Contract.Assert(pos >= 0);
     57      Contract.Assert(pos < n);
     58      bool isInteger = Math.Round(pos).IsAlmost(pos);
     59      if (isInteger) {
     60        return 0.5 * (Select((int)pos - 1, valuesArr) + Select((int)pos, valuesArr));
    4461      } else {
    45         return (valuesArr[(n / 2) - 1] + valuesArr[n / 2]) / 2.0;
     62        return Select((int)Math.Ceiling(pos) - 1, valuesArr);
     63      }
     64    }
     65
     66    // Numerical Recipes in C++, §8.5 Selecting the Mth Largest, O(n)
     67    // Given k in [0..n-1] returns an array value from array arr[0..n-1] such that k array values are
     68    // less than or equal to the one returned. The input array will be rearranged to have this value in
     69    // location arr[k], with all smaller elements moved to arr[0..k-1] (in arbitrary order) and all
     70    // larger elements in arr[k+1..n-1] (also in arbitrary order).
     71    //
     72    // Could be changed to Select<T> where T is IComparable but in this case is significantly slower for double values
     73    private static double Select(int k, double[] arr) {
     74      Contract.Assert(arr.GetLowerBound(0) == 0);
     75      Contract.Assert(k >= 0 && k < arr.Length);
     76      int i, ir, j, l, mid, n = arr.Length;
     77      double a;
     78      l = 0;
     79      ir = n - 1;
     80      for (; ; ) {
     81        if (ir <= l + 1) {
     82          // Active partition contains 1 or 2 elements.
     83          if (ir == l + 1 && arr[ir] < arr[l]) {
     84            // if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) {
     85            // Case of 2 elements.
     86            // SWAP(arr[l], arr[ir]);
     87            double temp = arr[l];
     88            arr[l] = arr[ir];
     89            arr[ir] = temp;
     90          }
     91          return arr[k];
     92        } else {
     93          mid = (l + ir) >> 1; // Choose median of left, center, and right elements
     94          {
     95            // SWAP(arr[mid], arr[l + 1]); // as partitioning element a. Also
     96            double temp = arr[mid];
     97            arr[mid] = arr[l + 1];
     98            arr[l + 1] = temp;
     99          }
     100
     101          if (arr[l] > arr[ir]) {
     102            // if (arr[l].CompareTo(arr[ir]) > 0) {  // rearrange so that arr[l] <= arr[l+1],
     103            // SWAP(arr[l], arr[ir]); // and arr[ir] >= arr[l+1].
     104            double temp = arr[l];
     105            arr[l] = arr[ir];
     106            arr[ir] = temp;
     107          }
     108
     109          if (arr[l + 1] > arr[ir]) {
     110            // if (arr[l + 1].CompareTo(arr[ir]) > 0) {
     111            // SWAP(arr[l + 1], arr[ir]);
     112            double temp = arr[l + 1];
     113            arr[l + 1] = arr[ir];
     114            arr[ir] = temp;
     115          }
     116          if (arr[l] > arr[l + 1]) {
     117            //if (arr[l].CompareTo(arr[l + 1]) > 0) {
     118            // SWAP(arr[l], arr[l + 1]);
     119            double temp = arr[l];
     120            arr[l] = arr[l + 1];
     121            arr[l + 1] = temp;
     122
     123          }
     124          i = l + 1; // Initialize pointers for partitioning.
     125          j = ir;
     126          a = arr[l + 1]; // Partitioning element.
     127          for (; ; ) { // Beginning of innermost loop.
     128            do i++; while (arr[i] < a /* arr[i].CompareTo(a) < 0 */); // Scan up to find element > a.
     129            do j--; while (arr[j] > a /* arr[j].CompareTo(a) > 0 */); // Scan down to find element < a.
     130            if (j < i) break; // Pointers crossed. Partitioning complete.
     131            {
     132              // SWAP(arr[i], arr[j]);
     133              double temp = arr[i];
     134              arr[i] = arr[j];
     135              arr[j] = temp;
     136            }
     137          } // End of innermost loop.
     138          arr[l + 1] = arr[j]; // Insert partitioning element.
     139          arr[j] = a;
     140          if (j >= k) ir = j - 1; // Keep active the partition that contains the
     141          if (j <= k) l = i; // kth element.
     142        }
    46143      }
    47144    }
     
    67164
    68165    /// <summary>
    69     /// Calculates the standard deviation of values.
     166    /// Calculates the sample standard deviation of values.
    70167    /// </summary>
    71168    /// <param name="values"></param>
     
    76173
    77174    /// <summary>
    78     /// Calculates the variance of values. (sum (x - x_mean)² / n)
     175    /// Calculates the population standard deviation of values.
     176    /// </summary>
     177    /// <param name="values"></param>
     178    /// <returns></returns>
     179    public static double StandardDeviationPop(this IEnumerable<double> values) {
     180      return Math.Sqrt(VariancePop(values));
     181    }
     182
     183    /// <summary>
     184    /// Calculates the sample variance of values. (sum (x - x_mean)² / (n-1))
    79185    /// </summary>
    80186    /// <param name="values"></param>
    81187    /// <returns></returns>
    82188    public static double Variance(this IEnumerable<double> values) {
     189      return Variance(values, true);
     190    }
     191
     192    /// <summary>
     193    /// Calculates the population variance of values. (sum (x - x_mean)² / n)
     194    /// </summary>
     195    /// <param name="values"></param>
     196    /// <returns></returns>
     197    public static double VariancePop(this IEnumerable<double> values) {
     198      return Variance(values, false);
     199    }
     200
     201    private static double Variance(IEnumerable<double> values, bool sampleVariance) {
    83202      int m_n = 0;
    84203      double m_oldM = 0.0;
     
    100219        }
    101220      }
    102       return ((m_n > 1) ? m_newS / (m_n - 1) : 0.0);
    103     }
    104 
    105     /// <summary>
    106     /// Calculates the pth percentile of the values.
    107     /// </summary
    108     public static double Percentile(this IEnumerable<double> values, double p) {
    109       // iterate only once
    110       double[] valuesArr = values.ToArray();
    111       int n = valuesArr.Length;
    112       if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
    113       if (n == 1) return values.ElementAt(0);
    114 
    115       if (p.IsAlmost(0.0)) return valuesArr[0];
    116       if (p.IsAlmost(1.0)) return valuesArr[n - 1];
    117 
    118       double t = p * (n - 1);
    119       int index = (int)Math.Floor(t);
    120       double percentage = t - index;
    121       return valuesArr[index] * (1 - percentage) + valuesArr[index + 1] * percentage;
     221
     222      if (m_n == 0) return double.NaN;
     223      if (m_n == 1) return 0.0;
     224
     225      if (sampleVariance) return m_newS / (m_n - 1);
     226      else return m_newS / m_n;
    122227    }
    123228
Note: See TracChangeset for help on using the changeset viewer.