source: branches/2839_HiveProjectManagement/HeuristicLab.Common/3.3/Statistics/EnumerableStatisticExtensions.cs @ 16057

Last change on this file since 16057 was 16057, checked in by jkarder, 2 years ago

#2839:

File size: 8.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics.Contracts;
25using System.Linq;
26
27namespace HeuristicLab.Common {
28  public static class EnumerableStatisticExtensions {
29    /// <summary>
30    /// Calculates the median element of the enumeration.
31    /// </summary>
32    /// <param name="values"></param>
33    /// <returns></returns>
34    public static double Median(this IEnumerable<double> values) {
35      // See unit tests for comparison with naive implementation
36      return Quantile(values, 0.5);
37    }
38
39    /// <summary>
40    /// Calculates the alpha-quantile element of the enumeration.
41    /// </summary>
42    /// <param name="values"></param>
43    /// <returns></returns>
44    public static double Quantile(this IEnumerable<double> values, double alpha) {
45      // See unit tests for comparison with naive implementation
46      double[] valuesArr = values.ToArray();
47      int n = valuesArr.Length;
48      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
49
50      // "When N is even, statistics books define the median as the arithmetic mean of the elements k = N/2
51      // and k = N/2 + 1 (that is, N/2 from the bottom and N/2 from the top).
52      // If you accept such pedantry, you must perform two separate selections to find these elements."
53
54      // return the element at Math.Ceiling (if n*alpha is fractional) or the average of two elements if n*alpha is integer.
55      var pos = n * alpha;
56      Contract.Assert(pos >= 0);
57      Contract.Assert(pos < n);
58      bool isInteger = Math.Round(pos).IsAlmost(pos);
59      if (isInteger) {
60        return 0.5 * (Select((int)pos - 1, valuesArr) + Select((int)pos, valuesArr));
61      } else {
62        return Select((int)Math.Ceiling(pos) - 1, valuesArr);
63      }
64    }
65
66    // Numerical Recipes in C++, §8.5 Selecting the Mth Largest, O(n)
67    // Given k in [0..n-1] returns an array value from array arr[0..n-1] such that k array values are
68    // less than or equal to the one returned. The input array will be rearranged to have this value in
69    // location arr[k], with all smaller elements moved to arr[0..k-1] (in arbitrary order) and all
70    // larger elements in arr[k+1..n-1] (also in arbitrary order).
71    //
72    // Could be changed to Select<T> where T is IComparable but in this case is significantly slower for double values
73    private static double Select(int k, double[] arr) {
74      Contract.Assert(arr.GetLowerBound(0) == 0);
75      Contract.Assert(k >= 0 && k < arr.Length);
76      int i, ir, j, l, mid, n = arr.Length;
77      double a;
78      l = 0;
79      ir = n - 1;
80      for (; ; ) {
81        if (ir <= l + 1) {
82          // Active partition contains 1 or 2 elements.
83          if (ir == l + 1 && arr[ir] < arr[l]) {
84            // if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) {
85            // Case of 2 elements.
86            // SWAP(arr[l], arr[ir]);
87            double temp = arr[l];
88            arr[l] = arr[ir];
89            arr[ir] = temp;
90          }
91          return arr[k];
92        } else {
93          mid = (l + ir) >> 1; // Choose median of left, center, and right elements
94          {
95            // SWAP(arr[mid], arr[l + 1]); // as partitioning element a. Also
96            double temp = arr[mid];
97            arr[mid] = arr[l + 1];
98            arr[l + 1] = temp;
99          }
100
101          if (arr[l] > arr[ir]) {
102            // if (arr[l].CompareTo(arr[ir]) > 0) {  // rearrange so that arr[l] arr[ir] <= arr[l+1],
103            // SWAP(arr[l], arr[ir]); . arr[ir] >= arr[l+1]
104            double temp = arr[l];
105            arr[l] = arr[ir];
106            arr[ir] = temp;
107          }
108
109          if (arr[l + 1] > arr[ir]) {
110            // if (arr[l + 1].CompareTo(arr[ir]) > 0) {
111            // SWAP(arr[l + 1], arr[ir]);
112            double temp = arr[l + 1];
113            arr[l + 1] = arr[ir];
114            arr[ir] = temp;
115          }
116          if (arr[l] > arr[l + 1]) {
117            //if (arr[l].CompareTo(arr[l + 1]) > 0) {
118            // SWAP(arr[l], arr[l + 1]);
119            double temp = arr[l];
120            arr[l] = arr[l + 1];
121            arr[l + 1] = temp;
122
123          }
124          i = l + 1; // Initialize pointers for partitioning.
125          j = ir;
126          a = arr[l + 1]; // Partitioning element.
127          for (; ; ) { // Beginning of innermost loop.
128            do i++; while (arr[i] < a /* arr[i].CompareTo(a) < 0 */); // Scan up to find element > a.
129            do j--; while (arr[j] > a /* arr[j].CompareTo(a) > 0 */); // Scan down to find element < a.
130            if (j < i) break; // Pointers crossed. Partitioning complete.
131            {
132              // SWAP(arr[i], arr[j]);
133              double temp = arr[i];
134              arr[i] = arr[j];
135              arr[j] = temp;
136            }
137          } // End of innermost loop.
138          arr[l + 1] = arr[j]; // Insert partitioning element.
139          arr[j] = a;
140          if (j >= k) ir = j - 1; // Keep active the partition that contains the
141          if (j <= k) l = i; // kth element.
142        }
143      }
144    }
145
146    /// <summary>
147    /// Calculates the range (max - min) of the enumeration.
148    /// </summary>
149    /// <param name="values"></param>
150    /// <returns></returns>
151    public static double Range(this IEnumerable<double> values) {
152      double min = double.PositiveInfinity;
153      double max = double.NegativeInfinity;
154      int i = 0;
155      foreach (var e in values) {
156        if (min > e) min = e;
157        if (max < e) max = e;
158        i++;
159      }
160      if (i < 1) throw new ArgumentException("The enumerable must contain at least two elements", "values");
161      return max - min;
162    }
163
164
165    /// <summary>
166    /// Calculates the sample standard deviation of values.
167    /// </summary>
168    /// <param name="values"></param>
169    /// <returns></returns>
170    public static double StandardDeviation(this IEnumerable<double> values) {
171      return Math.Sqrt(Variance(values));
172    }
173
174    /// <summary>
175    /// Calculates the population standard deviation of values.
176    /// </summary>
177    /// <param name="values"></param>
178    /// <returns></returns>
179    public static double StandardDeviationPop(this IEnumerable<double> values) {
180      return Math.Sqrt(VariancePop(values));
181    }
182
183    /// <summary>
184    /// Calculates the sample variance of values. (sum (x - x_mean)² / (n-1))
185    /// </summary>
186    /// <param name="values"></param>
187    /// <returns></returns>
188    public static double Variance(this IEnumerable<double> values) {
189      return Variance(values, true);
190    }
191
192    /// <summary>
193    /// Calculates the population variance of values. (sum (x - x_mean)² / n)
194    /// </summary>
195    /// <param name="values"></param>
196    /// <returns></returns>
197    public static double VariancePop(this IEnumerable<double> values) {
198      return Variance(values, false);
199    }
200
201    private static double Variance(IEnumerable<double> values, bool sampleVariance) {
202      int m_n = 0;
203      double m_oldM = 0.0;
204      double m_newM = 0.0;
205      double m_oldS = 0.0;
206      double m_newS = 0.0;
207      foreach (double x in values) {
208        m_n++;
209        if (m_n == 1) {
210          m_oldM = m_newM = x;
211          m_oldS = 0.0;
212        } else {
213          m_newM = m_oldM + (x - m_oldM) / m_n;
214          m_newS = m_oldS + (x - m_oldM) * (x - m_newM);
215
216          // set up for next iteration
217          m_oldM = m_newM;
218          m_oldS = m_newS;
219        }
220      }
221
222      if (m_n == 0) return double.NaN;
223      if (m_n == 1) return 0.0;
224
225      if (sampleVariance) return m_newS / (m_n - 1);
226      else return m_newS / m_n;
227    }
228
229    public static IEnumerable<double> LimitToRange(this IEnumerable<double> values, double min, double max) {
230      if (min > max) throw new ArgumentException(string.Format("Minimum {0} is larger than maximum {1}.", min, max));
231      foreach (var x in values) {
232        if (double.IsNaN(x)) yield return (max + min) / 2.0;
233        else if (x < min) yield return min;
234        else if (x > max) yield return max;
235        else yield return x;
236      }
237    }
238  }
239}
Note: See TracBrowser for help on using the repository browser.