Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.Random/3.3/RandomEnumerable.cs @ 10368

Last change on this file since 10368 was 9456, checked in by swagner, 12 years ago

Updated copyright year and added some missing license headers (#1889)

File size: 13.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Core;
26
27namespace HeuristicLab.Random {
28  public static class RandomEnumerable {
29    public static IEnumerable<int> SampleRandomNumbers(int maxElement, int count) {
30      return SampleRandomNumbers(Environment.TickCount, 0, maxElement, count);
31    }
32
33    public static IEnumerable<int> SampleRandomNumbers(int start, int end, int count) {
34      return SampleRandomNumbers(Environment.TickCount, start, end, count);
35    }
36
37    //algorithm taken from progamming pearls page 127
38    //IMPORTANT because IEnumerables with yield are used the seed must best be specified to return always
39    //the same sequence of numbers without caching the values.
40    public static IEnumerable<int> SampleRandomNumbers(int seed, int start, int end, int count) {
41      int remaining = end - start;
42      var mt = new FastRandom(seed);
43      for (int i = start; i < end && count > 0; i++) {
44        double probability = mt.NextDouble();
45        if (probability < ((double)count) / remaining) {
46          count--;
47          yield return i;
48        }
49        remaining--;
50      }
51    }
52
53    /// <summary>
54    /// Chooses one elements from a sequence giving each element an equal chance.
55    /// </summary>
56    /// <remarks>
57    /// Runtime complexity is O(1) for sequences that are of type <see cref="IList{T}"/> and
58    /// O(N) for all other.
59    /// </remarks>
60    /// <exception cref="ArgumentException">If the sequence is empty.</exception>
61    /// <typeparam name="T">The type of the items to be selected.</typeparam>
62    /// <param name="source">The sequence of elements.</param>
63    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
64    /// <param name="count">The number of items to be selected.</param>
65    /// <returns>An element that has been chosen randomly from the sequence.</returns>
66    public static T SampleRandom<T>(this IEnumerable<T> source, IRandom random) {
67      if (!source.Any()) throw new ArgumentException("sequence is empty.", "source");
68      return source.SampleRandom(random, 1).First();
69    }
70
71    /// <summary>
72    /// Chooses <paramref name="count"/> elements from a sequence with repetition with equal chances for each element.
73    /// </summary>
74    /// <remarks>
75    /// Runtime complexity is O(count) for sequences that are <see cref="IList{T}"/> and
76    /// O(N * count) for all other. No exception is thrown if the sequence is empty.
77    ///
78    /// The method is online.
79    /// </remarks>
80    /// <typeparam name="T">The type of the items to be selected.</typeparam>
81    /// <param name="source">The sequence of elements.</param>
82    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
83    /// <param name="count">The number of items to be selected.</param>
84    /// <returns>A sequence of elements that have been chosen randomly.</returns>
85    public static IEnumerable<T> SampleRandom<T>(this IEnumerable<T> source, IRandom random, int count) {
86      var listSource = source as IList<T>;
87      if (listSource != null) {
88        while (count > 0) {
89          yield return listSource[random.Next(listSource.Count)];
90          count--;
91        }
92      } else {
93        while (count > 0) {
94          var enumerator = source.GetEnumerator();
95          enumerator.MoveNext();
96          T selectedItem = enumerator.Current;
97          int counter = 1;
98          while (enumerator.MoveNext()) {
99            counter++;
100            if (counter * random.NextDouble() < 1.0)
101              selectedItem = enumerator.Current;
102          }
103          yield return selectedItem;
104          count--;
105        }
106      }
107    }
108
109    /// <summary>
110    /// Chooses <paramref name="count"/> elements from a sequence without repetition with equal chances for each element.
111    /// The items are returned in the same order as they appear in the sequence.
112    /// </summary>
113    /// <remarks>
114    /// Runtime complexity is O(N) for all sequences.
115    /// No exception is thrown if the sequence contains less items than there are to be selected.
116    ///
117    /// The method is online.
118    /// </remarks>
119    /// <typeparam name="T">The type of the items to be selected.</typeparam>
120    /// <param name="source">The sequence of elements.</param>
121    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
122    /// <param name="count">The number of items to be selected.</param>
123    /// <param name="sourceCount">Optional parameter specifying the number of elements in the source enumerations</param>
124    /// <returns>A sequence of elements that have been chosen randomly.</returns>
125    public static IEnumerable<T> SampleRandomWithoutRepetition<T>(this IEnumerable<T> source, IRandom random, int count, int sourceCount = -1) {
126      if (sourceCount == -1) sourceCount = source.Count();
127      int remaining = sourceCount;
128      foreach (var item in source) {
129        if (random.NextDouble() * remaining < count) {
130          count--;
131          yield return item;
132          if (count <= 0) break;
133        }
134        remaining--;
135      }
136    }
137
138    /// <summary>
139    /// Chooses elements out of a sequence with repetition. The chance that an item is selected is proportional or inverse-proportional
140    /// to the <paramref name="weights"/>.
141    /// </summary>
142    /// <remarks>
143    /// In case both <paramref name="inverseProportional"/> and <paramref name="windowing"/> are false values must be &gt; 0,
144    /// otherwise an InvalidOperationException is thrown.
145    ///
146    /// The method internally holds two arrays: One that is the sequence itself and another one for the values.
147    /// </remarks>
148    /// <typeparam name="T">The type of the items to be selected.</typeparam>
149    /// <param name="source">The sequence of elements.</param>
150    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
151    /// <param name="count">The number of items to be selected.</param>
152    /// <param name="weights">The weight values for the items.</param>
153    /// <param name="windowing">Whether to scale the proportional values or not.</param>
154    /// <param name="inverseProportional">Determines whether to choose proportionally (false) or inverse-proportionally (true).</param>
155    /// <returns>A sequence of selected items. The sequence might contain the same item more than once.</returns>
156    public static IEnumerable<T> SampleProportional<T>(this IEnumerable<T> source, IRandom random, int count, IEnumerable<double> weights, bool windowing = true, bool inverseProportional = false) {
157      return source.SampleProportional(random, weights, windowing, inverseProportional).Take(count);
158    }
159
160    /// <summary>
161    /// Same as <seealso cref="SampleProportional<T>"/>, but chooses an item exactly once.
162    /// </summary>
163    /// <remarks>
164    /// In case both <paramref name="inverseProportional"/> and <paramref name="windowing"/> are false values must be &gt; 0,
165    /// otherwise an InvalidOperationException is thrown.
166    ///
167    /// The method internally holds two arrays: One that is the sequence itself and another one for the values.
168    /// </remarks>
169    /// <typeparam name="T">The type of the items to be selected.</typeparam>
170    /// <param name="source">The sequence of elements.</param>
171    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
172    /// <param name="count">The number of items to be selected.</param>
173    /// <param name="weights">The weight values for the items.</param>
174    /// <param name="windowing">Whether to scale the proportional values or not.</param>
175    /// <param name="maximization">Determines whether to choose proportionally (true) or inverse-proportionally (false).</param>
176    /// <returns>A sequence of selected items.</returns>
177    public static IEnumerable<T> SampleProportionalWithoutRepetition<T>(this IEnumerable<T> source, IRandom random, int count, IEnumerable<double> weights, bool windowing = true, bool inverseProportional = false) {
178      return source.SampleProportionalWithoutRepetition(random, weights, windowing, inverseProportional).Take(count);
179    }
180    #region Proportional Helpers
181    private static IEnumerable<T> SampleProportional<T>(this IEnumerable<T> source, IRandom random, IEnumerable<double> weights, bool windowing, bool inverseProportional) {
182      var sourceArray = source.ToArray();
183      var valueArray = PrepareProportional<T>(sourceArray, weights, windowing, inverseProportional);
184      double total = valueArray.Sum();
185
186      while (true) {
187        int index = 0;
188        double ball = valueArray[index], sum = random.NextDouble() * total;
189        while (ball < sum)
190          ball += valueArray[++index];
191        yield return sourceArray[index];
192      }
193    }
194    private static IEnumerable<T> SampleProportionalWithoutRepetition<T>(this IEnumerable<T> source, IRandom random, IEnumerable<double> weights, bool windowing, bool inverseProportional) {
195      var sourceArray = source.ToArray();
196      var valueArray = PrepareProportional<T>(sourceArray, weights, windowing, inverseProportional);
197      double total = valueArray.Sum();
198
199      HashSet<int> chosenIndices = new HashSet<int>();
200      while (chosenIndices.Count < sourceArray.Length) {
201        int index = 0;
202        double ball = valueArray[index], sum = random.NextDouble() * total;
203        while (ball < sum) {
204          index++;
205          if (!chosenIndices.Contains(index))
206            ball += valueArray[++index];
207        }
208        yield return sourceArray[index];
209        chosenIndices.Add(index);
210        total -= valueArray[index];
211      }
212    }
213    private static double[] PrepareProportional<T>(IList<T> sourceArray, IEnumerable<double> weights, bool windowing, bool inverseProportional) {
214      double maxValue = double.MinValue, minValue = double.MaxValue;
215      double[] valueArray = new double[sourceArray.Count];
216
217      var weightsEnum = weights.GetEnumerator();
218      for (int i = 0; i < sourceArray.Count && weightsEnum.MoveNext(); i++) {
219        valueArray[i] = weightsEnum.Current;
220        if (valueArray[i] > maxValue) maxValue = valueArray[i];
221        if (valueArray[i] < minValue) minValue = valueArray[i];
222      }
223      if (minValue == maxValue) {  // all values are equal
224        for (int i = 0; i < sourceArray.Count; i++) {
225          valueArray[i] = 1.0;
226        }
227      } else {
228        if (windowing) {
229          if (inverseProportional) InverseProportionalScale(valueArray, maxValue);
230          else ProportionalScale(valueArray, minValue);
231        } else {
232          if (minValue < 0.0) throw new InvalidOperationException("Proportional selection without windowing does not work with values < 0.");
233          if (inverseProportional) InverseProportionalScale(valueArray, 2 * maxValue);
234        }
235      }
236      return valueArray;
237    }
238    private static void ProportionalScale(double[] values, double minValue) {
239      for (int i = 0; i < values.Length; i++) {
240        values[i] = values[i] - minValue;
241      }
242    }
243    private static void InverseProportionalScale(double[] values, double maxValue) {
244      for (int i = 0; i < values.Length; i++) {
245        values[i] = maxValue - values[i];
246      }
247    }
248    #endregion
249
250    /// <summary>
251    /// Shuffles an enumerable and returns a new enumerable according to the Fisher-Yates shuffle.
252    /// </summary>
253    /// <remarks>
254    /// Note that the source enumerable is transformed into an array.
255    ///
256    /// The implementation is described in http://stackoverflow.com/questions/1287567/c-is-using-random-and-orderby-a-good-shuffle-algorithm.
257    /// </remarks>
258    /// <typeparam name="T">The type of the items that are to be shuffled.</typeparam>
259    /// <param name="source">The enumerable that contains the items.</param>
260    /// <param name="random">The random number generator, its Next(n) method must deliver uniformly distributed random numbers in the range [0;n).</param>
261    /// <returns>An enumerable with the elements shuffled.</returns>
262    public static IEnumerable<T> Shuffle<T>(this IEnumerable<T> source, IRandom random) {
263      T[] elements = source.ToArray();
264      for (int i = elements.Length - 1; i > 0; i--) {
265        // Swap element "i" with a random earlier element (including itself)
266        int swapIndex = random.Next(i + 1);
267        yield return elements[swapIndex];
268        elements[swapIndex] = elements[i];
269        // we don't actually perform the swap, we can forget about the
270        // swapped element because we already returned it.
271      }
272      yield return elements[0];
273    }
274  }
275}
Note: See TracBrowser for help on using the repository browser.