Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Random/3.3/RandomEnumerable.cs @ 8934

Last change on this file since 8934 was 7828, checked in by abeham, 13 years ago

#1848: fixed a bug in SampleProportional

File size: 13.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Core;
26
27namespace HeuristicLab.Random {
28  public static class RandomEnumerable {
29    public static IEnumerable<int> SampleRandomNumbers(int maxElement, int count) {
30      return SampleRandomNumbers(Environment.TickCount, 0, maxElement, count);
31    }
32
33    public static IEnumerable<int> SampleRandomNumbers(int start, int end, int count) {
34      return SampleRandomNumbers(Environment.TickCount, start, end, count);
35    }
36
37    //algorithm taken from progamming pearls page 127
38    //IMPORTANT because IEnumerables with yield are used the seed must best be specified to return always
39    //the same sequence of numbers without caching the values.
40    public static IEnumerable<int> SampleRandomNumbers(int seed, int start, int end, int count) {
41      int remaining = end - start;
42      var mt = new FastRandom(seed);
43      for (int i = start; i < end && count > 0; i++) {
44        double probability = mt.NextDouble();
45        if (probability < ((double)count) / remaining) {
46          count--;
47          yield return i;
48        }
49        remaining--;
50      }
51    }
52
53    /// <summary>
54    /// Chooses one elements from a sequence giving each element an equal chance.
55    /// </summary>
56    /// <remarks>
57    /// Runtime complexity is O(1) for sequences that are of type <see cref="IList{T}"/> and
58    /// O(N) for all other.
59    /// </remarks>
60    /// <exception cref="ArgumentException">If the sequence is empty.</exception>
61    /// <typeparam name="T">The type of the items to be selected.</typeparam>
62    /// <param name="source">The sequence of elements.</param>
63    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
64    /// <param name="count">The number of items to be selected.</param>
65    /// <returns>An element that has been chosen randomly from the sequence.</returns>
66    public static T SampleRandom<T>(this IEnumerable<T> source, IRandom random) {
67      if (!source.Any()) throw new ArgumentException("sequence is empty.", "source");
68      return source.SampleRandom(random, 1).First();
69    }
70
71    /// <summary>
72    /// Chooses <paramref name="count"/> elements from a sequence with repetition with equal chances for each element.
73    /// </summary>
74    /// <remarks>
75    /// Runtime complexity is O(count) for sequences that are <see cref="IList{T}"/> and
76    /// O(N * count) for all other. No exception is thrown if the sequence is empty.
77    ///
78    /// The method is online.
79    /// </remarks>
80    /// <typeparam name="T">The type of the items to be selected.</typeparam>
81    /// <param name="source">The sequence of elements.</param>
82    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
83    /// <param name="count">The number of items to be selected.</param>
84    /// <returns>A sequence of elements that have been chosen randomly.</returns>
85    public static IEnumerable<T> SampleRandom<T>(this IEnumerable<T> source, IRandom random, int count) {
86      var listSource = source as IList<T>;
87      if (listSource != null) {
88        while (count > 0) {
89          yield return listSource[random.Next(listSource.Count)];
90          count--;
91        }
92      } else {
93        while (count > 0) {
94          var enumerator = source.GetEnumerator();
95          enumerator.MoveNext();
96          T selectedItem = enumerator.Current;
97          int counter = 1;
98          while (enumerator.MoveNext()) {
99            counter++;
100            if (counter * random.NextDouble() < 1.0)
101              selectedItem = enumerator.Current;
102          }
103          yield return selectedItem;
104          count--;
105        }
106      }
107    }
108
109    /// <summary>
110    /// Chooses <paramref name="count"/> elements from a sequence without repetition with equal chances for each element.
111    /// The items are returned in the same order as they appear in the sequence.
112    /// </summary>
113    /// <remarks>
114    /// Runtime complexity is O(N) for all sequences.
115    /// No exception is thrown if the sequence contains less items than there are to be selected.
116    ///
117    /// The method is online.
118    /// </remarks>
119    /// <typeparam name="T">The type of the items to be selected.</typeparam>
120    /// <param name="source">The sequence of elements.</param>
121    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
122    /// <param name="count">The number of items to be selected.</param>
123    /// <returns>A sequence of elements that have been chosen randomly.</returns>
124    public static IEnumerable<T> SampleRandomWithoutRepetition<T>(this IEnumerable<T> source, IRandom random, int count) {
125      int remaining = source.Count();
126      foreach (var item in source) {
127        if (random.NextDouble() * remaining < count) {
128          count--;
129          yield return item;
130          if (count <= 0) break;
131        }
132        remaining--;
133      }
134    }
135
136    /// <summary>
137    /// Chooses elements out of a sequence with repetition. The chance that an item is selected is proportional or inverse-proportional
138    /// to the <paramref name="weights"/>.
139    /// </summary>
140    /// <remarks>
141    /// In case both <paramref name="inverseProportional"/> and <paramref name="windowing"/> are false values must be &gt; 0,
142    /// otherwise an InvalidOperationException is thrown.
143    ///
144    /// The method internally holds two arrays: One that is the sequence itself and another one for the values.
145    /// </remarks>
146    /// <typeparam name="T">The type of the items to be selected.</typeparam>
147    /// <param name="source">The sequence of elements.</param>
148    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
149    /// <param name="count">The number of items to be selected.</param>
150    /// <param name="weights">The weight values for the items.</param>
151    /// <param name="windowing">Whether to scale the proportional values or not.</param>
152    /// <param name="inverseProportional">Determines whether to choose proportionally (false) or inverse-proportionally (true).</param>
153    /// <returns>A sequence of selected items. The sequence might contain the same item more than once.</returns>
154    public static IEnumerable<T> SampleProportional<T>(this IEnumerable<T> source, IRandom random, int count, IEnumerable<double> weights, bool windowing = true, bool inverseProportional = false) {
155      return source.SampleProportional(random, weights, windowing, inverseProportional).Take(count);
156    }
157
158    /// <summary>
159    /// Same as <seealso cref="SampleProportional<T>"/>, but chooses an item exactly once.
160    /// </summary>
161    /// <remarks>
162    /// In case both <paramref name="inverseProportional"/> and <paramref name="windowing"/> are false values must be &gt; 0,
163    /// otherwise an InvalidOperationException is thrown.
164    ///
165    /// The method internally holds two arrays: One that is the sequence itself and another one for the values.
166    /// </remarks>
167    /// <typeparam name="T">The type of the items to be selected.</typeparam>
168    /// <param name="source">The sequence of elements.</param>
169    /// <param name="random">The random number generator to use, its NextDouble() method must produce values in the range [0;1)</param>
170    /// <param name="count">The number of items to be selected.</param>
171    /// <param name="weights">The weight values for the items.</param>
172    /// <param name="windowing">Whether to scale the proportional values or not.</param>
173    /// <param name="maximization">Determines whether to choose proportionally (true) or inverse-proportionally (false).</param>
174    /// <returns>A sequence of selected items.</returns>
175    public static IEnumerable<T> SampleProportionalWithoutRepetition<T>(this IEnumerable<T> source, IRandom random, int count, IEnumerable<double> weights, bool windowing = true, bool inverseProportional = false) {
176      return source.SampleProportionalWithoutRepetition(random, weights, windowing, inverseProportional).Take(count);
177    }
178    #region Proportional Helpers
179    private static IEnumerable<T> SampleProportional<T>(this IEnumerable<T> source, IRandom random, IEnumerable<double> weights, bool windowing, bool inverseProportional) {
180      var sourceArray = source.ToArray();
181      var valueArray = PrepareProportional<T>(sourceArray, weights, windowing, inverseProportional);
182      double total = valueArray.Sum();
183
184      while (true) {
185        int index = 0;
186        double ball = valueArray[index], sum = random.NextDouble() * total;
187        while (ball < sum)
188          ball += valueArray[++index];
189        yield return sourceArray[index];
190      }
191    }
192    private static IEnumerable<T> SampleProportionalWithoutRepetition<T>(this IEnumerable<T> source, IRandom random, IEnumerable<double> weights, bool windowing, bool inverseProportional) {
193      var sourceArray = source.ToArray();
194      var valueArray = PrepareProportional<T>(sourceArray, weights, windowing, inverseProportional);
195      double total = valueArray.Sum();
196
197      HashSet<int> chosenIndices = new HashSet<int>();
198      while (chosenIndices.Count < sourceArray.Length) {
199        int index = 0;
200        double ball = valueArray[index], sum = random.NextDouble() * total;
201        while (ball < sum) {
202          index++;
203          if (!chosenIndices.Contains(index))
204            ball += valueArray[++index];
205        }
206        yield return sourceArray[index];
207        chosenIndices.Add(index);
208        total -= valueArray[index];
209      }
210    }
211    private static double[] PrepareProportional<T>(IList<T> sourceArray, IEnumerable<double> weights, bool windowing, bool inverseProportional) {
212      double maxValue = double.MinValue, minValue = double.MaxValue;
213      double[] valueArray = new double[sourceArray.Count];
214
215      var weightsEnum = weights.GetEnumerator();
216      for (int i = 0; i < sourceArray.Count && weightsEnum.MoveNext(); i++) {
217        valueArray[i] = weightsEnum.Current;
218        if (valueArray[i] > maxValue) maxValue = valueArray[i];
219        if (valueArray[i] < minValue) minValue = valueArray[i];
220      }
221      if (minValue == maxValue) {  // all values are equal
222        for (int i = 0; i < sourceArray.Count; i++) {
223          valueArray[i] = 1.0;
224        }
225      } else {
226        if (windowing) {
227          if (inverseProportional) InverseProportionalScale(valueArray, maxValue);
228          else ProportionalScale(valueArray, minValue);
229        } else {
230          if (minValue < 0.0) throw new InvalidOperationException("Proportional selection without windowing does not work with values < 0.");
231          if (inverseProportional) InverseProportionalScale(valueArray, 2 * maxValue);
232        }
233      }
234      return valueArray;
235    }
236    private static void ProportionalScale(double[] values, double minValue) {
237      for (int i = 0; i < values.Length; i++) {
238        values[i] = values[i] - minValue;
239      }
240    }
241    private static void InverseProportionalScale(double[] values, double maxValue) {
242      for (int i = 0; i < values.Length; i++) {
243        values[i] = maxValue - values[i];
244      }
245    }
246    #endregion
247
248    /// <summary>
249    /// Shuffles an enumerable and returns a new enumerable according to the Fisher-Yates shuffle.
250    /// </summary>
251    /// <remarks>
252    /// Note that the source enumerable is transformed into an array.
253    ///
254    /// The implementation is described in http://stackoverflow.com/questions/1287567/c-is-using-random-and-orderby-a-good-shuffle-algorithm.
255    /// </remarks>
256    /// <typeparam name="T">The type of the items that are to be shuffled.</typeparam>
257    /// <param name="source">The enumerable that contains the items.</param>
258    /// <param name="random">The random number generator, its Next(n) method must deliver uniformly distributed random numbers in the range [0;n).</param>
259    /// <returns>An enumerable with the elements shuffled.</returns>
260    public static IEnumerable<T> Shuffle<T>(this IEnumerable<T> source, IRandom random) {
261      T[] elements = source.ToArray();
262      for (int i = elements.Length - 1; i > 0; i--) {
263        // Swap element "i" with a random earlier element (including itself)
264        int swapIndex = random.Next(i + 1);
265        yield return elements[swapIndex];
266        elements[swapIndex] = elements[i];
267        // we don't actually perform the swap, we can forget about the
268        // swapped element because we already returned it.
269      }
270      yield return elements[0];
271    }
272  }
273}
Note: See TracBrowser for help on using the repository browser.