Changeset 12893 for branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs
- Timestamp:
- 08/24/15 13:56:27 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs
--- branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs (r12876)
+++ branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs (r12893)
@@ -12,39 +12,32 @@
     private double[] pZero;
     public int NumArms { get { return alpha.Length; } }
-    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
     public int OptimalExpectedRewardArm { get; private set; }
     public int OptimalMaximalRewardArm { get; private set; }
-    public double MaxReward { get; private set; }
-    public double MinReward { get; private set; }
     private readonly Random random;
-    public ParetoBandit(Random random, IEnumerable<double> alpha) : this(random, alpha, alpha.Select(_ => 0.0)) { }
-    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero) { // probability of a zero reward
+
+    public ParetoBandit(Random random, IEnumerable<double> alpha) {
+      this.alpha = alpha.ToArray();
+      this.pZero = new double[this.alpha.Length];
+      this.random = random;
+      OptimalExpectedRewardArm = Array.IndexOf(this.alpha, alpha.Min());
+      OptimalMaximalRewardArm = OptimalExpectedRewardArm;
+    }
+    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) { // probability of a zero reward
       this.alpha = alpha.ToArray();
       this.pZero = pZero.ToArray();
       this.random = random;
-
-      // find optimal arms using empirical estimates
-      var bestExpReward = double.NegativeInfinity;
-      var bestMaxReward = double.NegativeInfinity;
-      for (int k = 0; k < NumArms; k++) {
-        double expReward = 0.0;
-        double maxReward = double.NegativeInfinity;
-        for (int i = 0; i < 100000; i++) {
-          var r = Pull(k);
-          expReward += r;
-          maxReward = Math.Max(maxReward, r);
-        }
-        expReward /= 100000;
-
-        if (expReward > bestExpReward) {
-          bestExpReward = expReward;
-          OptimalExpectedRewardArm = k;
-          OptimalExpectedReward = expReward;
-        }
-        if (maxReward > bestMaxReward) {
-          bestMaxReward = maxReward;
-          OptimalMaximalRewardArm = k;
-        }
-      }
+      OptimalExpectedRewardArm = bestExpRewardArm;
+      OptimalMaximalRewardArm = bestMaxRewardArm;
     }
Note: See TracChangeset
for help on using the changeset viewer.