Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/24/15 13:56:27 (9 years ago)
Author:
gkronber
Message:

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.Bandits/ParetoBandit.cs

    r12876 r12893
    12 12      private double[] pZero;
    13 13      public int NumArms { get { return alpha.Length; } }
    14    -    public double OptimalExpectedReward { get; private set; } // reward of the best arm, for calculating regret
    15 14      public int OptimalExpectedRewardArm { get; private set; }
    16 15      public int OptimalMaximalRewardArm { get; private set; }
    17    -    public double MaxReward { get; private set; }
    18    -    public double MinReward { get; private set; }
    19 16      private readonly Random random;
    20    -    public ParetoBandit(Random random, IEnumerable<double> alpha) : this(random, alpha, alpha.Select(_ => 0.0)) { }
    21    -    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero) { // probability of a zero reward
       17 +
       18 +    public ParetoBandit(Random random, IEnumerable<double> alpha) {
       19 +      this.alpha = alpha.ToArray();
       20 +      this.pZero = new double[this.alpha.Length];
       21 +      this.random = random;
       22 +      OptimalExpectedRewardArm = Array.IndexOf(this.alpha, alpha.Min());
       23 +      OptimalMaximalRewardArm = OptimalExpectedRewardArm;
       24 +    }
       25 +    public ParetoBandit(Random random, IEnumerable<double> alpha, IEnumerable<double> pZero, int bestExpRewardArm, int bestMaxRewardArm) { // probability of a zero reward
    22 26        this.alpha = alpha.ToArray();
    23 27        this.pZero = pZero.ToArray();
    24 28        this.random = random;
    25    -
    26    -      // find optimal arms using empirical estimates
    27    -      var bestExpReward = double.NegativeInfinity;
    28    -      var bestMaxReward = double.NegativeInfinity;
    29    -      for (int k = 0; k < NumArms; k++) {
    30    -        double expReward = 0.0;
    31    -        double maxReward = double.NegativeInfinity;
    32    -        for (int i = 0; i < 100000; i++) {
    33    -          var r = Pull(k);
    34    -          expReward += r;
    35    -          maxReward = Math.Max(maxReward, r);
    36    -        }
    37    -        expReward /= 100000;
    38    -
    39    -        if (expReward > bestExpReward) {
    40    -          bestExpReward = expReward;
    41    -          OptimalExpectedRewardArm = k;
    42    -          OptimalExpectedReward = expReward;
    43    -        }
    44    -        if (maxReward > bestMaxReward) {
    45    -          bestMaxReward = maxReward;
    46    -          OptimalMaximalRewardArm = k;
    47    -        }
    48    -      }
       29 +      OptimalExpectedRewardArm = bestExpRewardArm;
       30 +      OptimalMaximalRewardArm = bestMaxRewardArm;
    49 31      }
    50 32
Note: See TracChangeset for help on using the changeset viewer.