Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/07/15 09:21:46 (9 years ago)
Author:
gkronber
Message:

#2283: refactoring and bug fixes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

    r11730 r11732
     9  9  namespace HeuristicLab.Algorithms.Bandits.Models {
    10 10    public class BernoulliModel : IModel {
    11         private readonly int numActions;
    12         private readonly int[] success;
    13         private readonly int[] failure;
       11      private int success;
       12      private int failure;
    14 13
    15 14      // parameters of beta prior distribution
     
    17 16      private readonly double beta;
    18 17
    19         public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
    20           this.numActions = numActions;
    21           this.success = new int[numActions];
    22           this.failure = new int[numActions];
       18      public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
    23 19        this.alpha = alpha;
    24 20        this.beta = beta;
    25 21      }
    26 22
    27
    28         public double[] SampleExpectedRewards(Random random) {
       23      public double SampleExpectedReward(Random random) {
    29 24        // sample bernoulli mean from beta prior
    30           var theta = new double[numActions];
    31           for (int a = 0; a < numActions; a++) {
    32             if (success[a] == -1)
    33               theta[a] = 0.0;
    34             else {
    35               theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
    36             }
    37           }
    38
    39           // no need to sample we know the exact expected value
    40           // the expected value of a bernoulli variable is just theta
    41           return theta.Select(t => t).ToArray();
       25        return Rand.BetaRand(random, success + alpha, failure + beta);
    42 26      }
    43 27
    44         public void Update(int action, double reward) {
    45           const double EPSILON = 1E-6;
    46           Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
    47           if (Math.Abs(reward - 1.0) < EPSILON) {
    48             success[action]++;
       28      public void Update(double reward) {
       29        Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
       30        if (reward.IsAlmost(1.0)) {
       31          success++;
    49 32        } else {
    50             failure[action]++;
       33          failure++;
    51 34        }
    52 35      }
    53 36
    54         public void Disable(int action) {
    55           success[action] = -1;
    56         }
    57
    58 37      public void Reset() {
    59           Array.Clear(success, 0, numActions);
    60           Array.Clear(failure, 0, numActions);
       38        success = 0;
       39        failure = 0;
    61 40      }
    62 41
    63 42      public void PrintStats() {
    64           for (int i = 0; i < numActions; i++) {
    65             Console.Write("{0:F2} ", success[i] / (double)failure[i]);
    66           }
       43        Console.Write("{0:F2} ", success / (double)failure);
       44      }
       45
       46      public object Clone() {
       47        return new BernoulliModel() { failure = this.failure, success = this.success };
    67 48      }
    68 49    }
Note: See TracChangeset for help on using the changeset viewer.