Context Navigation

← Previous Change
Next Change →

BernoulliModel.cs

Timestamp:

01/07/15 09:21:46 (9 years ago)

Author:

gkronber

Message:

#2283: refactoring and bug fixes

File:

1 edited

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs (modified) (2 diffs)

Legend:

Unmodified
Added
Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

-                      r11730
+                      r11732
 namespace HeuristicLab.Algorithms.Bandits.Models {
   public class BernoulliModel : IModel {
+    private readonly int numActions;
+    private readonly int[] success;
+    private readonly int[] failure;
+    private int success;
+    private int failure;
     // parameters of beta prior distribution
 …
     private readonly double beta;
+    public BernoulliModel(int numActions, double alpha = 1.0, double beta = 1.0) {
+      this.numActions = numActions;
+      this.success = new int[numActions];
+      this.failure = new int[numActions];
+    public BernoulliModel(double alpha = 1.0, double beta = 1.0) {
       this.alpha = alpha;
       this.beta = beta;
+    }
+    public double[] SampleExpectedRewards(Random random) {
+    public double SampleExpectedReward(Random random) {
       // sample bernoulli mean from beta prior
+      var theta = new double[numActions];
+      for (int a = 0; a < numActions; a++) {
+        if (success[a] == -1)
+          theta[a] = 0.0;
+        else {
+          theta[a] = Rand.BetaRand(random, success[a] + alpha, failure[a] + beta);
+        }
+      }
+      // no need to sample we know the exact expected value
+      // the expected value of a bernoulli variable is just theta
+      return theta.Select(t => t).ToArray();
+      return Rand.BetaRand(random, success + alpha, failure + beta);
+    }
+    public void Update(int action, double reward) {
+      const double EPSILON = 1E-6;
+      Debug.Assert(Math.Abs(reward - 0.0) < EPSILON || Math.Abs(reward - 1.0) < EPSILON);
+      if (Math.Abs(reward - 1.0) < EPSILON) {
+        success[action]++;
+    public void Update(double reward) {
+      Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
+      if (reward.IsAlmost(1.0)) {
+        success++;
       } else {
         failure[action]++;
+        failure++;
+      }
+    }
-    public void Disable(int action) {
-      success[action] = -1;
+    }
     public void Reset() {
       Array.Clear(success, 0, numActions);
       Array.Clear(failure, 0, numActions);
+      success = 0;
+      failure = 0;
+    }
     public void PrintStats() {
+      for (int i = 0; i < numActions; i++) {
+        Console.Write("{0:F2} ", success[i] / (double)failure[i]);
+      }
+      Console.Write("{0:F2} ", success / (double)failure);
+    }
+    public object Clone() {
+      return new BernoulliModel() { failure = this.failure, success = this.success };
+    }
+  }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

Download in other formats: