Context Navigation

← Previous Change
Next Change →

TestBanditPolicies.cs

Timestamp:

12/21/14 09:19:54 (9 years ago)

Author:

gkronber

Message:

#2283: more bandit policies and tests

File:

: 1 edited

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

-                      r11708
+                      r11710
   public class TestBanditPolicies {
     [TestMethod]
     public void ComparePolicies() {
+    public void ComparePoliciesForBernoulliBandit() {
       System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
 …
       var nArms = 10;
       Console.WriteLine("Random");
+      TestPolicy(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      Console.WriteLine("UCB1");
+      TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(10));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(10));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(10));
       Console.WriteLine("Eps(0.01)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
       Console.WriteLine("Eps(0.05)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
       Console.WriteLine("Eps(0.1)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
       Console.WriteLine("Eps(0.2)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
       Console.WriteLine("Eps(0.5)");
+      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+    }
+    /// <summary>
+    /// Runs each policy under comparison through <c>TestPolicyNormal</c> and writes a
+    /// label line to the console in front of the output of every policy.
+    /// </summary>
+    [TestMethod]
+    public void ComparePoliciesForNormalBandit() {
+      // the invariant culture is installed on the current thread before anything is printed
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      var rng = new Random(31415);
+      // drawn once: every randomized policy below gets its own Random built from this same seed
+      var policySeed = rng.Next();
+      var armCount = 10;
+
+      // writes the label first, then creates the policy and evaluates it
+      Action<string, Func<IPolicy>> evaluate = (label, createPolicy) => {
+        Console.WriteLine(label);
+        TestPolicyNormal(rng, armCount, createPolicy());
+      };
+
+      evaluate("Random", () => new RandomPolicy(new Random(policySeed), 10));
+      evaluate("UCB1", () => new UCB1Policy(10));
+      evaluate("UCB1Tuned", () => new UCB1TunedPolicy(10));
+      evaluate("UCB1Normal", () => new UCBNormalPolicy(10));
+      evaluate("Eps(0.01)", () => new EpsGreedyPolicy(new Random(policySeed), 10, 0.01));
+      evaluate("Eps(0.05)", () => new EpsGreedyPolicy(new Random(policySeed), 10, 0.05));
+      evaluate("Eps(0.1)", () => new EpsGreedyPolicy(new Random(policySeed), 10, 0.1));
+      evaluate("Eps(0.2)", () => new EpsGreedyPolicy(new Random(policySeed), 10, 0.2));
+      evaluate("Eps(0.5)", () => new EpsGreedyPolicy(new Random(policySeed), 10, 0.5));
+    }
     private void TestPolicy(Random globalRand, int nArms, IPolicy policy) {
+    private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
       var maxIt = 1E6;
       var reps = 10; // 10 independent runs
 …
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
         var b = new Bandit(new Random(globalRand.Next()), 10);
+        var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
         policy.Reset();
         var totalRegret = 0.0;
 …
+      }
+    }
+    /// <summary>
+    /// Evaluates <paramref name="policy"/> on newly created truncated-normal bandits and prints,
+    /// for the pull counts 1, 10, 100, ... up to the iteration limit, the average regret per pull,
+    /// averaged over all independent runs.
+    /// </summary>
+    /// <param name="globalRand">Supplies the seed for the bandit created in each run.</param>
+    /// <param name="nArms">Arm count handed to the bandit; presumably has to match the arm count the policy was built with (TODO confirm).</param>
+    /// <param name="policy">Policy under test; it is reset at the start of every run.</param>
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      const int maxIt = 1000000; // integral limit, so the loop no longer compares an int against a double
+      const int reps = 10; // 10 independent runs
+      // pull count -> sum over all runs of the average regret per pull at that count
+      var avgRegretForIteration = new Dictionary<int, double>();
+      // calculate statistics
+      for (int r = 0; r < reps; r++) {
+        var nextLogStep = 1;
+        // NOTE(review): assumes the second constructor argument is the number of arms
+        // (it used to be a hard-coded 10 that ignored nArms) -- confirm against TruncatedNormalBandit
+        var b = new TruncatedNormalBandit(new Random(globalRand.Next()), nArms);
+        policy.Reset();
+        var totalRegret = 0.0;
+        // i is the number of pulls completed so far (1-based), which makes totalRegret / i the
+        // true mean; a 0-based counter would divide the regret of i + 1 pulls by i
+        for (int i = 1; i <= maxIt; i++) {
+          var selectedAction = policy.SelectAction();
+          var reward = b.Pull(selectedAction);
+          totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
+          if (i == nextLogStep) {
+            nextLogStep *= 10; // log on a decade grid: 1, 10, 100, ...
+            double regretSum;
+            // leaves regretSum at 0.0 the first time a pull count is logged
+            avgRegretForIteration.TryGetValue(i, out regretSum);
+            avgRegretForIteration[i] = regretSum + totalRegret / i;
+          }
+        }
+      }
+      // print
+      foreach (var entry in avgRegretForIteration.OrderBy(e => e.Key)) {
+        Console.WriteLine("{0} {1}", entry.Key, entry.Value / reps); // print avg. of avg. regret
+      }
+    }
+  }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11710 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

Download in other formats: