Context Navigation

← Previous Changeset
Next Changeset →

Changeset 11710

Timestamp:

12/21/14 09:19:54 (10 years ago)

Author:

gkronber

Message:

#2283: more bandit policies and tests

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

7 added
1 deleted
3 edited

HeuristicLab.Algorithms.Bandits/Bandit.cs (deleted)
HeuristicLab.Algorithms.Bandits/Bandits (added)
HeuristicLab.Algorithms.Bandits/BernoulliBandit.cs (added)
HeuristicLab.Algorithms.Bandits/EpsGreedyPolicy.cs (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj (modified) (1 diff)
HeuristicLab.Algorithms.Bandits/Policies (added)
HeuristicLab.Algorithms.Bandits/TruncatedNormalBandit.cs (added)
HeuristicLab.Algorithms.Bandits/UCB1Policy.cs (added)
HeuristicLab.Algorithms.Bandits/UCB1TunedPolicy.cs (added)
HeuristicLab.Algorithms.Bandits/UCBNormalPolicy.cs (added)
HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs (modified) (4 diffs)

Legend:

Unmodified
Added
Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/EpsGreedyPolicy.cs

-                      r11708
+                      r11710
         var maxReward = double.NegativeInfinity;
         int bestAction = -1;
         int curAction = -1;
         foreach (var avgReward in sumReward.Zip(tries, (r, t) => r / (t + 1))) { // prevent division by zero
           curAction++;
+        for (int i = 0; i < NumActions; i++) {
+          if (tries[i] == 0) return i;
+          var avgReward = sumReward[i] / tries[i];
           if (maxReward < avgReward) {
             maxReward = avgReward;
             bestAction = curAction;
+            bestAction = i;
+          }
+        }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

-                      r11708
+                      r11710
   </ItemGroup>
   <ItemGroup>
     <Compile Include="Bandit.cs" />
+    <Compile Include="BernoulliBandit.cs" />
     <Compile Include="BanditPolicy.cs" />
+    <Compile Include="TruncatedNormalBandit.cs" />
+    <Compile Include="UCBNormalPolicy.cs" />
+    <Compile Include="UCB1TunedPolicy.cs" />
+    <Compile Include="UCB1Policy.cs" />
     <Compile Include="EpsGreedyPolicy.cs" />
     <Compile Include="IPolicy.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="RandomPolicy.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <Folder Include="Bandits\" />
+    <Folder Include="Policies\" />
   </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

-                      r11708
+                      r11710
   public class TestBanditPolicies {
     [TestMethod]
     public void ComparePolicies() {
+    public void ComparePoliciesForBernoulliBandit() {
       System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
 …
       var nArms = 10;
       Console.WriteLine("Random");
+      TestPolicy(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      Console.WriteLine("UCB1");
+      TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(10));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(10));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(10));
       Console.WriteLine("Eps(0.01)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
       Console.WriteLine("Eps(0.05)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
       Console.WriteLine("Eps(0.1)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
       Console.WriteLine("Eps(0.2)");
       TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
       Console.WriteLine("Eps(0.5)");
+      TestPolicy(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+    }
+    [TestMethod]
+    public void ComparePoliciesForNormalBandit() {
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      var globalRand = new Random(31415);
+      var seedForPolicy = globalRand.Next();
+      var nArms = 10;
+      Console.WriteLine("Random");
+      TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
+      Console.WriteLine("UCB1");
+      TestPolicyNormal(globalRand, nArms, new UCB1Policy(10));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(10));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(10));
+      Console.WriteLine("Eps(0.01)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
+      Console.WriteLine("Eps(0.05)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
+      Console.WriteLine("Eps(0.1)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
+      Console.WriteLine("Eps(0.2)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
+      Console.WriteLine("Eps(0.5)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
+    }
     private void TestPolicy(Random globalRand, int nArms, IPolicy policy) {
+    private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
       var maxIt = 1E6;
       var reps = 10; // 10 independent runs
 …
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
         var b = new Bandit(new Random(globalRand.Next()), 10);
+        var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
         policy.Reset();
         var totalRegret = 0.0;
 …
+      }
+    }
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      var maxIt = 1E6;
+      var reps = 10; // 10 independent runs
+      var avgRegretForIteration = new Dictionary<int, double>();
+      // calculate statistics
+      for (int r = 0; r < reps; r++) {
+        var nextLogStep = 1;
+        var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10);
+        policy.Reset();
+        var totalRegret = 0.0;
+        for (int i = 0; i <= maxIt; i++) {
+          var selectedAction = policy.SelectAction();
+          var reward = b.Pull(selectedAction);
+          totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
+          if (i == nextLogStep) {
+            nextLogStep *= 10;
+            if (!avgRegretForIteration.ContainsKey(i)) {
+              avgRegretForIteration.Add(i, 0.0);
+            }
+            avgRegretForIteration[i] += totalRegret / i;
+          }
+        }
+      }
+      // print
+      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
+      }
+    }
+  }
+}

Note: See TracChangeset for help on using the changeset viewer.