Context Navigation

← Previous Change
Next Change →

TestBanditPolicies.cs

Timestamp:

01/02/15 16:08:21 (9 years ago)

Author:

gkronber

Message:

#2283: several major extensions for grammatical optimization

File:

: 1 edited

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

-                      r11727
+                      r11730
 using System.Globalization;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.Models;
 using Microsoft.VisualStudio.TestTools.UnitTesting;
 …
   [TestClass]
   public class TestBanditPolicies {
     [TestMethod]
     public void ComparePoliciesForBernoulliBandit() {
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
       var seedForPolicy = globalRand.Next();
       var nArms = 10;
+      var nArms = 20;
       //Console.WriteLine("Exp3 (gamma=0.01)");
       //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
       //Console.WriteLine("Exp3 (gamma=0.05)");
       //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
       Console.WriteLine("Thompson (Bernoulli)");
       TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new BernoulliModel(nArms)));
       Console.WriteLine("Random");
       TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
 …
       //Console.WriteLine("Eps(0.5)");
       //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
+    }
+      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.1));
+      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 0.5));
+      Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 1));
+      Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 2));
+      Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(globalRand, nArms, new UCTPolicy(nArms, 5));
+      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
+      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
+      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
+      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
+      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
+      // not applicable to bernoulli rewards
+      //Console.WriteLine("ThresholdAscent(10, 0.01)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
+      //Console.WriteLine("ThresholdAscent(10, 0.05)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
+      //Console.WriteLine("ThresholdAscent(10, 0.1)   "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
+      //Console.WriteLine("ThresholdAscent(100, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
+      //Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
+      //Console.WriteLine("ThresholdAscent(100, 0.1)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
+      //Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
+      //Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
+      //Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
+    }
     [TestMethod]
     public void ComparePoliciesForNormalBandit() {
+      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       var globalRand = new Random(31415);
       var seedForPolicy = globalRand.Next();
+      var nArms = 10;
+      Console.WriteLine("Thompson (Gaussian)");
+      TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Random");
+      TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("UCB1");
+      TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
+      Console.WriteLine("UCB1Tuned");
+      TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
+      Console.WriteLine("UCB1Normal");
+      TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
+      var nArms = 20;
+      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
+      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyNormal(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
+      /*
+      Console.WriteLine("Random"); TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("UCB1"); TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
+      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
+      Console.WriteLine("UCB1Normal"); TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
       //Console.WriteLine("Exp3 (gamma=0.01)");
       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
       //Console.WriteLine("Exp3 (gamma=0.05)");
       //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
+      Console.WriteLine("Eps(0.01)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
+      Console.WriteLine("Eps(0.05)");
+      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
+      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
+      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
       //Console.WriteLine("Eps(0.1)");
       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
 …
       //Console.WriteLine("Eps(0.5)");
       //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
+    }
+      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.1));
+      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 0.5));
+      Console.WriteLine("UCT(1)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 1));
+      Console.WriteLine("UCT(2)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 2));
+      Console.WriteLine("UCT(5)  "); TestPolicyNormal(globalRand, nArms, new UCTPolicy(nArms, 5));
+      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
+      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
+      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
+      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
+      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
+      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(globalRand, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
+      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
+      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
+      Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
+      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
+      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
+      Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
+      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
+      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
+      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
+       */
+    }
+    [TestMethod]
+    public void ComparePoliciesForGaussianMixtureBandit() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var globalRand = new Random(31415);
+      var seedForPolicy = globalRand.Next();
+      var nArms = 20;
+      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms, true));
+      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("Generic Thompson (Gaussian)"); TestPolicyGaussianMixture(globalRand, nArms, new GenericThompsonSamplingPolicy(new Random(seedForPolicy), nArms, new GaussianModel(nArms, 0.5, 1)));
+      /*
+      Console.WriteLine("Random"); TestPolicyGaussianMixture(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
+      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(globalRand, nArms, new UCB1Policy(nArms));
+      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(globalRand, nArms, new UCB1TunedPolicy(nArms));
+      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(globalRand, nArms, new UCBNormalPolicy(nArms));
+      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
+      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
+      Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 1));
+      Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 2));
+      Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(globalRand, nArms, new UCTPolicy(nArms, 5));
+      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
+      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
+      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(globalRand, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
+      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
+      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
+      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
+      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
+       */
+    }
     private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
+      var maxIt = 1E6;
+      var reps = 10; // 10 independent runs
+      var avgRegretForIteration = new Dictionary<int, double>();
+      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
+    }
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
+    }
+    private void TestPolicyGaussianMixture(Random globalRand, int nArms, IPolicy policy) {
+      TestPolicy(globalRand, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
+    }
+    private void TestPolicy(Random globalRand, int nArms, IPolicy policy, Func<Random, int, IBandit> banditFactory) {
+      var maxIt = 1E5;
+      var reps = 30; // independent runs
+      var regretForIteration = new Dictionary<int, List<double>>();
+      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
+      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
       // calculate statistics
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
         var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
+        var b = banditFactory(new Random(globalRand.Next()), nArms);
         policy.Reset();
         var totalRegret = 0.0;
+        var totalPullsOfSuboptimalArmsExp = 0.0;
+        var totalPullsOfSuboptimalArmsMax = 0.0;
         for (int i = 0; i <= maxIt; i++) {
           var selectedAction = policy.SelectAction();
           var reward = b.Pull(selectedAction);
+          policy.UpdateReward(selectedAction, reward);
+          // collect stats
+          if (selectedAction != b.OptimalExpectedRewardArm) totalPullsOfSuboptimalArmsExp++;
+          if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
           totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
           if (i == nextLogStep) {
             nextLogStep *= 10;
             if (!avgRegretForIteration.ContainsKey(i)) {
               avgRegretForIteration.Add(i, 0.0);
+            nextLogStep *= 2;
+            if (!regretForIteration.ContainsKey(i)) {
+              regretForIteration.Add(i, new List<double>());
+            }
+            avgRegretForIteration[i] += totalRegret / i;
+            regretForIteration[i].Add(totalRegret / i);
+            if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
+              numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
+            }
+            numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
+            if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
+              numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
+            }
+            numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+          }
+        }
+      }
       // print
+      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
+      }
+    }
+    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
+      var maxIt = 1E6;
+      var reps = 10; // 10 independent runs
+      var avgRegretForIteration = new Dictionary<int, double>();
+      // calculate statistics
+      for (int r = 0; r < reps; r++) {
+        var nextLogStep = 1;
+        var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10);
+        policy.Reset();
+        var totalRegret = 0.0;
+        for (int i = 0; i <= maxIt; i++) {
+          var selectedAction = policy.SelectAction();
+          var reward = b.Pull(selectedAction);
+          totalRegret += b.OptimalExpectedReward - reward;
+          policy.UpdateReward(selectedAction, reward);
+          if (i == nextLogStep) {
+            nextLogStep *= 10;
+            if (!avgRegretForIteration.ContainsKey(i)) {
+              avgRegretForIteration.Add(i, 0.0);
+            }
+            avgRegretForIteration[i] += totalRegret / i;
+          }
+        }
+      }
+      // print
+      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
+      foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
+        Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
+          p,
+          regretForIteration[p].Average(),
+          regretForIteration[p].Min(),
+          regretForIteration[p].Max(),
+          numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
+          numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
+          );
+      }
+    }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11730 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

Download in other formats: