
Timestamp:
08/17/15 19:13:19
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

File:
1 edited

Legend:

  ' '  unmodified
  '+'  added
  '-'  removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs

--- r11745
+++ r12876

…
       var nArms = 20;

-      // ThresholdAscent only works for rewards in [0..1] so far
-
-      Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
-      Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
-      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
-      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
-      Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
-      Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
-
-    }
-
+      // some of the policies are specific to rewards in [0..1], e.g. ThresholdAscent or UCB1
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
+      //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
+    }
+
+    [TestMethod]
+    // test case I as described in the Extreme Bandits paper
+    public void ComparePoliciesExtremeBandits1() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
+      // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
+    }
+
+    [TestMethod]
+    // test case II as described in the Extreme Bandits paper
+    public void ComparePoliciesExtremeBandits2() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
+      // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+    }

     [TestMethod]
…
     }
     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
+      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
+    }
+
+    private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
+    }
+    private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
     }


     private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
-      var maxIt = 1E5;
-      var reps = 10; // independent runs
-      var regretForIteration = new Dictionary<int, List<double>>();
-      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
-      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+      var maxIt = 1E4;
+      var reps = 30; // independent runs
+      //var regretForIteration = new Dictionary<int, List<double>>();
+      //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
+      //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+      //var bestRewardForIteration = new Dictionary<int, List<double>>();
       var globalRandom = new Random(randSeed);
       var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
…
         var totalPullsOfSuboptimalArmsExp = 0.0;
         var totalPullsOfSuboptimalArmsMax = 0.0;
+        var bestReward = double.NegativeInfinity;
         var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
         for (int i = 0; i <= maxIt; i++) {
…
           if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
           totalRegret += b.OptimalExpectedReward - reward;
-
-          if (i == nextLogStep) {
-            nextLogStep *= 2;
-            if (!regretForIteration.ContainsKey(i)) {
-              regretForIteration.Add(i, new List<double>());
-            }
-            regretForIteration[i].Add(totalRegret / i);
-
-            if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
-              numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
-            }
-            numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
-
-            if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
-              numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
-            }
-            numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+          bestReward = Math.Max(bestReward, reward);
+
+          if (i + 1 == nextLogStep) {
+            nextLogStep += 100;
+            //if (!regretForIteration.ContainsKey(i)) {
+            //  regretForIteration.Add(i, new List<double>());
+            //}
+            //regretForIteration[i].Add(totalRegret / i);
+            //
+            //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
+            //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
+            //}
+            //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
+            //
+            //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
+            //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
+            //}
+            //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+            //
+            //if (!bestRewardForIteration.ContainsKey(i)) {
+            //  bestRewardForIteration.Add(i, new List<double>());
+            //}
+            //bestRewardForIteration[i].Add(bestReward);
+            Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
+              policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
+              totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
           }
         }
       }
       // print
-      foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
-        Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
-          p,
-          regretForIteration[p].Average(),
-          regretForIteration[p].Min(),
-          regretForIteration[p].Max(),
-          numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
-          numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
-          );
-      }
+      //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
+      //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
+      //    p,
+      //    regretForIteration[p].Average(),
+      //    regretForIteration[p].Min(),
+      //    regretForIteration[p].Max(),
+      //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
+      //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
+      //    string.Join(" ", bestRewardForIteration[p])
+      //    );
+      //}
     }

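Background on the new test cases: in the extreme-bandits setting a policy is judged by the single best reward it collects rather than by the average reward, so the benchmark arms are heavy-tailed Pareto distributions. From the call sites above, the first ParetoBandit constructor argument is read here as the per-arm Pareto shape parameter alpha, and the second (used only in test case II) as the per-arm probability of returning a zero reward; this reading is an assumption inferred from the call sites, not something the changeset states. A minimal sketch of such a bandit, using inverse-CDF sampling:

    using System;

    // Sketch of a Pareto-armed bandit with the semantics ASSUMED above:
    // alpha[k] is arm k's Pareto shape, zeroProb[k] its chance of a zero reward.
    public class ParetoBanditSketch {
      private readonly Random rand;
      private readonly double[] alpha;
      private readonly double[] zeroProb;

      public ParetoBanditSketch(Random rand, double[] alpha, double[] zeroProb = null) {
        this.rand = rand;
        this.alpha = alpha;
        this.zeroProb = zeroProb ?? new double[alpha.Length]; // default: never zero
      }

      public double Reward(int arm) {
        if (rand.NextDouble() < zeroProb[arm]) return 0.0;
        // Inverse-CDF sampling with scale x_m = 1: for U ~ Uniform(0,1),
        // (1 - U)^(-1/alpha) follows a Pareto(alpha) distribution.
        return Math.Pow(1.0 - rand.NextDouble(), -1.0 / alpha[arm]);
      }
    }

The maximum of n Pareto(alpha) draws grows on the order of n^(1/alpha), so the heaviest tail (smallest alpha) eventually dominates: in test case I (alpha = 5, 1.1, 2) the second arm is the one OptimalMaximalRewardArm should single out, even though most individual draws from it are small.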
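The ExtremeHunterPolicy exercised above refers to the ExtremeHunter algorithm from Carpentier and Valko, "Extreme Bandits" (NIPS 2014), which repeatedly pulls the arm with the largest optimistic estimate of the expected maximum over the remaining horizon. The sketch below only illustrates the shape of such an index; it is not the branch implementation, and it substitutes the classic Hill estimator for the paper's second-order Pareto estimator:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Illustrative index for one arm of an extreme bandit (NOT the branch code).
    // Tail model: P(X > x) ~ C * x^(-alpha). The maximum of n i.i.d. draws then
    // concentrates around (C*n)^(1/alpha), up to a Gamma(1 - 1/alpha) factor
    // that is omitted here for brevity.
    public static class ExtremeHunterSketch {
      // samples: rewards observed for this arm (must be positive, count >= 2);
      // horizon: pulls remaining; bonus: exploration term added to 1/alpha.
      public static double Index(IReadOnlyList<double> samples, int horizon, double bonus) {
        int n = samples.Count;
        int k = Math.Max(1, (int)Math.Sqrt(n)); // heuristic number of tail order statistics
        var desc = samples.OrderByDescending(x => x).ToArray(); // X_(1) >= ... >= X_(n)
        // Hill estimator of 1/alpha from the top k order statistics.
        double sum = 0.0;
        for (int i = 0; i < k; i++) sum += Math.Log(desc[i] / desc[k]);
        double hHat = sum / k + bonus; // optimistic estimate of 1/alpha
        // Plug-in tail scale: empirical survival k/n at threshold X_(k+1).
        double cHat = (double)k / n * Math.Pow(desc[k], 1.0 / hHat);
        return Math.Pow(cHat * horizon, hHat); // projected scale of the future maximum
      }
    }

A policy would pull the argmax of this index over arms, after an initialization phase that pulls every arm a minimum number of times as the paper requires; since the exploration bonus shrinks with the number of pulls, heavier-tailed arms are identified without abandoning under-sampled ones too early.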