
Timestamp:
08/17/15 19:13:19
Author:
gkronber
Message:

#2283: implemented first crude version of extreme hunter algorithm in branch

File:
1 edited

Legend:

  ' '  unmodified
  '+'  added
  '-'  removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs

--- r11745
+++ r12876

…
       var nArms = 20;

-      // ThresholdAscent only works for rewards in [0..1] so far
-
-      Console.WriteLine("Thompson (Gaussian est variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 1, 1)));
-      Console.WriteLine("Thompson (Gaussian fixed variance)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0, 1, 0.1)));
-      Console.WriteLine("GaussianThompson (compat)"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
-      Console.WriteLine("GaussianThompson"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new GaussianThompsonSamplingPolicy());
-      Console.WriteLine("UCBNormal"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
-      Console.WriteLine("Random"); TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
-
-    }
-
+      // some of the policies are specific to rewards in [0..1], e.g. ThresholdAscent or UCB1
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
+      //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
+      TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
+    }
+
+    [TestMethod]
+    // test case I as described in the Extreme Bandits paper
+    public void ComparePoliciesExtremeBandits1() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
+      // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
+    }
+
+    [TestMethod]
+    // test case II as described in the Extreme Bandits paper
+    public void ComparePoliciesExtremeBandits2() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
+      // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+    }

     [TestMethod]
…
     }
     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions));
+      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
+    }
+
+    private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
+    }
+    private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
     }


     private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
-      var maxIt = 1E5;
-      var reps = 10; // independent runs
-      var regretForIteration = new Dictionary<int, List<double>>();
-      var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
-      var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+      var maxIt = 1E4;
+      var reps = 30; // independent runs
+      //var regretForIteration = new Dictionary<int, List<double>>();
+      //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
+      //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
+      //var bestRewardForIteration = new Dictionary<int, List<double>>();
       var globalRandom = new Random(randSeed);
       var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
…
         var totalPullsOfSuboptimalArmsExp = 0.0;
         var totalPullsOfSuboptimalArmsMax = 0.0;
+        var bestReward = double.NegativeInfinity;
         var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
         for (int i = 0; i <= maxIt; i++) {
…
           if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
           totalRegret += b.OptimalExpectedReward - reward;
-
-          if (i == nextLogStep) {
-            nextLogStep *= 2;
-            if (!regretForIteration.ContainsKey(i)) {
-              regretForIteration.Add(i, new List<double>());
-            }
-            regretForIteration[i].Add(totalRegret / i);
-
-            if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
-              numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
-            }
-            numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
-
-            if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
-              numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
-            }
-            numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+          bestReward = Math.Max(bestReward, reward);
+
+          if (i + 1 == nextLogStep) {
+            nextLogStep += 100;
+            //if (!regretForIteration.ContainsKey(i)) {
+            //  regretForIteration.Add(i, new List<double>());
+            //}
+            //regretForIteration[i].Add(totalRegret / i);
+            //
+            //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
+            //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
+            //}
+            //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
+            //
+            //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
+            //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
+            //}
+            //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
+            //
+            //if (!bestRewardForIteration.ContainsKey(i)) {
+            //  bestRewardForIteration.Add(i, new List<double>());
+            //}
+            //bestRewardForIteration[i].Add(bestReward);
+            Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
+              policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
+              totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
           }
         }
       }
       // print
-      foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
-        Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2}",
-          p,
-          regretForIteration[p].Average(),
-          regretForIteration[p].Min(),
-          regretForIteration[p].Max(),
-          numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
-          numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps
-          );
-      }
+      //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
+      //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
+      //    p,
+      //    regretForIteration[p].Average(),
+      //    regretForIteration[p].Min(),
+      //    regretForIteration[p].Max(),
+      //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
+      //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
+      //    string.Join(" ", bestRewardForIteration[p])
+      //    );
+      //}
     }

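Background on the new test cases: in the extreme-bandits setting a policy is judged by the single best reward it collects rather than by the average reward, so the benchmark arms are heavy-tailed Pareto distributions. From the call sites above, the first ParetoBandit constructor argument is read here as the per-arm Pareto shape parameter alpha, and the second (used only in test case II) as the per-arm probability of returning a zero reward; this reading is an assumption inferred from the call sites, not something the changeset states. A minimal sketch of such a bandit, using inverse-CDF sampling:

    using System;

    // Sketch of a Pareto-armed bandit with the semantics ASSUMED above:
    // alpha[k] is arm k's Pareto shape, zeroProb[k] its chance of a zero reward.
    public class ParetoBanditSketch {
      private readonly Random rand;
      private readonly double[] alpha;
      private readonly double[] zeroProb;

      public ParetoBanditSketch(Random rand, double[] alpha, double[] zeroProb = null) {
        this.rand = rand;
        this.alpha = alpha;
        this.zeroProb = zeroProb ?? new double[alpha.Length]; // default: never zero
      }

      public double Reward(int arm) {
        if (rand.NextDouble() < zeroProb[arm]) return 0.0;
        // Inverse-CDF sampling with scale x_m = 1: for U ~ Uniform(0,1),
        // (1 - U)^(-1/alpha) follows a Pareto(alpha) distribution.
        return Math.Pow(1.0 - rand.NextDouble(), -1.0 / alpha[arm]);
      }
    }

The maximum of n Pareto(alpha) draws grows on the order of n^(1/alpha), so the heaviest tail (smallest alpha) eventually dominates: in test case I (alpha = 5, 1.1, 2) the second arm is the one OptimalMaximalRewardArm should single out, even though most individual draws from it are small.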
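The ExtremeHunterPolicy exercised above refers to the ExtremeHunter algorithm from Carpentier and Valko, "Extreme Bandits" (NIPS 2014), which repeatedly pulls the arm with the largest optimistic estimate of the expected maximum over the remaining horizon. The sketch below only illustrates the shape of such an index; it is not the branch implementation, and it substitutes the classic Hill estimator for the paper's second-order Pareto estimator:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Illustrative index for one arm of an extreme bandit (NOT the branch code).
    // Tail model: P(X > x) ~ C * x^(-alpha). The maximum of n i.i.d. draws then
    // concentrates around (C*n)^(1/alpha), up to a Gamma(1 - 1/alpha) factor
    // that is omitted here for brevity.
    public static class ExtremeHunterSketch {
      // samples: rewards observed for this arm (must be positive, count >= 2);
      // horizon: pulls remaining; bonus: exploration term added to 1/alpha.
      public static double Index(IReadOnlyList<double> samples, int horizon, double bonus) {
        int n = samples.Count;
        int k = Math.Max(1, (int)Math.Sqrt(n)); // heuristic number of tail order statistics
        var desc = samples.OrderByDescending(x => x).ToArray(); // X_(1) >= ... >= X_(n)
        // Hill estimator of 1/alpha from the top k order statistics.
        double sum = 0.0;
        for (int i = 0; i < k; i++) sum += Math.Log(desc[i] / desc[k]);
        double hHat = sum / k + bonus; // optimistic estimate of 1/alpha
        // Plug-in tail scale: empirical survival k/n at threshold X_(k+1).
        double cHat = (double)k / n * Math.Pow(desc[k], 1.0 / hHat);
        return Math.Pow(cHat * horizon, hHat); // projected scale of the future maximum
      }
    }

A policy would pull the argmax of this index over arms, after an initialization phase that pulls every arm a minimum number of times as the paper requires; since the exploration bonus shrinks with the number of pulls, heavier-tailed arms are identified without abandoning under-sampled ones too early.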