
Timestamp: 08/24/15 13:56:27 (9 years ago)
Author: gkronber
Message: #2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
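The change described in the message is visible in the TestPolicy loop in the diff below: instead of accumulating regret and counting pulls of suboptimal arms, the test now logs total reward, pulls of the optimal arm, and the maximum observed reward. The following is a minimal, self-contained C# sketch of why the two criteria can disagree; the two arms, their reward distributions, and the class name are illustrative assumptions and not part of the HeuristicLab code:

    using System;
    using System.Linq;

    class MaxVsAvgRewardSketch {
      static void Main() {
        var rand = new Random(31415);
        // Hypothetical two-armed bandit: arm 0 has the higher mean reward,
        // arm 1 pays little on average but occasionally yields an extreme reward.
        Func<int, double> pull = arm => arm == 0
          ? 0.6 + 0.1 * rand.NextDouble()            // steady rewards around 0.65
          : (rand.NextDouble() < 0.05 ? 5.0 : 0.1);  // rare but extreme payoffs

        foreach (var arm in new[] { 0, 1 }) {
          var rewards = Enumerable.Range(0, 10000).Select(_ => pull(arm)).ToArray();
          Console.WriteLine("arm {0}: avg reward = {1:F3}, max reward = {2:F3}",
            arm, rewards.Average(), rewards.Max());
        }
        // Judged by average reward, arm 0 wins; judged by maximum reward, arm 1 wins.
        // The latter is the criterion that extreme-bandit policies such as ExtremeHunter target.
      }
    }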

File: 1 edited

Legend: unchanged lines have no prefix, added lines are prefixed with +, removed lines with -
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs

r12876 → r12893

       var randSeed = 31415;
       TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit1(randSeed, new SingleArmPolicy(1));
       TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
       TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1000));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(100));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(2));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.5));
+      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.1));
       TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
-      // TestPolicyExtremeBandit1(randSeed, new ThresholdAscentPolicy());
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.05));
+      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.01));
     }

…
       CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       var randSeed = 31415;
-      TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
-      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
-      TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
-      TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
-      // TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+      //TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
+      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(0));
+      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(1));
+      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(2));
+      // TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 30));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 100));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 100));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 100));
+      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 100));
+      // TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1000));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(100));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(2));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.5));
+      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.1));
+      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
+      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.05));
+      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.01));
+      //TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
+    }
+
+    [TestMethod]
+    // my own test case for ExtremeHunter
+    // using truncated normal distributions
+    public void ComparePoliciesExtremeBandits3() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+      TestPolicyExtremeBandit3(randSeed, new RandomPolicy());
+      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(0));
+      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(1));
+      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(2));
+      TestPolicyExtremeBandit3(randSeed, new ExtremeHunterPolicy());
+      TestPolicyExtremeBandit3(randSeed, new UCB1Policy(3));
+      TestPolicyExtremeBandit3(randSeed, new EpsGreedyPolicy(0.1));
+    }
+
+    [TestMethod]
+    // a unit test to experiment with bandit policies for completing a GP sentence
+    public void ComparePoliciesSentenceCompletionProblem() {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+      var randSeed = 31415;
+
+
+      Func<Random, IBandit> sentenceCompletionBanditFactory = (banditRandom) => {
+        var problem = new SymbolicRegressionPoly10Problem();
+        return new SentenceBandit(banditRandom, problem, "a*b+c*d+e*f+E", 23);
+      };
+
+      // ignore number of arms
+
+      // var b = sentenceCompletionBanditFactory(new Random());
+      // all reference policies (always pulling one arm)
+      // for (int i = 0; i < b.NumArms; i++) {
+      //   TestPolicy(randSeed, new SingleArmPolicy(i), sentenceCompletionBanditFactory);
+      // }
+
+      // for the completion of a*b+c*d+e*f+a*g*i+E the arms 12, 15, and 19 are optimal
+      TestPolicy(randSeed, new SingleArmPolicy(12), sentenceCompletionBanditFactory);
+
+      TestPolicy(randSeed, new RandomPolicy(), sentenceCompletionBanditFactory);
+
+      TestPolicy(randSeed, new ExtremeHunterPolicy(), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new ExtremeHunterPolicy(D: 0.5), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new UCB1Policy(3), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new UCB1Policy(1), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new UCB1Policy(0.5), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new ThresholdAscentPolicy(), sentenceCompletionBanditFactory);
+      TestPolicy(randSeed, new EpsGreedyPolicy(0.1), sentenceCompletionBanditFactory);
     }

…

     private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new BernoulliBandit(banditRandom, nActions));
+      TestPolicy(randSeed, policy, (banditRandom) => new BernoulliBandit(banditRandom, nArms));
     }
     private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new TruncatedNormalBandit(banditRandom, nActions));
+      TestPolicy(randSeed, policy, (banditRandom) => new TruncatedNormalBandit(banditRandom, nArms));
     }
     private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianMixtureBandit(banditRandom, nActions));
+      TestPolicy(randSeed, policy, (banditRandom) => new GaussianMixtureBandit(banditRandom, nArms));
     }
     private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
-      TestPolicy(randSeed, nArms, policy, (banditRandom, nActions) => new GaussianBandit(banditRandom, nActions, 0, 10));
+      TestPolicy(randSeed, policy, (banditRandom) => new GaussianBandit(banditRandom, nArms, 0, 10));
     }

     private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
-      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 })); // 3 arms
+      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 }));
     }
     private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
-      TestPolicy(randSeed, 3, policy, (banditRandom, nActions) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 })); // 3 arms
-    }
-
-
-    private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
-      var maxIt = 1E4;
+      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 }, 0, 1));
+    }
+    private void TestPolicyExtremeBandit3(int randSeed, IBanditPolicy policy) {
+      TestPolicy(randSeed, policy, (banditRandom) => new Bandit(banditRandom, new IModel[]
+      {
+        new GammaModel(10, 1),   // exp=10, var=10
+        new GammaModel(6, 2),    // exp=12, var=24
+        new GammaModel(3, 3),    // exp= 9, var=27
+      }, 1, 2));
+    }
+
+
+    private void TestPolicy(int randSeed, IBanditPolicy policy, Func<Random, IBandit> banditFactory) {
+      var maxIt = 1E5;
       var reps = 30; // independent runs
       //var regretForIteration = new Dictionary<int, List<double>>();

…
       for (int r = 0; r < reps; r++) {
         var nextLogStep = 1;
-        var b = banditFactory(banditRandom, nArms);
-        var totalRegret = 0.0;
-        var totalPullsOfSuboptimalArmsExp = 0.0;
-        var totalPullsOfSuboptimalArmsMax = 0.0;
-        var bestReward = double.NegativeInfinity;
-        var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();
-        for (int i = 0; i <= maxIt; i++) {
+        var b = banditFactory(banditRandom);
+        var totalReward = 0.0;
+        int totalPullsOfOptimalArmExp = 0;
+        int totalPullsOfOptimalArmMax = 0;
+        var maxReward = double.NegativeInfinity;
+        var actionInfos = Enumerable.Range(0, b.NumArms).Select(_ => policy.CreateActionInfo()).ToArray();
+        for (int i = 0; i <= maxIt + 1; i++) {
           var selectedAction = policy.SelectAction(policyRandom, actionInfos);
           var reward = b.Pull(selectedAction);

…

           // collect stats
-          if (selectedAction != b.OptimalExpectedRewardArm) totalPullsOfSuboptimalArmsExp++;
-          if (selectedAction != b.OptimalMaximalRewardArm) totalPullsOfSuboptimalArmsMax++;
-          totalRegret += b.OptimalExpectedReward - reward;
-          bestReward = Math.Max(bestReward, reward);
-
-          if (i + 1 == nextLogStep) {
-            nextLogStep += 100;
+          if (selectedAction == b.OptimalExpectedRewardArm) totalPullsOfOptimalArmExp++;
+          if (selectedAction == b.OptimalMaximalRewardArm) totalPullsOfOptimalArmMax++;
+          totalReward += reward;
+          maxReward = Math.Max(maxReward, reward);
+
+          if (i == nextLogStep) {
+            nextLogStep += 500;
             //if (!regretForIteration.ContainsKey(i)) {
             //  regretForIteration.Add(i, new List<double>());

…
             //bestRewardForIteration[i].Add(bestReward);
             Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
-              policy, i + 1, totalRegret, totalPullsOfSuboptimalArmsExp, totalPullsOfSuboptimalArmsMax, bestReward,
-              totalRegret / (i + 1), totalPullsOfSuboptimalArmsExp / (i + 1), totalPullsOfSuboptimalArmsMax / (i + 1));
+              policy, i, totalReward, totalPullsOfOptimalArmExp, totalPullsOfOptimalArmMax, maxReward,
+              totalReward / i, totalPullsOfOptimalArmExp / (double)i, totalPullsOfOptimalArmMax / (double)i);
           }
         }
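
As a cross-check of the exp/var comments next to the new GammaModel arms in TestPolicyExtremeBandit3: assuming GammaModel(k, theta) is a shape/scale parameterization (an assumption about the constructor, not stated in this changeset), a Gamma(k, theta) variable has mean k·theta and variance k·theta², which gives 10·1 = 10 with 10·1² = 10, 6·2 = 12 with 6·2² = 24, and 3·3 = 9 with 3·3² = 27, matching the values noted in the inline comments.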