Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs @ 11727

Last change on this file since 11727 was 11727, checked in by gkronber, 10 years ago

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

File size: 6.5 KB
Line 
1using System;
2using System.Linq;
3using System.Collections.Generic;
4using System.Globalization;
5using HeuristicLab.Algorithms.Bandits;
6using Microsoft.VisualStudio.TestTools.UnitTesting;
7
8namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
9  [TestClass]
10  public class TestBanditPolicies {
11    [TestMethod]
12    public void ComparePoliciesForBernoulliBandit() {
13      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
14      var globalRand = new Random(31415);
15      var seedForPolicy = globalRand.Next();
16      var nArms = 10;
17      //Console.WriteLine("Exp3 (gamma=0.01)");
18      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
19      //Console.WriteLine("Exp3 (gamma=0.05)");
20      //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
21      Console.WriteLine("Thompson (Bernoulli)");
22      TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
23      Console.WriteLine("Random");
24      TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
25      Console.WriteLine("UCB1");
26      TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms));
27      Console.WriteLine("UCB1Tuned");
28      TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms));
29      Console.WriteLine("UCB1Normal");
30      TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms));
31      Console.WriteLine("Eps(0.01)");
32      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
33      Console.WriteLine("Eps(0.05)");
34      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
35      //Console.WriteLine("Eps(0.1)");
36      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
37      //Console.WriteLine("Eps(0.2)");
38      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
39      //Console.WriteLine("Eps(0.5)");
40      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
41    }
42    [TestMethod]
43    public void ComparePoliciesForNormalBandit() {
44      System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
45      var globalRand = new Random(31415);
46      var seedForPolicy = globalRand.Next();
47      var nArms = 10;
48      Console.WriteLine("Thompson (Gaussian)");
49      TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
50      Console.WriteLine("Random");
51      TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
52      Console.WriteLine("UCB1");
53      TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
54      Console.WriteLine("UCB1Tuned");
55      TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
56      Console.WriteLine("UCB1Normal");
57      TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
58      //Console.WriteLine("Exp3 (gamma=0.01)");
59      //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
60      //Console.WriteLine("Exp3 (gamma=0.05)");
61      //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
62      Console.WriteLine("Eps(0.01)");
63      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
64      Console.WriteLine("Eps(0.05)");
65      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
66      //Console.WriteLine("Eps(0.1)");
67      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
68      //Console.WriteLine("Eps(0.2)");
69      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
70      //Console.WriteLine("Eps(0.5)");
71      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
72    }
73
74    private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
75      var maxIt = 1E6;
76      var reps = 10; // 10 independent runs
77      var avgRegretForIteration = new Dictionary<int, double>();
78      // calculate statistics
79      for (int r = 0; r < reps; r++) {
80        var nextLogStep = 1;
81        var b = new BernoulliBandit(new Random(globalRand.Next()), 10);
82        policy.Reset();
83        var totalRegret = 0.0;
84
85        for (int i = 0; i <= maxIt; i++) {
86          var selectedAction = policy.SelectAction();
87          var reward = b.Pull(selectedAction);
88          totalRegret += b.OptimalExpectedReward - reward;
89          policy.UpdateReward(selectedAction, reward);
90          if (i == nextLogStep) {
91            nextLogStep *= 10;
92            if (!avgRegretForIteration.ContainsKey(i)) {
93              avgRegretForIteration.Add(i, 0.0);
94            }
95            avgRegretForIteration[i] += totalRegret / i;
96          }
97        }
98      }
99      // print
100      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
101        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
102      }
103    }
104    private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
105      var maxIt = 1E6;
106      var reps = 10; // 10 independent runs
107      var avgRegretForIteration = new Dictionary<int, double>();
108      // calculate statistics
109      for (int r = 0; r < reps; r++) {
110        var nextLogStep = 1;
111        var b = new TruncatedNormalBandit(new Random(globalRand.Next()), 10);
112        policy.Reset();
113        var totalRegret = 0.0;
114
115        for (int i = 0; i <= maxIt; i++) {
116          var selectedAction = policy.SelectAction();
117          var reward = b.Pull(selectedAction);
118          totalRegret += b.OptimalExpectedReward - reward;
119          policy.UpdateReward(selectedAction, reward);
120          if (i == nextLogStep) {
121            nextLogStep *= 10;
122            if (!avgRegretForIteration.ContainsKey(i)) {
123              avgRegretForIteration.Add(i, 0.0);
124            }
125            avgRegretForIteration[i] += totalRegret / i;
126          }
127        }
128      }
129      // print
130      foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
131        Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps); // print avg. of avg. regret
132      }
133    }
134
135  }
136}
Note: See TracBrowser for help on using the repository browser.