1 | using System;
|
---|
2 | using System.Linq;
|
---|
3 | using System.Collections.Generic;
|
---|
4 | using System.Globalization;
|
---|
5 | using HeuristicLab.Algorithms.Bandits;
|
---|
6 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
7 |
|
---|
8 | namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
|
---|
[TestClass]
public class TestBanditPolicies {
  [TestMethod]
  public void ComparePoliciesForBernoulliBandit() {
    // invariant culture so regret values are printed with '.' decimal separators regardless of host locale
    System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
    var globalRand = new Random(31415); // fixed seed => reproducible bandit instances across runs
    var seedForPolicy = globalRand.Next(); // all policies get the same seed so their random decisions are comparable
    var nArms = 10;
    //Console.WriteLine("Exp3 (gamma=0.01)");
    //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    //Console.WriteLine("Exp3 (gamma=0.05)");
    //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    Console.WriteLine("Thompson (Bernoulli)");
    TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("Random");
    TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("UCB1");
    TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms));
    Console.WriteLine("UCB1Tuned");
    TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms));
    Console.WriteLine("UCB1Normal");
    TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms));
    Console.WriteLine("Eps(0.01)");
    TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    Console.WriteLine("Eps(0.05)");
    TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    //Console.WriteLine("Eps(0.1)");
    //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
    //Console.WriteLine("Eps(0.2)");
    //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
    //Console.WriteLine("Eps(0.5)");
    //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
  }

  [TestMethod]
  public void ComparePoliciesForNormalBandit() {
    // invariant culture so regret values are printed with '.' decimal separators regardless of host locale
    System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
    var globalRand = new Random(31415); // fixed seed => reproducible bandit instances across runs
    var seedForPolicy = globalRand.Next(); // all policies get the same seed so their random decisions are comparable
    var nArms = 10;
    Console.WriteLine("Thompson (Gaussian)");
    TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("Random");
    TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("UCB1");
    TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
    Console.WriteLine("UCB1Tuned");
    TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
    Console.WriteLine("UCB1Normal");
    TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
    //Console.WriteLine("Exp3 (gamma=0.01)");
    //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    //Console.WriteLine("Exp3 (gamma=0.05)");
    //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    Console.WriteLine("Eps(0.01)");
    TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    Console.WriteLine("Eps(0.05)");
    TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    //Console.WriteLine("Eps(0.1)");
    //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
    //Console.WriteLine("Eps(0.2)");
    //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
    //Console.WriteLine("Eps(0.5)");
    //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
  }

  // Runs the given policy against freshly sampled Bernoulli bandits and prints
  // the per-step regret (totalRegret / i), averaged over all repetitions, at
  // iterations 1, 10, 100, ..., maxIt (one "iteration avgRegret" line each).
  // globalRand: source of per-repetition bandit seeds (advanced as a side effect).
  // nArms: number of arms of each bandit instance.
  // policy: the policy under test; Reset() is called before each repetition.
  private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
    const int maxIt = 1000000; // was `var maxIt = 1E6` (double) — int avoids an int<->double comparison per loop iteration
    const int reps = 10; // independent runs over different bandit instances
    var avgRegretForIteration = new Dictionary<int, double>(); // iteration -> sum of per-step regrets over reps
    // calculate statistics
    for (int r = 0; r < reps; r++) {
      var nextLogStep = 1;
      // BUG FIX: the arm count was hard-coded to 10, silently ignoring the nArms parameter
      var b = new BernoulliBandit(new Random(globalRand.Next()), nArms);
      policy.Reset();
      var totalRegret = 0.0;

      // i runs up to and including maxIt so the final power-of-ten log point is reached
      for (int i = 0; i <= maxIt; i++) {
        var selectedAction = policy.SelectAction();
        var reward = b.Pull(selectedAction);
        totalRegret += b.OptimalExpectedReward - reward;
        policy.UpdateReward(selectedAction, reward);
        if (i == nextLogStep) { // log only at powers of ten; nextLogStep >= 1, so no division by zero below
          nextLogStep *= 10;
          double sum;
          avgRegretForIteration.TryGetValue(i, out sum); // sum stays 0.0 when the key is new
          avgRegretForIteration[i] = sum + totalRegret / i;
        }
      }
    }
    // print avg. of avg. regret, one line per logged iteration, in ascending order
    foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
      Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps);
    }
  }

  // Same experiment as TestPolicyBernoulli but against truncated-normal bandits:
  // prints the per-step regret, averaged over all repetitions, at iterations
  // 1, 10, 100, ..., maxIt.
  // globalRand: source of per-repetition bandit seeds (advanced as a side effect).
  // nArms: number of arms of each bandit instance.
  // policy: the policy under test; Reset() is called before each repetition.
  private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
    const int maxIt = 1000000; // was `var maxIt = 1E6` (double) — int avoids an int<->double comparison per loop iteration
    const int reps = 10; // independent runs over different bandit instances
    var avgRegretForIteration = new Dictionary<int, double>(); // iteration -> sum of per-step regrets over reps
    // calculate statistics
    for (int r = 0; r < reps; r++) {
      var nextLogStep = 1;
      // BUG FIX: the arm count was hard-coded to 10, silently ignoring the nArms parameter
      var b = new TruncatedNormalBandit(new Random(globalRand.Next()), nArms);
      policy.Reset();
      var totalRegret = 0.0;

      // i runs up to and including maxIt so the final power-of-ten log point is reached
      for (int i = 0; i <= maxIt; i++) {
        var selectedAction = policy.SelectAction();
        var reward = b.Pull(selectedAction);
        totalRegret += b.OptimalExpectedReward - reward;
        policy.UpdateReward(selectedAction, reward);
        if (i == nextLogStep) { // log only at powers of ten; nextLogStep >= 1, so no division by zero below
          nextLogStep *= 10;
          double sum;
          avgRegretForIteration.TryGetValue(i, out sum); // sum stays 0.0 when the key is new
          avgRegretForIteration[i] = sum + totalRegret / i;
        }
      }
    }
    // print avg. of avg. regret, one line per logged iteration, in ascending order
    foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
      Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps);
    }
  }

}
|
---|
136 | }
|
---|