1 | using System;
|
---|
2 | using System.Linq;
|
---|
3 | using System.Collections.Generic;
|
---|
4 | using System.Globalization;
|
---|
5 | using HeuristicLab.Algorithms.Bandits;
|
---|
6 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
7 |
|
---|
8 | namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
|
---|
[TestClass]
public class TestBanditPolicies {
  [TestMethod]
  public void ComparePoliciesForBernoulliBandit() {
    // invariant culture so regret values are printed with '.' decimal separators regardless of host locale
    System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
    var globalRand = new Random(31415); // fixed seed => reproducible bandit instances across runs
    var seedForPolicy = globalRand.Next(); // all policies get the same seed so their random decisions are comparable
    var nArms = 10;
    //Console.WriteLine("Exp3 (gamma=0.01)");
    //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    //Console.WriteLine("Exp3 (gamma=0.05)");
    //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    Console.WriteLine("Thompson (Bernoulli)");
    TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("Random");
    TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("UCB1");
    TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms));
    Console.WriteLine("UCB1Tuned");
    TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms));
    Console.WriteLine("UCB1Normal");
    TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms));
    Console.WriteLine("Eps(0.01)");
    TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    Console.WriteLine("Eps(0.05)");
    TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    //Console.WriteLine("Eps(0.1)");
    //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
    //Console.WriteLine("Eps(0.2)");
    //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
    //Console.WriteLine("Eps(0.5)");
    //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
  }

  [TestMethod]
  public void ComparePoliciesForNormalBandit() {
    // invariant culture so regret values are printed with '.' decimal separators regardless of host locale
    System.Threading.Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
    var globalRand = new Random(31415); // fixed seed => reproducible bandit instances across runs
    var seedForPolicy = globalRand.Next(); // all policies get the same seed so their random decisions are comparable
    var nArms = 10;
    Console.WriteLine("Thompson (Gaussian)");
    TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("Random");
    TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    Console.WriteLine("UCB1");
    TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
    Console.WriteLine("UCB1Tuned");
    TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
    Console.WriteLine("UCB1Normal");
    TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
    //Console.WriteLine("Exp3 (gamma=0.01)");
    //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
    //Console.WriteLine("Exp3 (gamma=0.05)");
    //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    Console.WriteLine("Eps(0.01)");
    TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    Console.WriteLine("Eps(0.05)");
    TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
    //Console.WriteLine("Eps(0.1)");
    //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
    //Console.WriteLine("Eps(0.2)");
    //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
    //Console.WriteLine("Eps(0.5)");
    //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
  }

  // Runs the given policy against freshly sampled Bernoulli bandits and prints
  // the per-step regret (totalRegret / i), averaged over all repetitions, at
  // iterations 1, 10, 100, ..., maxIt (one "iteration avgRegret" line each).
  // globalRand: source of per-repetition bandit seeds (advanced as a side effect).
  // nArms: number of arms of each bandit instance.
  // policy: the policy under test; Reset() is called before each repetition.
  private void TestPolicyBernoulli(Random globalRand, int nArms, IPolicy policy) {
    const int maxIt = 1000000; // was `var maxIt = 1E6` (double) — int avoids an int<->double comparison per loop iteration
    const int reps = 10; // independent runs over different bandit instances
    var avgRegretForIteration = new Dictionary<int, double>(); // iteration -> sum of per-step regrets over reps
    // calculate statistics
    for (int r = 0; r < reps; r++) {
      var nextLogStep = 1;
      // BUG FIX: the arm count was hard-coded to 10, silently ignoring the nArms parameter
      var b = new BernoulliBandit(new Random(globalRand.Next()), nArms);
      policy.Reset();
      var totalRegret = 0.0;

      // i runs up to and including maxIt so the final power-of-ten log point is reached
      for (int i = 0; i <= maxIt; i++) {
        var selectedAction = policy.SelectAction();
        var reward = b.Pull(selectedAction);
        totalRegret += b.OptimalExpectedReward - reward;
        policy.UpdateReward(selectedAction, reward);
        if (i == nextLogStep) { // log only at powers of ten; nextLogStep >= 1, so no division by zero below
          nextLogStep *= 10;
          double sum;
          avgRegretForIteration.TryGetValue(i, out sum); // sum stays 0.0 when the key is new
          avgRegretForIteration[i] = sum + totalRegret / i;
        }
      }
    }
    // print avg. of avg. regret, one line per logged iteration, in ascending order
    foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
      Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps);
    }
  }

  // Same experiment as TestPolicyBernoulli but against truncated-normal bandits:
  // prints the per-step regret, averaged over all repetitions, at iterations
  // 1, 10, 100, ..., maxIt.
  // globalRand: source of per-repetition bandit seeds (advanced as a side effect).
  // nArms: number of arms of each bandit instance.
  // policy: the policy under test; Reset() is called before each repetition.
  private void TestPolicyNormal(Random globalRand, int nArms, IPolicy policy) {
    const int maxIt = 1000000; // was `var maxIt = 1E6` (double) — int avoids an int<->double comparison per loop iteration
    const int reps = 10; // independent runs over different bandit instances
    var avgRegretForIteration = new Dictionary<int, double>(); // iteration -> sum of per-step regrets over reps
    // calculate statistics
    for (int r = 0; r < reps; r++) {
      var nextLogStep = 1;
      // BUG FIX: the arm count was hard-coded to 10, silently ignoring the nArms parameter
      var b = new TruncatedNormalBandit(new Random(globalRand.Next()), nArms);
      policy.Reset();
      var totalRegret = 0.0;

      // i runs up to and including maxIt so the final power-of-ten log point is reached
      for (int i = 0; i <= maxIt; i++) {
        var selectedAction = policy.SelectAction();
        var reward = b.Pull(selectedAction);
        totalRegret += b.OptimalExpectedReward - reward;
        policy.UpdateReward(selectedAction, reward);
        if (i == nextLogStep) { // log only at powers of ten; nextLogStep >= 1, so no division by zero below
          nextLogStep *= 10;
          double sum;
          avgRegretForIteration.TryGetValue(i, out sum); // sum stays 0.0 when the key is new
          avgRegretForIteration[i] = sum + totalRegret / i;
        }
      }
    }
    // print avg. of avg. regret, one line per logged iteration, in ascending order
    foreach (var p in avgRegretForIteration.Keys.OrderBy(k => k)) {
      Console.WriteLine("{0} {1}", p, avgRegretForIteration[p] / reps);
    }
  }

}
|
---|
136 | }
|
---|