
source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs @ 12893

Last change on this file: changeset 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 26.3 KB
using System;
using System.Linq;
using System.Collections.Generic;
using System.Globalization;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
  [TestClass]
  public class TestBanditPolicies {
    [TestMethod]
    public void ComparePoliciesForGaussianUnknownVarianceBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;

      // some of the policies are specific to rewards in [0..1], e.g. Threshold Ascent or UCB1
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
      //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
    }

    [TestMethod]
    // test case I as described in Extreme Bandits paper
    public void ComparePoliciesExtremeBandits1() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
      TestPolicyExtremeBandit1(randSeed, new SingleArmPolicy(1));
      TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1000));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(100));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(2));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.5));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.1));
      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.05));
      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.01));
    }

    [TestMethod]
    // test case II as described in Extreme Bandits paper
    public void ComparePoliciesExtremeBandits2() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      //TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(0));
      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(1));
      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(2));
      //TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 100));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 100));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 100));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 100));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1000));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(100));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(2));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.5));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.1));
      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.05));
      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.01));
      //TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
    }

    [TestMethod]
    // my own test case for ExtremeHunter
    // using truncated normal distributions
    public void ComparePoliciesExtremeBandits3() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      TestPolicyExtremeBandit3(randSeed, new RandomPolicy());
      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(0));
      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(1));
      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(2));
      TestPolicyExtremeBandit3(randSeed, new ExtremeHunterPolicy());
      TestPolicyExtremeBandit3(randSeed, new UCB1Policy(3));
      TestPolicyExtremeBandit3(randSeed, new EpsGreedyPolicy(0.1));
    }

    [TestMethod]
    // a unit test to experiment with bandit policies for completing a GP sentence
    public void ComparePoliciesSentenceCompletionProblem() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;

      Func<Random, IBandit> sentenceCompletionBanditFactory = (banditRandom) => {
        var problem = new SymbolicRegressionPoly10Problem();
        return new SentenceBandit(banditRandom, problem, "a*b+c*d+e*f+E", 23);
      };

      // ignore number of arms

      // var b = sentenceCompletionBanditFactory(new Random());
      // all reference policies (always pulling one arm)
      // for (int i = 0; i < b.NumArms; i++) {
      //   TestPolicy(randSeed, new SingleArmPolicy(i), sentenceCompletionBanditFactory);
      // }

      // for the completion of a*b+c*d+e*f+a*g*i+E the arms 12, 15, and 19 are optimal
      TestPolicy(randSeed, new SingleArmPolicy(12), sentenceCompletionBanditFactory);

      TestPolicy(randSeed, new RandomPolicy(), sentenceCompletionBanditFactory);

      TestPolicy(randSeed, new ExtremeHunterPolicy(), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new ExtremeHunterPolicy(D: 0.5), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new UCB1Policy(3), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new UCB1Policy(1), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new UCB1Policy(0.5), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new ThresholdAscentPolicy(), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new EpsGreedyPolicy(0.1), sentenceCompletionBanditFactory);
    }

    [TestMethod]
    public void ComparePoliciesForBernoulliBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;
      //Console.WriteLine("Exp3 (gamma=0.01)");
      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
      //Console.WriteLine("Exp3 (gamma=0.05)");
      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy());
      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel()));
      Console.WriteLine("Random");
      TestPolicyBernoulli(randSeed, nArms, new RandomPolicy());
      Console.WriteLine("UCB1");
      TestPolicyBernoulli(randSeed, nArms, new UCB1Policy());
      Console.WriteLine("UCB1Tuned");
      TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy());
      Console.WriteLine("UCB1Normal");
      TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy());
      Console.WriteLine("Eps(0.01)");
      TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01));
      Console.WriteLine("Eps(0.05)");
      TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05));
      //Console.WriteLine("Eps(0.1)");
      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1));
      //Console.WriteLine("Eps(0.2)");
      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2));
      //Console.WriteLine("Eps(0.5)");
      //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5));
      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1));
      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5));
      Console.WriteLine("UCT(1)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1));
      Console.WriteLine("UCT(2)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2));
      Console.WriteLine("UCT(5)  "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5));
      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1));
      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5));
      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1));
      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10));
      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1));

      // not applicable to bernoulli rewards
      //Console.WriteLine("ThresholdAscent(10, 0.01)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      //Console.WriteLine("ThresholdAscent(10, 0.05)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
      //Console.WriteLine("ThresholdAscent(10, 0.1)   "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
      //Console.WriteLine("ThresholdAscent(100, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
      //Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
      //Console.WriteLine("ThresholdAscent(100, 0.1)  "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
      //Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
      //Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
      //Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
    }

    [TestMethod]
    public void ComparePoliciesForGaussianBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

      var randSeed = 31415;
      var nArms = 20;
      Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
      Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
      Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
      Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
      Console.WriteLine("Generic Thompson (Gaussian fixed var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
      Console.WriteLine("Generic Thompson (Gaussian unknown var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());

      /*
      Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
      Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms));
      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms));
      Console.WriteLine("UCB1Normal"); TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms));
      //Console.WriteLine("Exp3 (gamma=0.01)");
      //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
      //Console.WriteLine("Exp3 (gamma=0.05)");
      //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
      //Console.WriteLine("Eps(0.1)");
      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
      //Console.WriteLine("Eps(0.2)");
      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
      //Console.WriteLine("Eps(0.5)");
      //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1));
      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5));
      Console.WriteLine("UCT(1)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1));
      Console.WriteLine("UCT(2)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2));
      Console.WriteLine("UCT(5)  "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5));
      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      Console.WriteLine("ThresholdAscent(10,0.05)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
      Console.WriteLine("ThresholdAscent(10,0.1)   "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
      Console.WriteLine("ThresholdAscent(100,0.1)  "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
       */
    }

    [TestMethod]
    public void ComparePoliciesForGaussianMixtureBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;

      Console.WriteLine("Generic Thompson (Gaussian Mixture)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianMixtureModel()));
      // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
      // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
      // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
      // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
      // Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      // Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
      // Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
      // Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));

      /*
      Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms));
      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms));
      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms));
      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
      Console.WriteLine("UCT(1)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1));
      Console.WriteLine("UCT(2)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2));
      Console.WriteLine("UCT(5)  "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5));
      Console.WriteLine("BoltzmannExploration(1)  "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));

      Console.WriteLine("ThresholdAscent(10,0.01)  "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
       */
    }


    private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new BernoulliBandit(banditRandom, nArms));
    }
    private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new TruncatedNormalBandit(banditRandom, nArms));
    }
    private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new GaussianMixtureBandit(banditRandom, nArms));
    }
    private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new GaussianBandit(banditRandom, nArms, 0, 10));
    }
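
    // The extreme-bandit helpers below use heavy-tailed reward distributions; the double[]
    // passed to ParetoBandit presumably holds the per-arm Pareto shape (tail index). A smaller
    // tail index means a heavier tail and thus larger extreme rewards, so the arm with tail
    // index 1.1 is the one an extreme-value policy should favor.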
    private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 }));
    }
    private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 }, 0, 1));
    }
    private void TestPolicyExtremeBandit3(int randSeed, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new Bandit(banditRandom, new IModel[]
      {
        new GammaModel(10, 1),   // exp=10, var=10
        new GammaModel(6, 2),    // exp=12, var=24
        new GammaModel(3, 3),    // exp= 9, var=27
      }, 1, 2));
    }
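
    // Runs one policy against bandits created by banditFactory: 30 independent repetitions of
    // roughly 1E5 pulls each, with the bandit and policy random streams seeded deterministically
    // from randSeed so results are reproducible across test runs. At pull 1 and then every 500
    // pulls it prints the policy, the iteration, the cumulative reward, the number of pulls of
    // the arm with the best expected reward, the number of pulls of the arm with the best maximal
    // reward, the largest reward observed so far, and the corresponding per-iteration averages.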
    private void TestPolicy(int randSeed, IBanditPolicy policy, Func<Random, IBandit> banditFactory) {
      var maxIt = 1E5;
      var reps = 30; // independent runs
      //var regretForIteration = new Dictionary<int, List<double>>();
      //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
      //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
      //var bestRewardForIteration = new Dictionary<int, List<double>>();
      var globalRandom = new Random(randSeed);
      var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
      var policyRandom = new Random(globalRandom.Next());

      // calculate statistics
      for (int r = 0; r < reps; r++) {
        var nextLogStep = 1;
        var b = banditFactory(banditRandom);
        var totalReward = 0.0;
        int totalPullsOfOptimalArmExp = 0;
        int totalPullsOfOptimalArmMax = 0;
        var maxReward = double.NegativeInfinity;
        var actionInfos = Enumerable.Range(0, b.NumArms).Select(_ => policy.CreateActionInfo()).ToArray();
        for (int i = 0; i <= maxIt + 1; i++) {
          var selectedAction = policy.SelectAction(policyRandom, actionInfos);
          var reward = b.Pull(selectedAction);
          actionInfos[selectedAction].UpdateReward(reward);

          // collect stats
          if (selectedAction == b.OptimalExpectedRewardArm) totalPullsOfOptimalArmExp++;
          if (selectedAction == b.OptimalMaximalRewardArm) totalPullsOfOptimalArmMax++;
          totalReward += reward;
          maxReward = Math.Max(maxReward, reward);

          if (i == nextLogStep) {
            nextLogStep += 500;
            //if (!regretForIteration.ContainsKey(i)) {
            //  regretForIteration.Add(i, new List<double>());
            //}
            //regretForIteration[i].Add(totalRegret / i);
            //
            //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
            //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
            //}
            //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
            //
            //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
            //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
            //}
            //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
            //
            //if (!bestRewardForIteration.ContainsKey(i)) {
            //  bestRewardForIteration.Add(i, new List<double>());
            //}
            //bestRewardForIteration[i].Add(bestReward);
            Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
              policy, i, totalReward, totalPullsOfOptimalArmExp, totalPullsOfOptimalArmMax, maxReward,
              totalReward / i, totalPullsOfOptimalArmExp / (double)i, totalPullsOfOptimalArmMax / (double)i);
          }
        }
      }
      // print
      //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
      //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
      //    p,
      //    regretForIteration[p].Average(),
      //    regretForIteration[p].Min(),
      //    regretForIteration[p].Max(),
      //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
      //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
      //    string.Join(" ", bestRewardForIteration[p])
      //    );
      //}
    }

  }
}