Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs @ 12876

Visit:

Last change on this file since 12876 was 12876, checked in by gkronber, 9 years ago
#2283: implemented first crude version of extreme hunter algorithm in branch
File size: 21.7 KB

Line
1	using System;
2	using System.Linq;
3	using System.Collections.Generic;
4	using System.Globalization;
5	using HeuristicLab.Algorithms.Bandits;
6	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
7	using HeuristicLab.Algorithms.Bandits.Models;
8	using Microsoft.VisualStudio.TestTools.UnitTesting;
9
10	namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
11	[TestClass]
12	public class TestBanditPolicies {
/// <summary>
/// Runs a selection of bandit policies through <c>TestPolicyGaussianUnknownVariance</c>
/// with a fixed seed and 20 arms. The statistics are only printed; nothing is asserted.
/// </summary>
[TestMethod]
public void ComparePoliciesForGaussianUnknownVarianceBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  // All policies are evaluated with the same seed and the same number of arms.
  Action<IBanditPolicy> evaluate = candidate => TestPolicyGaussianUnknownVariance(seed, armCount, candidate);

  // some of the policies are specific to rewards in [0..1], e.g. Threshold Ascent or UCB1
  evaluate(new ExtremeHunterPolicy());
  evaluate(new IntervalEstimationPolicy());
  // disabled: evaluate(new UCBPolicy(10));
  evaluate(new UCBNormalPolicy());
  evaluate(new UCB1TunedPolicy());
  evaluate(new UCB1Policy(10));
  evaluate(new ActiveLearningPolicy(10));
  evaluate(new ChernoffIntervalEstimationPolicy());
  evaluate(new BoltzmannExplorationPolicy(100));
  evaluate(new EpsGreedyPolicy(0.1));
  evaluate(new RandomPolicy());
}
32
/// <summary>
/// Test case I as described in the Extreme Bandits paper: runs several policies through
/// <c>TestPolicyExtremeBandit1</c> with a fixed seed. Output only; nothing is asserted.
/// </summary>
[TestMethod]
public void ComparePoliciesExtremeBandits1() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;

  // Each policy is created right before it is evaluated, always with the same seed.
  Action<IBanditPolicy> evaluate = candidate => TestPolicyExtremeBandit1(seed, candidate);

  evaluate(new RandomPolicy());
  evaluate(new ExtremeHunterPolicy());
  evaluate(new UCB1Policy(10000));
  evaluate(new EpsGreedyPolicy(0.1));
  // disabled: evaluate(new ThresholdAscentPolicy());
}
44
/// <summary>
/// Test case II as described in the Extreme Bandits paper: runs several policies through
/// <c>TestPolicyExtremeBandit2</c> with a fixed seed. Output only; nothing is asserted.
/// </summary>
[TestMethod]
public void ComparePoliciesExtremeBandits2() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;

  // Each policy is created right before it is evaluated, always with the same seed.
  Action<IBanditPolicy> evaluate = candidate => TestPolicyExtremeBandit2(seed, candidate);

  evaluate(new RandomPolicy());
  evaluate(new ExtremeHunterPolicy());
  evaluate(new UCB1Policy(10000));
  evaluate(new EpsGreedyPolicy(0.1));
  // disabled: evaluate(new ThresholdAscentPolicy());
}
56
/// <summary>
/// Prints a label for each policy configuration and runs it through
/// <c>TestPolicyBernoulli</c> with a fixed seed and 20 arms. Nothing is asserted.
/// </summary>
[TestMethod]
public void ComparePoliciesForBernoulliBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  // Prints the label first, then creates the policy and evaluates it.
  Action<string, Func<IBanditPolicy>> report = (label, createPolicy) => {
    Console.WriteLine(label);
    TestPolicyBernoulli(seed, armCount, createPolicy());
  };

  // Disabled: two Exp3Policy runs (labelled gamma=0.01 and gamma=0.05). The disabled calls
  // referenced names that are not defined in this file (globalRand, seedForPolicy).
  report("Thompson (Bernoulli)", () => new BernoulliThompsonSamplingPolicy());
  report("Generic Thompson (Bernoulli)", () => new GenericThompsonSamplingPolicy(new BernoulliModel()));
  report("Random", () => new RandomPolicy());
  report("UCB1", () => new UCB1Policy());
  report("UCB1Tuned", () => new UCB1TunedPolicy());
  report("UCB1Normal", () => new UCBNormalPolicy());
  report("Eps(0.01)", () => new EpsGreedyPolicy(0.01));
  report("Eps(0.05)", () => new EpsGreedyPolicy(0.05));
  // Disabled: EpsGreedyPolicy with 0.1, 0.2 and 0.5.
  report("UCT(0.1)", () => new UCTPolicy(0.1));
  report("UCT(0.5)", () => new UCTPolicy(0.5));
  report("UCT(1) ", () => new UCTPolicy(1));
  report("UCT(2) ", () => new UCTPolicy(2));
  report("UCT(5) ", () => new UCTPolicy(5));
  report("BoltzmannExploration(0.1)", () => new BoltzmannExplorationPolicy(0.1));
  report("BoltzmannExploration(0.5)", () => new BoltzmannExplorationPolicy(0.5));
  report("BoltzmannExploration(1) ", () => new BoltzmannExplorationPolicy(1));
  report("BoltzmannExploration(10) ", () => new BoltzmannExplorationPolicy(10));
  report("BoltzmannExploration(100)", () => new BoltzmannExplorationPolicy(100));
  report("ChernoffIntervalEstimationPolicy(0.01)", () => new ChernoffIntervalEstimationPolicy(0.01));
  report("ChernoffIntervalEstimationPolicy(0.05)", () => new ChernoffIntervalEstimationPolicy(0.05));
  report("ChernoffIntervalEstimationPolicy(0.1) ", () => new ChernoffIntervalEstimationPolicy(0.1));

  // Not applicable to Bernoulli rewards and therefore disabled: ThresholdAscentPolicy
  // with (10 | 100 | 1000) combined with (0.01 | 0.05 | 0.1).
}
111
/// <summary>
/// Prints a label for each policy configuration and runs it through
/// <c>TestPolicyGaussian</c> with a fixed seed and 20 arms. Nothing is asserted.
/// </summary>
[TestMethod]
public void ComparePoliciesForGaussianBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  // Prints the label first, then creates the policy and evaluates it.
  Action<string, Func<IBanditPolicy>> report = (label, createPolicy) => {
    Console.WriteLine(label);
    TestPolicyGaussian(seed, armCount, createPolicy());
  };

  report("Threshold Ascent (20)", () => new ThresholdAscentPolicy(20, 0.01));
  report("Threshold Ascent (100)", () => new ThresholdAscentPolicy(100, 0.01));
  report("Threshold Ascent (500)", () => new ThresholdAscentPolicy(500, 0.01));
  report("Threshold Ascent (1000)", () => new ThresholdAscentPolicy(1000, 0.01));
  report("Generic Thompson (Gaussian fixed var)", () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
  report("Generic Thompson (Gaussian unknown var)", () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
  report("Thompson (Gaussian orig)", () => new GaussianThompsonSamplingPolicy(true));
  report("Thompson (Gaussian new)", () => new GaussianThompsonSamplingPolicy());

  // A longer list of runs used to follow here but was disabled: Random, UCB1, UCB1Tuned,
  // UCB1Normal, Exp3 (0.01, 0.05), Eps (0.01 .. 0.5), UCT (0.1 .. 5),
  // BoltzmannExploration (0.1 .. 100), ChernoffIntervalEstimation (0.01, 0.05, 0.1) and
  // ThresholdAscent (10 | 100 | 1000 with 0.01 | 0.05 | 0.1).
  // Those calls went through TestPolicyNormal and used constructors taking nArms and/or
  // new Random(seedForPolicy); neither TestPolicyNormal nor seedForPolicy exists in this
  // file, so they would have to be ported before they can be re-enabled.
}
168
/// <summary>
/// Runs the generic Thompson sampling policy with a Gaussian mixture model through
/// <c>TestPolicyGaussianMixture</c> with a fixed seed and 20 arms. Nothing is asserted.
/// </summary>
[TestMethod]
public void ComparePoliciesForGaussianMixtureBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  Console.WriteLine("Generic Thompson (Gaussian Mixture)");
  var mixtureThompson = new GenericThompsonSamplingPolicy(new GaussianMixtureModel());
  TestPolicyGaussianMixture(seed, armCount, mixtureThompson);

  // Disabled variants using the current constructor signatures:
  //   ThresholdAscentPolicy(20 | 100 | 500 | 1000, 0.01)
  //   GaussianThompsonSamplingPolicy(true) and GaussianThompsonSamplingPolicy()
  //   GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1))   -- fixed variance
  //   GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1))  -- unknown variance
  //
  // Disabled variants written against older constructors (taking nArms and/or
  // new Random(seedForPolicy), which is not defined in this file):
  //   Random, UCB1, UCB1Tuned, UCB1Normal, Eps (0.01, 0.05), UCT (1, 2, 5),
  //   BoltzmannExploration (1, 10, 100), ThresholdAscent (10 | 100 | 1000 | 10000 with 0.01)
}
205
206
/// <summary>
/// Evaluates <paramref name="policy"/> via <c>TestPolicy</c> on <c>BernoulliBandit</c> instances.
/// </summary>
private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
  // The factory receives the random number generator and the arm count from TestPolicy.
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new BernoulliBandit(rng, armCount);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
/// <summary>
/// Evaluates <paramref name="policy"/> via <c>TestPolicy</c> on <c>TruncatedNormalBandit</c> instances.
/// </summary>
private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
  // The factory receives the random number generator and the arm count from TestPolicy.
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new TruncatedNormalBandit(rng, armCount);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
/// <summary>
/// Evaluates <paramref name="policy"/> via <c>TestPolicy</c> on <c>GaussianMixtureBandit</c> instances.
/// </summary>
private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
  // The factory receives the random number generator and the arm count from TestPolicy.
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new GaussianMixtureBandit(rng, armCount);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
/// <summary>
/// Evaluates <paramref name="policy"/> via <c>TestPolicy</c> on <c>GaussianBandit</c> instances.
/// </summary>
private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
  // The trailing arguments 0 and 10 are handed to GaussianBandit unchanged.
  // NOTE(review): their meaning is defined by the GaussianBandit constructor, which is not visible here.
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new GaussianBandit(rng, armCount, 0, 10);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
219
/// <summary>
/// Evaluates <paramref name="policy"/> via <c>TestPolicy</c> on a three-armed <c>ParetoBandit</c>.
/// </summary>
private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
  // The arm count handed to the factory is ignored; the bandit is defined by the
  // three-element array, which is created anew for every bandit.
  Func<Random, int, IBandit> createBandit =
    (rng, unusedArmCount) => new ParetoBandit(rng, new double[] { 5, 1.1, 2 });
  TestPolicy(randSeed, 3, policy, createBandit); // 3 arms
}
/// <summary>
/// Evaluates <paramref name="policy"/> via <c>TestPolicy</c> on a three-armed <c>ParetoBandit</c>
/// that is configured with two parameter arrays.
/// </summary>
private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
  // The arm count handed to the factory is ignored; the bandit is defined by the two
  // three-element arrays, which are created anew for every bandit.
  Func<Random, int, IBandit> createBandit =
    (rng, unusedArmCount) => new ParetoBandit(rng, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 });
  TestPolicy(randSeed, 3, policy, createBandit); // 3 arms
}
226
227
/// <summary>
/// Evaluates <paramref name="policy"/> in 30 independent runs and prints running statistics.
/// Each run creates a new bandit and new action infos, performs 10001 pulls and writes one
/// semicolon-separated row for pull 1, 101, 201, ..., 10001.
/// </summary>
/// <param name="randSeed">Seed from which the bandit and the policy random number generators are derived.</param>
/// <param name="nArms">Number of action infos to create; also passed to <paramref name="banditFactory"/>.</param>
/// <param name="policy">Policy under test; it is also printed as the first column of every row.</param>
/// <param name="banditFactory">Creates the bandit of a run from the bandit random number generator and the arm count.</param>
private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
  const int lastIteration = 10000; // the bound is inclusive, so every run performs 10001 pulls
  const int runs = 30;             // independent runs
  const int logInterval = 100;     // a row is printed every 100 pulls, starting with the first

  var seedSource = new Random(randSeed);
  // bandits must produce the same rewards for each test, hence their generator is
  // seeded separately from the one handed to the policy
  var banditRandom = new Random(seedSource.Next());
  var policyRandom = new Random(seedSource.Next());

  // An earlier version collected the statistics of all runs per iteration (regret,
  // sub-optimal pulls, best rewards) and printed a summary after the loop; that code was
  // disabled in favour of the per-run rows written below.
  for (int run = 0; run < runs; run++) {
    var bandit = banditFactory(banditRandom, nArms);
    double regretSum = 0.0;
    double suboptimalPullsExp = 0.0;
    double suboptimalPullsMax = 0.0;
    double bestReward = double.NegativeInfinity;
    var actionInfos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();

    for (int pulls = 1; pulls <= lastIteration + 1; pulls++) {
      var arm = policy.SelectAction(policyRandom, actionInfos);
      var reward = bandit.Pull(arm);
      actionInfos[arm].UpdateReward(reward);

      // collect stats
      if (arm != bandit.OptimalExpectedRewardArm) {
        suboptimalPullsExp++;
      }
      if (arm != bandit.OptimalMaximalRewardArm) {
        suboptimalPullsMax++;
      }
      regretSum += bandit.OptimalExpectedReward - reward;
      bestReward = Math.Max(bestReward, reward);

      // only pulls 1, 101, 201, ... are reported
      if ((pulls - 1) % logInterval != 0) {
        continue;
      }

      Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
        policy, pulls, regretSum, suboptimalPullsExp, suboptimalPullsMax, bestReward,
        regretSum / pulls, suboptimalPullsExp / pulls, suboptimalPullsMax / pulls);
    }
  }
}
299
300	}
301	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences