Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs @ 12876

Last change on this file since 12876 was 12876, checked in by gkronber, 9 years ago

#2283: implemented first crude version of extreme hunter algorithm in branch

File size: 21.7 KB
Line 
1using System;
2using System.Linq;
3using System.Collections.Generic;
4using System.Globalization;
5using HeuristicLab.Algorithms.Bandits;
6using HeuristicLab.Algorithms.Bandits.BanditPolicies;
7using HeuristicLab.Algorithms.Bandits.Models;
8using Microsoft.VisualStudio.TestTools.UnitTesting;
9
10namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
11  [TestClass]
12  public class TestBanditPolicies {
[TestMethod]
// runs every listed policy on the bandit created by TestPolicyGaussianUnknownVariance,
// always with the same seed and the same number of arms
public void ComparePoliciesForGaussianUnknownVarianceBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  // some of the policies are specific to rewards in [0..1], e.g. Threshold Ascent or UCB1
  // (factories are used so that a policy is only constructed right before its own run)
  var policyFactories = new Func<IBanditPolicy>[] {
    () => new ExtremeHunterPolicy(),
    () => new IntervalEstimationPolicy(),
    // () => new UCBPolicy(10),   // disabled
    () => new UCBNormalPolicy(),
    () => new UCB1TunedPolicy(),
    () => new UCB1Policy(10),
    () => new ActiveLearningPolicy(10),
    () => new ChernoffIntervalEstimationPolicy(),
    () => new BoltzmannExplorationPolicy(100),
    () => new EpsGreedyPolicy(0.1),
    () => new RandomPolicy(),
  };

  foreach (var createPolicy in policyFactories) {
    TestPolicyGaussianUnknownVariance(seed, armCount, createPolicy());
  }
}
32
[TestMethod]
// test case I as described in the Extreme Bandits paper
public void ComparePoliciesExtremeBandits1() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;

  // each policy is created lazily, immediately before it is evaluated
  var policyFactories = new Func<IBanditPolicy>[] {
    () => new RandomPolicy(),
    () => new ExtremeHunterPolicy(),
    () => new UCB1Policy(10000),
    () => new EpsGreedyPolicy(0.1),
    // () => new ThresholdAscentPolicy(),   // disabled
  };

  foreach (var createPolicy in policyFactories) {
    TestPolicyExtremeBandit1(seed, createPolicy());
  }
}
44
[TestMethod]
// test case II as described in the Extreme Bandits paper
public void ComparePoliciesExtremeBandits2() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;

  // each policy is created lazily, immediately before it is evaluated
  var policyFactories = new Func<IBanditPolicy>[] {
    () => new RandomPolicy(),
    () => new ExtremeHunterPolicy(),
    () => new UCB1Policy(10000),
    () => new EpsGreedyPolicy(0.1),
    // () => new ThresholdAscentPolicy(),   // disabled
  };

  foreach (var createPolicy in policyFactories) {
    TestPolicyExtremeBandit2(seed, createPolicy());
  }
}
56
[TestMethod]
// prints a label for every policy configuration and evaluates it via TestPolicyBernoulli
public void ComparePoliciesForBernoulliBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  // writes the label first; the policy is constructed only afterwards and then evaluated
  Action<string, Func<IBanditPolicy>> run = (label, createPolicy) => {
    Console.WriteLine(label);
    TestPolicyBernoulli(seed, armCount, createPolicy());
  };

  // disabled: the two Exp3 configurations
  run("Thompson (Bernoulli)", () => new BernoulliThompsonSamplingPolicy());
  run("Generic Thompson (Bernoulli)", () => new GenericThompsonSamplingPolicy(new BernoulliModel()));
  run("Random", () => new RandomPolicy());
  run("UCB1", () => new UCB1Policy());
  run("UCB1Tuned", () => new UCB1TunedPolicy());
  run("UCB1Normal", () => new UCBNormalPolicy());
  run("Eps(0.01)", () => new EpsGreedyPolicy(0.01));
  run("Eps(0.05)", () => new EpsGreedyPolicy(0.05));
  // disabled: Eps(0.1), Eps(0.2), Eps(0.5)
  run("UCT(0.1)", () => new UCTPolicy(0.1));
  run("UCT(0.5)", () => new UCTPolicy(0.5));
  run("UCT(1)  ", () => new UCTPolicy(1));
  run("UCT(2)  ", () => new UCTPolicy(2));
  run("UCT(5)  ", () => new UCTPolicy(5));
  run("BoltzmannExploration(0.1)", () => new BoltzmannExplorationPolicy(0.1));
  run("BoltzmannExploration(0.5)", () => new BoltzmannExplorationPolicy(0.5));
  run("BoltzmannExploration(1)  ", () => new BoltzmannExplorationPolicy(1));
  run("BoltzmannExploration(10) ", () => new BoltzmannExplorationPolicy(10));
  run("BoltzmannExploration(100)", () => new BoltzmannExplorationPolicy(100));
  run("ChernoffIntervalEstimationPolicy(0.01)", () => new ChernoffIntervalEstimationPolicy(0.01));
  run("ChernoffIntervalEstimationPolicy(0.05)", () => new ChernoffIntervalEstimationPolicy(0.05));
  run("ChernoffIntervalEstimationPolicy(0.1) ", () => new ChernoffIntervalEstimationPolicy(0.1));

  // not applicable to bernoulli rewards (disabled):
  //   ThresholdAscent with (10 | 100 | 1000) combined with (0.01 | 0.05 | 0.1)
}
111
[TestMethod]
// prints a label for every policy configuration and evaluates it via TestPolicyGaussian
public void ComparePoliciesForGaussianBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

  const int seed = 31415;
  const int armCount = 20;

  // writes the label first; the policy is constructed only afterwards and then evaluated
  Action<string, Func<IBanditPolicy>> run = (label, createPolicy) => {
    Console.WriteLine(label);
    TestPolicyGaussian(seed, armCount, createPolicy());
  };

  run("Threshold Ascent (20)", () => new ThresholdAscentPolicy(20, 0.01));
  run("Threshold Ascent (100)", () => new ThresholdAscentPolicy(100, 0.01));
  run("Threshold Ascent (500)", () => new ThresholdAscentPolicy(500, 0.01));
  run("Threshold Ascent (1000)", () => new ThresholdAscentPolicy(1000, 0.01));
  run("Generic Thompson (Gaussian fixed var)", () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
  run("Generic Thompson (Gaussian unknown var)", () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
  run("Thompson (Gaussian orig)", () => new GaussianThompsonSamplingPolicy(true));
  run("Thompson (Gaussian new)", () => new GaussianThompsonSamplingPolicy());

  // Disabled comparisons. They were written against a TestPolicyNormal helper and a seedForPolicy
  // value, neither of which is defined in this class, and pass nArms to the policy constructors:
  //   Random, UCB1, UCB1Tuned, UCB1Normal,
  //   Exp3 (gamma 0.01, 0.05),
  //   Eps (0.01, 0.05, 0.1, 0.2, 0.5),
  //   UCT (0.1, 0.5, 1, 2, 5),
  //   BoltzmannExploration (0.1, 0.5, 1, 10, 100),
  //   ChernoffIntervalEstimationPolicy (0.01, 0.05, 0.1),
  //   ThresholdAscent (10 | 100 | 1000) combined with (0.01 | 0.05 | 0.1)
}
168
[TestMethod]
// evaluates generic Thompson sampling with a Gaussian mixture model via TestPolicyGaussianMixture
public void ComparePoliciesForGaussianMixtureBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int seed = 31415;
  const int armCount = 20;

  // the label is written before the policy is constructed and evaluated
  Console.WriteLine("Generic Thompson (Gaussian Mixture)");
  var thompsonPolicy = new GenericThompsonSamplingPolicy(new GaussianMixtureModel());
  TestPolicyGaussianMixture(seed, armCount, thompsonPolicy);

  // Disabled comparisons with the current constructor signatures:
  //   Threshold Ascent (20 | 100 | 500 | 1000, 0.01),
  //   Thompson (Gaussian orig) / (Gaussian new),
  //   Generic Thompson with GaussianModel(0.5, 1, 0.1) (fixed variance)
  //   and GaussianModel(0.5, 1, 1, 1) (unknown variance)
  //
  // Disabled comparisons that still pass nArms / a seedForPolicy-based Random to the constructors:
  //   Random, UCB1, UCB1Tuned, UCB1Normal, Eps (0.01, 0.05), UCT (1, 2, 5),
  //   BoltzmannExploration (1, 10, 100), ThresholdAscent (10 | 100 | 1000 | 10000, 0.01)
}
205
206
// Evaluates the policy with TestPolicy, using BernoulliBandit instances as the bandit.
private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new BernoulliBandit(rng, armCount);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
// Evaluates the policy with TestPolicy, using TruncatedNormalBandit instances as the bandit.
private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new TruncatedNormalBandit(rng, armCount);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
// Evaluates the policy with TestPolicy, using GaussianMixtureBandit instances as the bandit.
private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new GaussianMixtureBandit(rng, armCount);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
// Evaluates the policy with TestPolicy, using GaussianBandit instances created with the
// additional constructor arguments 0 and 10.
private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, int, IBandit> createBandit = (rng, armCount) => new GaussianBandit(rng, armCount, 0, 10);
  TestPolicy(randSeed, nArms, policy, createBandit);
}
219
// Evaluates the policy with TestPolicy on a ParetoBandit with three arms.
// The arm count handed to the factory is ignored; the parameter array fixes it to 3.
private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
  const int armCount = 3;
  Func<Random, int, IBandit> createBandit =
    (rng, _) => new ParetoBandit(rng, new[] { 5.0, 1.1, 2.0 });
  TestPolicy(randSeed, armCount, policy, createBandit);
}
// Evaluates the policy with TestPolicy on a ParetoBandit with three arms, built from two parameter arrays.
// The arm count handed to the factory is ignored; the parameter arrays fix it to 3.
private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
  const int armCount = 3;
  Func<Random, int, IBandit> createBandit =
    (rng, _) => new ParetoBandit(rng, new[] { 1.5, 1.1, 3.0 }, new[] { 0.0, 0.8, 0.0 });
  TestPolicy(randSeed, armCount, policy, createBandit);
}
226
227
// Simulates the policy on bandits produced by banditFactory and writes running statistics to the console.
//   randSeed      - seeds the generator from which the bandit and the policy random sources are derived
//   nArms         - passed to banditFactory; also the number of action infos created for every run
//   policy        - selects the arm for each pull and supplies the per-arm action infos
//   banditFactory - called once per run with the shared bandit random source and nArms
private void TestPolicy(int randSeed, int nArms, IBanditPolicy policy, Func<Random, int, IBandit> banditFactory) {
  const int pullsPerRun = 10001;  // pull numbers 1..10001
  const int runs = 30;            // independent runs
  const int logInterval = 100;    // a line is written for pull 1, 101, 201, ..., 10001
  const string logFormat = "{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}";

  var seedSource = new Random(randSeed);
  var banditRng = new Random(seedSource.Next()); // bandits must produce the same rewards for each test
  var policyRng = new Random(seedSource.Next());

  // Note: aggregation of the statistics over all runs (avg / min / max per log step) is disabled;
  // every run writes its own lines instead.
  for (int run = 0; run < runs; run++) {
    var bandit = banditFactory(banditRng, nArms);
    double regretSum = 0.0;
    double suboptimalPullsExp = 0.0;
    double suboptimalPullsMax = 0.0;
    double bestSoFar = double.NegativeInfinity;
    var infos = Enumerable.Range(0, nArms).Select(_ => policy.CreateActionInfo()).ToArray();

    for (int pull = 1; pull <= pullsPerRun; pull++) {
      var arm = policy.SelectAction(policyRng, infos);
      var reward = bandit.Pull(arm);
      infos[arm].UpdateReward(reward);

      // collect stats
      if (arm != bandit.OptimalExpectedRewardArm) {
        suboptimalPullsExp += 1;
      }
      if (arm != bandit.OptimalMaximalRewardArm) {
        suboptimalPullsMax += 1;
      }
      regretSum += bandit.OptimalExpectedReward - reward;
      bestSoFar = Math.Max(bestSoFar, reward);

      if (pull % logInterval != 1) {
        continue;
      }

      // columns: policy; pull number; totals (regret, suboptimal pulls exp / max); best reward; per-pull averages
      Console.WriteLine(logFormat,
        policy, pull, regretSum, suboptimalPullsExp, suboptimalPullsMax, bestSoFar,
        regretSum / pull, suboptimalPullsExp / pull, suboptimalPullsMax / pull);
    }
  }
}
299
300  }
301}
Note: See TracBrowser for help on using the repository browser.