Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs @ 13847

Last change on this file since 13847 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 26.3 KB
RevLine 
[11708]1using System;
2using System.Linq;
3using System.Collections.Generic;
4using System.Globalization;
5using HeuristicLab.Algorithms.Bandits;
[11742]6using HeuristicLab.Algorithms.Bandits.BanditPolicies;
[11730]7using HeuristicLab.Algorithms.Bandits.Models;
[11708]8using Microsoft.VisualStudio.TestTools.UnitTesting;
9
10namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
11  [TestClass]
12  public class TestBanditPolicies {
// Runs each listed policy against the bandit built by TestPolicyGaussianUnknownVariance
// (20 arms, fixed seed) so that the statistics written to the console can be compared.
[TestMethod]
public void ComparePoliciesForGaussianUnknownVarianceBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;
  const int nArms = 20;
  // every experiment shares seed and arm count; only the policy varies
  Action<IBanditPolicy> run = policy => TestPolicyGaussianUnknownVariance(randSeed, nArms, policy);

  // some of the policies are specific to rewards in [0..1], e.g. Threshold Ascent or UCB1
  run(new ExtremeHunterPolicy());
  run(new IntervalEstimationPolicy());
  // disabled: run(new UCBPolicy(10));
  run(new UCBNormalPolicy());
  run(new UCB1TunedPolicy());
  run(new UCB1Policy(10));
  run(new ActiveLearningPolicy(10));
  run(new ChernoffIntervalEstimationPolicy());
  run(new BoltzmannExplorationPolicy(100));
  run(new EpsGreedyPolicy(0.1));
  run(new RandomPolicy());
}
[11742]32
// Test case I as described in the Extreme Bandits paper.
// Every policy below is run on the bandit created by TestPolicyExtremeBandit1 with the same seed.
[TestMethod]
public void ComparePoliciesExtremeBandits1() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;
  Action<IBanditPolicy> run = policy => TestPolicyExtremeBandit1(randSeed, policy);

  // baselines
  run(new RandomPolicy());
  run(new SingleArmPolicy(1));

  run(new ExtremeHunterPolicy());

  // UCB1 with decreasing parameter values
  run(new UCB1Policy(10000));
  run(new UCB1Policy(1000));
  run(new UCB1Policy(100));
  run(new UCB1Policy(10));
  run(new UCB1Policy(2));
  run(new UCB1Policy(1));
  run(new UCB1Policy(0.5));
  run(new UCB1Policy(0.1));

  // epsilon-greedy with decreasing epsilon
  run(new EpsGreedyPolicy(0.1));
  run(new EpsGreedyPolicy(0.05));
  run(new EpsGreedyPolicy(0.01));
}
[11730]53
// Test case II as described in the Extreme Bandits paper.
// Only the ExtremeHunter parameter grid is active; the other policies are kept as disabled toggles.
[TestMethod]
public void ComparePoliciesExtremeBandits2() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;
  Action<IBanditPolicy> run = policy => TestPolicyExtremeBandit2(randSeed, policy);

  // disabled baselines:
  // run(new RandomPolicy());
  // run(new SingleArmPolicy(0));
  // run(new SingleArmPolicy(1));
  // run(new SingleArmPolicy(2));
  // run(new ExtremeHunterPolicy());

  // ExtremeHunter grid: for each minPulls value all D values are tried in the listed order
  foreach (var minPulls in new[] { 30, 100 }) {
    foreach (var d in new[] { 1, 2, 0.5, 5 }) {
      run(new ExtremeHunterPolicy(D: d, minPulls: minPulls));
    }
  }

  // disabled alternatives:
  // run(new UCB1Policy(10000));
  // run(new UCB1Policy(1000));
  // run(new UCB1Policy(100));
  // run(new UCB1Policy(10));
  // run(new UCB1Policy(2));
  // run(new UCB1Policy(1));
  // run(new UCB1Policy(0.5));
  // run(new UCB1Policy(0.1));
  // run(new EpsGreedyPolicy(0.1));
  // run(new EpsGreedyPolicy(0.05));
  // run(new EpsGreedyPolicy(0.01));
  // run(new ThresholdAscentPolicy());
}
85
// Custom test case for ExtremeHunter.
// NOTE(review): this was previously described as "using truncated normal distributions",
// but TestPolicyExtremeBandit3 builds its three arms from GammaModel instances - confirm which is intended.
[TestMethod]
public void ComparePoliciesExtremeBandits3() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;
  Action<IBanditPolicy> run = policy => TestPolicyExtremeBandit3(randSeed, policy);

  run(new RandomPolicy());
  // reference policies that always pull the same arm (arms 0, 1 and 2)
  for (int arm = 0; arm < 3; arm++) {
    run(new SingleArmPolicy(arm));
  }
  run(new ExtremeHunterPolicy());
  run(new UCB1Policy(3));
  run(new EpsGreedyPolicy(0.1));
}
100
// A unit test to experiment with bandit policies for completing a GP sentence.
// Each policy is run on a SentenceBandit created for the partial sentence "a*b+c*d+e*f+E".
[TestMethod]
public void ComparePoliciesSentenceCompletionProblem() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;

  // the bandit ignores the number of arms; it is built from the sentence to complete
  Func<Random, IBandit> sentenceCompletionBanditFactory = rng =>
    new SentenceBandit(rng, new SymbolicRegressionPoly10Problem(), "a*b+c*d+e*f+E", 23);
  Action<IBanditPolicy> run = policy => TestPolicy(randSeed, policy, sentenceCompletionBanditFactory);

  // disabled: all reference policies (always pulling one arm)
  // var b = sentenceCompletionBanditFactory(new Random());
  // for (int i = 0; i < b.NumArms; i++) {
  //   run(new SingleArmPolicy(i));
  // }

  // for the completion of a*b+c*d+e*f+a*g*i+E the arms 12, 15, and 19 are optimal
  // NOTE(review): that sentence differs from the one passed to SentenceBandit above - confirm arm 12 is still the intended reference
  run(new SingleArmPolicy(12));

  run(new RandomPolicy());

  run(new ExtremeHunterPolicy());
  run(new ExtremeHunterPolicy(D: 0.5));
  run(new UCB1Policy(3));
  run(new UCB1Policy(1));
  run(new UCB1Policy(0.5));
  run(new ThresholdAscentPolicy());
  run(new EpsGreedyPolicy(0.1));
}
134
// Compares policies on a 20-armed Bernoulli bandit (see TestPolicyBernoulli).
// A label line is written to the console before the statistics of each policy.
[TestMethod]
public void ComparePoliciesForBernoulliBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;
  const int nArms = 20;
  // prints the label, then runs the experiment for the given policy
  Action<string, IBanditPolicy> run = (label, policy) => {
    Console.WriteLine(label);
    TestPolicyBernoulli(randSeed, nArms, policy);
  };

  // Exp3 experiments (gamma=0.01 and gamma=0.05) used to be listed here as disabled lines;
  // they referenced names (globalRand, seedForPolicy) that are not defined in this class.

  run("Thompson (Bernoulli)", new BernoulliThompsonSamplingPolicy());
  run("Generic Thompson (Bernoulli)", new GenericThompsonSamplingPolicy(new BernoulliModel()));
  run("Random", new RandomPolicy());
  run("UCB1", new UCB1Policy());
  run("UCB1Tuned", new UCB1TunedPolicy());
  run("UCB1Normal", new UCBNormalPolicy());
  run("Eps(0.01)", new EpsGreedyPolicy(0.01));
  run("Eps(0.05)", new EpsGreedyPolicy(0.05));
  // disabled:
  // run("Eps(0.1)", new EpsGreedyPolicy(0.1));
  // run("Eps(0.2)", new EpsGreedyPolicy(0.2));
  // run("Eps(0.5)", new EpsGreedyPolicy(0.5));
  run("UCT(0.1)", new UCTPolicy(0.1));
  run("UCT(0.5)", new UCTPolicy(0.5));
  run("UCT(1)  ", new UCTPolicy(1));
  run("UCT(2)  ", new UCTPolicy(2));
  run("UCT(5)  ", new UCTPolicy(5));
  run("BoltzmannExploration(0.1)", new BoltzmannExplorationPolicy(0.1));
  run("BoltzmannExploration(0.5)", new BoltzmannExplorationPolicy(0.5));
  run("BoltzmannExploration(1)  ", new BoltzmannExplorationPolicy(1));
  run("BoltzmannExploration(10) ", new BoltzmannExplorationPolicy(10));
  run("BoltzmannExploration(100)", new BoltzmannExplorationPolicy(100));
  run("ChernoffIntervalEstimationPolicy(0.01)", new ChernoffIntervalEstimationPolicy(0.01));
  run("ChernoffIntervalEstimationPolicy(0.05)", new ChernoffIntervalEstimationPolicy(0.05));
  run("ChernoffIntervalEstimationPolicy(0.1) ", new ChernoffIntervalEstimationPolicy(0.1));

  // ThresholdAscent is not applicable to bernoulli rewards; the disabled ThresholdAscent
  // experiments (10/100/1000 with 0.01/0.05/0.1) are therefore not listed here.
}
[11730]189
// Compares Threshold Ascent and Thompson sampling variants on the 20-armed bandit
// created by TestPolicyGaussian. A label line is printed before each run.
[TestMethod]
public void ComparePoliciesForGaussianBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

  const int randSeed = 31415;
  const int nArms = 20;
  // prints the label, then runs the experiment for the given policy
  Action<string, IBanditPolicy> run = (label, policy) => {
    Console.WriteLine(label);
    TestPolicyGaussian(randSeed, nArms, policy);
  };

  run("Threshold Ascent (20)", new ThresholdAscentPolicy(20, 0.01));
  run("Threshold Ascent (100)", new ThresholdAscentPolicy(100, 0.01));
  run("Threshold Ascent (500)", new ThresholdAscentPolicy(500, 0.01));
  run("Threshold Ascent (1000)", new ThresholdAscentPolicy(1000, 0.01));
  run("Generic Thompson (Gaussian fixed var)", new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
  run("Generic Thompson (Gaussian unknown var)", new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
  run("Thompson (Gaussian orig)", new GaussianThompsonSamplingPolicy(true));
  run("Thompson (Gaussian new)", new GaussianThompsonSamplingPolicy());

  // A large disabled block used to follow here (Random, UCB1, UCB1Tuned, UCB1Normal, Exp3, Eps,
  // UCT, BoltzmannExploration, ChernoffIntervalEstimation and ThresholdAscent experiments).
  // It called a TestPolicyNormal helper that this class does not define and used an undefined
  // seedForPolicy variable, so it could not be re-enabled as written.
}
[11708]246
// Runs generic Thompson sampling with a Gaussian mixture model on the 20-armed bandit
// created by TestPolicyGaussianMixture. Other policies are kept as disabled toggles.
[TestMethod]
public void ComparePoliciesForGaussianMixtureBandit() {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
  const int randSeed = 31415;
  const int nArms = 20;
  // prints the label, then runs the experiment for the given policy
  Action<string, IBanditPolicy> run = (label, policy) => {
    Console.WriteLine(label);
    TestPolicyGaussianMixture(randSeed, nArms, policy);
  };

  run("Generic Thompson (Gaussian Mixture)", new GenericThompsonSamplingPolicy(new GaussianMixtureModel()));

  // disabled:
  // run("Threshold Ascent (20)", new ThresholdAscentPolicy(20, 0.01));
  // run("Threshold Ascent (100)", new ThresholdAscentPolicy(100, 0.01));
  // run("Threshold Ascent (500)", new ThresholdAscentPolicy(500, 0.01));
  // run("Threshold Ascent (1000)", new ThresholdAscentPolicy(1000, 0.01));
  // run("Thompson (Gaussian orig)", new GaussianThompsonSamplingPolicy(true));
  // run("Thompson (Gaussian new)", new GaussianThompsonSamplingPolicy());
  // run("Generic Thompson (Gaussian fixed variance)", new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
  // run("Generic Thompson (Gaussian unknown variance)", new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));

  // A further disabled block used to follow here (Random, UCB1, UCB1Tuned, UCB1Normal, Eps, UCT,
  // BoltzmannExploration and ThresholdAscent experiments). It relied on an undefined seedForPolicy
  // variable and on constructor calls that take nArms, unlike the active calls in this class.
}
283
284
// Runs the policy experiment on a BernoulliBandit with nArms arms.
private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng => new BernoulliBandit(rng, nArms);
  TestPolicy(randSeed, policy, createBandit);
}
// Runs the policy experiment on a TruncatedNormalBandit with nArms arms
// (note: despite the method name the bandit type is TruncatedNormalBandit).
private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng => new TruncatedNormalBandit(rng, nArms);
  TestPolicy(randSeed, policy, createBandit);
}
// Runs the policy experiment on a GaussianMixtureBandit with nArms arms.
private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng => new GaussianMixtureBandit(rng, nArms);
  TestPolicy(randSeed, policy, createBandit);
}
// Runs the policy experiment on a GaussianBandit with nArms arms.
// NOTE(review): the trailing constructor arguments 0 and 10 are presumably the range of arm means or variances - confirm against GaussianBandit.
private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng => new GaussianBandit(rng, nArms, 0, 10);
  TestPolicy(randSeed, policy, createBandit);
}
[11730]297
// Runs the policy experiment on a three-armed ParetoBandit configured with the values 5, 1.1 and 2.
private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng => new ParetoBandit(rng, new double[] { 5, 1.1, 2 });
  TestPolicy(randSeed, policy, createBandit);
}
// Runs the policy experiment on a three-armed ParetoBandit that is configured with two value arrays.
// NOTE(review): the meaning of the second array and of the trailing arguments 0 and 1 is not visible here - confirm against ParetoBandit.
private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng =>
    new ParetoBandit(rng, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 }, 0, 1);
  TestPolicy(randSeed, policy, createBandit);
}
// Runs the policy experiment on a generic Bandit whose three arms are GammaModel instances.
// NOTE(review): the trailing arguments 1 and 2 presumably name the arms with the best expected and best maximal reward - confirm against Bandit.
private void TestPolicyExtremeBandit3(int randSeed, IBanditPolicy policy) {
  Func<Random, IBandit> createBandit = rng => {
    var arms = new IModel[] {
      new GammaModel(10, 1),   // exp=10, var=10
      new GammaModel(6, 2),    // exp=12, var=24
      new GammaModel(3, 3),    // exp= 9, var=27
    };
    return new Bandit(rng, arms, 1, 2);
  };
  TestPolicy(randSeed, policy, createBandit);
}
[11730]312
[12876]313
// Runs the given policy for 30 independent repetitions on bandits created by banditFactory
// and writes progress statistics to the console as semicolon-separated lines.
//
//   randSeed       seed of the master generator; the bandit and the policy generator are seeded from it
//   policy         policy under test; it creates one action-info object per arm and selects the arm to pull
//   banditFactory  creates the bandit for a repetition from the bandit generator shared by all repetitions
//
// Output columns: policy; number of pulls; total reward; pulls of the arm with optimal expected reward;
// pulls of the arm with optimal maximal reward; maximal reward; mean reward per pull;
// fraction of pulls on the optimal-expected arm; fraction of pulls on the optimal-maximal arm.
private void TestPolicy(int randSeed, IBanditPolicy policy, Func<Random, IBandit> banditFactory) {
  const int maxIt = 100000;
  const int reps = 30; // independent runs
  const int logInterval = 500;
  var globalRandom = new Random(randSeed);
  var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
  var policyRandom = new Random(globalRandom.Next());

  for (int r = 0; r < reps; r++) {
    var nextLogStep = 1;
    var b = banditFactory(banditRandom);
    var totalReward = 0.0;
    int totalPullsOfOptimalArmExp = 0;
    int totalPullsOfOptimalArmMax = 0;
    var maxReward = double.NegativeInfinity;
    var actionInfos = Enumerable.Range(0, b.NumArms).Select(_ => policy.CreateActionInfo()).ToArray();

    // maxIt + 2 pulls per repetition; the loop bounds are kept so that both random streams
    // are consumed exactly as before
    for (int i = 0; i <= maxIt + 1; i++) {
      var selectedAction = policy.SelectAction(policyRandom, actionInfos);
      var reward = b.Pull(selectedAction);
      actionInfos[selectedAction].UpdateReward(reward);

      // collect stats
      if (selectedAction == b.OptimalExpectedRewardArm) totalPullsOfOptimalArmExp++;
      if (selectedAction == b.OptimalMaximalRewardArm) totalPullsOfOptimalArmMax++;
      totalReward += reward;
      maxReward = Math.Max(maxReward, reward);

      // After the pull with index i exactly i + 1 pulls have been made. All averages must use
      // that count; dividing by the zero-based index overstated them (the first log line could
      // report an optimal-arm fraction of 2.0).
      int pulls = i + 1;
      if (pulls == nextLogStep) {
        nextLogStep += logInterval;
        Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
          policy, pulls, totalReward, totalPullsOfOptimalArmExp, totalPullsOfOptimalArmMax, maxReward,
          totalReward / pulls, totalPullsOfOptimalArmExp / (double)pulls, totalPullsOfOptimalArmMax / (double)pulls);
      }
    }
  }
}
385
[11708]386  }
387}
Note: See TracBrowser for help on using the repository browser.