using System;
using System.Linq;
using System.Collections.Generic;
using System.Globalization;
using HeuristicLab.Algorithms.Bandits;
using HeuristicLab.Algorithms.Bandits.BanditPolicies;
using HeuristicLab.Algorithms.Bandits.Models;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
  [TestClass]
  public class TestBanditPolicies {

    [TestMethod]
    public void ComparePoliciesForGaussianUnknownVarianceBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;
      // some of the policies are specific to rewards in [0..1], e.g. Threshold Ascent or UCB1
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
      //TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
      TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
    }

    [TestMethod]
    // test case I as described in Extreme Bandits paper
    public void ComparePoliciesExtremeBandits1() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
      TestPolicyExtremeBandit1(randSeed, new SingleArmPolicy(1));
      TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1000));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(100));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(2));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.5));
      TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.1));
      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.05));
      TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.01));
    }

    [TestMethod]
    // test case II as described in Extreme Bandits paper
    public void ComparePoliciesExtremeBandits2() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      //TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(0));
      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(1));
      //TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(2));
      // TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
      // sweep the ExtremeHunter parameter D with minPulls: 30
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 30));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 30));
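      // same sweep of D with minPulls raised to 100 (minPulls presumably sets the minimum number
      // of initial pulls per arm before the ExtremeHunter index is used)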
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 100));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 100));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 100));
      TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 100));
      // TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1000));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(100));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(2));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.5));
      //TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.1));
      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.05));
      //TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.01));
      //TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
    }

    [TestMethod]
    // my own test case for ExtremeHunter
    // using truncated normal distributions
    public void ComparePoliciesExtremeBandits3() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      TestPolicyExtremeBandit3(randSeed, new RandomPolicy());
      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(0));
      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(1));
      TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(2));
      TestPolicyExtremeBandit3(randSeed, new ExtremeHunterPolicy());
      TestPolicyExtremeBandit3(randSeed, new UCB1Policy(3));
      TestPolicyExtremeBandit3(randSeed, new EpsGreedyPolicy(0.1));
    }

    [TestMethod]
    // a unit test to experiment with bandit policies for completing a GP sentence
    public void ComparePoliciesSentenceCompletionProblem() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      // factory: Random -> bandit (IBandit is assumed here as the common interface of the bandit classes)
      Func<Random, IBandit> sentenceCompletionBanditFactory = (banditRandom) => {
        var problem = new SymbolicRegressionPoly10Problem();
        return new SentenceBandit(banditRandom, problem, "a*b+c*d+e*f+E", 23);
      }; // ignore number of arms

      // var b = sentenceCompletionBanditFactory(new Random());
      // all reference policies (always pulling one arm)
      // for (int i = 0; i < b.NumArms; i++) {
      //   TestPolicy(randSeed, new SingleArmPolicy(i), sentenceCompletionBanditFactory);
      // }

      // for the completion of a*b+c*d+e*f+a*g*i+E the arms 12, 15, and 19 are optimal
      TestPolicy(randSeed, new SingleArmPolicy(12), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new RandomPolicy(), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new ExtremeHunterPolicy(), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new ExtremeHunterPolicy(D: 0.5), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new UCB1Policy(3), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new UCB1Policy(1), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new UCB1Policy(0.5), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new ThresholdAscentPolicy(), sentenceCompletionBanditFactory);
      TestPolicy(randSeed, new EpsGreedyPolicy(0.1), sentenceCompletionBanditFactory);
    }

    [TestMethod]
    public void ComparePoliciesForBernoulliBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;
      //Console.WriteLine("Exp3 (gamma=0.01)");
      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
      //Console.WriteLine("Exp3 (gamma=0.05)");
      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
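      // every call below delegates to TestPolicy, which prints one semicolon-separated result line
      // per policy every 500 pulls (see TestPolicy at the end of this class)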
      Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy());
      Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel()));
      Console.WriteLine("Random"); TestPolicyBernoulli(randSeed, nArms, new RandomPolicy());
      Console.WriteLine("UCB1"); TestPolicyBernoulli(randSeed, nArms, new UCB1Policy());
      Console.WriteLine("UCB1Tuned"); TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy());
      Console.WriteLine("UCB1Normal"); TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy());
      Console.WriteLine("Eps(0.01)"); TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01));
      Console.WriteLine("Eps(0.05)"); TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05));
      //Console.WriteLine("Eps(0.1)"); //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1));
      //Console.WriteLine("Eps(0.2)"); //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2));
      //Console.WriteLine("Eps(0.5)"); //TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5));
      Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1));
      Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5));
      Console.WriteLine("UCT(1) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1));
      Console.WriteLine("UCT(2) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2));
      Console.WriteLine("UCT(5) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5));
      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1));
      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5));
      Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1));
      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10));
      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1));

      // not applicable to bernoulli rewards
      //Console.WriteLine("ThresholdAscent(10, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      //Console.WriteLine("ThresholdAscent(10, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
      //Console.WriteLine("ThresholdAscent(10, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
      //Console.WriteLine("ThresholdAscent(100, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
      //Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
      //Console.WriteLine("ThresholdAscent(100, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
      //Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
      //Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
      //Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
    }

    [TestMethod]
    public void ComparePoliciesForGaussianBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;
      Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
      Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
      Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
      Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
      Console.WriteLine("Generic Thompson (Gaussian fixed var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
      Console.WriteLine("Generic Thompson (Gaussian unknown var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());
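      // the comparisons below are disabled; they still use an older policy API (policies received the
      // number of arms and, for randomized policies, their own Random instance in the constructor)
      // and call a TestPolicyNormal helper that is no longer defined in this class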
      /*
      Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
      Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms));
      Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms));
      Console.WriteLine("UCB1Normal"); TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms));
      //Console.WriteLine("Exp3 (gamma=0.01)"); //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
      //Console.WriteLine("Exp3 (gamma=0.05)"); //TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
      Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
      Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
      //Console.WriteLine("Eps(0.1)"); //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
      //Console.WriteLine("Eps(0.2)"); //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
      //Console.WriteLine("Eps(0.5)"); //TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
      Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1));
      Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5));
      Console.WriteLine("UCT(1) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1));
      Console.WriteLine("UCT(2) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2));
      Console.WriteLine("UCT(5) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5));
      Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
      Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
      Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
      Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
      Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
      Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
      Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
      Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
      Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
      Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
      */
    }

    [TestMethod]
    public void ComparePoliciesForGaussianMixtureBandit() {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
      var randSeed = 31415;
      var nArms = 20;
      Console.WriteLine("Generic Thompson (Gaussian Mixture)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianMixtureModel()));
      // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
      // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
      // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
      // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
      // Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
      // Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
      // Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
      // Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
      /*
      Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
      Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms));
      Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms));
      Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms));
      Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
      Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
      Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1));
      Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2));
      Console.WriteLine("UCT(5) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5));
      Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
      Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
      Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
      Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
      Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
      Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
      Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
      */
    }

    private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new BernoulliBandit(banditRandom, nArms));
    }
    private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new TruncatedNormalBandit(banditRandom, nArms));
    }
    private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new GaussianMixtureBandit(banditRandom, nArms));
    }
    private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new GaussianBandit(banditRandom, nArms, 0, 10));
    }
    private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 }));
    }
    private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
      TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 }, 0, 1));
    }
    private void TestPolicyExtremeBandit3(int randSeed, IBanditPolicy policy) {
      // the exp/var comments match a (shape, scale) parameterization: mean = shape*scale, variance = shape*scale^2;
      // the trailing arguments (1, 2) presumably mark arm 1 as optimal in expectation and arm 2 (largest scale,
      // hence heaviest upper tail) as optimal for maximal rewards
      TestPolicy(randSeed, policy, (banditRandom) => new Bandit(banditRandom, new IModel[] {
        new GammaModel(10, 1), // exp=10, var=10
        new GammaModel(6, 2),  // exp=12, var=24
        new GammaModel(3, 3),  // exp= 9, var=27
      }, 1, 2));
    }
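    // Driver used by all tests above: runs 'reps' (30) independent repetitions in which the given
    // policy pulls the bandit produced by banditFactory for roughly 'maxIt' (1E5) iterations.
    // The bandit and the policy draw from separate Random instances derived from randSeed, so every
    // policy is evaluated against the same reward-generating process. Every 500 pulls one
    // semicolon-separated line is printed: policy; iteration; total reward; pulls of the arm with the
    // best expected reward; pulls of the arm with the best maximal reward; best reward seen so far;
    // average reward; and the fractions of pulls spent on the two optimal arms.
    // (IBandit is assumed here as the common interface of the bandit classes used above.)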
    private void TestPolicy(int randSeed, IBanditPolicy policy, Func<Random, IBandit> banditFactory) {
      var maxIt = 1E5;
      var reps = 30; // independent runs
      //var regretForIteration = new Dictionary<int, List<double>>();
      //var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
      //var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
      //var bestRewardForIteration = new Dictionary<int, List<double>>();
      var globalRandom = new Random(randSeed);
      var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
      var policyRandom = new Random(globalRandom.Next());

      // calculate statistics
      for (int r = 0; r < reps; r++) {
        var nextLogStep = 1;
        var b = banditFactory(banditRandom);
        var totalReward = 0.0;
        int totalPullsOfOptimalArmExp = 0;
        int totalPullsOfOptimalArmMax = 0;
        var maxReward = double.NegativeInfinity;
        var actionInfos = Enumerable.Range(0, b.NumArms).Select(_ => policy.CreateActionInfo()).ToArray();
        for (int i = 0; i <= maxIt + 1; i++) {
          var selectedAction = policy.SelectAction(policyRandom, actionInfos);
          var reward = b.Pull(selectedAction);
          actionInfos[selectedAction].UpdateReward(reward);

          // collect stats
          if (selectedAction == b.OptimalExpectedRewardArm) totalPullsOfOptimalArmExp++;
          if (selectedAction == b.OptimalMaximalRewardArm) totalPullsOfOptimalArmMax++;
          totalReward += reward;
          maxReward = Math.Max(maxReward, reward);

          if (i == nextLogStep) {
            nextLogStep += 500;
            //if (!regretForIteration.ContainsKey(i)) {
            //  regretForIteration.Add(i, new List<double>());
            //}
            //regretForIteration[i].Add(totalRegret / i);
            //
            //if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
            //  numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
            //}
            //numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
            //
            //if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
            //  numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
            //}
            //numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
            //
            //if (!bestRewardForIteration.ContainsKey(i)) {
            //  bestRewardForIteration.Add(i, new List<double>());
            //}
            //bestRewardForIteration[i].Add(bestReward);

            Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
              policy, i, totalReward, totalPullsOfOptimalArmExp, totalPullsOfOptimalArmMax, maxReward,
              totalReward / i, totalPullsOfOptimalArmExp / (double)i, totalPullsOfOptimalArmMax / (double)i);
          }
        }
      }

      // print
      //foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
      //  Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
      //    p,
      //    regretForIteration[p].Average(),
      //    regretForIteration[p].Min(),
      //    regretForIteration[p].Max(),
      //    numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
      //    numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
      //    string.Join(" ", bestRewardForIteration[p])
      //  );
      //}
    }
  }
}