Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/Test/TestBanditPolicies.cs @ 12940

Visit:

Last change on this file since 12940 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 26.3 KB

Rev	Line
[11708]	1	using System;
	2	using System.Linq;
	3	using System.Collections.Generic;
	4	using System.Globalization;
	5	using HeuristicLab.Algorithms.Bandits;
[11742]	6	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
[11730]	7	using HeuristicLab.Algorithms.Bandits.Models;
[11708]	8	using Microsoft.VisualStudio.TestTools.UnitTesting;
	9
	10	namespace HeuristicLab.Problems.GrammaticalOptimization.Test {
	11	[TestClass]
	12	public class TestBanditPolicies {
[11732]	13	[TestMethod]
	14	public void ComparePoliciesForGaussianUnknownVarianceBandit() {
	15	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	16	var randSeed = 31415;
	17	var nArms = 20;
[11730]	18
[12876]	19	// some of the policies are specific to rewards in [0..1], e.g. Treshold Ascent or UCB1
	20	TestPolicyGaussianUnknownVariance(randSeed, nArms, new ExtremeHunterPolicy());
	21	TestPolicyGaussianUnknownVariance(randSeed, nArms, new IntervalEstimationPolicy());
	22	//TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBPolicy(10));
	23	TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCBNormalPolicy());
	24	TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1TunedPolicy());
	25	TestPolicyGaussianUnknownVariance(randSeed, nArms, new UCB1Policy(10));
	26	TestPolicyGaussianUnknownVariance(randSeed, nArms, new ActiveLearningPolicy(10));
	27	TestPolicyGaussianUnknownVariance(randSeed, nArms, new ChernoffIntervalEstimationPolicy());
	28	TestPolicyGaussianUnknownVariance(randSeed, nArms, new BoltzmannExplorationPolicy(100));
	29	TestPolicyGaussianUnknownVariance(randSeed, nArms, new EpsGreedyPolicy(0.1));
	30	TestPolicyGaussianUnknownVariance(randSeed, nArms, new RandomPolicy());
	31	}
[11742]	32
[12876]	33	[TestMethod]
	34	// test case I as described in Extreme Bandits paper
	35	public void ComparePoliciesExtremeBandits1() {
	36	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	37	var randSeed = 31415;
	38	TestPolicyExtremeBandit1(randSeed, new RandomPolicy());
[12893]	39	TestPolicyExtremeBandit1(randSeed, new SingleArmPolicy(1));
[12876]	40	TestPolicyExtremeBandit1(randSeed, new ExtremeHunterPolicy());
	41	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10000));
[12893]	42	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1000));
	43	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(100));
	44	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(10));
	45	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(2));
	46	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(1));
	47	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.5));
	48	TestPolicyExtremeBandit1(randSeed, new UCB1Policy(0.1));
[12876]	49	TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.1));
[12893]	50	TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.05));
	51	TestPolicyExtremeBandit1(randSeed, new EpsGreedyPolicy(0.01));
[12876]	52	}
[11730]	53
[12876]	54	[TestMethod]
	55	// test case II as described in Extreme Bandits paper
	56	public void ComparePoliciesExtremeBandits2() {
	57	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	58	var randSeed = 31415;
[12893]	59	//TestPolicyExtremeBandit2(randSeed, new RandomPolicy());
	60	//TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(0));
	61	//TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(1));
	62	//TestPolicyExtremeBandit2(randSeed, new SingleArmPolicy(2));
	63	// TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy());
	64	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 30));
	65	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 30));
	66	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 30));
	67	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 30));
	68	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 1, minPulls: 100));
	69	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 2, minPulls: 100));
	70	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 0.5, minPulls: 100));
	71	TestPolicyExtremeBandit2(randSeed, new ExtremeHunterPolicy(D: 5, minPulls: 100));
	72	// TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10000));
	73	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1000));
	74	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(100));
	75	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(10));
	76	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(2));
	77	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(1));
	78	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.5));
	79	//TestPolicyExtremeBandit2(randSeed, new UCB1Policy(0.1));
	80	//TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.1));
	81	//TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.05));
	82	//TestPolicyExtremeBandit2(randSeed, new EpsGreedyPolicy(0.01));
	83	//TestPolicyExtremeBandit2(randSeed, new ThresholdAscentPolicy());
[11732]	84	}
	85
[11708]	86	[TestMethod]
[12893]	87	// my own test case for ExtremeHunter
	88	// using truncated normal distributions
	89	public void ComparePoliciesExtremeBandits3() {
	90	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	91	var randSeed = 31415;
	92	TestPolicyExtremeBandit3(randSeed, new RandomPolicy());
	93	TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(0));
	94	TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(1));
	95	TestPolicyExtremeBandit3(randSeed, new SingleArmPolicy(2));
	96	TestPolicyExtremeBandit3(randSeed, new ExtremeHunterPolicy());
	97	TestPolicyExtremeBandit3(randSeed, new UCB1Policy(3));
	98	TestPolicyExtremeBandit3(randSeed, new EpsGreedyPolicy(0.1));
	99	}
	100
	101	[TestMethod]
	102	// a unit test to experiment with bandit policies for completing a GP sentence
	103	public void ComparePoliciesSentenceCompletionProblem() {
	104	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	105	var randSeed = 31415;
	106
	107
	108	Func<Random, IBandit> sentenceCompletionBanditFactory = (banditRandom) => {
	109	var problem = new SymbolicRegressionPoly10Problem();
	110	return new SentenceBandit(banditRandom, problem, "ab+cd+e*f+E", 23);
	111	};
	112
	113	// ignore number of arms
	114
	115	// var b = sentenceCompletionBanditFactory(new Random());
	116	// all reference policies (always pulling one arm)
	117	// for (int i = 0; i < b.NumArms; i++) {
	118	// TestPolicy(randSeed, new SingleArmPolicy(i), sentenceCompletionBanditFactory);
	119	// }
	120
	121	// for the completition of ab+cd+ef+ag*i+E the arms 12, 15, and 19 are optimal
	122	TestPolicy(randSeed, new SingleArmPolicy(12), sentenceCompletionBanditFactory);
	123
	124	TestPolicy(randSeed, new RandomPolicy(), sentenceCompletionBanditFactory);
	125
	126	TestPolicy(randSeed, new ExtremeHunterPolicy(), sentenceCompletionBanditFactory);
	127	TestPolicy(randSeed, new ExtremeHunterPolicy(D: 0.5), sentenceCompletionBanditFactory);
	128	TestPolicy(randSeed, new UCB1Policy(3), sentenceCompletionBanditFactory);
	129	TestPolicy(randSeed, new UCB1Policy(1), sentenceCompletionBanditFactory);
	130	TestPolicy(randSeed, new UCB1Policy(0.5), sentenceCompletionBanditFactory);
	131	TestPolicy(randSeed, new ThresholdAscentPolicy(), sentenceCompletionBanditFactory);
	132	TestPolicy(randSeed, new EpsGreedyPolicy(0.1), sentenceCompletionBanditFactory);
	133	}
	134
	135	[TestMethod]
[11710]	136	public void ComparePoliciesForBernoulliBandit() {
[11730]	137	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
[11732]	138	var randSeed = 31415;
[11730]	139	var nArms = 20;
[11727]	140	//Console.WriteLine("Exp3 (gamma=0.01)");
	141	//TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
	142	//Console.WriteLine("Exp3 (gamma=0.05)");
	143	//estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
[11732]	144	Console.WriteLine("Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new BernoulliThompsonSamplingPolicy());
	145	Console.WriteLine("Generic Thompson (Bernoulli)"); TestPolicyBernoulli(randSeed, nArms, new GenericThompsonSamplingPolicy(new BernoulliModel()));
[11708]	146	Console.WriteLine("Random");
[11732]	147	TestPolicyBernoulli(randSeed, nArms, new RandomPolicy());
[11710]	148	Console.WriteLine("UCB1");
[11732]	149	TestPolicyBernoulli(randSeed, nArms, new UCB1Policy());
[11710]	150	Console.WriteLine("UCB1Tuned");
[11732]	151	TestPolicyBernoulli(randSeed, nArms, new UCB1TunedPolicy());
[11710]	152	Console.WriteLine("UCB1Normal");
[11732]	153	TestPolicyBernoulli(randSeed, nArms, new UCBNormalPolicy());
[11708]	154	Console.WriteLine("Eps(0.01)");
[11732]	155	TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.01));
[11708]	156	Console.WriteLine("Eps(0.05)");
[11732]	157	TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.05));
[11727]	158	//Console.WriteLine("Eps(0.1)");
[11732]	159	//TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.1));
[11727]	160	//Console.WriteLine("Eps(0.2)");
[11732]	161	//TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.2));
[11727]	162	//Console.WriteLine("Eps(0.5)");
[11732]	163	//TestPolicyBernoulli(randSeed, nArms, new EpsGreedyPolicy(0.5));
	164	Console.WriteLine("UCT(0.1)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.1));
	165	Console.WriteLine("UCT(0.5)"); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(0.5));
	166	Console.WriteLine("UCT(1) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(1));
	167	Console.WriteLine("UCT(2) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(2));
	168	Console.WriteLine("UCT(5) "); TestPolicyBernoulli(randSeed, nArms, new UCTPolicy(5));
	169	Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.1));
	170	Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(0.5));
	171	Console.WriteLine("BoltzmannExploration(1) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(1));
	172	Console.WriteLine("BoltzmannExploration(10) "); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(10));
	173	Console.WriteLine("BoltzmannExploration(100)"); TestPolicyBernoulli(randSeed, nArms, new BoltzmannExplorationPolicy(100));
	174	Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.01));
	175	Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.05));
	176	Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyBernoulli(randSeed, nArms, new ChernoffIntervalEstimationPolicy(0.1));
[11730]	177
	178	// not applicable to bernoulli rewards
	179	//Console.WriteLine("ThresholdAscent(10, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
	180	//Console.WriteLine("ThresholdAscent(10, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
	181	//Console.WriteLine("ThresholdAscent(10, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
	182	//Console.WriteLine("ThresholdAscent(100, 0.01) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
	183	//Console.WriteLine("ThresholdAscent(100, 0.05) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
	184	//Console.WriteLine("ThresholdAscent(100, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
	185	//Console.WriteLine("ThresholdAscent(1000, 0.01)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
	186	//Console.WriteLine("ThresholdAscent(1000, 0.05)"); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
	187	//Console.WriteLine("ThresholdAscent(1000, 0.1) "); TestPolicyBernoulli(globalRand, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
[11708]	188	}
[11730]	189
[11710]	190	[TestMethod]
[11732]	191	public void ComparePoliciesForGaussianBandit() {
[11730]	192	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	193
[11732]	194	var randSeed = 31415;
[11730]	195	var nArms = 20;
[11742]	196	Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
	197	Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
	198	Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
	199	Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussian(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
	200	Console.WriteLine("Generic Thompson (Gaussian fixed var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1)));
	201	Console.WriteLine("Generic Thompson (Gaussian unknown var)"); TestPolicyGaussian(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
[11732]	202	Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
	203	Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussian(randSeed, nArms, new GaussianThompsonSamplingPolicy());
[11742]	204
[11730]	205	/*
[11732]	206	Console.WriteLine("Random"); TestPolicyNormal(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
	207	Console.WriteLine("UCB1"); TestPolicyNormal(randSeed, nArms, new UCB1Policy(nArms));
	208	Console.WriteLine("UCB1Tuned"); TestPolicyNormal(randSeed, nArms, new UCB1TunedPolicy(nArms));
	209	Console.WriteLine("UCB1Normal"); TestPolicyNormal(randSeed, nArms, new UCBNormalPolicy(nArms));
[11727]	210	//Console.WriteLine("Exp3 (gamma=0.01)");
[11732]	211	//TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
[11727]	212	//Console.WriteLine("Exp3 (gamma=0.05)");
[11732]	213	//TestPolicyNormal(randSeed, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
	214	Console.WriteLine("Eps(0.01)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
	215	Console.WriteLine("Eps(0.05)"); TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
[11727]	216	//Console.WriteLine("Eps(0.1)");
[11732]	217	//TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
[11727]	218	//Console.WriteLine("Eps(0.2)");
[11732]	219	//TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
[11727]	220	//Console.WriteLine("Eps(0.5)");
[11732]	221	//TestPolicyNormal(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
	222	Console.WriteLine("UCT(0.1)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.1));
	223	Console.WriteLine("UCT(0.5)"); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 0.5));
	224	Console.WriteLine("UCT(1) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 1));
	225	Console.WriteLine("UCT(2) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 2));
	226	Console.WriteLine("UCT(5) "); TestPolicyNormal(randSeed, nArms, new UCTPolicy(nArms, 5));
	227	Console.WriteLine("BoltzmannExploration(0.1)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.1));
	228	Console.WriteLine("BoltzmannExploration(0.5)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 0.5));
	229	Console.WriteLine("BoltzmannExploration(1) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
	230	Console.WriteLine("BoltzmannExploration(10) "); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
	231	Console.WriteLine("BoltzmannExploration(100)"); TestPolicyNormal(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
	232	Console.WriteLine("ChernoffIntervalEstimationPolicy(0.01)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.01));
	233	Console.WriteLine("ChernoffIntervalEstimationPolicy(0.05)"); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.05));
[11742]	234	Console.WriteLine("ChernoffIntervalEstimationPolicy(0.1) "); TestPolicyNormal(randSeed, nArms, new ChernoffIntervalEstimationPolicy(nArms, 0.1));
[11732]	235	Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
	236	Console.WriteLine("ThresholdAscent(10,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.05));
	237	Console.WriteLine("ThresholdAscent(10,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.1));
	238	Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
	239	Console.WriteLine("ThresholdAscent(100,0.05) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.05));
	240	Console.WriteLine("ThresholdAscent(100,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.1));
	241	Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
	242	Console.WriteLine("ThresholdAscent(1000,0.05)"); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.05));
	243	Console.WriteLine("ThresholdAscent(1000,0.1) "); TestPolicyNormal(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.1));
[11730]	244	*/
[11710]	245	}
[11708]	246
[11730]	247	[TestMethod]
	248	public void ComparePoliciesForGaussianMixtureBandit() {
	249	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
[11732]	250	var randSeed = 31415;
[11730]	251	var nArms = 20;
	252
[11745]	253	Console.WriteLine("Generic Thompson (Gaussian Mixture)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianMixtureModel()));
	254	// Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
	255	// Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
	256	// Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
	257	// Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
	258	// Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
	259	// Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
	260	// Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
	261	// Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
	262
[11730]	263	/*
[11732]	264	Console.WriteLine("Random"); TestPolicyGaussianMixture(randSeed, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
	265	Console.WriteLine("UCB1"); TestPolicyGaussianMixture(randSeed, nArms, new UCB1Policy(nArms));
	266	Console.WriteLine("UCB1Tuned "); TestPolicyGaussianMixture(randSeed, nArms, new UCB1TunedPolicy(nArms));
	267	Console.WriteLine("UCB1Normal"); TestPolicyGaussianMixture(randSeed, nArms, new UCBNormalPolicy(nArms));
	268	Console.WriteLine("Eps(0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
	269	Console.WriteLine("Eps(0.05) "); TestPolicyGaussianMixture(randSeed, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
	270	Console.WriteLine("UCT(1) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 1));
	271	Console.WriteLine("UCT(2) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 2));
	272	Console.WriteLine("UCT(5) "); TestPolicyGaussianMixture(randSeed, nArms, new UCTPolicy(nArms, 5));
	273	Console.WriteLine("BoltzmannExploration(1) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 1));
	274	Console.WriteLine("BoltzmannExploration(10) "); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 10));
	275	Console.WriteLine("BoltzmannExploration(100)"); TestPolicyGaussianMixture(randSeed, nArms, new BoltzmannExplorationPolicy(new Random(seedForPolicy), nArms, 100));
[11730]	276
[11732]	277	Console.WriteLine("ThresholdAscent(10,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10, 0.01));
	278	Console.WriteLine("ThresholdAscent(100,0.01) "); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 100, 0.01));
	279	Console.WriteLine("ThresholdAscent(1000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 1000, 0.01));
	280	Console.WriteLine("ThresholdAscent(10000,0.01)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(nArms, 10000, 0.01));
[11730]	281	*/
	282	}
	283
	284
[11742]	285	private void TestPolicyBernoulli(int randSeed, int nArms, IBanditPolicy policy) {
[12893]	286	TestPolicy(randSeed, policy, (banditRandom) => new BernoulliBandit(banditRandom, nArms));
[11730]	287	}
[11742]	288	private void TestPolicyGaussian(int randSeed, int nArms, IBanditPolicy policy) {
[12893]	289	TestPolicy(randSeed, policy, (banditRandom) => new TruncatedNormalBandit(banditRandom, nArms));
[11730]	290	}
[11742]	291	private void TestPolicyGaussianMixture(int randSeed, int nArms, IBanditPolicy policy) {
[12893]	292	TestPolicy(randSeed, policy, (banditRandom) => new GaussianMixtureBandit(banditRandom, nArms));
[11730]	293	}
[11742]	294	private void TestPolicyGaussianUnknownVariance(int randSeed, int nArms, IBanditPolicy policy) {
[12893]	295	TestPolicy(randSeed, policy, (banditRandom) => new GaussianBandit(banditRandom, nArms, 0, 10));
[11732]	296	}
[11730]	297
[12876]	298	private void TestPolicyExtremeBandit1(int randSeed, IBanditPolicy policy) {
[12893]	299	TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 5, 1.1, 2 }));
[12876]	300	}
	301	private void TestPolicyExtremeBandit2(int randSeed, IBanditPolicy policy) {
[12893]	302	TestPolicy(randSeed, policy, (banditRandom) => new ParetoBandit(banditRandom, new double[] { 1.5, 1.1, 3 }, new double[] { 0.0, 0.8, 0.0 }, 0, 1));
[12876]	303	}
[12893]	304	private void TestPolicyExtremeBandit3(int randSeed, IBanditPolicy policy) {
	305	TestPolicy(randSeed, policy, (banditRandom) => new Bandit(banditRandom, new IModel[]
	306	{
	307	new GammaModel(10, 1), // exp=10, var=10
	308	new GammaModel(6, 2), // exp=12, var=24
	309	new GammaModel(3, 3), // exp= 9, var=27
	310	}, 1, 2));
	311	}
[11730]	312
[12876]	313
[12893]	314	private void TestPolicy(int randSeed, IBanditPolicy policy, Func<Random, IBandit> banditFactory) {
	315	var maxIt = 1E5;
[12876]	316	var reps = 30; // independent runs
	317	//var regretForIteration = new Dictionary<int, List<double>>();
	318	//var numberOfPullsOfSuboptimalArmsForExp = new Dictionary<int, double>();
	319	//var numberOfPullsOfSuboptimalArmsForMax = new Dictionary<int, double>();
	320	//var bestRewardForIteration = new Dictionary<int, List<double>>();
[11732]	321	var globalRandom = new Random(randSeed);
	322	var banditRandom = new Random(globalRandom.Next()); // bandits must produce the same rewards for each test
	323	var policyRandom = new Random(globalRandom.Next());
	324
[11708]	325	// calculate statistics
	326	for (int r = 0; r < reps; r++) {
	327	var nextLogStep = 1;
[12893]	328	var b = banditFactory(banditRandom);
	329	var totalReward = 0.0;
	330	int totalPullsOfOptimalArmExp = 0;
	331	int totalPullsOfOptimalArmMax = 0;
	332	var maxReward = double.NegativeInfinity;
	333	var actionInfos = Enumerable.Range(0, b.NumArms).Select(_ => policy.CreateActionInfo()).ToArray();
	334	for (int i = 0; i <= maxIt + 1; i++) {
[11732]	335	var selectedAction = policy.SelectAction(policyRandom, actionInfos);
[11708]	336	var reward = b.Pull(selectedAction);
[11732]	337	actionInfos[selectedAction].UpdateReward(reward);
[11710]	338
[11730]	339	// collect stats
[12893]	340	if (selectedAction == b.OptimalExpectedRewardArm) totalPullsOfOptimalArmExp++;
	341	if (selectedAction == b.OptimalMaximalRewardArm) totalPullsOfOptimalArmMax++;
	342	totalReward += reward;
	343	maxReward = Math.Max(maxReward, reward);
[11730]	344
[12893]	345	if (i == nextLogStep) {
	346	nextLogStep += 500;
[12876]	347	//if (!regretForIteration.ContainsKey(i)) {
	348	// regretForIteration.Add(i, new List<double>());
	349	//}
	350	//regretForIteration[i].Add(totalRegret / i);
	351	//
	352	//if (!numberOfPullsOfSuboptimalArmsForExp.ContainsKey(i)) {
	353	// numberOfPullsOfSuboptimalArmsForExp.Add(i, 0.0);
	354	//}
	355	//numberOfPullsOfSuboptimalArmsForExp[i] += totalPullsOfSuboptimalArmsExp;
	356	//
	357	//if (!numberOfPullsOfSuboptimalArmsForMax.ContainsKey(i)) {
	358	// numberOfPullsOfSuboptimalArmsForMax.Add(i, 0.0);
	359	//}
	360	//numberOfPullsOfSuboptimalArmsForMax[i] += totalPullsOfSuboptimalArmsMax;
	361	//
	362	//if (!bestRewardForIteration.ContainsKey(i)) {
	363	// bestRewardForIteration.Add(i, new List<double>());
	364	//}
	365	//bestRewardForIteration[i].Add(bestReward);
	366	Console.WriteLine("{0};{1,8};{2,7:F5};{3,7:F2};{4,7:F2};{5:F2};{6:F2};{7:F2};{8:F2}",
[12893]	367	policy, i, totalReward, totalPullsOfOptimalArmExp, totalPullsOfOptimalArmMax, maxReward,
	368	totalReward / i, totalPullsOfOptimalArmExp / (double)i, totalPullsOfOptimalArmMax / (double)i);
[11710]	369	}
	370	}
	371	}
	372	// print
[12876]	373	//foreach (var p in regretForIteration.Keys.OrderBy(k => k)) {
	374	// Console.WriteLine("iter {0,8} regret avg {1,7:F5} min {2,7:F5} max {3,7:F5} suboptimal pulls (exp) {4,7:F2} suboptimal pulls (max) {5,7:F2} max rewards: {6}",
	375	// p,
	376	// regretForIteration[p].Average(),
	377	// regretForIteration[p].Min(),
	378	// regretForIteration[p].Max(),
	379	// numberOfPullsOfSuboptimalArmsForExp[p] / (double)reps,
	380	// numberOfPullsOfSuboptimalArmsForMax[p] / (double)reps,
	381	// string.Join(" ", bestRewardForIteration[p])
	382	// );
	383	//}
[11710]	384	}
	385
[11708]	386	}
	387	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences